[XLA:GPU] Remove unused Thunk::ShouldBlockFutureThunks function.
PiperOrigin-RevId: 196206896
commit 5a492ef9bb
parent 2656548f3e
tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -164,9 +164,6 @@ Status GpuExecutable::ExecuteThunks(
                                 sub_streams, hlo_module_->entry_computation());
   uint64 start_micros = tensorflow::Env::Default()->NowMicros();
 
-  // The next event enqueued on stream N must not run until the thunk at
-  // last_blocking_thunk_for_stream[N] completes.
-  std::map<int32, const Thunk*> last_blocking_thunk_for_stream;
   std::map<const Thunk*, std::unique_ptr<se::Event>> thunk_to_finish_event;
   for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
     TF_RETURN_IF_ERROR(thunk->Initialize(*this));
@@ -179,18 +176,10 @@ Status GpuExecutable::ExecuteThunks(
       stream->ThenWaitFor(FindOrDie(thunk_to_finish_event, dependency).get());
     }
 
-    if (last_blocking_thunk_for_stream.count(stream_no)) {
-      stream->ThenWaitFor(FindOrDie(thunk_to_finish_event,
-                                    last_blocking_thunk_for_stream[stream_no])
-                              .get());
-      last_blocking_thunk_for_stream.erase(stream_no);
-    }
-
     // If this thunk requests it, wait for all currently-executing thunks to
     // finish. This is useful e.g. if the thunk is about to perform autotuning.
     if (thunk->ShouldHaltAllActivityBeforeRunning(stream)) {
       TF_RETURN_IF_ERROR(main_stream->BlockHostUntilDone());
-      last_blocking_thunk_for_stream.clear();
     }
 
     profiler.StartOperation();
@@ -198,22 +187,11 @@ Status GpuExecutable::ExecuteThunks(
             << thunk->hlo_instruction()->ToString() << " on stream "
             << stream_no;
     TF_RETURN_IF_ERROR(thunk->ExecuteOnStream(buffer_allocations, stream));
-    if (thunk_schedule_->Depended(thunk) || thunk->ShouldBlockFutureThunks()) {
+    if (thunk_schedule_->Depended(thunk)) {
       auto finish_event = MakeUnique<se::Event>(main_stream->parent());
       finish_event->Init();
       stream->ThenRecordEvent(finish_event.get());
       thunk_to_finish_event[thunk] = std::move(finish_event);
-
-      if (thunk->ShouldBlockFutureThunks()) {
-        // Set last_blocking_thunk_for_stream on all streams other than this one
-        // so that all other streams will wait for this thunk to complete before
-        // executing any events that occur later in the total order.
-        for (int32 i = 0; i < sub_streams.size() + 1; ++i) {
-          if (i != stream_no) {
-            last_blocking_thunk_for_stream[i] = thunk;
-          }
-        }
-      }
     }
     profiler.FinishOperation(thunk->hlo_instruction());
   }
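After this change, the only cross-stream synchronization left in ExecuteThunks is the event-based dependency wait visible in the hunks above: a thunk that later thunks depend on records a finish event on its stream, and each dependent thunk makes its own stream wait on that event before running. Below is a minimal, self-contained sketch of that record-then-wait pattern; the Stream, Event, and Thunk types are illustrative stand-ins (not se::Stream, se::Event, or the real XLA Thunk), and the stream assignment and dependency set are made up for the example.

#include <cstdio>
#include <map>
#include <memory>
#include <vector>

// Stand-in types; the real code uses se::Stream / se::Event and the XLA Thunk
// hierarchy, which are not reproduced here.
struct Event {
  int id;
};

struct Stream {
  int number;
  // Stand-ins for se::Stream::ThenRecordEvent / ThenWaitFor; they only log.
  void RecordEvent(const Event* e) {
    std::printf("stream %d: record event %d\n", number, e->id);
  }
  void WaitFor(const Event* e) {
    std::printf("stream %d: wait for event %d\n", number, e->id);
  }
};

struct Thunk {
  int id;
  std::vector<const Thunk*> depends_on;  // thunks that must finish first
};

int main() {
  // Two thunks on two streams; b depends on a, mimicking a thunk schedule.
  Thunk a{/*id=*/0, {}};
  Thunk b{/*id=*/1, {&a}};
  std::vector<Thunk*> total_order = {&a, &b};
  std::vector<Stream> streams = {{0}, {1}};

  std::map<const Thunk*, std::unique_ptr<Event>> thunk_to_finish_event;
  int next_event_id = 0;
  for (Thunk* thunk : total_order) {
    Stream& stream = streams[thunk->id % streams.size()];
    // Make this thunk's stream wait on the finish event of every dependency,
    // as ExecuteThunks does with ThenWaitFor.
    for (const Thunk* dep : thunk->depends_on) {
      stream.WaitFor(thunk_to_finish_event.at(dep).get());
    }
    std::printf("stream %d: run thunk %d\n", stream.number, thunk->id);
    // Record a finish event only if a later thunk depends on this one
    // (the stand-in for thunk_schedule_->Depended(thunk)).
    bool depended_on = (thunk == &a);  // b depends on a
    if (depended_on) {
      auto finish_event = std::make_unique<Event>();
      finish_event->id = next_event_id++;
      stream.RecordEvent(finish_event.get());
      thunk_to_finish_event[thunk] = std::move(finish_event);
    }
  }
  return 0;
}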
tensorflow/compiler/xla/service/gpu/thunk.h
@@ -89,16 +89,6 @@ class Thunk {
     return false;
   }
 
-  // Indicates whether thunks scheduled after this one should wait for this one
-  // to complete before running. For example, a convolution thunk creates a
-  // scratch allocator, then kicks off a convolution in cudnn via the stream
-  // executor. When the stream executor call returns, the scratch allocator goes
-  // out of scope, and the scratch memory is deallocated. In this case, the
-  // convolution thunk needs to return true so that future thunks wait for the
-  // convolution thunk to avoid reusing the deallocated memory until the
-  // convolution thunk is done with it.
-  virtual bool ShouldBlockFutureThunks() { return false; }
-
   // Execute the kernel for the thunk on the given stream. This method must be
   // called after Initialize and can be called multiple times over Thunk's
   // lifetime. Stream argument must be non-null.
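For context on what the deleted comment describes: had the hook ever been used, a subclass would simply have overridden it to return true. The sketch below is purely hypothetical (no such override existed, which is why the function is being removed); it declares a stand-in Thunk base rather than the real XLA class so that it compiles on its own.

#include <cstdio>

// Stand-in base class, not the real xla::gpu::Thunk; it reproduces only the
// virtual removed by this commit so the example is self-contained.
class Thunk {
 public:
  virtual ~Thunk() = default;
  virtual bool ShouldBlockFutureThunks() { return false; }
};

// Hypothetical thunk whose scratch memory is freed as soon as its
// ExecuteOnStream-style call returns; per the deleted comment, it would have
// returned true so later thunks waited for it before reusing that memory.
class ScratchUsingThunk : public Thunk {
 public:
  bool ShouldBlockFutureThunks() override { return true; }
};

int main() {
  ScratchUsingThunk thunk;
  std::printf("blocks future thunks: %d\n",
              static_cast<int>(thunk.ShouldBlockFutureThunks()));
  return 0;
}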