[XLA/GPU] Move Thunk::Initialize() calls before ExecuteThunks().

Separating initialization from execution makes it easier for the LHLO graph to replace the execution path.

PiperOrigin-RevId: 313659758
Change-Id: I30f47cf6186ee907bfc67701be2d8c190f3b524e
This commit is contained in:
Tim Shen 2020-05-28 14:26:22 -07:00 committed by TensorFlower Gardener
parent 8ab1c251ed
commit 68e13f00e1

View File

@ -176,7 +176,6 @@ Status GpuExecutable::ExecuteThunks(
// module, we won't get any data, but that's probably an OK trade-off.
ScopedAnnotation annotation([&] { return thunk->profile_annotation(); });
TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor));
int32 stream_no =
thunk_schedule_->StreamNumberForHlo(*thunk->hlo_instruction());
se::Stream* stream =
@ -387,6 +386,10 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
assignment_.get(), executor->device_ordinal(), memory_allocator));
}
for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor));
}
TF_RETURN_IF_ERROR(ExecuteThunks(run_options, *buffer_allocations,
block_host_until_done,
hlo_execution_profile));