[XLA/GPU] Move Thunk::Initialize() calls before ExecuteThunks().

Separating initialization from execution makes it easier for the LHLO graph to replace the execution path.

PiperOrigin-RevId: 313659758
Change-Id: I30f47cf6186ee907bfc67701be2d8c190f3b524e
This commit is contained in:
Tim Shen 2020-05-28 14:26:22 -07:00 committed by TensorFlower Gardener
parent 8ab1c251ed
commit 68e13f00e1

View File

@ -176,7 +176,6 @@ Status GpuExecutable::ExecuteThunks(
// module, we won't get any data, but that's probably an OK trade-off.
ScopedAnnotation annotation([&] { return thunk->profile_annotation(); });
TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor));
int32 stream_no =
thunk_schedule_->StreamNumberForHlo(*thunk->hlo_instruction());
se::Stream* stream =
@ -387,6 +386,10 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
assignment_.get(), executor->device_ordinal(), memory_allocator));
}
for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor));
}
TF_RETURN_IF_ERROR(ExecuteThunks(run_options, *buffer_allocations,
block_host_until_done,
hlo_execution_profile));