From 425d4f2089710fcc4ac15cb0dedd3eac2ab1d126 Mon Sep 17 00:00:00 2001
From: Justin Lebar
Date: Thu, 14 Mar 2019 10:31:22 -0700
Subject: [PATCH] [XLA:GPU] Print thunk kind in thunk schedule.

Particularly helpful for distinguishing between kWhile and kFor loops.
We've got this info in the backend-config, but that's not as obvious.

PiperOrigin-RevId: 238470458
---
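Note (below the three-dash line, where `git am` ignores commentary): the new
ThunkSchedule::ToString output rests on one formatting idea: pad every kind
name to the width of the longest one so the HLO column lines up. The sketch
below shows that idea using only the C++ standard library; the row data is
invented for illustration, and none of the identifiers are XLA APIs.

#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Hypothetical (kind, instruction) rows, standing in for the real
  // (ThunkKindToString(kind), hlo_instruction()->ToString()) pairs.
  std::vector<std::pair<std::string, std::string>> rows = {
      {"kKernel", "%fusion.1 = ..."},
      {"kWhile", "%while.2 = ..."},
      {"kMemzero", "%zeroed.3 = ..."},
  };

  // One pass up front to find the longest kind name; this is the role the
  // absl::c_max_element call plays in the patch.
  std::size_t max_len = 0;
  for (const auto& row : rows) {
    max_len = std::max(max_len, row.first.size());
  }

  // Pad each kind out to max_len so the instruction column aligns.
  for (const auto& row : rows) {
    std::cout << row.first << std::string(max_len - row.first.size(), ' ')
              << '\t' << row.second << '\n';
  }
  return 0;
}

Computing the column width once before the loop, rather than per row, is the
same design choice the patch makes in ThunkSchedule::ToString.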
 tensorflow/compiler/xla/service/gpu/BUILD     |  3 +-
 tensorflow/compiler/xla/service/gpu/thunk.cc  | 44 ++++++++++---------
 tensorflow/compiler/xla/service/gpu/thunk.h   |  1 +
 .../xla/service/gpu/thunk_schedule.cc         | 28 +++++++++++-
 4 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 8c761df871e..53cb8c4f49e 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -372,7 +372,7 @@ cc_library(
         ":hlo_execution_profiler",
         ":infeed_manager",
         ":ir_emission_utils",
-        ":nccl_all_reduce_thunk",
+        ":nccl_all_reduce_thunk",  # fixdeps: keep
         ":outfeed_manager",
         ":partition_assignment",
         ":stream_assignment",
@@ -407,6 +407,7 @@ cc_library(
         "//tensorflow/stream_executor",
         "//tensorflow/stream_executor:blas",
         "//tensorflow/stream_executor:device_memory",
+        "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.cc b/tensorflow/compiler/xla/service/gpu/thunk.cc
index 6b98cbb6570..f43e05904dd 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/thunk.cc
@@ -18,48 +18,52 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-std::ostream& operator<<(std::ostream& os, Thunk::Kind kind) {
+absl::string_view ThunkKindToString(Thunk::Kind kind) {
   switch (kind) {
     case Thunk::kCholesky:
-      return os << "kCholesky";
+      return "kCholesky";
     case Thunk::kConditional:
-      return os << "kConditional";
+      return "kConditional";
     case Thunk::kConvolution:
-      return os << "kConvolution";
+      return "kConvolution";
     case Thunk::kCopy:
-      return os << "kCopy";
+      return "kCopy";
     case Thunk::kCudnnBatchNormBackward:
-      return os << "kCudnnBatchNormBackward";
+      return "kCudnnBatchNormBackward";
     case Thunk::kCudnnBatchNormForwardInference:
-      return os << "kCudnnBatchNormForwardInference";
+      return "kCudnnBatchNormForwardInference";
     case Thunk::kCudnnBatchNormForwardTraining:
-      return os << "kCudnnBatchNormForwardTraining";
+      return "kCudnnBatchNormForwardTraining";
     case Thunk::kNcclAllReduce:
-      return os << "kNcclAllReduce";
+      return "kNcclAllReduce";
     case Thunk::kFft:
-      return os << "kFft";
+      return "kFft";
     case Thunk::kGemm:
-      return os << "kGemm";
+      return "kGemm";
     case Thunk::kInfeed:
-      return os << "kInfeed";
+      return "kInfeed";
     case Thunk::kKernel:
-      return os << "kKernel";
+      return "kKernel";
     case Thunk::kMemset32BitValue:
-      return os << "kMemset32BitValue";
+      return "kMemset32BitValue";
     case Thunk::kMemzero:
-      return os << "kMemzero";
+      return "kMemzero";
     case Thunk::kOutfeed:
-      return os << "kOutfeed";
+      return "kOutfeed";
     case Thunk::kSequential:
-      return os << "kSequential";
+      return "kSequential";
     case Thunk::kTriangularSolve:
-      return os << "kTriangularSolve";
+      return "kTriangularSolve";
     case Thunk::kTuple:
-      return os << "kTuple";
+      return "kTuple";
     case Thunk::kWhile:
-      return os << "kWhile";
+      return "kWhile";
   }
 }
 
+std::ostream& operator<<(std::ostream& os, Thunk::Kind kind) {
+  return os << ThunkKindToString(kind);
+}
+
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h
index 442506f002c..56d1176ff4e 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/thunk.h
@@ -106,6 +106,7 @@ class Thunk {
 // A sequence of thunks.
 using ThunkSequence = std::vector<std::unique_ptr<Thunk>>;
 
+absl::string_view ThunkKindToString(Thunk::Kind);
 std::ostream& operator<<(std::ostream& os, Thunk::Kind kind);
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc b/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc
index 25bad67bab9..daa5f33e560 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc
+++ b/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc
@@ -14,7 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h"
 
+#include 
+#include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/strings/str_format.h"
 #include "tensorflow/compiler/xla/array2d.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -144,11 +147,32 @@ const std::list<const Thunk*>& ThunkSchedule::DependsOn(
 }
 
 string ThunkSchedule::ToString() const {
+  if (thunk_total_order_.empty()) {
+    return "No thunks.";
+  }
+
+  const Thunk* thunk_with_longest_kind = *absl::c_max_element(
+      thunk_total_order_, [](const Thunk* a, const Thunk* b) {
+        return ThunkKindToString(a->kind()).length() <
+               ThunkKindToString(b->kind()).length();
+      });
+  int64 max_thunk_kind_len =
+      ThunkKindToString(thunk_with_longest_kind->kind()).length();
+
   string result = "Total order:\n";
   for (Thunk* thunk : thunk_total_order_) {
-    absl::StrAppend(&result, "\t", thunk->hlo_instruction()->ToString(), "\n");
+    // Write out the thunk kind, padded out to max_thunk_kind_len.
+    absl::string_view kind_str = ThunkKindToString(thunk->kind());
+    absl::StrAppend(&result, kind_str,
+                    string(max_thunk_kind_len - kind_str.length(), ' '), "\t");
+    if (thunk->hlo_instruction() != nullptr) {
+      absl::StrAppend(&result, thunk->hlo_instruction()->ToString());
+    } else {
+      absl::StrAppend(&result, "(no HloInstruction)");
+    }
+    absl::StrAppend(&result, "\n");
   }
-  absl::StrAppend(&result, "Dependencies:\n");
+  absl::StrAppend(&result, "\nDependencies:\n");
   for (const auto& entry : depends_on_) {
     const Thunk* dependent = entry.first;
     for (const Thunk* dependency : entry.second) {