From 425d4f2089710fcc4ac15cb0dedd3eac2ab1d126 Mon Sep 17 00:00:00 2001
From: Justin Lebar
Date: Thu, 14 Mar 2019 10:31:22 -0700
Subject: [PATCH] [XLA:GPU] Print thunk kind in thunk schedule.

Particularly helpful for distinguishing between kWhile and kFor loops.
We've got this info in the backend-config, but that's not as obvious.

PiperOrigin-RevId: 238470458
---
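Note (below the three-dash line, where `git am` ignores commentary): the new
ThunkSchedule::ToString output rests on one formatting idea: pad every kind
name to the width of the longest one so the HLO column lines up. The sketch
below shows that idea using only the C++ standard library; the row data is
invented for illustration, and none of the identifiers are XLA APIs.

#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Hypothetical (kind, instruction) rows, standing in for the real
  // (ThunkKindToString(kind), hlo_instruction()->ToString()) pairs.
  std::vector<std::pair<std::string, std::string>> rows = {
      {"kKernel", "%fusion.1 = ..."},
      {"kWhile", "%while.2 = ..."},
      {"kMemzero", "%zeroed.3 = ..."},
  };

  // One pass up front to find the longest kind name; this is the role the
  // absl::c_max_element call plays in the patch.
  std::size_t max_len = 0;
  for (const auto& row : rows) {
    max_len = std::max(max_len, row.first.size());
  }

  // Pad each kind out to max_len so the instruction column aligns.
  for (const auto& row : rows) {
    std::cout << row.first << std::string(max_len - row.first.size(), ' ')
              << '\t' << row.second << '\n';
  }
  return 0;
}

Computing the column width once before the loop, rather than per row, is the
same design choice the patch makes in ThunkSchedule::ToString.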
 tensorflow/compiler/xla/service/gpu/BUILD     |  3 +-
 tensorflow/compiler/xla/service/gpu/thunk.cc  | 44 ++++++++++---------
 tensorflow/compiler/xla/service/gpu/thunk.h   |  1 +
 .../xla/service/gpu/thunk_schedule.cc         | 28 +++++++++++-
 4 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 8c761df871e..53cb8c4f49e 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -372,7 +372,7 @@ cc_library(
         ":hlo_execution_profiler",
         ":infeed_manager",
         ":ir_emission_utils",
-        ":nccl_all_reduce_thunk",
+        ":nccl_all_reduce_thunk",  # fixdeps: keep
         ":outfeed_manager",
         ":partition_assignment",
         ":stream_assignment",
@@ -407,6 +407,7 @@ cc_library(
         "//tensorflow/stream_executor",
         "//tensorflow/stream_executor:blas",
         "//tensorflow/stream_executor:device_memory",
+        "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.cc b/tensorflow/compiler/xla/service/gpu/thunk.cc
index 6b98cbb6570..f43e05904dd 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/thunk.cc
@@ -18,48 +18,52 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-std::ostream& operator<<(std::ostream& os, Thunk::Kind kind) {
+absl::string_view ThunkKindToString(Thunk::Kind kind) {
   switch (kind) {
     case Thunk::kCholesky:
-      return os << "kCholesky";
+      return "kCholesky";
     case Thunk::kConditional:
-      return os << "kConditional";
+      return "kConditional";
     case Thunk::kConvolution:
-      return os << "kConvolution";
+      return "kConvolution";
     case Thunk::kCopy:
-      return os << "kCopy";
+      return "kCopy";
     case Thunk::kCudnnBatchNormBackward:
-      return os << "kCudnnBatchNormBackward";
+      return "kCudnnBatchNormBackward";
     case Thunk::kCudnnBatchNormForwardInference:
-      return os << "kCudnnBatchNormForwardInference";
+      return "kCudnnBatchNormForwardInference";
     case Thunk::kCudnnBatchNormForwardTraining:
-      return os << "kCudnnBatchNormForwardTraining";
+      return "kCudnnBatchNormForwardTraining";
     case Thunk::kNcclAllReduce:
-      return os << "kNcclAllReduce";
+      return "kNcclAllReduce";
     case Thunk::kFft:
-      return os << "kFft";
+      return "kFft";
     case Thunk::kGemm:
-      return os << "kGemm";
+      return "kGemm";
     case Thunk::kInfeed:
-      return os << "kInfeed";
+      return "kInfeed";
     case Thunk::kKernel:
-      return os << "kKernel";
+      return "kKernel";
     case Thunk::kMemset32BitValue:
-      return os << "kMemset32BitValue";
+      return "kMemset32BitValue";
     case Thunk::kMemzero:
-      return os << "kMemzero";
+      return "kMemzero";
     case Thunk::kOutfeed:
-      return os << "kOutfeed";
+      return "kOutfeed";
     case Thunk::kSequential:
-      return os << "kSequential";
+      return "kSequential";
     case Thunk::kTriangularSolve:
-      return os << "kTriangularSolve";
+      return "kTriangularSolve";
     case Thunk::kTuple:
-      return os << "kTuple";
+      return "kTuple";
     case Thunk::kWhile:
-      return os << "kWhile";
+      return "kWhile";
   }
 }
 
+std::ostream& operator<<(std::ostream& os, Thunk::Kind kind) {
+  return os << ThunkKindToString(kind);
+}
+
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h
index 442506f002c..56d1176ff4e 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/thunk.h
@@ -106,6 +106,7 @@ class Thunk {
 // A sequence of thunks.
 using ThunkSequence = std::vector<std::unique_ptr<Thunk>>;
 
+absl::string_view ThunkKindToString(Thunk::Kind);
 std::ostream& operator<<(std::ostream& os, Thunk::Kind kind);
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc b/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc
index 25bad67bab9..daa5f33e560 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc
+++ b/tensorflow/compiler/xla/service/gpu/thunk_schedule.cc
@@ -14,7 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h"
 
+#include 
+#include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/strings/str_format.h"
 #include "tensorflow/compiler/xla/array2d.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -144,11 +147,32 @@ const std::list<const Thunk*>& ThunkSchedule::DependsOn(
 }
 
 string ThunkSchedule::ToString() const {
+  if (thunk_total_order_.empty()) {
+    return "No thunks.";
+  }
+
+  const Thunk* thunk_with_longest_kind = *absl::c_max_element(
+      thunk_total_order_, [](const Thunk* a, const Thunk* b) {
+        return ThunkKindToString(a->kind()).length() <
+               ThunkKindToString(b->kind()).length();
+      });
+  int64 max_thunk_kind_len =
+      ThunkKindToString(thunk_with_longest_kind->kind()).length();
+
   string result = "Total order:\n";
   for (Thunk* thunk : thunk_total_order_) {
-    absl::StrAppend(&result, "\t", thunk->hlo_instruction()->ToString(), "\n");
+    // Write out the thunk kind, padded out to max_thunk_kind_len.
+    absl::string_view kind_str = ThunkKindToString(thunk->kind());
+    absl::StrAppend(&result, kind_str,
+                    string(max_thunk_kind_len - kind_str.length(), ' '), "\t");
+    if (thunk->hlo_instruction() != nullptr) {
+      absl::StrAppend(&result, thunk->hlo_instruction()->ToString());
+    } else {
+      absl::StrAppend(&result, "(no HloInstruction)");
+    }
+    absl::StrAppend(&result, "\n");
   }
-  absl::StrAppend(&result, "Dependencies:\n");
+  absl::StrAppend(&result, "\nDependencies:\n");
   for (const auto& entry : depends_on_) {
     const Thunk* dependent = entry.first;
     for (const Thunk* dependency : entry.second) {