[XLA] Add a configurable limit to the number of HLO dumps

The default is unlimited. Also, make filename timestamps optional.

PiperOrigin-RevId: 293079186
Change-Id: Id1dec8816017006b2540a12f594f43af01c4de50
This commit is contained in:
David Majnemer 2020-02-03 22:29:27 -08:00 committed by TensorFlower Gardener
parent 7a5bd40b13
commit ff3f6b6dad
8 changed files with 99 additions and 42 deletions

View File

@ -39,6 +39,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib"); opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
opts.set_xla_eliminate_hlo_implicit_broadcast(true); opts.set_xla_eliminate_hlo_implicit_broadcast(true);
opts.set_xla_dump_hlo_as_html(false); opts.set_xla_dump_hlo_as_html(false);
opts.set_xla_dump_include_timestamp(true);
opts.set_xla_dump_max_hlo_modules(-1);
#ifdef INTEL_MKL #ifdef INTEL_MKL
opts.set_xla_cpu_use_mkl_dnn(true); opts.set_xla_cpu_use_mkl_dnn(true);
#endif // INTEL_MKL #endif // INTEL_MKL
@ -488,6 +490,17 @@ static void AllocateFlags() {
"If specified, dumps HLO before and after optimization passes which " "If specified, dumps HLO before and after optimization passes which "
"match this regular expression, in addition to dumping at the very " "match this regular expression, in addition to dumping at the very "
"beginning and end of compilation."), "beginning and end of compilation."),
tensorflow::Flag(
"xla_dump_include_timestamp",
bool_setter_for(&DebugOptions::set_xla_dump_include_timestamp),
flag_values->xla_dump_include_timestamp(),
"If specified, includes a timestamp in the dumped filenames."),
tensorflow::Flag(
"xla_dump_max_hlo_modules",
int32_setter_for(&DebugOptions::set_xla_dump_max_hlo_modules),
flag_values->xla_dump_max_hlo_modules(),
"Max number of hlo module dumps in a directory. Set to < 0 for "
"unbounded."),
tensorflow::Flag( tensorflow::Flag(
"xla_hlo_graph_addresses", "xla_hlo_graph_addresses",
bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses), bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses),

View File

@ -623,7 +623,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
absl::make_unique<SequentialHloOrdering>(schedule), absl::make_unique<SequentialHloOrdering>(schedule),
BufferSizeBytesFunction(), memory_alignment, BufferSizeBytesFunction(), memory_alignment,
/*allocate_buffers_for_constants=*/true)); /*allocate_buffers_for_constants=*/true));
DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations"); DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");
// Each computation is a single function. Emit all embedded computations // Each computation is a single function. Emit all embedded computations
// before the entry computation. The order of computations returned from // before the entry computation. The order of computations returned from
@ -821,7 +821,7 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
DumpToFileInDirOrStdout(*module, "", "buffer_assignment", DumpToFileInDirOrStdout(*module, "", "buffer_assignment",
assignment->ToString()); assignment->ToString());
} }
DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations"); DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");
std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx; std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx;
std::unordered_map<const HloComputation*, int64> computation_to_profile_idx; std::unordered_map<const HloComputation*, int64> computation_to_profile_idx;

View File

@ -42,7 +42,9 @@ struct CanonicalDebugOptions {
dump_as_dot(opts.xla_dump_hlo_as_dot()), dump_as_dot(opts.xla_dump_hlo_as_dot()),
dump_as_html(opts.xla_dump_hlo_as_html()), dump_as_html(opts.xla_dump_hlo_as_html()),
dump_as_url(opts.xla_dump_hlo_as_url()), dump_as_url(opts.xla_dump_hlo_as_url()),
dump_snapshots(opts.xla_dump_hlo_snapshots()) { dump_snapshots(opts.xla_dump_hlo_snapshots()),
dump_include_timestamp(opts.xla_dump_include_timestamp()),
dump_max_hlo_modules(opts.xla_dump_max_hlo_modules()) {
// This constructor examines the values in `opts` and turns on other flags // This constructor examines the values in `opts` and turns on other flags
// based on what we think is the user's intent. To reduce confusion about // based on what we think is the user's intent. To reduce confusion about
// what was a user-specified value versus an extrapolated value, within this // what was a user-specified value versus an extrapolated value, within this
@ -135,6 +137,8 @@ struct CanonicalDebugOptions {
bool dump_as_html; bool dump_as_html;
bool dump_as_url; bool dump_as_url;
bool dump_snapshots; bool dump_snapshots;
bool dump_include_timestamp;
int64 dump_max_hlo_modules;
}; };
void DumpToFileInDirImpl(string_view filename, string_view contents, void DumpToFileInDirImpl(string_view filename, string_view contents,
@ -166,6 +170,23 @@ void DumpToFileInDirImpl(string_view filename, string_view contents,
} }
} }
// Make sure we are not going to dump more modules than the user has asked.
if (opts.dump_max_hlo_modules > 0) {
std::vector<string> matches;
auto pattern = tensorflow::io::JoinPath(dir, "*module_*.0000.*");
auto status = env->GetMatchingPaths(pattern, &matches);
if (!status.ok()) {
LOG(ERROR) << "Could not get matching paths for pattern " << pattern
<< ": " << status;
}
if (matches.size() > opts.dump_max_hlo_modules) {
LOG(ERROR) << "Have already dumped " << matches.size()
<< " modules, more than the limit of "
<< opts.dump_max_hlo_modules;
return;
}
}
string file_path = string file_path =
tensorflow::io::JoinPath(dir, SanitizeFileName(string(filename))); tensorflow::io::JoinPath(dir, SanitizeFileName(string(filename)));
auto status = tensorflow::WriteStringToFile(env, file_path, contents); auto status = tensorflow::WriteStringToFile(env, file_path, contents);
@ -247,28 +268,44 @@ void DumpHloModuleImpl(const HloModule& module,
static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED); static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
// Maps a module's unique ID to a {counter, timestamp} indicating how many times // Maps a module's unique ID to a counter indicating how many times we've dumped
// we've dumped this module during the compilation pipeline and when we first // this module during the compilation pipeline. This lets us keep the filenames
// started compiling this module. This lets us keep the filenames ordered // ordered nicely.
// nicely.
// //
// Entries added here leak forever; we have no way to GC them when a module // Entries added here leak forever; we have no way to GC them when a module
// dies. But we only add an entry if dumping is enabled for this module, and // dies. But we only add an entry if dumping is enabled for this module, and
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster // dumping a module leaks buffer space in stdout or bytes on disk *way* faster
// than this hashtable leaks memory. // than this hashtable leaks memory.
static auto& module_id_to_step_number GUARDED_BY(mu) = static auto& module_id_to_step_number GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, std::pair<int64, uint64>>(); *new absl::flat_hash_map<int64, int64>();
std::pair<int64, uint64> StepNumberAndTimestampForModule( // Maps a module's unique ID to a timestamp indicating when we first started
const HloModule& module) { // compiling this module. This lets us keep the filenames ordered
// nicely.
//
// Entries added here leak forever; we have no way to GC them when a module
// dies. But we only add an entry if dumping is enabled for this module, and
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster
// than this hashtable leaks memory.
static auto& module_id_to_timestamp GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, uint64>();
int64 StepNumberForModule(const HloModule& module) {
tensorflow::mutex_lock lock(mu); tensorflow::mutex_lock lock(mu);
auto result = module_id_to_step_number.try_emplace( return module_id_to_step_number[module.unique_id()]++;
module.unique_id(), 0, tensorflow::Env::Default()->NowMicros());
return std::make_pair(result.first->second.first++,
result.first->second.second);
} }
} // namespace } // namespace
string TimestampFor(const HloModule& module) {
if (!module.config().debug_options().xla_dump_include_timestamp()) {
return "";
}
tensorflow::mutex_lock lock(mu);
auto timestamp_emplace = module_id_to_timestamp.try_emplace(
module.unique_id(), tensorflow::Env::Default()->NowMicros());
return std::to_string(timestamp_emplace.first->second);
}
string FilenameFor(const HloModule& module, string_view prefix, string FilenameFor(const HloModule& module, string_view prefix,
string_view suffix) { string_view suffix) {
return StrFormat("%s%smodule_%04d.%s", prefix, prefix.empty() ? "" : ".", return StrFormat("%s%smodule_%04d.%s", prefix, prefix.empty() ? "" : ".",
@ -313,17 +350,17 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
void DumpHloModuleIfEnabled(const HloModule& module, string_view name) { void DumpHloModuleIfEnabled(const HloModule& module, string_view name) {
CanonicalDebugOptions opts(module.config().debug_options()); CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) { if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, "", DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
name, opts); TimestampFor(module), name, opts);
} }
} }
void DumpHloModuleIfEnabled(const HloModule& module, void DumpHloModuleIfEnabled(const HloModule& module,
const BufferAssignment& buffer_assn, const BufferAssignment& buffer_assn,
string_view prefix, string_view name) { string_view name) {
CanonicalDebugOptions opts(module.config().debug_options()); CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) { if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr, prefix, name, DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr,
opts); TimestampFor(module), name, opts);
} }
} }
@ -332,8 +369,8 @@ void DumpHloModuleIfEnabled(const HloModule& module,
string_view name) { string_view name) {
CanonicalDebugOptions opts(module.config().debug_options()); CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) { if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile, "", name, DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile,
opts); TimestampFor(module), name, opts);
} }
} }
@ -360,16 +397,14 @@ void DumpHloModuleBetweenPassesIfEnabled(string_view pipeline_name,
return; return;
} }
int64 step_number; int64 step_number = StepNumberForModule(module);
uint64 timestamp; std::string timestamp = TimestampFor(module);
std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
string filename_prefix = std::to_string(timestamp);
string filename_suffix = string filename_suffix =
StrFormat("%04d.%s.after_%s.before_%s", step_number, pipeline_name, StrFormat("%04d.%s.after_%s.before_%s", step_number, pipeline_name,
after_pass_name, before_pass_name); after_pass_name, before_pass_name);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
filename_prefix, filename_suffix, opts); timestamp, filename_suffix, opts);
} }
void DumpHloModuleDuringPassIfEnabled(string_view pass_name, void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
@ -381,15 +416,13 @@ void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
return; return;
} }
int64 step_number; int64 step_number = StepNumberForModule(module);
uint64 timestamp; std::string timestamp = TimestampFor(module);
std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
string filename_prefix = std::to_string(timestamp);
string filename_suffix = string filename_suffix =
StrFormat("%04d.%s.%s", step_number, pass_name, step_name); StrFormat("%04d.%s.%s", step_number, pass_name, step_name);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
filename_prefix, filename_suffix, opts); timestamp, filename_suffix, opts);
} }
void DumpHloSnapshotIfEnabled(const HloModule& module, void DumpHloSnapshotIfEnabled(const HloModule& module,
@ -402,12 +435,12 @@ void DumpHloSnapshotIfEnabled(const HloModule& module,
uint64 timestamp; uint64 timestamp;
{ {
static auto& module_id_to_execution_count GUARDED_BY(mu) = static auto& module_id_to_execution_count GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, std::pair<int64, uint64>>(); *new absl::flat_hash_map<int64, int64>();
tensorflow::mutex_lock lock(mu); tensorflow::mutex_lock lock(mu);
auto result = module_id_to_execution_count.try_emplace( execution_count = module_id_to_execution_count[module.unique_id()]++;
module.unique_id(), 0, tensorflow::Env::Default()->NowMicros()); auto timestamp_emplace = module_id_to_timestamp.try_emplace(
execution_count = result.first->second.first++; module.unique_id(), tensorflow::Env::Default()->NowMicros());
timestamp = result.first->second.second; timestamp = timestamp_emplace.first->second;
} }
string filename = string filename =
StrCat(FilenameFor(module, std::to_string(timestamp), StrCat(FilenameFor(module, std::to_string(timestamp),

View File

@ -33,6 +33,10 @@ class BufferAssignment;
class HloExecutionProfile; class HloExecutionProfile;
class HloSnapshot; class HloSnapshot;
// Get a timestamp which we can use as a filename prefix specific to this
// module.
string TimestampFor(const HloModule& module);
// Create the filename we will use to dump in DumpToFileInDir. // Create the filename we will use to dump in DumpToFileInDir.
string FilenameFor(const HloModule& module, absl::string_view prefix, string FilenameFor(const HloModule& module, absl::string_view prefix,
absl::string_view suffix); absl::string_view suffix);
@ -65,7 +69,7 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
void DumpHloModuleIfEnabled(const HloModule& module, absl::string_view name); void DumpHloModuleIfEnabled(const HloModule& module, absl::string_view name);
void DumpHloModuleIfEnabled(const HloModule& module, void DumpHloModuleIfEnabled(const HloModule& module,
const BufferAssignment& buffer_assn, const BufferAssignment& buffer_assn,
absl::string_view prefix, absl::string_view name); absl::string_view name);
void DumpHloModuleIfEnabled(const HloModule& module, void DumpHloModuleIfEnabled(const HloModule& module,
const HloExecutionProfile& profile, const HloExecutionProfile& profile,
absl::string_view name); absl::string_view name);

View File

@ -374,8 +374,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
/*allocate_buffers_for_constants=*/true, /*allocate_buffers_for_constants=*/true,
/*colorer=*/BufferAssigner::DefaultColorer(), /*colorer=*/BufferAssigner::DefaultColorer(),
/*must_not_live_out=*/{}, GetCanShareBuffer())); /*must_not_live_out=*/{}, GetCanShareBuffer()));
DumpHloModuleIfEnabled(*module, *buffer_assignment, "", DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");
"after_optimizations");
IrEmitterContext ir_emitter_context( IrEmitterContext ir_emitter_context(
module.get(), buffer_assignment.get(), stream_exec->platform(), module.get(), buffer_assignment.get(), stream_exec->platform(),

View File

@ -478,8 +478,7 @@ StatusOr<std::unique_ptr<Executable>> MlirCompiler::RunBackend(
/*allocate_buffers_for_constants=*/true, /*allocate_buffers_for_constants=*/true,
/*colorer=*/BufferAssigner::DefaultColorer(), /*colorer=*/BufferAssigner::DefaultColorer(),
/*must_not_live_out=*/{}, &CanShareBufferHint)); /*must_not_live_out=*/{}, &CanShareBufferHint));
DumpHloModuleIfEnabled(*module, *buffer_assignment, "", DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");
"after_optimizations");
EmissionContext emission_context(std::move(module)); EmissionContext emission_context(std::move(module));
if (error_handler_) { if (error_handler_) {

View File

@ -240,6 +240,12 @@ message DebugOptions {
// directory. // directory.
bool xla_dump_hlo_snapshots = 118; bool xla_dump_hlo_snapshots = 118;
// Include a timestamp in the dumped filenames.
bool xla_dump_include_timestamp = 131;
// Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
int32 xla_dump_max_hlo_modules = 132;
// //
// END flags controlling dumping HLO modules. // END flags controlling dumping HLO modules.
// //
@ -254,7 +260,7 @@ message DebugOptions {
// Guarantee run-to-run determinism from reductions on XLA:GPU. // Guarantee run-to-run determinism from reductions on XLA:GPU.
bool xla_gpu_deterministic_reductions = 130; bool xla_gpu_deterministic_reductions = 130;
// Next id: 131 // Next id: 133
// Extra options to pass to the compilation backend (e.g. LLVM); specific // Extra options to pass to the compilation backend (e.g. LLVM); specific
// interpretation of these values is left to the backend. // interpretation of these values is left to the backend.

View File

@ -173,6 +173,9 @@ xla::DebugOptions BuildXlaDebugOptions(const xla::DebugOptions& ref_options) {
options.set_xla_dump_hlo_as_text(ref_options.xla_dump_hlo_as_text()); options.set_xla_dump_hlo_as_text(ref_options.xla_dump_hlo_as_text());
options.set_xla_dump_hlo_snapshots(ref_options.xla_dump_hlo_snapshots()); options.set_xla_dump_hlo_snapshots(ref_options.xla_dump_hlo_snapshots());
options.set_xla_dump_hlo_pass_re(ref_options.xla_dump_hlo_pass_re()); options.set_xla_dump_hlo_pass_re(ref_options.xla_dump_hlo_pass_re());
options.set_xla_dump_include_timestamp(
ref_options.xla_dump_include_timestamp());
options.set_xla_dump_max_hlo_modules(ref_options.xla_dump_max_hlo_modules());
for (auto& pass : ref_options.xla_disable_hlo_passes()) { for (auto& pass : ref_options.xla_disable_hlo_passes()) {
options.add_xla_disable_hlo_passes(pass); options.add_xla_disable_hlo_passes(pass);
} }