[XLA] Add a configurable limit to the number of HLO dumps

The default is unlimited. Also, make filename timestamps optional.

PiperOrigin-RevId: 293079186
Change-Id: Id1dec8816017006b2540a12f594f43af01c4de50
This commit is contained in:
David Majnemer 2020-02-03 22:29:27 -08:00 committed by TensorFlower Gardener
parent 7a5bd40b13
commit ff3f6b6dad
8 changed files with 99 additions and 42 deletions

View File

@ -39,6 +39,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib"); opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
opts.set_xla_eliminate_hlo_implicit_broadcast(true); opts.set_xla_eliminate_hlo_implicit_broadcast(true);
opts.set_xla_dump_hlo_as_html(false); opts.set_xla_dump_hlo_as_html(false);
opts.set_xla_dump_include_timestamp(true);
opts.set_xla_dump_max_hlo_modules(-1);
#ifdef INTEL_MKL #ifdef INTEL_MKL
opts.set_xla_cpu_use_mkl_dnn(true); opts.set_xla_cpu_use_mkl_dnn(true);
#endif // INTEL_MKL #endif // INTEL_MKL
@ -488,6 +490,17 @@ static void AllocateFlags() {
"If specified, dumps HLO before and after optimization passes which " "If specified, dumps HLO before and after optimization passes which "
"match this regular expression, in addition to dumping at the very " "match this regular expression, in addition to dumping at the very "
"beginning and end of compilation."), "beginning and end of compilation."),
tensorflow::Flag(
"xla_dump_include_timestamp",
bool_setter_for(&DebugOptions::set_xla_dump_include_timestamp),
flag_values->xla_dump_include_timestamp(),
"If specified, includes a timestamp in the dumped filenames."),
tensorflow::Flag(
"xla_dump_max_hlo_modules",
int32_setter_for(&DebugOptions::set_xla_dump_max_hlo_modules),
flag_values->xla_dump_max_hlo_modules(),
"Max number of hlo module dumps in a directory. Set to < 0 for "
"unbounded."),
tensorflow::Flag( tensorflow::Flag(
"xla_hlo_graph_addresses", "xla_hlo_graph_addresses",
bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses), bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses),

View File

@ -623,7 +623,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
absl::make_unique<SequentialHloOrdering>(schedule), absl::make_unique<SequentialHloOrdering>(schedule),
BufferSizeBytesFunction(), memory_alignment, BufferSizeBytesFunction(), memory_alignment,
/*allocate_buffers_for_constants=*/true)); /*allocate_buffers_for_constants=*/true));
DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations"); DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");
// Each computation is a single function. Emit all embedded computations // Each computation is a single function. Emit all embedded computations
// before the entry computation. The order of computations returned from // before the entry computation. The order of computations returned from
@ -821,7 +821,7 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
DumpToFileInDirOrStdout(*module, "", "buffer_assignment", DumpToFileInDirOrStdout(*module, "", "buffer_assignment",
assignment->ToString()); assignment->ToString());
} }
DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations"); DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");
std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx; std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx;
std::unordered_map<const HloComputation*, int64> computation_to_profile_idx; std::unordered_map<const HloComputation*, int64> computation_to_profile_idx;

View File

@ -42,7 +42,9 @@ struct CanonicalDebugOptions {
dump_as_dot(opts.xla_dump_hlo_as_dot()), dump_as_dot(opts.xla_dump_hlo_as_dot()),
dump_as_html(opts.xla_dump_hlo_as_html()), dump_as_html(opts.xla_dump_hlo_as_html()),
dump_as_url(opts.xla_dump_hlo_as_url()), dump_as_url(opts.xla_dump_hlo_as_url()),
dump_snapshots(opts.xla_dump_hlo_snapshots()) { dump_snapshots(opts.xla_dump_hlo_snapshots()),
dump_include_timestamp(opts.xla_dump_include_timestamp()),
dump_max_hlo_modules(opts.xla_dump_max_hlo_modules()) {
// This constructor examines the values in `opts` and turns on other flags // This constructor examines the values in `opts` and turns on other flags
// based on what we think is the user's intent. To reduce confusion about // based on what we think is the user's intent. To reduce confusion about
// what was a user-specified value versus an extrapolated value, within this // what was a user-specified value versus an extrapolated value, within this
@ -135,6 +137,8 @@ struct CanonicalDebugOptions {
bool dump_as_html; bool dump_as_html;
bool dump_as_url; bool dump_as_url;
bool dump_snapshots; bool dump_snapshots;
bool dump_include_timestamp;
int64 dump_max_hlo_modules;
}; };
void DumpToFileInDirImpl(string_view filename, string_view contents, void DumpToFileInDirImpl(string_view filename, string_view contents,
@ -166,6 +170,23 @@ void DumpToFileInDirImpl(string_view filename, string_view contents,
} }
} }
// Make sure we are not going to dump more modules than the user has asked.
if (opts.dump_max_hlo_modules > 0) {
std::vector<string> matches;
auto pattern = tensorflow::io::JoinPath(dir, "*module_*.0000.*");
auto status = env->GetMatchingPaths(pattern, &matches);
if (!status.ok()) {
LOG(ERROR) << "Could not get matching paths for pattern " << pattern
<< ": " << status;
}
if (matches.size() > opts.dump_max_hlo_modules) {
LOG(ERROR) << "Have already dumped " << matches.size()
<< " modules, more than the limit of "
<< opts.dump_max_hlo_modules;
return;
}
}
string file_path = string file_path =
tensorflow::io::JoinPath(dir, SanitizeFileName(string(filename))); tensorflow::io::JoinPath(dir, SanitizeFileName(string(filename)));
auto status = tensorflow::WriteStringToFile(env, file_path, contents); auto status = tensorflow::WriteStringToFile(env, file_path, contents);
@ -247,28 +268,44 @@ void DumpHloModuleImpl(const HloModule& module,
static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED); static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
// Maps a module's unique ID to a {counter, timestamp} indicating how many times // Maps a module's unique ID to a counter indicating how many times we've dumped
// we've dumped this module during the compilation pipeline and when we first // this module during the compilation pipeline. This lets us keep the filenames
// started compiling this module. This lets us keep the filenames ordered // ordered nicely.
// nicely.
// //
// Entries added here leak forever; we have no way to GC them when a module // Entries added here leak forever; we have no way to GC them when a module
// dies. But we only add an entry if dumping is enabled for this module, and // dies. But we only add an entry if dumping is enabled for this module, and
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster // dumping a module leaks buffer space in stdout or bytes on disk *way* faster
// than this hashtable leaks memory. // than this hashtable leaks memory.
static auto& module_id_to_step_number GUARDED_BY(mu) = static auto& module_id_to_step_number GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, std::pair<int64, uint64>>(); *new absl::flat_hash_map<int64, int64>();
std::pair<int64, uint64> StepNumberAndTimestampForModule( // Maps a module's unique ID to a timestamp indicating when we first started
const HloModule& module) { // compiling this module. This lets us keep the filenames ordered
// nicely.
//
// Entries added here leak forever; we have no way to GC them when a module
// dies. But we only add an entry if dumping is enabled for this module, and
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster
// than this hashtable leaks memory.
static auto& module_id_to_timestamp GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, uint64>();
int64 StepNumberForModule(const HloModule& module) {
tensorflow::mutex_lock lock(mu); tensorflow::mutex_lock lock(mu);
auto result = module_id_to_step_number.try_emplace( return module_id_to_step_number[module.unique_id()]++;
module.unique_id(), 0, tensorflow::Env::Default()->NowMicros());
return std::make_pair(result.first->second.first++,
result.first->second.second);
} }
} // namespace } // namespace
string TimestampFor(const HloModule& module) {
if (!module.config().debug_options().xla_dump_include_timestamp()) {
return "";
}
tensorflow::mutex_lock lock(mu);
auto timestamp_emplace = module_id_to_timestamp.try_emplace(
module.unique_id(), tensorflow::Env::Default()->NowMicros());
return std::to_string(timestamp_emplace.first->second);
}
string FilenameFor(const HloModule& module, string_view prefix, string FilenameFor(const HloModule& module, string_view prefix,
string_view suffix) { string_view suffix) {
return StrFormat("%s%smodule_%04d.%s", prefix, prefix.empty() ? "" : ".", return StrFormat("%s%smodule_%04d.%s", prefix, prefix.empty() ? "" : ".",
@ -313,17 +350,17 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
void DumpHloModuleIfEnabled(const HloModule& module, string_view name) { void DumpHloModuleIfEnabled(const HloModule& module, string_view name) {
CanonicalDebugOptions opts(module.config().debug_options()); CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) { if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, "", DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
name, opts); TimestampFor(module), name, opts);
} }
} }
void DumpHloModuleIfEnabled(const HloModule& module, void DumpHloModuleIfEnabled(const HloModule& module,
const BufferAssignment& buffer_assn, const BufferAssignment& buffer_assn,
string_view prefix, string_view name) { string_view name) {
CanonicalDebugOptions opts(module.config().debug_options()); CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) { if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr, prefix, name, DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr,
opts); TimestampFor(module), name, opts);
} }
} }
@ -332,8 +369,8 @@ void DumpHloModuleIfEnabled(const HloModule& module,
string_view name) { string_view name) {
CanonicalDebugOptions opts(module.config().debug_options()); CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) { if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile, "", name, DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile,
opts); TimestampFor(module), name, opts);
} }
} }
@ -360,16 +397,14 @@ void DumpHloModuleBetweenPassesIfEnabled(string_view pipeline_name,
return; return;
} }
int64 step_number; int64 step_number = StepNumberForModule(module);
uint64 timestamp; std::string timestamp = TimestampFor(module);
std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
string filename_prefix = std::to_string(timestamp);
string filename_suffix = string filename_suffix =
StrFormat("%04d.%s.after_%s.before_%s", step_number, pipeline_name, StrFormat("%04d.%s.after_%s.before_%s", step_number, pipeline_name,
after_pass_name, before_pass_name); after_pass_name, before_pass_name);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
filename_prefix, filename_suffix, opts); timestamp, filename_suffix, opts);
} }
void DumpHloModuleDuringPassIfEnabled(string_view pass_name, void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
@ -381,15 +416,13 @@ void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
return; return;
} }
int64 step_number; int64 step_number = StepNumberForModule(module);
uint64 timestamp; std::string timestamp = TimestampFor(module);
std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
string filename_prefix = std::to_string(timestamp);
string filename_suffix = string filename_suffix =
StrFormat("%04d.%s.%s", step_number, pass_name, step_name); StrFormat("%04d.%s.%s", step_number, pass_name, step_name);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
filename_prefix, filename_suffix, opts); timestamp, filename_suffix, opts);
} }
void DumpHloSnapshotIfEnabled(const HloModule& module, void DumpHloSnapshotIfEnabled(const HloModule& module,
@ -402,12 +435,12 @@ void DumpHloSnapshotIfEnabled(const HloModule& module,
uint64 timestamp; uint64 timestamp;
{ {
static auto& module_id_to_execution_count GUARDED_BY(mu) = static auto& module_id_to_execution_count GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, std::pair<int64, uint64>>(); *new absl::flat_hash_map<int64, int64>();
tensorflow::mutex_lock lock(mu); tensorflow::mutex_lock lock(mu);
auto result = module_id_to_execution_count.try_emplace( execution_count = module_id_to_execution_count[module.unique_id()]++;
module.unique_id(), 0, tensorflow::Env::Default()->NowMicros()); auto timestamp_emplace = module_id_to_timestamp.try_emplace(
execution_count = result.first->second.first++; module.unique_id(), tensorflow::Env::Default()->NowMicros());
timestamp = result.first->second.second; timestamp = timestamp_emplace.first->second;
} }
string filename = string filename =
StrCat(FilenameFor(module, std::to_string(timestamp), StrCat(FilenameFor(module, std::to_string(timestamp),

View File

@ -33,6 +33,10 @@ class BufferAssignment;
class HloExecutionProfile; class HloExecutionProfile;
class HloSnapshot; class HloSnapshot;
// Get a timestamp which we can use as a filename prefix specific to this
// module.
string TimestampFor(const HloModule& module);
// Create the filename we will use to dump in DumpToFileInDir. // Create the filename we will use to dump in DumpToFileInDir.
string FilenameFor(const HloModule& module, absl::string_view prefix, string FilenameFor(const HloModule& module, absl::string_view prefix,
absl::string_view suffix); absl::string_view suffix);
@ -65,7 +69,7 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
void DumpHloModuleIfEnabled(const HloModule& module, absl::string_view name); void DumpHloModuleIfEnabled(const HloModule& module, absl::string_view name);
void DumpHloModuleIfEnabled(const HloModule& module, void DumpHloModuleIfEnabled(const HloModule& module,
const BufferAssignment& buffer_assn, const BufferAssignment& buffer_assn,
absl::string_view prefix, absl::string_view name); absl::string_view name);
void DumpHloModuleIfEnabled(const HloModule& module, void DumpHloModuleIfEnabled(const HloModule& module,
const HloExecutionProfile& profile, const HloExecutionProfile& profile,
absl::string_view name); absl::string_view name);

View File

@ -374,8 +374,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
/*allocate_buffers_for_constants=*/true, /*allocate_buffers_for_constants=*/true,
/*colorer=*/BufferAssigner::DefaultColorer(), /*colorer=*/BufferAssigner::DefaultColorer(),
/*must_not_live_out=*/{}, GetCanShareBuffer())); /*must_not_live_out=*/{}, GetCanShareBuffer()));
DumpHloModuleIfEnabled(*module, *buffer_assignment, "", DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");
"after_optimizations");
IrEmitterContext ir_emitter_context( IrEmitterContext ir_emitter_context(
module.get(), buffer_assignment.get(), stream_exec->platform(), module.get(), buffer_assignment.get(), stream_exec->platform(),

View File

@ -478,8 +478,7 @@ StatusOr<std::unique_ptr<Executable>> MlirCompiler::RunBackend(
/*allocate_buffers_for_constants=*/true, /*allocate_buffers_for_constants=*/true,
/*colorer=*/BufferAssigner::DefaultColorer(), /*colorer=*/BufferAssigner::DefaultColorer(),
/*must_not_live_out=*/{}, &CanShareBufferHint)); /*must_not_live_out=*/{}, &CanShareBufferHint));
DumpHloModuleIfEnabled(*module, *buffer_assignment, "", DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");
"after_optimizations");
EmissionContext emission_context(std::move(module)); EmissionContext emission_context(std::move(module));
if (error_handler_) { if (error_handler_) {

View File

@ -240,6 +240,12 @@ message DebugOptions {
// directory. // directory.
bool xla_dump_hlo_snapshots = 118; bool xla_dump_hlo_snapshots = 118;
// Include a timestamp in the dumped filenames.
bool xla_dump_include_timestamp = 131;
// Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
int32 xla_dump_max_hlo_modules = 132;
// //
// END flags controlling dumping HLO modules. // END flags controlling dumping HLO modules.
// //
@ -254,7 +260,7 @@ message DebugOptions {
// Guarantee run-to-run determinism from reductions on XLA:GPU. // Guarantee run-to-run determinism from reductions on XLA:GPU.
bool xla_gpu_deterministic_reductions = 130; bool xla_gpu_deterministic_reductions = 130;
// Next id: 131 // Next id: 133
// Extra options to pass to the compilation backend (e.g. LLVM); specific // Extra options to pass to the compilation backend (e.g. LLVM); specific
// interpretation of these values is left to the backend. // interpretation of these values is left to the backend.

View File

@ -173,6 +173,9 @@ xla::DebugOptions BuildXlaDebugOptions(const xla::DebugOptions& ref_options) {
options.set_xla_dump_hlo_as_text(ref_options.xla_dump_hlo_as_text()); options.set_xla_dump_hlo_as_text(ref_options.xla_dump_hlo_as_text());
options.set_xla_dump_hlo_snapshots(ref_options.xla_dump_hlo_snapshots()); options.set_xla_dump_hlo_snapshots(ref_options.xla_dump_hlo_snapshots());
options.set_xla_dump_hlo_pass_re(ref_options.xla_dump_hlo_pass_re()); options.set_xla_dump_hlo_pass_re(ref_options.xla_dump_hlo_pass_re());
options.set_xla_dump_include_timestamp(
ref_options.xla_dump_include_timestamp());
options.set_xla_dump_max_hlo_modules(ref_options.xla_dump_max_hlo_modules());
for (auto& pass : ref_options.xla_disable_hlo_passes()) { for (auto& pass : ref_options.xla_disable_hlo_passes()) {
options.add_xla_disable_hlo_passes(pass); options.add_xla_disable_hlo_passes(pass);
} }