[XLA] Add a configurable limit to the number of HLO dumps

The default is unlimited. Also make filename timestamps optional.
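
As a usage sketch (xla_dump_to is a pre-existing flag; the path and limit
here are illustrative), the new knobs are typically set through the XLA_FLAGS
environment variable:

  XLA_FLAGS='--xla_dump_to=/tmp/xla_dump --xla_dump_max_hlo_modules=10 --xla_dump_include_timestamp=false'

This caps the directory at roughly ten dumped modules and drops the timestamp
prefix from dump filenames.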

PiperOrigin-RevId: 293079186
Change-Id: Id1dec8816017006b2540a12f594f43af01c4de50
David Majnemer 2020-02-03 22:29:27 -08:00 committed by TensorFlower Gardener
parent 7a5bd40b13
commit ff3f6b6dad
8 changed files with 99 additions and 42 deletions

View File

@@ -39,6 +39,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
opts.set_xla_eliminate_hlo_implicit_broadcast(true);
opts.set_xla_dump_hlo_as_html(false);
opts.set_xla_dump_include_timestamp(true);
opts.set_xla_dump_max_hlo_modules(-1);
#ifdef INTEL_MKL
opts.set_xla_cpu_use_mkl_dnn(true);
#endif // INTEL_MKL
@@ -488,6 +490,17 @@ static void AllocateFlags() {
"If specified, dumps HLO before and after optimization passes which "
"match this regular expression, in addition to dumping at the very "
"beginning and end of compilation."),
tensorflow::Flag(
"xla_dump_include_timestamp",
bool_setter_for(&DebugOptions::set_xla_dump_include_timestamp),
flag_values->xla_dump_include_timestamp(),
"If specified, includes a timestamp in the dumped filenames."),
tensorflow::Flag(
"xla_dump_max_hlo_modules",
int32_setter_for(&DebugOptions::set_xla_dump_max_hlo_modules),
flag_values->xla_dump_max_hlo_modules(),
"Max number of hlo module dumps in a directory. Set to < 0 for "
"unbounded."),
tensorflow::Flag(
"xla_hlo_graph_addresses",
bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses),
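
For context, a minimal sketch of what the bool_setter_for / int32_setter_for
adapters above plausibly look like; the real helpers live elsewhere in
debug_options_flags.cc, so treat this as an assumption rather than the actual
implementation:

  // Hypothetical sketch: adapts a DebugOptions member setter into the
  // bool(value) callback shape that tensorflow::Flag expects. Assumes
  // `flag_values` is the file-level global DebugOptions* holding flag state.
  auto int32_setter_for = [](void (DebugOptions::*member_setter)(int32)) {
    return [member_setter](int32 value) {
      (flag_values->*member_setter)(value);  // forward to the proto setter
      return true;                           // tell the flag parser the value was accepted
    };
  };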

View File

@@ -623,7 +623,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
absl::make_unique<SequentialHloOrdering>(schedule),
BufferSizeBytesFunction(), memory_alignment,
/*allocate_buffers_for_constants=*/true));
DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations");
DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");
// Each computation is a single function. Emit all embedded computations
// before the entry computation. The order of computations returned from
@@ -821,7 +821,7 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
DumpToFileInDirOrStdout(*module, "", "buffer_assignment",
assignment->ToString());
}
DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations");
DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");
std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx;
std::unordered_map<const HloComputation*, int64> computation_to_profile_idx;

View File

@@ -42,7 +42,9 @@ struct CanonicalDebugOptions {
dump_as_dot(opts.xla_dump_hlo_as_dot()),
dump_as_html(opts.xla_dump_hlo_as_html()),
dump_as_url(opts.xla_dump_hlo_as_url()),
dump_snapshots(opts.xla_dump_hlo_snapshots()) {
dump_snapshots(opts.xla_dump_hlo_snapshots()),
dump_include_timestamp(opts.xla_dump_include_timestamp()),
dump_max_hlo_modules(opts.xla_dump_max_hlo_modules()) {
// This constructor examines the values in `opts` and turns on other flags
// based on what we think is the user's intent. To reduce confusion about
// what was a user-specified value versus an extrapolated value, within this
@@ -135,6 +137,8 @@ struct CanonicalDebugOptions {
bool dump_as_html;
bool dump_as_url;
bool dump_snapshots;
bool dump_include_timestamp;
int64 dump_max_hlo_modules;
};
void DumpToFileInDirImpl(string_view filename, string_view contents,
@@ -166,6 +170,23 @@ void DumpToFileInDirImpl(string_view filename, string_view contents,
}
}
// Make sure we are not going to dump more modules than the user asked for.
if (opts.dump_max_hlo_modules > 0) {
std::vector<string> matches;
auto pattern = tensorflow::io::JoinPath(dir, "*module_*.0000.*");
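// Note: the ".0000." component matches only a module's first per-pass dump
// (step number 0), so each module is counted at most once, no matter how
// many pipeline stages dumped it.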
auto status = env->GetMatchingPaths(pattern, &matches);
if (!status.ok()) {
LOG(ERROR) << "Could not get matching paths for pattern " << pattern
<< ": " << status;
}
if (matches.size() > opts.dump_max_hlo_modules) {
LOG(ERROR) << "Have already dumped " << matches.size()
<< " modules, more than the limit of "
<< opts.dump_max_hlo_modules;
return;
}
}
string file_path =
tensorflow::io::JoinPath(dir, SanitizeFileName(string(filename)));
auto status = tensorflow::WriteStringToFile(env, file_path, contents);
@@ -247,28 +268,44 @@ void DumpHloModuleImpl(const HloModule& module,
static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
// Maps a module's unique ID to a {counter, timestamp} indicating how many times
// we've dumped this module during the compilation pipeline and when we first
// started compiling this module. This lets us keep the filenames ordered
// nicely.
// Maps a module's unique ID to a counter indicating how many times we've dumped
// this module during the compilation pipeline. This lets us keep the filenames
// ordered nicely.
//
// Entries added here leak forever; we have no way to GC them when a module
// dies. But we only add an entry if dumping is enabled for this module, and
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster
// than this hashtable leaks memory.
static auto& module_id_to_step_number GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, std::pair<int64, uint64>>();
*new absl::flat_hash_map<int64, int64>();
std::pair<int64, uint64> StepNumberAndTimestampForModule(
const HloModule& module) {
// Maps a module's unique ID to a timestamp recorded the first time we dump
// this module, i.e. roughly when we started compiling it. This lets us keep
// the filenames ordered nicely.
//
// Entries added here leak forever; we have no way to GC them when a module
// dies. But we only add an entry if dumping is enabled for this module, and
// dumping a module leaks buffer space in stdout or bytes on disk *way* faster
// than this hashtable leaks memory.
static auto& module_id_to_timestamp GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, uint64>();
int64 StepNumberForModule(const HloModule& module) {
tensorflow::mutex_lock lock(mu);
auto result = module_id_to_step_number.try_emplace(
module.unique_id(), 0, tensorflow::Env::Default()->NowMicros());
return std::make_pair(result.first->second.first++,
result.first->second.second);
return module_id_to_step_number[module.unique_id()]++;
}
} // namespace
string TimestampFor(const HloModule& module) {
if (!module.config().debug_options().xla_dump_include_timestamp()) {
return "";
}
tensorflow::mutex_lock lock(mu);
auto timestamp_emplace = module_id_to_timestamp.try_emplace(
module.unique_id(), tensorflow::Env::Default()->NowMicros());
return std::to_string(timestamp_emplace.first->second);
}
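// With timestamps enabled, TimestampFor returns the module's first-dump time
// in microseconds, which FilenameFor below uses as the filename prefix; a
// per-pass dump then looks roughly like
//   1580797767000000.module_0001.0000.pipeline.after_foo.before_bar.txt
// (names illustrative). With xla_dump_include_timestamp=false the prefix and
// its trailing dot are omitted entirely.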
string FilenameFor(const HloModule& module, string_view prefix,
string_view suffix) {
return StrFormat("%s%smodule_%04d.%s", prefix, prefix.empty() ? "" : ".",
@@ -313,17 +350,17 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
void DumpHloModuleIfEnabled(const HloModule& module, string_view name) {
CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, "",
name, opts);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
TimestampFor(module), name, opts);
}
}
void DumpHloModuleIfEnabled(const HloModule& module,
const BufferAssignment& buffer_assn,
string_view prefix, string_view name) {
string_view name) {
CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr, prefix, name,
opts);
DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr,
TimestampFor(module), name, opts);
}
}
@@ -332,8 +369,8 @@ void DumpHloModuleIfEnabled(const HloModule& module,
string_view name) {
CanonicalDebugOptions opts(module.config().debug_options());
if (opts.should_dump_module(module.name())) {
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile, "", name,
opts);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile,
TimestampFor(module), name, opts);
}
}
@@ -360,16 +397,14 @@ void DumpHloModuleBetweenPassesIfEnabled(string_view pipeline_name,
return;
}
int64 step_number;
uint64 timestamp;
std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
int64 step_number = StepNumberForModule(module);
std::string timestamp = TimestampFor(module);
string filename_prefix = std::to_string(timestamp);
string filename_suffix =
StrFormat("%04d.%s.after_%s.before_%s", step_number, pipeline_name,
after_pass_name, before_pass_name);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
filename_prefix, filename_suffix, opts);
timestamp, filename_suffix, opts);
}
void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
@@ -381,15 +416,13 @@ void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
return;
}
int64 step_number;
uint64 timestamp;
std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
int64 step_number = StepNumberForModule(module);
std::string timestamp = TimestampFor(module);
string filename_prefix = std::to_string(timestamp);
string filename_suffix =
StrFormat("%04d.%s.%s", step_number, pass_name, step_name);
DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
filename_prefix, filename_suffix, opts);
timestamp, filename_suffix, opts);
}
void DumpHloSnapshotIfEnabled(const HloModule& module,
@@ -402,12 +435,12 @@ void DumpHloSnapshotIfEnabled(const HloModule& module,
uint64 timestamp;
{
static auto& module_id_to_execution_count GUARDED_BY(mu) =
*new absl::flat_hash_map<int64, std::pair<int64, uint64>>();
*new absl::flat_hash_map<int64, int64>();
tensorflow::mutex_lock lock(mu);
auto result = module_id_to_execution_count.try_emplace(
module.unique_id(), 0, tensorflow::Env::Default()->NowMicros());
execution_count = result.first->second.first++;
timestamp = result.first->second.second;
execution_count = module_id_to_execution_count[module.unique_id()]++;
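// Reusing module_id_to_timestamp here, rather than a snapshot-local clock,
// presumably keeps snapshot filenames under the same timestamp prefix as the
// module's compile-time dumps, so all of a module's files sort together.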
auto timestamp_emplace = module_id_to_timestamp.try_emplace(
module.unique_id(), tensorflow::Env::Default()->NowMicros());
timestamp = timestamp_emplace.first->second;
}
string filename =
StrCat(FilenameFor(module, std::to_string(timestamp),

View File

@@ -33,6 +33,10 @@ class BufferAssignment;
class HloExecutionProfile;
class HloSnapshot;
// Get a timestamp which we can use as a filename prefix specific to this
// module.
string TimestampFor(const HloModule& module);
// Create the filename we will use to dump in DumpToFileInDir.
string FilenameFor(const HloModule& module, absl::string_view prefix,
absl::string_view suffix);
@@ -65,7 +69,7 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
void DumpHloModuleIfEnabled(const HloModule& module, absl::string_view name);
void DumpHloModuleIfEnabled(const HloModule& module,
const BufferAssignment& buffer_assn,
absl::string_view prefix, absl::string_view name);
absl::string_view name);
void DumpHloModuleIfEnabled(const HloModule& module,
const HloExecutionProfile& profile,
absl::string_view name);
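
Note the signature change: callers of the BufferAssignment overload no longer
pass a filename prefix; the timestamp prefix is now derived internally via
TimestampFor, as seen in the dump.cc changes above.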

View File

@@ -374,8 +374,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
/*allocate_buffers_for_constants=*/true,
/*colorer=*/BufferAssigner::DefaultColorer(),
/*must_not_live_out=*/{}, GetCanShareBuffer()));
DumpHloModuleIfEnabled(*module, *buffer_assignment, "",
"after_optimizations");
DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");
IrEmitterContext ir_emitter_context(
module.get(), buffer_assignment.get(), stream_exec->platform(),

View File

@@ -478,8 +478,7 @@ StatusOr<std::unique_ptr<Executable>> MlirCompiler::RunBackend(
/*allocate_buffers_for_constants=*/true,
/*colorer=*/BufferAssigner::DefaultColorer(),
/*must_not_live_out=*/{}, &CanShareBufferHint));
DumpHloModuleIfEnabled(*module, *buffer_assignment, "",
"after_optimizations");
DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");
EmissionContext emission_context(std::move(module));
if (error_handler_) {

View File

@@ -240,6 +240,12 @@ message DebugOptions {
// directory.
bool xla_dump_hlo_snapshots = 118;
// Include a timestamp in the dumped filenames.
bool xla_dump_include_timestamp = 131;
// Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
int32 xla_dump_max_hlo_modules = 132;
//
// END flags controlling dumping HLO modules.
//
@@ -254,7 +260,7 @@ message DebugOptions {
// Guarantee run-to-run determinism from reductions on XLA:GPU.
bool xla_gpu_deterministic_reductions = 130;
// Next id: 131
// Next id: 133
// Extra options to pass to the compilation backend (e.g. LLVM); specific
// interpretation of these values is left to the backend.
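
These fields can also be set programmatically rather than via flags; a small
sketch using the protoc-generated C++ accessors (values illustrative):

  xla::DebugOptions opts;
  opts.set_xla_dump_max_hlo_modules(10);       // keep at most 10 modules' dumps
  opts.set_xla_dump_include_timestamp(false);  // stable filenames across runs
  CHECK_EQ(opts.xla_dump_max_hlo_modules(), 10);
  CHECK(!opts.xla_dump_include_timestamp());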

View File

@@ -173,6 +173,9 @@ xla::DebugOptions BuildXlaDebugOptions(const xla::DebugOptions& ref_options) {
options.set_xla_dump_hlo_as_text(ref_options.xla_dump_hlo_as_text());
options.set_xla_dump_hlo_snapshots(ref_options.xla_dump_hlo_snapshots());
options.set_xla_dump_hlo_pass_re(ref_options.xla_dump_hlo_pass_re());
options.set_xla_dump_include_timestamp(
ref_options.xla_dump_include_timestamp());
options.set_xla_dump_max_hlo_modules(ref_options.xla_dump_max_hlo_modules());
for (auto& pass : ref_options.xla_disable_hlo_passes()) {
options.add_xla_disable_hlo_passes(pass);
}