[XLA] Add a configurable limit to the number of HLO dumps

The default is unlimited. Also make filename timestamps optional.

PiperOrigin-RevId: 293079186
Change-Id: Id1dec8816017006b2540a12f594f43af01c4de50
commit ff3f6b6dad (parent 7a5bd40b13)
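For context, the two DebugOptions fields introduced by this commit can also be set programmatically. The snippet below is a hypothetical usage sketch, not part of the commit: the field setters come from the xla.proto change shown in this diff, while the surrounding function is illustrative only. The same options are registered further down as the --xla_dump_include_timestamp and --xla_dump_max_hlo_modules flags.

// Hypothetical usage sketch: configure the new dump options on a
// DebugOptions proto. Field names come from the xla.proto change below;
// everything else here is illustrative.
#include "tensorflow/compiler/xla/xla.pb.h"

xla::DebugOptions MakeDumpOptions() {
  xla::DebugOptions opts;
  opts.set_xla_dump_hlo_as_text(true);         // existing option, unchanged
  opts.set_xla_dump_include_timestamp(false);  // new: omit timestamps from filenames
  opts.set_xla_dump_max_hlo_modules(100);      // new: stop dumping after 100 modules
  return opts;
}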
@@ -39,6 +39,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
   opts.set_xla_eliminate_hlo_implicit_broadcast(true);
   opts.set_xla_dump_hlo_as_html(false);
+  opts.set_xla_dump_include_timestamp(true);
+  opts.set_xla_dump_max_hlo_modules(-1);
 #ifdef INTEL_MKL
   opts.set_xla_cpu_use_mkl_dnn(true);
 #endif  // INTEL_MKL
@@ -488,6 +490,17 @@ static void AllocateFlags() {
           "If specified, dumps HLO before and after optimization passes which "
           "match this regular expression, in addition to dumping at the very "
           "beginning and end of compilation."),
+      tensorflow::Flag(
+          "xla_dump_include_timestamp",
+          bool_setter_for(&DebugOptions::set_xla_dump_include_timestamp),
+          flag_values->xla_dump_include_timestamp(),
+          "If specified, includes a timestamp in the dumped filenames."),
+      tensorflow::Flag(
+          "xla_dump_max_hlo_modules",
+          int32_setter_for(&DebugOptions::set_xla_dump_max_hlo_modules),
+          flag_values->xla_dump_max_hlo_modules(),
+          "Max number of hlo module dumps in a directory. Set to < 0 for "
+          "unbounded."),
       tensorflow::Flag(
           "xla_hlo_graph_addresses",
           bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses),
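These registered flags are normally picked up from the XLA_FLAGS environment variable before the first compilation. The sketch below is illustrative only: XLA_FLAGS is the usual mechanism in this codebase, --xla_dump_to is a pre-existing flag outside this diff, and the values and function name are made up.

// Hypothetical sketch: export the new flags through XLA_FLAGS before XLA
// parses its debug options. Values shown are arbitrary examples.
#include <stdlib.h>

void ConfigureXlaDumping() {
  setenv("XLA_FLAGS",
         "--xla_dump_to=/tmp/xla_dumps "        // pre-existing flag, not in this diff
         "--xla_dump_include_timestamp=false "  // new flag from this commit
         "--xla_dump_max_hlo_modules=100",      // new flag from this commit
         /*overwrite=*/1);
}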
@@ -623,7 +623,7 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
                           absl::make_unique<SequentialHloOrdering>(schedule),
                           BufferSizeBytesFunction(), memory_alignment,
                           /*allocate_buffers_for_constants=*/true));
-  DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations");
+  DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");

   // Each computation is a single function. Emit all embedded computations
   // before the entry computation. The order of computations returned from
@@ -821,7 +821,7 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
       DumpToFileInDirOrStdout(*module, "", "buffer_assignment",
                               assignment->ToString());
     }
-    DumpHloModuleIfEnabled(*module, *assignment, "", "after_optimizations");
+    DumpHloModuleIfEnabled(*module, *assignment, "after_optimizations");

     std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx;
     std::unordered_map<const HloComputation*, int64> computation_to_profile_idx;
@@ -42,7 +42,9 @@ struct CanonicalDebugOptions {
         dump_as_dot(opts.xla_dump_hlo_as_dot()),
         dump_as_html(opts.xla_dump_hlo_as_html()),
         dump_as_url(opts.xla_dump_hlo_as_url()),
-        dump_snapshots(opts.xla_dump_hlo_snapshots()) {
+        dump_snapshots(opts.xla_dump_hlo_snapshots()),
+        dump_include_timestamp(opts.xla_dump_include_timestamp()),
+        dump_max_hlo_modules(opts.xla_dump_max_hlo_modules()) {
     // This constructor examines the values in `opts` and turns on other flags
     // based on what we think is the user's intent. To reduce confusion about
     // what was a user-specified value versus an extrapolated value, within this
@@ -135,6 +137,8 @@ struct CanonicalDebugOptions {
   bool dump_as_html;
   bool dump_as_url;
   bool dump_snapshots;
+  bool dump_include_timestamp;
+  int64 dump_max_hlo_modules;
 };

 void DumpToFileInDirImpl(string_view filename, string_view contents,
@@ -166,6 +170,23 @@ void DumpToFileInDirImpl(string_view filename, string_view contents,
     }
   }

+  // Make sure we are not going to dump more modules than the user has asked.
+  if (opts.dump_max_hlo_modules > 0) {
+    std::vector<string> matches;
+    auto pattern = tensorflow::io::JoinPath(dir, "*module_*.0000.*");
+    auto status = env->GetMatchingPaths(pattern, &matches);
+    if (!status.ok()) {
+      LOG(ERROR) << "Could not get matching paths for pattern " << pattern
+                 << ": " << status;
+    }
+    if (matches.size() > opts.dump_max_hlo_modules) {
+      LOG(ERROR) << "Have already dumped " << matches.size()
+                 << " modules, more than the limit of "
+                 << opts.dump_max_hlo_modules;
+      return;
+    }
+  }
+
   string file_path =
       tensorflow::io::JoinPath(dir, SanitizeFileName(string(filename)));
   auto status = tensorflow::WriteStringToFile(env, file_path, contents);
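The check above counts how many distinct modules have already been dumped into the directory by globbing for first-dump filenames (the *.0000.* pattern matches each module's first dump) and bails out once the configured limit is exceeded. A minimal standalone sketch of the same idea follows, assuming a TensorFlow build environment; the function name and the example limit handling are hypothetical.

// Sketch only: count existing "module_*" first dumps in `dir` and decide
// whether another module may be dumped. Mirrors the logic added above.
#include <string>
#include <vector>

#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"

bool MayDumpAnotherModule(const std::string& dir, int max_hlo_modules) {
  // Mirrors the `> 0` guard above: a non-positive limit means "no limit".
  if (max_hlo_modules <= 0) return true;
  std::vector<std::string> matches;
  // Each module's very first dump carries step number 0000, so counting those
  // files approximates the number of modules dumped so far.
  auto pattern = tensorflow::io::JoinPath(dir, "*module_*.0000.*");
  auto status = tensorflow::Env::Default()->GetMatchingPaths(pattern, &matches);
  if (!status.ok()) {
    LOG(ERROR) << "Could not get matching paths for pattern " << pattern
               << ": " << status;
  }
  return matches.size() <= static_cast<size_t>(max_hlo_modules);
}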
@@ -247,28 +268,44 @@ void DumpHloModuleImpl(const HloModule& module,

 static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);

-// Maps a module's unique ID to a {counter, timestamp} indicating how many times
-// we've dumped this module during the compilation pipeline and when we first
-// started compiling this module. This lets us keep the filenames ordered
-// nicely.
+// Maps a module's unique ID to a counter indicating how many times we've dumped
+// this module during the compilation pipeline. This lets us keep the filenames
+// ordered nicely.
 //
 // Entries added here leak forever; we have no way to GC them when a module
 // dies. But we only add an entry if dumping is enabled for this module, and
 // dumping a module leaks buffer space in stdout or bytes on disk *way* faster
 // than this hashtable leaks memory.
 static auto& module_id_to_step_number GUARDED_BY(mu) =
-    *new absl::flat_hash_map<int64, std::pair<int64, uint64>>();
+    *new absl::flat_hash_map<int64, int64>();

-std::pair<int64, uint64> StepNumberAndTimestampForModule(
-    const HloModule& module) {
+// Maps a module's unique ID to a timestamp indicating when we've first dumped
+// this module during the compilation pipeline and when we first started
+// compiling this module. This lets us keep the filenames ordered nicely.
+//
+// Entries added here leak forever; we have no way to GC them when a module
+// dies. But we only add an entry if dumping is enabled for this module, and
+// dumping a module leaks buffer space in stdout or bytes on disk *way* faster
+// than this hashtable leaks memory.
+static auto& module_id_to_timestamp GUARDED_BY(mu) =
+    *new absl::flat_hash_map<int64, uint64>();
+
+int64 StepNumberForModule(const HloModule& module) {
   tensorflow::mutex_lock lock(mu);
-  auto result = module_id_to_step_number.try_emplace(
-      module.unique_id(), 0, tensorflow::Env::Default()->NowMicros());
-  return std::make_pair(result.first->second.first++,
-                        result.first->second.second);
+  return module_id_to_step_number[module.unique_id()]++;
 }
 }  // namespace

+string TimestampFor(const HloModule& module) {
+  if (!module.config().debug_options().xla_dump_include_timestamp()) {
+    return "";
+  }
+  tensorflow::mutex_lock lock(mu);
+  auto timestamp_emplace = module_id_to_timestamp.try_emplace(
+      module.unique_id(), tensorflow::Env::Default()->NowMicros());
+  return std::to_string(timestamp_emplace.first->second);
+}
+
 string FilenameFor(const HloModule& module, string_view prefix,
                    string_view suffix) {
   return StrFormat("%s%smodule_%04d.%s", prefix, prefix.empty() ? "" : ".",
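The refactor above splits the old {counter, timestamp} pair into two leaky, mutex-guarded maps: a per-module dump counter and a first-dump timestamp, with the timestamp returned as an empty string when xla_dump_include_timestamp is off. Below is a condensed sketch of that pattern with hypothetical names, using absl synchronization primitives rather than the tensorflow::mutex used in the real code.

// Sketch only: per-module step counter plus a first-seen timestamp, both keyed
// by a module ID. Names are hypothetical; the real code is in the diff above.
#include <cstdint>
#include <string>

#include "absl/container/flat_hash_map.h"
#include "absl/synchronization/mutex.h"
#include "absl/time/clock.h"

absl::Mutex dump_mu;

absl::flat_hash_map<int64_t, int64_t>& StepNumbers() {
  static auto* m = new absl::flat_hash_map<int64_t, int64_t>();
  return *m;
}
absl::flat_hash_map<int64_t, uint64_t>& Timestamps() {
  static auto* m = new absl::flat_hash_map<int64_t, uint64_t>();
  return *m;
}

int64_t NextStepNumber(int64_t module_id) {
  absl::MutexLock lock(&dump_mu);
  return StepNumbers()[module_id]++;  // starts at 0 for a new module
}

std::string FirstDumpTimestamp(int64_t module_id, bool include_timestamp) {
  if (!include_timestamp) return "";
  absl::MutexLock lock(&dump_mu);
  // try_emplace only inserts on the first call for this module, so the
  // timestamp recorded then is reused for every later dump of the module.
  auto it = Timestamps().try_emplace(module_id, absl::ToUnixMicros(absl::Now()));
  return std::to_string(it.first->second);
}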
@@ -313,17 +350,17 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
 void DumpHloModuleIfEnabled(const HloModule& module, string_view name) {
   CanonicalDebugOptions opts(module.config().debug_options());
   if (opts.should_dump_module(module.name())) {
-    DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr, "",
-                      name, opts);
+    DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
+                      TimestampFor(module), name, opts);
   }
 }
 void DumpHloModuleIfEnabled(const HloModule& module,
                             const BufferAssignment& buffer_assn,
-                            string_view prefix, string_view name) {
+                            string_view name) {
   CanonicalDebugOptions opts(module.config().debug_options());
   if (opts.should_dump_module(module.name())) {
-    DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr, prefix, name,
-                      opts);
+    DumpHloModuleImpl(module, &buffer_assn, /*profile=*/nullptr,
+                      TimestampFor(module), name, opts);
   }
 }

@@ -332,8 +369,8 @@ void DumpHloModuleIfEnabled(const HloModule& module,
                             string_view name) {
   CanonicalDebugOptions opts(module.config().debug_options());
   if (opts.should_dump_module(module.name())) {
-    DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile, "", name,
-                      opts);
+    DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, &profile,
+                      TimestampFor(module), name, opts);
   }
 }

@@ -360,16 +397,14 @@ void DumpHloModuleBetweenPassesIfEnabled(string_view pipeline_name,
     return;
   }

-  int64 step_number;
-  uint64 timestamp;
-  std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
+  int64 step_number = StepNumberForModule(module);
+  std::string timestamp = TimestampFor(module);

-  string filename_prefix = std::to_string(timestamp);
   string filename_suffix =
       StrFormat("%04d.%s.after_%s.before_%s", step_number, pipeline_name,
                 after_pass_name, before_pass_name);
   DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
-                    filename_prefix, filename_suffix, opts);
+                    timestamp, filename_suffix, opts);
 }

 void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
@@ -381,15 +416,13 @@ void DumpHloModuleDuringPassIfEnabled(string_view pass_name,
     return;
   }

-  int64 step_number;
-  uint64 timestamp;
-  std::tie(step_number, timestamp) = StepNumberAndTimestampForModule(module);
+  int64 step_number = StepNumberForModule(module);
+  std::string timestamp = TimestampFor(module);

-  string filename_prefix = std::to_string(timestamp);
   string filename_suffix =
       StrFormat("%04d.%s.%s", step_number, pass_name, step_name);
   DumpHloModuleImpl(module, /*buffer_assn=*/nullptr, /*profile=*/nullptr,
-                    filename_prefix, filename_suffix, opts);
+                    timestamp, filename_suffix, opts);
 }

 void DumpHloSnapshotIfEnabled(const HloModule& module,
@@ -402,12 +435,12 @@ void DumpHloSnapshotIfEnabled(const HloModule& module,
   uint64 timestamp;
   {
     static auto& module_id_to_execution_count GUARDED_BY(mu) =
-        *new absl::flat_hash_map<int64, std::pair<int64, uint64>>();
+        *new absl::flat_hash_map<int64, int64>();
     tensorflow::mutex_lock lock(mu);
-    auto result = module_id_to_execution_count.try_emplace(
-        module.unique_id(), 0, tensorflow::Env::Default()->NowMicros());
-    execution_count = result.first->second.first++;
-    timestamp = result.first->second.second;
+    execution_count = module_id_to_execution_count[module.unique_id()]++;
+    auto timestamp_emplace = module_id_to_timestamp.try_emplace(
+        module.unique_id(), tensorflow::Env::Default()->NowMicros());
+    timestamp = timestamp_emplace.first->second;
   }
   string filename =
       StrCat(FilenameFor(module, std::to_string(timestamp),
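Taken together, these changes compose a dump filename from an optional first-dump timestamp prefix, the module id, a per-module step number, and a pass-specific suffix (see the FilenameFor format string and the %04d suffix formats above). The sketch below assembles such a name for illustration only: the helper name and the concrete values are hypothetical, and only the format strings are taken from the diff.

// Sketch only: assemble a dump filename the way FilenameFor and the
// pass-dump helpers above do. Values below are made up for illustration.
#include <cstdint>
#include <string>

#include "absl/strings/str_format.h"

std::string ExampleDumpName() {
  std::string prefix = "1580904000000000";  // e.g. TimestampFor(module); may be ""
  int module_id = 1;                        // module.unique_id()
  int64_t step_number = 3;                  // StepNumberForModule(module)
  std::string suffix = absl::StrFormat("%04d.%s.%s", step_number,
                                       "algebraic-simplifier", "after_pass");
  // FilenameFor: "%s%smodule_%04d.%s" -> "<prefix>.module_0001.<suffix>"
  return absl::StrFormat("%s%smodule_%04d.%s", prefix,
                         prefix.empty() ? "" : ".", module_id, suffix);
}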
@@ -33,6 +33,10 @@ class BufferAssignment;
 class HloExecutionProfile;
 class HloSnapshot;

+// Get a timestamp which we can use as a filename prefix specific to this
+// module.
+string TimestampFor(const HloModule& module);
+
 // Create the filename we will use to dump in DumpToFileInDir.
 string FilenameFor(const HloModule& module, absl::string_view prefix,
                    absl::string_view suffix);
@@ -65,7 +69,7 @@ void DumpExecutionOptions(const ExecutionOptions& execution_options,
 void DumpHloModuleIfEnabled(const HloModule& module, absl::string_view name);
 void DumpHloModuleIfEnabled(const HloModule& module,
                             const BufferAssignment& buffer_assn,
-                            absl::string_view prefix, absl::string_view name);
+                            absl::string_view name);
 void DumpHloModuleIfEnabled(const HloModule& module,
                             const HloExecutionProfile& profile,
                             absl::string_view name);
@@ -374,8 +374,7 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
                           /*allocate_buffers_for_constants=*/true,
                           /*colorer=*/BufferAssigner::DefaultColorer(),
                           /*must_not_live_out=*/{}, GetCanShareBuffer()));
-  DumpHloModuleIfEnabled(*module, *buffer_assignment, "",
-                         "after_optimizations");
+  DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");

   IrEmitterContext ir_emitter_context(
       module.get(), buffer_assignment.get(), stream_exec->platform(),
@@ -478,8 +478,7 @@ StatusOr<std::unique_ptr<Executable>> MlirCompiler::RunBackend(
                           /*allocate_buffers_for_constants=*/true,
                           /*colorer=*/BufferAssigner::DefaultColorer(),
                           /*must_not_live_out=*/{}, &CanShareBufferHint));
-  DumpHloModuleIfEnabled(*module, *buffer_assignment, "",
-                         "after_optimizations");
+  DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");

   EmissionContext emission_context(std::move(module));
   if (error_handler_) {
@@ -240,6 +240,12 @@ message DebugOptions {
   // directory.
   bool xla_dump_hlo_snapshots = 118;

+  // Include a timestamp in the dumped filenames.
+  bool xla_dump_include_timestamp = 131;
+
+  // Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
+  int32 xla_dump_max_hlo_modules = 132;
+
   //
   // END flags controlling dumping HLO modules.
   //
@@ -254,7 +260,7 @@ message DebugOptions {

   // Guarantee run-to-run determinism from reductions on XLA:GPU.
   bool xla_gpu_deterministic_reductions = 130;
-  // Next id: 131
+  // Next id: 133

   // Extra options to pass to the compilation backend (e.g. LLVM); specific
   // interpretation of these values is left to the backend.
@@ -173,6 +173,9 @@ xla::DebugOptions BuildXlaDebugOptions(const xla::DebugOptions& ref_options) {
   options.set_xla_dump_hlo_as_text(ref_options.xla_dump_hlo_as_text());
   options.set_xla_dump_hlo_snapshots(ref_options.xla_dump_hlo_snapshots());
   options.set_xla_dump_hlo_pass_re(ref_options.xla_dump_hlo_pass_re());
+  options.set_xla_dump_include_timestamp(
+      ref_options.xla_dump_include_timestamp());
+  options.set_xla_dump_max_hlo_modules(ref_options.xla_dump_max_hlo_modules());
   for (auto& pass : ref_options.xla_disable_hlo_passes()) {
     options.add_xla_disable_hlo_passes(pass);
   }