Have XLA CPU emit TraceMe calls by default in JIT mode (roll forward)

This lets Xprof enable these `TraceMe`s and produce timelines for XLA CPU.

Rolling forward with bugfix (earlier we would try to create a `void*` type in
LLVM IR, even though LLVM does not allow pointers to void).

PiperOrigin-RevId: 307961400
Change-Id: I0624c32294387a113867c2d80de8cccfd6cd6c21
This commit is contained in:
George Karpenkov 2020-04-22 20:16:26 -07:00 committed by TensorFlower Gardener
parent 9cac016e58
commit fb9026a766
5 changed files with 33 additions and 10 deletions

View File

@@ -38,6 +38,7 @@ def tf_library(
tfcompile_tool = "//tensorflow/compiler/aot:tfcompile", tfcompile_tool = "//tensorflow/compiler/aot:tfcompile",
include_standard_runtime_deps = True, include_standard_runtime_deps = True,
enable_xla_hlo_profiling = False, enable_xla_hlo_profiling = False,
enable_tracemes = False,
mlir_components = "None", mlir_components = "None",
deps = None, deps = None,
tags = []): tags = []):
@@ -89,6 +90,9 @@ def tf_library(
enable_xla_hlo_profiling: Enable XLA HLO profiling in the generated enable_xla_hlo_profiling: Enable XLA HLO profiling in the generated
program, and emit metadata that lets us pretty-print the gathered program, and emit metadata that lets us pretty-print the gathered
profile counters. profile counters.
enable_tracemes: Tell tfcompile to generate calls to
TraceMe::Activity{Start|End} around HLO instructions that can be used by
Xprof to construct profiler timelines.
mlir_components: When the value is "None", no components use MLIR. When mlir_components: When the value is "None", no components use MLIR. When
the value is "Bridge", use MLIR to translate GraphDef to HLO. the value is "Bridge", use MLIR to translate GraphDef to HLO.
deps: a list of deps to include on the build rules for the generated deps: a list of deps to include on the build rules for the generated
@@ -190,6 +194,11 @@ def tf_library(
else: else:
profiling_flag = "" profiling_flag = ""
if enable_tracemes:
traceme_flag = "--xla_cpu_enable_xprof_traceme=true"
else:
traceme_flag = "--xla_cpu_enable_xprof_traceme=false"
mlir_flag = "--mlir_components=" + mlir_components mlir_flag = "--mlir_components=" + mlir_components
srcs = [tfcompile_graph, config] srcs = [tfcompile_graph, config]
@@ -218,7 +227,7 @@ def tf_library(
" --out_header=$(@D)/" + header_file + " --out_header=$(@D)/" + header_file +
" --out_metadata_object=$(@D)/" + metadata_object_file + " --out_metadata_object=$(@D)/" + metadata_object_file +
" --out_function_object=$(@D)/" + function_object_file + " --out_function_object=$(@D)/" + function_object_file +
" " + flags + " " + profiling_flag + " " + mlir_flag " " + flags + " " + profiling_flag + " " + mlir_flag + " " + traceme_flag
), ),
tools = [tfcompile_tool], tools = [tfcompile_tool],
visibility = visibility, visibility = visibility,

View File

@@ -63,6 +63,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_allow_excess_precision(true); opts.set_xla_allow_excess_precision(true);
opts.set_xla_force_host_platform_device_count(1); opts.set_xla_force_host_platform_device_count(1);
opts.set_xla_gpu_deterministic_reductions(false); opts.set_xla_gpu_deterministic_reductions(false);
opts.set_xla_cpu_enable_xprof_traceme(true);
return opts; return opts;
} }
@@ -529,7 +531,6 @@ static void AllocateFlags() {
flag_values->xla_gpu_algorithm_blacklist_path(), flag_values->xla_gpu_algorithm_blacklist_path(),
"An AlgorithmBlacklist text proto file as a blacklist " "An AlgorithmBlacklist text proto file as a blacklist "
"of convolutions to avoid to use."), "of convolutions to avoid to use."),
tensorflow::Flag( tensorflow::Flag(
"xla_gpu_deterministic_reductions", "xla_gpu_deterministic_reductions",
bool_setter_for(&DebugOptions::set_xla_gpu_deterministic_reductions), bool_setter_for(&DebugOptions::set_xla_gpu_deterministic_reductions),
@@ -545,6 +546,12 @@ static void AllocateFlags() {
bool_setter_for(&DebugOptions::set_xla_tpu_detect_inf), bool_setter_for(&DebugOptions::set_xla_tpu_detect_inf),
flag_values->xla_tpu_detect_inf(), flag_values->xla_tpu_detect_inf(),
"Trigger error on execution on TPU if a INF value is detected"), "Trigger error on execution on TPU if a INF value is detected"),
tensorflow::Flag(
"xla_cpu_enable_xprof_traceme",
bool_setter_for(&DebugOptions::set_xla_cpu_enable_xprof_traceme),
flag_values->xla_cpu_enable_xprof_traceme(),
"If true, XLA CPU generates code to call "
"TraceMe::Activity{Start|End} around HLO operations."),
}); });
ParseFlagsFromEnvAndDieIfUnknown("XLA_FLAGS", *flag_objects); ParseFlagsFromEnvAndDieIfUnknown("XLA_FLAGS", *flag_objects);
} }

View File

@@ -182,11 +182,8 @@ StatusOr<llvm::Function*> IrEmitter::EmitComputation(
arch_type_ == llvm::Triple::ArchType::x86_64; arch_type_ == llvm::Triple::ArchType::x86_64;
profiling_state_ = ProfilingState(use_rdtscp); profiling_state_ = ProfilingState(use_rdtscp);
bool emit_tracing = tracing_state_.set_enabled(
hlo_module_config_.hlo_profiling_enabled() && computation->parent()->config().cpu_traceme_enabled());
hlo_module_config_.debug_options().xla_backend_extra_options().count(
"xla_hlo_trace");
tracing_state_.set_enabled(emit_tracing);
TF_RETURN_IF_ERROR(computation->AcceptOrdered(this, instruction_order)); TF_RETURN_IF_ERROR(computation->AcceptOrdered(this, instruction_order));
llvm::Function* ir_function = compute_function_->function(); llvm::Function* ir_function = compute_function_->function();
@@ -3126,7 +3123,8 @@ void IrEmitter::TracingState::EmitTracingStart(llvm::IRBuilder<>* b,
} }
llvm::Type* int8_ptr_type = b->getInt8Ty()->getPointerTo(); llvm::Type* int8_ptr_type = b->getInt8Ty()->getPointerTo();
llvm::Type* void_ptr_type = b->getVoidTy()->getPointerTo(); llvm::Type* void_ptr_type =
int8_ptr_type; // LLVM does not have a void*, we use an int8* instead.
llvm::FunctionType* fn_type = llvm::FunctionType* fn_type =
llvm::FunctionType::get(b->getInt64Ty(), {void_ptr_type, int8_ptr_type}, llvm::FunctionType::get(b->getInt64Ty(), {void_ptr_type, int8_ptr_type},
/*isVarArg=*/false); /*isVarArg=*/false);
@@ -3156,7 +3154,9 @@ void IrEmitter::TracingState::EmitTracingEnd(llvm::IRBuilder<>* b,
return; return;
} }
llvm::Type* void_ptr_type = b->getVoidTy()->getPointerTo(); llvm::Type* void_ptr_type =
b->getInt8Ty()->getPointerTo(); // LLVM does not have a void*, we use an
// int8* instead.
llvm::FunctionType* fn_type = llvm::FunctionType* fn_type =
llvm::FunctionType::get(b->getVoidTy(), {void_ptr_type, b->getInt64Ty()}, llvm::FunctionType::get(b->getVoidTy(), {void_ptr_type, b->getInt64Ty()},
/*isVarArg=*/false); /*isVarArg=*/false);

View File

@@ -104,6 +104,10 @@ class HloModuleConfig {
return debug_options_.xla_hlo_profile(); return debug_options_.xla_hlo_profile();
} }
bool cpu_traceme_enabled() const {
return debug_options_.xla_cpu_enable_xprof_traceme();
}
// Sets/returns the module seed set during execution. // Sets/returns the module seed set during execution.
void set_seed(uint64 seed) { seed_ = seed; } void set_seed(uint64 seed) { seed_ = seed; }
uint64 seed() const { return seed_; } uint64 seed() const { return seed_; }

View File

@@ -269,7 +269,10 @@ message DebugOptions {
bool xla_tpu_detect_nan = 135; bool xla_tpu_detect_nan = 135;
bool xla_tpu_detect_inf = 136; bool xla_tpu_detect_inf = 136;
// Next id: 137 // True if TraceMe annotations are enabled for XLA:CPU.
bool xla_cpu_enable_xprof_traceme = 137;
// Next id: 138
// Extra options to pass to the compilation backend (e.g. LLVM); specific // Extra options to pass to the compilation backend (e.g. LLVM); specific
// interpretation of these values is left to the backend. // interpretation of these values is left to the backend.