From 9b544af08d7949f342b45ad138ff9204dc7af6f1 Mon Sep 17 00:00:00 2001
From: George Karpenkov
Date: Wed, 29 Jan 2020 08:37:15 -0800
Subject: [PATCH] Change std::call_once to absl::call_once

absl::call_once is faster and supports fibers.

PiperOrigin-RevId: 292148213
Change-Id: I66e96d735b722a2642508a7e7a1e73de254234d7
---
 tensorflow/compiler/aot/BUILD                        |  1 +
 tensorflow/compiler/aot/compile.cc                   |  5 +++--
 tensorflow/compiler/jit/BUILD                        |  2 ++
 tensorflow/compiler/jit/flags.cc                     | 17 +++++++++--------
 .../compiler/jit/mark_for_compilation_pass.cc        |  5 +++--
 tensorflow/compiler/xla/BUILD                        |  1 +
 tensorflow/compiler/xla/debug_options_flags.cc       | 16 ++++++++--------
 tensorflow/compiler/xla/service/BUILD                |  1 +
 tensorflow/compiler/xla/service/cpu/BUILD            |  1 +
 .../compiler/xla/service/cpu/cpu_compiler.cc         | 14 +++++++-------
 tensorflow/compiler/xla/service/gpu/BUILD            |  2 ++
 .../xla/service/gpu/buffer_comparator.cc             |  5 +++--
 .../compiler/xla/service/gpu/gpu_compiler.cc         |  1 -
 .../xla/service/gpu/llvm_gpu_backend/BUILD           |  1 +
 .../gpu/llvm_gpu_backend/gpu_backend_lib.cc          |  9 +++++----
 .../compiler/xla/service/gpu/nvptx_compiler.cc       |  5 +++--
 .../xla/service/slow_operation_alarm.cc              |  6 +++---
 tensorflow/core/BUILD                                |  1 +
 tensorflow/core/common_runtime/process_state.cc      |  5 +++--
 tensorflow/core/framework/op_kernel.cc               |  5 +++--
 tensorflow/core/kernels/BUILD                        |  8 +++++++-
 tensorflow/core/kernels/conv_ops.cc                  |  1 -
 .../core/kernels/eigen_contraction_kernel.cc         |  6 ++++--
 tensorflow/core/kernels/example_parsing_ops.cc       | 13 +++++++------
 .../core/kernels/example_parsing_ops_test.cc         |  7 ++++---
 tensorflow/core/kernels/gpu_utils.cc                 |  9 +++++----
 tensorflow/core/nccl/BUILD                           |  1 +
 tensorflow/core/nccl/nccl_manager.cc                 |  5 +++--
 tensorflow/core/platform/BUILD                       |  1 +
 tensorflow/core/platform/cpu_feature_guard.cc        |  5 +++--
 tensorflow/core/platform/cpu_info.cc                 |  6 ++++--
 .../core/platform/profile_utils/cpu_utils.cc         |  5 +++--
 tensorflow/stream_executor/cuda/BUILD                |  1 +
 .../stream_executor/cuda/cuda_platform.cc            |  5 +++--
 tensorflow/stream_executor/gpu/BUILD                 |  1 +
 .../stream_executor/gpu/redzone_allocator.cc         |  5 +++--
 tensorflow/stream_executor/rocm/BUILD                |  1 +
 .../stream_executor/rocm/rocm_platform.cc            |  5 +++--
 38 files changed, 114 insertions(+), 74 deletions(-)

diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD
index 9ff2c227dea..2de57c1863e 100644
--- a/tensorflow/compiler/aot/BUILD
+++ b/tensorflow/compiler/aot/BUILD
@@ -33,6 +33,7 @@ cc_library(
     deps = [
         ":aot_only_var_handle_op",
         ":embedded_protocol_buffers",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/aot/compile.cc b/tensorflow/compiler/aot/compile.cc
index bd6c3bc8467..29859691c0a 100644
--- a/tensorflow/compiler/aot/compile.cc
+++ b/tensorflow/compiler/aot/compile.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include
 #include

+#include "absl/base/call_once.h"
 #include "llvm-c/Target.h"
 #include "tensorflow/compiler/aot/codegen.h"
 #include "tensorflow/compiler/aot/flags.h"
@@ -142,7 +143,7 @@ static Status ReadProtoFile(const string& fname, protobuf::Message* proto) {
   }
 }

-static std::once_flag targets_init;
+static absl::once_flag targets_init;

 static void InitializeTargets() {
   // Initialize all LLVM targets so we can cross compile.
@@ -167,7 +168,7 @@ static void InitializeTargets() {
 }

 Status Main(const MainFlags& flags) {
-  std::call_once(targets_init, &InitializeTargets);
+  absl::call_once(targets_init, &InitializeTargets);

   // Process config.
   tf2xla::Config config;
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 111bed82c54..bd3e1e111c7 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -261,6 +261,7 @@ cc_library(
         "//tensorflow/compiler/xla:parse_flags_from_env",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -642,6 +643,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/stream_executor/lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
diff --git a/tensorflow/compiler/jit/flags.cc b/tensorflow/compiler/jit/flags.cc
index a3698571715..02976309bdc 100644
--- a/tensorflow/compiler/jit/flags.cc
+++ b/tensorflow/compiler/jit/flags.cc
@@ -17,6 +17,7 @@ limitations under the License.

 #include  // NOLINT

+#include "absl/base/call_once.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_split.h"
 #include "absl/strings/strip.h"
@@ -34,7 +35,7 @@ XlaOpsCommonFlags* ops_flags;
 IntroduceFloatingPointJitterPassFlags* jitter_flags;

 std::vector* flag_list;
-std::once_flag flags_init;
+absl::once_flag flags_init;

 bool SetterForXlaAutoJitFlag(const string& value) {
   int32 opt_level;
@@ -215,38 +216,38 @@ void AllocateAndParseFlags() {
 }  // namespace

 bool SetXlaAutoJitFlagFromFlagString(const string& value) {
-  std::call_once(flags_init, &AllocateAndParseFlags);
+  absl::call_once(flags_init, &AllocateAndParseFlags);
   return SetterForXlaAutoJitFlag(value);
 }

 BuildXlaOpsPassFlags* GetBuildXlaOpsPassFlags() {
-  std::call_once(flags_init, &AllocateAndParseFlags);
+  absl::call_once(flags_init, &AllocateAndParseFlags);
   return build_ops_flags;
 }

 MarkForCompilationPassFlags* GetMarkForCompilationPassFlags() {
-  std::call_once(flags_init, &AllocateAndParseFlags);
+  absl::call_once(flags_init, &AllocateAndParseFlags);
   return mark_for_compilation_flags;
 }

 XlaDeviceFlags* GetXlaDeviceFlags() {
-  std::call_once(flags_init, &AllocateAndParseFlags);
+  absl::call_once(flags_init, &AllocateAndParseFlags);
   return device_flags;
 }

 const XlaOpsCommonFlags& GetXlaOpsCommonFlags() {
-  std::call_once(flags_init, &AllocateAndParseFlags);
+  absl::call_once(flags_init, &AllocateAndParseFlags);
   return *ops_flags;
 }

 const IntroduceFloatingPointJitterPassFlags&
 GetIntroduceFloatingPointJitterPassFlags() {
-  std::call_once(flags_init, &AllocateAndParseFlags);
+  absl::call_once(flags_init, &AllocateAndParseFlags);
   return *jitter_flags;
 }

 void AppendMarkForCompilationPassFlags(std::vector* flag_list) {
-  std::call_once(flags_init, &AllocateAndParseFlags);
+  absl::call_once(flags_init, &AllocateAndParseFlags);
   AppendMarkForCompilationPassFlagsInternal(flag_list);
 }
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 8adea252e8e..b06a6f9a988 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include
 #include

+#include "absl/base/call_once.h"
 #include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_join.h"
@@ -1616,8 +1617,8 @@ StatusOr MarkForCompilationPassImpl::ShouldCompileClusterImpl(
   if (!should_compile && global_jit_level_ != OptimizerOptions::OFF &&
       device_type.type_string() == DEVICE_CPU) {
-    static std::once_flag once;
-    std::call_once(once, [] {
+    static absl::once_flag once;
+    absl::call_once(once, [] {
       LOG(WARNING)
           << "(One-time warning): Not using XLA:CPU for cluster because envvar "
              "TF_XLA_FLAGS=--tf_xla_cpu_global_jit was not set. If you want "
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 0826201ecbd..11762dff72b 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -881,6 +881,7 @@ cc_library(
         ":xla_proto_cc",
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:node_hash_map",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc
index 4b5aa384e22..81669bd0f1c 100644
--- a/tensorflow/compiler/xla/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/debug_options_flags.cc
@@ -15,9 +15,9 @@ limitations under the License.

 #include "tensorflow/compiler/xla/debug_options_flags.h"

-#include  // NOLINT(build/c++11): only using std::call_once, not mutex.
 #include

+#include "absl/base/call_once.h"
 #include "absl/container/flat_hash_map.h"
 #include "absl/container/node_hash_map.h"
 #include "absl/strings/str_format.h"
@@ -64,7 +64,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   return opts;
 }

-static std::once_flag flags_init;
+static absl::once_flag flags_init;

 static DebugOptions* flag_values;
 static std::vector* flag_objects;
@@ -207,8 +207,8 @@ static void AllocateFlags() {
   // warning if a pass was specified but never consumed any fuel, on the
   // theory that this is may be a typo.
   if (!initial_fuel->empty()) {
-    static std::once_flag register_atexit_once;
-    std::call_once(
+    static absl::once_flag register_atexit_once;
+    absl::call_once(
         register_atexit_once,
         +[] { std::atexit(WarnIfFuelWasNeverConsumed); });
   }
@@ -527,18 +527,18 @@ static void AllocateFlags() {
 }

 void AppendDebugOptionsFlags(std::vector* flag_list) {
-  std::call_once(flags_init, &AllocateFlags);
+  absl::call_once(flags_init, &AllocateFlags);
   flag_list->insert(flag_list->end(), flag_objects->begin(),
                     flag_objects->end());
 }

 xla::DebugOptions GetDebugOptionsFromFlags() {
-  std::call_once(flags_init, &AllocateFlags);
+  absl::call_once(flags_init, &AllocateFlags);
   return *flag_values;
 }

 void ResetThreadLocalFuel() {
-  std::call_once(flags_init, &AllocateFlags);
+  absl::call_once(flags_init, &AllocateFlags);

   thread_fuel.reset(new absl::node_hash_map>());
   CHECK(initial_fuel != nullptr);
@@ -548,7 +548,7 @@ void ResetThreadLocalFuel() {
 }

 bool ConsumeFuel(absl::string_view pass, bool* just_ran_out) {
-  std::call_once(flags_init, &AllocateFlags);
+  absl::call_once(flags_init, &AllocateFlags);
   if (just_ran_out != nullptr) {
     *just_ran_out = false;
   }
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 232918802cc..8e4bed4aafb 100755
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -4473,6 +4473,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/synchronization",
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 713f10b146f..dd659fa2aa4 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -88,6 +88,7 @@ cc_library(
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         ":target_machine_features",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/types:span",
         "//tensorflow/compiler/xla/service:copy_insertion",
         "//tensorflow/compiler/xla/service:hlo_casting_utils",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 50d9c99fa4c..df1f1750689 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include
 #include
-#include  // NOLINT(build/c++11): only using std::call_once, not mutex.
 #include
 #include
 #include
@@ -27,6 +26,7 @@ limitations under the License.
 // IWYU pragma: no_include "llvm/Config/Disassemblers.def.inc"
 // IWYU pragma: no_include "llvm/Config/Targets.def.inc"

+#include "absl/base/call_once.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "llvm/ADT/StringRef.h"
@@ -166,7 +166,7 @@ namespace {
 // multiple invocations of the LLVM compilation pipeline with a different set of
 // flags. Therefore, we only pass command-line flags to LLVM once, before the
 // first module is compiled.
-std::once_flag llvm_command_line_options_initialized;
+absl::once_flag llvm_command_line_options_initialized;

 // This visitor records which HLO instructions should have profiling information
 // recorded.
@@ -565,8 +565,8 @@ StatusOr> CpuCompiler::RunBackend(
   auto slow_compile_alarm = SlowCompilationAlarm();

   TF_RET_CHECK(stream_exec != nullptr);
-  std::call_once(llvm_command_line_options_initialized,
-                 &llvm_ir::InitializeLLVMCommandLineOptions, module->config());
+  absl::call_once(llvm_command_line_options_initialized,
+                  &llvm_ir::InitializeLLVMCommandLineOptions, module->config());

   ModuleHook pre_optimization_ir_hook;
   ModuleHook post_optimization_ir_hook;
@@ -702,9 +702,9 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr module_group,
   std::vector> modules =
       module_group->ConsumeModules();

-  std::call_once(llvm_command_line_options_initialized,
-                 &llvm_ir::InitializeLLVMCommandLineOptions,
-                 modules[0]->config());
+  absl::call_once(llvm_command_line_options_initialized,
+                  &llvm_ir::InitializeLLVMCommandLineOptions,
+                  modules[0]->config());

   // We can pass just one llvm::TargetOptions when we compile the LLVM module,
   // so we bail if the configs have conflicting flags. At the moment, the only
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 94b208c5f53..49eebb70e62 100755
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -1235,6 +1235,7 @@ cc_library(
         "//tensorflow/stream_executor:stream_executor_headers",
         "//tensorflow/stream_executor/cuda:cuda_diagnostics",
         "//tensorflow/stream_executor/gpu:asm_compiler",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/container:node_hash_map",
         "@com_google_absl//absl/types:optional",
     ],
@@ -1465,6 +1466,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_module_config",
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/stream_executor:stream_executor_headers",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc b/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc
index 4ecf6ed8007..3a8fcc329b3 100644
--- a/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc
+++ b/tensorflow/compiler/xla/service/gpu/buffer_comparator.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include
 #include

+#include "absl/base/call_once.h"
 #include "absl/strings/str_replace.h"
 #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
@@ -585,8 +586,8 @@ static StatusOr DeviceCompare(se::Stream* stream,
   if (compiled_ptx_or.ok()) {
     compiled_ptx = compiled_ptx_or.ConsumeValueOrDie();
   } else {
-    static std::once_flag ptxas_not_found_logged;
-    std::call_once(ptxas_not_found_logged, [&]() {
+    static absl::once_flag ptxas_not_found_logged;
+    absl::call_once(ptxas_not_found_logged, [&]() {
       LOG(WARNING) << compiled_ptx_or.status().ToString()
                    << "\nRelying on driver to perform ptx compilation. "
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 8a14822c83f..29aed5fd7ff 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include
 #include
-#include  // NOLINT(build/c++11): only using std::call_once, not mutex.
 #include

 #include "absl/memory/memory.h"
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD
index 9203664e4c7..f1083553c57 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD
@@ -35,6 +35,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core/profiler/lib:traceme",
+        "@com_google_absl//absl/base",
        "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index 592d2494ec7..3a05daaea9a 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include
 #include

+#include "absl/base/call_once.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
@@ -492,8 +493,8 @@ namespace nvptx {
 StatusOr CompileToPtx(llvm::Module* module, GpuVersion gpu_version,
                       const HloModuleConfig& hlo_module_config,
                       const string& libdevice_dir_path) {
-  static std::once_flag backend_init_flag;
-  std::call_once(backend_init_flag, NVPTXBackendInit, hlo_module_config);
+  static absl::once_flag backend_init_flag;
+  absl::call_once(backend_init_flag, NVPTXBackendInit, hlo_module_config);

   string ptx;
   std::unique_ptr target_machine;
@@ -712,8 +713,8 @@ namespace amdgpu {
 StatusOr> CompileToHsaco(
     llvm::Module* module, GpuVersion gpu_version,
     const HloModuleConfig& hlo_module_config, const string& rocdl_dir_path) {
-  static std::once_flag backend_init_flag;
-  std::call_once(backend_init_flag, AMDGPUBackendInit, hlo_module_config);
+  static absl::once_flag backend_init_flag;
+  absl::call_once(backend_init_flag, AMDGPUBackendInit, hlo_module_config);

   std::vector hsaco;
   std::unique_ptr target_machine;
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index a95fd884a62..b3dc7a186c0 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -19,6 +19,7 @@ limitations under the License.

 #include

+#include "absl/base/call_once.h"
 #include "tensorflow/compiler/xla/service/algebraic_simplifier.h"
 #include "tensorflow/compiler/xla/service/dump.h"
 #include "tensorflow/compiler/xla/service/gpu/cublas_gemm_pad_for_tensor_cores.h"
@@ -247,8 +248,8 @@ absl::optional CanShareBufferHint(const HloInstruction* user,
 //
 // Only prints a warning the first time it's called.
 void WarnIfBadDriverJITVersion() {
-  static std::once_flag run_once;
-  std::call_once(run_once, [] {
+  static absl::once_flag run_once;
+  absl::call_once(run_once, [] {
     auto version_or_status = se::cuda::Diagnostician::FindKernelDriverVersion();
     if (!version_or_status.ok()) {
       LOG(WARNING) << "Couldn't read CUDA driver version.";
diff --git a/tensorflow/compiler/xla/service/slow_operation_alarm.cc b/tensorflow/compiler/xla/service/slow_operation_alarm.cc
index 3a0bd830d30..2ce66b25daa 100644
--- a/tensorflow/compiler/xla/service/slow_operation_alarm.cc
+++ b/tensorflow/compiler/xla/service/slow_operation_alarm.cc
@@ -16,9 +16,9 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/slow_operation_alarm.h" #include -#include // NOLINT (for std::call_once, not std::mutex) #include "absl/algorithm/container.h" +#include "absl/base/call_once.h" #include "absl/base/thread_annotations.h" #include "absl/memory/memory.h" #include "absl/synchronization/mutex.h" @@ -29,7 +29,7 @@ namespace { absl::Mutex mu(absl::kConstInit); absl::CondVar* ready; -std::once_flag init_flag; +absl::once_flag init_flag; std::list* outstanding_alarms ABSL_PT_GUARDED_BY(mu) = nullptr; @@ -73,7 +73,7 @@ void AlarmLoop() { } void ScheduleAlarm(SlowOperationAlarm* alarm) { - std::call_once(init_flag, [] { + absl::call_once(init_flag, [] { ready = new absl::CondVar(); outstanding_alarms = new std::list(); (void)tensorflow::Env::Default()->StartThread( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 296807c130d..b2354839021 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2578,6 +2578,7 @@ tf_cuda_library( ":lib", ":lib_internal", ":protos_all_cc", + "@com_google_absl//absl/base", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc index 06f969faf42..19f7a985f3e 100644 --- a/tensorflow/core/common_runtime/process_state.cc +++ b/tensorflow/core/common_runtime/process_state.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "absl/base/call_once.h" #include "tensorflow/core/common_runtime/bfc_allocator.h" #include "tensorflow/core/common_runtime/pool_allocator.h" #include "tensorflow/core/framework/allocator.h" @@ -33,8 +34,8 @@ namespace tensorflow { /*static*/ ProcessState* ProcessState::singleton() { static ProcessState* instance = new ProcessState; - static std::once_flag f; - std::call_once(f, []() { + static absl::once_flag f; + absl::call_once(f, []() { AllocatorFactoryRegistry::singleton()->process_state_ = instance; }); diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index a02afa8235c..9426c75b882 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -23,6 +23,7 @@ limitations under the License. #include #include +#include "absl/base/call_once.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/device_attributes.pb.h" @@ -1226,8 +1227,8 @@ void LoadDynamicKernelsInternal() { void LoadDynamicKernels() { // TODO(gunan): As more features are available, add intelligent kernel // selection, and dropping unsuitable kernel logic here. 
-  static std::once_flag dll_loader_flag;
-  std::call_once(dll_loader_flag, LoadDynamicKernelsInternal);
+  static absl::once_flag dll_loader_flag;
+  absl::call_once(dll_loader_flag, LoadDynamicKernelsInternal);
 }

 void* GlobalKernelRegistry() {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 561324b1877..328e5545413 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -536,6 +536,7 @@ tf_cuda_library(
         "//tensorflow/stream_executor/gpu:asm_compiler",
         "//tensorflow/stream_executor/gpu:redzone_allocator",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/types:span",
     ],
 )
@@ -782,7 +783,7 @@ cc_library(
     deps = select({
         ":no_mkldnn_contraction_kernel": [":eigen_contraction_kernel_no_mkl"],
         "//conditions:default": [":eigen_contraction_kernel_with_mkl"],
-    }),
+    }) + ["@com_google_absl//absl/base"],
 )

 cc_library(
@@ -801,6 +802,7 @@ cc_library(
         ],
     }),
     deps = [
+        "@com_google_absl//absl/base",
         "//third_party/eigen3",
         "//tensorflow/core/platform:dynamic_annotations",
     ] + select({
@@ -820,6 +822,7 @@ cc_library(
     deps = [
         "//tensorflow/core/platform:dynamic_annotations",
         "//third_party/eigen3",
+        "@com_google_absl//absl/base",
     ],
 )

@@ -1888,6 +1891,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/base",
     ],
 )

@@ -4978,6 +4982,7 @@ cc_library(
 )

 PARSING_DEPS = [
+    "@com_google_absl//absl/base",
     "//tensorflow/core:core_cpu_internal",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
@@ -6868,6 +6873,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc_impl",
         "//third_party/eigen3",
         "//third_party/fft2d:fft2d_headers",
+        "@com_google_absl//absl/base",
         "@com_google_protobuf//:protobuf",
         "@fft2d",
         "@gemmlowp",
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index be755a982cc..23a28c4e668 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -28,7 +28,6 @@ limitations under the License.
 #include
 #include
-#include  // NOLINT(build/c++11): only using std::call_once, not mutex.
 #include

 #include "tensorflow/core/framework/allocator.h"
diff --git a/tensorflow/core/kernels/eigen_contraction_kernel.cc b/tensorflow/core/kernels/eigen_contraction_kernel.cc
index ef4c8b82efa..aa6cb4b9cb9 100644
--- a/tensorflow/core/kernels/eigen_contraction_kernel.cc
+++ b/tensorflow/core/kernels/eigen_contraction_kernel.cc
@@ -17,6 +17,8 @@ limitations under the License.

 #include  // NOLINT(build/c++11)

+#include "absl/base/call_once.h"
+
 // We need a pair of compile time and runtime flags to disable compilation of
 // custom contraction kernels for unsupported architectures (e.g. Android,
 // iOS, ARM and PPC CPUs, etc...), and to be able to fallback on default Eigen
 // contraction kernel at runtime.
@@ -42,8 +44,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE bool UseCustomContractionKernels() {
   // This subroutine should not be used in GPU. In case it is, a custom kernel
   // should always be used
 #if !defined __NVCC__ && !defined __HIP_DEVICE_COMPILE__
-  static std::once_flag initialized;
-  std::call_once(initialized, [&] {
+  static absl::once_flag initialized;
+  absl::call_once(initialized, [&] {
     char* flag = std::getenv("TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL");
     if (flag && (strcmp(flag, "false") == 0 || strcmp(flag, "0") == 0)) {
       use_custom_contraction_kernel = false;
diff --git a/tensorflow/core/kernels/example_parsing_ops.cc b/tensorflow/core/kernels/example_parsing_ops.cc
index f2fc3a6587d..3412d00136e 100644
--- a/tensorflow/core/kernels/example_parsing_ops.cc
+++ b/tensorflow/core/kernels/example_parsing_ops.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include
 #include

+#include "absl/base/call_once.h"
 #include "tensorflow/core/common_runtime/metrics.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
@@ -70,7 +71,7 @@ class ParseExampleOp : public OpKernel {
       OP_REQUIRES_OK(ctx, GetInputListKeys(ctx, "dense_keys", &dense_keys_t));
       OP_REQUIRES_OK(ctx, GetInputListKeys(ctx, "sparse_keys", &sparse_keys_t));
     }
-    std::call_once(flag_, [&dense_keys_t, &sparse_keys_t, &ragged_keys_t]() {
+    absl::call_once(flag_, [&dense_keys_t, &sparse_keys_t, &ragged_keys_t]() {
       metrics::RecordParseDenseFeature(dense_keys_t.size());
       metrics::RecordParseSparseFeature(sparse_keys_t.size());
       metrics::RecordParseRaggedFeature(ragged_keys_t.size());
@@ -284,7 +285,7 @@ class ParseExampleOp : public OpKernel {

   ParseExampleAttrs attrs_;
   int op_version_;
-  std::once_flag flag_;
+  absl::once_flag flag_;
 };

 REGISTER_KERNEL_BUILDER(Name("ParseExample").Device(DEVICE_CPU),
@@ -434,7 +435,7 @@ class ParseSequenceExampleOp : public OpKernel {
                                         &feature_list_sparse_keys));
     OP_REQUIRES_OK(ctx, ctx->input("feature_list_ragged_keys",
                                    &feature_list_ragged_keys));
-    std::call_once(flag_, [&]() {
+    absl::call_once(flag_, [&]() {
       metrics::RecordParseDenseFeature(
           context_dense_keys->NumElements() +
           feature_list_dense_keys->NumElements());
@@ -707,7 +708,7 @@ class ParseSequenceExampleOp : public OpKernel {

   ParseSequenceExampleAttrs attrs_;
   int op_version_;
-  std::once_flag flag_;
+  absl::once_flag flag_;
 };

 REGISTER_KERNEL_BUILDER(Name("ParseSequenceExample").Device(DEVICE_CPU),
@@ -753,7 +754,7 @@ class ParseSingleSequenceExampleOp : public OpKernel {
         attrs_.num_feature_list_dense);
     std::vector feature_list_sparse_keys_t(
         attrs_.num_feature_list_sparse);
-    std::call_once(
+    absl::call_once(
         flag_, [&context_dense_keys_t, &context_sparse_keys_t,
                 &feature_list_dense_keys_t, &feature_list_sparse_keys_t]() {
           metrics::RecordParseDenseFeature(context_dense_keys_t.size() +
@@ -1129,7 +1130,7 @@ class ParseSingleSequenceExampleOp : public OpKernel {

  protected:
   ParseSingleSequenceExampleAttrs attrs_;
-  std::once_flag flag_;
+  absl::once_flag flag_;
 };

 REGISTER_KERNEL_BUILDER(Name("ParseSingleSequenceExample").Device(DEVICE_CPU),
diff --git a/tensorflow/core/kernels/example_parsing_ops_test.cc b/tensorflow/core/kernels/example_parsing_ops_test.cc
index bbf36ef4aa6..2fb1a9261f2 100644
--- a/tensorflow/core/kernels/example_parsing_ops_test.cc
+++ b/tensorflow/core/kernels/example_parsing_ops_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 #include

+#include "absl/base/call_once.h"
 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
@@ -82,11 +83,11 @@ template
 struct ExampleStore {
  private:
   static ExampleTensorMap serialized_example;
-  static std::once_flag flags_init;
+  static absl::once_flag flags_init;

  public:
   static ExampleTensorMap& GetSerializedExample() {
-    std::call_once(flags_init, [] {
+    absl::call_once(flags_init, [] {
       AddExample(&serialized_example, 10, 1, 1);
       AddExample(&serialized_example, 100, 1, 1);
       AddExample(&serialized_example, 1000, 1, 1);
@@ -133,7 +134,7 @@ struct ExampleStore {
 template
 ExampleTensorMap ExampleStore::serialized_example;
 template
-std::once_flag ExampleStore::flags_init;
+absl::once_flag ExampleStore::flags_init;

 template struct ExampleStore;
 template struct ExampleStore;
diff --git a/tensorflow/core/kernels/gpu_utils.cc b/tensorflow/core/kernels/gpu_utils.cc
index 5bf211dcdf2..01ae76f809c 100644
--- a/tensorflow/core/kernels/gpu_utils.cc
+++ b/tensorflow/core/kernels/gpu_utils.cc
@@ -21,6 +21,7 @@ limitations under the License.

 #include "google/protobuf/any.pb.h"
 #include "absl/algorithm/container.h"
+#include "absl/base/call_once.h"
 #include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/protobuf/autotuning.pb.h"
 #include "tensorflow/core/protobuf/conv_autotuning.pb.h"
@@ -42,8 +43,8 @@ se::DeviceMemoryBase WrapRedzoneBestEffort(se::RedzoneAllocator* rz_allocator,
   }
   auto output_rz_or = rz_allocator->AllocateBytes(buffer.size());
   if (!output_rz_or.ok()) {
-    static std::once_flag rz_allocation_failure_logged;
-    std::call_once(rz_allocation_failure_logged, []() {
+    static absl::once_flag rz_allocation_failure_logged;
+    absl::call_once(rz_allocation_failure_logged, []() {
       LOG(WARNING) << "Failed to allocate memory for convolution redzone "
                    << "checking; skipping this check. This is benign and only "
                    << "means that we won't check cudnn for out-of-bounds reads "
@@ -62,8 +63,8 @@ void CheckRedzones(const se::RedzoneAllocator& rz_allocator,
   se::port::StatusOr rz_status =
       rz_allocator.CheckRedzones();
   if (!rz_status.ok()) {
-    static std::once_flag failure_logged;
-    std::call_once(failure_logged, [&]() {
+    static absl::once_flag failure_logged;
+    absl::call_once(failure_logged, [&]() {
       LOG(WARNING) << "Failed to check cudnn convolutions for out-of-bounds "
                    << "reads and writes with an error message: '"
                    << rz_status.status().error_message()
diff --git a/tensorflow/core/nccl/BUILD b/tensorflow/core/nccl/BUILD
index 487976bb012..b21936167d2 100644
--- a/tensorflow/core/nccl/BUILD
+++ b/tensorflow/core/nccl/BUILD
@@ -35,6 +35,7 @@ cc_library(
         "@local_config_rocm//rocm:rccl",
         "//tensorflow/core:gpu_runtime",
     ]) + if_cuda_or_rocm([
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "//tensorflow/core:core_cpu",
diff --git a/tensorflow/core/nccl/nccl_manager.cc b/tensorflow/core/nccl/nccl_manager.cc
index 020cda22d44..8f9b32b832b 100644
--- a/tensorflow/core/nccl/nccl_manager.cc
+++ b/tensorflow/core/nccl/nccl_manager.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

+#include "absl/base/call_once.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/platform/env.h"
@@ -237,8 +238,8 @@ NcclManager* NcclManager::instance() {
 #if TENSORFLOW_USE_ROCM
   // singleton does not count against total instances
   // see comment above in Collective constructor concerning ROCm platform
-  static std::once_flag once;
-  std::call_once(once, [] { --NcclManager::instance_count; });
+  static absl::once_flag once;
+  absl::call_once(once, [] { --NcclManager::instance_count; });
 #endif
   return instance;
 }
diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD
index 6e5314c7905..6cff37d226c 100644
--- a/tensorflow/core/platform/BUILD
+++ b/tensorflow/core/platform/BUILD
@@ -164,6 +164,7 @@ cc_library(
         ":byte_order",
         ":logging",
         ":platform_port",
+        "@com_google_absl//absl/base",
     ],
 )

diff --git a/tensorflow/core/platform/cpu_feature_guard.cc b/tensorflow/core/platform/cpu_feature_guard.cc
index 38fc453008f..235dc5756a1 100644
--- a/tensorflow/core/platform/cpu_feature_guard.cc
+++ b/tensorflow/core/platform/cpu_feature_guard.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include
 #include

+#include "absl/base/call_once.h"
 #include "tensorflow/core/platform/byte_order.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/logging.h"
@@ -91,12 +92,12 @@ class CPUFeatureGuard {

 CPUFeatureGuard g_cpu_feature_guard_singleton;

-std::once_flag g_cpu_feature_guard_warn_once_flag;
+absl::once_flag g_cpu_feature_guard_warn_once_flag;

 }  // namespace

 void InfoAboutUnusedCPUFeatures() {
-  std::call_once(g_cpu_feature_guard_warn_once_flag, [] {
+  absl::call_once(g_cpu_feature_guard_warn_once_flag, [] {
     string missing_instructions;
 #if defined(_MSC_VER) && !defined(__clang__)
diff --git a/tensorflow/core/platform/cpu_info.cc b/tensorflow/core/platform/cpu_info.cc
index e9da3d8e32a..5e1a61f3860 100644
--- a/tensorflow/core/platform/cpu_info.cc
+++ b/tensorflow/core/platform/cpu_info.cc
@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/

 #include "tensorflow/core/platform/cpu_info.h"
+
+#include "absl/base/call_once.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/platform.h"
 #include "tensorflow/core/platform/types.h"
@@ -297,12 +299,12 @@ class CPUIDInfo {
   int model_num_;
 };

-std::once_flag cpuid_once_flag;
+absl::once_flag cpuid_once_flag;

 void InitCPUIDInfo() {
   // This ensures that CPUIDInfo::Initialize() is called exactly
   // once regardless of how many threads concurrently call us
-  std::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
+  absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
 }

 #endif  // PLATFORM_IS_X86
diff --git a/tensorflow/core/platform/profile_utils/cpu_utils.cc b/tensorflow/core/platform/profile_utils/cpu_utils.cc
index c83eeeae309..587c97875a0 100644
--- a/tensorflow/core/platform/profile_utils/cpu_utils.cc
+++ b/tensorflow/core/platform/profile_utils/cpu_utils.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include
 #endif

+#include "absl/base/call_once.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h"

@@ -134,8 +135,8 @@ static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr;
 }

 /* static */ ICpuUtilsHelper& CpuUtils::GetCpuUtilsHelperSingletonInstance() {
-  static std::once_flag flag;
-  std::call_once(flag, []() {
+  static absl::once_flag flag;
+  absl::call_once(flag, []() {
     if (cpu_utils_helper_instance_ != nullptr) {
       LOG(FATAL) << "cpu_utils_helper_instance_ is already instantiated.";
     }
diff --git a/tensorflow/stream_executor/cuda/BUILD b/tensorflow/stream_executor/cuda/BUILD
index 08c6686a3c0..fa9bc9c3ee5 100644
--- a/tensorflow/stream_executor/cuda/BUILD
+++ b/tensorflow/stream_executor/cuda/BUILD
@@ -63,6 +63,7 @@ cc_library(
         "//tensorflow/stream_executor/platform",
     ],
 ) + tf_additional_cuda_platform_deps() + [
+    "@com_google_absl//absl/base",
     "@com_google_absl//absl/base:core_headers",
     "@com_google_absl//absl/memory",
     "@com_google_absl//absl/strings",
diff --git a/tensorflow/stream_executor/cuda/cuda_platform.cc b/tensorflow/stream_executor/cuda/cuda_platform.cc
index 598d6923d18..92170b30129 100644
--- a/tensorflow/stream_executor/cuda/cuda_platform.cc
+++ b/tensorflow/stream_executor/cuda/cuda_platform.cc
@@ -15,6 +15,7 @@ limitations under the License.

 #include "tensorflow/stream_executor/cuda/cuda_platform.h"

+#include "absl/base/call_once.h"
 #include "absl/base/const_init.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
@@ -76,8 +77,8 @@ CudaPlatform::~CudaPlatform() {}
 void CudaPlatform::InspectNumaNodes() {
   // To get NUMA node information, we need to create all executors, so we can
   // examine their device descriptions to see their bus assignments.
-  static std::once_flag once;
-  std::call_once(once, [&] {
+  static absl::once_flag once;
+  absl::call_once(once, [&] {
     StreamExecutorConfig config;
     for (int i = 0; i < VisibleDeviceCount(); i++) {
       config.ordinal = i;
diff --git a/tensorflow/stream_executor/gpu/BUILD b/tensorflow/stream_executor/gpu/BUILD
index 70ebfd14bb5..06322a501cc 100644
--- a/tensorflow/stream_executor/gpu/BUILD
+++ b/tensorflow/stream_executor/gpu/BUILD
@@ -237,6 +237,7 @@ cc_library(
     ],
     deps = if_gpu_is_configured([
         "asm_compiler",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/container:fixed_array",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/stream_executor/gpu/redzone_allocator.cc b/tensorflow/stream_executor/gpu/redzone_allocator.cc
index 7d21062ecea..ea78938c9ef 100644
--- a/tensorflow/stream_executor/gpu/redzone_allocator.cc
+++ b/tensorflow/stream_executor/gpu/redzone_allocator.cc
@@ -15,6 +15,7 @@ limitations under the License.

 #include "tensorflow/stream_executor/gpu/redzone_allocator.h"

+#include "absl/base/call_once.h"
 #include "absl/container/fixed_array.h"
 #include "absl/strings/str_format.h"
 #include "absl/types/optional.h"
@@ -307,8 +308,8 @@ port::StatusOr RedzoneAllocator::CheckRedzones() const {
   if (compiled_ptx_or.ok()) {
     compiled_ptx = compiled_ptx_or.ValueOrDie();
   } else {
-    static std::once_flag ptxas_not_found_logged;
-    std::call_once(ptxas_not_found_logged, [&]() {
+    static absl::once_flag ptxas_not_found_logged;
+    absl::call_once(ptxas_not_found_logged, [&]() {
       LOG(WARNING) << compiled_ptx_or.status().ToString()
                    << "\nRelying on driver to perform ptx compilation. "
                    << "\nModify $PATH to customize ptxas location."
diff --git a/tensorflow/stream_executor/rocm/BUILD b/tensorflow/stream_executor/rocm/BUILD
index 6d80a7501e5..3c39b61c6b2 100644
--- a/tensorflow/stream_executor/rocm/BUILD
+++ b/tensorflow/stream_executor/rocm/BUILD
@@ -136,6 +136,7 @@ cc_library(
         ":rocm_driver",
         ":rocm_gpu_executor",
         ":rocm_platform_id",
+        "@com_google_absl//absl/base",
         "@com_google_absl//absl/memory",
         "//tensorflow/stream_executor",  # buildcleaner: keep
         "//tensorflow/stream_executor:executor_cache",
diff --git a/tensorflow/stream_executor/rocm/rocm_platform.cc b/tensorflow/stream_executor/rocm/rocm_platform.cc
index 1dfbe2eec75..beed8b37698 100644
--- a/tensorflow/stream_executor/rocm/rocm_platform.cc
+++ b/tensorflow/stream_executor/rocm/rocm_platform.cc
@@ -15,6 +15,7 @@ limitations under the License.

 #include "tensorflow/stream_executor/rocm/rocm_platform.h"

+#include "absl/base/call_once.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_format.h"
 #include "tensorflow/stream_executor/gpu/gpu_driver.h"
@@ -38,8 +39,8 @@ ROCmPlatform::~ROCmPlatform() {}
 void ROCmPlatform::InspectNumaNodes() {
   // To get NUMA node information, we need to create all executors, so we can
   // examine their device descriptions to see their bus assignments.
-  std::once_flag once;
-  std::call_once(once, [&] {
+  absl::once_flag once;
+  absl::call_once(once, [&] {
     StreamExecutorConfig config;
     for (int i = 0; i < VisibleDeviceCount(); i++) {
       config.ordinal = i;
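
--
Reviewer note, not part of the patch: every hunk above applies the same
mechanical rewrite (std::once_flag to absl::once_flag, std::call_once to
absl::call_once, plus the "absl/base/call_once.h" include and the
"@com_google_absl//absl/base" Bazel dep). A minimal self-contained sketch
of the pattern follows; the names here (backend_init, InitializeBackend,
EnsureBackendInitialized) are hypothetical, not taken from the patch.

    #include <cstdio>

    #include "absl/base/call_once.h"

    // Namespace-scope flag, as in the files above. absl::once_flag has a
    // constexpr constructor, so this is safe static initialization and,
    // unlike std::once_flag, it never needs <mutex>.
    static absl::once_flag backend_init;

    // Hypothetical one-time initializer.
    static void InitializeBackend(int verbosity) {
      std::printf("backend initialized, verbosity=%d\n", verbosity);
    }

    void EnsureBackendInitialized() {
      // Runs InitializeBackend exactly once even under concurrent callers.
      // Trailing arguments are forwarded to the callable, mirroring the
      // cpu_compiler.cc call that passes module->config() through to
      // llvm_ir::InitializeLLVMCommandLineOptions.
      absl::call_once(backend_init, &InitializeBackend, /*verbosity=*/1);
    }

Both APIs guarantee exactly-once execution across threads; per the commit
message, the Abseil version is preferred because it is faster and
cooperates with fiber-based scheduling.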