From a82ec8a0726adf2a180dc6bb746edc01b738644b Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Wed, 4 Sep 2019 17:33:20 -0700 Subject: [PATCH 01/14] Disable the Grappler memory opt when GlobalJitLevel is on. --- .../grappler/optimizers/meta_optimizer.cc | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index a56b08d2e20..831ce5403be 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -126,6 +126,23 @@ bool AutoMixedPrecisionEnabled(RewriterConfig::Toggle opt_level) { return false; } +// A helper function to decide whether to enable the memory optimizer. +bool MemoryOptimizerEnabled(RewriterConfig::MemOptType mem_opt_type, + OptimizerOptions::GlobalJitLevel global_jit_level) { + // Disable the memory optimizer when GlobalJitLevel is ON as it hurts the + // XLA JIT performance. The (current) XLA clustering can result in loss of + // concurrency between kernel compute and memory copies. As such, it usually + // loses the concurrency needed to hide the latencies of the inserted swap-ins + // and swap-outs and incurs great performance overhead. Remove this check when + // the XLA JIT can better deal with the concurrency. + if (global_jit_level == OptimizerOptions::ON_1 || + global_jit_level == OptimizerOptions::ON_2) { + return false; + } + + return mem_opt_type != RewriterConfig::NO_MEM_OPT; +} + } // namespace #define MK_OPT(NAME, VALUE) \ @@ -216,7 +233,9 @@ Status MetaOptimizer::InitializeOptimizers( optimizers->push_back( MakeUnique(cfg_.auto_mixed_precision())); } - if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { + auto global_jit_level = + config_proto_.graph_options().optimizer_options().global_jit_level(); + if (MemoryOptimizerEnabled(cfg_.memory_optimization(), global_jit_level)) { if (cfg_.memory_optimizer_target_node_name_scope().empty()) { optimizers->push_back( // Use the default target node name prefix "gradients/" From cb3f976f5fe5886857c266f848977e2b5ff2e3da Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Thu, 5 Sep 2019 11:14:15 -0700 Subject: [PATCH 02/14] Check GlobalJitLevel only for DEFAULT_MEM_OPT. This relaxes the disable check and should be a slightly better behavior, as users still have some ways to enable the memory optimizer when they want to. --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 831ce5403be..262ed722a54 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -129,14 +129,15 @@ bool AutoMixedPrecisionEnabled(RewriterConfig::Toggle opt_level) { // A helper function to decide whether to enable the memory optimizer. bool MemoryOptimizerEnabled(RewriterConfig::MemOptType mem_opt_type, OptimizerOptions::GlobalJitLevel global_jit_level) { - // Disable the memory optimizer when GlobalJitLevel is ON as it hurts the - // XLA JIT performance. The (current) XLA clustering can result in loss of + // Disable the default memory optimizer when GlobalJitLevel is ON as it hurts + // the XLA JIT performance. The (current) XLA clustering can result in loss of // concurrency between kernel compute and memory copies. 
As such, it usually // loses the concurrency needed to hide the latencies of the inserted swap-ins // and swap-outs and incurs great performance overhead. Remove this check when // the XLA JIT can better deal with the concurrency. - if (global_jit_level == OptimizerOptions::ON_1 || - global_jit_level == OptimizerOptions::ON_2) { + if (mem_opt_type == RewriterConfig::DEFAULT_MEM_OPT && + (global_jit_level == OptimizerOptions::ON_1 || + global_jit_level == OptimizerOptions::ON_2)) { return false; } From 28d774bde1348d73f01d0de9a84ec3f06ae82b59 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Mon, 9 Sep 2019 14:55:10 -0700 Subject: [PATCH 03/14] Add a XlaConfigProxy class. Callbacks can be registered to this class so that runtime environment flags can be parsed to change configs in the Tensorflow core. A primary use of this is for the Tensorflow core to query the XLA JIT level, which can be configured by some runtime environment flags in addition to ConfigProto. --- tensorflow/compiler/jit/xla_cluster_util.cc | 34 ++++++-- tensorflow/core/BUILD | 1 + .../grappler/optimizers/meta_optimizer.cc | 8 +- tensorflow/core/util/xla_config_proxy.cc | 29 +++++++ tensorflow/core/util/xla_config_proxy.h | 84 +++++++++++++++++++ 5 files changed, 148 insertions(+), 8 deletions(-) create mode 100644 tensorflow/core/util/xla_config_proxy.cc create mode 100644 tensorflow/core/util/xla_config_proxy.h diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc index 97737c5ee9c..42033d6bea1 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.cc +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/graph/control_flow.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/xla_config_proxy.h" namespace tensorflow { @@ -224,13 +225,9 @@ struct XlaGlobalJitLevel { }; XlaGlobalJitLevel GetXlaGlobalJitLevel( - const GraphOptimizationPassOptions& options) { + const OptimizerOptions::GlobalJitLevel& jit_level_in_session_opts) { XlaGlobalJitLevel result; - OptimizerOptions::GlobalJitLevel jit_level_in_session_opts = - options.session_options->config.graph_options() - .optimizer_options() - .global_jit_level(); if (jit_level_in_session_opts == OptimizerOptions::DEFAULT) { // To set compilation to be on by default, change the following line. result.single_gpu = result.general = OptimizerOptions::OFF; @@ -289,7 +286,12 @@ bool IsSingleGpuGraph(const Graph& g) { OptimizerOptions::GlobalJitLevel GetGlobalJitLevelForGraph( const GraphOptimizationPassOptions& options) { - XlaGlobalJitLevel xla_global_jit_level = GetXlaGlobalJitLevel(options); + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts = + options.session_options->config.graph_options() + .optimizer_options() + .global_jit_level(); + XlaGlobalJitLevel xla_global_jit_level = + GetXlaGlobalJitLevel(jit_level_in_session_opts); if (xla_global_jit_level.single_gpu == xla_global_jit_level.general) { VLOG(4) << "GetGlobalJitLevelForGraph returning " << xla_global_jit_level.single_gpu; @@ -386,4 +388,24 @@ XlaAutoClusteringSummary GetXlaAutoClusteringSummary(const Graph& graph) { return result; } + +// Implement and register a setter for OptimizerOptions::GlobalJitLevel, so that +// the Tensorflow core can query its value. 
+bool GlobalJitLevelSetter(OptimizerOptions::GlobalJitLevel& global_jit_level) { + XlaGlobalJitLevel xla_global_jit_level = + GetXlaGlobalJitLevel(global_jit_level); + // Take the general flag to avoid the dependency on Tensorflow::Graph. + OptimizerOptions::GlobalJitLevel new_jit_level = xla_global_jit_level.general; + + if (new_jit_level != global_jit_level) { + global_jit_level = new_jit_level; + return true; + } else { + return false; + } +} + +REGISTER_XLA_CONFIG_SETTER(OptimizerOptions::GlobalJitLevel, + GlobalJitLevelSetter); + } // namespace tensorflow diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d1ab6df7a81..2ff04c2cf2b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2805,6 +2805,7 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [ "util/presized_cuckoo_map.h", "util/tensor_slice_set.h", "util/tensor_slice_util.h", + "util/xla_config_proxy.h", ] tf_cuda_library( diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 262ed722a54..a80fb09cf0a 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -51,6 +51,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/util/dump_graph.h" #include "tensorflow/core/util/ptr_util.h" +#include "tensorflow/core/util/xla_config_proxy.h" namespace tensorflow { namespace grappler { @@ -127,14 +128,17 @@ bool AutoMixedPrecisionEnabled(RewriterConfig::Toggle opt_level) { } // A helper function to decide whether to enable the memory optimizer. -bool MemoryOptimizerEnabled(RewriterConfig::MemOptType mem_opt_type, - OptimizerOptions::GlobalJitLevel global_jit_level) { +bool MemoryOptimizerEnabled( + RewriterConfig::MemOptType mem_opt_type, + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { // Disable the default memory optimizer when GlobalJitLevel is ON as it hurts // the XLA JIT performance. The (current) XLA clustering can result in loss of // concurrency between kernel compute and memory copies. As such, it usually // loses the concurrency needed to hide the latencies of the inserted swap-ins // and swap-outs and incurs great performance overhead. Remove this check when // the XLA JIT can better deal with the concurrency. + OptimizerOptions::GlobalJitLevel global_jit_level = + XlaConfigProxy::GetGlobalJitLevel(jit_level_in_session_opts); if (mem_opt_type == RewriterConfig::DEFAULT_MEM_OPT && (global_jit_level == OptimizerOptions::ON_1 || global_jit_level == OptimizerOptions::ON_2)) { diff --git a/tensorflow/core/util/xla_config_proxy.cc b/tensorflow/core/util/xla_config_proxy.cc new file mode 100644 index 00000000000..796b4b85bf1 --- /dev/null +++ b/tensorflow/core/util/xla_config_proxy.cc @@ -0,0 +1,29 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/util/xla_config_proxy.h" + +namespace tensorflow { + +/*static*/ +OptimizerOptions::GlobalJitLevel XlaConfigProxy::GetGlobalJitLevel( + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { + OptimizerOptions::GlobalJitLevel global_jit_level = jit_level_in_session_opts; + ConfigSetterRegistry::Global()->Update( + jit_level_in_session_opts); + return global_jit_level; +} + +} // namespace tensorflow diff --git a/tensorflow/core/util/xla_config_proxy.h b/tensorflow/core/util/xla_config_proxy.h new file mode 100644 index 00000000000..3cd5146f3c2 --- /dev/null +++ b/tensorflow/core/util/xla_config_proxy.h @@ -0,0 +1,84 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H +#define TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H + +#include +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/protobuf/config.pb.h" + +namespace tensorflow { + +// A proxy class of XLA config. +class XlaConfigProxy { + public: + // This class provides a means for updating configs (mainly ConfigProto) + // according to runtime environment variable flags. As the flags may be + // maintained outside the Tensorflow core and they may or may not exist + // depending on the build configuration, here implements a registration + // mechanism to register their config setters for query. + template + class ConfigSetterRegistry { + public: + static ConfigSetterRegistry* Global() { + static ConfigSetterRegistry* config_registry = new ConfigSetterRegistry; + return config_registry; + } + + // Register a setter for ConfigType. + void Register(std::function setter) { + CHECK(!a_setter_); + a_setter_ = std::move(setter); + } + + // Invoke the registered setter to update the ConfigType value. Return true + // if the value is updated. 
+ bool Update(ConfigType& value) { + if (!a_setter_) { + return true; + } + return a_setter_(value); + } + + private: + ConfigSetterRegistry() = default; + ConfigSetterRegistry(const ConfigSetterRegistry&) = delete; + ConfigSetterRegistry& operator=(const ConfigSetterRegistry&) = delete; + + private: + std::function a_setter_; + }; + + template + class ConfigSetterRegistration { + public: + ConfigSetterRegistration(std::function setter) { + ConfigSetterRegistry::Global()->Register(std::move(setter)); + } + }; + + public: + static OptimizerOptions::GlobalJitLevel GetGlobalJitLevel( + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts); +}; + +#define REGISTER_XLA_CONFIG_SETTER(ConfigType, setter) \ + static ::tensorflow::XlaConfigProxy::ConfigSetterRegistration \ + unique_xla_config_setter_ctr_##__COUNTER__(setter) + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H From 348488b58902c7b71ae70bbba0cc805fd2a65092 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Mon, 9 Sep 2019 16:17:49 -0700 Subject: [PATCH 04/14] Add a unittest for XlaConfigProxy. --- tensorflow/core/BUILD | 1 + tensorflow/core/util/xla_config_proxy.h | 1 + tensorflow/core/util/xla_config_proxy_test.cc | 61 +++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 tensorflow/core/util/xla_config_proxy_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 2ff04c2cf2b..b1baac5c4ef 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -4141,6 +4141,7 @@ tf_cc_tests( "util/tensor_slice_util_test.cc", "util/tensor_slice_writer_test.cc", "util/work_sharder_test.cc", + "util/xla_config_proxy_test.cc", ], linkopts = select({ "//tensorflow:macos": ["-headerpad_max_install_names"], diff --git a/tensorflow/core/util/xla_config_proxy.h b/tensorflow/core/util/xla_config_proxy.h index 3cd5146f3c2..d0caf2b8660 100644 --- a/tensorflow/core/util/xla_config_proxy.h +++ b/tensorflow/core/util/xla_config_proxy.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H #include +#include "tensorflow/core/platform/default/logging.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/config.pb.h" diff --git a/tensorflow/core/util/xla_config_proxy_test.cc b/tensorflow/core/util/xla_config_proxy_test.cc new file mode 100644 index 00000000000..848cff7e081 --- /dev/null +++ b/tensorflow/core/util/xla_config_proxy_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/util/xla_config_proxy.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +namespace { + +enum ConfigA { A_ON, A_OFF }; + +enum ConfigB { B_ON, B_OFF }; + +}; // anonymous + +TEST(XlaConfigProxyTest, RegistrationsAndQueries) { + ConfigA config_a = A_ON; + ConfigB config_b = B_ON; + + XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_a); + XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_b); + + // Expect original values. + EXPECT_EQ(A_ON, config_a); + EXPECT_EQ(B_ON, config_b); + + // Register callbacks. + XlaConfigProxy::ConfigSetterRegistration + contrig_setter_registration_a([](ConfigA& config_a) -> bool { + config_a = A_OFF; + return true; + }); + + XlaConfigProxy::ConfigSetterRegistration + contrig_setter_registration_b([](ConfigB& config_b) -> bool { + config_b = B_OFF; + return true; + }); + + // Verify that callbacks work. + XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_a); + XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_b); + EXPECT_EQ(A_OFF, config_a); + EXPECT_EQ(B_OFF, config_b); +} + +} // namespace tensorflow From b77327e29523ef011ac9d8c3f73526245efd15f0 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Mon, 9 Sep 2019 16:54:29 -0700 Subject: [PATCH 05/14] Fix a typo in XlaConfigProxy. --- tensorflow/core/util/xla_config_proxy.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/xla_config_proxy.cc b/tensorflow/core/util/xla_config_proxy.cc index 796b4b85bf1..952ddb2f97c 100644 --- a/tensorflow/core/util/xla_config_proxy.cc +++ b/tensorflow/core/util/xla_config_proxy.cc @@ -22,7 +22,7 @@ OptimizerOptions::GlobalJitLevel XlaConfigProxy::GetGlobalJitLevel( OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { OptimizerOptions::GlobalJitLevel global_jit_level = jit_level_in_session_opts; ConfigSetterRegistry::Global()->Update( - jit_level_in_session_opts); + global_jit_level); return global_jit_level; } From 21d983f7617d2847a06ec387ae72df57a9438809 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Tue, 10 Sep 2019 21:42:35 -0700 Subject: [PATCH 06/14] Fix typo. --- tensorflow/core/util/xla_config_proxy_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/util/xla_config_proxy_test.cc b/tensorflow/core/util/xla_config_proxy_test.cc index 848cff7e081..e4ed8bcbb2b 100644 --- a/tensorflow/core/util/xla_config_proxy_test.cc +++ b/tensorflow/core/util/xla_config_proxy_test.cc @@ -40,13 +40,13 @@ TEST(XlaConfigProxyTest, RegistrationsAndQueries) { // Register callbacks. XlaConfigProxy::ConfigSetterRegistration - contrig_setter_registration_a([](ConfigA& config_a) -> bool { + config_setter_registration_a([](ConfigA& config_a) -> bool { config_a = A_OFF; return true; }); XlaConfigProxy::ConfigSetterRegistration - contrig_setter_registration_b([](ConfigB& config_b) -> bool { + config_setter_registration_b([](ConfigB& config_b) -> bool { config_b = B_OFF; return true; }); From 88572baddc066ac0d3c2f46b070c490ff89ac867 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Wed, 11 Sep 2019 11:13:14 -0700 Subject: [PATCH 07/14] Simplify XlaConfigProxy by removing the utililies. Also, rename XlaConfigProxy to XlaConfigRegistry. 
--- tensorflow/compiler/jit/xla_cluster_util.cc | 23 ++--- tensorflow/core/BUILD | 3 +- .../grappler/optimizers/meta_optimizer.cc | 4 +- tensorflow/core/util/xla_config_proxy.h | 85 ------------------- tensorflow/core/util/xla_config_proxy_test.cc | 61 ------------- ...config_proxy.cc => xla_config_registry.cc} | 11 +-- tensorflow/core/util/xla_config_registry.h | 59 +++++++++++++ 7 files changed, 74 insertions(+), 172 deletions(-) delete mode 100644 tensorflow/core/util/xla_config_proxy.h delete mode 100644 tensorflow/core/util/xla_config_proxy_test.cc rename tensorflow/core/util/{xla_config_proxy.cc => xla_config_registry.cc} (64%) create mode 100644 tensorflow/core/util/xla_config_registry.h diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc index 42033d6bea1..6a0259f7b0e 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.cc +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -31,7 +31,7 @@ limitations under the License. #include "tensorflow/core/graph/control_flow.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/util/device_name_utils.h" -#include "tensorflow/core/util/xla_config_proxy.h" +#include "tensorflow/core/util/xla_config_registry.h" namespace tensorflow { @@ -389,23 +389,18 @@ XlaAutoClusteringSummary GetXlaAutoClusteringSummary(const Graph& graph) { return result; } -// Implement and register a setter for OptimizerOptions::GlobalJitLevel, so that -// the Tensorflow core can query its value. -bool GlobalJitLevelSetter(OptimizerOptions::GlobalJitLevel& global_jit_level) { +// Implement and register a callback for querying GlobalJitLevel. +OptimizerOptions::GlobalJitLevel GlobalJitLevelGetter( + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { XlaGlobalJitLevel xla_global_jit_level = - GetXlaGlobalJitLevel(global_jit_level); + GetXlaGlobalJitLevel(jit_level_in_session_opts); // Take the general flag to avoid the dependency on Tensorflow::Graph. - OptimizerOptions::GlobalJitLevel new_jit_level = xla_global_jit_level.general; + OptimizerOptions::GlobalJitLevel global_jit_level = + xla_global_jit_level.general; - if (new_jit_level != global_jit_level) { - global_jit_level = new_jit_level; - return true; - } else { - return false; - } + return global_jit_level; } -REGISTER_XLA_CONFIG_SETTER(OptimizerOptions::GlobalJitLevel, - GlobalJitLevelSetter); +REGISTER_XLA_CONFIG_GETTER(GlobalJitLevelGetter); } // namespace tensorflow diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b1baac5c4ef..75f6d942dba 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2805,7 +2805,7 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [ "util/presized_cuckoo_map.h", "util/tensor_slice_set.h", "util/tensor_slice_util.h", - "util/xla_config_proxy.h", + "util/xla_config_registry.h", ] tf_cuda_library( @@ -4141,7 +4141,6 @@ tf_cc_tests( "util/tensor_slice_util_test.cc", "util/tensor_slice_writer_test.cc", "util/work_sharder_test.cc", - "util/xla_config_proxy_test.cc", ], linkopts = select({ "//tensorflow:macos": ["-headerpad_max_install_names"], diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index a80fb09cf0a..992c8e1e64c 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -51,7 +51,7 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/util/dump_graph.h" #include "tensorflow/core/util/ptr_util.h" -#include "tensorflow/core/util/xla_config_proxy.h" +#include "tensorflow/core/util/xla_config_registry.h" namespace tensorflow { namespace grappler { @@ -138,7 +138,7 @@ bool MemoryOptimizerEnabled( // and swap-outs and incurs great performance overhead. Remove this check when // the XLA JIT can better deal with the concurrency. OptimizerOptions::GlobalJitLevel global_jit_level = - XlaConfigProxy::GetGlobalJitLevel(jit_level_in_session_opts); + XlaConfigRegistry::GetGlobalJitLevel(jit_level_in_session_opts); if (mem_opt_type == RewriterConfig::DEFAULT_MEM_OPT && (global_jit_level == OptimizerOptions::ON_1 || global_jit_level == OptimizerOptions::ON_2)) { diff --git a/tensorflow/core/util/xla_config_proxy.h b/tensorflow/core/util/xla_config_proxy.h deleted file mode 100644 index d0caf2b8660..00000000000 --- a/tensorflow/core/util/xla_config_proxy.h +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H -#define TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H - -#include -#include "tensorflow/core/platform/default/logging.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/protobuf/config.pb.h" - -namespace tensorflow { - -// A proxy class of XLA config. -class XlaConfigProxy { - public: - // This class provides a means for updating configs (mainly ConfigProto) - // according to runtime environment variable flags. As the flags may be - // maintained outside the Tensorflow core and they may or may not exist - // depending on the build configuration, here implements a registration - // mechanism to register their config setters for query. - template - class ConfigSetterRegistry { - public: - static ConfigSetterRegistry* Global() { - static ConfigSetterRegistry* config_registry = new ConfigSetterRegistry; - return config_registry; - } - - // Register a setter for ConfigType. - void Register(std::function setter) { - CHECK(!a_setter_); - a_setter_ = std::move(setter); - } - - // Invoke the registered setter to update the ConfigType value. Return true - // if the value is updated. 
- bool Update(ConfigType& value) { - if (!a_setter_) { - return true; - } - return a_setter_(value); - } - - private: - ConfigSetterRegistry() = default; - ConfigSetterRegistry(const ConfigSetterRegistry&) = delete; - ConfigSetterRegistry& operator=(const ConfigSetterRegistry&) = delete; - - private: - std::function a_setter_; - }; - - template - class ConfigSetterRegistration { - public: - ConfigSetterRegistration(std::function setter) { - ConfigSetterRegistry::Global()->Register(std::move(setter)); - } - }; - - public: - static OptimizerOptions::GlobalJitLevel GetGlobalJitLevel( - OptimizerOptions::GlobalJitLevel jit_level_in_session_opts); -}; - -#define REGISTER_XLA_CONFIG_SETTER(ConfigType, setter) \ - static ::tensorflow::XlaConfigProxy::ConfigSetterRegistration \ - unique_xla_config_setter_ctr_##__COUNTER__(setter) - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H diff --git a/tensorflow/core/util/xla_config_proxy_test.cc b/tensorflow/core/util/xla_config_proxy_test.cc deleted file mode 100644 index e4ed8bcbb2b..00000000000 --- a/tensorflow/core/util/xla_config_proxy_test.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/util/xla_config_proxy.h" - -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { - -namespace { - -enum ConfigA { A_ON, A_OFF }; - -enum ConfigB { B_ON, B_OFF }; - -}; // anonymous - -TEST(XlaConfigProxyTest, RegistrationsAndQueries) { - ConfigA config_a = A_ON; - ConfigB config_b = B_ON; - - XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_a); - XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_b); - - // Expect original values. - EXPECT_EQ(A_ON, config_a); - EXPECT_EQ(B_ON, config_b); - - // Register callbacks. - XlaConfigProxy::ConfigSetterRegistration - config_setter_registration_a([](ConfigA& config_a) -> bool { - config_a = A_OFF; - return true; - }); - - XlaConfigProxy::ConfigSetterRegistration - config_setter_registration_b([](ConfigB& config_b) -> bool { - config_b = B_OFF; - return true; - }); - - // Verify that callbacks work. - XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_a); - XlaConfigProxy::ConfigSetterRegistry::Global()->Update(config_b); - EXPECT_EQ(A_OFF, config_a); - EXPECT_EQ(B_OFF, config_b); -} - -} // namespace tensorflow diff --git a/tensorflow/core/util/xla_config_proxy.cc b/tensorflow/core/util/xla_config_registry.cc similarity index 64% rename from tensorflow/core/util/xla_config_proxy.cc rename to tensorflow/core/util/xla_config_registry.cc index 952ddb2f97c..aa1a8bf127c 100644 --- a/tensorflow/core/util/xla_config_proxy.cc +++ b/tensorflow/core/util/xla_config_registry.cc @@ -13,17 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/util/xla_config_proxy.h" +#include "tensorflow/core/util/xla_config_registry.h" namespace tensorflow { /*static*/ -OptimizerOptions::GlobalJitLevel XlaConfigProxy::GetGlobalJitLevel( - OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { - OptimizerOptions::GlobalJitLevel global_jit_level = jit_level_in_session_opts; - ConfigSetterRegistry::Global()->Update( - global_jit_level); - return global_jit_level; -} +XlaConfigRegistry::global_jit_level_getter_t + XlaConfigRegistry::global_jit_level_getter_; } // namespace tensorflow diff --git a/tensorflow/core/util/xla_config_registry.h b/tensorflow/core/util/xla_config_registry.h new file mode 100644 index 00000000000..2835cee6c06 --- /dev/null +++ b/tensorflow/core/util/xla_config_registry.h @@ -0,0 +1,59 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H +#define TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H + +#include +#include "tensorflow/core/platform/default/logging.h" +#include "tensorflow/core/protobuf/config.pb.h" + +namespace tensorflow { + +// A registry class where XLA can register callbacks for Tensorflow to query +// its status. +class XlaConfigRegistry { + public: + // Input is jit_level in session config, and return is the config from + // XLA, reflecting the effect of the environment variable flags. + typedef std::function + global_jit_level_getter_t; + + static bool Register(XlaConfigRegistry::global_jit_level_getter_t getter) { + CHECK(!global_jit_level_getter_); + global_jit_level_getter_ = std::move(getter); + return true; + } + + static OptimizerOptions::GlobalJitLevel GetGlobalJitLevel( + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { + if (!global_jit_level_getter_) { + return jit_level_in_session_opts; + } + return global_jit_level_getter_(jit_level_in_session_opts); + } + + private: + static global_jit_level_getter_t global_jit_level_getter_; +}; + +#define REGISTER_XLA_CONFIG_GETTER(getter) \ + static bool registered_##__COUNTER__ = \ + ::tensorflow::XlaConfigRegistry::Register(getter) + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H From 11e4bbee8d780a71b431c2fca3b15a2162afa5d1 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Wed, 11 Sep 2019 12:44:11 -0700 Subject: [PATCH 08/14] Minor revision of header macros in xla_config_registry.h. --- tensorflow/core/util/xla_config_registry.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/xla_config_registry.h b/tensorflow/core/util/xla_config_registry.h index 2835cee6c06..17c1b27b10a 100644 --- a/tensorflow/core/util/xla_config_registry.h +++ b/tensorflow/core/util/xla_config_registry.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H -#define TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H +#ifndef TENSORFLOW_CORE_UTIL_XLA_CONFIG_REGISTRY_H +#define TENSORFLOW_CORE_UTIL_XLA_CONFIG_REGISTRY_H #include #include "tensorflow/core/platform/default/logging.h" @@ -56,4 +56,4 @@ class XlaConfigRegistry { } // namespace tensorflow -#endif // TENSORFLOW_CORE_UTIL_XLA_CONFIG_PROXY_H +#endif // TENSORFLOW_CORE_UTIL_XLA_CONFIG_REGISTRY_H From 9abe61a0bb18fe77a6cbf24fec4e587867f4503d Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Wed, 11 Sep 2019 22:54:14 -0700 Subject: [PATCH 09/14] Revise XlaConfigRegistry. 1. Return XlaGlobalJitLevel instead of GlobalJitLevel in GetGlobalJitLevel(). 2. Turn off the memory optimizer only when JIT is on for both single-gpu and multi-gpu. This is a more conservative behavior. 3. Naming style conformance. --- tensorflow/compiler/jit/xla_cluster_util.cc | 20 +++-------------- .../grappler/optimizers/meta_optimizer.cc | 19 ++++++++++++---- tensorflow/core/util/xla_config_registry.cc | 2 +- tensorflow/core/util/xla_config_registry.h | 22 +++++++++++-------- 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc index 6a0259f7b0e..73e0ccf63bb 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.cc +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -219,10 +219,7 @@ void RemoveFromXlaCluster(NodeDef* node_def) { void RemoveFromXlaCluster(Node* node) { node->ClearAttr(kXlaClusterAttr); } namespace { -struct XlaGlobalJitLevel { - OptimizerOptions::GlobalJitLevel single_gpu; - OptimizerOptions::GlobalJitLevel general; -}; +typedef XlaConfigRegistry::XlaGlobalJitLevel XlaGlobalJitLevel; XlaGlobalJitLevel GetXlaGlobalJitLevel( const OptimizerOptions::GlobalJitLevel& jit_level_in_session_opts) { @@ -389,18 +386,7 @@ XlaAutoClusteringSummary GetXlaAutoClusteringSummary(const Graph& graph) { return result; } -// Implement and register a callback for querying GlobalJitLevel. -OptimizerOptions::GlobalJitLevel GlobalJitLevelGetter( - OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { - XlaGlobalJitLevel xla_global_jit_level = - GetXlaGlobalJitLevel(jit_level_in_session_opts); - // Take the general flag to avoid the dependency on Tensorflow::Graph. - OptimizerOptions::GlobalJitLevel global_jit_level = - xla_global_jit_level.general; - - return global_jit_level; -} - -REGISTER_XLA_CONFIG_GETTER(GlobalJitLevelGetter); +// Register a callback for querying XlaGlobalJitLevel. +REGISTER_XLA_CONFIG_GETTER(GetXlaGlobalJitLevel); } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 992c8e1e64c..4a071e2e6e2 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -127,6 +127,20 @@ bool AutoMixedPrecisionEnabled(RewriterConfig::Toggle opt_level) { return false; } +bool IsXlaGlobalJitOn( + const OptimizerOptions::GlobalJitLevel& jit_level_in_session_opts) { + XlaConfigRegistry::XlaGlobalJitLevel xla_global_jit_level = + XlaConfigRegistry::GetGlobalJitLevel(jit_level_in_session_opts); + // Return true only if XLA JIT is ON for both single-gpu and multi-gpu + // graphs. This is a conservative approach that turns off the memory optimizer + // when we are sure that all graphs will be processed by XLA JIT. 
+ bool is_on = (xla_global_jit_level.single_gpu == OptimizerOptions::ON_1 || + xla_global_jit_level.single_gpu == OptimizerOptions::ON_2) && + (xla_global_jit_level.general == OptimizerOptions::ON_1 || + xla_global_jit_level.general == OptimizerOptions::ON_2); + return is_on; +} + // A helper function to decide whether to enable the memory optimizer. bool MemoryOptimizerEnabled( RewriterConfig::MemOptType mem_opt_type, @@ -137,11 +151,8 @@ bool MemoryOptimizerEnabled( // loses the concurrency needed to hide the latencies of the inserted swap-ins // and swap-outs and incurs great performance overhead. Remove this check when // the XLA JIT can better deal with the concurrency. - OptimizerOptions::GlobalJitLevel global_jit_level = - XlaConfigRegistry::GetGlobalJitLevel(jit_level_in_session_opts); if (mem_opt_type == RewriterConfig::DEFAULT_MEM_OPT && - (global_jit_level == OptimizerOptions::ON_1 || - global_jit_level == OptimizerOptions::ON_2)) { + IsXlaGlobalJitOn(jit_level_in_session_opts)) { return false; } diff --git a/tensorflow/core/util/xla_config_registry.cc b/tensorflow/core/util/xla_config_registry.cc index aa1a8bf127c..1297265a89d 100644 --- a/tensorflow/core/util/xla_config_registry.cc +++ b/tensorflow/core/util/xla_config_registry.cc @@ -18,7 +18,7 @@ limitations under the License. namespace tensorflow { /*static*/ -XlaConfigRegistry::global_jit_level_getter_t +XlaConfigRegistry::GlobalJitLevelGetterTy XlaConfigRegistry::global_jit_level_getter_; } // namespace tensorflow diff --git a/tensorflow/core/util/xla_config_registry.h b/tensorflow/core/util/xla_config_registry.h index 17c1b27b10a..99bcff1e69c 100644 --- a/tensorflow/core/util/xla_config_registry.h +++ b/tensorflow/core/util/xla_config_registry.h @@ -26,33 +26,37 @@ namespace tensorflow { // its status. class XlaConfigRegistry { public: + struct XlaGlobalJitLevel { + OptimizerOptions::GlobalJitLevel single_gpu; + OptimizerOptions::GlobalJitLevel general; + }; + // Input is jit_level in session config, and return is the config from // XLA, reflecting the effect of the environment variable flags. - typedef std::function - global_jit_level_getter_t; + typedef std::function + GlobalJitLevelGetterTy; - static bool Register(XlaConfigRegistry::global_jit_level_getter_t getter) { + static void Register(XlaConfigRegistry::GlobalJitLevelGetterTy getter) { CHECK(!global_jit_level_getter_); global_jit_level_getter_ = std::move(getter); - return true; } - static OptimizerOptions::GlobalJitLevel GetGlobalJitLevel( + static XlaGlobalJitLevel GetGlobalJitLevel( OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { if (!global_jit_level_getter_) { - return jit_level_in_session_opts; + return {jit_level_in_session_opts, jit_level_in_session_opts}; } return global_jit_level_getter_(jit_level_in_session_opts); } private: - static global_jit_level_getter_t global_jit_level_getter_; + static GlobalJitLevelGetterTy global_jit_level_getter_; }; #define REGISTER_XLA_CONFIG_GETTER(getter) \ static bool registered_##__COUNTER__ = \ - ::tensorflow::XlaConfigRegistry::Register(getter) + (::tensorflow::XlaConfigRegistry::Register(getter), true) } // namespace tensorflow From 98c9fbdd1b2e6cd74bf60dfadcde4970ad13d106 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Wed, 11 Sep 2019 23:26:12 -0700 Subject: [PATCH 10/14] Minor comment polishing. 
--- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 4a071e2e6e2..c01818e370d 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -145,8 +145,8 @@ bool IsXlaGlobalJitOn( bool MemoryOptimizerEnabled( RewriterConfig::MemOptType mem_opt_type, OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { - // Disable the default memory optimizer when GlobalJitLevel is ON as it hurts - // the XLA JIT performance. The (current) XLA clustering can result in loss of + // Disable the default memory optimizer when XLA JIT is ON as it hurts the + // XLA JIT performance. The (current) XLA clustering can result in loss of // concurrency between kernel compute and memory copies. As such, it usually // loses the concurrency needed to hide the latencies of the inserted swap-ins // and swap-outs and incurs great performance overhead. Remove this check when From 2d229cd7f50746a3747f2b7bba36c7bfe9f5e1c3 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Thu, 12 Sep 2019 11:04:07 -0700 Subject: [PATCH 11/14] Address review comments about XlaConfigRegistry. 1. Fix an oversight that the macro does not get expanded. 2. Add a mutex to the Register() method. 3. Comment polishing. --- tensorflow/core/util/xla_config_registry.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/xla_config_registry.h b/tensorflow/core/util/xla_config_registry.h index 99bcff1e69c..7d1cb50225b 100644 --- a/tensorflow/core/util/xla_config_registry.h +++ b/tensorflow/core/util/xla_config_registry.h @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/platform/default/logging.h" +#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { @@ -26,18 +27,22 @@ namespace tensorflow { // its status. class XlaConfigRegistry { public: + // XlaGlobalJitLevel is used by XLA to expose its JIT level for processing + // single gpu and general (multi-gpu) graphs. struct XlaGlobalJitLevel { OptimizerOptions::GlobalJitLevel single_gpu; OptimizerOptions::GlobalJitLevel general; }; - // Input is jit_level in session config, and return is the config from - // XLA, reflecting the effect of the environment variable flags. + // Input is the jit_level in session config, and return value is the jit_level + // from XLA, reflecting the effect of the environment variable flags. 
typedef std::function GlobalJitLevelGetterTy; static void Register(XlaConfigRegistry::GlobalJitLevelGetterTy getter) { + static mutex mu(LINKER_INITIALIZED); + mutex_lock l(mu); CHECK(!global_jit_level_getter_); global_jit_level_getter_ = std::move(getter); } @@ -55,7 +60,13 @@ class XlaConfigRegistry { }; #define REGISTER_XLA_CONFIG_GETTER(getter) \ - static bool registered_##__COUNTER__ = \ + REGISTER_XLA_CONFIG_GETTER_UNIQ_HELPER(__COUNTER__, getter) + +#define REGISTER_XLA_CONFIG_GETTER_UNIQ_HELPER(ctr, getter) \ + REGISTER_XLA_CONFIG_GETTER_UNIQ(ctr, getter) + +#define REGISTER_XLA_CONFIG_GETTER_UNIQ(ctr, getter) \ + static bool xla_config_registry_registration_##ctr = \ (::tensorflow::XlaConfigRegistry::Register(getter), true) } // namespace tensorflow From a908920e93e194f4789df1c4e174d166ec605353 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Thu, 12 Sep 2019 11:45:57 -0700 Subject: [PATCH 12/14] Guard both getter and setter in XlaConfigRegistry. --- tensorflow/core/util/xla_config_registry.cc | 3 +++ tensorflow/core/util/xla_config_registry.h | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/xla_config_registry.cc b/tensorflow/core/util/xla_config_registry.cc index 1297265a89d..3028845895c 100644 --- a/tensorflow/core/util/xla_config_registry.cc +++ b/tensorflow/core/util/xla_config_registry.cc @@ -17,6 +17,9 @@ limitations under the License. namespace tensorflow { +/*static*/ +mutex XlaConfigRegistry::mu_(LINKER_INITIALIZED); + /*static*/ XlaConfigRegistry::GlobalJitLevelGetterTy XlaConfigRegistry::global_jit_level_getter_; diff --git a/tensorflow/core/util/xla_config_registry.h b/tensorflow/core/util/xla_config_registry.h index 7d1cb50225b..b947396e9c1 100644 --- a/tensorflow/core/util/xla_config_registry.h +++ b/tensorflow/core/util/xla_config_registry.h @@ -41,14 +41,14 @@ class XlaConfigRegistry { GlobalJitLevelGetterTy; static void Register(XlaConfigRegistry::GlobalJitLevelGetterTy getter) { - static mutex mu(LINKER_INITIALIZED); - mutex_lock l(mu); + mutex_lock l(mu_); CHECK(!global_jit_level_getter_); global_jit_level_getter_ = std::move(getter); } static XlaGlobalJitLevel GetGlobalJitLevel( OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { + mutex_lock l(mu_); if (!global_jit_level_getter_) { return {jit_level_in_session_opts, jit_level_in_session_opts}; } @@ -56,7 +56,8 @@ class XlaConfigRegistry { } private: - static GlobalJitLevelGetterTy global_jit_level_getter_; + static mutex mu_; + static GlobalJitLevelGetterTy global_jit_level_getter_ GUARDED_BY(mu_); }; #define REGISTER_XLA_CONFIG_GETTER(getter) \ From 1d997791dcca369ebf1d823c3133d8bad5cfadb5 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Thu, 12 Sep 2019 15:52:11 -0700 Subject: [PATCH 13/14] Use `new` instead of static object to avoid destructor. By avoiding destructor, it should work better with multi-threaded program. 
--- tensorflow/compiler/jit/xla_cluster_util.cc | 2 +- .../grappler/optimizers/meta_optimizer.cc | 4 +- tensorflow/core/util/xla_config_registry.cc | 36 +++++++++-- tensorflow/core/util/xla_config_registry.h | 61 ++++++++----------- 4 files changed, 58 insertions(+), 45 deletions(-) diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc index 73e0ccf63bb..3863bcf3131 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.cc +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -219,7 +219,7 @@ void RemoveFromXlaCluster(NodeDef* node_def) { void RemoveFromXlaCluster(Node* node) { node->ClearAttr(kXlaClusterAttr); } namespace { -typedef XlaConfigRegistry::XlaGlobalJitLevel XlaGlobalJitLevel; +typedef xla_config_registry::XlaGlobalJitLevel XlaGlobalJitLevel; XlaGlobalJitLevel GetXlaGlobalJitLevel( const OptimizerOptions::GlobalJitLevel& jit_level_in_session_opts) { diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index c01818e370d..f0ded3b635e 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -129,8 +129,8 @@ bool AutoMixedPrecisionEnabled(RewriterConfig::Toggle opt_level) { bool IsXlaGlobalJitOn( const OptimizerOptions::GlobalJitLevel& jit_level_in_session_opts) { - XlaConfigRegistry::XlaGlobalJitLevel xla_global_jit_level = - XlaConfigRegistry::GetGlobalJitLevel(jit_level_in_session_opts); + xla_config_registry::XlaGlobalJitLevel xla_global_jit_level = + xla_config_registry::GetGlobalJitLevel(jit_level_in_session_opts); // Return true only if XLA JIT is ON for both single-gpu and multi-gpu // graphs. This is a conservative approach that turns off the memory optimizer // when we are sure that all graphs will be processed by XLA JIT. diff --git a/tensorflow/core/util/xla_config_registry.cc b/tensorflow/core/util/xla_config_registry.cc index 3028845895c..a3270620c02 100644 --- a/tensorflow/core/util/xla_config_registry.cc +++ b/tensorflow/core/util/xla_config_registry.cc @@ -17,11 +17,37 @@ limitations under the License. namespace tensorflow { -/*static*/ -mutex XlaConfigRegistry::mu_(LINKER_INITIALIZED); +namespace xla_config_registry { -/*static*/ -XlaConfigRegistry::GlobalJitLevelGetterTy - XlaConfigRegistry::global_jit_level_getter_; +namespace { +struct GlobalJitLevelState { + mutex mu; + GlobalJitLevelGetterTy getter; +}; + +GlobalJitLevelState* GetSingletonState() { + static GlobalJitLevelState* state = new GlobalJitLevelState; + return state; +} +} // anonymous + +void RegisterGlobalJitLevelGetter(GlobalJitLevelGetterTy getter) { + GlobalJitLevelState* state = GetSingletonState(); + mutex_lock l(state->mu); + CHECK(!state->getter); + state->getter = std::move(getter); +} + +XlaGlobalJitLevel GetGlobalJitLevel( + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { + GlobalJitLevelState* state = GetSingletonState(); + mutex_lock l(state->mu); + if (!state->getter) { + return {jit_level_in_session_opts, jit_level_in_session_opts}; + } + return state->getter(jit_level_in_session_opts); +} + +} // namespace xla_config_registry } // namespace tensorflow diff --git a/tensorflow/core/util/xla_config_registry.h b/tensorflow/core/util/xla_config_registry.h index b947396e9c1..a7ad8d9fb61 100644 --- a/tensorflow/core/util/xla_config_registry.h +++ b/tensorflow/core/util/xla_config_registry.h @@ -23,52 +23,39 @@ limitations under the License. 
namespace tensorflow { -// A registry class where XLA can register callbacks for Tensorflow to query -// its status. -class XlaConfigRegistry { - public: - // XlaGlobalJitLevel is used by XLA to expose its JIT level for processing - // single gpu and general (multi-gpu) graphs. - struct XlaGlobalJitLevel { - OptimizerOptions::GlobalJitLevel single_gpu; - OptimizerOptions::GlobalJitLevel general; - }; +namespace xla_config_registry { - // Input is the jit_level in session config, and return value is the jit_level - // from XLA, reflecting the effect of the environment variable flags. - typedef std::function - GlobalJitLevelGetterTy; - - static void Register(XlaConfigRegistry::GlobalJitLevelGetterTy getter) { - mutex_lock l(mu_); - CHECK(!global_jit_level_getter_); - global_jit_level_getter_ = std::move(getter); - } - - static XlaGlobalJitLevel GetGlobalJitLevel( - OptimizerOptions::GlobalJitLevel jit_level_in_session_opts) { - mutex_lock l(mu_); - if (!global_jit_level_getter_) { - return {jit_level_in_session_opts, jit_level_in_session_opts}; - } - return global_jit_level_getter_(jit_level_in_session_opts); - } - - private: - static mutex mu_; - static GlobalJitLevelGetterTy global_jit_level_getter_ GUARDED_BY(mu_); +// XlaGlobalJitLevel is used by XLA to expose its JIT level for processing +// single gpu and general (multi-gpu) graphs. +struct XlaGlobalJitLevel { + OptimizerOptions::GlobalJitLevel single_gpu; + OptimizerOptions::GlobalJitLevel general; }; +// Input is the jit_level in session config, and return value is the jit_level +// from XLA, reflecting the effect of the environment variable flags. +typedef std::function + GlobalJitLevelGetterTy; + +void RegisterGlobalJitLevelGetter(GlobalJitLevelGetterTy getter); + +XlaGlobalJitLevel GetGlobalJitLevel( + OptimizerOptions::GlobalJitLevel jit_level_in_session_opts); + #define REGISTER_XLA_CONFIG_GETTER(getter) \ REGISTER_XLA_CONFIG_GETTER_UNIQ_HELPER(__COUNTER__, getter) #define REGISTER_XLA_CONFIG_GETTER_UNIQ_HELPER(ctr, getter) \ REGISTER_XLA_CONFIG_GETTER_UNIQ(ctr, getter) -#define REGISTER_XLA_CONFIG_GETTER_UNIQ(ctr, getter) \ - static bool xla_config_registry_registration_##ctr = \ - (::tensorflow::XlaConfigRegistry::Register(getter), true) +#define REGISTER_XLA_CONFIG_GETTER_UNIQ(ctr, getter) \ + static bool xla_config_registry_registration_##ctr = \ + (::tensorflow::xla_config_registry::RegisterGlobalJitLevelGetter( \ + getter), \ + true) + +} // namespace xla_config_registry } // namespace tensorflow From ebfad012e6bf73333003996c783461e37da2556e Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Thu, 12 Sep 2019 16:37:19 -0700 Subject: [PATCH 14/14] Add GUARDED_BY for GlobalJitLevelState. --- tensorflow/core/util/xla_config_registry.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/xla_config_registry.cc b/tensorflow/core/util/xla_config_registry.cc index a3270620c02..6666f192098 100644 --- a/tensorflow/core/util/xla_config_registry.cc +++ b/tensorflow/core/util/xla_config_registry.cc @@ -22,7 +22,7 @@ namespace xla_config_registry { namespace { struct GlobalJitLevelState { mutex mu; - GlobalJitLevelGetterTy getter; + GlobalJitLevelGetterTy getter GUARDED_BY(mu); }; GlobalJitLevelState* GetSingletonState() {
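
For readers following the series, here is a minimal, self-contained sketch of the pattern the patches converge on by PATCH 13/14: a heap-allocated, mutex-guarded singleton holding a getter callback, a two-level __COUNTER__ registration macro, and a query that falls back to the session-level value when no getter is registered. This is not the TensorFlow code itself; the simplified enum, the names State and FakeGetXlaGlobalJitLevel, and the use of std::mutex/assert in place of TensorFlow's mutex, GUARDED_BY, and CHECK are stand-ins invented for illustration.

#include <cassert>
#include <functional>
#include <iostream>
#include <mutex>
#include <utility>

namespace xla_config_registry_sketch {

// Stand-in for OptimizerOptions::GlobalJitLevel.
enum class GlobalJitLevel { kDefault, kOff, kOn1, kOn2 };

// What the "XLA side" exposes: separate JIT levels for single-GPU and
// general (multi-GPU) graphs, mirroring XlaGlobalJitLevel in the patches.
struct XlaGlobalJitLevel {
  GlobalJitLevel single_gpu;
  GlobalJitLevel general;
};

using GlobalJitLevelGetter = std::function<XlaGlobalJitLevel(GlobalJitLevel)>;

namespace {
// Heap-allocated singleton that is intentionally never deleted, so no
// destructor runs at process exit (the rationale given in PATCH 13/14).
struct State {
  std::mutex mu;
  GlobalJitLevelGetter getter;  // guarded by mu
};

State* GetSingletonState() {
  static State* state = new State;
  return state;
}
}  // namespace

// Called once by the "XLA side" via a static initializer.
void RegisterGlobalJitLevelGetter(GlobalJitLevelGetter getter) {
  State* state = GetSingletonState();
  std::lock_guard<std::mutex> lock(state->mu);
  assert(!state->getter && "getter registered twice");
  state->getter = std::move(getter);
}

// Called by the "core side"; falls back to the session-config value when
// nothing was registered (e.g. XLA is not linked into the binary).
XlaGlobalJitLevel GetGlobalJitLevel(GlobalJitLevel jit_level_in_session_opts) {
  State* state = GetSingletonState();
  std::lock_guard<std::mutex> lock(state->mu);
  if (!state->getter) {
    return {jit_level_in_session_opts, jit_level_in_session_opts};
  }
  return state->getter(jit_level_in_session_opts);
}

}  // namespace xla_config_registry_sketch

// Two-level expansion so __COUNTER__ is pasted as its value, the oversight
// fixed in PATCH 11.
#define REGISTER_XLA_CONFIG_GETTER(getter) \
  REGISTER_XLA_CONFIG_GETTER_UNIQ_HELPER(__COUNTER__, getter)
#define REGISTER_XLA_CONFIG_GETTER_UNIQ_HELPER(ctr, getter) \
  REGISTER_XLA_CONFIG_GETTER_UNIQ(ctr, getter)
#define REGISTER_XLA_CONFIG_GETTER_UNIQ(ctr, getter)                        \
  static bool xla_config_registry_registration_##ctr =                     \
      (::xla_config_registry_sketch::RegisterGlobalJitLevelGetter(getter), \
       true)

namespace {
using xla_config_registry_sketch::GlobalJitLevel;
using xla_config_registry_sketch::XlaGlobalJitLevel;

// "XLA side": treat DEFAULT as OFF, pass explicit levels through.
XlaGlobalJitLevel FakeGetXlaGlobalJitLevel(GlobalJitLevel session_level) {
  if (session_level == GlobalJitLevel::kDefault) {
    return {GlobalJitLevel::kOff, GlobalJitLevel::kOff};
  }
  return {session_level, session_level};
}
REGISTER_XLA_CONFIG_GETTER(FakeGetXlaGlobalJitLevel);

// "Grappler side": report the JIT as on only when it is on for both the
// single-GPU and general cases, the conservative check from PATCH 09.
bool IsXlaGlobalJitOn(GlobalJitLevel session_level) {
  XlaGlobalJitLevel level =
      xla_config_registry_sketch::GetGlobalJitLevel(session_level);
  auto on = [](GlobalJitLevel l) {
    return l == GlobalJitLevel::kOn1 || l == GlobalJitLevel::kOn2;
  };
  return on(level.single_gpu) && on(level.general);
}
}  // namespace

int main() {
  std::cout << IsXlaGlobalJitOn(GlobalJitLevel::kOn1) << "\n";     // 1
  std::cout << IsXlaGlobalJitOn(GlobalJitLevel::kDefault) << "\n"; // 0
}

The split between a free function plus macro (rather than the earlier XlaConfigProxy/XlaConfigRegistry classes) and the leaked singleton are the two design choices the review converged on: the callback lives outside any class with static storage-duration members, and nothing is destroyed at shutdown, so late readers in a multi-threaded process never observe a torn-down registry.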