From 5a85d44038b1b7a2c0d2e59614bea85b52cacdfe Mon Sep 17 00:00:00 2001 From: Jing Dong Date: Thu, 10 Dec 2020 21:00:44 -0800 Subject: [PATCH] Internal change PiperOrigin-RevId: 346924792 Change-Id: I835b6685484806dc9bebc462659013bcacde508b --- .../core/common_runtime/direct_session.cc | 5 ++++ tensorflow/core/common_runtime/executor.cc | 29 +++++++++++++------ tensorflow/core/common_runtime/executor.h | 9 ++++++ tensorflow/core/protobuf/config.proto | 6 ++++ ...ensorflow.-run-options.-experimental.pbtxt | 6 ++++ .../golden/v1/tensorflow.-run-options.pbtxt | 6 ++++ 6 files changed, 52 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index c1313076600..5ff3f31cb04 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -497,6 +497,11 @@ Status DirectSession::RunInternal( CallFrameInterface* call_frame, ExecutorsAndKeys* executors_and_keys, RunMetadata* run_metadata, const thread::ThreadPoolOptions& threadpool_options) { + // This is a temporary flag for controlling whether to always track the kernel + // execution cost. We will remove this once the feature is validated. + if (run_options.experimental().always_track_kernel_execution_cost()) + EnableAlwaysTrackKernelExecutionCost(); + const uint64 start_time_usecs = options_.env->NowMicros(); const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1); RunState run_state(step_id, &devices_); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 03c23f32880..443d588c2d3 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -73,6 +73,13 @@ limitations under the License. #include "tensorflow/core/util/tensor_slice_reader_cache.h" namespace tensorflow { + +// Temporary flag for controlling whether to always track kernel execution +// costs. 
+static std::atomic<bool> always_track_kernel_execution_cost{false}; +void EnableAlwaysTrackKernelExecutionCost() { + always_track_kernel_execution_cost = true; +} namespace { // 1-D, 0 element tensor. @@ -179,12 +186,6 @@ class ExecutorImpl : public Executor { // Updates the dynamic cost estimate, which is used to determine whether the // given node is expensive. The new cost estimate is a weighted average of // the old cost estimate and the latest cost. - // - // NOTE: We currently only expect updates to the cost estimate when - // `is_expensive_[node.node_id]` is true (or at least, it *was* true, when - // we started to execute the kernel. As a result, we expect that a kernel - // can only ever transition from "expensive" to "inexpensive", but not vice - // versa. void UpdateCostEstimate(const NodeItem& node, uint64 elapsed_cycles) { // N.B. Updates to `cost_estimate` are atomic but unlocked. Simultaneous // updates may result in one or more updates being ignored. This does not @@ -195,9 +196,10 @@ class ExecutorImpl : public Executor { kCostDecay + (elapsed_cycles / kCostDecay); cost_estimate.store(new_estimate, std::memory_order_relaxed); - if (new_estimate < kOpIsExpensiveThresholdCycles) { - is_expensive_[node.node_id].store(false, std::memory_order_relaxed); - } + + bool new_is_expensive = (new_estimate >= kOpIsExpensiveThresholdCycles); + is_expensive_[node.node_id].store(new_is_expensive, + std::memory_order_relaxed); } private: @@ -573,6 +575,15 @@ Status ExecutorState::ProcessSync( KernelTimer timer; device->Compute(op_kernel, &ctx); kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles()); + } else if (always_track_kernel_execution_cost) { + KernelTimer timer; + device->Compute(op_kernel, &ctx); + // If always_track_kernel_execution_cost is set, update the cost estimate + // for inexpensive kernels with ~1/8 probability. This assumes that the + // last 3 bits of the CPU cycle count are uniformly distributed. 
+ constexpr int kKernelExecutionTrackingInvocationSkipCount = 8; + if (timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0) + kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles()); } else { device->Compute(op_kernel, &ctx); } diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h index d590ae0f711..d8ea85f1955 100644 --- a/tensorflow/core/common_runtime/executor.h +++ b/tensorflow/core/common_runtime/executor.h @@ -33,6 +33,15 @@ namespace tensorflow { class StepStatsCollector; +// If this is called, we will sample execution cost for "inexpensive" kernels +// and switch them to "expensive" when the estimated cost reaches the +// expensiveness threshold. +// This is a temporary flag for validating the performance impact of +// this feature. For simplicity, a global flag is used and once the flag +// is turned on, it cannot be turned off. We will remove this flag once this +// feature is validated. +void EnableAlwaysTrackKernelExecutionCost(); + // Executor runs a graph computation. // Example: // Graph* graph = ...; diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 9b50d5ecc26..569fe929cee 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -695,6 +695,12 @@ message RunOptions { int64 priority = 1; } RunHandlerPoolOptions run_handler_pool_options = 3; + + // If true, always track kernel execution cost. This allows the executor to + // transition kernels from "inexpensive" to "expensive" during execution. + // This is a temporary flag for validating this feature. We will remove this + // flag once the feature is validated. 
+ bool always_track_kernel_execution_cost = 4; } Experimental experimental = 8; diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt index 913d82f680c..9b2b7f7210a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt @@ -21,6 +21,12 @@ tf_proto { type: TYPE_MESSAGE type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions" } + field { + name: "always_track_kernel_execution_cost" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } nested_type { name: "RunHandlerPoolOptions" field { diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt index 9020b61d64f..d250ba35108 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt @@ -68,6 +68,12 @@ tf_proto { type: TYPE_MESSAGE type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions" } + field { + name: "always_track_kernel_execution_cost" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } nested_type { name: "RunHandlerPoolOptions" field {