Internal change

PiperOrigin-RevId: 346924792 Change-Id: I835b6685484806dc9bebc462659013bcacde508b
2020-12-10 21:00:44 -08:00 · 2020-12-10 21:00:44 -08:00 · 5a85d44038
commit 5a85d44038
parent 0e9a261e8f
6 changed files with 52 additions and 9 deletions
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@ -497,6 +497,11 @@ Status DirectSession::RunInternal(
    CallFrameInterface* call_frame, ExecutorsAndKeys* executors_and_keys,
    RunMetadata* run_metadata,
    const thread::ThreadPoolOptions& threadpool_options) {
+  // This is a temporary flag for controlling whether to always track the kernel
+  // execution cost. We will remove this once the feature is validated.
+  if (run_options.experimental().always_track_kernel_execution_cost())
+    EnableAlwaysTrackKernelExecutionCost();
+
  const uint64 start_time_usecs = options_.env->NowMicros();
  const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
  RunState run_state(step_id, &devices_);
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@ -73,6 +73,13 @@ limitations under the License.
 #include "tensorflow/core/util/tensor_slice_reader_cache.h"

 namespace tensorflow {
+
+// Temporary one-way switch (initially off): when set, ProcessSync also times
+// "inexpensive" kernels. NOTE(review): unsynchronized bool; confirm no race.
+static bool always_track_kernel_execution_cost = false;
+void EnableAlwaysTrackKernelExecutionCost() {
+  always_track_kernel_execution_cost = true;
+}
 namespace {

 // 1-D, 0 element tensor.
@ -179,12 +186,6 @@ class ExecutorImpl : public Executor {
    // Updates the dynamic cost estimate, which is used to determine whether the
    // given node is expensive. The new cost estimate is a weighted average of
    // the old cost estimate and the latest cost.
-    //
-    // NOTE: We currently only expect updates to the cost estimate when
-    // `is_expensive_[node.node_id]` is true (or at least, it *was* true, when
-    // we started to execute the kernel. As a result, we expect that a kernel
-    // can only ever transition from "expensive" to "inexpensive", but not vice
-    // versa.
    void UpdateCostEstimate(const NodeItem& node, uint64 elapsed_cycles) {
      // N.B. Updates to `cost_estimate` are atomic but unlocked.  Simultaneous
      // updates may result in one or more updates being ignored.  This does not
@ -195,9 +196,10 @@ class ExecutorImpl : public Executor {
                                kCostDecay +
                            (elapsed_cycles / kCostDecay);
      cost_estimate.store(new_estimate, std::memory_order_relaxed);
-      if (new_estimate < kOpIsExpensiveThresholdCycles) {
-        is_expensive_[node.node_id].store(false, std::memory_order_relaxed);
-      }
+
+      bool new_is_expensive = (new_estimate >= kOpIsExpensiveThresholdCycles);
+      is_expensive_[node.node_id].store(new_is_expensive,
+                                        std::memory_order_relaxed);
    }

   private:
@ -573,6 +575,15 @@ Status ExecutorState<PropagatorStateType>::ProcessSync(
      KernelTimer timer;
      device->Compute(op_kernel, &ctx);
      kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
+    } else if (always_track_kernel_execution_cost) {
+      KernelTimer timer;
+      device->Compute(op_kernel, &ctx);
+      // If always_track_kernel_execution_cost is set, update the cost estimate
+      // for inexpensive kernels with ~1/8 probability. This assumes that the
+      // last 3 bits of the CPU cycle count are uniformly distributed.
+      constexpr int kKernelExecutionTrackingInvocationSkipCount = 8;
+      if (timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0)
+        kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
    } else {
      device->Compute(op_kernel, &ctx);
    }
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@ -33,6 +33,15 @@ namespace tensorflow {

 class StepStatsCollector;

+// Once this is called, the executor samples the execution cost of kernels
+// currently classified as "inexpensive" and reclassifies them as "expensive"
+// when their estimated cost reaches the expensiveness threshold.
+// This is a temporary flag for validating the performance impact of
+// this feature. For simplicity, a global flag is used and once the flag
+// is turned on, it cannot be turned off. We will remove this flag once this
+// feature is validated.
+void EnableAlwaysTrackKernelExecutionCost();
+
 // Executor runs a graph computation.
 // Example:
 //   Graph* graph = ...;
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@ -695,6 +695,12 @@ message RunOptions {
      int64 priority = 1;
    }
    RunHandlerPoolOptions run_handler_pool_options = 3;
+
+    // If true, always track kernel execution cost. This allows the executor to
+    // transition kernels from "inexpensive" to "expensive" during execution.
+    // This is a temporary flag for validating this feature. We will remove this
+    // flag once the feature is validated.
+    bool always_track_kernel_execution_cost = 4;
  }

  Experimental experimental = 8;
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt
@ -21,6 +21,12 @@ tf_proto {
      type: TYPE_MESSAGE
      type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
    }
+    field {
+      name: "always_track_kernel_execution_cost"
+      number: 4
+      label: LABEL_OPTIONAL
+      type: TYPE_BOOL
+    }
    nested_type {
      name: "RunHandlerPoolOptions"
      field {
--- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt
@ -68,6 +68,12 @@ tf_proto {
        type: TYPE_MESSAGE
        type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
      }
+      field {
+        name: "always_track_kernel_execution_cost"
+        number: 4
+        label: LABEL_OPTIONAL
+        type: TYPE_BOOL
+      }
      nested_type {
        name: "RunHandlerPoolOptions"
        field {