Internal change
PiperOrigin-RevId: 346924792 Change-Id: I835b6685484806dc9bebc462659013bcacde508b
This commit is contained in:
parent
0e9a261e8f
commit
5a85d44038
@ -497,6 +497,11 @@ Status DirectSession::RunInternal(
|
||||
CallFrameInterface* call_frame, ExecutorsAndKeys* executors_and_keys,
|
||||
RunMetadata* run_metadata,
|
||||
const thread::ThreadPoolOptions& threadpool_options) {
|
||||
// This is a temporary flag for controlling whether to always track the kernel
|
||||
// execution cost. We will remove this once the feature is validated.
|
||||
if (run_options.experimental().always_track_kernel_execution_cost())
|
||||
EnableAlwaysTrackKernelExecutionCost();
|
||||
|
||||
const uint64 start_time_usecs = options_.env->NowMicros();
|
||||
const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
|
||||
RunState run_state(step_id, &devices_);
|
||||
|
@ -73,6 +73,13 @@ limitations under the License.
|
||||
#include "tensorflow/core/util/tensor_slice_reader_cache.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// Temporary, process-wide switch that makes the executor sample execution
// costs for kernels that are currently classified as "inexpensive".
//
// The flag is written by EnableAlwaysTrackKernelExecutionCost() and read on
// the kernel-processing path. Those accesses may happen on different threads
// at the same time, so the flag is a std::atomic<bool>; an unsynchronized
// plain bool would be a data race.
static std::atomic<bool> always_track_kernel_execution_cost{false};

// Turns on cost tracking for "inexpensive" kernels. The switch is one-way:
// once enabled there is no way to turn it off again. Calling this function
// repeatedly is harmless.
void EnableAlwaysTrackKernelExecutionCost() {
  // Relaxed ordering is sufficient: the flag does not publish any other data,
  // it only selects which code path later kernel invocations take.
  always_track_kernel_execution_cost.store(true, std::memory_order_relaxed);
}
|
||||
namespace {
|
||||
|
||||
// 1-D, 0 element tensor.
|
||||
@ -179,12 +186,6 @@ class ExecutorImpl : public Executor {
|
||||
// Updates the dynamic cost estimate, which is used to determine whether the
|
||||
// given node is expensive. The new cost estimate is a weighted average of
|
||||
// the old cost estimate and the latest cost.
|
||||
//
|
||||
// NOTE: We currently only expect updates to the cost estimate when
|
||||
// `is_expensive_[node.node_id]` is true (or at least, it *was* true, when
|
||||
// we started to execute the kernel. As a result, we expect that a kernel
|
||||
// can only ever transition from "expensive" to "inexpensive", but not vice
|
||||
// versa.
|
||||
void UpdateCostEstimate(const NodeItem& node, uint64 elapsed_cycles) {
|
||||
// N.B. Updates to `cost_estimate` are atomic but unlocked. Simultaneous
|
||||
// updates may result in one or more updates being ignored. This does not
|
||||
@ -195,9 +196,10 @@ class ExecutorImpl : public Executor {
|
||||
kCostDecay +
|
||||
(elapsed_cycles / kCostDecay);
|
||||
cost_estimate.store(new_estimate, std::memory_order_relaxed);
|
||||
if (new_estimate < kOpIsExpensiveThresholdCycles) {
|
||||
is_expensive_[node.node_id].store(false, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
bool new_is_expensive = (new_estimate >= kOpIsExpensiveThresholdCycles);
|
||||
is_expensive_[node.node_id].store(new_is_expensive,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -573,6 +575,15 @@ Status ExecutorState<PropagatorStateType>::ProcessSync(
|
||||
KernelTimer timer;
|
||||
device->Compute(op_kernel, &ctx);
|
||||
kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
|
||||
} else if (always_track_kernel_execution_cost) {
|
||||
KernelTimer timer;
|
||||
device->Compute(op_kernel, &ctx);
|
||||
// If always_track_kernel_execution_cost is set, update the cost estimate
|
||||
// for inexpensive kernels with ~1/8 probability. This assumes that the
|
||||
// last 3 bits of the CPU cycle count is uniformly distributed.
|
||||
constexpr int kKernelExecutionTrackingInvocationSkipCount = 8;
|
||||
if (timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0)
|
||||
kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
|
||||
} else {
|
||||
device->Compute(op_kernel, &ctx);
|
||||
}
|
||||
|
@ -33,6 +33,15 @@ namespace tensorflow {
|
||||
|
||||
class StepStatsCollector;
|
||||
|
||||
// If this is called, we will sample execution cost for "inexpensive" kernels
|
||||
// and switch them to "expensive" when the estimated cost exceeds the
|
||||
// expensiveness threshold.
|
||||
// This is a temporary flag for validating the performance impact of
|
||||
// this feature. For simplicity, a global flag is used and once the flag
|
||||
// is turned on, it cannot be turned off. We will remove this flag once this
|
||||
// feature is validated.
|
||||
void EnableAlwaysTrackKernelExecutionCost();
|
||||
|
||||
// Executor runs a graph computation.
|
||||
// Example:
|
||||
// Graph* graph = ...;
|
||||
|
@ -695,6 +695,12 @@ message RunOptions {
|
||||
int64 priority = 1;
|
||||
}
|
||||
RunHandlerPoolOptions run_handler_pool_options = 3;
|
||||
|
||||
// If true, always track kernel execution cost. This allows the executor to
|
||||
// transition kernels from "inexpensive" to "expensive" during execution.
|
||||
// This is a temporary flag for validating this feature. We will remove this
|
||||
// flag once the feature is validated.
|
||||
bool always_track_kernel_execution_cost = 4;
|
||||
}
|
||||
|
||||
Experimental experimental = 8;
|
||||
|
@ -21,6 +21,12 @@ tf_proto {
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
|
||||
}
|
||||
field {
|
||||
name: "always_track_kernel_execution_cost"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_BOOL
|
||||
}
|
||||
nested_type {
|
||||
name: "RunHandlerPoolOptions"
|
||||
field {
|
||||
|
@ -68,6 +68,12 @@ tf_proto {
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
|
||||
}
|
||||
field {
|
||||
name: "always_track_kernel_execution_cost"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_BOOL
|
||||
}
|
||||
nested_type {
|
||||
name: "RunHandlerPoolOptions"
|
||||
field {
|
||||
|
Loading…
Reference in New Issue
Block a user