Internal change
PiperOrigin-RevId: 347508731 Change-Id: Id4cbecd128176c62878a9c153e79a4b2d64a52d9
This commit is contained in:
parent
3af08c5f47
commit
78d7f8b2ef
@ -497,11 +497,6 @@ Status DirectSession::RunInternal(
|
||||
CallFrameInterface* call_frame, ExecutorsAndKeys* executors_and_keys,
|
||||
RunMetadata* run_metadata,
|
||||
const thread::ThreadPoolOptions& threadpool_options) {
|
||||
// This is a temporary flag for controlling whether to always track the kernel
|
||||
// execution cost. We will remove this once the feature is validated.
|
||||
if (run_options.experimental().always_track_kernel_execution_cost())
|
||||
EnableAlwaysTrackKernelExecutionCost();
|
||||
|
||||
const uint64 start_time_usecs = options_.env->NowMicros();
|
||||
const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
|
||||
RunState run_state(step_id, &devices_);
|
||||
|
@ -74,12 +74,6 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// Temporary flag for controlling whether to always track kernel execution
|
||||
// costs.
|
||||
static bool always_track_kernel_execution_cost = false;
|
||||
void EnableAlwaysTrackKernelExecutionCost() {
|
||||
always_track_kernel_execution_cost = true;
|
||||
}
|
||||
namespace {
|
||||
|
||||
// 1-D, 0 element tensor.
|
||||
@ -162,7 +156,7 @@ class ExecutorImpl : public Executor {
|
||||
KernelStats() = default;
|
||||
|
||||
void Initialize(const GraphView& gview) {
|
||||
is_expensive_ = absl::make_unique<std::atomic<bool>[]>(gview.num_nodes());
|
||||
is_expensive_.resize(gview.num_nodes());
|
||||
cost_estimates_ =
|
||||
absl::make_unique<std::atomic_uint_fast64_t[]>(gview.num_nodes());
|
||||
for (int32 i = 0; i < gview.num_nodes(); ++i) {
|
||||
@ -183,23 +177,26 @@ class ExecutorImpl : public Executor {
|
||||
kOpIsExpensiveThresholdCycles);
|
||||
}
|
||||
|
||||
// Returns the value of kernel->IsExpensive().
|
||||
bool HasExpensiveMarker(const NodeItem& node) const {
|
||||
return is_expensive_[node.node_id];
|
||||
}
|
||||
|
||||
// Updates the dynamic cost estimate, which is used to determine whether the
|
||||
// given node is expensive. The new cost estimate is a weighted average of
|
||||
// the old cost estimate and the latest cost.
|
||||
// the old cost estimate and the latest cost. We only update cost estimates
|
||||
// for kernels for which IsExpensive() returns true.
|
||||
void UpdateCostEstimate(const NodeItem& node, uint64 elapsed_cycles) {
|
||||
// N.B. Updates to `cost_estimate` are atomic but unlocked. Simultaneous
|
||||
// updates may result in one or more updates being ignored. This does not
|
||||
// affect correctness but may slow down the update frequency.
|
||||
std::atomic_uint_fast64_t& cost_estimate = cost_estimates_[node.node_id];
|
||||
uint64 new_estimate = (kCostDecay - 1) *
|
||||
cost_estimate.load(std::memory_order_relaxed) /
|
||||
kCostDecay +
|
||||
(elapsed_cycles / kCostDecay);
|
||||
cost_estimate.store(new_estimate, std::memory_order_relaxed);
|
||||
auto prev_estimate = cost_estimate.load(std::memory_order_relaxed);
|
||||
|
||||
bool new_is_expensive = (new_estimate >= kOpIsExpensiveThresholdCycles);
|
||||
is_expensive_[node.node_id].store(new_is_expensive,
|
||||
std::memory_order_relaxed);
|
||||
uint64 new_estimate =
|
||||
((kCostDecay - 1) * prev_estimate + elapsed_cycles) / kCostDecay;
|
||||
|
||||
cost_estimate.store(new_estimate, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -207,10 +204,11 @@ class ExecutorImpl : public Executor {
|
||||
// determine whether an operation should be placed in a threadpool.
|
||||
// Operations start out "expensive".
|
||||
static constexpr uint64 kInitialCostEstimateCycles = 100 * 1000 * 1000;
|
||||
static constexpr uint64 kOpIsExpensiveThresholdCycles = 5000;
|
||||
static constexpr uint64 kOpIsExpensiveThresholdCycles = 8000;
|
||||
static constexpr uint64 kCostDecay = 10;
|
||||
|
||||
std::unique_ptr<std::atomic<bool>[]> is_expensive_;
|
||||
std::vector<bool> is_expensive_;
|
||||
// std::unique_ptr<std::atomic<bool>[]> is_expensive_;
|
||||
std::unique_ptr<std::atomic_uint_fast64_t[]> cost_estimates_;
|
||||
};
|
||||
|
||||
@ -569,24 +567,19 @@ Status ExecutorState<PropagatorStateType>::ProcessSync(
|
||||
},
|
||||
profiler::GetTFTraceMeLevel(is_expensive));
|
||||
device->Compute(op_kernel, &ctx);
|
||||
} else {
|
||||
// In the common case, avoid creating any tracing objects.
|
||||
if (is_expensive) {
|
||||
KernelTimer timer;
|
||||
device->Compute(op_kernel, &ctx);
|
||||
} else if (kernel_stats_->HasExpensiveMarker(item)) {
|
||||
KernelTimer timer;
|
||||
device->Compute(op_kernel, &ctx);
|
||||
// For expensive kernels, always update the cost estimate. For inexpensive
|
||||
// kernels, update the cost estimate with ~1/16 probability. This assumes
|
||||
// that the last 4 bits of the CPU cycle count are uniformly distributed.
|
||||
constexpr int kKernelExecutionTrackingInvocationSkipCount = 16;
|
||||
if (is_expensive ||
|
||||
timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0) {
|
||||
kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
|
||||
} else if (always_track_kernel_execution_cost) {
|
||||
KernelTimer timer;
|
||||
device->Compute(op_kernel, &ctx);
|
||||
// If always_track_kernel_execution_cost is set, update the cost estimate
|
||||
// for inexpensive kernels with ~1/8 probability. This assumes that the
|
||||
// last 3 bits of the CPU cycle count are uniformly distributed.
|
||||
constexpr int kKernelExecutionTrackingInvocationSkipCount = 8;
|
||||
if (timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0)
|
||||
kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
|
||||
} else {
|
||||
device->Compute(op_kernel, &ctx);
|
||||
}
|
||||
} else {
|
||||
device->Compute(op_kernel, &ctx);
|
||||
}
|
||||
nodestats::SetOpEnd(stats);
|
||||
if (outputs->size() < item.num_outputs) outputs->resize(item.num_outputs);
|
||||
|
@ -695,12 +695,6 @@ message RunOptions {
|
||||
int64 priority = 1;
|
||||
}
|
||||
RunHandlerPoolOptions run_handler_pool_options = 3;
|
||||
|
||||
// If true, always track kernel execution cost. This allows the executor to
|
||||
// transition kernels from "inexpensive" to "expensive" during execution.
|
||||
// This is a temporary flag for validating this feature. We will remove this
|
||||
// flag once the feature is validated.
|
||||
bool always_track_kernel_execution_cost = 4;
|
||||
}
|
||||
|
||||
Experimental experimental = 8;
|
||||
|
@ -21,12 +21,6 @@ tf_proto {
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
|
||||
}
|
||||
field {
|
||||
name: "always_track_kernel_execution_cost"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_BOOL
|
||||
}
|
||||
nested_type {
|
||||
name: "RunHandlerPoolOptions"
|
||||
field {
|
||||
|
@ -68,12 +68,6 @@ tf_proto {
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
|
||||
}
|
||||
field {
|
||||
name: "always_track_kernel_execution_cost"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_BOOL
|
||||
}
|
||||
nested_type {
|
||||
name: "RunHandlerPoolOptions"
|
||||
field {
|
||||
|
Loading…
Reference in New Issue
Block a user