Internal change

PiperOrigin-RevId: 347508731
Change-Id: Id4cbecd128176c62878a9c153e79a4b2d64a52d9
This commit is contained in:
Jing Dong 2020-12-14 17:57:22 -08:00 committed by TensorFlower Gardener
parent 3af08c5f47
commit 78d7f8b2ef
5 changed files with 27 additions and 57 deletions

View File

@ -497,11 +497,6 @@ Status DirectSession::RunInternal(
CallFrameInterface* call_frame, ExecutorsAndKeys* executors_and_keys,
RunMetadata* run_metadata,
const thread::ThreadPoolOptions& threadpool_options) {
// This is a temporary flag for controlling whether to always track the kernel
// execution cost. We will remove this once the feature is validated.
if (run_options.experimental().always_track_kernel_execution_cost())
EnableAlwaysTrackKernelExecutionCost();
const uint64 start_time_usecs = options_.env->NowMicros();
const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
RunState run_state(step_id, &devices_);

View File

@ -74,12 +74,6 @@ limitations under the License.
namespace tensorflow {
// Temporary flag for controlling whether to always track kernel execution
// costs.
static bool always_track_kernel_execution_cost = false;
void EnableAlwaysTrackKernelExecutionCost() {
always_track_kernel_execution_cost = true;
}
namespace {
// 1-D, 0 element tensor.
@ -162,7 +156,7 @@ class ExecutorImpl : public Executor {
KernelStats() = default;
void Initialize(const GraphView& gview) {
is_expensive_ = absl::make_unique<std::atomic<bool>[]>(gview.num_nodes());
is_expensive_.resize(gview.num_nodes());
cost_estimates_ =
absl::make_unique<std::atomic_uint_fast64_t[]>(gview.num_nodes());
for (int32 i = 0; i < gview.num_nodes(); ++i) {
@ -183,23 +177,26 @@ class ExecutorImpl : public Executor {
kOpIsExpensiveThresholdCycles);
}
// Returns the value of kernel->IsExpensive().
// Looks up the per-node flag stored in `is_expensive_`, indexed by
// `node.node_id`.
// NOTE(review): the code that populates `is_expensive_` is not fully visible
// here -- confirm that it is filled from kernel->IsExpensive() for each node.
bool HasExpensiveMarker(const NodeItem& node) const {
return is_expensive_[node.node_id];
}
// Updates the dynamic cost estimate, which is used to determine whether the
// given node is expensive. The new cost estimate is a weighted average of
// the old cost estimate and the latest cost.
// the old cost estimate and the latest cost. We only update cost estimates
// for kernels for which IsExpensive() returns true.
void UpdateCostEstimate(const NodeItem& node, uint64 elapsed_cycles) {
// N.B. Updates to `cost_estimate` are atomic but unlocked. Simultaneous
// updates may result in one or more updates being ignored. This does not
// affect correctness but may slow down the update frequency.
std::atomic_uint_fast64_t& cost_estimate = cost_estimates_[node.node_id];
uint64 new_estimate = (kCostDecay - 1) *
cost_estimate.load(std::memory_order_relaxed) /
kCostDecay +
(elapsed_cycles / kCostDecay);
cost_estimate.store(new_estimate, std::memory_order_relaxed);
auto prev_estimate = cost_estimate.load(std::memory_order_relaxed);
bool new_is_expensive = (new_estimate >= kOpIsExpensiveThresholdCycles);
is_expensive_[node.node_id].store(new_is_expensive,
std::memory_order_relaxed);
uint64 new_estimate =
((kCostDecay - 1) * prev_estimate + elapsed_cycles) / kCostDecay;
cost_estimate.store(new_estimate, std::memory_order_relaxed);
}
private:
@ -207,10 +204,11 @@ class ExecutorImpl : public Executor {
// determine whether an operation should be placed in a threadpool.
// Operations start out "expensive".
static constexpr uint64 kInitialCostEstimateCycles = 100 * 1000 * 1000;
static constexpr uint64 kOpIsExpensiveThresholdCycles = 5000;
static constexpr uint64 kOpIsExpensiveThresholdCycles = 8000;
static constexpr uint64 kCostDecay = 10;
std::unique_ptr<std::atomic<bool>[]> is_expensive_;
std::vector<bool> is_expensive_;
// std::unique_ptr<std::atomic<bool>[]> is_expensive_;
std::unique_ptr<std::atomic_uint_fast64_t[]> cost_estimates_;
};
@ -569,24 +567,19 @@ Status ExecutorState<PropagatorStateType>::ProcessSync(
},
profiler::GetTFTraceMeLevel(is_expensive));
device->Compute(op_kernel, &ctx);
} else {
// In the common case, avoid creating any tracing objects.
if (is_expensive) {
KernelTimer timer;
device->Compute(op_kernel, &ctx);
} else if (kernel_stats_->HasExpensiveMarker(item)) {
KernelTimer timer;
device->Compute(op_kernel, &ctx);
// For expensive kernels, always update the cost estimate. For inexpensive
// kernels, update the cost estimate with ~1/16 probability. This assumes
// that the last 4 bits of the CPU cycle count are uniformly distributed.
constexpr int kKernelExecutionTrackingInvocationSkipCount = 16;
if (is_expensive ||
timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0) {
kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
} else if (always_track_kernel_execution_cost) {
KernelTimer timer;
device->Compute(op_kernel, &ctx);
// If always_track_kernel_execution_cost is set, update the cost estimate
// for inexpensive kernels with ~1/8 probability. This assumes that the
// last 3 bits of the CPU cycle count are uniformly distributed.
constexpr int kKernelExecutionTrackingInvocationSkipCount = 8;
if (timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0)
kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
} else {
device->Compute(op_kernel, &ctx);
}
} else {
device->Compute(op_kernel, &ctx);
}
nodestats::SetOpEnd(stats);
if (outputs->size() < item.num_outputs) outputs->resize(item.num_outputs);

View File

@ -695,12 +695,6 @@ message RunOptions {
int64 priority = 1;
}
RunHandlerPoolOptions run_handler_pool_options = 3;
// If true, always track kernel execution cost. This allows the executor to
// transition kernels from "inexpensive" to "expensive" during the execution.
// This is a temporary flag for validating this feature. We will remove this
// flag once the feature is validated.
bool always_track_kernel_execution_cost = 4;
}
Experimental experimental = 8;

View File

@ -21,12 +21,6 @@ tf_proto {
type: TYPE_MESSAGE
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
}
field {
name: "always_track_kernel_execution_cost"
number: 4
label: LABEL_OPTIONAL
type: TYPE_BOOL
}
nested_type {
name: "RunHandlerPoolOptions"
field {

View File

@ -68,12 +68,6 @@ tf_proto {
type: TYPE_MESSAGE
type_name: ".tensorflow.RunOptions.Experimental.RunHandlerPoolOptions"
}
field {
name: "always_track_kernel_execution_cost"
number: 4
label: LABEL_OPTIONAL
type: TYPE_BOOL
}
nested_type {
name: "RunHandlerPoolOptions"
field {