Add a new metric via Monitoring API: graph_run_time_usecs_histogram.

PiperOrigin-RevId: 240411081
This commit is contained in:
Xiao Yu 2019-03-26 13:05:38 -07:00 committed by TensorFlower Gardener
parent e70fd88c7b
commit 0f4563a038
3 changed files with 11 additions and 3 deletions

View File

@ -463,7 +463,7 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
CallFrameInterface* call_frame, CallFrameInterface* call_frame,
ExecutorsAndKeys* executors_and_keys, ExecutorsAndKeys* executors_and_keys,
RunMetadata* run_metadata) { RunMetadata* run_metadata) {
const uint64 start_time_usecs = Env::Default()->NowMicros(); const uint64 start_time_usecs = options_.env->NowMicros();
string session_id_meta = strings::StrCat("SessionRun #id=", step_id, "#"); string session_id_meta = strings::StrCat("SessionRun #id=", step_id, "#");
tracing::ScopedActivity activity(session_id_meta); tracing::ScopedActivity activity(session_id_meta);
@ -720,7 +720,7 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
exec_and_lib.graph->ToGraphDef(partition_graph_def); exec_and_lib.graph->ToGraphDef(partition_graph_def);
} }
} }
metrics::UpdateGraphExecTime(Env::Default()->NowMicros() - start_time_usecs); metrics::UpdateGraphExecTime(options_.env->NowMicros() - start_time_usecs);
return Status::OK(); return Status::OK();
} }

View File

@ -390,7 +390,7 @@ class DirectSession : public Session {
std::atomic<int64> edge_name_counter_ = {0}; std::atomic<int64> edge_name_counter_ = {0};
std::atomic<int64> handle_name_counter_ = {0}; std::atomic<int64> handle_name_counter_ = {0};
// For generating step ids that are unique across this sessions. // For generating step ids that are unique among all sessions.
static std::atomic_int_fast64_t step_id_counter_; static std::atomic_int_fast64_t step_id_counter_;
// Global timeout for all blocking operations in this session. // Global timeout for all blocking operations in this session.

View File

@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/metrics.h" #include "tensorflow/core/common_runtime/metrics.h"
#include "tensorflow/core/lib/monitoring/counter.h" #include "tensorflow/core/lib/monitoring/counter.h"
#include "tensorflow/core/lib/monitoring/sampler.h"
namespace tensorflow { namespace tensorflow {
namespace metrics { namespace metrics {
@ -29,6 +30,12 @@ auto* graph_run_time_usecs = monitoring::Counter<0>::New(
"/tensorflow/core/graph_run_time_usecs", "/tensorflow/core/graph_run_time_usecs",
"The total time spent on executing graphs in microseconds."); "The total time spent on executing graphs in microseconds.");
// Sampler (histogram) metric that UpdateGraphExecTime() feeds with the
// duration, in microseconds, of each graph run it is given.
// Declared as Sampler<0>; presumably the template argument is the number of
// label fields (compare Counter<1> with its single "name" label) -- confirm.
auto* graph_run_time_usecs_histogram = monitoring::Sampler<0>::New(
{"/tensorflow/core/graph_run_time_usecs_histogram",
"The wall-clock time spent on executing graphs in microseconds."},
// Power of 2 with bucket count 20 (> 17 minutes)
// NOTE(review): "> 17 minutes" corresponds to 1000 * 2^20 usecs (~17.5 min).
// If the largest finite bucket bound is 1000 * 2^19 usecs instead, the covered
// range is only ~8.7 min -- confirm against Buckets::Exponential.
{monitoring::Buckets::Exponential(1000, 2, 20)});
auto* tf_data_autotune_counter = monitoring::Counter<1>::New( auto* tf_data_autotune_counter = monitoring::Counter<1>::New(
"/tensorflow/data/autotune", "tf.data autotuning", "name"); "/tensorflow/data/autotune", "tf.data autotuning", "name");
@ -81,6 +88,7 @@ void UpdateGraphExecTime(const uint64 running_time_usecs) {
if (running_time_usecs > 0) { if (running_time_usecs > 0) {
graph_runs->GetCell()->IncrementBy(1); graph_runs->GetCell()->IncrementBy(1);
graph_run_time_usecs->GetCell()->IncrementBy(running_time_usecs); graph_run_time_usecs->GetCell()->IncrementBy(running_time_usecs);
graph_run_time_usecs_histogram->GetCell()->Add(running_time_usecs);
} }
} }