From 7eeb96e00ff8c37c03513a125fe3b5524a99cba9 Mon Sep 17 00:00:00 2001 From: Davide Libenzi <dlibenzi@google.com> Date: Wed, 12 Feb 2020 14:23:06 -0800 Subject: [PATCH] Add unit of mesaure to TF monitoring percentile metrics. PiperOrigin-RevId: 294756415 Change-Id: I52f7a7be91d39e9c7b6b13a7fbc1fec436f09931 --- tensorflow/compiler/xrt/xrt.proto | 9 +++ tensorflow/compiler/xrt/xrt_metrics.cc | 68 ++++++++++++++----- .../monitoring/collection_registry_test.cc | 11 +-- .../monitoring/mobile_percentile_sampler.h | 6 +- .../core/lib/monitoring/percentile_sampler.cc | 1 + .../core/lib/monitoring/percentile_sampler.h | 27 +++++--- .../lib/monitoring/percentile_sampler_test.cc | 4 +- tensorflow/core/lib/monitoring/types.h | 7 ++ 8 files changed, 99 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/xrt/xrt.proto b/tensorflow/compiler/xrt/xrt.proto index 1cf9a0b650f..b885a7593f5 100644 --- a/tensorflow/compiler/xrt/xrt.proto +++ b/tensorflow/compiler/xrt/xrt.proto @@ -229,6 +229,13 @@ message Percentiles { } message MetricValues { + enum UnitOfMeasure { + INVALID = 0; + NUMBER = 1; + TIME = 2; + BYTES = 3; + } + // The metric name. string name = 1; @@ -236,6 +243,8 @@ message MetricValues { Percentiles percentiles_value = 2; int64 int64_value = 3; } + + UnitOfMeasure unit_of_measure = 4; } message MetricsReport { diff --git a/tensorflow/compiler/xrt/xrt_metrics.cc b/tensorflow/compiler/xrt/xrt_metrics.cc index ec4ac774b68..94906530ad9 100644 --- a/tensorflow/compiler/xrt/xrt_metrics.cc +++ b/tensorflow/compiler/xrt/xrt_metrics.cc @@ -40,6 +40,21 @@ bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics, return false; } +void SetUnitOfMeasure(xrt::MetricValues* metrics, + monitoring::UnitOfMeasure unit_of_measure) { + switch (unit_of_measure) { + case monitoring::UnitOfMeasure::kNumber: + metrics->set_unit_of_measure(xrt::MetricValues::NUMBER); + break; + case monitoring::UnitOfMeasure::kTime: + metrics->set_unit_of_measure(xrt::MetricValues::TIME); + break; + case monitoring::UnitOfMeasure::kBytes: + metrics->set_unit_of_measure(xrt::MetricValues::BYTES); + break; + } +} + Status AddMetrics(xrt::MetricsReport* report, const monitoring::PointSet& point_set) { for (auto& point : point_set.points) { @@ -47,6 +62,7 @@ Status AddMetrics(xrt::MetricsReport* report, metrics->set_name(point_set.metric_name); if (point->value_type == monitoring::ValueType::kPercentiles) { xrt::Percentiles* percentiles = metrics->mutable_percentiles_value(); + SetUnitOfMeasure(metrics, point->percentiles_value.unit_of_measure); percentiles->set_start_nstime(point->percentiles_value.start_nstime); percentiles->set_end_nstime(point->percentiles_value.end_nstime); percentiles->set_min_value(point->percentiles_value.min_value); @@ -62,6 +78,7 @@ Status AddMetrics(xrt::MetricsReport* report, xpoint->set_value(pct_point.value); } } else if (point->value_type == monitoring::ValueType::kInt64) { + metrics->set_unit_of_measure(xrt::MetricValues::NUMBER); metrics->set_int64_value(point->int64_value); } } @@ -76,7 +93,8 @@ monitoring::PercentileSamplerCell* GetAllocateCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -86,7 +104,8 @@ monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/allocate_uninitialized", "Tracks XRTAllocateUninitialized times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -96,7 +115,8 @@ monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/allocate_from_tensor", "Tracks XRTAllocateFromTensor times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -105,7 +125,8 @@ monitoring::PercentileSamplerCell* GetSubTupleCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -114,7 +135,8 @@ monitoring::PercentileSamplerCell* GetMakeTupleCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -123,7 +145,8 @@ monitoring::PercentileSamplerCell* GetReadLiteralCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -132,7 +155,8 @@ monitoring::PercentileSamplerCell* GetReadToTensorCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -141,7 +165,8 @@ monitoring::PercentileSamplerCell* GetWriteLiteralCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -151,7 +176,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllocationCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/release_allocation", "Tracks XRTReleaseAllocation times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -161,7 +187,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/release_all_allocations", "Tracks XRTReleaseAllAllocations times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -171,7 +198,8 @@ monitoring::PercentileSamplerCell* GetCompactAllocationsCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/compact_allocations", "Tracks XRTCompactAllocations times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -180,7 +208,8 @@ monitoring::PercentileSamplerCell* GetCompileCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -190,7 +219,8 @@ monitoring::PercentileSamplerCell* GetReleaseCompilationCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/release_compilation", "Tracks XRTReleaseCompilationRef times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -199,7 +229,8 @@ monitoring::PercentileSamplerCell* GetExecuteCell() { static monitoring::PercentileSamplerCell* cell = monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -209,7 +240,8 @@ monitoring::PercentileSamplerCell* GetExecuteChainedCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/ops/execute_chained", "Tracks XRTExecuteChained times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -219,7 +251,8 @@ monitoring::PercentileSamplerCell* GetMemoryCompactCell() { monitoring::PercentileSampler<0>::New( {"/tensorflow/xrt/memory_manager/compaction", "Tracks XRT memory manager memory compaction times"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } @@ -230,7 +263,8 @@ monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() { {"/tensorflow/xrt/memory_manager/try_free_memory", "Tracks XRT memory manager times in trying to " "free memory by swpping device memory to host memory"}, - GetDefaultPercentiles(), kMaxSamples) + GetDefaultPercentiles(), kMaxSamples, + monitoring::UnitOfMeasure::kTime) ->GetCell(); return cell; } diff --git a/tensorflow/core/lib/monitoring/collection_registry_test.cc b/tensorflow/core/lib/monitoring/collection_registry_test.cc index 7449ab597aa..a11ea059204 100644 --- a/tensorflow/core/lib/monitoring/collection_registry_test.cc +++ b/tensorflow/core/lib/monitoring/collection_registry_test.cc @@ -368,11 +368,12 @@ TEST(CollectMetricsTest, PercentileSampler) { std::unique_ptr<PercentileSampler<2>>(PercentileSampler<2>::New( {"/tensorflow/test/pctsampler_with_labels", "Percentile sampler with labels.", "MyLabel0", "MyLabel1"}, - {25.0, 50.0, 75.0}, 1024)); - auto sampler_without_labels = std::unique_ptr<PercentileSampler<0>>( - PercentileSampler<0>::New({"/tensorflow/test/pctsampler_without_labels", - "Percentile sampler without labels."}, - {25.0, 50.0, 75.0}, 1024)); + {25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber)); + auto sampler_without_labels = + std::unique_ptr<PercentileSampler<0>>(PercentileSampler<0>::New( + {"/tensorflow/test/pctsampler_without_labels", + "Percentile sampler without labels."}, + {25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber)); sampler_with_labels->GetCell("Label00", "Label10")->Add(0.7); sampler_with_labels->GetCell("Label01", "Label11")->Add(1.5); diff --git a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h index a33909d564a..2c792f0e0cb 100644 --- a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h +++ b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h @@ -38,7 +38,8 @@ class PercentileSampler { static PercentileSampler* New( const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>& metric_def, - std::vector<double> percentiles, size_t max_samples); + std::vector<double> percentiles, size_t max_samples, + UnitOfMeasure unit_of_measure); template <typename... Labels> PercentileSamplerCell* GetCell(const Labels&... labels) { @@ -57,7 +58,8 @@ template <int NumLabels> PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New( const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>& /* metric_def */, - std::vector<double> /* percentiles */, size_t /* max_samples */) { + std::vector<double> /* percentiles */, size_t /* max_samples */, + UnitOfMeasure /* unit_of_measure */) { return new PercentileSampler<NumLabels>(); } diff --git a/tensorflow/core/lib/monitoring/percentile_sampler.cc b/tensorflow/core/lib/monitoring/percentile_sampler.cc index 988e50ded52..b7c22ae77ba 100644 --- a/tensorflow/core/lib/monitoring/percentile_sampler.cc +++ b/tensorflow/core/lib/monitoring/percentile_sampler.cc @@ -43,6 +43,7 @@ void PercentileSamplerCell::Add(double sample) { Percentiles PercentileSamplerCell::value() const { Percentiles pct_samples; + pct_samples.unit_of_measure = unit_of_measure_; size_t total_samples; long double accumulator; std::vector<Sample> samples = GetSamples(&total_samples, &accumulator); diff --git a/tensorflow/core/lib/monitoring/percentile_sampler.h b/tensorflow/core/lib/monitoring/percentile_sampler.h index ea0e0e592a1..b4e54eeec41 100644 --- a/tensorflow/core/lib/monitoring/percentile_sampler.h +++ b/tensorflow/core/lib/monitoring/percentile_sampler.h @@ -47,8 +47,10 @@ namespace monitoring { // This class is thread-safe. class PercentileSamplerCell { public: - PercentileSamplerCell(std::vector<double> percentiles, size_t max_samples) - : percentiles_(std::move(percentiles)), + PercentileSamplerCell(UnitOfMeasure unit_of_measure, + std::vector<double> percentiles, size_t max_samples) + : unit_of_measure_(unit_of_measure), + percentiles_(std::move(percentiles)), samples_(max_samples), num_samples_(0), next_position_(0), @@ -72,6 +74,7 @@ class PercentileSamplerCell { long double* accumulator) const; mutable mutex mu_; + UnitOfMeasure unit_of_measure_; const std::vector<double> percentiles_; std::vector<Sample> samples_ GUARDED_BY(mu_); size_t num_samples_ GUARDED_BY(mu_); @@ -106,11 +109,13 @@ class PercentileSampler { // Example; // auto* sampler_with_label = // PercentileSampler<1>::New({"/tensorflow/sampler", - // "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024); + // "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024, + // UnitOfMeasure::kTime); static PercentileSampler* New( const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>& metric_def, - std::vector<double> percentiles, size_t max_samples); + std::vector<double> percentiles, size_t max_samples, + UnitOfMeasure unit_of_measure); // Retrieves the cell for the specified labels, creating it on demand if // not already present. @@ -124,8 +129,10 @@ class PercentileSampler { PercentileSampler(const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>& metric_def, - std::vector<double> percentiles, size_t max_samples) + std::vector<double> percentiles, size_t max_samples, + UnitOfMeasure unit_of_measure) : metric_def_(metric_def), + unit_of_measure_(unit_of_measure), percentiles_(std::move(percentiles)), max_samples_(max_samples), registration_handle_(CollectionRegistry::Default()->Register( @@ -165,6 +172,8 @@ class PercentileSampler { // register it for collection. const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels> metric_def_; + UnitOfMeasure unit_of_measure_ = UnitOfMeasure::kNumber; + // The percentiles samples required for this metric. const std::vector<double> percentiles_; @@ -187,9 +196,10 @@ template <int NumLabels> PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New( const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>& metric_def, - std::vector<double> percentiles, size_t max_samples) { + std::vector<double> percentiles, size_t max_samples, + UnitOfMeasure unit_of_measure) { return new PercentileSampler<NumLabels>(metric_def, std::move(percentiles), - max_samples); + max_samples, unit_of_measure); } template <int NumLabels> @@ -212,7 +222,8 @@ PercentileSamplerCell* PercentileSampler<NumLabels>::GetCell( return &(cells_ .emplace(std::piecewise_construct, std::forward_as_tuple(label_array), - std::forward_as_tuple(percentiles_, max_samples_)) + std::forward_as_tuple(unit_of_measure_, percentiles_, + max_samples_)) .first->second); } diff --git a/tensorflow/core/lib/monitoring/percentile_sampler_test.cc b/tensorflow/core/lib/monitoring/percentile_sampler_test.cc index e1e4eb6fc62..e59caedcde1 100644 --- a/tensorflow/core/lib/monitoring/percentile_sampler_test.cc +++ b/tensorflow/core/lib/monitoring/percentile_sampler_test.cc @@ -24,11 +24,11 @@ namespace { auto* pctsampler_with_labels = PercentileSampler<1>::New( {"/tensorflow/test/percentile_sampler_with_labels", "Percentile sampler with one label.", "MyLabel"}, - {25.0, 50.0, 90.0, 99.0}, 1024); + {25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber); auto* pctsampler_without_labels = PercentileSampler<0>::New( {"/tensorflow/test/percentile_sampler_without_labels", "Percentile sampler without labels initialized as empty."}, - {25.0, 50.0, 90.0, 99.0}, 1024); + {25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber); TEST(LabeledPercentileSamplerTest, FixedPercentilesValues) { auto* cell = pctsampler_with_labels->GetCell("MyLabel"); diff --git a/tensorflow/core/lib/monitoring/types.h b/tensorflow/core/lib/monitoring/types.h index 8b78d7c53b9..ee503b5b2f8 100644 --- a/tensorflow/core/lib/monitoring/types.h +++ b/tensorflow/core/lib/monitoring/types.h @@ -24,6 +24,12 @@ limitations under the License. namespace tensorflow { namespace monitoring { +enum class UnitOfMeasure { + kNumber, + kTime, + kBytes, +}; + struct PercentilePoint { // In the [0, 100] range. double percentile = 0.0; @@ -31,6 +37,7 @@ struct PercentilePoint { }; struct Percentiles { + UnitOfMeasure unit_of_measure = UnitOfMeasure::kNumber; uint64 start_nstime = 0; uint64 end_nstime = 0; double min_value = NAN;