From 7eeb96e00ff8c37c03513a125fe3b5524a99cba9 Mon Sep 17 00:00:00 2001
From: Davide Libenzi <dlibenzi@google.com>
Date: Wed, 12 Feb 2020 14:23:06 -0800
Subject: [PATCH] Add unit of mesaure to TF monitoring percentile metrics.

PiperOrigin-RevId: 294756415
Change-Id: I52f7a7be91d39e9c7b6b13a7fbc1fec436f09931
---
 tensorflow/compiler/xrt/xrt.proto             |  9 +++
 tensorflow/compiler/xrt/xrt_metrics.cc        | 68 ++++++++++++++-----
 .../monitoring/collection_registry_test.cc    | 11 +--
 .../monitoring/mobile_percentile_sampler.h    |  6 +-
 .../core/lib/monitoring/percentile_sampler.cc |  1 +
 .../core/lib/monitoring/percentile_sampler.h  | 27 +++++---
 .../lib/monitoring/percentile_sampler_test.cc |  4 +-
 tensorflow/core/lib/monitoring/types.h        |  7 ++
 8 files changed, 99 insertions(+), 34 deletions(-)

diff --git a/tensorflow/compiler/xrt/xrt.proto b/tensorflow/compiler/xrt/xrt.proto
index 1cf9a0b650f..b885a7593f5 100644
--- a/tensorflow/compiler/xrt/xrt.proto
+++ b/tensorflow/compiler/xrt/xrt.proto
@@ -229,6 +229,13 @@ message Percentiles {
 }
 
 message MetricValues {
+  enum UnitOfMeasure {
+    INVALID = 0;
+    NUMBER = 1;
+    TIME = 2;
+    BYTES = 3;
+  }
+
   // The metric name.
   string name = 1;
 
@@ -236,6 +243,8 @@ message MetricValues {
     Percentiles percentiles_value = 2;
     int64 int64_value = 3;
   }
+
+  UnitOfMeasure unit_of_measure = 4;
 }
 
 message MetricsReport {
diff --git a/tensorflow/compiler/xrt/xrt_metrics.cc b/tensorflow/compiler/xrt/xrt_metrics.cc
index ec4ac774b68..94906530ad9 100644
--- a/tensorflow/compiler/xrt/xrt_metrics.cc
+++ b/tensorflow/compiler/xrt/xrt_metrics.cc
@@ -40,6 +40,21 @@ bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics,
   return false;
 }
 
+void SetUnitOfMeasure(xrt::MetricValues* metrics,
+                      monitoring::UnitOfMeasure unit_of_measure) {
+  switch (unit_of_measure) {
+    case monitoring::UnitOfMeasure::kNumber:
+      metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
+      break;
+    case monitoring::UnitOfMeasure::kTime:
+      metrics->set_unit_of_measure(xrt::MetricValues::TIME);
+      break;
+    case monitoring::UnitOfMeasure::kBytes:
+      metrics->set_unit_of_measure(xrt::MetricValues::BYTES);
+      break;
+  }
+}
+
 Status AddMetrics(xrt::MetricsReport* report,
                   const monitoring::PointSet& point_set) {
   for (auto& point : point_set.points) {
@@ -47,6 +62,7 @@ Status AddMetrics(xrt::MetricsReport* report,
     metrics->set_name(point_set.metric_name);
     if (point->value_type == monitoring::ValueType::kPercentiles) {
       xrt::Percentiles* percentiles = metrics->mutable_percentiles_value();
+      SetUnitOfMeasure(metrics, point->percentiles_value.unit_of_measure);
       percentiles->set_start_nstime(point->percentiles_value.start_nstime);
       percentiles->set_end_nstime(point->percentiles_value.end_nstime);
       percentiles->set_min_value(point->percentiles_value.min_value);
@@ -62,6 +78,7 @@ Status AddMetrics(xrt::MetricsReport* report,
         xpoint->set_value(pct_point.value);
       }
     } else if (point->value_type == monitoring::ValueType::kInt64) {
+      metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
       metrics->set_int64_value(point->int64_value);
     }
   }
@@ -76,7 +93,8 @@ monitoring::PercentileSamplerCell* GetAllocateCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -86,7 +104,8 @@ monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/allocate_uninitialized",
            "Tracks XRTAllocateUninitialized times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -96,7 +115,8 @@ monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/allocate_from_tensor",
            "Tracks XRTAllocateFromTensor times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -105,7 +125,8 @@ monitoring::PercentileSamplerCell* GetSubTupleCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -114,7 +135,8 @@ monitoring::PercentileSamplerCell* GetMakeTupleCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -123,7 +145,8 @@ monitoring::PercentileSamplerCell* GetReadLiteralCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -132,7 +155,8 @@ monitoring::PercentileSamplerCell* GetReadToTensorCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -141,7 +165,8 @@ monitoring::PercentileSamplerCell* GetWriteLiteralCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -151,7 +176,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllocationCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/release_allocation",
            "Tracks XRTReleaseAllocation times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -161,7 +187,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/release_all_allocations",
            "Tracks XRTReleaseAllAllocations times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -171,7 +198,8 @@ monitoring::PercentileSamplerCell* GetCompactAllocationsCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/compact_allocations",
            "Tracks XRTCompactAllocations times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -180,7 +208,8 @@ monitoring::PercentileSamplerCell* GetCompileCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -190,7 +219,8 @@ monitoring::PercentileSamplerCell* GetReleaseCompilationCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/release_compilation",
            "Tracks XRTReleaseCompilationRef times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -199,7 +229,8 @@ monitoring::PercentileSamplerCell* GetExecuteCell() {
   static monitoring::PercentileSamplerCell* cell =
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -209,7 +240,8 @@ monitoring::PercentileSamplerCell* GetExecuteChainedCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/ops/execute_chained",
            "Tracks XRTExecuteChained times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -219,7 +251,8 @@ monitoring::PercentileSamplerCell* GetMemoryCompactCell() {
       monitoring::PercentileSampler<0>::New(
           {"/tensorflow/xrt/memory_manager/compaction",
            "Tracks XRT memory manager memory compaction times"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
@@ -230,7 +263,8 @@ monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() {
           {"/tensorflow/xrt/memory_manager/try_free_memory",
            "Tracks XRT memory manager times in trying to "
            "free memory by swpping device memory to host memory"},
-          GetDefaultPercentiles(), kMaxSamples)
+          GetDefaultPercentiles(), kMaxSamples,
+          monitoring::UnitOfMeasure::kTime)
           ->GetCell();
   return cell;
 }
diff --git a/tensorflow/core/lib/monitoring/collection_registry_test.cc b/tensorflow/core/lib/monitoring/collection_registry_test.cc
index 7449ab597aa..a11ea059204 100644
--- a/tensorflow/core/lib/monitoring/collection_registry_test.cc
+++ b/tensorflow/core/lib/monitoring/collection_registry_test.cc
@@ -368,11 +368,12 @@ TEST(CollectMetricsTest, PercentileSampler) {
       std::unique_ptr<PercentileSampler<2>>(PercentileSampler<2>::New(
           {"/tensorflow/test/pctsampler_with_labels",
            "Percentile sampler with labels.", "MyLabel0", "MyLabel1"},
-          {25.0, 50.0, 75.0}, 1024));
-  auto sampler_without_labels = std::unique_ptr<PercentileSampler<0>>(
-      PercentileSampler<0>::New({"/tensorflow/test/pctsampler_without_labels",
-                                 "Percentile sampler without labels."},
-                                {25.0, 50.0, 75.0}, 1024));
+          {25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber));
+  auto sampler_without_labels =
+      std::unique_ptr<PercentileSampler<0>>(PercentileSampler<0>::New(
+          {"/tensorflow/test/pctsampler_without_labels",
+           "Percentile sampler without labels."},
+          {25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber));
 
   sampler_with_labels->GetCell("Label00", "Label10")->Add(0.7);
   sampler_with_labels->GetCell("Label01", "Label11")->Add(1.5);
diff --git a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h
index a33909d564a..2c792f0e0cb 100644
--- a/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h
+++ b/tensorflow/core/lib/monitoring/mobile_percentile_sampler.h
@@ -38,7 +38,8 @@ class PercentileSampler {
   static PercentileSampler* New(
       const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
           metric_def,
-      std::vector<double> percentiles, size_t max_samples);
+      std::vector<double> percentiles, size_t max_samples,
+      UnitOfMeasure unit_of_measure);
 
   template <typename... Labels>
   PercentileSamplerCell* GetCell(const Labels&... labels) {
@@ -57,7 +58,8 @@ template <int NumLabels>
 PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
     const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
     /* metric_def */,
-    std::vector<double> /* percentiles */, size_t /* max_samples */) {
+    std::vector<double> /* percentiles */, size_t /* max_samples */,
+    UnitOfMeasure /* unit_of_measure */) {
   return new PercentileSampler<NumLabels>();
 }
 
diff --git a/tensorflow/core/lib/monitoring/percentile_sampler.cc b/tensorflow/core/lib/monitoring/percentile_sampler.cc
index 988e50ded52..b7c22ae77ba 100644
--- a/tensorflow/core/lib/monitoring/percentile_sampler.cc
+++ b/tensorflow/core/lib/monitoring/percentile_sampler.cc
@@ -43,6 +43,7 @@ void PercentileSamplerCell::Add(double sample) {
 
 Percentiles PercentileSamplerCell::value() const {
   Percentiles pct_samples;
+  pct_samples.unit_of_measure = unit_of_measure_;
   size_t total_samples;
   long double accumulator;
   std::vector<Sample> samples = GetSamples(&total_samples, &accumulator);
diff --git a/tensorflow/core/lib/monitoring/percentile_sampler.h b/tensorflow/core/lib/monitoring/percentile_sampler.h
index ea0e0e592a1..b4e54eeec41 100644
--- a/tensorflow/core/lib/monitoring/percentile_sampler.h
+++ b/tensorflow/core/lib/monitoring/percentile_sampler.h
@@ -47,8 +47,10 @@ namespace monitoring {
 // This class is thread-safe.
 class PercentileSamplerCell {
  public:
-  PercentileSamplerCell(std::vector<double> percentiles, size_t max_samples)
-      : percentiles_(std::move(percentiles)),
+  PercentileSamplerCell(UnitOfMeasure unit_of_measure,
+                        std::vector<double> percentiles, size_t max_samples)
+      : unit_of_measure_(unit_of_measure),
+        percentiles_(std::move(percentiles)),
         samples_(max_samples),
         num_samples_(0),
         next_position_(0),
@@ -72,6 +74,7 @@ class PercentileSamplerCell {
                                  long double* accumulator) const;
 
   mutable mutex mu_;
+  UnitOfMeasure unit_of_measure_;
   const std::vector<double> percentiles_;
   std::vector<Sample> samples_ GUARDED_BY(mu_);
   size_t num_samples_ GUARDED_BY(mu_);
@@ -106,11 +109,13 @@ class PercentileSampler {
   // Example;
   // auto* sampler_with_label =
   // PercentileSampler<1>::New({"/tensorflow/sampler",
-  //   "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024);
+  //   "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024,
+  //   UnitOfMeasure::kTime);
   static PercentileSampler* New(
       const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
           metric_def,
-      std::vector<double> percentiles, size_t max_samples);
+      std::vector<double> percentiles, size_t max_samples,
+      UnitOfMeasure unit_of_measure);
 
   // Retrieves the cell for the specified labels, creating it on demand if
   // not already present.
@@ -124,8 +129,10 @@ class PercentileSampler {
 
   PercentileSampler(const MetricDef<MetricKind::kCumulative, Percentiles,
                                     NumLabels>& metric_def,
-                    std::vector<double> percentiles, size_t max_samples)
+                    std::vector<double> percentiles, size_t max_samples,
+                    UnitOfMeasure unit_of_measure)
       : metric_def_(metric_def),
+        unit_of_measure_(unit_of_measure),
         percentiles_(std::move(percentiles)),
         max_samples_(max_samples),
         registration_handle_(CollectionRegistry::Default()->Register(
@@ -165,6 +172,8 @@ class PercentileSampler {
   // register it for collection.
   const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels> metric_def_;
 
+  UnitOfMeasure unit_of_measure_ = UnitOfMeasure::kNumber;
+
   // The percentiles samples required for this metric.
   const std::vector<double> percentiles_;
 
@@ -187,9 +196,10 @@ template <int NumLabels>
 PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
     const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
         metric_def,
-    std::vector<double> percentiles, size_t max_samples) {
+    std::vector<double> percentiles, size_t max_samples,
+    UnitOfMeasure unit_of_measure) {
   return new PercentileSampler<NumLabels>(metric_def, std::move(percentiles),
-                                          max_samples);
+                                          max_samples, unit_of_measure);
 }
 
 template <int NumLabels>
@@ -212,7 +222,8 @@ PercentileSamplerCell* PercentileSampler<NumLabels>::GetCell(
   return &(cells_
                .emplace(std::piecewise_construct,
                         std::forward_as_tuple(label_array),
-                        std::forward_as_tuple(percentiles_, max_samples_))
+                        std::forward_as_tuple(unit_of_measure_, percentiles_,
+                                              max_samples_))
                .first->second);
 }
 
diff --git a/tensorflow/core/lib/monitoring/percentile_sampler_test.cc b/tensorflow/core/lib/monitoring/percentile_sampler_test.cc
index e1e4eb6fc62..e59caedcde1 100644
--- a/tensorflow/core/lib/monitoring/percentile_sampler_test.cc
+++ b/tensorflow/core/lib/monitoring/percentile_sampler_test.cc
@@ -24,11 +24,11 @@ namespace {
 auto* pctsampler_with_labels = PercentileSampler<1>::New(
     {"/tensorflow/test/percentile_sampler_with_labels",
      "Percentile sampler with one label.", "MyLabel"},
-    {25.0, 50.0, 90.0, 99.0}, 1024);
+    {25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber);
 auto* pctsampler_without_labels = PercentileSampler<0>::New(
     {"/tensorflow/test/percentile_sampler_without_labels",
      "Percentile sampler without labels initialized as empty."},
-    {25.0, 50.0, 90.0, 99.0}, 1024);
+    {25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber);
 
 TEST(LabeledPercentileSamplerTest, FixedPercentilesValues) {
   auto* cell = pctsampler_with_labels->GetCell("MyLabel");
diff --git a/tensorflow/core/lib/monitoring/types.h b/tensorflow/core/lib/monitoring/types.h
index 8b78d7c53b9..ee503b5b2f8 100644
--- a/tensorflow/core/lib/monitoring/types.h
+++ b/tensorflow/core/lib/monitoring/types.h
@@ -24,6 +24,12 @@ limitations under the License.
 namespace tensorflow {
 namespace monitoring {
 
+enum class UnitOfMeasure {
+  kNumber,
+  kTime,
+  kBytes,
+};
+
 struct PercentilePoint {
   // In the [0, 100] range.
   double percentile = 0.0;
@@ -31,6 +37,7 @@ struct PercentilePoint {
 };
 
 struct Percentiles {
+  UnitOfMeasure unit_of_measure = UnitOfMeasure::kNumber;
   uint64 start_nstime = 0;
   uint64 end_nstime = 0;
   double min_value = NAN;