Add unit of mesaure to TF monitoring percentile metrics.

PiperOrigin-RevId: 294756415
Change-Id: I52f7a7be91d39e9c7b6b13a7fbc1fec436f09931
This commit is contained in:
Davide Libenzi 2020-02-12 14:23:06 -08:00 committed by TensorFlower Gardener
parent c5765d2bab
commit 7eeb96e00f
8 changed files with 99 additions and 34 deletions

View File

@ -229,6 +229,13 @@ message Percentiles {
}
message MetricValues {
enum UnitOfMeasure {
INVALID = 0;
NUMBER = 1;
TIME = 2;
BYTES = 3;
}
// The metric name.
string name = 1;
@ -236,6 +243,8 @@ message MetricValues {
Percentiles percentiles_value = 2;
int64 int64_value = 3;
}
UnitOfMeasure unit_of_measure = 4;
}
message MetricsReport {

View File

@ -40,6 +40,21 @@ bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics,
return false;
}
void SetUnitOfMeasure(xrt::MetricValues* metrics,
monitoring::UnitOfMeasure unit_of_measure) {
switch (unit_of_measure) {
case monitoring::UnitOfMeasure::kNumber:
metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
break;
case monitoring::UnitOfMeasure::kTime:
metrics->set_unit_of_measure(xrt::MetricValues::TIME);
break;
case monitoring::UnitOfMeasure::kBytes:
metrics->set_unit_of_measure(xrt::MetricValues::BYTES);
break;
}
}
Status AddMetrics(xrt::MetricsReport* report,
const monitoring::PointSet& point_set) {
for (auto& point : point_set.points) {
@ -47,6 +62,7 @@ Status AddMetrics(xrt::MetricsReport* report,
metrics->set_name(point_set.metric_name);
if (point->value_type == monitoring::ValueType::kPercentiles) {
xrt::Percentiles* percentiles = metrics->mutable_percentiles_value();
SetUnitOfMeasure(metrics, point->percentiles_value.unit_of_measure);
percentiles->set_start_nstime(point->percentiles_value.start_nstime);
percentiles->set_end_nstime(point->percentiles_value.end_nstime);
percentiles->set_min_value(point->percentiles_value.min_value);
@ -62,6 +78,7 @@ Status AddMetrics(xrt::MetricsReport* report,
xpoint->set_value(pct_point.value);
}
} else if (point->value_type == monitoring::ValueType::kInt64) {
metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
metrics->set_int64_value(point->int64_value);
}
}
@ -76,7 +93,8 @@ monitoring::PercentileSamplerCell* GetAllocateCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -86,7 +104,8 @@ monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/allocate_uninitialized",
"Tracks XRTAllocateUninitialized times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -96,7 +115,8 @@ monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/allocate_from_tensor",
"Tracks XRTAllocateFromTensor times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -105,7 +125,8 @@ monitoring::PercentileSamplerCell* GetSubTupleCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -114,7 +135,8 @@ monitoring::PercentileSamplerCell* GetMakeTupleCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -123,7 +145,8 @@ monitoring::PercentileSamplerCell* GetReadLiteralCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -132,7 +155,8 @@ monitoring::PercentileSamplerCell* GetReadToTensorCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -141,7 +165,8 @@ monitoring::PercentileSamplerCell* GetWriteLiteralCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -151,7 +176,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllocationCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/release_allocation",
"Tracks XRTReleaseAllocation times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -161,7 +187,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/release_all_allocations",
"Tracks XRTReleaseAllAllocations times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -171,7 +198,8 @@ monitoring::PercentileSamplerCell* GetCompactAllocationsCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/compact_allocations",
"Tracks XRTCompactAllocations times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -180,7 +208,8 @@ monitoring::PercentileSamplerCell* GetCompileCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -190,7 +219,8 @@ monitoring::PercentileSamplerCell* GetReleaseCompilationCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/release_compilation",
"Tracks XRTReleaseCompilationRef times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -199,7 +229,8 @@ monitoring::PercentileSamplerCell* GetExecuteCell() {
static monitoring::PercentileSamplerCell* cell =
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -209,7 +240,8 @@ monitoring::PercentileSamplerCell* GetExecuteChainedCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/ops/execute_chained",
"Tracks XRTExecuteChained times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -219,7 +251,8 @@ monitoring::PercentileSamplerCell* GetMemoryCompactCell() {
monitoring::PercentileSampler<0>::New(
{"/tensorflow/xrt/memory_manager/compaction",
"Tracks XRT memory manager memory compaction times"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}
@ -230,7 +263,8 @@ monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() {
{"/tensorflow/xrt/memory_manager/try_free_memory",
"Tracks XRT memory manager times in trying to "
"free memory by swpping device memory to host memory"},
GetDefaultPercentiles(), kMaxSamples)
GetDefaultPercentiles(), kMaxSamples,
monitoring::UnitOfMeasure::kTime)
->GetCell();
return cell;
}

View File

@ -368,11 +368,12 @@ TEST(CollectMetricsTest, PercentileSampler) {
std::unique_ptr<PercentileSampler<2>>(PercentileSampler<2>::New(
{"/tensorflow/test/pctsampler_with_labels",
"Percentile sampler with labels.", "MyLabel0", "MyLabel1"},
{25.0, 50.0, 75.0}, 1024));
auto sampler_without_labels = std::unique_ptr<PercentileSampler<0>>(
PercentileSampler<0>::New({"/tensorflow/test/pctsampler_without_labels",
"Percentile sampler without labels."},
{25.0, 50.0, 75.0}, 1024));
{25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber));
auto sampler_without_labels =
std::unique_ptr<PercentileSampler<0>>(PercentileSampler<0>::New(
{"/tensorflow/test/pctsampler_without_labels",
"Percentile sampler without labels."},
{25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber));
sampler_with_labels->GetCell("Label00", "Label10")->Add(0.7);
sampler_with_labels->GetCell("Label01", "Label11")->Add(1.5);

View File

@ -38,7 +38,8 @@ class PercentileSampler {
static PercentileSampler* New(
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
metric_def,
std::vector<double> percentiles, size_t max_samples);
std::vector<double> percentiles, size_t max_samples,
UnitOfMeasure unit_of_measure);
template <typename... Labels>
PercentileSamplerCell* GetCell(const Labels&... labels) {
@ -57,7 +58,8 @@ template <int NumLabels>
PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
/* metric_def */,
std::vector<double> /* percentiles */, size_t /* max_samples */) {
std::vector<double> /* percentiles */, size_t /* max_samples */,
UnitOfMeasure /* unit_of_measure */) {
return new PercentileSampler<NumLabels>();
}

View File

@ -43,6 +43,7 @@ void PercentileSamplerCell::Add(double sample) {
Percentiles PercentileSamplerCell::value() const {
Percentiles pct_samples;
pct_samples.unit_of_measure = unit_of_measure_;
size_t total_samples;
long double accumulator;
std::vector<Sample> samples = GetSamples(&total_samples, &accumulator);

View File

@ -47,8 +47,10 @@ namespace monitoring {
// This class is thread-safe.
class PercentileSamplerCell {
public:
PercentileSamplerCell(std::vector<double> percentiles, size_t max_samples)
: percentiles_(std::move(percentiles)),
PercentileSamplerCell(UnitOfMeasure unit_of_measure,
std::vector<double> percentiles, size_t max_samples)
: unit_of_measure_(unit_of_measure),
percentiles_(std::move(percentiles)),
samples_(max_samples),
num_samples_(0),
next_position_(0),
@ -72,6 +74,7 @@ class PercentileSamplerCell {
long double* accumulator) const;
mutable mutex mu_;
UnitOfMeasure unit_of_measure_;
const std::vector<double> percentiles_;
std::vector<Sample> samples_ GUARDED_BY(mu_);
size_t num_samples_ GUARDED_BY(mu_);
@ -106,11 +109,13 @@ class PercentileSampler {
// Example;
// auto* sampler_with_label =
// PercentileSampler<1>::New({"/tensorflow/sampler",
// "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024);
// "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024,
// UnitOfMeasure::kTime);
static PercentileSampler* New(
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
metric_def,
std::vector<double> percentiles, size_t max_samples);
std::vector<double> percentiles, size_t max_samples,
UnitOfMeasure unit_of_measure);
// Retrieves the cell for the specified labels, creating it on demand if
// not already present.
@ -124,8 +129,10 @@ class PercentileSampler {
PercentileSampler(const MetricDef<MetricKind::kCumulative, Percentiles,
NumLabels>& metric_def,
std::vector<double> percentiles, size_t max_samples)
std::vector<double> percentiles, size_t max_samples,
UnitOfMeasure unit_of_measure)
: metric_def_(metric_def),
unit_of_measure_(unit_of_measure),
percentiles_(std::move(percentiles)),
max_samples_(max_samples),
registration_handle_(CollectionRegistry::Default()->Register(
@ -165,6 +172,8 @@ class PercentileSampler {
// register it for collection.
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels> metric_def_;
UnitOfMeasure unit_of_measure_ = UnitOfMeasure::kNumber;
// The percentiles samples required for this metric.
const std::vector<double> percentiles_;
@ -187,9 +196,10 @@ template <int NumLabels>
PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
metric_def,
std::vector<double> percentiles, size_t max_samples) {
std::vector<double> percentiles, size_t max_samples,
UnitOfMeasure unit_of_measure) {
return new PercentileSampler<NumLabels>(metric_def, std::move(percentiles),
max_samples);
max_samples, unit_of_measure);
}
template <int NumLabels>
@ -212,7 +222,8 @@ PercentileSamplerCell* PercentileSampler<NumLabels>::GetCell(
return &(cells_
.emplace(std::piecewise_construct,
std::forward_as_tuple(label_array),
std::forward_as_tuple(percentiles_, max_samples_))
std::forward_as_tuple(unit_of_measure_, percentiles_,
max_samples_))
.first->second);
}

View File

@ -24,11 +24,11 @@ namespace {
auto* pctsampler_with_labels = PercentileSampler<1>::New(
{"/tensorflow/test/percentile_sampler_with_labels",
"Percentile sampler with one label.", "MyLabel"},
{25.0, 50.0, 90.0, 99.0}, 1024);
{25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber);
auto* pctsampler_without_labels = PercentileSampler<0>::New(
{"/tensorflow/test/percentile_sampler_without_labels",
"Percentile sampler without labels initialized as empty."},
{25.0, 50.0, 90.0, 99.0}, 1024);
{25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber);
TEST(LabeledPercentileSamplerTest, FixedPercentilesValues) {
auto* cell = pctsampler_with_labels->GetCell("MyLabel");

View File

@ -24,6 +24,12 @@ limitations under the License.
namespace tensorflow {
namespace monitoring {
enum class UnitOfMeasure {
kNumber,
kTime,
kBytes,
};
struct PercentilePoint {
// In the [0, 100] range.
double percentile = 0.0;
@ -31,6 +37,7 @@ struct PercentilePoint {
};
struct Percentiles {
UnitOfMeasure unit_of_measure = UnitOfMeasure::kNumber;
uint64 start_nstime = 0;
uint64 end_nstime = 0;
double min_value = NAN;