Add unit of mesaure to TF monitoring percentile metrics.
PiperOrigin-RevId: 294756415 Change-Id: I52f7a7be91d39e9c7b6b13a7fbc1fec436f09931
This commit is contained in:
parent
c5765d2bab
commit
7eeb96e00f
@ -229,6 +229,13 @@ message Percentiles {
|
||||
}
|
||||
|
||||
message MetricValues {
|
||||
enum UnitOfMeasure {
|
||||
INVALID = 0;
|
||||
NUMBER = 1;
|
||||
TIME = 2;
|
||||
BYTES = 3;
|
||||
}
|
||||
|
||||
// The metric name.
|
||||
string name = 1;
|
||||
|
||||
@ -236,6 +243,8 @@ message MetricValues {
|
||||
Percentiles percentiles_value = 2;
|
||||
int64 int64_value = 3;
|
||||
}
|
||||
|
||||
UnitOfMeasure unit_of_measure = 4;
|
||||
}
|
||||
|
||||
message MetricsReport {
|
||||
|
@ -40,6 +40,21 @@ bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics,
|
||||
return false;
|
||||
}
|
||||
|
||||
void SetUnitOfMeasure(xrt::MetricValues* metrics,
|
||||
monitoring::UnitOfMeasure unit_of_measure) {
|
||||
switch (unit_of_measure) {
|
||||
case monitoring::UnitOfMeasure::kNumber:
|
||||
metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
|
||||
break;
|
||||
case monitoring::UnitOfMeasure::kTime:
|
||||
metrics->set_unit_of_measure(xrt::MetricValues::TIME);
|
||||
break;
|
||||
case monitoring::UnitOfMeasure::kBytes:
|
||||
metrics->set_unit_of_measure(xrt::MetricValues::BYTES);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Status AddMetrics(xrt::MetricsReport* report,
|
||||
const monitoring::PointSet& point_set) {
|
||||
for (auto& point : point_set.points) {
|
||||
@ -47,6 +62,7 @@ Status AddMetrics(xrt::MetricsReport* report,
|
||||
metrics->set_name(point_set.metric_name);
|
||||
if (point->value_type == monitoring::ValueType::kPercentiles) {
|
||||
xrt::Percentiles* percentiles = metrics->mutable_percentiles_value();
|
||||
SetUnitOfMeasure(metrics, point->percentiles_value.unit_of_measure);
|
||||
percentiles->set_start_nstime(point->percentiles_value.start_nstime);
|
||||
percentiles->set_end_nstime(point->percentiles_value.end_nstime);
|
||||
percentiles->set_min_value(point->percentiles_value.min_value);
|
||||
@ -62,6 +78,7 @@ Status AddMetrics(xrt::MetricsReport* report,
|
||||
xpoint->set_value(pct_point.value);
|
||||
}
|
||||
} else if (point->value_type == monitoring::ValueType::kInt64) {
|
||||
metrics->set_unit_of_measure(xrt::MetricValues::NUMBER);
|
||||
metrics->set_int64_value(point->int64_value);
|
||||
}
|
||||
}
|
||||
@ -76,7 +93,8 @@ monitoring::PercentileSamplerCell* GetAllocateCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -86,7 +104,8 @@ monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/allocate_uninitialized",
|
||||
"Tracks XRTAllocateUninitialized times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -96,7 +115,8 @@ monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/allocate_from_tensor",
|
||||
"Tracks XRTAllocateFromTensor times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -105,7 +125,8 @@ monitoring::PercentileSamplerCell* GetSubTupleCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -114,7 +135,8 @@ monitoring::PercentileSamplerCell* GetMakeTupleCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -123,7 +145,8 @@ monitoring::PercentileSamplerCell* GetReadLiteralCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -132,7 +155,8 @@ monitoring::PercentileSamplerCell* GetReadToTensorCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -141,7 +165,8 @@ monitoring::PercentileSamplerCell* GetWriteLiteralCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -151,7 +176,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllocationCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/release_allocation",
|
||||
"Tracks XRTReleaseAllocation times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -161,7 +187,8 @@ monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/release_all_allocations",
|
||||
"Tracks XRTReleaseAllAllocations times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -171,7 +198,8 @@ monitoring::PercentileSamplerCell* GetCompactAllocationsCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/compact_allocations",
|
||||
"Tracks XRTCompactAllocations times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -180,7 +208,8 @@ monitoring::PercentileSamplerCell* GetCompileCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -190,7 +219,8 @@ monitoring::PercentileSamplerCell* GetReleaseCompilationCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/release_compilation",
|
||||
"Tracks XRTReleaseCompilationRef times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -199,7 +229,8 @@ monitoring::PercentileSamplerCell* GetExecuteCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -209,7 +240,8 @@ monitoring::PercentileSamplerCell* GetExecuteChainedCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/execute_chained",
|
||||
"Tracks XRTExecuteChained times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -219,7 +251,8 @@ monitoring::PercentileSamplerCell* GetMemoryCompactCell() {
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/memory_manager/compaction",
|
||||
"Tracks XRT memory manager memory compaction times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
@ -230,7 +263,8 @@ monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() {
|
||||
{"/tensorflow/xrt/memory_manager/try_free_memory",
|
||||
"Tracks XRT memory manager times in trying to "
|
||||
"free memory by swpping device memory to host memory"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
GetDefaultPercentiles(), kMaxSamples,
|
||||
monitoring::UnitOfMeasure::kTime)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
@ -368,11 +368,12 @@ TEST(CollectMetricsTest, PercentileSampler) {
|
||||
std::unique_ptr<PercentileSampler<2>>(PercentileSampler<2>::New(
|
||||
{"/tensorflow/test/pctsampler_with_labels",
|
||||
"Percentile sampler with labels.", "MyLabel0", "MyLabel1"},
|
||||
{25.0, 50.0, 75.0}, 1024));
|
||||
auto sampler_without_labels = std::unique_ptr<PercentileSampler<0>>(
|
||||
PercentileSampler<0>::New({"/tensorflow/test/pctsampler_without_labels",
|
||||
"Percentile sampler without labels."},
|
||||
{25.0, 50.0, 75.0}, 1024));
|
||||
{25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber));
|
||||
auto sampler_without_labels =
|
||||
std::unique_ptr<PercentileSampler<0>>(PercentileSampler<0>::New(
|
||||
{"/tensorflow/test/pctsampler_without_labels",
|
||||
"Percentile sampler without labels."},
|
||||
{25.0, 50.0, 75.0}, 1024, UnitOfMeasure::kNumber));
|
||||
|
||||
sampler_with_labels->GetCell("Label00", "Label10")->Add(0.7);
|
||||
sampler_with_labels->GetCell("Label01", "Label11")->Add(1.5);
|
||||
|
@ -38,7 +38,8 @@ class PercentileSampler {
|
||||
static PercentileSampler* New(
|
||||
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
|
||||
metric_def,
|
||||
std::vector<double> percentiles, size_t max_samples);
|
||||
std::vector<double> percentiles, size_t max_samples,
|
||||
UnitOfMeasure unit_of_measure);
|
||||
|
||||
template <typename... Labels>
|
||||
PercentileSamplerCell* GetCell(const Labels&... labels) {
|
||||
@ -57,7 +58,8 @@ template <int NumLabels>
|
||||
PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
|
||||
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
|
||||
/* metric_def */,
|
||||
std::vector<double> /* percentiles */, size_t /* max_samples */) {
|
||||
std::vector<double> /* percentiles */, size_t /* max_samples */,
|
||||
UnitOfMeasure /* unit_of_measure */) {
|
||||
return new PercentileSampler<NumLabels>();
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,7 @@ void PercentileSamplerCell::Add(double sample) {
|
||||
|
||||
Percentiles PercentileSamplerCell::value() const {
|
||||
Percentiles pct_samples;
|
||||
pct_samples.unit_of_measure = unit_of_measure_;
|
||||
size_t total_samples;
|
||||
long double accumulator;
|
||||
std::vector<Sample> samples = GetSamples(&total_samples, &accumulator);
|
||||
|
@ -47,8 +47,10 @@ namespace monitoring {
|
||||
// This class is thread-safe.
|
||||
class PercentileSamplerCell {
|
||||
public:
|
||||
PercentileSamplerCell(std::vector<double> percentiles, size_t max_samples)
|
||||
: percentiles_(std::move(percentiles)),
|
||||
PercentileSamplerCell(UnitOfMeasure unit_of_measure,
|
||||
std::vector<double> percentiles, size_t max_samples)
|
||||
: unit_of_measure_(unit_of_measure),
|
||||
percentiles_(std::move(percentiles)),
|
||||
samples_(max_samples),
|
||||
num_samples_(0),
|
||||
next_position_(0),
|
||||
@ -72,6 +74,7 @@ class PercentileSamplerCell {
|
||||
long double* accumulator) const;
|
||||
|
||||
mutable mutex mu_;
|
||||
UnitOfMeasure unit_of_measure_;
|
||||
const std::vector<double> percentiles_;
|
||||
std::vector<Sample> samples_ GUARDED_BY(mu_);
|
||||
size_t num_samples_ GUARDED_BY(mu_);
|
||||
@ -106,11 +109,13 @@ class PercentileSampler {
|
||||
// Example;
|
||||
// auto* sampler_with_label =
|
||||
// PercentileSampler<1>::New({"/tensorflow/sampler",
|
||||
// "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024);
|
||||
// "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}, 1024,
|
||||
// UnitOfMeasure::kTime);
|
||||
static PercentileSampler* New(
|
||||
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
|
||||
metric_def,
|
||||
std::vector<double> percentiles, size_t max_samples);
|
||||
std::vector<double> percentiles, size_t max_samples,
|
||||
UnitOfMeasure unit_of_measure);
|
||||
|
||||
// Retrieves the cell for the specified labels, creating it on demand if
|
||||
// not already present.
|
||||
@ -124,8 +129,10 @@ class PercentileSampler {
|
||||
|
||||
PercentileSampler(const MetricDef<MetricKind::kCumulative, Percentiles,
|
||||
NumLabels>& metric_def,
|
||||
std::vector<double> percentiles, size_t max_samples)
|
||||
std::vector<double> percentiles, size_t max_samples,
|
||||
UnitOfMeasure unit_of_measure)
|
||||
: metric_def_(metric_def),
|
||||
unit_of_measure_(unit_of_measure),
|
||||
percentiles_(std::move(percentiles)),
|
||||
max_samples_(max_samples),
|
||||
registration_handle_(CollectionRegistry::Default()->Register(
|
||||
@ -165,6 +172,8 @@ class PercentileSampler {
|
||||
// register it for collection.
|
||||
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels> metric_def_;
|
||||
|
||||
UnitOfMeasure unit_of_measure_ = UnitOfMeasure::kNumber;
|
||||
|
||||
// The percentiles samples required for this metric.
|
||||
const std::vector<double> percentiles_;
|
||||
|
||||
@ -187,9 +196,10 @@ template <int NumLabels>
|
||||
PercentileSampler<NumLabels>* PercentileSampler<NumLabels>::New(
|
||||
const MetricDef<MetricKind::kCumulative, Percentiles, NumLabels>&
|
||||
metric_def,
|
||||
std::vector<double> percentiles, size_t max_samples) {
|
||||
std::vector<double> percentiles, size_t max_samples,
|
||||
UnitOfMeasure unit_of_measure) {
|
||||
return new PercentileSampler<NumLabels>(metric_def, std::move(percentiles),
|
||||
max_samples);
|
||||
max_samples, unit_of_measure);
|
||||
}
|
||||
|
||||
template <int NumLabels>
|
||||
@ -212,7 +222,8 @@ PercentileSamplerCell* PercentileSampler<NumLabels>::GetCell(
|
||||
return &(cells_
|
||||
.emplace(std::piecewise_construct,
|
||||
std::forward_as_tuple(label_array),
|
||||
std::forward_as_tuple(percentiles_, max_samples_))
|
||||
std::forward_as_tuple(unit_of_measure_, percentiles_,
|
||||
max_samples_))
|
||||
.first->second);
|
||||
}
|
||||
|
||||
|
@ -24,11 +24,11 @@ namespace {
|
||||
auto* pctsampler_with_labels = PercentileSampler<1>::New(
|
||||
{"/tensorflow/test/percentile_sampler_with_labels",
|
||||
"Percentile sampler with one label.", "MyLabel"},
|
||||
{25.0, 50.0, 90.0, 99.0}, 1024);
|
||||
{25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber);
|
||||
auto* pctsampler_without_labels = PercentileSampler<0>::New(
|
||||
{"/tensorflow/test/percentile_sampler_without_labels",
|
||||
"Percentile sampler without labels initialized as empty."},
|
||||
{25.0, 50.0, 90.0, 99.0}, 1024);
|
||||
{25.0, 50.0, 90.0, 99.0}, 1024, UnitOfMeasure::kNumber);
|
||||
|
||||
TEST(LabeledPercentileSamplerTest, FixedPercentilesValues) {
|
||||
auto* cell = pctsampler_with_labels->GetCell("MyLabel");
|
||||
|
@ -24,6 +24,12 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
namespace monitoring {
|
||||
|
||||
enum class UnitOfMeasure {
|
||||
kNumber,
|
||||
kTime,
|
||||
kBytes,
|
||||
};
|
||||
|
||||
struct PercentilePoint {
|
||||
// In the [0, 100] range.
|
||||
double percentile = 0.0;
|
||||
@ -31,6 +37,7 @@ struct PercentilePoint {
|
||||
};
|
||||
|
||||
struct Percentiles {
|
||||
UnitOfMeasure unit_of_measure = UnitOfMeasure::kNumber;
|
||||
uint64 start_nstime = 0;
|
||||
uint64 end_nstime = 0;
|
||||
double min_value = NAN;
|
||||
|
Loading…
Reference in New Issue
Block a user