Instrument XRT with metrics and add op to fetch them from client side.
PiperOrigin-RevId: 291547054 Change-Id: Ia44b4d724805912961cf4f1fae165df9bad0c3b2
This commit is contained in:
parent
c71fb79cbc
commit
90e6bdca1f
@ -45,6 +45,7 @@ cc_library(
|
||||
"xrt_compilation_cache.cc",
|
||||
"xrt_device.cc",
|
||||
"xrt_memory_manager.cc",
|
||||
"xrt_metrics.cc",
|
||||
"xrt_state.cc",
|
||||
"xrt_util.cc",
|
||||
],
|
||||
@ -52,6 +53,7 @@ cc_library(
|
||||
"xrt_compilation_cache.h",
|
||||
"xrt_device.h",
|
||||
"xrt_memory_manager.h",
|
||||
"xrt_metrics.h",
|
||||
"xrt_refptr.h",
|
||||
"xrt_state.h",
|
||||
"xrt_util.h",
|
||||
@ -75,10 +77,11 @@ cc_library(
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:regexp_internal",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/stream_executor",
|
||||
"//tensorflow/stream_executor:device_memory_allocator",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
],
|
||||
)
|
||||
|
@ -33,6 +33,7 @@ limitations under the License.
|
||||
#include "tensorflow/compiler/xrt/xrt.pb.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_compilation_cache.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_device.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_metrics.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_util.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/resource_mgr.h"
|
||||
@ -41,6 +42,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/types.pb.h"
|
||||
#include "tensorflow/core/lib/core/refcount.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/lib/monitoring/timed.h"
|
||||
#include "tensorflow/core/lib/strings/proto_serialization.h"
|
||||
#include "tensorflow/core/platform/fingerprint.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
@ -137,6 +139,7 @@ Status XRTCompileOp::Compile(OpKernelContext* ctx,
|
||||
|
||||
void XRTCompileOp::Compute(OpKernelContext* ctx) {
|
||||
VLOG(1) << "XRTCompileOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetCompileCell());
|
||||
|
||||
ResourceMgr* rm;
|
||||
OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm));
|
||||
@ -207,6 +210,7 @@ XRTReleaseCompilationRefOp::~XRTReleaseCompilationRefOp() = default;
|
||||
|
||||
void XRTReleaseCompilationRefOp::Compute(OpKernelContext* ctx) {
|
||||
VLOG(1) << "XRTReleaseCompilationRefOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseCompilationCell());
|
||||
|
||||
ResourceMgr* rm;
|
||||
OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm));
|
||||
|
@ -27,6 +27,7 @@ limitations under the License.
|
||||
#include "tensorflow/compiler/xrt/xrt_compilation_cache.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_device.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_memory_manager.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_metrics.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_state.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_util.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
@ -35,6 +36,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/lib/core/refcount.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/lib/gtl/cleanup.h"
|
||||
#include "tensorflow/core/lib/monitoring/timed.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/stream_executor/stream_executor.h"
|
||||
#include "tensorflow/stream_executor/stream_executor_internal.h"
|
||||
@ -248,6 +250,7 @@ void XRTExecuteOp::ComputeAsync(OpKernelContext* context, DoneCallback done) {
|
||||
|
||||
Status XRTExecuteOp::DoWork(OpKernelContext* context) {
|
||||
VLOG(1) << "XRTExecuteOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetExecuteCell());
|
||||
ResourceMgr* rm;
|
||||
TF_RETURN_IF_ERROR(
|
||||
XRTGenericDeviceAccessor::GetResourceManager(context, &rm));
|
||||
@ -333,6 +336,7 @@ void XRTExecuteChainedOp::ComputeAsync(OpKernelContext* context,
|
||||
|
||||
Status XRTExecuteChainedOp::DoWork(OpKernelContext* context) {
|
||||
VLOG(1) << "XRTExecuteChainedOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetExecuteChainedCell());
|
||||
ResourceMgr* rm;
|
||||
TF_RETURN_IF_ERROR(
|
||||
XRTGenericDeviceAccessor::GetResourceManager(context, &rm));
|
||||
|
@ -16,15 +16,45 @@ limitations under the License.
|
||||
// Classes for allocating XLA literals in device memory and managing handles
|
||||
// that refer to them.
|
||||
|
||||
#include "tensorflow/compiler/xrt/kernels/xrt_state_ops.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/compiler/xrt/kernels/xrt_state_ops.h"
|
||||
|
||||
#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
|
||||
#include "tensorflow/compiler/xla/client/local_client.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_metrics.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
class XRTMetricsCollectOp : public OpKernel {
|
||||
public:
|
||||
explicit XRTMetricsCollectOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTMetricsCollectOp::Compute";
|
||||
|
||||
const Tensor& metrics_proto = ctx->input(0);
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(metrics_proto.shape()),
|
||||
errors::Internal("request input should be a string scalar"));
|
||||
xrt::XRTMetricsCollect metrics;
|
||||
OP_REQUIRES(ctx, metrics.ParseFromString(metrics_proto.scalar<tstring>()()),
|
||||
errors::InvalidArgument(
|
||||
"Unable to parse request input to XRTMetricsCollect"));
|
||||
|
||||
xla::StatusOr<xrt::MetricsReport> collected_metrics_or =
|
||||
CollectMetrics(metrics);
|
||||
OP_REQUIRES_OK(ctx, collected_metrics_or.status());
|
||||
xrt::MetricsReport collected_metrics =
|
||||
collected_metrics_or.ConsumeValueOrDie();
|
||||
Tensor output(DT_STRING, TensorShape({}));
|
||||
output.scalar<tstring>()() = collected_metrics.SerializeAsString();
|
||||
ctx->set_output(0, output);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("XRTAllocate")
|
||||
.Device(DEVICE_XLA_GPU)
|
||||
@ -161,4 +191,7 @@ REGISTER_KERNEL_BUILDER(Name("XRTCompactAllocations").Device(DEVICE_XLA_GPU),
|
||||
REGISTER_KERNEL_BUILDER(Name("XRTCompactAllocations").Device(DEVICE_XLA_CPU),
|
||||
XRTCompactAllocationsOp<XRTGenericDeviceAccessor>);
|
||||
|
||||
// Registers XRTMetricsCollectOp as the kernel of the "XRTMetricsCollect" op
// for DEVICE_CPU.
REGISTER_KERNEL_BUILDER(Name("XRTMetricsCollect").Device(DEVICE_CPU),
|
||||
XRTMetricsCollectOp);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -35,6 +35,7 @@ limitations under the License.
|
||||
#include "tensorflow/compiler/xrt/xrt.pb.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_device.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_memory_manager.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_metrics.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_state.h"
|
||||
#include "tensorflow/core/common_runtime/dma_helper.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
@ -46,6 +47,8 @@ limitations under the License.
|
||||
#include "tensorflow/core/lib/core/refcount.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/lib/gtl/cleanup.h"
|
||||
#include "tensorflow/core/lib/monitoring/percentile_sampler.h"
|
||||
#include "tensorflow/core/lib/monitoring/timed.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -170,6 +173,7 @@ class XRTAllocateOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTAllocateOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetAllocateCell());
|
||||
|
||||
const Tensor& allocation_info = ctx->input(0);
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(allocation_info.shape()),
|
||||
@ -223,6 +227,8 @@ class XRTAllocateUninitializedOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTAllocateUninitializedOp::Compute";
|
||||
auto timed =
|
||||
monitoring::MakeTimed(xrt_metrics::GetAllocateUninitializedCell());
|
||||
ResourceMgr* rm;
|
||||
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
|
||||
|
||||
@ -294,6 +300,8 @@ class XRTAllocateFromTensorOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTAllocateFromTensorOp::Compute";
|
||||
auto timed =
|
||||
monitoring::MakeTimed(xrt_metrics::GetAllocateFromTensorCell());
|
||||
|
||||
OpInputList values;
|
||||
OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &values));
|
||||
@ -362,6 +370,7 @@ class XRTSubTupleOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTSubTupleOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetSubTupleCell());
|
||||
|
||||
const Tensor& handle_tensor = ctx->input(0);
|
||||
OP_REQUIRES(
|
||||
@ -412,6 +421,7 @@ class XRTMakeTupleOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTMakeTupleOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetMakeTupleCell());
|
||||
|
||||
const Tensor& tuple_info = ctx->input(0);
|
||||
OP_REQUIRES(
|
||||
@ -482,6 +492,7 @@ class XRTReadLiteralOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTReadLiteralOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetReadLiteralCell());
|
||||
|
||||
const Tensor& handle_tensor = ctx->input(0);
|
||||
OP_REQUIRES(
|
||||
@ -532,6 +543,7 @@ class XRTReadToTensorOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTReadToTensorOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetReadToTensorCell());
|
||||
|
||||
const Tensor& handle_tensor = ctx->input(0);
|
||||
// TODO(phawkins,dlibenzi): accept multiple handles (i.e., vectors, not
|
||||
@ -615,6 +627,7 @@ class XRTWriteLiteralOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTWriteLiteralOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetWriteLiteralCell());
|
||||
|
||||
const Tensor& handle_tensor = ctx->input(0);
|
||||
OP_REQUIRES(
|
||||
@ -665,6 +678,7 @@ class XRTReleaseAllocationOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTReleaseAllocationOp::Compute";
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseAllocationCell());
|
||||
|
||||
ResourceMgr* rm;
|
||||
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
|
||||
@ -693,6 +707,8 @@ class XRTReleaseAllAllocationsOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTReleaseAllAllocationsOp::Compute";
|
||||
auto timed =
|
||||
monitoring::MakeTimed(xrt_metrics::GetReleaseAllAllocationsCell());
|
||||
|
||||
ResourceMgr* rm;
|
||||
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
|
||||
@ -710,6 +726,8 @@ class XRTCompactAllocationsOp : public OpKernel {
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
VLOG(1) << "XRTCompactAllocationsOp::Compute";
|
||||
auto timed =
|
||||
monitoring::MakeTimed(xrt_metrics::GetCompactAllocationsCell());
|
||||
|
||||
ResourceMgr* rm;
|
||||
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
|
||||
|
@ -216,4 +216,16 @@ backing the handles, and re-allocate and send back the data to the device.
|
||||
This operation helps with device memory fragmentation.
|
||||
)");
|
||||
|
||||
// Op definition for XRTMetricsCollect: one string input ("request"), one
// string output ("result"), with the output shape declared via ScalarShape.
REGISTER_OP("XRTMetricsCollect")
|
||||
.Input("request: string")
|
||||
.Output("result: string")
|
||||
.SetShapeFn(tensorflow::shape_inference::ScalarShape)
|
||||
.Doc(
|
||||
R"(
|
||||
Reads the selected metric values from the metrics collection registry.
|
||||
|
||||
'request' is a serialized xrt::XRTMetricsCollect proto.
|
||||
'result' is a serialized xrt::MetricsReport proto.
|
||||
)");
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -1675,6 +1675,27 @@ TEST(RawApiTest, TestDeviceMemorySwap) {
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the XRTMetricsCollect op on the CPU device with a regex restricting the
// selection to the XRT metric namespace, and checks that every metric in the
// returned report carries that prefix.
TEST(RawApiTest, TestMetricsFetch) {
  xrt::XRTMetricsCollect request;
  request.add_metrics_regex("/tensorflow/xrt/.*");

  // Build the graph: serialized request constant -> XRTMetricsCollect.
  Scope root = Scope::NewRootScope().WithDevice("/device:CPU:0");
  auto request_const = ops::Const(root, request.SerializeAsString());
  Output report_output = ops::XRTMetricsCollect(root, request_const);
  TF_ASSERT_OK(root.status());

  ClientSession session(root);
  std::vector<Tensor> fetched;
  TF_EXPECT_OK(session.Run({report_output}, &fetched));
  ASSERT_EQ(fetched.size(), 1);

  // Decode the report and verify the name prefix (16 == length of the prefix).
  xrt::MetricsReport report;
  EXPECT_TRUE(report.ParseFromString(fetched[0].scalar<tstring>()()));
  for (const auto& reported : report.metrics()) {
    EXPECT_EQ(reported.name().compare(0, 16, "/tensorflow/xrt/"), 0);
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -191,3 +191,53 @@ message XRTChainedExecutePlan {
|
||||
// The post order with the XRT computations to be executed.
|
||||
repeated XRTChainedExecuteOp ops = 1;
|
||||
}
|
||||
|
||||
// The message used to encode the options for the XRTMetricsCollect operation.
|
||||
message XRTMetricsCollect {
|
||||
// A list of regular expressions to match the metric names. Empty means to
|
||||
// return all the metrics reported by the collection registry.
|
||||
repeated string metrics_regex = 1;
|
||||
}
|
||||
|
||||
message Percentiles {
|
||||
message Point {
|
||||
// In the [0, 100] range.
|
||||
double percentile = 1;
|
||||
double value = 2;
|
||||
}
|
||||
|
||||
// The time (in nanoseconds) of the first sample within the samples buffer.
|
||||
uint64 start_nstime = 1;
|
||||
// The time (in nanoseconds) of the last sample within the samples buffer.
|
||||
uint64 end_nstime = 2;
|
||||
// The minimum value of the samples within the samples buffer.
|
||||
double min_value = 3;
|
||||
// The maximum value of the samples within the samples buffer.
|
||||
double max_value = 4;
|
||||
// The mean value of the samples within the samples buffer.
|
||||
double mean = 5;
|
||||
// The standard deviation of the samples within the samples buffer.
|
||||
double stddev = 6;
|
||||
// The number of samples within the samples buffer.
|
||||
uint64 num_samples = 7;
|
||||
// The total number of times a value has been posted to this metric.
|
||||
uint64 total_samples = 8;
|
||||
// The sum of all the posted values.
|
||||
double accumulator = 9;
|
||||
// The percentile points reported by the metric.
|
||||
repeated Point points = 10;
|
||||
}
|
||||
|
||||
// One reported metric: its name together with a single value, which is either
// a Percentiles summary or a plain int64.
message MetricValues {
|
||||
// The metric name.
|
||||
string name = 1;
|
||||
|
||||
// The metric value. Being a oneof, at most one of the fields below is set.
oneof values_oneof {
|
||||
Percentiles percentiles_value = 2;
|
||||
int64 int64_value = 3;
|
||||
}
|
||||
}
|
||||
|
||||
// The message returned (serialized) by the XRTMetricsCollect operation.
message MetricsReport {
|
||||
// The collected metrics, one MetricValues entry per reported value.
repeated MetricValues metrics = 1;
|
||||
}
|
||||
|
@ -20,7 +20,10 @@ limitations under the License.
|
||||
#include <unordered_map>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "tensorflow/compiler/xrt/xrt_metrics.h"
|
||||
#include "tensorflow/core/lib/monitoring/timed.h"
|
||||
#include "tensorflow/core/lib/random/random.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
@ -97,6 +100,9 @@ class XRTMemoryManager::DeviceContext {
|
||||
|
||||
Status CompactAllocations(XRTMemoryManager* memory_manager,
|
||||
xla::Backend* backend) {
|
||||
profiler::TraceMe trace_me("XRTMemoryManager::CompactAllocations",
|
||||
/*level=*/2);
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetMemoryCompactCell());
|
||||
VLOG(4) << "CompactAllocations started";
|
||||
mutex_lock lock(lock_);
|
||||
Status status;
|
||||
@ -143,6 +149,8 @@ class XRTMemoryManager::DeviceContext {
|
||||
// Tries to free size bytes by freeing some unpinned device memory. Returns
|
||||
// the amount of memory which was able to free.
|
||||
xla::StatusOr<size_t> TryFreeMemory(xla::Backend* backend, size_t size) {
|
||||
profiler::TraceMe trace_me("XRTMemoryManager::TryFreeMemory", /*level=*/2);
|
||||
auto timed = monitoring::MakeTimed(xrt_metrics::GetTryFreeMemoryCell());
|
||||
mutex_lock lock(lock_);
|
||||
size_t swapped_size = 0;
|
||||
for (auto it = allocs_.rbegin(); it != allocs_.rend(); ++it) {
|
||||
|
255
tensorflow/compiler/xrt/xrt_metrics.cc
Normal file
255
tensorflow/compiler/xrt/xrt_metrics.cc
Normal file
@ -0,0 +1,255 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/compiler/xrt/xrt_metrics.h"
|
||||
|
||||
#include "tensorflow/core/lib/monitoring/collection_registry.h"
|
||||
#include "tensorflow/core/platform/regexp.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
// Passed as the last argument to PercentileSampler<0>::New() for every XRT
// metric defined in this file (presumably the capacity of each sampler's
// sample buffer -- confirm against percentile_sampler.h).
static const size_t kMaxSamples = 1024;
|
||||
|
||||
// Returns the percentile levels (in the [0, 100] range) that every XRT
// percentile sampler in this file is created with.
std::vector<double> GetDefaultPercentiles() {
  static const double kDefaultPercentiles[] = {25.0, 50.0, 80.0,
                                               90.0, 95.0, 99.0};
  return std::vector<double>(std::begin(kDefaultPercentiles),
                             std::end(kDefaultPercentiles));
}
|
||||
|
||||
// Tells whether the metric called `name` is selected by the `metrics` request.
// A request carrying no regular expressions selects everything; otherwise the
// name must fully match at least one of the request's expressions.
bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics,
                      const string& name) {
  const auto& patterns = metrics.metrics_regex();
  if (patterns.empty()) {
    return true;
  }
  for (const auto& pattern : patterns) {
    if (RE2::FullMatch(name, pattern)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
// Appends one xrt::MetricValues entry to `report` for every point contained in
// `point_set`. Each entry is named after the point set's metric; percentile
// and int64 points also get their value copied over, while points of any other
// value type are reported with the name only. Always returns OK.
Status AddMetrics(xrt::MetricsReport* report,
                  const monitoring::PointSet& point_set) {
  for (const auto& point : point_set.points) {
    xrt::MetricValues* entry = report->add_metrics();
    entry->set_name(point_set.metric_name);

    switch (point->value_type) {
      case monitoring::ValueType::kPercentiles: {
        const auto& src = point->percentiles_value;
        xrt::Percentiles* dst = entry->mutable_percentiles_value();
        dst->set_start_nstime(src.start_nstime);
        dst->set_end_nstime(src.end_nstime);
        dst->set_min_value(src.min_value);
        dst->set_max_value(src.max_value);
        dst->set_mean(src.mean);
        dst->set_stddev(src.stddev);
        dst->set_num_samples(src.num_samples);
        dst->set_total_samples(src.total_samples);
        dst->set_accumulator(src.accumulator);
        for (const auto& src_point : src.points) {
          xrt::Percentiles::Point* dst_point = dst->add_points();
          dst_point->set_percentile(src_point.percentile);
          dst_point->set_value(src_point.value);
        }
        break;
      }
      case monitoring::ValueType::kInt64:
        entry->set_int64_value(point->int64_value);
        break;
      default:
        // No matching field in xrt::MetricValues: leave the value unset.
        break;
    }
  }
  return Status::OK();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace xrt_metrics {
|
||||
|
||||
monitoring::PercentileSamplerCell* GetAllocateCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/allocate_uninitialized",
|
||||
"Tracks XRTAllocateUninitialized times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/allocate_from_tensor",
|
||||
"Tracks XRTAllocateFromTensor times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetSubTupleCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetMakeTupleCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetReadLiteralCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetReadToTensorCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetWriteLiteralCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetReleaseAllocationCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/release_allocation",
|
||||
"Tracks XRTReleaseAllocation times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/release_all_allocations",
|
||||
"Tracks XRTReleaseAllAllocations times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetCompactAllocationsCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/compact_allocations",
|
||||
"Tracks XRTCompactAllocations times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetCompileCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetReleaseCompilationCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/release_compilation",
|
||||
"Tracks XRTReleaseCompilationRef times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetExecuteCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetExecuteChainedCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/ops/execute_chained",
|
||||
"Tracks XRTExecuteChained times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetMemoryCompactCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/memory_manager/compaction",
|
||||
"Tracks XRT memory manager memory compaction times"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() {
|
||||
static monitoring::PercentileSamplerCell* cell =
|
||||
monitoring::PercentileSampler<0>::New(
|
||||
{"/tensorflow/xrt/memory_manager/try_free_memory",
|
||||
"Tracks XRT memory manager times in trying to "
|
||||
"free memory by swpping device memory to host memory"},
|
||||
GetDefaultPercentiles(), kMaxSamples)
|
||||
->GetCell();
|
||||
return cell;
|
||||
}
|
||||
|
||||
} // namespace xrt_metrics
|
||||
|
||||
// Takes a snapshot of the default monitoring collection registry (without
// metric descriptors) and returns a report holding the values of every metric
// selected by `metrics`. Fails with the first error returned by AddMetrics().
xla::StatusOr<xrt::MetricsReport> CollectMetrics(
    const xrt::XRTMetricsCollect& metrics) {
  monitoring::CollectionRegistry::CollectMetricsOptions collect_options;
  collect_options.collect_metric_descriptors = false;
  auto snapshot =
      monitoring::CollectionRegistry::Default()->CollectMetrics(
          collect_options);

  xrt::MetricsReport report;
  for (auto& entry : snapshot->point_set_map) {
    // entry.first is the metric name, entry.second its point set.
    if (!IsSelectedMetric(metrics, entry.first)) {
      continue;
    }
    TF_RETURN_IF_ERROR(AddMetrics(&report, *entry.second));
  }
  return std::move(report);
}
|
||||
|
||||
} // namespace tensorflow
|
55
tensorflow/compiler/xrt/xrt_metrics.h
Normal file
55
tensorflow/compiler/xrt/xrt_metrics.h
Normal file
@ -0,0 +1,55 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_
|
||||
#define TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_
|
||||
|
||||
#include "tensorflow/compiler/xla/statusor.h"
|
||||
#include "tensorflow/compiler/xrt/xrt.pb.h"
|
||||
#include "tensorflow/core/lib/monitoring/percentile_sampler.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace xrt_metrics {
|
||||
|
||||
// Defines the singletons of the metrics populated by the XRT op framework.
|
||||
// Since for a single XRT op there can be many device specific versions (CPU,
|
||||
// GPU, TPU), and since the monitoring subsystem does not allow multiple
|
||||
// registrations of the same metric name, we define them all in this file.
|
||||
monitoring::PercentileSamplerCell* GetAllocateCell();
|
||||
monitoring::PercentileSamplerCell* GetAllocateUninitializedCell();
|
||||
monitoring::PercentileSamplerCell* GetAllocateFromTensorCell();
|
||||
monitoring::PercentileSamplerCell* GetSubTupleCell();
|
||||
monitoring::PercentileSamplerCell* GetMakeTupleCell();
|
||||
monitoring::PercentileSamplerCell* GetReadLiteralCell();
|
||||
monitoring::PercentileSamplerCell* GetReadToTensorCell();
|
||||
monitoring::PercentileSamplerCell* GetWriteLiteralCell();
|
||||
monitoring::PercentileSamplerCell* GetReleaseAllocationCell();
|
||||
monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell();
|
||||
monitoring::PercentileSamplerCell* GetCompactAllocationsCell();
|
||||
monitoring::PercentileSamplerCell* GetCompileCell();
|
||||
monitoring::PercentileSamplerCell* GetReleaseCompilationCell();
|
||||
monitoring::PercentileSamplerCell* GetExecuteCell();
|
||||
monitoring::PercentileSamplerCell* GetExecuteChainedCell();
|
||||
monitoring::PercentileSamplerCell* GetMemoryCompactCell();
|
||||
monitoring::PercentileSamplerCell* GetTryFreeMemoryCell();
|
||||
|
||||
} // namespace xrt_metrics
|
||||
|
||||
xla::StatusOr<xrt::MetricsReport> CollectMetrics(
|
||||
const xrt::XRTMetricsCollect& metrics);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_
|
@ -1895,6 +1895,7 @@ cc_library(
|
||||
"//tensorflow/core/lib/monitoring:mobile_sampler",
|
||||
"//tensorflow/core/lib/monitoring:percentile_sampler",
|
||||
"//tensorflow/core/lib/monitoring:sampler",
|
||||
"//tensorflow/core/lib/monitoring:timed",
|
||||
"//tensorflow/core/lib/random:exact_uniform_int",
|
||||
"//tensorflow/core/lib/random:philox",
|
||||
"//tensorflow/core/lib/random:philox_random",
|
||||
|
@ -25,6 +25,17 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
# Header-only target exporting timed.h; it depends on the platform env_time
# and types targets.
cc_library(
|
||||
name = "timed",
|
||||
hdrs = [
|
||||
"timed.h",
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/core/platform:env_time",
|
||||
"//tensorflow/core/platform:types",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "collected_metrics",
|
||||
hdrs = [
|
||||
@ -210,6 +221,7 @@ filegroup(
|
||||
"mobile_gauge.h",
|
||||
"mobile_sampler.h",
|
||||
"sampler.h",
|
||||
"timed.h",
|
||||
"types.h",
|
||||
],
|
||||
visibility = ["//tensorflow/core:__pkg__"],
|
||||
@ -225,6 +237,7 @@ filegroup(
|
||||
"metric_def.h",
|
||||
"percentile_sampler.h",
|
||||
"sampler.h",
|
||||
"timed.h",
|
||||
"types.h",
|
||||
],
|
||||
visibility = ["//tensorflow/core:__pkg__"],
|
||||
|
@ -31,10 +31,10 @@ void PercentileSamplerCell::Add(double sample) {
|
||||
mutex_lock l(mu_);
|
||||
samples_[next_position_] = {nstime, sample};
|
||||
++next_position_;
|
||||
if (next_position_ >= samples_.size()) {
|
||||
if (TF_PREDICT_FALSE(next_position_ >= samples_.size())) {
|
||||
next_position_ = 0;
|
||||
}
|
||||
if (num_samples_ < samples_.size()) {
|
||||
if (TF_PREDICT_FALSE(num_samples_ < samples_.size())) {
|
||||
++num_samples_;
|
||||
}
|
||||
++total_samples_;
|
||||
|
48
tensorflow/core/lib/monitoring/timed.h
Normal file
48
tensorflow/core/lib/monitoring/timed.h
Normal file
@ -0,0 +1,48 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_
|
||||
#define TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_
|
||||
|
||||
#include "tensorflow/core/platform/env_time.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace monitoring {
|
||||
|
||||
// Takes a Sampler, PercentileSample or Gauge cell, and post timing values
|
||||
// (default in milliseconds) according to its scope lifetime.
|
||||
template <typename T>
|
||||
class Timed {
|
||||
public:
|
||||
explicit Timed(T* cell, double scale = 1e-6)
|
||||
: cell_(cell), scale_(scale), start_(EnvTime::NowNanos()) {}
|
||||
|
||||
~Timed() { cell_->Add(scale_ * (EnvTime::NowNanos() - start_)); }
|
||||
|
||||
private:
|
||||
T* cell_ = nullptr;
|
||||
double scale_ = 1e-6;
|
||||
uint64 start_ = 0;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
Timed<T> MakeTimed(T* cell, double scale = 1e-6) {
|
||||
return Timed<T>(cell, scale);
|
||||
}
|
||||
|
||||
} // namespace monitoring
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_
|
Loading…
Reference in New Issue
Block a user