Instrument XRT with metrics and add op to fetch them from client side.

PiperOrigin-RevId: 291547054
Change-Id: Ia44b4d724805912961cf4f1fae165df9bad0c3b2
This commit is contained in:
Davide Libenzi 2020-01-25 12:53:33 -08:00 committed by TensorFlower Gardener
parent c71fb79cbc
commit 90e6bdca1f
15 changed files with 530 additions and 5 deletions

View File

@ -45,6 +45,7 @@ cc_library(
"xrt_compilation_cache.cc",
"xrt_device.cc",
"xrt_memory_manager.cc",
"xrt_metrics.cc",
"xrt_state.cc",
"xrt_util.cc",
],
@ -52,6 +53,7 @@ cc_library(
"xrt_compilation_cache.h",
"xrt_device.h",
"xrt_memory_manager.h",
"xrt_metrics.h",
"xrt_refptr.h",
"xrt_state.h",
"xrt_util.h",
@ -75,10 +77,11 @@ cc_library(
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:regexp_internal",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/stream_executor",
"//tensorflow/stream_executor:device_memory_allocator",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/synchronization",
],
)

View File

@ -33,6 +33,7 @@ limitations under the License.
#include "tensorflow/compiler/xrt/xrt.pb.h"
#include "tensorflow/compiler/xrt/xrt_compilation_cache.h"
#include "tensorflow/compiler/xrt/xrt_device.h"
#include "tensorflow/compiler/xrt/xrt_metrics.h"
#include "tensorflow/compiler/xrt/xrt_util.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/resource_mgr.h"
@ -41,6 +42,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/lib/core/refcount.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/monitoring/timed.h"
#include "tensorflow/core/lib/strings/proto_serialization.h"
#include "tensorflow/core/platform/fingerprint.h"
#include "tensorflow/core/platform/types.h"
@ -137,6 +139,7 @@ Status XRTCompileOp::Compile(OpKernelContext* ctx,
void XRTCompileOp::Compute(OpKernelContext* ctx) {
VLOG(1) << "XRTCompileOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetCompileCell());
ResourceMgr* rm;
OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm));
@ -207,6 +210,7 @@ XRTReleaseCompilationRefOp::~XRTReleaseCompilationRefOp() = default;
void XRTReleaseCompilationRefOp::Compute(OpKernelContext* ctx) {
VLOG(1) << "XRTReleaseCompilationRefOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseCompilationCell());
ResourceMgr* rm;
OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm));

View File

@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/compiler/xrt/xrt_compilation_cache.h"
#include "tensorflow/compiler/xrt/xrt_device.h"
#include "tensorflow/compiler/xrt/xrt_memory_manager.h"
#include "tensorflow/compiler/xrt/xrt_metrics.h"
#include "tensorflow/compiler/xrt/xrt_state.h"
#include "tensorflow/compiler/xrt/xrt_util.h"
#include "tensorflow/core/framework/op_kernel.h"
@ -35,6 +36,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/refcount.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/lib/monitoring/timed.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"
@ -248,6 +250,7 @@ void XRTExecuteOp::ComputeAsync(OpKernelContext* context, DoneCallback done) {
Status XRTExecuteOp::DoWork(OpKernelContext* context) {
VLOG(1) << "XRTExecuteOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetExecuteCell());
ResourceMgr* rm;
TF_RETURN_IF_ERROR(
XRTGenericDeviceAccessor::GetResourceManager(context, &rm));
@ -333,6 +336,7 @@ void XRTExecuteChainedOp::ComputeAsync(OpKernelContext* context,
Status XRTExecuteChainedOp::DoWork(OpKernelContext* context) {
VLOG(1) << "XRTExecuteChainedOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetExecuteChainedCell());
ResourceMgr* rm;
TF_RETURN_IF_ERROR(
XRTGenericDeviceAccessor::GetResourceManager(context, &rm));

View File

@ -16,15 +16,45 @@ limitations under the License.
// Classes for allocating XLA literals in device memory and managing handles
// that refer to them.
#include "tensorflow/compiler/xrt/kernels/xrt_state_ops.h"
#include <memory>
#include <string>
#include "tensorflow/compiler/xrt/kernels/xrt_state_ops.h"
#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
#include "tensorflow/compiler/xla/client/local_client.h"
#include "tensorflow/compiler/xrt/xrt_metrics.h"
namespace tensorflow {
namespace {
// Kernel implementing the XRTMetricsCollect op. Parses a serialized
// xrt::XRTMetricsCollect proto from its scalar string input, collects the
// matching metrics from the monitoring collection registry, and outputs a
// serialized xrt::MetricsReport proto as a scalar string.
class XRTMetricsCollectOp : public OpKernel {
 public:
  explicit XRTMetricsCollectOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    VLOG(1) << "XRTMetricsCollectOp::Compute";

    const Tensor& metrics_proto = ctx->input(0);
    // The request is caller-supplied, so a bad shape is an invalid argument
    // (not an internal invariant violation), consistent with the parse check
    // below.
    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(metrics_proto.shape()),
                errors::InvalidArgument("request input should be a string scalar"));
    xrt::XRTMetricsCollect metrics;
    OP_REQUIRES(ctx, metrics.ParseFromString(metrics_proto.scalar<tstring>()()),
                errors::InvalidArgument(
                    "Unable to parse request input to XRTMetricsCollect"));

    xla::StatusOr<xrt::MetricsReport> collected_metrics_or =
        CollectMetrics(metrics);
    OP_REQUIRES_OK(ctx, collected_metrics_or.status());
    xrt::MetricsReport collected_metrics =
        collected_metrics_or.ConsumeValueOrDie();
    Tensor output(DT_STRING, TensorShape({}));
    output.scalar<tstring>()() = collected_metrics.SerializeAsString();
    ctx->set_output(0, output);
  }
};
} // namespace
REGISTER_KERNEL_BUILDER(Name("XRTAllocate")
.Device(DEVICE_XLA_GPU)
@ -161,4 +191,7 @@ REGISTER_KERNEL_BUILDER(Name("XRTCompactAllocations").Device(DEVICE_XLA_GPU),
REGISTER_KERNEL_BUILDER(Name("XRTCompactAllocations").Device(DEVICE_XLA_CPU),
XRTCompactAllocationsOp<XRTGenericDeviceAccessor>);
REGISTER_KERNEL_BUILDER(Name("XRTMetricsCollect").Device(DEVICE_CPU),
XRTMetricsCollectOp);
} // namespace tensorflow

View File

@ -35,6 +35,7 @@ limitations under the License.
#include "tensorflow/compiler/xrt/xrt.pb.h"
#include "tensorflow/compiler/xrt/xrt_device.h"
#include "tensorflow/compiler/xrt/xrt_memory_manager.h"
#include "tensorflow/compiler/xrt/xrt_metrics.h"
#include "tensorflow/compiler/xrt/xrt_state.h"
#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/framework/op_kernel.h"
@ -46,6 +47,8 @@ limitations under the License.
#include "tensorflow/core/lib/core/refcount.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/lib/monitoring/percentile_sampler.h"
#include "tensorflow/core/lib/monitoring/timed.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
@ -170,6 +173,7 @@ class XRTAllocateOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTAllocateOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetAllocateCell());
const Tensor& allocation_info = ctx->input(0);
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(allocation_info.shape()),
@ -223,6 +227,8 @@ class XRTAllocateUninitializedOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTAllocateUninitializedOp::Compute";
auto timed =
monitoring::MakeTimed(xrt_metrics::GetAllocateUninitializedCell());
ResourceMgr* rm;
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
@ -294,6 +300,8 @@ class XRTAllocateFromTensorOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTAllocateFromTensorOp::Compute";
auto timed =
monitoring::MakeTimed(xrt_metrics::GetAllocateFromTensorCell());
OpInputList values;
OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &values));
@ -362,6 +370,7 @@ class XRTSubTupleOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTSubTupleOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetSubTupleCell());
const Tensor& handle_tensor = ctx->input(0);
OP_REQUIRES(
@ -412,6 +421,7 @@ class XRTMakeTupleOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTMakeTupleOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetMakeTupleCell());
const Tensor& tuple_info = ctx->input(0);
OP_REQUIRES(
@ -482,6 +492,7 @@ class XRTReadLiteralOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTReadLiteralOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetReadLiteralCell());
const Tensor& handle_tensor = ctx->input(0);
OP_REQUIRES(
@ -532,6 +543,7 @@ class XRTReadToTensorOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTReadToTensorOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetReadToTensorCell());
const Tensor& handle_tensor = ctx->input(0);
// TODO(phawkins,dlibenzi): accept multiple handles (i.e., vectors, not
@ -615,6 +627,7 @@ class XRTWriteLiteralOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTWriteLiteralOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetWriteLiteralCell());
const Tensor& handle_tensor = ctx->input(0);
OP_REQUIRES(
@ -665,6 +678,7 @@ class XRTReleaseAllocationOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTReleaseAllocationOp::Compute";
auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseAllocationCell());
ResourceMgr* rm;
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
@ -693,6 +707,8 @@ class XRTReleaseAllAllocationsOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTReleaseAllAllocationsOp::Compute";
auto timed =
monitoring::MakeTimed(xrt_metrics::GetReleaseAllAllocationsCell());
ResourceMgr* rm;
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
@ -710,6 +726,8 @@ class XRTCompactAllocationsOp : public OpKernel {
void Compute(OpKernelContext* ctx) override {
VLOG(1) << "XRTCompactAllocationsOp::Compute";
auto timed =
monitoring::MakeTimed(xrt_metrics::GetCompactAllocationsCell());
ResourceMgr* rm;
OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));

View File

@ -216,4 +216,16 @@ backing the handles, and re-allocate and send back the data to the device.
This operation helps with device memory fragmentation.
)");
REGISTER_OP("XRTMetricsCollect")
.Input("request: string")
.Output("result: string")
.SetShapeFn(tensorflow::shape_inference::ScalarShape)
.Doc(
R"(
Reads the selected metric values from the metrics collection registry.
'request' is a serialized xrt::XRTMetricsCollect proto.
'result' is a serialized xrt::MetricsReport proto.
)");
} // namespace tensorflow

View File

@ -1675,6 +1675,27 @@ TEST(RawApiTest, TestDeviceMemorySwap) {
}
}
// Verifies that the XRTMetricsCollect op returns a parseable MetricsReport
// and that, when a regex selector is supplied, only metrics whose names match
// the selector are returned.
TEST(RawApiTest, TestMetricsFetch) {
  xrt::XRTMetricsCollect metrics;
  metrics.add_metrics_regex("/tensorflow/xrt/.*");

  // XRTMetricsCollect is registered on the CPU device only.
  Scope root = Scope::NewRootScope().WithDevice("/device:CPU:0");
  auto metrics_value = ops::Const(root, metrics.SerializeAsString());
  Output result = ops::XRTMetricsCollect(root, metrics_value);
  TF_ASSERT_OK(root.status());

  ClientSession session(root);
  std::vector<Tensor> outputs;
  TF_EXPECT_OK(session.Run({result}, &outputs));
  ASSERT_EQ(outputs.size(), 1);

  xrt::MetricsReport report;
  EXPECT_TRUE(report.ParseFromString(outputs[0].scalar<tstring>()()));
  for (auto& metric : report.metrics()) {
    // Every returned metric name must start with the 16-character
    // "/tensorflow/xrt/" prefix requested above.
    EXPECT_EQ(metric.name().compare(0, 16, "/tensorflow/xrt/"), 0);
  }
}
} // namespace
} // namespace tensorflow

View File

@ -191,3 +191,53 @@ message XRTChainedExecutePlan {
// The post order with the XRT computations to be executed.
repeated XRTChainedExecuteOp ops = 1;
}
// The message used to encode the options for the XRTMetricsCollect operation.
message XRTMetricsCollect {
  // A list of regular expressions used to match the metric names; a metric is
  // selected when its full name matches any of them. An empty list means to
  // return all the metrics reported by the collection registry.
  repeated string metrics_regex = 1;
}
// Statistics and percentile points of a sampling metric, computed over a
// fixed-capacity buffer of the most recent samples.
message Percentiles {
  message Point {
    // In the [0, 100] range.
    double percentile = 1;
    // The sampled value at this percentile.
    double value = 2;
  }

  // The time (in nanoseconds) of the first sample within the samples buffer.
  uint64 start_nstime = 1;
  // The time (in nanoseconds) of the last sample within the samples buffer.
  uint64 end_nstime = 2;
  // The minimum value of the samples within the samples buffer.
  double min_value = 3;
  // The maximum value of the samples within the samples buffer.
  double max_value = 4;
  // The mean value of the samples within the samples buffer.
  double mean = 5;
  // The standard deviation of the samples within the samples buffer.
  double stddev = 6;
  // The number of samples within the samples buffer.
  uint64 num_samples = 7;
  // The total number of times a value has been posted to this metric.
  uint64 total_samples = 8;
  // The sum of all the posted values.
  double accumulator = 9;
  // The percentile points reported by the metric.
  repeated Point points = 10;
}
// A single collected metric: its name plus a value whose form depends on the
// kind of metric (timing sampler vs. plain counter/gauge).
message MetricValues {
  // The metric name.
  string name = 1;

  oneof values_oneof {
    // Set for percentile-sampler metrics.
    Percentiles percentiles_value = 2;
    // Set for int64-valued metrics.
    int64 int64_value = 3;
  }
}
// The result of an XRTMetricsCollect operation: all the selected metrics.
message MetricsReport {
  repeated MetricValues metrics = 1;
}

View File

@ -20,7 +20,10 @@ limitations under the License.
#include <unordered_map>
#include "absl/memory/memory.h"
#include "tensorflow/compiler/xrt/xrt_metrics.h"
#include "tensorflow/core/lib/monitoring/timed.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/profiler/lib/traceme.h"
namespace tensorflow {
namespace {
@ -97,6 +100,9 @@ class XRTMemoryManager::DeviceContext {
Status CompactAllocations(XRTMemoryManager* memory_manager,
xla::Backend* backend) {
profiler::TraceMe trace_me("XRTMemoryManager::CompactAllocations",
/*level=*/2);
auto timed = monitoring::MakeTimed(xrt_metrics::GetMemoryCompactCell());
VLOG(4) << "CompactAllocations started";
mutex_lock lock(lock_);
Status status;
@ -143,6 +149,8 @@ class XRTMemoryManager::DeviceContext {
// Tries to free size bytes by freeing some unpinned device memory. Returns
// the amount of memory which was able to free.
xla::StatusOr<size_t> TryFreeMemory(xla::Backend* backend, size_t size) {
profiler::TraceMe trace_me("XRTMemoryManager::TryFreeMemory", /*level=*/2);
auto timed = monitoring::MakeTimed(xrt_metrics::GetTryFreeMemoryCell());
mutex_lock lock(lock_);
size_t swapped_size = 0;
for (auto it = allocs_.rbegin(); it != allocs_.rend(); ++it) {

View File

@ -0,0 +1,255 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xrt/xrt_metrics.h"
#include "tensorflow/core/lib/monitoring/collection_registry.h"
#include "tensorflow/core/platform/regexp.h"
namespace tensorflow {
namespace {

// Capacity of the circular sample buffer kept by each percentile sampler.
constexpr size_t kMaxSamples = 1024;

// Percentile points reported for every XRT timing metric.
std::vector<double> GetDefaultPercentiles() {
  return {25.0, 50.0, 80.0, 90.0, 95.0, 99.0};
}

// Returns true if the metric named 'name' is selected by the collect request:
// an empty regex list selects every metric, otherwise the name must fully
// match at least one of the regular expressions.
bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics,
                      const string& name) {
  if (metrics.metrics_regex().empty()) {
    return true;
  }
  bool selected = false;
  for (const string& metric_regex : metrics.metrics_regex()) {
    if (RE2::FullMatch(name, metric_regex)) {
      selected = true;
      break;
    }
  }
  return selected;
}

// Translates one collected point set into MetricValues entries appended to
// 'report'. Percentile and int64 point values are converted; points of any
// other value type are emitted with just the metric name.
Status AddMetrics(xrt::MetricsReport* report,
                  const monitoring::PointSet& point_set) {
  for (auto& metric_point : point_set.points) {
    xrt::MetricValues* metric_values = report->add_metrics();
    metric_values->set_name(point_set.metric_name);
    if (metric_point->value_type == monitoring::ValueType::kInt64) {
      metric_values->set_int64_value(metric_point->int64_value);
    } else if (metric_point->value_type ==
               monitoring::ValueType::kPercentiles) {
      const auto& src_pct = metric_point->percentiles_value;
      xrt::Percentiles* percentiles = metric_values->mutable_percentiles_value();
      percentiles->set_start_nstime(src_pct.start_nstime);
      percentiles->set_end_nstime(src_pct.end_nstime);
      percentiles->set_min_value(src_pct.min_value);
      percentiles->set_max_value(src_pct.max_value);
      percentiles->set_mean(src_pct.mean);
      percentiles->set_stddev(src_pct.stddev);
      percentiles->set_num_samples(src_pct.num_samples);
      percentiles->set_total_samples(src_pct.total_samples);
      percentiles->set_accumulator(src_pct.accumulator);
      for (auto& src_point : src_pct.points) {
        xrt::Percentiles::Point* xpoint = percentiles->add_points();
        xpoint->set_percentile(src_point.percentile);
        xpoint->set_value(src_point.value);
      }
    }
  }
  return Status::OK();
}

}  // namespace
namespace xrt_metrics {

// Each Get*Cell() accessor below registers its PercentileSampler with the
// monitoring collection registry exactly once (via a function-local static)
// and then keeps returning the same cell. This is required because the
// monitoring subsystem does not allow multiple registrations of the same
// metric name, while a single XRT op can have many device specific versions
// (CPU, GPU, TPU) all posting to the same metric.

monitoring::PercentileSamplerCell* GetAllocateCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/allocate_uninitialized",
           "Tracks XRTAllocateUninitialized times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/allocate_from_tensor",
           "Tracks XRTAllocateFromTensor times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetSubTupleCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetMakeTupleCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetReadLiteralCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetReadToTensorCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetWriteLiteralCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetReleaseAllocationCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/release_allocation",
           "Tracks XRTReleaseAllocation times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/release_all_allocations",
           "Tracks XRTReleaseAllAllocations times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetCompactAllocationsCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/compact_allocations",
           "Tracks XRTCompactAllocations times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetCompileCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetReleaseCompilationCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/release_compilation",
           "Tracks XRTReleaseCompilationRef times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetExecuteCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetExecuteChainedCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/ops/execute_chained",
           "Tracks XRTExecuteChained times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetMemoryCompactCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/memory_manager/compaction",
           "Tracks XRT memory manager memory compaction times"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() {
  static monitoring::PercentileSamplerCell* cell =
      monitoring::PercentileSampler<0>::New(
          {"/tensorflow/xrt/memory_manager/try_free_memory",
           // Fixed typo in the user-visible description ("swpping").
           "Tracks XRT memory manager times in trying to "
           "free memory by swapping device memory to host memory"},
          GetDefaultPercentiles(), kMaxSamples)
          ->GetCell();
  return cell;
}

}  // namespace xrt_metrics
// Collects from the global monitoring registry all the metrics whose names
// are selected by 'metrics' (empty selector means all), and packs them into a
// MetricsReport proto. Metric descriptors are skipped since only the values
// are reported.
xla::StatusOr<xrt::MetricsReport> CollectMetrics(
    const xrt::XRTMetricsCollect& metrics) {
  monitoring::CollectionRegistry::CollectMetricsOptions options;
  options.collect_metric_descriptors = false;
  auto collected_metrics =
      monitoring::CollectionRegistry::Default()->CollectMetrics(options);
  xrt::MetricsReport report;
  for (auto& name_pointset : collected_metrics->point_set_map) {
    if (!IsSelectedMetric(metrics, name_pointset.first)) {
      continue;
    }
    TF_RETURN_IF_ERROR(AddMetrics(&report, *name_pointset.second));
  }
  // std::move() is needed here: the return type (StatusOr) differs from the
  // type of 'report', so a plain return would copy instead of moving.
  return std::move(report);
}
} // namespace tensorflow

View File

@ -0,0 +1,55 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_
#define TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xrt/xrt.pb.h"
#include "tensorflow/core/lib/monitoring/percentile_sampler.h"
namespace tensorflow {
namespace xrt_metrics {

// Defines the singletons of the metrics populated by the XRT op framework.
// Since for a single XRT op there can be many device specific versions (CPU,
// GPU, TPU), and since the monitoring subsystem does not allow multiple
// registrations of the same metric name, we define them all in this file.
monitoring::PercentileSamplerCell* GetAllocateCell();
monitoring::PercentileSamplerCell* GetAllocateUninitializedCell();
monitoring::PercentileSamplerCell* GetAllocateFromTensorCell();
monitoring::PercentileSamplerCell* GetSubTupleCell();
monitoring::PercentileSamplerCell* GetMakeTupleCell();
monitoring::PercentileSamplerCell* GetReadLiteralCell();
monitoring::PercentileSamplerCell* GetReadToTensorCell();
monitoring::PercentileSamplerCell* GetWriteLiteralCell();
monitoring::PercentileSamplerCell* GetReleaseAllocationCell();
monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell();
monitoring::PercentileSamplerCell* GetCompactAllocationsCell();
monitoring::PercentileSamplerCell* GetCompileCell();
monitoring::PercentileSamplerCell* GetReleaseCompilationCell();
monitoring::PercentileSamplerCell* GetExecuteCell();
monitoring::PercentileSamplerCell* GetExecuteChainedCell();
monitoring::PercentileSamplerCell* GetMemoryCompactCell();
monitoring::PercentileSamplerCell* GetTryFreeMemoryCell();

}  // namespace xrt_metrics
xla::StatusOr<xrt::MetricsReport> CollectMetrics(
const xrt::XRTMetricsCollect& metrics);
} // namespace tensorflow
#endif // TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_

View File

@ -1895,6 +1895,7 @@ cc_library(
"//tensorflow/core/lib/monitoring:mobile_sampler",
"//tensorflow/core/lib/monitoring:percentile_sampler",
"//tensorflow/core/lib/monitoring:sampler",
"//tensorflow/core/lib/monitoring:timed",
"//tensorflow/core/lib/random:exact_uniform_int",
"//tensorflow/core/lib/random:philox",
"//tensorflow/core/lib/random:philox_random",

View File

@ -25,6 +25,17 @@ cc_library(
],
)
cc_library(
name = "timed",
hdrs = [
"timed.h",
],
deps = [
"//tensorflow/core/platform:env_time",
"//tensorflow/core/platform:types",
],
)
cc_library(
name = "collected_metrics",
hdrs = [
@ -210,6 +221,7 @@ filegroup(
"mobile_gauge.h",
"mobile_sampler.h",
"sampler.h",
"timed.h",
"types.h",
],
visibility = ["//tensorflow/core:__pkg__"],
@ -225,6 +237,7 @@ filegroup(
"metric_def.h",
"percentile_sampler.h",
"sampler.h",
"timed.h",
"types.h",
],
visibility = ["//tensorflow/core:__pkg__"],

View File

@ -31,10 +31,10 @@ void PercentileSamplerCell::Add(double sample) {
mutex_lock l(mu_);
samples_[next_position_] = {nstime, sample};
++next_position_;
if (next_position_ >= samples_.size()) {
if (TF_PREDICT_FALSE(next_position_ >= samples_.size())) {
next_position_ = 0;
}
if (num_samples_ < samples_.size()) {
if (TF_PREDICT_FALSE(num_samples_ < samples_.size())) {
++num_samples_;
}
++total_samples_;

View File

@ -0,0 +1,48 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_
#define TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_
#include "tensorflow/core/platform/env_time.h"
namespace tensorflow {
namespace monitoring {
// Takes a Sampler, PercentileSample or Gauge cell, and post timing values
// (default in milliseconds) according to its scope lifetime.
template <typename T>
class Timed {
public:
explicit Timed(T* cell, double scale = 1e-6)
: cell_(cell), scale_(scale), start_(EnvTime::NowNanos()) {}
~Timed() { cell_->Add(scale_ * (EnvTime::NowNanos() - start_)); }
private:
T* cell_ = nullptr;
double scale_ = 1e-6;
uint64 start_ = 0;
};
template <typename T>
Timed<T> MakeTimed(T* cell, double scale = 1e-6) {
return Timed<T>(cell, scale);
}
} // namespace monitoring
} // namespace tensorflow
#endif // TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_