From 90e6bdca1ffa37bec962034cc10d1655451fd6c3 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Sat, 25 Jan 2020 12:53:33 -0800 Subject: [PATCH] Instrument XRT with metrics and add op to fetch them from client side. PiperOrigin-RevId: 291547054 Change-Id: Ia44b4d724805912961cf4f1fae165df9bad0c3b2 --- tensorflow/compiler/xrt/BUILD | 5 +- .../compiler/xrt/kernels/xrt_compile_ops.cc | 4 + .../compiler/xrt/kernels/xrt_execute_op.cc | 4 + .../compiler/xrt/kernels/xrt_state_ops.cc | 37 ++- .../compiler/xrt/kernels/xrt_state_ops.h | 18 ++ tensorflow/compiler/xrt/ops/xrt_state_ops.cc | 12 + tensorflow/compiler/xrt/tests/raw_api_test.cc | 21 ++ tensorflow/compiler/xrt/xrt.proto | 50 ++++ tensorflow/compiler/xrt/xrt_memory_manager.cc | 8 + tensorflow/compiler/xrt/xrt_metrics.cc | 255 ++++++++++++++++++ tensorflow/compiler/xrt/xrt_metrics.h | 55 ++++ tensorflow/core/BUILD | 1 + tensorflow/core/lib/monitoring/BUILD | 13 + .../core/lib/monitoring/percentile_sampler.cc | 4 +- tensorflow/core/lib/monitoring/timed.h | 48 ++++ 15 files changed, 530 insertions(+), 5 deletions(-) create mode 100644 tensorflow/compiler/xrt/xrt_metrics.cc create mode 100644 tensorflow/compiler/xrt/xrt_metrics.h create mode 100644 tensorflow/core/lib/monitoring/timed.h diff --git a/tensorflow/compiler/xrt/BUILD b/tensorflow/compiler/xrt/BUILD index a3f6dafbffb..93ad08fbfdf 100644 --- a/tensorflow/compiler/xrt/BUILD +++ b/tensorflow/compiler/xrt/BUILD @@ -45,6 +45,7 @@ cc_library( "xrt_compilation_cache.cc", "xrt_device.cc", "xrt_memory_manager.cc", + "xrt_metrics.cc", "xrt_state.cc", "xrt_util.cc", ], @@ -52,6 +53,7 @@ cc_library( "xrt_compilation_cache.h", "xrt_device.h", "xrt_memory_manager.h", + "xrt_metrics.h", "xrt_refptr.h", "xrt_state.h", "xrt_util.h", @@ -75,10 +77,11 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:regexp_internal", + "//tensorflow/core/profiler/lib:traceme", "//tensorflow/stream_executor", 
"//tensorflow/stream_executor:device_memory_allocator", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", ], ) diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc index 32030d851c8..99fb092335e 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/compiler/xrt/xrt.pb.h" #include "tensorflow/compiler/xrt/xrt_compilation_cache.h" #include "tensorflow/compiler/xrt/xrt_device.h" +#include "tensorflow/compiler/xrt/xrt_metrics.h" #include "tensorflow/compiler/xrt/xrt_util.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -41,6 +42,7 @@ limitations under the License. #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/monitoring/timed.h" #include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/types.h" @@ -137,6 +139,7 @@ Status XRTCompileOp::Compile(OpKernelContext* ctx, void XRTCompileOp::Compute(OpKernelContext* ctx) { VLOG(1) << "XRTCompileOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetCompileCell()); ResourceMgr* rm; OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm)); @@ -207,6 +210,7 @@ XRTReleaseCompilationRefOp::~XRTReleaseCompilationRefOp() = default; void XRTReleaseCompilationRefOp::Compute(OpKernelContext* ctx) { VLOG(1) << "XRTReleaseCompilationRefOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseCompilationCell()); ResourceMgr* rm; OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm)); diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc 
b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc index a612f9950ad..8e54afd02ab 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xrt/xrt_compilation_cache.h" #include "tensorflow/compiler/xrt/xrt_device.h" #include "tensorflow/compiler/xrt/xrt_memory_manager.h" +#include "tensorflow/compiler/xrt/xrt_metrics.h" #include "tensorflow/compiler/xrt/xrt_state.h" #include "tensorflow/compiler/xrt/xrt_util.h" #include "tensorflow/core/framework/op_kernel.h" @@ -35,6 +36,7 @@ limitations under the License. #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/monitoring/timed.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/stream_executor/stream_executor.h" #include "tensorflow/stream_executor/stream_executor_internal.h" @@ -248,6 +250,7 @@ void XRTExecuteOp::ComputeAsync(OpKernelContext* context, DoneCallback done) { Status XRTExecuteOp::DoWork(OpKernelContext* context) { VLOG(1) << "XRTExecuteOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetExecuteCell()); ResourceMgr* rm; TF_RETURN_IF_ERROR( XRTGenericDeviceAccessor::GetResourceManager(context, &rm)); @@ -333,6 +336,7 @@ void XRTExecuteChainedOp::ComputeAsync(OpKernelContext* context, Status XRTExecuteChainedOp::DoWork(OpKernelContext* context) { VLOG(1) << "XRTExecuteChainedOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetExecuteChainedCell()); ResourceMgr* rm; TF_RETURN_IF_ERROR( XRTGenericDeviceAccessor::GetResourceManager(context, &rm)); diff --git a/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc index 6eab3716391..02b9a2e068b 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc @@ 
-16,15 +16,45 @@ limitations under the License. // Classes for allocating XLA literals in device memory and managing handles // that refer to them. +#include "tensorflow/compiler/xrt/kernels/xrt_state_ops.h" + #include #include -#include "tensorflow/compiler/xrt/kernels/xrt_state_ops.h" - #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xrt/xrt_metrics.h" namespace tensorflow { +namespace { + +class XRTMetricsCollectOp : public OpKernel { + public: + explicit XRTMetricsCollectOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + VLOG(1) << "XRTMetricsCollectOp::Compute"; + + const Tensor& metrics_proto = ctx->input(0); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(metrics_proto.shape()), + errors::Internal("request input should be a string scalar")); + xrt::XRTMetricsCollect metrics; + OP_REQUIRES(ctx, metrics.ParseFromString(metrics_proto.scalar()()), + errors::InvalidArgument( + "Unable to parse request input to XRTMetricsCollect")); + + xla::StatusOr collected_metrics_or = + CollectMetrics(metrics); + OP_REQUIRES_OK(ctx, collected_metrics_or.status()); + xrt::MetricsReport collected_metrics = + collected_metrics_or.ConsumeValueOrDie(); + Tensor output(DT_STRING, TensorShape({})); + output.scalar()() = collected_metrics.SerializeAsString(); + ctx->set_output(0, output); + } +}; + +} // namespace REGISTER_KERNEL_BUILDER(Name("XRTAllocate") .Device(DEVICE_XLA_GPU) @@ -161,4 +191,7 @@ REGISTER_KERNEL_BUILDER(Name("XRTCompactAllocations").Device(DEVICE_XLA_GPU), REGISTER_KERNEL_BUILDER(Name("XRTCompactAllocations").Device(DEVICE_XLA_CPU), XRTCompactAllocationsOp); +REGISTER_KERNEL_BUILDER(Name("XRTMetricsCollect").Device(DEVICE_CPU), + XRTMetricsCollectOp); + } // namespace tensorflow diff --git a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h index 769ec188349..ffb5a3e8db3 
100644 --- a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h +++ b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/compiler/xrt/xrt.pb.h" #include "tensorflow/compiler/xrt/xrt_device.h" #include "tensorflow/compiler/xrt/xrt_memory_manager.h" +#include "tensorflow/compiler/xrt/xrt_metrics.h" #include "tensorflow/compiler/xrt/xrt_state.h" #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/framework/op_kernel.h" @@ -46,6 +47,8 @@ limitations under the License. #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/monitoring/percentile_sampler.h" +#include "tensorflow/core/lib/monitoring/timed.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -170,6 +173,7 @@ class XRTAllocateOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTAllocateOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetAllocateCell()); const Tensor& allocation_info = ctx->input(0); OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(allocation_info.shape()), @@ -223,6 +227,8 @@ class XRTAllocateUninitializedOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTAllocateUninitializedOp::Compute"; + auto timed = + monitoring::MakeTimed(xrt_metrics::GetAllocateUninitializedCell()); ResourceMgr* rm; OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm)); @@ -294,6 +300,8 @@ class XRTAllocateFromTensorOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTAllocateFromTensorOp::Compute"; + auto timed = + monitoring::MakeTimed(xrt_metrics::GetAllocateFromTensorCell()); OpInputList values; OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &values)); @@ -362,6 +370,7 @@ class XRTSubTupleOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << 
"XRTSubTupleOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetSubTupleCell()); const Tensor& handle_tensor = ctx->input(0); OP_REQUIRES( @@ -412,6 +421,7 @@ class XRTMakeTupleOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTMakeTupleOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetMakeTupleCell()); const Tensor& tuple_info = ctx->input(0); OP_REQUIRES( @@ -482,6 +492,7 @@ class XRTReadLiteralOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTReadLiteralOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetReadLiteralCell()); const Tensor& handle_tensor = ctx->input(0); OP_REQUIRES( @@ -532,6 +543,7 @@ class XRTReadToTensorOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTReadToTensorOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetReadToTensorCell()); const Tensor& handle_tensor = ctx->input(0); // TODO(phawkins,dlibenzi): accept multiple handles (i.e., vectors, not @@ -615,6 +627,7 @@ class XRTWriteLiteralOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTWriteLiteralOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetWriteLiteralCell()); const Tensor& handle_tensor = ctx->input(0); OP_REQUIRES( @@ -665,6 +678,7 @@ class XRTReleaseAllocationOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTReleaseAllocationOp::Compute"; + auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseAllocationCell()); ResourceMgr* rm; OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm)); @@ -693,6 +707,8 @@ class XRTReleaseAllAllocationsOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTReleaseAllAllocationsOp::Compute"; + auto timed = + monitoring::MakeTimed(xrt_metrics::GetReleaseAllAllocationsCell()); ResourceMgr* rm; OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm)); @@ 
-710,6 +726,8 @@ class XRTCompactAllocationsOp : public OpKernel { void Compute(OpKernelContext* ctx) override { VLOG(1) << "XRTCompactAllocationsOp::Compute"; + auto timed = + monitoring::MakeTimed(xrt_metrics::GetCompactAllocationsCell()); ResourceMgr* rm; OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm)); diff --git a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc index 49a2656a0f9..dca757bec3a 100644 --- a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc +++ b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc @@ -216,4 +216,16 @@ backing the handles, and re-allocate and send back the data to the device. This operation helps with device memory fragmentation. )"); +REGISTER_OP("XRTMetricsCollect") + .Input("request: string") + .Output("result: string") + .SetShapeFn(tensorflow::shape_inference::ScalarShape) + .Doc( + R"( +Reads the selected metric values from the metrics collection registry. + +'request' is a serialized xrt::XRTMetricsCollect proto. +'result' is a serialized xrt::MetricsReport proto. 
+)"); + } // namespace tensorflow diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index 68f56a52d0e..08a99756426 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -1675,6 +1675,27 @@ TEST(RawApiTest, TestDeviceMemorySwap) { } } +TEST(RawApiTest, TestMetricsFetch) { + xrt::XRTMetricsCollect metrics; + metrics.add_metrics_regex("/tensorflow/xrt/.*"); + + Scope root = Scope::NewRootScope().WithDevice("/device:CPU:0"); + auto metrics_value = ops::Const(root, metrics.SerializeAsString()); + Output result = ops::XRTMetricsCollect(root, metrics_value); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({result}, &outputs)); + ASSERT_EQ(outputs.size(), 1); + + xrt::MetricsReport report; + EXPECT_TRUE(report.ParseFromString(outputs[0].scalar()())); + for (auto& metric : report.metrics()) { + EXPECT_EQ(metric.name().compare(0, 16, "/tensorflow/xrt/"), 0); + } +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/xrt/xrt.proto b/tensorflow/compiler/xrt/xrt.proto index 0a123a9a48a..1cf9a0b650f 100644 --- a/tensorflow/compiler/xrt/xrt.proto +++ b/tensorflow/compiler/xrt/xrt.proto @@ -191,3 +191,53 @@ message XRTChainedExecutePlan { // The post order with the XRT computations to be executed. repeated XRTChainedExecuteOp ops = 1; } + +// The message used to encode the options for the XRTMetricsCollect operation. +message XRTMetricsCollect { + // A list of regular expressions to match the metric names. Empty means to + // return all the metrics reported by the collection registry. + repeated string metrics_regex = 1; +} + +message Percentiles { + message Point { + // In the [0, 100] range. + double percentile = 1; + double value = 2; + } + + // The time (in nanoseconds) of the first sample within the samples buffer. 
+ uint64 start_nstime = 1; + // The time (in nanoseconds) of the last sample within the samples buffer. + uint64 end_nstime = 2; + // The minimum value of the samples within the samples buffer. + double min_value = 3; + // The maximum value of the samples within the samples buffer. + double max_value = 4; + // The mean value of the samples within the samples buffer. + double mean = 5; + // The standard deviation of the samples within the samples buffer. + double stddev = 6; + // The number of samples within the samples buffer. + uint64 num_samples = 7; + // The total number of times this metric has been posted a value to. + uint64 total_samples = 8; + // The sum of all the posted values. + double accumulator = 9; + // The percentile points reported by the metric. + repeated Point points = 10; +} + +message MetricValues { + // The metric name. + string name = 1; + + oneof values_oneof { + Percentiles percentiles_value = 2; + int64 int64_value = 3; + } +} + +message MetricsReport { + repeated MetricValues metrics = 1; +} diff --git a/tensorflow/compiler/xrt/xrt_memory_manager.cc b/tensorflow/compiler/xrt/xrt_memory_manager.cc index 14986be3d1e..7042e35a98e 100644 --- a/tensorflow/compiler/xrt/xrt_memory_manager.cc +++ b/tensorflow/compiler/xrt/xrt_memory_manager.cc @@ -20,7 +20,10 @@ limitations under the License. 
#include #include "absl/memory/memory.h" +#include "tensorflow/compiler/xrt/xrt_metrics.h" +#include "tensorflow/core/lib/monitoring/timed.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/profiler/lib/traceme.h" namespace tensorflow { namespace { @@ -97,6 +100,9 @@ class XRTMemoryManager::DeviceContext { Status CompactAllocations(XRTMemoryManager* memory_manager, xla::Backend* backend) { + profiler::TraceMe trace_me("XRTMemoryManager::CompactAllocations", + /*level=*/2); + auto timed = monitoring::MakeTimed(xrt_metrics::GetMemoryCompactCell()); VLOG(4) << "CompactAllocations started"; mutex_lock lock(lock_); Status status; @@ -143,6 +149,8 @@ class XRTMemoryManager::DeviceContext { // Tries to free size bytes by freeing some unpinned device memory. Returns // the amount of memory which was able to free. xla::StatusOr TryFreeMemory(xla::Backend* backend, size_t size) { + profiler::TraceMe trace_me("XRTMemoryManager::TryFreeMemory", /*level=*/2); + auto timed = monitoring::MakeTimed(xrt_metrics::GetTryFreeMemoryCell()); mutex_lock lock(lock_); size_t swapped_size = 0; for (auto it = allocs_.rbegin(); it != allocs_.rend(); ++it) { diff --git a/tensorflow/compiler/xrt/xrt_metrics.cc b/tensorflow/compiler/xrt/xrt_metrics.cc new file mode 100644 index 00000000000..ec4ac774b68 --- /dev/null +++ b/tensorflow/compiler/xrt/xrt_metrics.cc @@ -0,0 +1,255 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xrt/xrt_metrics.h" + +#include "tensorflow/core/lib/monitoring/collection_registry.h" +#include "tensorflow/core/platform/regexp.h" + +namespace tensorflow { +namespace { + +static const size_t kMaxSamples = 1024; + +std::vector<double> GetDefaultPercentiles() { + return {25.0, 50.0, 80.0, 90.0, 95.0, 99.0}; +} + +bool IsSelectedMetric(const xrt::XRTMetricsCollect& metrics, + const string& name) { + if (metrics.metrics_regex_size() == 0) { + return true; + } + for (auto& metric_regex : metrics.metrics_regex()) { + if (RE2::FullMatch(name, metric_regex)) { + return true; + } + } + return false; +} + +Status AddMetrics(xrt::MetricsReport* report, + const monitoring::PointSet& point_set) { + for (auto& point : point_set.points) { + xrt::MetricValues* metrics = report->add_metrics(); + metrics->set_name(point_set.metric_name); + if (point->value_type == monitoring::ValueType::kPercentiles) { + xrt::Percentiles* percentiles = metrics->mutable_percentiles_value(); + percentiles->set_start_nstime(point->percentiles_value.start_nstime); + percentiles->set_end_nstime(point->percentiles_value.end_nstime); + percentiles->set_min_value(point->percentiles_value.min_value); + percentiles->set_max_value(point->percentiles_value.max_value); + percentiles->set_mean(point->percentiles_value.mean); + percentiles->set_stddev(point->percentiles_value.stddev); + percentiles->set_num_samples(point->percentiles_value.num_samples); + percentiles->set_total_samples(point->percentiles_value.total_samples); + percentiles->set_accumulator(point->percentiles_value.accumulator); + for (auto& pct_point : point->percentiles_value.points) { + xrt::Percentiles::Point* xpoint = percentiles->add_points(); + xpoint->set_percentile(pct_point.percentile); + xpoint->set_value(pct_point.value); + } + } else if (point->value_type == monitoring::ValueType::kInt64) { + 
metrics->set_int64_value(point->int64_value); + } + } + return Status::OK(); +} + +} // namespace + +namespace xrt_metrics { + +monitoring::PercentileSamplerCell* GetAllocateCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/allocate", "Tracks XRTAllocate times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetAllocateUninitializedCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/allocate_uninitialized", + "Tracks XRTAllocateUninitialized times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetAllocateFromTensorCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/allocate_from_tensor", + "Tracks XRTAllocateFromTensor times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetSubTupleCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/sub_tuple", "Tracks XRTSubTuple times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetMakeTupleCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/make_tuple", "Tracks XRTMakeTuple times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetReadLiteralCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/read_literal", "Tracks XRTReadLiteral times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetReadToTensorCell() { 
+ static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/read_tensor", "Tracks XRTReadToTensor times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetWriteLiteralCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/write_literal", "Tracks XRTWriteLiteral times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetReleaseAllocationCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/release_allocation", + "Tracks XRTReleaseAllocation times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/release_all_allocations", + "Tracks XRTReleaseAllAllocations times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetCompactAllocationsCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/compact_allocations", + "Tracks XRTCompactAllocations times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetCompileCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/compile", "Tracks XRTCompile times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetReleaseCompilationCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/release_compilation", + 
"Tracks XRTReleaseCompilationRef times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetExecuteCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/execute", "Tracks XRTExecute times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetExecuteChainedCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/ops/execute_chained", + "Tracks XRTExecuteChained times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetMemoryCompactCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/memory_manager/compaction", + "Tracks XRT memory manager memory compaction times"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +monitoring::PercentileSamplerCell* GetTryFreeMemoryCell() { + static monitoring::PercentileSamplerCell* cell = + monitoring::PercentileSampler<0>::New( + {"/tensorflow/xrt/memory_manager/try_free_memory", + "Tracks XRT memory manager times in trying to " + "free memory by swapping device memory to host memory"}, + GetDefaultPercentiles(), kMaxSamples) + ->GetCell(); + return cell; +} + +} // namespace xrt_metrics + +xla::StatusOr<xrt::MetricsReport> CollectMetrics( + const xrt::XRTMetricsCollect& metrics) { + auto* collection_registry = monitoring::CollectionRegistry::Default(); + monitoring::CollectionRegistry::CollectMetricsOptions options; + options.collect_metric_descriptors = false; + auto collected_metrics = collection_registry->CollectMetrics(options); + xrt::MetricsReport report; + for (auto& name_pointset : collected_metrics->point_set_map) { + if (IsSelectedMetric(metrics, name_pointset.first)) { + TF_RETURN_IF_ERROR(AddMetrics(&report, 
*name_pointset.second)); + } + } + return std::move(report); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/xrt/xrt_metrics.h b/tensorflow/compiler/xrt/xrt_metrics.h new file mode 100644 index 00000000000..3e61e817ebd --- /dev/null +++ b/tensorflow/compiler/xrt/xrt_metrics.h @@ -0,0 +1,55 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_ +#define TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_ + +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xrt/xrt.pb.h" +#include "tensorflow/core/lib/monitoring/percentile_sampler.h" + +namespace tensorflow { +namespace xrt_metrics { + +// Defines the singletons of the metrics populated by the XRT op framework. +// Since for a single XRT op there can be many device specific versions (CPU, +// GPU, TPU), and since the monitoring subsystem does not allow multiple +// registrations of the same metric name, we define them all in this file. 
+monitoring::PercentileSamplerCell* GetAllocateCell(); +monitoring::PercentileSamplerCell* GetAllocateUninitializedCell(); +monitoring::PercentileSamplerCell* GetAllocateFromTensorCell(); +monitoring::PercentileSamplerCell* GetSubTupleCell(); +monitoring::PercentileSamplerCell* GetMakeTupleCell(); +monitoring::PercentileSamplerCell* GetReadLiteralCell(); +monitoring::PercentileSamplerCell* GetReadToTensorCell(); +monitoring::PercentileSamplerCell* GetWriteLiteralCell(); +monitoring::PercentileSamplerCell* GetReleaseAllocationCell(); +monitoring::PercentileSamplerCell* GetReleaseAllAllocationsCell(); +monitoring::PercentileSamplerCell* GetCompactAllocationsCell(); +monitoring::PercentileSamplerCell* GetCompileCell(); +monitoring::PercentileSamplerCell* GetReleaseCompilationCell(); +monitoring::PercentileSamplerCell* GetExecuteCell(); +monitoring::PercentileSamplerCell* GetExecuteChainedCell(); +monitoring::PercentileSamplerCell* GetMemoryCompactCell(); +monitoring::PercentileSamplerCell* GetTryFreeMemoryCell(); + +} // namespace xrt_metrics + +xla::StatusOr CollectMetrics( + const xrt::XRTMetricsCollect& metrics); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_XRT_XRT_METRICS_H_ diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 422df45c797..e9a2480c1bf 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1895,6 +1895,7 @@ cc_library( "//tensorflow/core/lib/monitoring:mobile_sampler", "//tensorflow/core/lib/monitoring:percentile_sampler", "//tensorflow/core/lib/monitoring:sampler", + "//tensorflow/core/lib/monitoring:timed", "//tensorflow/core/lib/random:exact_uniform_int", "//tensorflow/core/lib/random:philox", "//tensorflow/core/lib/random:philox_random", diff --git a/tensorflow/core/lib/monitoring/BUILD b/tensorflow/core/lib/monitoring/BUILD index 9fa3f2d75f3..fd74298eae0 100644 --- a/tensorflow/core/lib/monitoring/BUILD +++ b/tensorflow/core/lib/monitoring/BUILD @@ -25,6 +25,17 @@ cc_library( ], ) +cc_library( + name = 
"timed", + hdrs = [ + "timed.h", + ], + deps = [ + "//tensorflow/core/platform:env_time", + "//tensorflow/core/platform:types", + ], +) + cc_library( name = "collected_metrics", hdrs = [ @@ -210,6 +221,7 @@ filegroup( "mobile_gauge.h", "mobile_sampler.h", "sampler.h", + "timed.h", "types.h", ], visibility = ["//tensorflow/core:__pkg__"], @@ -225,6 +237,7 @@ filegroup( "metric_def.h", "percentile_sampler.h", "sampler.h", + "timed.h", "types.h", ], visibility = ["//tensorflow/core:__pkg__"], diff --git a/tensorflow/core/lib/monitoring/percentile_sampler.cc b/tensorflow/core/lib/monitoring/percentile_sampler.cc index 3d9c644cc0d..988e50ded52 100644 --- a/tensorflow/core/lib/monitoring/percentile_sampler.cc +++ b/tensorflow/core/lib/monitoring/percentile_sampler.cc @@ -31,10 +31,10 @@ void PercentileSamplerCell::Add(double sample) { mutex_lock l(mu_); samples_[next_position_] = {nstime, sample}; ++next_position_; - if (next_position_ >= samples_.size()) { + if (TF_PREDICT_FALSE(next_position_ >= samples_.size())) { next_position_ = 0; } - if (num_samples_ < samples_.size()) { + if (TF_PREDICT_FALSE(num_samples_ < samples_.size())) { ++num_samples_; } ++total_samples_; diff --git a/tensorflow/core/lib/monitoring/timed.h b/tensorflow/core/lib/monitoring/timed.h new file mode 100644 index 00000000000..09b412676ee --- /dev/null +++ b/tensorflow/core/lib/monitoring/timed.h @@ -0,0 +1,48 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_ +#define TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_ + +#include "tensorflow/core/platform/env_time.h" + +namespace tensorflow { +namespace monitoring { + +// Takes a Sampler, PercentileSampler or Gauge cell, and posts timing values +// (default in milliseconds) according to its scope lifetime. +template <typename T> +class Timed { + public: + explicit Timed(T* cell, double scale = 1e-6) + : cell_(cell), scale_(scale), start_(EnvTime::NowNanos()) {} + + ~Timed() { cell_->Add(scale_ * (EnvTime::NowNanos() - start_)); } + + private: + T* cell_ = nullptr; + double scale_ = 1e-6; + uint64 start_ = 0; +}; + +template <typename T> +Timed<T> MakeTimed(T* cell, double scale = 1e-6) { + return Timed<T>(cell, scale); +} + +} // namespace monitoring +} // namespace tensorflow + +#endif  // TENSORFLOW_CORE_LIB_MONITORING_TIMED_H_