From 7b32b175fe7e87766dce28749c16236111bea6a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" <gardener@tensorflow.org> Date: Wed, 16 Oct 2019 12:14:25 -0700 Subject: [PATCH] Add support to TPU automatic profiler. PiperOrigin-RevId: 275086152 Change-Id: Iedbb55bbf3fd2da4f9678e61dc79d7919c33ae64 --- tensorflow/core/profiler/BUILD | 12 +++++- .../core/profiler/profiler_service.proto | 23 +++-------- .../profiler_service_monitor_result.proto | 39 +++++++++++++++++++ 3 files changed, 55 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/profiler/profiler_service_monitor_result.proto diff --git a/tensorflow/core/profiler/BUILD b/tensorflow/core/profiler/BUILD index b4ba7bc532a..329f0933509 100644 --- a/tensorflow/core/profiler/BUILD +++ b/tensorflow/core/profiler/BUILD @@ -43,13 +43,22 @@ tf_proto_library( cc_api_version = 2, ) +tf_proto_library( + name = "profiler_service_monitor_result_proto", + srcs = ["profiler_service_monitor_result.proto"], + cc_api_version = 2, +) + tf_proto_library( name = "profiler_service_proto", srcs = ["profiler_service.proto"], has_services = 1, cc_api_version = 2, cc_grpc_version = 1, - protodeps = [":op_profile_proto"] + tf_additional_all_protos(), + protodeps = [ + ":op_profile_proto", + ":profiler_service_monitor_result_proto", + ] + tf_additional_all_protos(), ) tf_proto_library( @@ -69,6 +78,7 @@ tf_proto_library( "profiler_service.proto", "profiler_analysis.proto", "op_profile.proto", + "profiler_service_monitor_result.proto", ], ), cc_api_version = 2, diff --git a/tensorflow/core/profiler/profiler_service.proto b/tensorflow/core/profiler/profiler_service.proto index 029227958a9..a5cde157981 100644 --- a/tensorflow/core/profiler/profiler_service.proto +++ b/tensorflow/core/profiler/profiler_service.proto @@ -5,6 +5,7 @@ package tensorflow; import "tensorflow/core/framework/graph.proto"; import "tensorflow/core/profiler/op_profile.proto"; import "tensorflow/core/protobuf/config.proto"; +import 
"tensorflow/core/profiler/profiler_service_monitor_result.proto"; // The ProfilerService service retrieves performance information about // the programs running on connected devices over a period of time. @@ -128,23 +129,9 @@ message MonitorRequest { message MonitorResponse { // Properly formatted string data that can be directly returned back to user. string data = 1; - // The following are the raw components used to construct field data. - // Percentage of time when device is idle. - double device_idle_time_percent = 2; - // TPU matrix unit utilization percentage. - double matrix_unit_utilization_percent = 3; - // Average step time in millisecond. - double step_time_ms_avg = 4; - // Minimum step time in millisecond. - double step_time_ms_min = 5; - // Maximum step time in millisecond. - double step_time_ms_max = 6; - // Average infeed percentage. - double infeed_percent_avg = 7; - // Minimum infeed percentage. - double infeed_percent_min = 8; - // Maximum infeed percentage. - double infeed_percent_max = 9; - // next-field: 10 + // A collection of monitoring results for each field shown in data. + ProfilerServiceMonitorResult monitor_result = 2; + + // next-field: 3 } diff --git a/tensorflow/core/profiler/profiler_service_monitor_result.proto b/tensorflow/core/profiler/profiler_service_monitor_result.proto new file mode 100644 index 00000000000..48ec2113e2c --- /dev/null +++ b/tensorflow/core/profiler/profiler_service_monitor_result.proto @@ -0,0 +1,39 @@ +syntax = "proto3"; + +package tensorflow; + +message ProfilerServiceMonitorResult { + // Represents the different types of responses from the profiling service. + enum ResponseType { + // No result is returned from the profiling service. + EMPTY_RESULT = 0; + // Only device utilization is available. + UTIL_ONLY = 1; + // Both device utilization and device idle time are available. + UTIL_IDLE = 2; + // Device utilization, device idle time, step time, and infeed percentage + // are all available.
+ UTIL_IDLE_STEP = 3; + } + + // Type of profiling responses. + ResponseType response_type = 1; + // Percentage of time when device is idle. + double device_idle_time_percent = 2; + // TPU matrix unit utilization percentage. + double matrix_unit_utilization_percent = 3; + // Average step time in milliseconds. + double step_time_ms_avg = 4; + // Minimum step time in milliseconds. + double step_time_ms_min = 5; + // Maximum step time in milliseconds. + double step_time_ms_max = 6; + // Average infeed percentage. + double infeed_percent_avg = 7; + // Minimum infeed percentage. + double infeed_percent_min = 8; + // Maximum infeed percentage. + double infeed_percent_max = 9; + + // next-field: 10 +}