From 99eb2266550b09a647c477fe0c85a12984949616 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Jan 2020 14:46:15 -0800 Subject: [PATCH] Added RunEnvironment to OpStats. Added a converter from OpStats to OverviewPage. Added input and bottleneck analysis to InputPipelineAnalysis. PiperOrigin-RevId: 289735025 Change-Id: Ice4b2db5f241573afecce52aa882216ea16bd74c --- tensorflow/core/profiler/convert/BUILD | 23 +++ .../op_stats_to_input_pipeline_analysis.cc | 163 ++++++++++++++++++ .../op_stats_to_input_pipeline_analysis.h | 37 +++- .../convert/op_stats_to_overview_page.cc | 160 +++++++++++++++++ .../convert/op_stats_to_overview_page.h | 45 +++++ tensorflow/core/profiler/protobuf/BUILD | 5 +- .../core/profiler/protobuf/op_stats.proto | 8 +- .../profiler/protobuf/overview_page.proto | 57 +----- 8 files changed, 438 insertions(+), 60 deletions(-) create mode 100644 tensorflow/core/profiler/convert/op_stats_to_overview_page.cc create mode 100644 tensorflow/core/profiler/convert/op_stats_to_overview_page.h diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index f6f1d589c0d..c41fa2dbeda 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -66,6 +66,28 @@ cc_library( ], ) +cc_library( + name = "op_stats_to_overview_page", + srcs = ["op_stats_to_overview_page.cc"], + hdrs = ["op_stats_to_overview_page.h"], + deps = [ + ":op_metrics_to_record", + ":op_stats_to_input_pipeline_analysis", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/platform:logging", + "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc", + "//tensorflow/core/profiler/protobuf:input_pipeline_proto_cc", + "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", + "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:overview_page_proto_cc", + "//tensorflow/core/profiler/utils:math_utils", + 
"//tensorflow/core/profiler/utils:op_metrics_db_utils", + "//tensorflow/core/profiler/utils:time_utils", + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "op_stats_to_input_pipeline_analysis", srcs = ["op_stats_to_input_pipeline_analysis.cc"], @@ -88,6 +110,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", ], ) diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index 05c7ab5ebf9..be1a24b1412 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -23,6 +23,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/platform/logging.h" @@ -46,6 +47,28 @@ namespace { const double kNumPsPerMs = 1000000000.0; +// If the percentage of step time that is due to infeed is less than +// kModeratelyInfeedBoundThresholdInPercent, it is considered NOT +// input-bound; else if it is less than +// kHighlyInfeedBoundThresholdInPercent, it is considered MODERATELY +// input-bound; else if it is considered HIGHLY input-bound. 
+constexpr double kModeratelyInfeedBoundThresholdInPercent = 5; +constexpr double kHighlyInfeedBoundThresholdInPercent = 20; +// If the percentage of step time that is due to kernel launch is less than +// kModeratelyKernelLaunchBoundThresholdInPercent, it is considered NOT +// kernel-launch bound; else if it is less than +// kHighlyKernelLaunchBoundThresholdInPercent, it is considered MODERATELY +// kernel-launch bound; else it is considered HIGHLY kernel-launch bound. +constexpr double kModeratelyKernelLaunchBoundThresholdInPercent = 3; +constexpr double kHighlyKernelLaunchBoundThresholdInPercent = 15; +// If the percentage of step time that is due to all other time is less than +// kModeratelyAllOtherBoundThresholdInPercent, it is considered NOT +// all-other bound; else if it is less than +// kHighlyAllOtherBoundThresholdInPercent, it is considered MODERATELY +// all-other bound; else it is considered HIGHLY all-other bound. +constexpr double kModeratelyAllOtherBoundThresholdInPercent = 3; +constexpr double kHighlyAllOtherBoundThresholdInPercent = 15; + template <class Collection> double GetTimeInMs(const Collection& type_ps, EventType event_type) { return PicosToMillis(gtl::FindWithDefault(type_ps, event_type, /*value=*/0)); @@ -317,6 +340,47 @@ double RatioOfHostToDeviceTimeToStepTime( return 0.0; } +void KernelLaunchAnalysis(double kernel_launch_percent, int* observation_index, + string* kernel_launch_classification, + string* kernel_launch_statement) { + string percent_str = absl::StrFormat("%.1lf", kernel_launch_percent); + if (kernel_launch_percent >= kHighlyKernelLaunchBoundThresholdInPercent) { + *kernel_launch_classification = "high"; + *kernel_launch_statement = absl::StrCat( + "(", ++*observation_index, ") ", percent_str, + " % of the total step time sampled is spent on Kernel Launch."); + } else if (kernel_launch_percent >= + kModeratelyKernelLaunchBoundThresholdInPercent) { + *kernel_launch_classification = "moderate"; + *kernel_launch_statement = absl::StrCat( 
+ "(", ++*observation_index, ") ", percent_str, + " % of the total step time sampled is spent on Kernel Launch."); + } else { + *kernel_launch_classification = "no"; + *kernel_launch_statement = ""; + } +} + +void AllOtherAnalysis(double all_other_percent, int* observation_index, + string* all_other_classification, + string* all_other_statement) { + string percent_str = absl::StrFormat("%.1lf", all_other_percent); + if (all_other_percent >= kHighlyAllOtherBoundThresholdInPercent) { + *all_other_classification = "high"; + *all_other_statement = absl::StrCat( + "(", ++*observation_index, ") ", percent_str, + " % of the total step time sampled is spent on All Others time."); + } else if (all_other_percent >= kModeratelyAllOtherBoundThresholdInPercent) { + *all_other_classification = "moderate"; + *all_other_statement = absl::StrCat( + "(", ++*observation_index, ") ", percent_str, + " % of the total step time sampled is spent on All Others time."); + } else { + *all_other_classification = "no"; + *all_other_statement = ""; + } +} + } // namespace void GenerateHostResult(const OpMetricsDb& host_tf_metrics_db, @@ -451,5 +515,104 @@ InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( return result; } +void InfeedAnalysis(HardwareType hardware_type, double infeed_percent, + int* observation_index, string* input_classification, + string* input_statement) { + absl::string_view non_input_time = "other time"; + string infeed_percent_str = absl::StrFormat("%.1lf", infeed_percent); + if (infeed_percent >= kHighlyInfeedBoundThresholdInPercent) { + *input_classification = "host"; + *input_statement = absl::StrCat( + "(", ++*observation_index, ") ", + "Your program is HIGHLY input-bound because ", infeed_percent_str, + "% of the total step time sampled is waiting for input. 
Therefore, " + "you should first focus on reducing the input time."); + } else if (infeed_percent >= kModeratelyInfeedBoundThresholdInPercent) { + *input_classification = "both"; + *input_statement = absl::StrCat( + "(", ++*observation_index, ") ", + "Your program is MODERATELY input-bound because ", infeed_percent_str, + "% of the total step time sampled is waiting for input. Therefore, " + "you would need to reduce both the input time and ", + non_input_time, "."); + } else { + *input_classification = "device"; + *input_statement = absl::StrCat( + "(", ++*observation_index, ") ", + "Your program is NOT input-bound because only ", infeed_percent_str, + "% of the total step time sampled is waiting for " + "input. Therefore, you should focus on " + "reducing ", + non_input_time, "."); + } +} + +GenericBottleneck GenericOverallBottleneck( + const InputPipelineAnalysisResult& result) { + double total_step_time_ms = 0; + double total_input_ms = 0; + double total_output_ms = 0; + double total_host_compute_ms = 0; + double total_host_prepare_ms = 0; + double total_host_compile_ms = 0; + double total_device_to_device_ms = 0; + double total_unknown_ms = 0; + for (const google::protobuf::Any& step_details : result.step_details()) { + PerGenericStepDetails details; + bool success = step_details.UnpackTo(&details); + if (!success && !step_details.type_url().empty()) { + LOG(ERROR) << "Unable to unpack step_breakdown. 
Expected: generic" + << std::endl; + return {}; + } + total_step_time_ms += details.step_time_ms(); + total_input_ms += + details.host_wait_input_ms() + details.host_to_device_ms(); + total_output_ms += details.output_ms(); + total_host_prepare_ms += details.host_prepare_ms(); + total_device_to_device_ms += details.device_to_device_ms(); + total_host_compute_ms += details.host_compute_ms(); + total_host_compile_ms += details.host_compile_ms(); + total_unknown_ms += details.unknown_time_ms(); + } + if (total_step_time_ms == 0) { + return {{"unknown", + "No step time measured. Therefore we cannot tell where the " + "performance bottleneck is."}, + "no", + "", + "no", + ""}; + } + double input_percent = 100.0 * total_input_ms / total_step_time_ms; + double kernel_launch_percent = + 100.0 * total_host_prepare_ms / total_step_time_ms; + double all_other_percent = 100.0 * total_unknown_ms / total_step_time_ms; + int observation_index = 0; + string input_classification; + string input_statement; + InfeedAnalysis(result.hardware_type(), input_percent, &observation_index, + &input_classification, &input_statement); + + string kernel_launch_classification; + string kernel_launch_statement; + KernelLaunchAnalysis(kernel_launch_percent, &observation_index, + &kernel_launch_classification, &kernel_launch_statement); + + string all_other_classification; + string all_other_statement; + AllOtherAnalysis(all_other_percent, &observation_index, + &all_other_classification, &all_other_statement); + + return {{ + input_classification, + input_statement, + }, + kernel_launch_classification, + kernel_launch_statement, + all_other_classification, + all_other_statement}; +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h index aaf47b9595d..e3f40daf106 100644 --- 
a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h @@ -25,8 +25,30 @@ limitations under the License. namespace tensorflow { namespace profiler { -InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( - const OpStats& op_stats, const HardwareType& hardware_type); +// Common performance bottleneck. +struct CommonBottleneck { + // Indicates if input is a bottleneck. Possible values: "host", "device", + // "both", or "unknown" + string input_classification; + // A human-readable description of the input bottleneck. + string input_statement; +}; + +// Generic hardware bottleneck. +struct GenericBottleneck { + // Bottleneck that exists on all hardware. + CommonBottleneck common; + // Indicates if kernel launching is a bottleneck. Possible values: "no", + // "moderate", "high". + string kernel_launch_classification; + // A human-readable description of the kernel launching overhead. + string kernel_launch_statement; + // Indicates if all other is a bottleneck. Possible values: "no", "moderate", + // "high". + string all_other_classification; + // A human-readable description of the all other overhead. + string all_other_statement; +}; // Computes the summary of step time in milliseconds. StepSummary ComputeStepTimeSummaryInMs( @@ -38,6 +60,17 @@ void GenerateHostResult(const OpMetricsDb& host_tf_metrics_db, InputPipelineAnalysisRecommendation GenerateRecommendation(); +// Returns the performance bottleneck of the program executed. 
+GenericBottleneck GenericOverallBottleneck( + const InputPipelineAnalysisResult& result); + +InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( + const OpStats& op_stats, const HardwareType& hardware_type); + +void InfeedAnalysis(HardwareType hardware_type, double infeed_percent, + int* observation_index, string* input_classification, + string* input_statement); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc new file mode 100644 index 00000000000..367d7593f7c --- /dev/null +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -0,0 +1,160 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h" + +#include <algorithm> +#include <utility> + +#include "google/protobuf/any.pb.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/convert/op_metrics_to_record.h" +#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" +#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" +#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" +#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/overview_page.pb.h" +#include "tensorflow/core/profiler/utils/math_utils.h" +#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" +#include "tensorflow/core/profiler/utils/time_utils.h" + +namespace tensorflow { +namespace profiler { + +namespace { + +OverviewPageTip MakeOverviewPageTip(const string& text) { + OverviewPageTip tip; + tip.set_link(text); + return tip; +} + +string AnchorElement(const string& url, const string& text) { + return absl::StrCat("<a href=\"", url, "\" target=\"_blank\">", text, "</a>"); +} + +// Makes a recommendation for looking up a document. +// doc_url is expected to already be escaped suitably for use in an HTML +// attribute. 
+OverviewPageTip MakeOverviewPageTipDocLink(const string& doc_url, + const string& text) { + OverviewPageTip tip; + tip.set_link(AnchorElement(doc_url, text)); + return tip; +} + +void ComputeHostTips(OverviewPageRecommendation* re) { + *re->add_host_tips() = MakeOverviewPageTip( + "input_pipeline_analyzer (especially Section 3 for the breakdown of " + "input operations on the Host)"); + *re->add_host_tips() = MakeOverviewPageTip( + "trace_viewer (look at the activities on the timeline of each Host " + "Thread near the bottom of the trace view)"); +} + +void ComputeDeviceTips(HardwareType hardware_type, + OverviewPageRecommendation* re) { + const string& device_name = HardwareType_Name(hardware_type); + string timeline_name = + (hardware_type == tensorflow::profiler::TPU) ? "TPU core" : device_name; + *re->add_device_tips() = MakeOverviewPageTip(absl::StrCat( + "op_profile (identify the time-consuming operations executed on the ", + device_name, ")")); + *re->add_device_tips() = MakeOverviewPageTip(absl::StrCat( + "trace_viewer (look at the activities on the timeline of each ", + timeline_name, " in the trace view)")); +} + +void ComputeFaqTips(OverviewPageRecommendation* re) { + *re->add_faq_tips() = MakeOverviewPageTip("Refer to the Cloud tools FAQ"); +} + +void ComputeDocumentationTips(OverviewPageRecommendation* re) { + *re->add_documentation_tips() = MakeOverviewPageTipDocLink( + "https://www.tensorflow.org/versions/master/api_docs/python/tf/data/" + "Dataset", + "TensorFlow Input Pipeline API"); +} + +} // namespace + +void SetCommonRecommendation(const CommonBottleneck& bottleneck, + HardwareType hardware_type, + OverviewPageRecommendation* re) { + re->set_bottleneck(bottleneck.input_classification); + re->set_statement(bottleneck.input_statement); + ComputeHostTips(re); + ComputeDeviceTips(hardware_type, re); + ComputeDocumentationTips(re); + ComputeFaqTips(re); +} + +OverviewPageRecommendation ComputeGenericRecommendation( + const GenericBottleneck& 
bottleneck) { + OverviewPageRecommendation re; + GenericRecommendation generic; + generic.set_kernel_launch_bottleneck(bottleneck.kernel_launch_classification); + generic.set_kernel_launch_statement(bottleneck.kernel_launch_statement); + generic.set_all_other_bottleneck(bottleneck.all_other_classification); + generic.set_all_other_statement(bottleneck.all_other_statement); + re.mutable_recommendation()->PackFrom(generic); + return re; +} + +OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { + OverviewPageAnalysis analysis; + OpMetricsDb metrics_db = + CreateTfMetricsDbFromHloMetricsDb(op_stats.device_op_metrics_db()); + uint64 total_device_time_ps = metrics_db.total_time_ps(); + constexpr int kNumTopOpsShown = 10; + double device_cumulative_fraction = 0.0; + for (const OpMetrics* metrics : + SortedOpMetricsDb(metrics_db, kNumTopOpsShown)) { + OverviewTfOp* op = analysis.add_top_device_ops(); + op->set_name(metrics->name()); + op->set_category(metrics->category()); + op->set_self_time_fraction( + SafeDivide(metrics->self_time_ps(), total_device_time_ps)); + device_cumulative_fraction += op->self_time_fraction(); + op->set_cumulative_time_fraction(device_cumulative_fraction); + op->set_flop_rate( + SafeDivide(metrics->flops(), PicosToNanos(metrics->time_ps()))); + } + return analysis; +} + +OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, + HardwareType hardware_type) { + OverviewPageAnalysis analysis = ComputeAnalysisResult(op_stats); + InputPipelineAnalysisResult input_analysis = + ConvertOpStatsToInputPipelineAnalysis(op_stats, hardware_type); + GenericBottleneck bottleneck = GenericOverallBottleneck(input_analysis); + OverviewPageRecommendation recommendation = + ComputeGenericRecommendation(bottleneck); + SetCommonRecommendation(bottleneck.common, hardware_type, &recommendation); + + OverviewPage overview_page; + *overview_page.mutable_run_environment() = op_stats.run_environment(); + *overview_page.mutable_analysis() = 
analysis; + *overview_page.mutable_input_analysis() = input_analysis; + *overview_page.mutable_recommendation() = recommendation; + return overview_page; +} + +} // namespace profiler +} // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h new file mode 100644 index 00000000000..875f08aa956 --- /dev/null +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h @@ -0,0 +1,45 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_ +#define TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_ + +#include "absl/strings/string_view.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" +#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" +#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" +#include "tensorflow/core/profiler/protobuf/overview_page.pb.h" + +namespace tensorflow { +namespace profiler { + +void SetCommonRecommendation(const CommonBottleneck& bottleneck, + HardwareType hardware_type, + OverviewPageRecommendation* re); + +OverviewPageRecommendation ComputeGenericRecommendation( + const GenericBottleneck& bottleneck); + +OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats); + +OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, + HardwareType hardware_type); + +} // namespace profiler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_ diff --git a/tensorflow/core/profiler/protobuf/BUILD b/tensorflow/core/profiler/protobuf/BUILD index ecf6d2b26ae..cdbf0e605da 100644 --- a/tensorflow/core/profiler/protobuf/BUILD +++ b/tensorflow/core/profiler/protobuf/BUILD @@ -40,7 +40,10 @@ tf_proto_library( name = "overview_page_proto", srcs = ["overview_page.proto"], cc_api_version = 2, - protodeps = [":input_pipeline_proto"], + protodeps = [ + ":input_pipeline_proto", + ":op_stats_proto", + ], visibility = [ ":friends", ], diff --git a/tensorflow/core/profiler/protobuf/op_stats.proto b/tensorflow/core/profiler/protobuf/op_stats.proto index a48b66204be..a3926bea7b5 100644 --- a/tensorflow/core/profiler/protobuf/op_stats.proto +++ b/tensorflow/core/profiler/protobuf/op_stats.proto @@ -54,7 +54,7 @@ message 
SystemTopology { int64 num_expected_reduced_chips = 4; } -// Result proto for RunEnvironment (the run environment of a profiling session). +// The run environment of a profiling session. message RunEnvironment { // Number of hosts used. int32 host_count = 1; @@ -71,9 +71,9 @@ message RunEnvironment { int32 device_core_count = 5; // The per-device-core batch size. int32 per_core_batch_size = 6; - // Host-independent job information. + // Host-independent information about this job. HostIndependentJobInfoResult host_independent_job_info = 7; - // Host-dependent job information. + // Host-dependent information about this job. repeated HostDependentJobInfoResult host_dependent_job_info = 8; // The number of replicas, corresponds to input parallelism. // If there is no model parallelism, replica_count = device_core_count @@ -97,4 +97,6 @@ message OpStats { PerfEnv perf_env = 3; // The database of step sequences. StepDatabaseResult step_db = 4; + // The run environment of this profiling session. + RunEnvironment run_environment = 5; } diff --git a/tensorflow/core/profiler/protobuf/overview_page.proto b/tensorflow/core/profiler/protobuf/overview_page.proto index c7fc6c8936b..18512cac879 100644 --- a/tensorflow/core/profiler/protobuf/overview_page.proto +++ b/tensorflow/core/profiler/protobuf/overview_page.proto @@ -4,59 +4,7 @@ package tensorflow.profiler; import "google/protobuf/any.proto"; import "tensorflow/core/profiler/protobuf/input_pipeline.proto"; - -// Overview result for host-independent job information. -message OverviewPageHostIndependentJobInfo { - // The CL of the build. - int64 change_list = 1; - // The time of this build (nanoseconds since the Unix epoch). - int64 build_time = 2; - // The target of this build. - string build_target = 3; - // Profiling duration (in ms). - uint32 profile_duration_ms = 4; -} - -// Overview result for host-dependent job information. -message OverviewPageHostDependentJobInfo { - // The ID of the host where this job was run. 
- string host_id = 1; - // The command line for this run. - string command_line = 2; - // The start time of this run (nanoseconds since the Unix epoch). - int64 start_time = 3; - // BNS address specified by client at time of profiling request. - string bns_address = 4; - // Profiling start walltime (in ns). - uint64 profile_time_ns = 5; -} - -// Overview result for run environment. -message OverviewPageRunEnvironment { - // Number of hosts used. - int32 host_count = 1; - // Number of tasks used. - int32 task_count = 2; - // The type of device used. - string device_type = 3; - // The number of device cores used. - // What "device core" means depends on the platform: - // For TPU, a device core is a TPU core. - // For Nvidia GPU, a device core is a GPU (not a SM). - int32 device_core_count = 4; - // The per-device-core batch size. - int32 per_core_batch_size = 5; - // Host-independent information about this job. - OverviewPageHostIndependentJobInfo host_independent_job_info = 6; - // Host-dependent information about this job. - repeated OverviewPageHostDependentJobInfo host_dependent_job_info = 7; - // The number of replicas, corresponds to input parallelism. - // If there is no model parallelism, replica_count = device_core_count - int32 replica_count = 8; - // The number of cores used for a single replica, e.g. model parallelism. - // If there is no model parallelism, then num_cores_per_replica = 1 - int32 num_cores_per_replica = 9; -} +import "tensorflow/core/profiler/protobuf/op_stats.proto"; // Overview result for a TensorFlow Op. message OverviewTfOp { @@ -138,11 +86,12 @@ message OverviewPageRecommendation { message OverviewPage { // The run environment of the profiled session. - OverviewPageRunEnvironment run_environment = 1; + RunEnvironment run_environment = 5; // The step-time result. InputPipelineAnalysisResult input_analysis = 2; // The other analysis result. OverviewPageAnalysis analysis = 3; // The recommendation made to the user. 
OverviewPageRecommendation recommendation = 4; + reserved 1; }