From 6fdce880fa8a5f8b752323ce9d2b49d87382d80e Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Tue, 6 Oct 2020 14:20:19 -0700 Subject: [PATCH] Add format_utils to profiler For now just provide OneDigit implemented using StrFormat. A more efficient implementation could be used in the future. PiperOrigin-RevId: 335720026 Change-Id: Iefa6ce5f5860b45533f081bbd83267347847a962 --- tensorflow/core/profiler/convert/BUILD | 3 +- .../op_stats_to_input_pipeline_analysis.cc | 48 ++++++++----------- .../convert/op_stats_to_overview_page.cc | 12 ++--- tensorflow/core/profiler/utils/BUILD | 8 ++++ tensorflow/core/profiler/utils/format_utils.h | 32 +++++++++++++ 5 files changed, 68 insertions(+), 35 deletions(-) create mode 100644 tensorflow/core/profiler/utils/format_utils.h diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index b67d706bca9..71f37b77253 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -104,6 +104,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", "//tensorflow/core/profiler/utils:diagnostics", + "//tensorflow/core/profiler/utils:format_utils", "//tensorflow/core/profiler/utils:hardware_type_utils", "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:kernel_stats_utils", @@ -206,6 +207,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/utils:diagnostics", "//tensorflow/core/profiler/utils:event_span", + "//tensorflow/core/profiler/utils:format_utils", "//tensorflow/core/profiler/utils:hardware_type_utils", "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:math_utils", @@ -214,7 +216,6 @@ cc_library( "//tensorflow/core/util:stats_calculator_portable", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", ], ) diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index 0aea94b23d4..c339a43cefb 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -24,7 +24,6 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/platform/logging.h" @@ -38,6 +37,7 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/event_span.h" +#include "tensorflow/core/profiler/utils/format_utils.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" @@ -379,21 +379,18 @@ double RatioOfHostToDeviceTimeToStepTime( void DeviceCollectivesAnalysis(double device_collectives_percent, std::string* device_collectives_classification, std::string* device_collectives_statement) { - std::string percent_str = - absl::StrFormat("%.1lf", device_collectives_percent); - if (device_collectives_percent >= kHighlyDeviceCollectivesBoundThresholdInPercent) { *device_collectives_classification = "high"; *device_collectives_statement = - absl::StrCat(percent_str, + absl::StrCat(OneDigit(device_collectives_percent), " % of the total step time sampled is spent on 'Device " "Collective Communication'."); } else if (device_collectives_percent >= kModeratelyDeviceCollectivesBoundThresholdInPercent) { *device_collectives_classification = "moderate"; *device_collectives_statement = - absl::StrCat(percent_str, + absl::StrCat(OneDigit(device_collectives_percent), " % of the total step time sampled is spent on 'Device " "Collective Communication'."); } else { @@ -405,11 +402,10 @@ void DeviceCollectivesAnalysis(double device_collectives_percent, void KernelLaunchAnalysis(bool tfdata_used, double kernel_launch_percent, std::string* kernel_launch_classification, std::string* kernel_launch_statement) { - std::string percent_str = absl::StrFormat("%.1lf", kernel_launch_percent); if (kernel_launch_percent >= kHighlyKernelLaunchBoundThresholdInPercent) { *kernel_launch_classification = "high"; *kernel_launch_statement = absl::StrCat( - percent_str, + OneDigit(kernel_launch_percent), " % of the total step time sampled is spent on 'Kernel Launch'."); if (tfdata_used) { absl::StrAppend(kernel_launch_statement, kKernelLaunchTfDataContention); @@ -418,7 +414,7 @@ void KernelLaunchAnalysis(bool tfdata_used, double kernel_launch_percent, kModeratelyKernelLaunchBoundThresholdInPercent) { *kernel_launch_classification = "moderate"; *kernel_launch_statement = absl::StrCat( - percent_str, + OneDigit(kernel_launch_percent), " % of the total step time sampled is spent on 'Kernel Launch'."); if (tfdata_used) { absl::StrAppend(kernel_launch_statement, kKernelLaunchTfDataContention); @@ -437,15 +433,14 @@ void AllOtherAnalysis(bool all_other_reported, double all_other_percent, *all_other_statement = ""; return; } - std::string percent_str = absl::StrFormat("%.1lf", all_other_percent); if (all_other_percent >= kHighlyAllOtherBoundThresholdInPercent) { *all_other_classification = "high"; *all_other_statement = - absl::StrCat(percent_str, kAllOthersPythonExplanation); + absl::StrCat(OneDigit(all_other_percent), kAllOthersPythonExplanation); } else if (all_other_percent >= kModeratelyAllOtherBoundThresholdInPercent) { *all_other_classification = "moderate"; *all_other_statement = - absl::StrCat(percent_str, kAllOthersPythonExplanation); + absl::StrCat(OneDigit(all_other_percent), kAllOthersPythonExplanation); } else { *all_other_classification = "no"; *all_other_statement = ""; @@ -627,18 +622,18 @@ bool InputAnalysis(double input_percent, double all_other_percent, std::string* input_classification, std::string* input_statement) { absl::string_view non_input_time = "other time"; - std::string infeed_percent_str = absl::StrFormat("%.1lf", input_percent); if (input_percent >= kHighlyInfeedBoundThresholdInPercent) { *input_classification = "host"; *input_statement = absl::StrCat( - "Your program is HIGHLY input-bound because ", infeed_percent_str, + "Your program is HIGHLY input-bound because ", OneDigit(input_percent), "% of the total step time sampled is waiting for input. Therefore, you " "should first focus on reducing the input time."); return false; } else if (input_percent >= kModeratelyInfeedBoundThresholdInPercent) { *input_classification = "both"; *input_statement = absl::StrCat( - "Your program is MODERATELY input-bound because ", infeed_percent_str, + "Your program is MODERATELY input-bound because ", + OneDigit(input_percent), "% of the total step time sampled is waiting for input. Therefore, " "you would need to reduce both the input time and ", non_input_time, "."); @@ -647,41 +642,40 @@ bool InputAnalysis(double input_percent, double all_other_percent, // Input analysis says it is not input-bound, but "All-Other" time // is significant. It could still be input-bound (or Python overhead). *input_classification = "both"; - std::string all_other_percent_str = - absl::StrFormat("%.1lf", all_other_percent); *input_statement = absl::StrCat( "Your program is POTENTIALLY input-bound because ", - all_other_percent_str, + OneDigit(all_other_percent), "% of the total step time sampled is spent on 'All Others' time (which " "could be due to I/O or Python execution or both)."); return true; } else { // Defintely not input-bound. *input_classification = "device"; - *input_statement = absl::StrCat( - "Your program is NOT input-bound because only ", infeed_percent_str, - "% of the total step time sampled is waiting for " - "input. Therefore, you should focus on " - "reducing ", - non_input_time, "."); + *input_statement = + absl::StrCat("Your program is NOT input-bound because only ", + OneDigit(input_percent), + "% of the total step time sampled is waiting for " + "input. Therefore, you should focus on " + "reducing ", + non_input_time, "."); return false; } } void OutputAnalysis(double output_percent, std::string* output_classification, std::string* output_statement) { - string tc_outfeed_percent_str = absl::StrFormat("%.1lf", output_percent); if (output_percent >= kHighlyOutfeedBoundThresholdInPercent) { *output_classification = "host"; *output_statement = absl::StrCat( - "Your program is HIGHLY output-bound because ", tc_outfeed_percent_str, + "Your program is HIGHLY output-bound because ", + OneDigit(output_percent), "% of the total step time sampled is spent on output. Therefore, you " "should first focus on reducing the output time."); } else if (output_percent >= kModeratelyOutfeedBoundThresholdInPercent) { *output_classification = "both"; *output_statement = absl::StrCat( "Your program is MODERATELY output-bound because ", - tc_outfeed_percent_str, + OneDigit(output_percent), "% of the total step time sampled is spent on output. Therefore, " "you would need to reduce both the output time and other time."); } else { diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index 8f0e920c7e6..cf0b7e6ad43 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -19,7 +19,6 @@ limitations under the License. #include "google/protobuf/any.pb.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_format.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_metrics_to_record.h" #include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" @@ -32,6 +31,7 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/protobuf/tf_function.pb.h" #include "tensorflow/core/profiler/utils/diagnostics.h" +#include "tensorflow/core/profiler/utils/format_utils.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/kernel_stats_utils.h" @@ -118,7 +118,7 @@ std::string GeneratePrecisionStatement(const PrecisionStats& precision_stats) { (100.0 * precision_stats.compute_16bit_ps()) / total_compute_ps; if (percent_16bit < kLowPrecisionPercentThreshold) { return absl::StrCat( - "Only ", absl::StrFormat("%.1lf", percent_16bit), + "Only ", OneDigit(percent_16bit), "% of device computation is 16 bit. So you might want to replace " "more 32-bit Ops by 16-bit Ops to improve performance (if the " "reduced accuracy is acceptable)."); @@ -338,12 +338,10 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent, double device_op_time_eager_percent) { std::string recommendation = ""; if (host_op_time_eager_percent > kEagerReportThresholdInPercent) - absl::StrAppend(&recommendation, - absl::StrFormat("%.1f", host_op_time_eager_percent), + absl::StrAppend(&recommendation, OneDigit(host_op_time_eager_percent), "% of Op time on the host used eager execution. "); if (device_op_time_eager_percent > kEagerReportThresholdInPercent) - absl::StrAppend(&recommendation, - absl::StrFormat("%.1f", device_op_time_eager_percent), + absl::StrAppend(&recommendation, OneDigit(device_op_time_eager_percent), "% of Op time on the device used eager execution. "); if (!recommendation.empty()) absl::StrAppend(&recommendation, "Performance could be improved with ", @@ -358,7 +356,7 @@ std::string OutsideCompilationRecommendationHtml( kOutsideCompilationThresholdInPercent) return ""; return absl::StrCat( - absl::StrFormat("%.1lf", device_op_time_outside_compilation_percent), + OneDigit(device_op_time_outside_compilation_percent), " % of Op time on the device are for outside compilation. Performance " "could be improved by avoiding outside compilation."); } diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 6ba7025a5cd..664a65c880d 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -58,6 +58,14 @@ cc_library( hdrs = ["math_utils.h"], ) +cc_library( + name = "format_utils", + hdrs = ["format_utils.h"], + deps = [ + "@com_google_absl//absl/strings:str_format", + ], +) + cc_library( name = "html_utils", hdrs = ["html_utils.h"], diff --git a/tensorflow/core/profiler/utils/format_utils.h b/tensorflow/core/profiler/utils/format_utils.h new file mode 100644 index 00000000000..501bfd5f07e --- /dev/null +++ b/tensorflow/core/profiler/utils/format_utils.h @@ -0,0 +1,32 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PROFILER_UTILS_FORMAT_UTILS_H_ +#define TENSORFLOW_CORE_PROFILER_UTILS_FORMAT_UTILS_H_ + +#include + +#include "absl/strings/str_format.h" + +namespace tensorflow { +namespace profiler { + +// Formats d with one digit after the decimal point. +inline std::string OneDigit(double d) { return absl::StrFormat("%.1f", d); } + +} // namespace profiler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PROFILER_UTILS_FORMAT_UTILS_H_