From 876ffcba20bed11dc3ca8dc17c49ebd19ed8cf69 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 19 Aug 2020 13:10:39 -0700 Subject: [PATCH] Add performance analysis for TPU outside compilation. PiperOrigin-RevId: 327492744 Change-Id: Ie899823ea66e68e15fbda3578acd9cf5893554cb --- tensorflow/core/profiler/convert/BUILD | 1 + .../convert/op_stats_to_overview_page.cc | 52 +++++++++++++++---- .../convert/op_stats_to_overview_page.h | 23 +++++--- .../profiler/protobuf/overview_page.proto | 11 +++- tensorflow/core/profiler/utils/tf_op_utils.h | 10 ++++ 5 files changed, 79 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 2274a227f4d..4931d528f50 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -108,6 +108,7 @@ cc_library( "//tensorflow/core/profiler/utils:kernel_stats_utils", "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:op_metrics_db_utils", + "//tensorflow/core/profiler/utils:tf_op_utils", "//tensorflow/core/profiler/utils:time_utils", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index 25391b99846..276181dd7bb 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -37,6 +37,7 @@ limitations under the License. #include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" +#include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" namespace tensorflow { @@ -128,18 +129,20 @@ std::string GeneratePrecisionStatement(const PrecisionStats& precision_stats) { } // namespace -void SetCommonRecommendation(absl::string_view input_classification, - absl::string_view input_statement, - absl::string_view output_statement, - HardwareType hardware_type, - absl::string_view tf_function_statement_html, - absl::string_view eager_statement_html, - OverviewPageRecommendation* re) { +void SetCommonRecommendation( + absl::string_view input_classification, absl::string_view input_statement, + absl::string_view output_statement, HardwareType hardware_type, + absl::string_view tf_function_statement_html, + absl::string_view eager_statement_html, + absl::string_view outside_compilation_statement_html, + OverviewPageRecommendation* re) { re->set_bottleneck(std::string(input_classification)); re->set_statement(std::string(input_statement)); re->set_output_statement(std::string(output_statement)); re->set_tf_function_statement_html(std::string(tf_function_statement_html)); re->set_eager_statement_html(std::string(eager_statement_html)); + re->set_outside_compilation_statement_html( + std::string(outside_compilation_statement_html)); ComputeHostTips(re); ComputeDeviceTips(hardware_type, re); ComputeDocumentationTips(re); @@ -222,6 +225,18 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { if (metrics.is_eager()) eager_device_op_time_ps += metrics.self_time_ps(); } } + // Figures out outside_compilation time from + // op_stats.device_op_metrics_db().metrics_db(). We don't use the + // {metrics.provenance(), metrics.name()} from + // device_tf_op_metrics_db.metrics_db(), because metrics.provenance() there is + // not set and metrics.name() can be either HLO-Op name or TF-Op name, which + // will confuse IsOutsideCompilationOp(). + uint64 outside_compilation_device_op_time_ps = 0; + for (const OpMetrics& metrics : + op_stats.device_op_metrics_db().metrics_db()) { + if (!IsOutsideCompilationOp(metrics.provenance(), metrics.name())) continue; + outside_compilation_device_op_time_ps += metrics.self_time_ps(); + } uint64 num_total_tf_ops = num_host_tf_ops + num_device_tf_ops; analysis.set_host_tf_op_percent( 100.0 * SafeDivide(num_host_tf_ops, num_total_tf_ops)); @@ -234,6 +249,9 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { analysis.set_device_op_time_eager_percent( 100.0 * SafeDivide(eager_device_op_time_ps, total_device_op_time_ps_exclude_idle)); + analysis.set_device_op_time_outside_compilation_percent( + 100.0 * SafeDivide(outside_compilation_device_op_time_ps, + total_device_op_time_ps_exclude_idle)); return analysis; } @@ -315,10 +333,12 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent, double device_op_time_eager_percent) { std::string recommendation = ""; if (host_op_time_eager_percent > kEagerReportThresholdInPercent) - absl::StrAppend(&recommendation, host_op_time_eager_percent, + absl::StrAppend(&recommendation, + absl::StrFormat("%.1f", host_op_time_eager_percent), "% of Op time on the host used eager execution. "); if (device_op_time_eager_percent > kEagerReportThresholdInPercent) - absl::StrAppend(&recommendation, device_op_time_eager_percent, + absl::StrAppend(&recommendation, + absl::StrFormat("%.1f", device_op_time_eager_percent), "% of Op time on the device used eager execution. "); if (!recommendation.empty()) absl::StrAppend(&recommendation, "Performance could be improved with ", @@ -327,6 +347,17 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent, return recommendation; } +std::string OutsideCompilationRecommendationHtml( + double device_op_time_outside_compilation_percent) { + if (device_op_time_outside_compilation_percent <= + kOutsideCompilationThresholdInPercent) + return ""; + return absl::StrCat( + absl::StrFormat("%.1lf", device_op_time_outside_compilation_percent), + " % of Op time on the device are for outside compilation. Performance " + "could be improved by avoiding outside compilation."); +} + OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) { OverviewPage overview_page; *overview_page.mutable_run_environment() = @@ -346,6 +377,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) { EagerRecommendationHtml( overview_page.analysis().host_op_time_eager_percent(), overview_page.analysis().device_op_time_eager_percent()), + OutsideCompilationRecommendationHtml( + overview_page.analysis() + .device_op_time_outside_compilation_percent()), overview_page.mutable_recommendation()); PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics()); return overview_page; diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h index 876f6847e9f..11edfc7b247 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h @@ -37,13 +37,18 @@ const double kTfFunctionReportThresholdInPercent = 20; // this threshold. const double kEagerReportThresholdInPercent = 10; -void SetCommonRecommendation(absl::string_view input_classification, - absl::string_view input_statement, - absl::string_view output_statement, - HardwareType hardware_type, - absl::string_view tf_function_statement_html, - absl::string_view eager_statement_html, - OverviewPageRecommendation* re); +// Reports outside-compilation opportunity in the Overview Page if the +// percent of Op time on device that is for outside compilation is over +// this threshold. +const double kOutsideCompilationThresholdInPercent = 5; + +void SetCommonRecommendation( + absl::string_view input_classification, absl::string_view input_statement, + absl::string_view output_statement, HardwareType hardware_type, + absl::string_view tf_function_statement_html, + absl::string_view eager_statement_html, + absl::string_view outside_compilation_statement_html, + OverviewPageRecommendation* re); OverviewPageRecommendation ComputeGenericRecommendation( const BottleneckAnalysis& bottleneck, @@ -63,6 +68,10 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db); std::string EagerRecommendationHtml(double host_op_time_eager_percent, double device_op_time_eager_percent); +// Returns a html which provides outside-compilation related recommendation. +std::string OutsideCompilationRecommendationHtml( + double device_op_time_outside_compilation_percent); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/protobuf/overview_page.proto b/tensorflow/core/profiler/protobuf/overview_page.proto index feb3423a00e..433f8df27a6 100644 --- a/tensorflow/core/profiler/protobuf/overview_page.proto +++ b/tensorflow/core/profiler/protobuf/overview_page.proto @@ -60,6 +60,9 @@ message OverviewPageAnalysis { // Percentage of TF-op execution time on the device (excluding the idle time) // that are in eager mode. double device_op_time_eager_percent = 15; + // Percentage of TF-op execution time on the device (excluding the idle time) + // that are for outside compilation. + double device_op_time_outside_compilation_percent = 16; } // Overview result for a performance tip to users. @@ -99,10 +102,14 @@ message OverviewPageRecommendation { // bottleneck. string output_statement = 9; // A statement that recommends the next steps for investigating eager-mode - // related bottleneck (it is a html so that it can link to other tools/docs.) + // related bottleneck (it is an html so that it can link to other tools/docs.) string eager_statement_html = 12; + // A statement that recommends the next steps for investigating + // outside-compilation related bottleneck (it is an html so that it can link + // to other tools/docs.) + string outside_compilation_statement_html = 13; // A statement that recommends the next steps for investigating tf-function - // related bottleneck (it is a html so that it can link to other tools/docs.) + // related bottleneck (it is an html so that it can link to other tools/docs.) string tf_function_statement_html = 10; // A list of tips for improving host performance. repeated OverviewPageTip host_tips = 3; diff --git a/tensorflow/core/profiler/utils/tf_op_utils.h b/tensorflow/core/profiler/utils/tf_op_utils.h index 76e6256164b..af14e1ccb8e 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.h +++ b/tensorflow/core/profiler/utils/tf_op_utils.h @@ -75,6 +75,16 @@ inline bool IsInfeedEnqueueOp(absl::string_view tf_op_type) { return tf_op_type == "InfeedEnqueue" || tf_op_type == "InfeedEnqueueTuple"; } +// Returns true if the given op is for outside compilation. +inline bool IsOutsideCompilationOp(absl::string_view tf_op_fullname, + absl::string_view hlo_expression) { + if (absl::EndsWith(tf_op_fullname, ":XlaSendToHost")) return true; + if (absl::StrContains(hlo_expression, "send-done") && + absl::StrContains(hlo_expression, "is_host_transfer=true")) + return true; + return false; +} + // Returns true if the given name is a TensorFlow embedding op. inline bool IsEmbeddingOp(absl::string_view tf_op_fullname) { return absl::StrContains(tf_op_fullname, "Embedding");