Add performance analysis for TPU outside compilation.

PiperOrigin-RevId: 327492744
Change-Id: Ie899823ea66e68e15fbda3578acd9cf5893554cb
This commit is contained in:
A. Unique TensorFlower 2020-08-19 13:10:39 -07:00 committed by TensorFlower Gardener
parent fc0ce38820
commit 876ffcba20
5 changed files with 79 additions and 18 deletions

View File

@ -108,6 +108,7 @@ cc_library(
"//tensorflow/core/profiler/utils:kernel_stats_utils", "//tensorflow/core/profiler/utils:kernel_stats_utils",
"//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:math_utils",
"//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:op_metrics_db_utils",
"//tensorflow/core/profiler/utils:tf_op_utils",
"//tensorflow/core/profiler/utils:time_utils", "//tensorflow/core/profiler/utils:time_utils",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/strings:str_format",

View File

@ -37,6 +37,7 @@ limitations under the License.
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
#include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h"
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
#include "tensorflow/core/profiler/utils/time_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h"
namespace tensorflow { namespace tensorflow {
@ -128,18 +129,20 @@ std::string GeneratePrecisionStatement(const PrecisionStats& precision_stats) {
} // namespace } // namespace
void SetCommonRecommendation(absl::string_view input_classification, void SetCommonRecommendation(
absl::string_view input_statement, absl::string_view input_classification, absl::string_view input_statement,
absl::string_view output_statement, absl::string_view output_statement, HardwareType hardware_type,
HardwareType hardware_type, absl::string_view tf_function_statement_html,
absl::string_view tf_function_statement_html, absl::string_view eager_statement_html,
absl::string_view eager_statement_html, absl::string_view outside_compilation_statement_html,
OverviewPageRecommendation* re) { OverviewPageRecommendation* re) {
re->set_bottleneck(std::string(input_classification)); re->set_bottleneck(std::string(input_classification));
re->set_statement(std::string(input_statement)); re->set_statement(std::string(input_statement));
re->set_output_statement(std::string(output_statement)); re->set_output_statement(std::string(output_statement));
re->set_tf_function_statement_html(std::string(tf_function_statement_html)); re->set_tf_function_statement_html(std::string(tf_function_statement_html));
re->set_eager_statement_html(std::string(eager_statement_html)); re->set_eager_statement_html(std::string(eager_statement_html));
re->set_outside_compilation_statement_html(
std::string(outside_compilation_statement_html));
ComputeHostTips(re); ComputeHostTips(re);
ComputeDeviceTips(hardware_type, re); ComputeDeviceTips(hardware_type, re);
ComputeDocumentationTips(re); ComputeDocumentationTips(re);
@ -222,6 +225,18 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
if (metrics.is_eager()) eager_device_op_time_ps += metrics.self_time_ps(); if (metrics.is_eager()) eager_device_op_time_ps += metrics.self_time_ps();
} }
} }
// Figures out outside_compilation time from
// op_stats.device_op_metrics_db().metrics_db(). We don't use the
// {metrics.provenance(), metrics.name()} from
// device_tf_op_metrics_db.metrics_db(), because metrics.provenance() there is
// not set and metrics.name() can be either HLO-Op name or TF-Op name, which
// will confuse IsOutsideCompilationOp().
uint64 outside_compilation_device_op_time_ps = 0;
for (const OpMetrics& metrics :
op_stats.device_op_metrics_db().metrics_db()) {
if (!IsOutsideCompilationOp(metrics.provenance(), metrics.name())) continue;
outside_compilation_device_op_time_ps += metrics.self_time_ps();
}
uint64 num_total_tf_ops = num_host_tf_ops + num_device_tf_ops; uint64 num_total_tf_ops = num_host_tf_ops + num_device_tf_ops;
analysis.set_host_tf_op_percent( analysis.set_host_tf_op_percent(
100.0 * SafeDivide(num_host_tf_ops, num_total_tf_ops)); 100.0 * SafeDivide(num_host_tf_ops, num_total_tf_ops));
@ -234,6 +249,9 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
analysis.set_device_op_time_eager_percent( analysis.set_device_op_time_eager_percent(
100.0 * SafeDivide(eager_device_op_time_ps, 100.0 * SafeDivide(eager_device_op_time_ps,
total_device_op_time_ps_exclude_idle)); total_device_op_time_ps_exclude_idle));
analysis.set_device_op_time_outside_compilation_percent(
100.0 * SafeDivide(outside_compilation_device_op_time_ps,
total_device_op_time_ps_exclude_idle));
return analysis; return analysis;
} }
@ -315,10 +333,12 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent,
double device_op_time_eager_percent) { double device_op_time_eager_percent) {
std::string recommendation = ""; std::string recommendation = "";
if (host_op_time_eager_percent > kEagerReportThresholdInPercent) if (host_op_time_eager_percent > kEagerReportThresholdInPercent)
absl::StrAppend(&recommendation, host_op_time_eager_percent, absl::StrAppend(&recommendation,
absl::StrFormat("%.1f", host_op_time_eager_percent),
"% of Op time on the host used eager execution. "); "% of Op time on the host used eager execution. ");
if (device_op_time_eager_percent > kEagerReportThresholdInPercent) if (device_op_time_eager_percent > kEagerReportThresholdInPercent)
absl::StrAppend(&recommendation, device_op_time_eager_percent, absl::StrAppend(&recommendation,
absl::StrFormat("%.1f", device_op_time_eager_percent),
"% of Op time on the device used eager execution. "); "% of Op time on the device used eager execution. ");
if (!recommendation.empty()) if (!recommendation.empty())
absl::StrAppend(&recommendation, "Performance could be improved with ", absl::StrAppend(&recommendation, "Performance could be improved with ",
@ -327,6 +347,17 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent,
return recommendation; return recommendation;
} }
// Returns an HTML statement recommending that outside compilation be avoided.
// Returns an empty string when device_op_time_outside_compilation_percent is
// at or below kOutsideCompilationThresholdInPercent.
std::string OutsideCompilationRecommendationHtml(
    double device_op_time_outside_compilation_percent) {
  // Nothing to report unless the share is above the reporting threshold.
  if (device_op_time_outside_compilation_percent <=
      kOutsideCompilationThresholdInPercent)
    return "";
  // Formatted like the eager-execution statement in this file: one decimal
  // place, with '%' placed directly after the number.
  return absl::StrCat(
      absl::StrFormat("%.1f", device_op_time_outside_compilation_percent),
      "% of Op time on the device is for outside compilation. Performance "
      "could be improved by avoiding outside compilation.");
}
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) { OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) {
OverviewPage overview_page; OverviewPage overview_page;
*overview_page.mutable_run_environment() = *overview_page.mutable_run_environment() =
@ -346,6 +377,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) {
EagerRecommendationHtml( EagerRecommendationHtml(
overview_page.analysis().host_op_time_eager_percent(), overview_page.analysis().host_op_time_eager_percent(),
overview_page.analysis().device_op_time_eager_percent()), overview_page.analysis().device_op_time_eager_percent()),
OutsideCompilationRecommendationHtml(
overview_page.analysis()
.device_op_time_outside_compilation_percent()),
overview_page.mutable_recommendation()); overview_page.mutable_recommendation());
PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics()); PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics());
return overview_page; return overview_page;

View File

@ -37,13 +37,18 @@ const double kTfFunctionReportThresholdInPercent = 20;
// this threshold. // this threshold.
const double kEagerReportThresholdInPercent = 10; const double kEagerReportThresholdInPercent = 10;
void SetCommonRecommendation(absl::string_view input_classification, // Reports outside-compilation opportunity in the Overview Page if the
absl::string_view input_statement, // percent of Op time on device that is for outside compilation is over
absl::string_view output_statement, // this threshold.
HardwareType hardware_type, const double kOutsideCompilationThresholdInPercent = 5;
absl::string_view tf_function_statement_html,
absl::string_view eager_statement_html, void SetCommonRecommendation(
OverviewPageRecommendation* re); absl::string_view input_classification, absl::string_view input_statement,
absl::string_view output_statement, HardwareType hardware_type,
absl::string_view tf_function_statement_html,
absl::string_view eager_statement_html,
absl::string_view outside_compilation_statement_html,
OverviewPageRecommendation* re);
OverviewPageRecommendation ComputeGenericRecommendation( OverviewPageRecommendation ComputeGenericRecommendation(
const BottleneckAnalysis& bottleneck, const BottleneckAnalysis& bottleneck,
@ -63,6 +68,10 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
std::string EagerRecommendationHtml(double host_op_time_eager_percent, std::string EagerRecommendationHtml(double host_op_time_eager_percent,
double device_op_time_eager_percent); double device_op_time_eager_percent);
// Returns an HTML statement that provides an outside-compilation related
// recommendation. The result is empty when
// device_op_time_outside_compilation_percent is at or below
// kOutsideCompilationThresholdInPercent.
std::string OutsideCompilationRecommendationHtml(
double device_op_time_outside_compilation_percent);
} // namespace profiler } // namespace profiler
} // namespace tensorflow } // namespace tensorflow

View File

@ -60,6 +60,9 @@ message OverviewPageAnalysis {
// Percentage of TF-op execution time on the device (excluding the idle time) // Percentage of TF-op execution time on the device (excluding the idle time)
// that are in eager mode. // that are in eager mode.
double device_op_time_eager_percent = 15; double device_op_time_eager_percent = 15;
// Percentage of TF-op execution time on the device (excluding the idle time)
// that is for outside compilation.
double device_op_time_outside_compilation_percent = 16;
} }
// Overview result for a performance tip to users. // Overview result for a performance tip to users.
@ -99,10 +102,14 @@ message OverviewPageRecommendation {
// bottleneck. // bottleneck.
string output_statement = 9; string output_statement = 9;
// A statement that recommends the next steps for investigating eager-mode // A statement that recommends the next steps for investigating eager-mode
// related bottleneck (it is a html so that it can link to other tools/docs.) // related bottleneck (it is an html so that it can link to other tools/docs.)
string eager_statement_html = 12; string eager_statement_html = 12;
// A statement that recommends the next steps for investigating
// outside-compilation related bottleneck (it is an html so that it can link
// to other tools/docs.)
string outside_compilation_statement_html = 13;
// A statement that recommends the next steps for investigating tf-function // A statement that recommends the next steps for investigating tf-function
// related bottleneck (it is a html so that it can link to other tools/docs.) // related bottleneck (it is an html so that it can link to other tools/docs.)
string tf_function_statement_html = 10; string tf_function_statement_html = 10;
// A list of tips for improving host performance. // A list of tips for improving host performance.
repeated OverviewPageTip host_tips = 3; repeated OverviewPageTip host_tips = 3;

View File

@ -75,6 +75,16 @@ inline bool IsInfeedEnqueueOp(absl::string_view tf_op_type) {
return tf_op_type == "InfeedEnqueue" || tf_op_type == "InfeedEnqueueTuple"; return tf_op_type == "InfeedEnqueue" || tf_op_type == "InfeedEnqueueTuple";
} }
// Reports whether an op belongs to outside compilation. An op qualifies when
// its TF-op full name ends with ":XlaSendToHost", or when its HLO expression
// contains both "send-done" and "is_host_transfer=true".
inline bool IsOutsideCompilationOp(absl::string_view tf_op_fullname,
                                   absl::string_view hlo_expression) {
  const bool is_send_to_host =
      absl::EndsWith(tf_op_fullname, ":XlaSendToHost");
  const bool is_host_send_done =
      absl::StrContains(hlo_expression, "send-done") &&
      absl::StrContains(hlo_expression, "is_host_transfer=true");
  return is_send_to_host || is_host_send_done;
}
// Returns true if the given name is a TensorFlow embedding op. // Returns true if the given name is a TensorFlow embedding op.
inline bool IsEmbeddingOp(absl::string_view tf_op_fullname) { inline bool IsEmbeddingOp(absl::string_view tf_op_fullname) {
return absl::StrContains(tf_op_fullname, "Embedding"); return absl::StrContains(tf_op_fullname, "Embedding");