[Profiler] Add eager-mode related stats and advice to the Overview Page.

PiperOrigin-RevId: 317742833
Change-Id: Ib78b44b673b29dd3ff0b81fa00068f64198feb30
This commit is contained in:
A. Unique TensorFlower 2020-06-22 15:03:37 -07:00 committed by TensorFlower Gardener
parent b85e963fb8
commit 00acf333e2
3 changed files with 60 additions and 1 deletions

View File

@ -130,11 +130,13 @@ void SetCommonRecommendation(absl::string_view input_classification,
absl::string_view output_statement,
HardwareType hardware_type,
absl::string_view tf_function_statement_html,
absl::string_view eager_statement_html,
OverviewPageRecommendation* re) {
re->set_bottleneck(std::string(input_classification));
re->set_statement(std::string(input_statement));
re->set_output_statement(std::string(output_statement));
re->set_tf_function_statement_html(std::string(tf_function_statement_html));
re->set_eager_statement_html(std::string(eager_statement_html));
ComputeHostTips(re);
ComputeDeviceTips(hardware_type, re);
ComputeDocumentationTips(re);
@ -188,13 +190,26 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
SafeDivide(
op_stats.device_op_metrics_db().precision_stats().compute_32bit_ps(),
total_device_compute_ps));
uint64 num_host_tf_ops = 0;
uint64 total_host_op_time_ps_exclude_idle = 0;
uint64 eager_host_op_time_ps = 0;
for (const OpMetrics& metrics : op_stats.host_op_metrics_db().metrics_db()) {
num_host_tf_ops += metrics.occurrences();
if (!IsIdleOp(metrics)) {
total_host_op_time_ps_exclude_idle += metrics.self_time_ps();
if (metrics.is_eager()) eager_host_op_time_ps += metrics.self_time_ps();
}
}
uint64 num_device_tf_ops = 0;
uint64 total_device_op_time_ps_exclude_idle = 0;
uint64 eager_device_op_time_ps = 0;
for (const OpMetrics& metrics : device_tf_op_metrics_db.metrics_db()) {
num_device_tf_ops += metrics.occurrences();
if (!IsIdleOp(metrics)) {
total_device_op_time_ps_exclude_idle += metrics.self_time_ps();
if (metrics.is_eager()) eager_device_op_time_ps += metrics.self_time_ps();
}
}
uint64 num_total_tf_ops = num_host_tf_ops + num_device_tf_ops;
analysis.set_host_tf_op_percent(
@ -202,6 +217,12 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
analysis.set_device_tf_op_percent(
100.0 * SafeDivide(num_device_tf_ops, num_total_tf_ops));
analysis.set_host_trace_level(op_stats.run_environment().host_trace_level());
analysis.set_host_op_time_eager_percent(
100.0 *
SafeDivide(eager_host_op_time_ps, total_host_op_time_ps_exclude_idle));
analysis.set_device_op_time_eager_percent(
100.0 * SafeDivide(eager_device_op_time_ps,
total_device_op_time_ps_exclude_idle));
return analysis;
}
@ -279,6 +300,22 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) {
") due to either retracing or eager execution.");
}
// Builds the eager-mode recommendation text for the Overview Page.
//
// A sentence reporting the eager percentage is emitted for the host and/or the
// device when the respective percentage is strictly greater than
// kEagerReportThresholdInPercent. If at least one sentence was emitted, a
// pointer to the tf.function guide (built with AnchorElement) is appended.
// Returns an empty string when neither percentage exceeds the threshold.
std::string EagerRecommendationHtml(double host_op_time_eager_percent,
                                    double device_op_time_eager_percent) {
  const bool report_host =
      host_op_time_eager_percent > kEagerReportThresholdInPercent;
  const bool report_device =
      device_op_time_eager_percent > kEagerReportThresholdInPercent;

  std::string html;
  // Neither side crosses the threshold: there is nothing to recommend.
  if (!report_host && !report_device) {
    return html;
  }

  if (report_host) {
    absl::StrAppend(&html, host_op_time_eager_percent,
                    "% of Op time on the host used eager execution. ");
  }
  if (report_device) {
    absl::StrAppend(&html, device_op_time_eager_percent,
                    "% of Op time on the device used eager execution. ");
  }

  // At least one sentence was written above, so close with the suggestion.
  absl::StrAppend(&html, "Performance could be improved with ",
                  AnchorElement("https://www.tensorflow.org/guide/function",
                                "tf.function."));
  return html;
}
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
HardwareType hardware_type) {
OverviewPage overview_page;
@ -295,6 +332,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
SetCommonRecommendation(
bottleneck.input_classification(), bottleneck.input_statement(), "",
hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()),
EagerRecommendationHtml(
overview_page.analysis().host_op_time_eager_percent(),
overview_page.analysis().device_op_time_eager_percent()),
overview_page.mutable_recommendation());
PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics());
return overview_page;

View File

@ -32,11 +32,17 @@ namespace profiler {
// the tf-functions profiled.
const double kTfFunctionReportThresholdInPercent = 20;
// Reports eager-mode optimization opportunity in the Overview Page if the
// percent of Op time on host (or device) that is spent on eager mode is over
// this threshold. The comparison is strict: a percentage exactly equal to the
// threshold is not reported.
const double kEagerReportThresholdInPercent = 10;
// Populates the recommendation `re` with the given statements:
//   input_classification       -> bottleneck
//   input_statement            -> statement
//   output_statement           -> output_statement
//   tf_function_statement_html -> tf_function_statement_html
//   eager_statement_html       -> eager_statement_html
// and then computes the host tips, the device tips (for `hardware_type`) and
// the documentation tips on `re`. `re` is an output parameter and is
// dereferenced unconditionally.
void SetCommonRecommendation(absl::string_view input_classification,
absl::string_view input_statement,
absl::string_view output_statement,
HardwareType hardware_type,
absl::string_view tf_function_statement_html,
absl::string_view eager_statement_html,
OverviewPageRecommendation* re);
OverviewPageRecommendation ComputeGenericRecommendation(
@ -54,6 +60,10 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
// Returns an HTML string that provides tf-function related recommendation.
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
// Returns an HTML string that provides eager-mode related recommendation.
// For the host and for the device, a sentence reporting the eager percentage
// is included only when that percentage is greater than
// kEagerReportThresholdInPercent; if any sentence is included, a link to the
// tf.function guide follows. Returns an empty string otherwise.
std::string EagerRecommendationHtml(double host_op_time_eager_percent,
double device_op_time_eager_percent);
} // namespace profiler
} // namespace tensorflow

View File

@ -49,6 +49,12 @@ message OverviewPageAnalysis {
double device_tf_op_percent = 12;
// Host trace level.
uint32 host_trace_level = 13;
// Percentage of TF-op execution time on the host (excluding the idle time)
// that is in eager mode.
double host_op_time_eager_percent = 14;
// Percentage of TF-op execution time on the device (excluding the idle time)
// that is in eager mode.
double device_op_time_eager_percent = 15;
}
// Overview result for a performance tip to users.
@ -87,8 +93,11 @@ message OverviewPageRecommendation {
// A statement for output that recommends the next steps for investigating the
// bottleneck.
string output_statement = 9;
// A statement that recommends the next steps for investigating eager-mode
// related bottleneck (it is a html so that it can link to other tools/docs.)
string eager_statement_html = 12;
// A statement that recommends the next steps for investigating tf-function
// related bottleneck (it is a html so that it can link to other tools/docs.)
string tf_function_statement_html = 10;
// A list of tips for improving host performance.
repeated OverviewPageTip host_tips = 3;