Add performance analysis for TPU outside compilation.
PiperOrigin-RevId: 327492744 Change-Id: Ie899823ea66e68e15fbda3578acd9cf5893554cb
This commit is contained in:
parent
fc0ce38820
commit
876ffcba20
@ -108,6 +108,7 @@ cc_library(
|
|||||||
"//tensorflow/core/profiler/utils:kernel_stats_utils",
|
"//tensorflow/core/profiler/utils:kernel_stats_utils",
|
||||||
"//tensorflow/core/profiler/utils:math_utils",
|
"//tensorflow/core/profiler/utils:math_utils",
|
||||||
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
|
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
|
||||||
|
"//tensorflow/core/profiler/utils:tf_op_utils",
|
||||||
"//tensorflow/core/profiler/utils:time_utils",
|
"//tensorflow/core/profiler/utils:time_utils",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
"@com_google_absl//absl/strings:str_format",
|
"@com_google_absl//absl/strings:str_format",
|
||||||
|
@ -37,6 +37,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
|
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
|
||||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||||
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
|
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
|
||||||
|
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
||||||
#include "tensorflow/core/profiler/utils/time_utils.h"
|
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
@ -128,18 +129,20 @@ std::string GeneratePrecisionStatement(const PrecisionStats& precision_stats) {
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void SetCommonRecommendation(absl::string_view input_classification,
|
void SetCommonRecommendation(
|
||||||
absl::string_view input_statement,
|
absl::string_view input_classification, absl::string_view input_statement,
|
||||||
absl::string_view output_statement,
|
absl::string_view output_statement, HardwareType hardware_type,
|
||||||
HardwareType hardware_type,
|
absl::string_view tf_function_statement_html,
|
||||||
absl::string_view tf_function_statement_html,
|
absl::string_view eager_statement_html,
|
||||||
absl::string_view eager_statement_html,
|
absl::string_view outside_compilation_statement_html,
|
||||||
OverviewPageRecommendation* re) {
|
OverviewPageRecommendation* re) {
|
||||||
re->set_bottleneck(std::string(input_classification));
|
re->set_bottleneck(std::string(input_classification));
|
||||||
re->set_statement(std::string(input_statement));
|
re->set_statement(std::string(input_statement));
|
||||||
re->set_output_statement(std::string(output_statement));
|
re->set_output_statement(std::string(output_statement));
|
||||||
re->set_tf_function_statement_html(std::string(tf_function_statement_html));
|
re->set_tf_function_statement_html(std::string(tf_function_statement_html));
|
||||||
re->set_eager_statement_html(std::string(eager_statement_html));
|
re->set_eager_statement_html(std::string(eager_statement_html));
|
||||||
|
re->set_outside_compilation_statement_html(
|
||||||
|
std::string(outside_compilation_statement_html));
|
||||||
ComputeHostTips(re);
|
ComputeHostTips(re);
|
||||||
ComputeDeviceTips(hardware_type, re);
|
ComputeDeviceTips(hardware_type, re);
|
||||||
ComputeDocumentationTips(re);
|
ComputeDocumentationTips(re);
|
||||||
@ -222,6 +225,18 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
|
|||||||
if (metrics.is_eager()) eager_device_op_time_ps += metrics.self_time_ps();
|
if (metrics.is_eager()) eager_device_op_time_ps += metrics.self_time_ps();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Figures out outside_compilation time from
|
||||||
|
// op_stats.device_op_metrics_db().metrics_db(). We don't use the
|
||||||
|
// {metrics.provenance(), metrics.name()} from
|
||||||
|
// device_tf_op_metrics_db.metrics_db(), because metrics.provenance() there is
|
||||||
|
// not set and metrics.name() can be either HLO-Op name or TF-Op name, which
|
||||||
|
// will confuse IsOutsideCompilationOp().
|
||||||
|
uint64 outside_compilation_device_op_time_ps = 0;
|
||||||
|
for (const OpMetrics& metrics :
|
||||||
|
op_stats.device_op_metrics_db().metrics_db()) {
|
||||||
|
if (!IsOutsideCompilationOp(metrics.provenance(), metrics.name())) continue;
|
||||||
|
outside_compilation_device_op_time_ps += metrics.self_time_ps();
|
||||||
|
}
|
||||||
uint64 num_total_tf_ops = num_host_tf_ops + num_device_tf_ops;
|
uint64 num_total_tf_ops = num_host_tf_ops + num_device_tf_ops;
|
||||||
analysis.set_host_tf_op_percent(
|
analysis.set_host_tf_op_percent(
|
||||||
100.0 * SafeDivide(num_host_tf_ops, num_total_tf_ops));
|
100.0 * SafeDivide(num_host_tf_ops, num_total_tf_ops));
|
||||||
@ -234,6 +249,9 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
|
|||||||
analysis.set_device_op_time_eager_percent(
|
analysis.set_device_op_time_eager_percent(
|
||||||
100.0 * SafeDivide(eager_device_op_time_ps,
|
100.0 * SafeDivide(eager_device_op_time_ps,
|
||||||
total_device_op_time_ps_exclude_idle));
|
total_device_op_time_ps_exclude_idle));
|
||||||
|
analysis.set_device_op_time_outside_compilation_percent(
|
||||||
|
100.0 * SafeDivide(outside_compilation_device_op_time_ps,
|
||||||
|
total_device_op_time_ps_exclude_idle));
|
||||||
return analysis;
|
return analysis;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -315,10 +333,12 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent,
|
|||||||
double device_op_time_eager_percent) {
|
double device_op_time_eager_percent) {
|
||||||
std::string recommendation = "";
|
std::string recommendation = "";
|
||||||
if (host_op_time_eager_percent > kEagerReportThresholdInPercent)
|
if (host_op_time_eager_percent > kEagerReportThresholdInPercent)
|
||||||
absl::StrAppend(&recommendation, host_op_time_eager_percent,
|
absl::StrAppend(&recommendation,
|
||||||
|
absl::StrFormat("%.1f", host_op_time_eager_percent),
|
||||||
"% of Op time on the host used eager execution. ");
|
"% of Op time on the host used eager execution. ");
|
||||||
if (device_op_time_eager_percent > kEagerReportThresholdInPercent)
|
if (device_op_time_eager_percent > kEagerReportThresholdInPercent)
|
||||||
absl::StrAppend(&recommendation, device_op_time_eager_percent,
|
absl::StrAppend(&recommendation,
|
||||||
|
absl::StrFormat("%.1f", device_op_time_eager_percent),
|
||||||
"% of Op time on the device used eager execution. ");
|
"% of Op time on the device used eager execution. ");
|
||||||
if (!recommendation.empty())
|
if (!recommendation.empty())
|
||||||
absl::StrAppend(&recommendation, "Performance could be improved with ",
|
absl::StrAppend(&recommendation, "Performance could be improved with ",
|
||||||
@ -327,6 +347,17 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent,
|
|||||||
return recommendation;
|
return recommendation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string OutsideCompilationRecommendationHtml(
|
||||||
|
double device_op_time_outside_compilation_percent) {
|
||||||
|
if (device_op_time_outside_compilation_percent <=
|
||||||
|
kOutsideCompilationThresholdInPercent)
|
||||||
|
return "";
|
||||||
|
return absl::StrCat(
|
||||||
|
absl::StrFormat("%.1lf", device_op_time_outside_compilation_percent),
|
||||||
|
" % of Op time on the device are for outside compilation. Performance "
|
||||||
|
"could be improved by avoiding outside compilation.");
|
||||||
|
}
|
||||||
|
|
||||||
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) {
|
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) {
|
||||||
OverviewPage overview_page;
|
OverviewPage overview_page;
|
||||||
*overview_page.mutable_run_environment() =
|
*overview_page.mutable_run_environment() =
|
||||||
@ -346,6 +377,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) {
|
|||||||
EagerRecommendationHtml(
|
EagerRecommendationHtml(
|
||||||
overview_page.analysis().host_op_time_eager_percent(),
|
overview_page.analysis().host_op_time_eager_percent(),
|
||||||
overview_page.analysis().device_op_time_eager_percent()),
|
overview_page.analysis().device_op_time_eager_percent()),
|
||||||
|
OutsideCompilationRecommendationHtml(
|
||||||
|
overview_page.analysis()
|
||||||
|
.device_op_time_outside_compilation_percent()),
|
||||||
overview_page.mutable_recommendation());
|
overview_page.mutable_recommendation());
|
||||||
PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics());
|
PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics());
|
||||||
return overview_page;
|
return overview_page;
|
||||||
|
@ -37,13 +37,18 @@ const double kTfFunctionReportThresholdInPercent = 20;
|
|||||||
// this threshold.
|
// this threshold.
|
||||||
const double kEagerReportThresholdInPercent = 10;
|
const double kEagerReportThresholdInPercent = 10;
|
||||||
|
|
||||||
void SetCommonRecommendation(absl::string_view input_classification,
|
// Reports outside-compilation opportunity in the Overview Page if the
|
||||||
absl::string_view input_statement,
|
// percent of Op time on device that is for outside compilation is over
|
||||||
absl::string_view output_statement,
|
// this threshold.
|
||||||
HardwareType hardware_type,
|
const double kOutsideCompilationThresholdInPercent = 5;
|
||||||
absl::string_view tf_function_statement_html,
|
|
||||||
absl::string_view eager_statement_html,
|
void SetCommonRecommendation(
|
||||||
OverviewPageRecommendation* re);
|
absl::string_view input_classification, absl::string_view input_statement,
|
||||||
|
absl::string_view output_statement, HardwareType hardware_type,
|
||||||
|
absl::string_view tf_function_statement_html,
|
||||||
|
absl::string_view eager_statement_html,
|
||||||
|
absl::string_view outside_compilation_statement_html,
|
||||||
|
OverviewPageRecommendation* re);
|
||||||
|
|
||||||
OverviewPageRecommendation ComputeGenericRecommendation(
|
OverviewPageRecommendation ComputeGenericRecommendation(
|
||||||
const BottleneckAnalysis& bottleneck,
|
const BottleneckAnalysis& bottleneck,
|
||||||
@ -63,6 +68,10 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
|
|||||||
std::string EagerRecommendationHtml(double host_op_time_eager_percent,
|
std::string EagerRecommendationHtml(double host_op_time_eager_percent,
|
||||||
double device_op_time_eager_percent);
|
double device_op_time_eager_percent);
|
||||||
|
|
||||||
|
// Returns an HTML string that provides an outside-compilation-related recommendation.
|
||||||
|
std::string OutsideCompilationRecommendationHtml(
|
||||||
|
double device_op_time_outside_compilation_percent);
|
||||||
|
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
@ -60,6 +60,9 @@ message OverviewPageAnalysis {
|
|||||||
// Percentage of TF-op execution time on the device (excluding the idle time)
|
// Percentage of TF-op execution time on the device (excluding the idle time)
|
||||||
// that are in eager mode.
|
// that are in eager mode.
|
||||||
double device_op_time_eager_percent = 15;
|
double device_op_time_eager_percent = 15;
|
||||||
|
// Percentage of TF-op execution time on the device (excluding the idle time)
|
||||||
|
// that is for outside compilation.
|
||||||
|
double device_op_time_outside_compilation_percent = 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Overview result for a performance tip to users.
|
// Overview result for a performance tip to users.
|
||||||
@ -99,10 +102,14 @@ message OverviewPageRecommendation {
|
|||||||
// bottleneck.
|
// bottleneck.
|
||||||
string output_statement = 9;
|
string output_statement = 9;
|
||||||
// A statement that recommends the next steps for investigating eager-mode
|
// A statement that recommends the next steps for investigating eager-mode
|
||||||
// related bottleneck (it is a html so that it can link to other tools/docs.)
|
// related bottleneck (it is an html so that it can link to other tools/docs.)
|
||||||
string eager_statement_html = 12;
|
string eager_statement_html = 12;
|
||||||
|
// A statement that recommends the next steps for investigating
|
||||||
|
// outside-compilation related bottleneck (it is an html so that it can link
|
||||||
|
// to other tools/docs.)
|
||||||
|
string outside_compilation_statement_html = 13;
|
||||||
// A statement that recommends the next steps for investigating tf-function
|
// A statement that recommends the next steps for investigating tf-function
|
||||||
// related bottleneck (it is a html so that it can link to other tools/docs.)
|
// related bottleneck (it is an html so that it can link to other tools/docs.)
|
||||||
string tf_function_statement_html = 10;
|
string tf_function_statement_html = 10;
|
||||||
// A list of tips for improving host performance.
|
// A list of tips for improving host performance.
|
||||||
repeated OverviewPageTip host_tips = 3;
|
repeated OverviewPageTip host_tips = 3;
|
||||||
|
@ -75,6 +75,16 @@ inline bool IsInfeedEnqueueOp(absl::string_view tf_op_type) {
|
|||||||
return tf_op_type == "InfeedEnqueue" || tf_op_type == "InfeedEnqueueTuple";
|
return tf_op_type == "InfeedEnqueue" || tf_op_type == "InfeedEnqueueTuple";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true if the given op is for outside compilation.
|
||||||
|
inline bool IsOutsideCompilationOp(absl::string_view tf_op_fullname,
|
||||||
|
absl::string_view hlo_expression) {
|
||||||
|
if (absl::EndsWith(tf_op_fullname, ":XlaSendToHost")) return true;
|
||||||
|
if (absl::StrContains(hlo_expression, "send-done") &&
|
||||||
|
absl::StrContains(hlo_expression, "is_host_transfer=true"))
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns true if the given name is a TensorFlow embedding op.
|
// Returns true if the given name is a TensorFlow embedding op.
|
||||||
inline bool IsEmbeddingOp(absl::string_view tf_op_fullname) {
|
inline bool IsEmbeddingOp(absl::string_view tf_op_fullname) {
|
||||||
return absl::StrContains(tf_op_fullname, "Embedding");
|
return absl::StrContains(tf_op_fullname, "Embedding");
|
||||||
|
Loading…
Reference in New Issue
Block a user