Add performance analysis for TPU outside compilation.
PiperOrigin-RevId: 327492744 Change-Id: Ie899823ea66e68e15fbda3578acd9cf5893554cb
This commit is contained in:
parent
fc0ce38820
commit
876ffcba20
@ -108,6 +108,7 @@ cc_library(
|
||||
"//tensorflow/core/profiler/utils:kernel_stats_utils",
|
||||
"//tensorflow/core/profiler/utils:math_utils",
|
||||
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
|
||||
"//tensorflow/core/profiler/utils:tf_op_utils",
|
||||
"//tensorflow/core/profiler/utils:time_utils",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
|
@ -37,6 +37,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -128,18 +129,20 @@ std::string GeneratePrecisionStatement(const PrecisionStats& precision_stats) {
|
||||
|
||||
} // namespace
|
||||
|
||||
void SetCommonRecommendation(absl::string_view input_classification,
|
||||
absl::string_view input_statement,
|
||||
absl::string_view output_statement,
|
||||
HardwareType hardware_type,
|
||||
absl::string_view tf_function_statement_html,
|
||||
absl::string_view eager_statement_html,
|
||||
OverviewPageRecommendation* re) {
|
||||
void SetCommonRecommendation(
|
||||
absl::string_view input_classification, absl::string_view input_statement,
|
||||
absl::string_view output_statement, HardwareType hardware_type,
|
||||
absl::string_view tf_function_statement_html,
|
||||
absl::string_view eager_statement_html,
|
||||
absl::string_view outside_compilation_statement_html,
|
||||
OverviewPageRecommendation* re) {
|
||||
re->set_bottleneck(std::string(input_classification));
|
||||
re->set_statement(std::string(input_statement));
|
||||
re->set_output_statement(std::string(output_statement));
|
||||
re->set_tf_function_statement_html(std::string(tf_function_statement_html));
|
||||
re->set_eager_statement_html(std::string(eager_statement_html));
|
||||
re->set_outside_compilation_statement_html(
|
||||
std::string(outside_compilation_statement_html));
|
||||
ComputeHostTips(re);
|
||||
ComputeDeviceTips(hardware_type, re);
|
||||
ComputeDocumentationTips(re);
|
||||
@ -222,6 +225,18 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
|
||||
if (metrics.is_eager()) eager_device_op_time_ps += metrics.self_time_ps();
|
||||
}
|
||||
}
|
||||
// Figures out outside_compilation time from
|
||||
// op_stats.device_op_metrics_db().metrics_db(). We don't use the
|
||||
// {metrics.provenance(), metrics.name()} from
|
||||
// device_tf_op_metrics_db.metrics_db(), because metrics.provenance() there is
|
||||
// not set and metrics.name() can be either HLO-Op name or TF-Op name, which
|
||||
// will confuse IsOutsideCompilationOp().
|
||||
uint64 outside_compilation_device_op_time_ps = 0;
|
||||
for (const OpMetrics& metrics :
|
||||
op_stats.device_op_metrics_db().metrics_db()) {
|
||||
if (!IsOutsideCompilationOp(metrics.provenance(), metrics.name())) continue;
|
||||
outside_compilation_device_op_time_ps += metrics.self_time_ps();
|
||||
}
|
||||
uint64 num_total_tf_ops = num_host_tf_ops + num_device_tf_ops;
|
||||
analysis.set_host_tf_op_percent(
|
||||
100.0 * SafeDivide(num_host_tf_ops, num_total_tf_ops));
|
||||
@ -234,6 +249,9 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
|
||||
analysis.set_device_op_time_eager_percent(
|
||||
100.0 * SafeDivide(eager_device_op_time_ps,
|
||||
total_device_op_time_ps_exclude_idle));
|
||||
analysis.set_device_op_time_outside_compilation_percent(
|
||||
100.0 * SafeDivide(outside_compilation_device_op_time_ps,
|
||||
total_device_op_time_ps_exclude_idle));
|
||||
return analysis;
|
||||
}
|
||||
|
||||
@ -315,10 +333,12 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent,
|
||||
double device_op_time_eager_percent) {
|
||||
std::string recommendation = "";
|
||||
if (host_op_time_eager_percent > kEagerReportThresholdInPercent)
|
||||
absl::StrAppend(&recommendation, host_op_time_eager_percent,
|
||||
absl::StrAppend(&recommendation,
|
||||
absl::StrFormat("%.1f", host_op_time_eager_percent),
|
||||
"% of Op time on the host used eager execution. ");
|
||||
if (device_op_time_eager_percent > kEagerReportThresholdInPercent)
|
||||
absl::StrAppend(&recommendation, device_op_time_eager_percent,
|
||||
absl::StrAppend(&recommendation,
|
||||
absl::StrFormat("%.1f", device_op_time_eager_percent),
|
||||
"% of Op time on the device used eager execution. ");
|
||||
if (!recommendation.empty())
|
||||
absl::StrAppend(&recommendation, "Performance could be improved with ",
|
||||
@ -327,6 +347,17 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent,
|
||||
return recommendation;
|
||||
}
|
||||
|
||||
std::string OutsideCompilationRecommendationHtml(
|
||||
double device_op_time_outside_compilation_percent) {
|
||||
if (device_op_time_outside_compilation_percent <=
|
||||
kOutsideCompilationThresholdInPercent)
|
||||
return "";
|
||||
return absl::StrCat(
|
||||
absl::StrFormat("%.1lf", device_op_time_outside_compilation_percent),
|
||||
" % of Op time on the device are for outside compilation. Performance "
|
||||
"could be improved by avoiding outside compilation.");
|
||||
}
|
||||
|
||||
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) {
|
||||
OverviewPage overview_page;
|
||||
*overview_page.mutable_run_environment() =
|
||||
@ -346,6 +377,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) {
|
||||
EagerRecommendationHtml(
|
||||
overview_page.analysis().host_op_time_eager_percent(),
|
||||
overview_page.analysis().device_op_time_eager_percent()),
|
||||
OutsideCompilationRecommendationHtml(
|
||||
overview_page.analysis()
|
||||
.device_op_time_outside_compilation_percent()),
|
||||
overview_page.mutable_recommendation());
|
||||
PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics());
|
||||
return overview_page;
|
||||
|
@ -37,13 +37,18 @@ const double kTfFunctionReportThresholdInPercent = 20;
|
||||
// this threshold.
|
||||
const double kEagerReportThresholdInPercent = 10;
|
||||
|
||||
void SetCommonRecommendation(absl::string_view input_classification,
|
||||
absl::string_view input_statement,
|
||||
absl::string_view output_statement,
|
||||
HardwareType hardware_type,
|
||||
absl::string_view tf_function_statement_html,
|
||||
absl::string_view eager_statement_html,
|
||||
OverviewPageRecommendation* re);
|
||||
// Reports outside-compilation opportunity in the Overview Page if the
|
||||
// percent of Op time on device that is for outside compilation is over
|
||||
// this threshold.
|
||||
const double kOutsideCompilationThresholdInPercent = 5;
|
||||
|
||||
void SetCommonRecommendation(
|
||||
absl::string_view input_classification, absl::string_view input_statement,
|
||||
absl::string_view output_statement, HardwareType hardware_type,
|
||||
absl::string_view tf_function_statement_html,
|
||||
absl::string_view eager_statement_html,
|
||||
absl::string_view outside_compilation_statement_html,
|
||||
OverviewPageRecommendation* re);
|
||||
|
||||
OverviewPageRecommendation ComputeGenericRecommendation(
|
||||
const BottleneckAnalysis& bottleneck,
|
||||
@ -63,6 +68,10 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
|
||||
std::string EagerRecommendationHtml(double host_op_time_eager_percent,
|
||||
double device_op_time_eager_percent);
|
||||
|
||||
// Returns an HTML string that provides an outside-compilation-related
// recommendation.
|
||||
std::string OutsideCompilationRecommendationHtml(
|
||||
double device_op_time_outside_compilation_percent);
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -60,6 +60,9 @@ message OverviewPageAnalysis {
|
||||
// Percentage of TF-op execution time on the device (excluding the idle time)
|
||||
// that are in eager mode.
|
||||
double device_op_time_eager_percent = 15;
|
||||
// Percentage of TF-op execution time on the device (excluding the idle time)
|
||||
// that are for outside compilation.
|
||||
double device_op_time_outside_compilation_percent = 16;
|
||||
}
|
||||
|
||||
// Overview result for a performance tip to users.
|
||||
@ -99,10 +102,14 @@ message OverviewPageRecommendation {
|
||||
// bottleneck.
|
||||
string output_statement = 9;
|
||||
// A statement that recommends the next steps for investigating eager-mode
|
||||
// related bottleneck (it is a html so that it can link to other tools/docs.)
|
||||
// related bottleneck (it is an html so that it can link to other tools/docs.)
|
||||
string eager_statement_html = 12;
|
||||
// A statement that recommends the next steps for investigating
|
||||
// outside-compilation related bottleneck (it is an html so that it can link
|
||||
// to other tools/docs.)
|
||||
string outside_compilation_statement_html = 13;
|
||||
// A statement that recommends the next steps for investigating tf-function
|
||||
// related bottleneck (it is a html so that it can link to other tools/docs.)
|
||||
// related bottleneck (it is an html so that it can link to other tools/docs.)
|
||||
string tf_function_statement_html = 10;
|
||||
// A list of tips for improving host performance.
|
||||
repeated OverviewPageTip host_tips = 3;
|
||||
|
@ -75,6 +75,16 @@ inline bool IsInfeedEnqueueOp(absl::string_view tf_op_type) {
|
||||
return tf_op_type == "InfeedEnqueue" || tf_op_type == "InfeedEnqueueTuple";
|
||||
}
|
||||
|
||||
// Returns true if the given op is for outside compilation.
|
||||
inline bool IsOutsideCompilationOp(absl::string_view tf_op_fullname,
|
||||
absl::string_view hlo_expression) {
|
||||
if (absl::EndsWith(tf_op_fullname, ":XlaSendToHost")) return true;
|
||||
if (absl::StrContains(hlo_expression, "send-done") &&
|
||||
absl::StrContains(hlo_expression, "is_host_transfer=true"))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if the given name is a TensorFlow embedding op.
|
||||
inline bool IsEmbeddingOp(absl::string_view tf_op_fullname) {
|
||||
return absl::StrContains(tf_op_fullname, "Embedding");
|
||||
|
Loading…
Reference in New Issue
Block a user