[Profiler] Add optimization advices for tf-function.
PiperOrigin-RevId: 310957738 Change-Id: I97988752045bb4037098ebd6f4c63f0a65b2cf64
This commit is contained in:
parent
04d51a661f
commit
7ac062b354
@ -92,7 +92,9 @@ cc_library(
|
||||
"//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:errors",
|
||||
"//tensorflow/core/profiler/utils:html_utils",
|
||||
"//tensorflow/core/profiler/utils:math_utils",
|
||||
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
|
||||
"//tensorflow/core/profiler/utils:time_utils",
|
||||
@ -118,6 +120,7 @@ cc_library(
|
||||
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:errors",
|
||||
"//tensorflow/core/profiler/utils:event_span",
|
||||
"//tensorflow/core/profiler/utils:html_utils",
|
||||
"//tensorflow/core/profiler/utils:math_utils",
|
||||
"//tensorflow/core/profiler/utils:tf_op_utils",
|
||||
"//tensorflow/core/profiler/utils:time_utils",
|
||||
@ -390,6 +393,7 @@ cc_library(
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:math_utils",
|
||||
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
|
||||
"//tensorflow/core/profiler/utils:timespan",
|
||||
"//tensorflow/core/profiler/utils:xplane_schema",
|
||||
|
@ -38,6 +38,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/errors.h"
|
||||
#include "tensorflow/core/profiler/utils/event_span.h"
|
||||
#include "tensorflow/core/profiler/utils/html_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||
@ -327,10 +328,6 @@ InputOpDetails ConvertOpMetricsToInputOpDetails(const OpMetrics& op_metrics,
|
||||
return details;
|
||||
}
|
||||
|
||||
// Builds an HTML anchor (<a>) that points at `url`, opens in a new tab
// (target="_blank") and displays `text`. Neither argument is escaped here.
string AnchorElement(absl::string_view url, absl::string_view text) {
  const string opening_tag =
      absl::StrCat("<a href=\"", url, "\" target=\"_blank\">");
  return absl::StrCat(opening_tag, text, "</a>");
}
|
||||
|
||||
// Returns the ratio of the host-to-device time in each step to the step-time.
|
||||
double RatioOfHostToDeviceTimeToStepTime(
|
||||
const OpMetricsDb& host_tf_metrics_db,
|
||||
|
@ -30,7 +30,9 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/errors.h"
|
||||
#include "tensorflow/core/profiler/utils/html_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||
@ -44,16 +46,17 @@ namespace {
|
||||
// statement of suggestion will be made.
|
||||
constexpr double kLowPrecisionPercentThreshold = 10;
|
||||
|
||||
struct TfFunctionInfo {
|
||||
absl::string_view function_name;
|
||||
double expensive_call_percent;
|
||||
};
|
||||
|
||||
// Returns an OverviewPageTip whose `link` field holds `text`.
OverviewPageTip MakeOverviewPageTip(const string& text) {
  OverviewPageTip result;
  result.set_link(text);
  return result;
}
|
||||
|
||||
// Returns the HTML for a link to `url` that opens in a new tab
// (target="_blank") and shows `text`. The arguments are inserted verbatim.
string AnchorElement(const string& url, const string& text) {
  const string link_open =
      absl::StrCat("<a href=\"", url, "\" target=\"_blank\">");
  const string link_close = "</a>";
  return absl::StrCat(link_open, text, link_close);
}
|
||||
|
||||
// Makes a recommendation for looking up a document.
|
||||
// doc_url is expected to already be escaped suitably for use in an HTML
|
||||
// attribute.
|
||||
@ -125,10 +128,12 @@ void SetCommonRecommendation(const string& input_classification,
|
||||
const string& input_statement,
|
||||
const string& output_statement,
|
||||
HardwareType hardware_type,
|
||||
const string& tf_function_statement_html,
|
||||
OverviewPageRecommendation* re) {
|
||||
re->set_bottleneck(input_classification);
|
||||
re->set_statement(input_statement);
|
||||
re->set_output_statement(output_statement);
|
||||
re->set_tf_function_statement_html(tf_function_statement_html);
|
||||
ComputeHostTips(re);
|
||||
ComputeDeviceTips(hardware_type, re);
|
||||
ComputeDocumentationTips(re);
|
||||
@ -245,6 +250,33 @@ OverviewPageRunEnvironment ComputeRunEnvironment(
|
||||
return re;
|
||||
}
|
||||
|
||||
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) {
|
||||
std::vector<TfFunctionInfo> candidates;
|
||||
for (const auto& name_fun : tf_function_db.tf_functions()) {
|
||||
const auto& fun = name_fun.second;
|
||||
if (fun.expensive_call_percent() >= kTfFunctionReportThresholdInPercent) {
|
||||
candidates.push_back({name_fun.first, fun.expensive_call_percent()});
|
||||
}
|
||||
}
|
||||
if (candidates.empty()) return "";
|
||||
auto cmp = [](const TfFunctionInfo& a, const TfFunctionInfo& b) {
|
||||
return a.expensive_call_percent > b.expensive_call_percent;
|
||||
};
|
||||
// Sorts candidates in descending order of expensive_call_percent.
|
||||
absl::c_sort(candidates, cmp);
|
||||
std::string expensive_functions = "";
|
||||
auto num_functions_shown = std::min(3ul, candidates.size());
|
||||
for (auto i = 0; i < num_functions_shown; i++) {
|
||||
if (i > 0) absl::StrAppend(&expensive_functions, ", ");
|
||||
absl::StrAppend(&expensive_functions, "\"", candidates[i].function_name,
|
||||
"\"");
|
||||
}
|
||||
if (candidates.size() > num_functions_shown)
|
||||
absl::StrAppend(&expensive_functions, " and more");
|
||||
return absl::StrCat("Expensive tf-functions detected (", expensive_functions,
|
||||
") due to either retracing or eager execution.");
|
||||
}
|
||||
|
||||
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
|
||||
HardwareType hardware_type) {
|
||||
OverviewPage overview_page;
|
||||
@ -258,9 +290,10 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
|
||||
overview_page.input_analysis().step_details());
|
||||
*overview_page.mutable_recommendation() = ComputeGenericRecommendation(
|
||||
bottleneck, op_stats.device_op_metrics_db().precision_stats());
|
||||
SetCommonRecommendation(bottleneck.input_classification(),
|
||||
bottleneck.input_statement(), "", hardware_type,
|
||||
overview_page.mutable_recommendation());
|
||||
SetCommonRecommendation(
|
||||
bottleneck.input_classification(), bottleneck.input_statement(), "",
|
||||
hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()),
|
||||
overview_page.mutable_recommendation());
|
||||
return overview_page;
|
||||
}
|
||||
|
||||
|
@ -29,10 +29,16 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
// Reports tf-function optimization opportunity in the Overview Page if the
|
||||
// expensive-call-time percentage is over this threshold for at least one of
|
||||
// the tf-functions profiled.
|
||||
const double kTfFunctionReportThresholdInPercent = 20;
|
||||
|
||||
void SetCommonRecommendation(const string& input_classification,
|
||||
const string& input_statement,
|
||||
const string& output_statement,
|
||||
HardwareType hardware_type,
|
||||
const string& tf_function_statement_html,
|
||||
OverviewPageRecommendation* re);
|
||||
|
||||
OverviewPageRecommendation ComputeGenericRecommendation(
|
||||
@ -47,6 +53,9 @@ OverviewPageRunEnvironment ComputeRunEnvironment(
|
||||
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
|
||||
HardwareType hardware_type);
|
||||
|
||||
// Returns an HTML string that provides tf-function related recommendations.
|
||||
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
|
||||
|
||||
void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis);
|
||||
|
||||
} // namespace profiler
|
||||
|
@ -25,6 +25,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
|
||||
#include "tensorflow/core/profiler/utils/timespan.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
@ -54,6 +55,21 @@ std::pair<TfFunctionExecutionMode, TfFunctionCompiler> Decode(
|
||||
DCHECK(false);
|
||||
}
|
||||
|
||||
double ComputeExpensiveCallPercent(const TfFunction& tf_function) {
|
||||
// Computes the expensiveness in terms of time (rather than count).
|
||||
uint64 total_call_time_ps = 0;
|
||||
uint64 expensive_call_time_ps = 0;
|
||||
for (const auto& mode_metrics : tf_function.metrics()) {
|
||||
const auto mode = mode_metrics.first;
|
||||
const auto& metrics = mode_metrics.second;
|
||||
total_call_time_ps += metrics.self_time_ps();
|
||||
if (mode == TRACED_MODE || mode == EAGER_MODE) {
|
||||
expensive_call_time_ps += metrics.self_time_ps();
|
||||
}
|
||||
}
|
||||
return SafeDivide(100.0 * expensive_call_time_ps, total_call_time_ps);
|
||||
}
|
||||
|
||||
// Each invocation of a tf-function creates an ActivationRecord.
|
||||
struct ActivationRecord {
|
||||
std::string function_name; // name of the tf-function.
|
||||
@ -133,6 +149,7 @@ void CombineTfFunction(const TfFunction& src, TfFunction* dst) {
|
||||
CombineTfFunctionMetrics(src_metrics, dst_metrics);
|
||||
}
|
||||
}
|
||||
dst->set_expensive_call_percent(ComputeExpensiveCallPercent(*dst));
|
||||
}
|
||||
|
||||
// Execution history of all tf-functions invoked.
|
||||
@ -210,6 +227,10 @@ class TfFunctionExecutions {
|
||||
metrics->set_count(metrics->count() + 1);
|
||||
metrics->set_self_time_ps(metrics->self_time_ps() + self_time_ps);
|
||||
}
|
||||
for (auto& name_fun : *result.mutable_tf_functions()) {
|
||||
TfFunction& fun = name_fun.second;
|
||||
fun.set_expensive_call_percent(ComputeExpensiveCallPercent(fun));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,8 @@ const absl::string_view kTracedXla = "traced-xla";
|
||||
const absl::string_view kNotTracedNonXla = "notTraced-nonXla";
|
||||
const absl::string_view kNotTracedXla = "notTraced-xla";
|
||||
|
||||
constexpr double kMaxError = 0.001;
|
||||
|
||||
TfFunctionDb ConvertXSpaceToTfFunctionDb(const XSpace& space) {
|
||||
TfFunctionDb result;
|
||||
const XPlane* host_plane = FindPlaneWithName(space, kHostThreads);
|
||||
@ -75,6 +77,8 @@ TEST(ConvertXPlaneToTfFunctions, CombineTwoThreads) {
|
||||
tf_function_db.tf_functions().at(kFunctionName);
|
||||
EXPECT_EQ(tf_function.total_tracing_count(), 4);
|
||||
EXPECT_EQ(tf_function.compiler(), MIXED_COMPILER);
|
||||
EXPECT_NEAR(tf_function.expensive_call_percent(), 90, kMaxError);
|
||||
|
||||
const auto& metrics = tf_function.metrics();
|
||||
EXPECT_EQ(metrics.size(), 2);
|
||||
EXPECT_EQ(metrics.count(TRACED_MODE), 1);
|
||||
@ -108,6 +112,7 @@ TEST(ConvertXPlaneToTfFunctions, NestedFunctions) {
|
||||
tf_function_db.tf_functions().at(kOuterFunctionName);
|
||||
EXPECT_EQ(outer.total_tracing_count(), 1);
|
||||
EXPECT_EQ(outer.compiler(), OTHER_COMPILER);
|
||||
EXPECT_NEAR(outer.expensive_call_percent(), 100, kMaxError);
|
||||
const auto& outer_metrics = outer.metrics();
|
||||
EXPECT_EQ(outer_metrics.size(), 1);
|
||||
EXPECT_EQ(outer_metrics.count(TRACED_MODE), 1);
|
||||
@ -118,6 +123,7 @@ TEST(ConvertXPlaneToTfFunctions, NestedFunctions) {
|
||||
tf_function_db.tf_functions().at(kInnerFunctionName);
|
||||
EXPECT_EQ(inner.total_tracing_count(), 0);
|
||||
EXPECT_EQ(inner.compiler(), XLA_COMPILER);
|
||||
EXPECT_NEAR(inner.expensive_call_percent(), 0, kMaxError);
|
||||
const auto& inner_metrics = inner.metrics();
|
||||
EXPECT_EQ(inner_metrics.size(), 1);
|
||||
EXPECT_EQ(inner_metrics.count(NOT_TRACED_MODE), 1);
|
||||
@ -148,6 +154,7 @@ TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) {
|
||||
tf_function_db.tf_functions().at(kEagerFunctionName);
|
||||
EXPECT_EQ(eager.total_tracing_count(), 0);
|
||||
EXPECT_EQ(eager.compiler(), INVALID_COMPILER);
|
||||
EXPECT_NEAR(eager.expensive_call_percent(), 100, kMaxError);
|
||||
const auto& eager_metrics = eager.metrics();
|
||||
EXPECT_EQ(eager_metrics.size(), 1);
|
||||
EXPECT_EQ(eager_metrics.count(EAGER_MODE), 1);
|
||||
@ -158,6 +165,7 @@ TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) {
|
||||
tf_function_db.tf_functions().at(kConcreteFunctionName);
|
||||
EXPECT_EQ(concrete.total_tracing_count(), 0);
|
||||
EXPECT_EQ(concrete.compiler(), INVALID_COMPILER);
|
||||
EXPECT_NEAR(concrete.expensive_call_percent(), 0, kMaxError);
|
||||
const auto& concrete_metrics = concrete.metrics();
|
||||
EXPECT_EQ(concrete_metrics.size(), 1);
|
||||
EXPECT_EQ(concrete_metrics.count(CONCRETE_MODE), 1);
|
||||
|
@ -84,6 +84,9 @@ message OverviewPageRecommendation {
|
||||
// A statement for output that recommends the next steps for investigating the
|
||||
// bottleneck.
|
||||
string output_statement = 9;
|
||||
// A statement that recommends the next steps for investigating tf-function
|
||||
// related bottleneck (it is HTML so that it can link to other tools/docs).
|
||||
string tf_function_statement_html = 10;
|
||||
// A list of tips for improving host performance.
|
||||
repeated OverviewPageTip host_tips = 3;
|
||||
// A list of tips for improving device performance.
|
||||
|
@ -49,6 +49,9 @@ message TfFunction {
|
||||
int64 total_tracing_count = 2;
|
||||
// Compiler used to compile this function.
|
||||
TfFunctionCompiler compiler = 3;
|
||||
// Percentage of time spent in the expensive calls to this function in the
|
||||
// profiled period.
|
||||
double expensive_call_percent = 4;
|
||||
}
|
||||
|
||||
// Statistics for all tf-functions.
|
||||
|
@ -51,6 +51,14 @@ cc_library(
|
||||
hdrs = ["math_utils.h"],
|
||||
)
|
||||
|
||||
# Header-only helpers for building HTML snippets.
cc_library(
    name = "html_utils",
    hdrs = ["html_utils.h"],
    deps = ["@com_google_absl//absl/strings"],
)
|
||||
|
||||
cc_library(
|
||||
name = "op_metrics_db_utils",
|
||||
srcs = ["op_metrics_db_utils.cc"],
|
||||
|
36
tensorflow/core/profiler/utils/html_utils.h
Normal file
36
tensorflow/core/profiler/utils/html_utils.h
Normal file
@ -0,0 +1,36 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_
|
||||
#define TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
// Creates a html that links to the given url with the given text.
|
||||
inline std::string AnchorElement(absl::string_view url,
|
||||
absl::string_view text) {
|
||||
return absl::StrCat("<a href=\"", url, "\" target=\"_blank\">", text, "</a>");
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_
|
Loading…
Reference in New Issue
Block a user