[Profiler] Add optimization advices for tf-function.

PiperOrigin-RevId: 310957738
Change-Id: I97988752045bb4037098ebd6f4c63f0a65b2cf64
This commit is contained in:
A. Unique TensorFlower 2020-05-11 11:42:05 -07:00 committed by TensorFlower Gardener
parent 04d51a661f
commit 7ac062b354
10 changed files with 133 additions and 11 deletions

View File

@ -92,7 +92,9 @@ cc_library(
"//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
"//tensorflow/core/profiler/utils:errors",
"//tensorflow/core/profiler/utils:html_utils",
"//tensorflow/core/profiler/utils:math_utils",
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
"//tensorflow/core/profiler/utils:time_utils",
@ -118,6 +120,7 @@ cc_library(
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
"//tensorflow/core/profiler/utils:errors",
"//tensorflow/core/profiler/utils:event_span",
"//tensorflow/core/profiler/utils:html_utils",
"//tensorflow/core/profiler/utils:math_utils",
"//tensorflow/core/profiler/utils:tf_op_utils",
"//tensorflow/core/profiler/utils:time_utils",
@ -390,6 +393,7 @@ cc_library(
"//tensorflow/core:lib_internal",
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:math_utils",
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
"//tensorflow/core/profiler/utils:timespan",
"//tensorflow/core/profiler/utils:xplane_schema",

View File

@ -38,6 +38,7 @@ limitations under the License.
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/errors.h"
#include "tensorflow/core/profiler/utils/event_span.h"
#include "tensorflow/core/profiler/utils/html_utils.h"
#include "tensorflow/core/profiler/utils/math_utils.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
#include "tensorflow/core/profiler/utils/time_utils.h"
@ -327,10 +328,6 @@ InputOpDetails ConvertOpMetricsToInputOpDetails(const OpMetrics& op_metrics,
return details;
}
// Builds an HTML anchor whose href is `url`, whose visible label is `text`,
// and which carries target="_blank". Neither argument is escaped here.
string AnchorElement(absl::string_view url, absl::string_view text) {
  string anchor = absl::StrCat("<a href=\"", url, "\" target=\"_blank\">");
  absl::StrAppend(&anchor, text, "</a>");
  return anchor;
}
// Returns the ratio of the host-to-device time in each step to the step-time.
double RatioOfHostToDeviceTimeToStepTime(
const OpMetricsDb& host_tf_metrics_db,

View File

@ -30,7 +30,9 @@ limitations under the License.
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
#include "tensorflow/core/profiler/utils/errors.h"
#include "tensorflow/core/profiler/utils/html_utils.h"
#include "tensorflow/core/profiler/utils/math_utils.h"
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
#include "tensorflow/core/profiler/utils/time_utils.h"
@ -44,16 +46,17 @@ namespace {
// statement of suggestion will be made.
constexpr double kLowPrecisionPercentThreshold = 10;
// Name/percentage pair describing one tf-function; TfFunctionRecommendationHtml
// collects these for the functions it considers reporting and sorts them by
// expensive_call_percent.
struct TfFunctionInfo {
// Non-owning view of the function's name; the viewed string must outlive this
// struct.
absl::string_view function_name;
// Value copied from TfFunction::expensive_call_percent().
double expensive_call_percent;
};
// Creates an OverviewPageTip whose link field is set to `text`.
OverviewPageTip MakeOverviewPageTip(const string& text) {
  OverviewPageTip result;
  result.set_link(text);
  return result;
}
// Builds an HTML anchor whose href is `url`, whose visible label is `text`,
// and which carries target="_blank". Neither argument is escaped here.
string AnchorElement(const string& url, const string& text) {
  string anchor = absl::StrCat("<a href=\"", url, "\" target=\"_blank\">");
  absl::StrAppend(&anchor, text, "</a>");
  return anchor;
}
// Makes a recommendation for looking up a document.
// doc_url is expected to already be escaped suitably for use in an HTML
// attribute.
@ -125,10 +128,12 @@ void SetCommonRecommendation(const string& input_classification,
const string& input_statement,
const string& output_statement,
HardwareType hardware_type,
const string& tf_function_statement_html,
OverviewPageRecommendation* re) {
re->set_bottleneck(input_classification);
re->set_statement(input_statement);
re->set_output_statement(output_statement);
re->set_tf_function_statement_html(tf_function_statement_html);
ComputeHostTips(re);
ComputeDeviceTips(hardware_type, re);
ComputeDocumentationTips(re);
@ -245,6 +250,33 @@ OverviewPageRunEnvironment ComputeRunEnvironment(
return re;
}
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) {
std::vector<TfFunctionInfo> candidates;
for (const auto& name_fun : tf_function_db.tf_functions()) {
const auto& fun = name_fun.second;
if (fun.expensive_call_percent() >= kTfFunctionReportThresholdInPercent) {
candidates.push_back({name_fun.first, fun.expensive_call_percent()});
}
}
if (candidates.empty()) return "";
auto cmp = [](const TfFunctionInfo& a, const TfFunctionInfo& b) {
return a.expensive_call_percent > b.expensive_call_percent;
};
// Sorts candidates in descending order of expensive_call_percent.
absl::c_sort(candidates, cmp);
std::string expensive_functions = "";
auto num_functions_shown = std::min(3ul, candidates.size());
for (auto i = 0; i < num_functions_shown; i++) {
if (i > 0) absl::StrAppend(&expensive_functions, ", ");
absl::StrAppend(&expensive_functions, "\"", candidates[i].function_name,
"\"");
}
if (candidates.size() > num_functions_shown)
absl::StrAppend(&expensive_functions, " and more");
return absl::StrCat("Expensive tf-functions detected (", expensive_functions,
") due to either retracing or eager execution.");
}
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
HardwareType hardware_type) {
OverviewPage overview_page;
@ -258,9 +290,10 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
overview_page.input_analysis().step_details());
*overview_page.mutable_recommendation() = ComputeGenericRecommendation(
bottleneck, op_stats.device_op_metrics_db().precision_stats());
SetCommonRecommendation(bottleneck.input_classification(),
bottleneck.input_statement(), "", hardware_type,
overview_page.mutable_recommendation());
SetCommonRecommendation(
bottleneck.input_classification(), bottleneck.input_statement(), "",
hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()),
overview_page.mutable_recommendation());
return overview_page;
}

View File

@ -29,10 +29,16 @@ limitations under the License.
namespace tensorflow {
namespace profiler {
// Reports a tf-function optimization opportunity in the Overview Page if the
// expensive-call-time percentage is at or above this threshold for at least
// one of the tf-functions profiled.
constexpr double kTfFunctionReportThresholdInPercent = 20;
void SetCommonRecommendation(const string& input_classification,
const string& input_statement,
const string& output_statement,
HardwareType hardware_type,
const string& tf_function_statement_html,
OverviewPageRecommendation* re);
OverviewPageRecommendation ComputeGenericRecommendation(
@ -47,6 +53,9 @@ OverviewPageRunEnvironment ComputeRunEnvironment(
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
HardwareType hardware_type);
// Returns an HTML string that provides tf-function related recommendations.
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis);
} // namespace profiler

View File

@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/math_utils.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/timespan.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
@ -54,6 +55,21 @@ std::pair<TfFunctionExecutionMode, TfFunctionCompiler> Decode(
DCHECK(false);
}
double ComputeExpensiveCallPercent(const TfFunction& tf_function) {
// Computes the expensiveness in terms of time (rather than count).
uint64 total_call_time_ps = 0;
uint64 expensive_call_time_ps = 0;
for (const auto& mode_metrics : tf_function.metrics()) {
const auto mode = mode_metrics.first;
const auto& metrics = mode_metrics.second;
total_call_time_ps += metrics.self_time_ps();
if (mode == TRACED_MODE || mode == EAGER_MODE) {
expensive_call_time_ps += metrics.self_time_ps();
}
}
return SafeDivide(100.0 * expensive_call_time_ps, total_call_time_ps);
}
// Each invocation of a tf-function creates an ActivationRecord.
struct ActivationRecord {
std::string function_name; // name of the tf-function.
@ -133,6 +149,7 @@ void CombineTfFunction(const TfFunction& src, TfFunction* dst) {
CombineTfFunctionMetrics(src_metrics, dst_metrics);
}
}
dst->set_expensive_call_percent(ComputeExpensiveCallPercent(*dst));
}
// Execution history of all tf-functions invoked.
@ -210,6 +227,10 @@ class TfFunctionExecutions {
metrics->set_count(metrics->count() + 1);
metrics->set_self_time_ps(metrics->self_time_ps() + self_time_ps);
}
for (auto& name_fun : *result.mutable_tf_functions()) {
TfFunction& fun = name_fun.second;
fun.set_expensive_call_percent(ComputeExpensiveCallPercent(fun));
}
return result;
}

View File

@ -33,6 +33,8 @@ const absl::string_view kTracedXla = "traced-xla";
const absl::string_view kNotTracedNonXla = "notTraced-nonXla";
const absl::string_view kNotTracedXla = "notTraced-xla";
constexpr double kMaxError = 0.001;
TfFunctionDb ConvertXSpaceToTfFunctionDb(const XSpace& space) {
TfFunctionDb result;
const XPlane* host_plane = FindPlaneWithName(space, kHostThreads);
@ -75,6 +77,8 @@ TEST(ConvertXPlaneToTfFunctions, CombineTwoThreads) {
tf_function_db.tf_functions().at(kFunctionName);
EXPECT_EQ(tf_function.total_tracing_count(), 4);
EXPECT_EQ(tf_function.compiler(), MIXED_COMPILER);
EXPECT_NEAR(tf_function.expensive_call_percent(), 90, kMaxError);
const auto& metrics = tf_function.metrics();
EXPECT_EQ(metrics.size(), 2);
EXPECT_EQ(metrics.count(TRACED_MODE), 1);
@ -108,6 +112,7 @@ TEST(ConvertXPlaneToTfFunctions, NestedFunctions) {
tf_function_db.tf_functions().at(kOuterFunctionName);
EXPECT_EQ(outer.total_tracing_count(), 1);
EXPECT_EQ(outer.compiler(), OTHER_COMPILER);
EXPECT_NEAR(outer.expensive_call_percent(), 100, kMaxError);
const auto& outer_metrics = outer.metrics();
EXPECT_EQ(outer_metrics.size(), 1);
EXPECT_EQ(outer_metrics.count(TRACED_MODE), 1);
@ -118,6 +123,7 @@ TEST(ConvertXPlaneToTfFunctions, NestedFunctions) {
tf_function_db.tf_functions().at(kInnerFunctionName);
EXPECT_EQ(inner.total_tracing_count(), 0);
EXPECT_EQ(inner.compiler(), XLA_COMPILER);
EXPECT_NEAR(inner.expensive_call_percent(), 0, kMaxError);
const auto& inner_metrics = inner.metrics();
EXPECT_EQ(inner_metrics.size(), 1);
EXPECT_EQ(inner_metrics.count(NOT_TRACED_MODE), 1);
@ -148,6 +154,7 @@ TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) {
tf_function_db.tf_functions().at(kEagerFunctionName);
EXPECT_EQ(eager.total_tracing_count(), 0);
EXPECT_EQ(eager.compiler(), INVALID_COMPILER);
EXPECT_NEAR(eager.expensive_call_percent(), 100, kMaxError);
const auto& eager_metrics = eager.metrics();
EXPECT_EQ(eager_metrics.size(), 1);
EXPECT_EQ(eager_metrics.count(EAGER_MODE), 1);
@ -158,6 +165,7 @@ TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) {
tf_function_db.tf_functions().at(kConcreteFunctionName);
EXPECT_EQ(concrete.total_tracing_count(), 0);
EXPECT_EQ(concrete.compiler(), INVALID_COMPILER);
EXPECT_NEAR(concrete.expensive_call_percent(), 0, kMaxError);
const auto& concrete_metrics = concrete.metrics();
EXPECT_EQ(concrete_metrics.size(), 1);
EXPECT_EQ(concrete_metrics.count(CONCRETE_MODE), 1);

View File

@ -84,6 +84,9 @@ message OverviewPageRecommendation {
// A statement for output that recommends the next steps for investigating the
// bottleneck.
string output_statement = 9;
// A statement that recommends the next steps for investigating tf-function
// related bottlenecks (it is HTML so that it can link to other tools/docs).
string tf_function_statement_html = 10;
// A list of tips for improving host performance.
repeated OverviewPageTip host_tips = 3;
// A list of tips for improving device performance.

View File

@ -49,6 +49,9 @@ message TfFunction {
int64 total_tracing_count = 2;
// Compiler used to compile this function.
TfFunctionCompiler compiler = 3;
// Percentage of time spent in the expensive calls to this function in the
// profiled period.
double expensive_call_percent = 4;
}
// Statistics for all tf-functions.

View File

@ -51,6 +51,14 @@ cc_library(
hdrs = ["math_utils.h"],
)
# Header-only target (hdrs only, no srcs) exposing html_utils.h; depends on
# Abseil strings.
cc_library(
name = "html_utils",
hdrs = ["html_utils.h"],
deps = [
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "op_metrics_db_utils",
srcs = ["op_metrics_db_utils.cc"],

View File

@ -0,0 +1,36 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_
#define TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_
#include <string>
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace tensorflow {
namespace profiler {
// Returns an HTML anchor element that links to `url`, displays `text`, and
// carries target="_blank". Neither argument is escaped by this function.
inline std::string AnchorElement(absl::string_view url,
                                 absl::string_view text) {
  std::string anchor = absl::StrCat("<a href=\"", url, "\" target=\"_blank\">");
  absl::StrAppend(&anchor, text, "</a>");
  return anchor;
}
} // namespace profiler
} // namespace tensorflow
#endif // TENSORFLOW_CORE_PROFILER_UTILS_HTML_UTILS_H_