Added RunEnvironment to OpStats.
Added a converter from OpStats to OverviewPage. Added input and bottleneck analysis to InputPipelineAnalysis. PiperOrigin-RevId: 289735025 Change-Id: Ice4b2db5f241573afecce52aa882216ea16bd74c
This commit is contained in:
parent
230ebd5d96
commit
99eb226655
@ -66,6 +66,28 @@ cc_library(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "op_stats_to_overview_page",
|
||||||
|
srcs = ["op_stats_to_overview_page.cc"],
|
||||||
|
hdrs = ["op_stats_to_overview_page.h"],
|
||||||
|
deps = [
|
||||||
|
":op_metrics_to_record",
|
||||||
|
":op_stats_to_input_pipeline_analysis",
|
||||||
|
"//tensorflow/core:lib",
|
||||||
|
"//tensorflow/core:lib_internal",
|
||||||
|
"//tensorflow/core/platform:logging",
|
||||||
|
"//tensorflow/core/profiler/protobuf:hardware_types_proto_cc",
|
||||||
|
"//tensorflow/core/profiler/protobuf:input_pipeline_proto_cc",
|
||||||
|
"//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
|
||||||
|
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||||
|
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
|
||||||
|
"//tensorflow/core/profiler/utils:math_utils",
|
||||||
|
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
|
||||||
|
"//tensorflow/core/profiler/utils:time_utils",
|
||||||
|
"@com_google_absl//absl/strings",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "op_stats_to_input_pipeline_analysis",
|
name = "op_stats_to_input_pipeline_analysis",
|
||||||
srcs = ["op_stats_to_input_pipeline_analysis.cc"],
|
srcs = ["op_stats_to_input_pipeline_analysis.cc"],
|
||||||
@ -88,6 +110,7 @@ cc_library(
|
|||||||
"@com_google_absl//absl/algorithm:container",
|
"@com_google_absl//absl/algorithm:container",
|
||||||
"@com_google_absl//absl/container:flat_hash_map",
|
"@com_google_absl//absl/container:flat_hash_map",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
|
"@com_google_absl//absl/strings:str_format",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
|||||||
#include "absl/container/flat_hash_map.h"
|
#include "absl/container/flat_hash_map.h"
|
||||||
#include "absl/strings/match.h"
|
#include "absl/strings/match.h"
|
||||||
#include "absl/strings/str_cat.h"
|
#include "absl/strings/str_cat.h"
|
||||||
|
#include "absl/strings/str_format.h"
|
||||||
#include "absl/strings/string_view.h"
|
#include "absl/strings/string_view.h"
|
||||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||||
#include "tensorflow/core/platform/logging.h"
|
#include "tensorflow/core/platform/logging.h"
|
||||||
@ -46,6 +47,28 @@ namespace {
|
|||||||
|
|
||||||
const double kNumPsPerMs = 1000000000.0;
|
const double kNumPsPerMs = 1000000000.0;
|
||||||
|
|
||||||
|
// If the percentage of step time that is due to infeed is less than
|
||||||
|
// kModeratelyInfeedBoundThresholdInPercent, it is considered NOT
|
||||||
|
// input-bound; else if it is less than
|
||||||
|
// kHighlyInfeedBoundThresholdInPercent, it is considered MODERATELY
|
||||||
|
// input-bound; otherwise it is considered HIGHLY input-bound.
|
||||||
|
constexpr double kModeratelyInfeedBoundThresholdInPercent = 5;
|
||||||
|
constexpr double kHighlyInfeedBoundThresholdInPercent = 20;
|
||||||
|
// If the percentage of step time that is due to kernel launch is less than
|
||||||
|
// kModeratelyKernelLaunchBoundThresholdInPercent, it is considered NOT
|
||||||
|
// kernel-launch bound; else if it is less than
|
||||||
|
// kHighlyKernelLaunchBoundThresholdInPercent, it is considered MODERATELY
|
||||||
|
// kernel-launch bound; otherwise it is considered HIGHLY kernel-launch bound.
|
||||||
|
constexpr double kModeratelyKernelLaunchBoundThresholdInPercent = 3;
|
||||||
|
constexpr double kHighlyKernelLaunchBoundThresholdInPercent = 15;
|
||||||
|
// If the percentage of step time that is due to all other time is less than
|
||||||
|
// kModeratelyAllOtherBoundThresholdInPercent, it is considered NOT
|
||||||
|
// all-other bound; else if it is less than
|
||||||
|
// kHighlyAllOtherBoundThresholdInPercent, it is considered MODERATELY
|
||||||
|
// all-other bound; otherwise it is considered HIGHLY all-other bound.
|
||||||
|
constexpr double kModeratelyAllOtherBoundThresholdInPercent = 3;
|
||||||
|
constexpr double kHighlyAllOtherBoundThresholdInPercent = 15;
|
||||||
|
|
||||||
template <class Collection>
|
template <class Collection>
|
||||||
double GetTimeInMs(const Collection& type_ps, EventType event_type) {
|
double GetTimeInMs(const Collection& type_ps, EventType event_type) {
|
||||||
return PicosToMillis(gtl::FindWithDefault(type_ps, event_type, /*value=*/0));
|
return PicosToMillis(gtl::FindWithDefault(type_ps, event_type, /*value=*/0));
|
||||||
@ -317,6 +340,47 @@ double RatioOfHostToDeviceTimeToStepTime(
|
|||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void KernelLaunchAnalysis(double kernel_launch_percent, int* observation_index,
|
||||||
|
string* kernel_launch_classification,
|
||||||
|
string* kernel_launch_statement) {
|
||||||
|
string percent_str = absl::StrFormat("%.1lf", kernel_launch_percent);
|
||||||
|
if (kernel_launch_percent >= kHighlyKernelLaunchBoundThresholdInPercent) {
|
||||||
|
*kernel_launch_classification = "high";
|
||||||
|
*kernel_launch_statement = absl::StrCat(
|
||||||
|
"(", ++*observation_index, ") ", percent_str,
|
||||||
|
" % of the total step time sampled is spent on Kernel Launch.");
|
||||||
|
} else if (kernel_launch_percent >=
|
||||||
|
kModeratelyKernelLaunchBoundThresholdInPercent) {
|
||||||
|
*kernel_launch_classification = "moderate";
|
||||||
|
*kernel_launch_statement = absl::StrCat(
|
||||||
|
"(", ++*observation_index, ") ", percent_str,
|
||||||
|
" % of the total step time sampled is spent on Kernel Launch.");
|
||||||
|
} else {
|
||||||
|
*kernel_launch_classification = "no";
|
||||||
|
*kernel_launch_statement = "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AllOtherAnalysis(double all_other_percent, int* observation_index,
|
||||||
|
string* all_other_classification,
|
||||||
|
string* all_other_statement) {
|
||||||
|
string percent_str = absl::StrFormat("%.1lf", all_other_percent);
|
||||||
|
if (all_other_percent >= kHighlyAllOtherBoundThresholdInPercent) {
|
||||||
|
*all_other_classification = "high";
|
||||||
|
*all_other_statement = absl::StrCat(
|
||||||
|
"(", ++*observation_index, ") ", percent_str,
|
||||||
|
" % of the total step time sampled is spent on All Others time.");
|
||||||
|
} else if (all_other_percent >= kModeratelyAllOtherBoundThresholdInPercent) {
|
||||||
|
*all_other_classification = "moderate";
|
||||||
|
*all_other_statement = absl::StrCat(
|
||||||
|
"(", ++*observation_index, ") ", percent_str,
|
||||||
|
" % of the total step time sampled is spent on All Others time.");
|
||||||
|
} else {
|
||||||
|
*all_other_classification = "no";
|
||||||
|
*all_other_statement = "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void GenerateHostResult(const OpMetricsDb& host_tf_metrics_db,
|
void GenerateHostResult(const OpMetricsDb& host_tf_metrics_db,
|
||||||
@ -451,5 +515,104 @@ InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Classifies the input bottleneck from the percentage of step time spent
// waiting for input, and writes a numbered, human-readable explanation.
//
// *input_classification is set to:
//   "host"   when infeed_percent >= kHighlyInfeedBoundThresholdInPercent,
//   "both"   when infeed_percent >= kModeratelyInfeedBoundThresholdInPercent,
//   "device" otherwise.
// In every case *observation_index is incremented and its new value numbers
// the sentence stored in *input_statement.
// hardware_type is accepted but not read by this function.
void InfeedAnalysis(HardwareType hardware_type, double infeed_percent,
                    int* observation_index, string* input_classification,
                    string* input_statement) {
  const absl::string_view other_time_label = "other time";
  const string percent_text = absl::StrFormat("%.1lf", infeed_percent);

  if (infeed_percent >= kHighlyInfeedBoundThresholdInPercent) {
    *input_classification = "host";
    *input_statement = absl::StrCat(
        "(", ++*observation_index, ") ",
        "Your program is HIGHLY input-bound because ", percent_text,
        "% of the total step time sampled is waiting for input. Therefore, "
        "you should first focus on reducing the input time.");
    return;
  }

  if (infeed_percent >= kModeratelyInfeedBoundThresholdInPercent) {
    *input_classification = "both";
    *input_statement = absl::StrCat(
        "(", ++*observation_index, ") ",
        "Your program is MODERATELY input-bound because ", percent_text,
        "% of the total step time sampled is waiting for input. Therefore, "
        "you would need to reduce both the input time and ",
        other_time_label, ".");
    return;
  }

  *input_classification = "device";
  *input_statement = absl::StrCat(
      "(", ++*observation_index, ") ",
      "Your program is NOT input-bound because only ", percent_text,
      "% of the total step time sampled is waiting for "
      "input. Therefore, you should focus on "
      "reducing ",
      other_time_label, ".");
}
|
||||||
|
|
||||||
|
// Derives the overall bottleneck of the profiled program from the per-step
// breakdowns stored in `result`.
//
// Every entry of result.step_details() is unpacked as a PerGenericStepDetails
// and its step time, input time (host_wait_input_ms + host_to_device_ms),
// host-prepare time and unknown time are summed across steps. The sums are
// turned into percentages of the total step time and handed to
// InfeedAnalysis, KernelLaunchAnalysis and AllOtherAnalysis, which share one
// running observation index so the produced statements are numbered
// consecutively.
//
// Returns:
//   - a default-constructed GenericBottleneck if an entry with a non-empty
//     type_url cannot be unpacked (an error is logged);
//   - an "unknown" input classification with "no" kernel-launch / all-other
//     classifications if the total step time is zero;
//   - otherwise the classifications and statements computed by the three
//     analysis helpers.
GenericBottleneck GenericOverallBottleneck(
    const InputPipelineAnalysisResult& result) {
  double total_step_time_ms = 0;
  double total_input_ms = 0;
  double total_host_prepare_ms = 0;
  double total_unknown_ms = 0;
  for (const google::protobuf::Any& step_details : result.step_details()) {
    PerGenericStepDetails details;
    bool success = step_details.UnpackTo(&details);
    // An entry whose type_url is empty is tolerated: `details` is then used
    // exactly as UnpackTo left it.
    if (!success && !step_details.type_url().empty()) {
      // LOG terminates the record itself, so no explicit std::endl is needed.
      LOG(ERROR) << "Unable to unpack step_breakdown. Expected: generic";
      return {};
    }
    total_step_time_ms += details.step_time_ms();
    total_input_ms +=
        details.host_wait_input_ms() + details.host_to_device_ms();
    total_host_prepare_ms += details.host_prepare_ms();
    total_unknown_ms += details.unknown_time_ms();
  }
  if (total_step_time_ms == 0) {
    // Guard against dividing by zero below.
    return {{"unknown",
             "No step time measured. Therefore we cannot tell where the "
             "performance bottleneck is."},
            "no",
            "",
            "no",
            ""};
  }
  double input_percent = 100.0 * total_input_ms / total_step_time_ms;
  // Host-prepare time is what this analysis reports as kernel-launch time.
  double kernel_launch_percent =
      100.0 * total_host_prepare_ms / total_step_time_ms;
  double all_other_percent = 100.0 * total_unknown_ms / total_step_time_ms;

  // Shared across the three analyses so that statements are numbered 1, 2, ...
  int observation_index = 0;
  string input_classification;
  string input_statement;
  InfeedAnalysis(result.hardware_type(), input_percent, &observation_index,
                 &input_classification, &input_statement);

  string kernel_launch_classification;
  string kernel_launch_statement;
  KernelLaunchAnalysis(kernel_launch_percent, &observation_index,
                       &kernel_launch_classification, &kernel_launch_statement);

  string all_other_classification;
  string all_other_statement;
  AllOtherAnalysis(all_other_percent, &observation_index,
                   &all_other_classification, &all_other_statement);

  return {{
              input_classification,
              input_statement,
          },
          kernel_launch_classification,
          kernel_launch_statement,
          all_other_classification,
          all_other_statement};
}
|
||||||
|
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
@ -25,8 +25,30 @@ limitations under the License.
|
|||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace profiler {
|
namespace profiler {
|
||||||
|
|
||||||
InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
|
// Common performance bottleneck.
|
||||||
const OpStats& op_stats, const HardwareType& hardware_type);
|
struct CommonBottleneck {
|
||||||
|
// Indicates if input is a bottleneck. Possible values: "host", "device",
|
||||||
|
// "both", or "unknown"
|
||||||
|
string input_classification;
|
||||||
|
// A human-readable description of the input bottleneck.
|
||||||
|
string input_statement;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Generic hardware bottleneck.
|
||||||
|
struct GenericBottleneck {
|
||||||
|
// Bottleneck that exists on all hardware.
|
||||||
|
CommonBottleneck common;
|
||||||
|
// Indicates if kernel launching is a bottleneck. Possible values: "no",
|
||||||
|
// "moderate", "high".
|
||||||
|
string kernel_launch_classification;
|
||||||
|
// A human-readable description of the kernel launching overhead.
|
||||||
|
string kernel_launch_statement;
|
||||||
|
// Indicates if all other is a bottleneck. Possible values: "no", "moderate",
|
||||||
|
// "high".
|
||||||
|
string all_other_classification;
|
||||||
|
// A human-readable description of the all other overhead.
|
||||||
|
string all_other_statement;
|
||||||
|
};
|
||||||
|
|
||||||
// Computes the summary of step time in milliseconds.
|
// Computes the summary of step time in milliseconds.
|
||||||
StepSummary ComputeStepTimeSummaryInMs(
|
StepSummary ComputeStepTimeSummaryInMs(
|
||||||
@ -38,6 +60,17 @@ void GenerateHostResult(const OpMetricsDb& host_tf_metrics_db,
|
|||||||
|
|
||||||
InputPipelineAnalysisRecommendation GenerateRecommendation();
|
InputPipelineAnalysisRecommendation GenerateRecommendation();
|
||||||
|
|
||||||
|
// Returns the performance bottleneck of the program executed.
|
||||||
|
GenericBottleneck GenericOverallBottleneck(
|
||||||
|
const InputPipelineAnalysisResult& result);
|
||||||
|
|
||||||
|
InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
|
||||||
|
const OpStats& op_stats, const HardwareType& hardware_type);
|
||||||
|
|
||||||
|
void InfeedAnalysis(HardwareType hardware_type, double infeed_percent,
|
||||||
|
int* observation_index, string* input_classification,
|
||||||
|
string* input_statement);
|
||||||
|
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
160
tensorflow/core/profiler/convert/op_stats_to_overview_page.cc
Normal file
160
tensorflow/core/profiler/convert/op_stats_to_overview_page.cc
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "google/protobuf/any.pb.h"
|
||||||
|
#include "tensorflow/core/platform/logging.h"
|
||||||
|
#include "tensorflow/core/platform/protobuf.h"
|
||||||
|
#include "tensorflow/core/platform/types.h"
|
||||||
|
#include "tensorflow/core/profiler/convert/op_metrics_to_record.h"
|
||||||
|
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||||
|
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
|
||||||
|
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
namespace profiler {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Builds an OverviewPageTip whose link field holds `text`.
OverviewPageTip MakeOverviewPageTip(const string& text) {
  OverviewPageTip result;
  result.set_link(text);
  return result;
}
|
||||||
|
|
||||||
|
// Returns an HTML anchor element that points at `url`, opens in a new tab
// (target="_blank") and displays `text`. Neither argument is escaped here.
string AnchorElement(const string& url, const string& text) {
  string anchor = absl::StrCat("<a href=\"", url, "\" target=\"_blank\">");
  absl::StrAppend(&anchor, text, "</a>");
  return anchor;
}
|
||||||
|
|
||||||
|
// Makes a recommendation for looking up a document.
|
||||||
|
// doc_url is expected to already be escaped suitably for use in an HTML
|
||||||
|
// attribute.
|
||||||
|
OverviewPageTip MakeOverviewPageTipDocLink(const string& doc_url,
                                           const string& text) {
  // The tip's link is the rendered <a> element, not the bare URL.
  return MakeOverviewPageTip(AnchorElement(doc_url, text));
}
|
||||||
|
|
||||||
|
// Appends the host-side tips to `re`, in a fixed order: first the
// input_pipeline_analyzer tip, then the trace_viewer tip.
void ComputeHostTips(OverviewPageRecommendation* re) {
  static const char* const kHostTips[] = {
      "input_pipeline_analyzer (especially Section 3 for the breakdown of "
      "input operations on the Host)",
      "trace_viewer (look at the activities on the timeline of each Host "
      "Thread near the bottom of the trace view)",
  };
  for (const char* tip_text : kHostTips) {
    *re->add_host_tips() = MakeOverviewPageTip(tip_text);
  }
}
|
||||||
|
|
||||||
|
// Appends the device-side tips to `re`: an op_profile tip that names the
// device (HardwareType_Name of `hardware_type`), followed by a trace_viewer
// tip that names the timeline to inspect ("TPU core" for TPU, the device name
// otherwise).
void ComputeDeviceTips(HardwareType hardware_type,
                       OverviewPageRecommendation* re) {
  const string& device_name = HardwareType_Name(hardware_type);
  string timeline_name = device_name;
  if (hardware_type == tensorflow::profiler::TPU) {
    timeline_name = "TPU core";
  }

  const string op_profile_text = absl::StrCat(
      "op_profile (identify the time-consuming operations executed on the ",
      device_name, ")");
  const string trace_viewer_text = absl::StrCat(
      "trace_viewer (look at the activities on the timeline of each ",
      timeline_name, " in the trace view)");

  *re->add_device_tips() = MakeOverviewPageTip(op_profile_text);
  *re->add_device_tips() = MakeOverviewPageTip(trace_viewer_text);
}
|
||||||
|
|
||||||
|
// Appends the single FAQ tip to `re`.
void ComputeFaqTips(OverviewPageRecommendation* re) {
  OverviewPageTip* faq_tip = re->add_faq_tips();
  *faq_tip = MakeOverviewPageTip("Refer to the Cloud tools FAQ");
}
|
||||||
|
|
||||||
|
// Appends the documentation tip to `re`: an anchor linking to the tf.data
// Dataset API page, labelled "TensorFlow Input Pipeline API".
void ComputeDocumentationTips(OverviewPageRecommendation* re) {
  const string dataset_api_url =
      "https://www.tensorflow.org/versions/master/api_docs/python/tf/data/"
      "Dataset";
  const string link_label = "TensorFlow Input Pipeline API";
  *re->add_documentation_tips() =
      MakeOverviewPageTipDocLink(dataset_api_url, link_label);
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void SetCommonRecommendation(const CommonBottleneck& bottleneck,
|
||||||
|
HardwareType hardware_type,
|
||||||
|
OverviewPageRecommendation* re) {
|
||||||
|
re->set_bottleneck(bottleneck.input_classification);
|
||||||
|
re->set_statement(bottleneck.input_statement);
|
||||||
|
ComputeHostTips(re);
|
||||||
|
ComputeDeviceTips(hardware_type, re);
|
||||||
|
ComputeDocumentationTips(re);
|
||||||
|
ComputeFaqTips(re);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns an OverviewPageRecommendation whose `recommendation` Any field
// carries a GenericRecommendation populated with the kernel-launch and
// all-other classifications and statements of `bottleneck`. No other field of
// the returned message is set here.
OverviewPageRecommendation ComputeGenericRecommendation(
    const GenericBottleneck& bottleneck) {
  GenericRecommendation details;
  details.set_kernel_launch_bottleneck(bottleneck.kernel_launch_classification);
  details.set_kernel_launch_statement(bottleneck.kernel_launch_statement);
  details.set_all_other_bottleneck(bottleneck.all_other_classification);
  details.set_all_other_statement(bottleneck.all_other_statement);

  OverviewPageRecommendation recommendation;
  recommendation.mutable_recommendation()->PackFrom(details);
  return recommendation;
}
|
||||||
|
|
||||||
|
// Builds the "top device ops" section of the overview page.
//
// The device HLO metrics in `op_stats` are converted to a TF-op metrics
// database, and up to kNumTopOpsShown entries, as returned by
// SortedOpMetricsDb, are emitted. For each op this records its name,
// category, self-time fraction of the total device time, the running
// cumulative fraction, and the flop rate (flops divided by the op time in
// nanoseconds).
OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
  constexpr int kNumTopOpsShown = 10;

  const OpMetricsDb tf_metrics_db =
      CreateTfMetricsDbFromHloMetricsDb(op_stats.device_op_metrics_db());
  const uint64 device_total_ps = tf_metrics_db.total_time_ps();
  const auto top_ops = SortedOpMetricsDb(tf_metrics_db, kNumTopOpsShown);

  OverviewPageAnalysis analysis;
  double running_fraction = 0.0;
  for (const OpMetrics* op_metrics : top_ops) {
    OverviewTfOp* entry = analysis.add_top_device_ops();
    entry->set_name(op_metrics->name());
    entry->set_category(op_metrics->category());
    entry->set_self_time_fraction(
        SafeDivide(op_metrics->self_time_ps(), device_total_ps));
    // Accumulate the value as stored in the message.
    running_fraction += entry->self_time_fraction();
    entry->set_cumulative_time_fraction(running_fraction);
    entry->set_flop_rate(
        SafeDivide(op_metrics->flops(), PicosToNanos(op_metrics->time_ps())));
  }
  return analysis;
}
|
||||||
|
|
||||||
|
// Converts `op_stats` into the OverviewPage shown by the profiler.
//
// The page is assembled from:
//   - run_environment: copied from `op_stats`;
//   - analysis: the top device ops (ComputeAnalysisResult);
//   - input_analysis: the input-pipeline analysis for `hardware_type`;
//   - recommendation: the generic bottleneck derived from that input
//     analysis, plus the common tips.
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
                                          HardwareType hardware_type) {
  OverviewPageAnalysis analysis = ComputeAnalysisResult(op_stats);
  InputPipelineAnalysisResult input_analysis =
      ConvertOpStatsToInputPipelineAnalysis(op_stats, hardware_type);
  // The bottleneck must be computed before input_analysis is moved from below.
  GenericBottleneck bottleneck = GenericOverallBottleneck(input_analysis);
  OverviewPageRecommendation recommendation =
      ComputeGenericRecommendation(bottleneck);
  SetCommonRecommendation(bottleneck.common, hardware_type, &recommendation);

  OverviewPage overview_page;
  *overview_page.mutable_run_environment() = op_stats.run_environment();
  // The locals are not used again, so move them instead of deep-copying.
  *overview_page.mutable_analysis() = std::move(analysis);
  *overview_page.mutable_input_analysis() = std::move(input_analysis);
  *overview_page.mutable_recommendation() = std::move(recommendation);
  return overview_page;
}
|
||||||
|
|
||||||
|
} // namespace profiler
|
||||||
|
} // namespace tensorflow
|
45
tensorflow/core/profiler/convert/op_stats_to_overview_page.h
Normal file
45
tensorflow/core/profiler/convert/op_stats_to_overview_page.h
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_
|
||||||
|
#define TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_
|
||||||
|
|
||||||
|
#include "absl/strings/string_view.h"
|
||||||
|
#include "tensorflow/core/platform/protobuf.h"
|
||||||
|
#include "tensorflow/core/platform/types.h"
|
||||||
|
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||||
|
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
namespace profiler {
|
||||||
|
|
||||||
|
void SetCommonRecommendation(const CommonBottleneck& bottleneck,
|
||||||
|
HardwareType hardware_type,
|
||||||
|
OverviewPageRecommendation* re);
|
||||||
|
|
||||||
|
OverviewPageRecommendation ComputeGenericRecommendation(
|
||||||
|
const GenericBottleneck& bottleneck);
|
||||||
|
|
||||||
|
OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats);
|
||||||
|
|
||||||
|
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
|
||||||
|
HardwareType hardware_type);
|
||||||
|
|
||||||
|
} // namespace profiler
|
||||||
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
#endif // TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_
|
@ -40,7 +40,10 @@ tf_proto_library(
|
|||||||
name = "overview_page_proto",
|
name = "overview_page_proto",
|
||||||
srcs = ["overview_page.proto"],
|
srcs = ["overview_page.proto"],
|
||||||
cc_api_version = 2,
|
cc_api_version = 2,
|
||||||
protodeps = [":input_pipeline_proto"],
|
protodeps = [
|
||||||
|
":input_pipeline_proto",
|
||||||
|
":op_stats_proto",
|
||||||
|
],
|
||||||
visibility = [
|
visibility = [
|
||||||
":friends",
|
":friends",
|
||||||
],
|
],
|
||||||
|
@ -54,7 +54,7 @@ message SystemTopology {
|
|||||||
int64 num_expected_reduced_chips = 4;
|
int64 num_expected_reduced_chips = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Result proto for RunEnvironment (the run environment of a profiling session).
|
// The run environment of a profiling session.
|
||||||
message RunEnvironment {
|
message RunEnvironment {
|
||||||
// Number of hosts used.
|
// Number of hosts used.
|
||||||
int32 host_count = 1;
|
int32 host_count = 1;
|
||||||
@ -71,9 +71,9 @@ message RunEnvironment {
|
|||||||
int32 device_core_count = 5;
|
int32 device_core_count = 5;
|
||||||
// The per-device-core batch size.
|
// The per-device-core batch size.
|
||||||
int32 per_core_batch_size = 6;
|
int32 per_core_batch_size = 6;
|
||||||
// Host-independent job information.
|
// Host-independent information about this job.
|
||||||
HostIndependentJobInfoResult host_independent_job_info = 7;
|
HostIndependentJobInfoResult host_independent_job_info = 7;
|
||||||
// Host-dependent job information.
|
// Host-dependent information about this job.
|
||||||
repeated HostDependentJobInfoResult host_dependent_job_info = 8;
|
repeated HostDependentJobInfoResult host_dependent_job_info = 8;
|
||||||
// The number of replicas, corresponds to input parallelism.
|
// The number of replicas, corresponds to input parallelism.
|
||||||
// If there is no model parallelism, replica_count = device_core_count
|
// If there is no model parallelism, replica_count = device_core_count
|
||||||
@ -97,4 +97,6 @@ message OpStats {
|
|||||||
PerfEnv perf_env = 3;
|
PerfEnv perf_env = 3;
|
||||||
// The database of step sequences.
|
// The database of step sequences.
|
||||||
StepDatabaseResult step_db = 4;
|
StepDatabaseResult step_db = 4;
|
||||||
|
// The run environment of this profiling session.
|
||||||
|
RunEnvironment run_environment = 5;
|
||||||
}
|
}
|
||||||
|
@ -4,59 +4,7 @@ package tensorflow.profiler;
|
|||||||
|
|
||||||
import "google/protobuf/any.proto";
|
import "google/protobuf/any.proto";
|
||||||
import "tensorflow/core/profiler/protobuf/input_pipeline.proto";
|
import "tensorflow/core/profiler/protobuf/input_pipeline.proto";
|
||||||
|
import "tensorflow/core/profiler/protobuf/op_stats.proto";
|
||||||
// Overview result for host-independent job information.
|
|
||||||
message OverviewPageHostIndependentJobInfo {
|
|
||||||
// The CL of the build.
|
|
||||||
int64 change_list = 1;
|
|
||||||
// The time of this build (nanoseconds since the Unix epoch).
|
|
||||||
int64 build_time = 2;
|
|
||||||
// The target of this build.
|
|
||||||
string build_target = 3;
|
|
||||||
// Profiling duration (in ms).
|
|
||||||
uint32 profile_duration_ms = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Overview result for host-dependent job information.
|
|
||||||
message OverviewPageHostDependentJobInfo {
|
|
||||||
// The ID of the host where this job was run.
|
|
||||||
string host_id = 1;
|
|
||||||
// The command line for this run.
|
|
||||||
string command_line = 2;
|
|
||||||
// The start time of this run (nanoseconds since the Unix epoch).
|
|
||||||
int64 start_time = 3;
|
|
||||||
// BNS address specified by client at time of profiling request.
|
|
||||||
string bns_address = 4;
|
|
||||||
// Profiling start walltime (in ns).
|
|
||||||
uint64 profile_time_ns = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Overview result for run environment.
|
|
||||||
message OverviewPageRunEnvironment {
|
|
||||||
// Number of hosts used.
|
|
||||||
int32 host_count = 1;
|
|
||||||
// Number of tasks used.
|
|
||||||
int32 task_count = 2;
|
|
||||||
// The type of device used.
|
|
||||||
string device_type = 3;
|
|
||||||
// The number of device cores used.
|
|
||||||
// What "device core" means depends on the platform:
|
|
||||||
// For TPU, a device core is a TPU core.
|
|
||||||
// For Nvidia GPU, a device core is a GPU (not a SM).
|
|
||||||
int32 device_core_count = 4;
|
|
||||||
// The per-device-core batch size.
|
|
||||||
int32 per_core_batch_size = 5;
|
|
||||||
// Host-independent information about this job.
|
|
||||||
OverviewPageHostIndependentJobInfo host_independent_job_info = 6;
|
|
||||||
// Host-dependent information about this job.
|
|
||||||
repeated OverviewPageHostDependentJobInfo host_dependent_job_info = 7;
|
|
||||||
// The number of replicas, corresponds to input parallelism.
|
|
||||||
// If there is no model parallelism, replica_count = device_core_count
|
|
||||||
int32 replica_count = 8;
|
|
||||||
// The number of cores used for a single replica, e.g. model parallelism.
|
|
||||||
// If there is no model parallelism, then num_cores_per_replica = 1
|
|
||||||
int32 num_cores_per_replica = 9;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Overview result for a TensorFlow Op.
|
// Overview result for a TensorFlow Op.
|
||||||
message OverviewTfOp {
|
message OverviewTfOp {
|
||||||
@ -138,11 +86,12 @@ message OverviewPageRecommendation {
|
|||||||
|
|
||||||
message OverviewPage {
|
message OverviewPage {
|
||||||
// The run environment of the profiled session.
|
// The run environment of the profiled session.
|
||||||
OverviewPageRunEnvironment run_environment = 1;
|
RunEnvironment run_environment = 5;
|
||||||
// The step-time result.
|
// The step-time result.
|
||||||
InputPipelineAnalysisResult input_analysis = 2;
|
InputPipelineAnalysisResult input_analysis = 2;
|
||||||
// The other analysis result.
|
// The other analysis result.
|
||||||
OverviewPageAnalysis analysis = 3;
|
OverviewPageAnalysis analysis = 3;
|
||||||
// The recommendation made to the user.
|
// The recommendation made to the user.
|
||||||
OverviewPageRecommendation recommendation = 4;
|
OverviewPageRecommendation recommendation = 4;
|
||||||
|
reserved 1;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user