Added RunEnvironment to OpStats.
Added a converter from OpStats to OverviewPage. Added input and bottleneck analysis to InputPipelineAnalysis. PiperOrigin-RevId: 289735025 Change-Id: Ice4b2db5f241573afecce52aa882216ea16bd74c
This commit is contained in:
parent
230ebd5d96
commit
99eb226655
@ -66,6 +66,28 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
# Library that converts OpStats to OverviewPage; sources and public header are
# op_stats_to_overview_page.{cc,h}.
cc_library(
|
||||
name = "op_stats_to_overview_page",
|
||||
srcs = ["op_stats_to_overview_page.cc"],
|
||||
hdrs = ["op_stats_to_overview_page.h"],
|
||||
deps = [
|
||||
":op_metrics_to_record",
|
||||
":op_stats_to_input_pipeline_analysis",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core/platform:logging",
|
||||
"//tensorflow/core/profiler/protobuf:hardware_types_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:input_pipeline_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:math_utils",
|
||||
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
|
||||
"//tensorflow/core/profiler/utils:time_utils",
|
||||
# Needed for absl::StrCat used by the .cc file.
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "op_stats_to_input_pipeline_analysis",
|
||||
srcs = ["op_stats_to_input_pipeline_analysis.cc"],
|
||||
@ -88,6 +110,7 @@ cc_library(
|
||||
"@com_google_absl//absl/algorithm:container",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/strings/match.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
@ -46,6 +47,28 @@ namespace {
|
||||
|
||||
const double kNumPsPerMs = 1000000000.0;
|
||||
|
||||
// If the percentage of step time that is due to infeed is less than
|
||||
// kModeratelyInfeedBoundThresholdInPercent, it is considered NOT
|
||||
// input-bound; else if it is less than
|
||||
// kHighlyInfeedBoundThresholdInPercent, it is considered MODERATELY
|
||||
// input-bound; else it is considered HIGHLY input-bound.
|
||||
constexpr double kModeratelyInfeedBoundThresholdInPercent = 5;
|
||||
constexpr double kHighlyInfeedBoundThresholdInPercent = 20;
|
||||
// If the percentage of step time that is due to kernel launch is less than
|
||||
// kModeratelyKernelLaunchBoundThresholdInPercent, it is considered NOT
|
||||
// kernel-launch bound; else if it is less than
|
||||
// kHighlyKernelLaunchBoundThresholdInPercent, it is considered MODERATELY
|
||||
// kernel-launch bound; else it is considered HIGHLY kernel-launch bound.
|
||||
constexpr double kModeratelyKernelLaunchBoundThresholdInPercent = 3;
|
||||
constexpr double kHighlyKernelLaunchBoundThresholdInPercent = 15;
|
||||
// If the percentage of step time that is due to all other time is less than
|
||||
// kModeratelyAllOtherBoundThresholdInPercent, it is considered NOT
|
||||
// all-other bound; else if it is less than
|
||||
// kHighlyAllOtherBoundThresholdInPercent, it is considered MODERATELY
|
||||
// all-other bound; else it is considered HIGHLY all-other bound.
|
||||
constexpr double kModeratelyAllOtherBoundThresholdInPercent = 3;
|
||||
constexpr double kHighlyAllOtherBoundThresholdInPercent = 15;
|
||||
|
||||
template <class Collection>
|
||||
double GetTimeInMs(const Collection& type_ps, EventType event_type) {
|
||||
return PicosToMillis(gtl::FindWithDefault(type_ps, event_type, /*value=*/0));
|
||||
@ -317,6 +340,47 @@ double RatioOfHostToDeviceTimeToStepTime(
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
void KernelLaunchAnalysis(double kernel_launch_percent, int* observation_index,
|
||||
string* kernel_launch_classification,
|
||||
string* kernel_launch_statement) {
|
||||
string percent_str = absl::StrFormat("%.1lf", kernel_launch_percent);
|
||||
if (kernel_launch_percent >= kHighlyKernelLaunchBoundThresholdInPercent) {
|
||||
*kernel_launch_classification = "high";
|
||||
*kernel_launch_statement = absl::StrCat(
|
||||
"(", ++*observation_index, ") ", percent_str,
|
||||
" % of the total step time sampled is spent on Kernel Launch.");
|
||||
} else if (kernel_launch_percent >=
|
||||
kModeratelyKernelLaunchBoundThresholdInPercent) {
|
||||
*kernel_launch_classification = "moderate";
|
||||
*kernel_launch_statement = absl::StrCat(
|
||||
"(", ++*observation_index, ") ", percent_str,
|
||||
" % of the total step time sampled is spent on Kernel Launch.");
|
||||
} else {
|
||||
*kernel_launch_classification = "no";
|
||||
*kernel_launch_statement = "";
|
||||
}
|
||||
}
|
||||
|
||||
void AllOtherAnalysis(double all_other_percent, int* observation_index,
|
||||
string* all_other_classification,
|
||||
string* all_other_statement) {
|
||||
string percent_str = absl::StrFormat("%.1lf", all_other_percent);
|
||||
if (all_other_percent >= kHighlyAllOtherBoundThresholdInPercent) {
|
||||
*all_other_classification = "high";
|
||||
*all_other_statement = absl::StrCat(
|
||||
"(", ++*observation_index, ") ", percent_str,
|
||||
" % of the total step time sampled is spent on All Others time.");
|
||||
} else if (all_other_percent >= kModeratelyAllOtherBoundThresholdInPercent) {
|
||||
*all_other_classification = "moderate";
|
||||
*all_other_statement = absl::StrCat(
|
||||
"(", ++*observation_index, ") ", percent_str,
|
||||
" % of the total step time sampled is spent on All Others time.");
|
||||
} else {
|
||||
*all_other_classification = "no";
|
||||
*all_other_statement = "";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void GenerateHostResult(const OpMetricsDb& host_tf_metrics_db,
|
||||
@ -451,5 +515,104 @@ InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
|
||||
return result;
|
||||
}
|
||||
|
||||
// Classifies how input-bound the program is and writes a numbered,
// human-readable observation about it.
//
// hardware_type: accepted for interface compatibility; not read here.
// infeed_percent: percentage of the total step time spent waiting for input.
// observation_index: running observation counter; always pre-incremented once
//     because every outcome produces a statement.
// input_classification: receives "host" (highly input-bound), "both"
//     (moderately input-bound) or "device" (not input-bound).
// input_statement: receives the observation text.
void InfeedAnalysis(HardwareType hardware_type, double infeed_percent,
                    int* observation_index, string* input_classification,
                    string* input_statement) {
  absl::string_view remaining_time = "other time";
  const string percent_text = absl::StrFormat("%.1lf", infeed_percent);
  // All three outcomes start with the same "(N) " prefix.
  const string numbering = absl::StrCat("(", ++*observation_index, ") ");

  if (infeed_percent >= kHighlyInfeedBoundThresholdInPercent) {
    *input_classification = "host";
    *input_statement = absl::StrCat(
        numbering, "Your program is HIGHLY input-bound because ", percent_text,
        "% of the total step time sampled is waiting for input. Therefore, "
        "you should first focus on reducing the input time.");
    return;
  }

  if (infeed_percent >= kModeratelyInfeedBoundThresholdInPercent) {
    *input_classification = "both";
    *input_statement = absl::StrCat(
        numbering, "Your program is MODERATELY input-bound because ",
        percent_text,
        "% of the total step time sampled is waiting for input. Therefore, "
        "you would need to reduce both the input time and ",
        remaining_time, ".");
    return;
  }

  *input_classification = "device";
  *input_statement = absl::StrCat(
      numbering, "Your program is NOT input-bound because only ", percent_text,
      "% of the total step time sampled is waiting for "
      "input. Therefore, you should focus on "
      "reducing ",
      remaining_time, ".");
}
|
||||
|
||||
// Determines the overall performance bottleneck from the per-step details in
// `result`.
//
// Sums, over all steps, the step time, the input time (host wait-for-input
// plus host-to-device), the host-prepare time and the unknown time. Each sum
// is converted to a percentage of total step time and classified by
// InfeedAnalysis, KernelLaunchAnalysis and AllOtherAnalysis respectively.
//
// Returns:
//   * a value-initialized GenericBottleneck if some step's details have a
//     non-empty type URL but cannot be unpacked as PerGenericStepDetails;
//   * an "unknown" input classification with "no" kernel-launch and all-other
//     classifications if the total step time is zero;
//   * otherwise the three classifications and their statements, numbered in
//     the order input, kernel launch, all other.
GenericBottleneck GenericOverallBottleneck(
    const InputPipelineAnalysisResult& result) {
  // Only the quantities used by the classification below are accumulated.
  double total_step_time_ms = 0;
  double total_input_ms = 0;
  double total_host_prepare_ms = 0;
  double total_unknown_ms = 0;
  for (const google::protobuf::Any& step_details : result.step_details()) {
    PerGenericStepDetails details;
    const bool success = step_details.UnpackTo(&details);
    // An Any with an empty type URL is tolerated and contributes zeros.
    if (!success && !step_details.type_url().empty()) {
      // No std::endl: the logging macro terminates the line on its own.
      LOG(ERROR) << "Unable to unpack step_breakdown. Expected: generic";
      return {};
    }
    total_step_time_ms += details.step_time_ms();
    total_input_ms +=
        details.host_wait_input_ms() + details.host_to_device_ms();
    total_host_prepare_ms += details.host_prepare_ms();
    total_unknown_ms += details.unknown_time_ms();
  }

  if (total_step_time_ms == 0) {
    // Percentages are undefined without any measured step time.
    return {{"unknown",
             "No step time measured. Therefore we cannot tell where the "
             "performance bottleneck is."},
            "no",
            "",
            "no",
            ""};
  }

  const double input_percent = 100.0 * total_input_ms / total_step_time_ms;
  // Host-prepare time is what is reported as kernel-launch overhead.
  const double kernel_launch_percent =
      100.0 * total_host_prepare_ms / total_step_time_ms;
  const double all_other_percent =
      100.0 * total_unknown_ms / total_step_time_ms;

  // Shared counter so that the emitted statements are numbered consecutively.
  int observation_index = 0;

  string input_classification;
  string input_statement;
  InfeedAnalysis(result.hardware_type(), input_percent, &observation_index,
                 &input_classification, &input_statement);

  string kernel_launch_classification;
  string kernel_launch_statement;
  KernelLaunchAnalysis(kernel_launch_percent, &observation_index,
                       &kernel_launch_classification, &kernel_launch_statement);

  string all_other_classification;
  string all_other_statement;
  AllOtherAnalysis(all_other_percent, &observation_index,
                   &all_other_classification, &all_other_statement);

  return {{
              input_classification,
              input_statement,
          },
          kernel_launch_classification,
          kernel_launch_statement,
          all_other_classification,
          all_other_statement};
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
@ -25,8 +25,30 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
|
||||
const OpStats& op_stats, const HardwareType& hardware_type);
|
||||
// Common performance bottleneck.
// NOTE: this is a plain aggregate that GenericOverallBottleneck initializes
// positionally as {input_classification, input_statement}; keep the member
// order in sync with that code.
|
||||
struct CommonBottleneck {
|
||||
// Indicates if input is a bottleneck. Possible values: "host", "device",
|
||||
// "both", or "unknown"
// ("unknown" is what GenericOverallBottleneck reports when no step time was
// measured).
|
||||
string input_classification;
|
||||
// A human-readable description of the input bottleneck.
|
||||
string input_statement;
|
||||
};
|
||||
|
||||
// Generic hardware bottleneck.
// NOTE: this is a plain aggregate that GenericOverallBottleneck initializes
// positionally; keep the member order in sync with that code.
|
||||
struct GenericBottleneck {
|
||||
// Bottleneck that exists on all hardware.
|
||||
CommonBottleneck common;
|
||||
// Indicates if kernel launching is a bottleneck. Possible values: "no",
|
||||
// "moderate", "high".
|
||||
string kernel_launch_classification;
|
||||
// A human-readable description of the kernel launching overhead.
// Empty when kernel_launch_classification is "no".
|
||||
string kernel_launch_statement;
|
||||
// Indicates if all other is a bottleneck. Possible values: "no", "moderate",
|
||||
// "high".
|
||||
string all_other_classification;
|
||||
// A human-readable description of the all other overhead.
// Empty when all_other_classification is "no".
|
||||
string all_other_statement;
|
||||
};
|
||||
|
||||
// Computes the summary of step time in milliseconds.
|
||||
StepSummary ComputeStepTimeSummaryInMs(
|
||||
@ -38,6 +60,17 @@ void GenerateHostResult(const OpMetricsDb& host_tf_metrics_db,
|
||||
|
||||
InputPipelineAnalysisRecommendation GenerateRecommendation();
|
||||
|
||||
// Returns the performance bottleneck of the program executed.
|
||||
GenericBottleneck GenericOverallBottleneck(
|
||||
const InputPipelineAnalysisResult& result);
|
||||
|
||||
InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
|
||||
const OpStats& op_stats, const HardwareType& hardware_type);
|
||||
|
||||
void InfeedAnalysis(HardwareType hardware_type, double infeed_percent,
|
||||
int* observation_index, string* input_classification,
|
||||
string* input_statement);
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
|
160
tensorflow/core/profiler/convert/op_stats_to_overview_page.cc
Normal file
160
tensorflow/core/profiler/convert/op_stats_to_overview_page.cc
Normal file
@ -0,0 +1,160 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "google/protobuf/any.pb.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/op_metrics_to_record.h"
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
||||
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
namespace {
|
||||
|
||||
// Builds an OverviewPageTip whose link field holds `text` verbatim.
OverviewPageTip MakeOverviewPageTip(const string& text) {
  OverviewPageTip result;
  result.set_link(text);
  return result;
}
|
||||
|
||||
// Returns an HTML anchor element that opens `url` in a new tab and displays
// `text`. Neither argument is escaped here.
string AnchorElement(const string& url, const string& text) {
  const string opening_tag =
      absl::StrCat("<a href=\"", url, "\" target=\"_blank\">");
  return absl::StrCat(opening_tag, text, "</a>");
}
|
||||
|
||||
// Makes a recommendation for looking up a document.
|
||||
// doc_url is expected to be already be escaped suitably for use in an HTML
|
||||
// attribute.
|
||||
OverviewPageTip MakeOverviewPageTipDocLink(const string& doc_url,
|
||||
const string& text) {
|
||||
OverviewPageTip tip;
|
||||
tip.set_link(AnchorElement(doc_url, text));
|
||||
return tip;
|
||||
}
|
||||
|
||||
// Appends the host-side tips (which tools to look at) to `re`.
void ComputeHostTips(OverviewPageRecommendation* re) {
  const char* const kHostTipTexts[] = {
      "input_pipeline_analyzer (especially Section 3 for the breakdown of "
      "input operations on the Host)",
      "trace_viewer (look at the activities on the timeline of each Host "
      "Thread near the bottom of the trace view)",
  };
  for (const char* tip_text : kHostTipTexts) {
    *re->add_host_tips() = MakeOverviewPageTip(tip_text);
  }
}
|
||||
|
||||
// Appends the device-side tips (which tools to look at) to `re`, worded for
// the given hardware type.
void ComputeDeviceTips(HardwareType hardware_type,
                       OverviewPageRecommendation* re) {
  const string& device_name = HardwareType_Name(hardware_type);

  // For TPU the trace-viewer timeline is referred to as "TPU core"; every
  // other hardware type uses the enum name.
  string timeline_name;
  if (hardware_type == tensorflow::profiler::TPU) {
    timeline_name = "TPU core";
  } else {
    timeline_name = device_name;
  }

  const string op_profile_text = absl::StrCat(
      "op_profile (identify the time-consuming operations executed on the ",
      device_name, ")");
  *re->add_device_tips() = MakeOverviewPageTip(op_profile_text);

  const string trace_viewer_text = absl::StrCat(
      "trace_viewer (look at the activities on the timeline of each ",
      timeline_name, " in the trace view)");
  *re->add_device_tips() = MakeOverviewPageTip(trace_viewer_text);
}
|
||||
|
||||
// Appends the single FAQ tip to `re`.
void ComputeFaqTips(OverviewPageRecommendation* re) {
  OverviewPageTip* faq_tip = re->add_faq_tips();
  *faq_tip = MakeOverviewPageTip("Refer to the Cloud tools FAQ");
}
|
||||
|
||||
// Appends the documentation tip (a link to the tf.data Dataset API docs) to
// `re`.
void ComputeDocumentationTips(OverviewPageRecommendation* re) {
  const string dataset_doc_url =
      "https://www.tensorflow.org/versions/master/api_docs/python/tf/data/"
      "Dataset";
  const string link_label = "TensorFlow Input Pipeline API";
  OverviewPageTip* doc_tip = re->add_documentation_tips();
  *doc_tip = MakeOverviewPageTipDocLink(dataset_doc_url, link_label);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void SetCommonRecommendation(const CommonBottleneck& bottleneck,
|
||||
HardwareType hardware_type,
|
||||
OverviewPageRecommendation* re) {
|
||||
re->set_bottleneck(bottleneck.input_classification);
|
||||
re->set_statement(bottleneck.input_statement);
|
||||
ComputeHostTips(re);
|
||||
ComputeDeviceTips(hardware_type, re);
|
||||
ComputeDocumentationTips(re);
|
||||
ComputeFaqTips(re);
|
||||
}
|
||||
|
||||
// Returns a recommendation whose `recommendation` Any field carries a
// GenericRecommendation built from the kernel-launch and all-other parts of
// `bottleneck`. The common (input) part is not touched here.
OverviewPageRecommendation ComputeGenericRecommendation(
    const GenericBottleneck& bottleneck) {
  GenericRecommendation generic_details;
  generic_details.set_kernel_launch_bottleneck(
      bottleneck.kernel_launch_classification);
  generic_details.set_kernel_launch_statement(
      bottleneck.kernel_launch_statement);
  generic_details.set_all_other_bottleneck(bottleneck.all_other_classification);
  generic_details.set_all_other_statement(bottleneck.all_other_statement);

  OverviewPageRecommendation recommendation;
  recommendation.mutable_recommendation()->PackFrom(generic_details);
  return recommendation;
}
|
||||
|
||||
// Builds the "top device ops" section of the overview page.
//
// The device op metrics in `op_stats` are converted with
// CreateTfMetricsDbFromHloMetricsDb, and up to kNumTopOpsShown entries, in the
// order returned by SortedOpMetricsDb, are reported. Each entry carries its
// name, category, self-time fraction of the total device time, the running
// cumulative fraction, and a flop rate computed as flops divided by the op
// time converted with PicosToNanos.
OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
  constexpr int kNumTopOpsShown = 10;

  OpMetricsDb tf_metrics_db =
      CreateTfMetricsDbFromHloMetricsDb(op_stats.device_op_metrics_db());
  const uint64 total_time_ps = tf_metrics_db.total_time_ps();

  OverviewPageAnalysis analysis;
  double running_fraction = 0.0;
  for (const OpMetrics* op_metrics :
       SortedOpMetricsDb(tf_metrics_db, kNumTopOpsShown)) {
    OverviewTfOp* top_op = analysis.add_top_device_ops();
    top_op->set_name(op_metrics->name());
    top_op->set_category(op_metrics->category());

    top_op->set_self_time_fraction(
        SafeDivide(op_metrics->self_time_ps(), total_time_ps));
    // Accumulate the value as stored in the proto field.
    running_fraction += top_op->self_time_fraction();
    top_op->set_cumulative_time_fraction(running_fraction);

    const auto op_time_ns = PicosToNanos(op_metrics->time_ps());
    top_op->set_flop_rate(SafeDivide(op_metrics->flops(), op_time_ns));
  }
  return analysis;
}
|
||||
|
||||
// Converts `op_stats` into the OverviewPage proto for `hardware_type`.
//
// The result contains:
//   * run_environment: copied from `op_stats`;
//   * analysis: the output of ComputeAnalysisResult;
//   * input_analysis: the output of ConvertOpStatsToInputPipelineAnalysis;
//   * recommendation: the generic recommendation derived from the bottleneck
//     found in input_analysis, completed by SetCommonRecommendation.
//
// Each sub-message is assigned straight into its destination field, so the
// function results are not held in a local and then deep-copied.
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
                                          HardwareType hardware_type) {
  OverviewPage overview_page;
  *overview_page.mutable_run_environment() = op_stats.run_environment();
  *overview_page.mutable_analysis() = ComputeAnalysisResult(op_stats);
  *overview_page.mutable_input_analysis() =
      ConvertOpStatsToInputPipelineAnalysis(op_stats, hardware_type);

  // The bottleneck is derived from the input analysis stored above.
  const GenericBottleneck bottleneck =
      GenericOverallBottleneck(overview_page.input_analysis());

  OverviewPageRecommendation* recommendation =
      overview_page.mutable_recommendation();
  *recommendation = ComputeGenericRecommendation(bottleneck);
  SetCommonRecommendation(bottleneck.common, hardware_type, recommendation);
  return overview_page;
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
45
tensorflow/core/profiler/convert/op_stats_to_overview_page.h
Normal file
45
tensorflow/core/profiler/convert/op_stats_to_overview_page.h
Normal file
@ -0,0 +1,45 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_
|
||||
#define TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h"
|
||||
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
void SetCommonRecommendation(const CommonBottleneck& bottleneck,
|
||||
HardwareType hardware_type,
|
||||
OverviewPageRecommendation* re);
|
||||
|
||||
OverviewPageRecommendation ComputeGenericRecommendation(
|
||||
const GenericBottleneck& bottleneck);
|
||||
|
||||
OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats);
|
||||
|
||||
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
|
||||
HardwareType hardware_type);
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_TO_OVERVIEW_PAGE_H_
|
@ -40,7 +40,10 @@ tf_proto_library(
|
||||
name = "overview_page_proto",
|
||||
srcs = ["overview_page.proto"],
|
||||
cc_api_version = 2,
|
||||
protodeps = [":input_pipeline_proto"],
|
||||
protodeps = [
|
||||
":input_pipeline_proto",
|
||||
":op_stats_proto",
|
||||
],
|
||||
visibility = [
|
||||
":friends",
|
||||
],
|
||||
|
@ -54,7 +54,7 @@ message SystemTopology {
|
||||
int64 num_expected_reduced_chips = 4;
|
||||
}
|
||||
|
||||
// Result proto for RunEnvironment (the run environment of a profiling session).
|
||||
// The run environment of a profiling session.
|
||||
message RunEnvironment {
|
||||
// Number of hosts used.
|
||||
int32 host_count = 1;
|
||||
@ -71,9 +71,9 @@ message RunEnvironment {
|
||||
int32 device_core_count = 5;
|
||||
// The per-device-core batch size.
|
||||
int32 per_core_batch_size = 6;
|
||||
// Host-independent job information.
|
||||
// Host-independent information about this job.
|
||||
HostIndependentJobInfoResult host_independent_job_info = 7;
|
||||
// Host-dependent job information.
|
||||
// Host-dependent information about this job.
|
||||
repeated HostDependentJobInfoResult host_dependent_job_info = 8;
|
||||
// The number of replicas, corresponds to input parallelism.
|
||||
// If there is no model parallelism, replica_count = device_core_count
|
||||
@ -97,4 +97,6 @@ message OpStats {
|
||||
PerfEnv perf_env = 3;
|
||||
// The database of step sequences.
|
||||
StepDatabaseResult step_db = 4;
|
||||
// The run environment of this profiling session.
|
||||
RunEnvironment run_environment = 5;
|
||||
}
|
||||
|
@ -4,59 +4,7 @@ package tensorflow.profiler;
|
||||
|
||||
import "google/protobuf/any.proto";
|
||||
import "tensorflow/core/profiler/protobuf/input_pipeline.proto";
|
||||
|
||||
// Overview result for host-independent job information.
|
||||
message OverviewPageHostIndependentJobInfo {
|
||||
// The CL of the build.
|
||||
int64 change_list = 1;
|
||||
// The time of this build (nanoseconds since the Unix epoch).
|
||||
int64 build_time = 2;
|
||||
// The target of this build.
|
||||
string build_target = 3;
|
||||
// Profiling duration (in ms).
|
||||
uint32 profile_duration_ms = 4;
|
||||
}
|
||||
|
||||
// Overview result for host-dependent job information.
|
||||
message OverviewPageHostDependentJobInfo {
|
||||
// The ID of the host where this job was run.
|
||||
string host_id = 1;
|
||||
// The command line for this run.
|
||||
string command_line = 2;
|
||||
// The start time of this run (nanoseconds since the Unix epoch).
|
||||
int64 start_time = 3;
|
||||
// BNS address specified by client at time of profiling request.
|
||||
string bns_address = 4;
|
||||
// Profiling start walltime (in ns).
|
||||
uint64 profile_time_ns = 5;
|
||||
}
|
||||
|
||||
// Overview result for run environment.
|
||||
message OverviewPageRunEnvironment {
|
||||
// Number of hosts used.
|
||||
int32 host_count = 1;
|
||||
// Number of tasks used.
|
||||
int32 task_count = 2;
|
||||
// The type of device used.
|
||||
string device_type = 3;
|
||||
// The number of device cores used.
|
||||
// What "device core" means depends on the platform:
|
||||
// For TPU, a device core is a TPU core.
|
||||
// For Nvidia GPU, a device core is a GPU (not a SM).
|
||||
int32 device_core_count = 4;
|
||||
// The per-device-core batch size.
|
||||
int32 per_core_batch_size = 5;
|
||||
// Host-independent information about this job.
|
||||
OverviewPageHostIndependentJobInfo host_independent_job_info = 6;
|
||||
// Host-dependent information about this job.
|
||||
repeated OverviewPageHostDependentJobInfo host_dependent_job_info = 7;
|
||||
// The number of replicas, corresponds to input parallelism.
|
||||
// If there is no model parallelism, replica_count = device_core_count
|
||||
int32 replica_count = 8;
|
||||
// The number of cores used for a single replica, e.g. model parallelism.
|
||||
// If there is no model parallelism, then num_cores_per_replica = 1
|
||||
int32 num_cores_per_replica = 9;
|
||||
}
|
||||
import "tensorflow/core/profiler/protobuf/op_stats.proto";
|
||||
|
||||
// Overview result for a TensorFlow Op.
|
||||
message OverviewTfOp {
|
||||
@ -138,11 +86,12 @@ message OverviewPageRecommendation {
|
||||
|
||||
message OverviewPage {
|
||||
// The run environment of the profiled session.
|
||||
OverviewPageRunEnvironment run_environment = 1;
|
||||
RunEnvironment run_environment = 5;
|
||||
// The step-time result.
|
||||
InputPipelineAnalysisResult input_analysis = 2;
|
||||
// The other analysis result.
|
||||
OverviewPageAnalysis analysis = 3;
|
||||
// The recommendation made to the user.
|
||||
OverviewPageRecommendation recommendation = 4;
|
||||
reserved 1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user