Add suggestion and summary to CombinedTfDataStats.
PiperOrigin-RevId: 337966727 Change-Id: I7a11c83f87454551d9c33383193c3d463409ae26
This commit is contained in:
parent
73b709743a
commit
9a36023aa3
tensorflow/core/profiler
@ -714,6 +714,7 @@ cc_library(
|
|||||||
"//tensorflow/core/profiler/protobuf:tf_data_stats_proto_cc",
|
"//tensorflow/core/profiler/protobuf:tf_data_stats_proto_cc",
|
||||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||||
"//tensorflow/core/profiler/utils:group_events",
|
"//tensorflow/core/profiler/utils:group_events",
|
||||||
|
"//tensorflow/core/profiler/utils:html_utils",
|
||||||
"//tensorflow/core/profiler/utils:tf_op_utils",
|
"//tensorflow/core/profiler/utils:tf_op_utils",
|
||||||
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
|
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
|
||||||
"//tensorflow/core/profiler/utils:timespan",
|
"//tensorflow/core/profiler/utils:timespan",
|
||||||
@ -722,6 +723,7 @@ cc_library(
|
|||||||
"@com_google_absl//absl/container:flat_hash_map",
|
"@com_google_absl//absl/container:flat_hash_map",
|
||||||
"@com_google_absl//absl/container:flat_hash_set",
|
"@com_google_absl//absl/container:flat_hash_set",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
|
"@com_google_absl//absl/strings:str_format",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -17,12 +17,14 @@ limitations under the License.
|
|||||||
|
|
||||||
#include "absl/container/flat_hash_map.h"
|
#include "absl/container/flat_hash_map.h"
|
||||||
#include "absl/container/flat_hash_set.h"
|
#include "absl/container/flat_hash_set.h"
|
||||||
|
#include "absl/strings/str_format.h"
|
||||||
#include "absl/strings/str_split.h"
|
#include "absl/strings/str_split.h"
|
||||||
#include "absl/strings/string_view.h"
|
#include "absl/strings/string_view.h"
|
||||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||||
#include "tensorflow/core/platform/protobuf.h"
|
#include "tensorflow/core/platform/protobuf.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/tf_data_stats.pb.h"
|
#include "tensorflow/core/profiler/protobuf/tf_data_stats.pb.h"
|
||||||
#include "tensorflow/core/profiler/utils/group_events.h"
|
#include "tensorflow/core/profiler/utils/group_events.h"
|
||||||
|
#include "tensorflow/core/profiler/utils/html_utils.h"
|
||||||
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
||||||
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
|
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
|
||||||
#include "tensorflow/core/profiler/utils/timespan.h"
|
#include "tensorflow/core/profiler/utils/timespan.h"
|
||||||
@ -274,8 +276,150 @@ void SetBottleneckAnalysis(absl::string_view host_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the HTML-formatted suggestion text for a bottleneck of the given
// `type`. Each suggestion is a short numbered list (separated by <br/>) whose
// "this"/"here" links are produced by AnchorElement(). Any type without a
// dedicated case falls back to a generic pointer to the analysis playbook.
std::string GetSuggestion(BottleneckType type) {
  // Documentation pages the suggestions link to.
  constexpr absl::string_view kAnalysisPlaybookUrl =
      "https://www.tensorflow.org/guide/data_performance_analysis";
  constexpr absl::string_view kPlaybookSourceDatasetsUrl =
      "https://www.tensorflow.org/guide/"
      "data_performance_analysis#source_datasets";
  constexpr absl::string_view kPlaybookCpuUtilizationUrl =
      "https://www.tensorflow.org/guide/"
      "data_performance_analysis#3_are_you_reaching_high_cpu_utilization";
  constexpr absl::string_view kPlaybookTransformationsUrl =
      "https://www.tensorflow.org/guide/"
      "data_performance_analysis#transformation_datasets";
  constexpr absl::string_view kGuideParallelExtractionUrl =
      "https://www.tensorflow.org/guide/"
      "data_performance#parallelizing_data_extraction";
  constexpr absl::string_view kGuideParallelTransformationUrl =
      "https://www.tensorflow.org/guide/"
      "data_performance#parallelizing_data_transformation";
  constexpr absl::string_view kGuideCachingUrl =
      "https://www.tensorflow.org/guide/data_performance#caching";

  switch (type) {
    case BottleneckType::kSlowSource: {
      const auto playbook_anchor =
          AnchorElement(kPlaybookSourceDatasetsUrl, "here");
      const auto extraction_anchor =
          AnchorElement(kGuideParallelExtractionUrl, "here");
      return absl::StrFormat(
          "1. Check the locality of a host and input data. Ideally, they "
          "should be in the same cell (or very close, like the same "
          "region).<br/>"
          "2. Parallelize reading from this dataset source. See %s and %s for "
          "more details.<br/>",
          playbook_anchor, extraction_anchor);
    }
    case BottleneckType::kSlowRemoteSource: {
      const auto playbook_anchor = AnchorElement(kAnalysisPlaybookUrl, "this");
      return absl::StrFormat(
          "1. The remote data source is slow. Profile its host to analyze the "
          "issue further.<br/>"
          "2. See %s for other suggestions.",
          playbook_anchor);
    }
    case BottleneckType::kSlowTransformationWithParallelVersion: {
      const auto parallel_anchor =
          AnchorElement(kGuideParallelTransformationUrl, "this");
      const auto cache_anchor = AnchorElement(kGuideCachingUrl, "this");
      const auto resources_anchor =
          AnchorElement(kPlaybookTransformationsUrl, "here");
      return absl::StrFormat(
          "1. Parallelize this transformation by setting "
          "<code>num_parallel_calls=tf.data.experimental.AUTOTUNE</code>. See "
          "%s for more details.<br/>"
          "2. Consider adding <code>cache</code> after this transformation if "
          "your data fits into memory and it is appropriate (e.g., there is no "
          "randomness in upstream transformations like <code>shuffle</code>). "
          "See %s for more details.<br/>"
          "3. Find more resources %s.",
          parallel_anchor, cache_anchor, resources_anchor);
    }
    case BottleneckType::kSlowTransformationWithoutParallelVersion: {
      const auto sharding_anchor =
          AnchorElement(kPlaybookTransformationsUrl, "this");
      const auto cache_anchor = AnchorElement(kGuideCachingUrl, "this");
      const auto resources_anchor =
          AnchorElement(kPlaybookCpuUtilizationUrl, "here");
      return absl::StrFormat(
          "1. This transformation is inherently sequential. Add outer "
          "parallelism by running multiple copies of the input pipeline over "
          "sharded inputs and combining the results. See %s for more "
          "details.<br/>"
          "2. Consider adding <code>cache</code> after this transformation if "
          "your data fits into memory and it is appropriate (e.g., there is no "
          "randomness in upstream transformations like <code>shuffle</code>). "
          "See %s for more details.<br/>"
          "3. Find more resources %s.",
          sharding_anchor, cache_anchor, resources_anchor);
    }
    default: {
      // No tailored advice for this type; point at the general playbook.
      const auto playbook_anchor = AnchorElement(kAnalysisPlaybookUrl, "this");
      return absl::StrFormat("See %s for suggestions.", playbook_anchor);
    }
  }
}
|
||||||
|
|
||||||
|
// Populates the `suggestion` field of `bottleneck_analysis` based on the type
// of its bottleneck iterator. Nothing is written unless the recorded max
// latency is strictly greater than kSlowCallThresholdPs.
void SetSuggestion(TfDataBottleneckAnalysis* bottleneck_analysis) {
  const bool exceeds_threshold =
      bottleneck_analysis->max_latency_ps() > kSlowCallThresholdPs;
  if (exceeds_threshold) {
    const BottleneckType bottleneck_type =
        GetBottleneckType(bottleneck_analysis->iterator_name());
    bottleneck_analysis->set_suggestion(GetSuggestion(bottleneck_type));
  }
}
|
||||||
|
|
||||||
|
// Sets `is_input_bound` and `summary` on `combined_tf_data_stats` from the max
// latency stored in its bottleneck analysis:
//   - above kSlowCallThresholdPs: input bound, with a "slow pipeline" summary;
//   - positive but not above the threshold: not input bound;
//   - otherwise: not input bound, with a "nothing captured" summary.
// NOTE(review): the messages hard-code "50 us"; presumably this mirrors
// kSlowCallThresholdPs -- confirm they stay in sync.
// NOTE(review): "activitiy" below is misspelled, but the unit test expectation
// pins this exact text, so the two must be corrected together.
void SetSummary(CombinedTfDataStats* combined_tf_data_stats) {
  const int64 slowest_latency_ps =
      combined_tf_data_stats->bottleneck_analysis().max_latency_ps();
  const bool input_bound = slowest_latency_ps > kSlowCallThresholdPs;

  const char* summary_text = nullptr;
  if (input_bound) {
    summary_text =
        "Your profile has a tf.data input pipeline slower than 50 us. Below "
        "shows a bottleneck in the slow input pipeline and a suggestion on how "
        "to fix it.";
  } else if (slowest_latency_ps > 0) {
    summary_text =
        "Your profile does not have any tf.data input pipeline slower than 50 "
        "us. Your job could be still input bound if this profile didn't "
        "capture all workers.";
  } else {
    summary_text =
        "No tf.data activitiy captured in your profile. If your job uses "
        "tf.data, try to capture a longer profile.";
  }

  combined_tf_data_stats->set_is_input_bound(input_bound);
  combined_tf_data_stats->set_summary(summary_text);
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
// Classifies a bottleneck iterator by its (short) name. The name must match a
// table entry exactly; any name that is not listed maps to
// BottleneckType::kOther.
BottleneckType GetBottleneckType(absl::string_view bottleneck_iterator_name) {
  // Built on first call and never deleted, so it is never destroyed.
  static const auto* const kTypeByIteratorName =
      new absl::flat_hash_map<absl::string_view, BottleneckType>(
          {// Read from storage.
           {"TFRecord", BottleneckType::kSlowSource},
           {"SSTable", BottleneckType::kSlowSource},
           {"RecordIO", BottleneckType::kSlowSource},
           {"Spanner", BottleneckType::kSlowSource},
           {"TFColumn", BottleneckType::kSlowSource},
           {"SleepwalkRemoteDataset", BottleneckType::kSlowSource},
           {"TextLine", BottleneckType::kSlowSource},
           {"StitchedTimelineDataset", BottleneckType::kSlowSource},
           {"DateKeyDataset", BottleneckType::kSlowSource},
           {"CapacitorProto", BottleneckType::kSlowSource},
           {"LMDB", BottleneckType::kSlowSource},
           {"ExternalDataset", BottleneckType::kSlowSource},
           {"PearModel", BottleneckType::kSlowSource},
           {"FixedLengthRecordV2", BottleneckType::kSlowSource},
           // Read from local memory.
           {"FromTensor", BottleneckType::kSlowSource},
           {"TensorSlice", BottleneckType::kSlowSource},
           {"Generator", BottleneckType::kSlowSource},
           {"SyntheticDatasetOp", BottleneckType::kSlowSource},
           // Read from remote memory.
           {"GuzzlerDataGuzzlerRemoteDataset",
            BottleneckType::kSlowRemoteSource},
           {"ReverbDataset", BottleneckType::kSlowRemoteSource},
           {"DatasetService", BottleneckType::kSlowRemoteSource},
           {"DatasetSampleGame", BottleneckType::kSlowRemoteSource},
           {"Courier", BottleneckType::kSlowRemoteSource},
           {"ReverbEpisodeDataset", BottleneckType::kSlowRemoteSource},
           // Transformations with parallel version.
           {"Map", BottleneckType::kSlowTransformationWithParallelVersion},
           {"Interleave",
            BottleneckType::kSlowTransformationWithParallelVersion},
           // Transformations without parallel version.
           {"Filter",
            BottleneckType::kSlowTransformationWithoutParallelVersion},
           {"Batch", BottleneckType::kSlowTransformationWithoutParallelVersion},
           {"Unbatch",
            BottleneckType::kSlowTransformationWithoutParallelVersion}});

  const auto entry = kTypeByIteratorName->find(bottleneck_iterator_name);
  if (entry == kTypeByIteratorName->end()) {
    return BottleneckType::kOther;
  }
  return entry->second;
}
|
||||||
|
|
||||||
void CombinedTfDataStatsBuilder::Add(absl::string_view host_name,
|
void CombinedTfDataStatsBuilder::Add(absl::string_view host_name,
|
||||||
XPlane* host_plane) {
|
XPlane* host_plane) {
|
||||||
TfDataStats& tf_data_stats =
|
TfDataStats& tf_data_stats =
|
||||||
@ -302,6 +446,8 @@ void CombinedTfDataStatsBuilder::Finalize() {
|
|||||||
host_name_and_tf_data_stats.second,
|
host_name_and_tf_data_stats.second,
|
||||||
bottleneck_analysis);
|
bottleneck_analysis);
|
||||||
}
|
}
|
||||||
|
if (generate_suggestion_) SetSuggestion(bottleneck_analysis);
|
||||||
|
SetSummary(combined_tf_data_stats_);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
|
@ -27,11 +27,23 @@ namespace profiler {
|
|||||||
|
|
||||||
TF_CONST_INIT extern const int64 kSlowCallThresholdPs;
|
TF_CONST_INIT extern const int64 kSlowCallThresholdPs;
|
||||||
|
|
||||||
|
// Category of a tf.data bottleneck iterator, as returned by
// GetBottleneckType().
enum class BottleneckType {
  // Source dataset that reads from storage or from local memory.
  kSlowSource,
  // Source dataset that reads from remote memory.
  kSlowRemoteSource,
  // Transformation for which a parallel version exists.
  kSlowTransformationWithParallelVersion,
  // Transformation for which no parallel version exists.
  kSlowTransformationWithoutParallelVersion,
  // Any iterator that is not classified into one of the categories above.
  kOther,
};
|
||||||
|
|
||||||
|
BottleneckType GetBottleneckType(absl::string_view bottleneck_iterator_name);
|
||||||
|
|
||||||
class CombinedTfDataStatsBuilder {
|
class CombinedTfDataStatsBuilder {
|
||||||
public:
|
public:
|
||||||
explicit CombinedTfDataStatsBuilder(
|
explicit CombinedTfDataStatsBuilder(
|
||||||
CombinedTfDataStats* combined_tf_data_stats)
|
CombinedTfDataStats* combined_tf_data_stats,
|
||||||
: combined_tf_data_stats_(combined_tf_data_stats) {}
|
bool generate_suggestion = true)
|
||||||
|
: combined_tf_data_stats_(combined_tf_data_stats),
|
||||||
|
generate_suggestion_(generate_suggestion) {}
|
||||||
|
|
||||||
void Add(absl::string_view host_name, XPlane* host_plane);
|
void Add(absl::string_view host_name, XPlane* host_plane);
|
||||||
|
|
||||||
@ -40,6 +52,7 @@ class CombinedTfDataStatsBuilder {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
CombinedTfDataStats* combined_tf_data_stats_;
|
CombinedTfDataStats* combined_tf_data_stats_;
|
||||||
|
bool generate_suggestion_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
|
@ -77,99 +77,103 @@ TEST(XPlaneToTfDataStatsTest, HostInputPipeline) {
|
|||||||
CombinedTfDataStatsBuilder builder(&combined_tf_data_stats);
|
CombinedTfDataStatsBuilder builder(&combined_tf_data_stats);
|
||||||
builder.Add("host1", &host_plane);
|
builder.Add("host1", &host_plane);
|
||||||
builder.Finalize();
|
builder.Finalize();
|
||||||
EXPECT_THAT(combined_tf_data_stats, EqualsProto(R"pb(
|
EXPECT_THAT(
|
||||||
bottleneck_analysis: {
|
combined_tf_data_stats, EqualsProto(R"pb(
|
||||||
host: "host1"
|
bottleneck_analysis: {
|
||||||
input_pipeline: "Host:0"
|
host: "host1"
|
||||||
max_latency_ps: 100000000
|
input_pipeline: "Host:0"
|
||||||
iterator_name: "Range"
|
max_latency_ps: 100000000
|
||||||
iterator_long_name: "Iterator::Prefetch::Range"
|
iterator_name: "Range"
|
||||||
}
|
iterator_long_name: "Iterator::Prefetch::Range"
|
||||||
tf_data_stats: {
|
suggestion: "See <a href=\"https://www.tensorflow.org/guide/data_performance_analysis\" target=\"_blank\">this</a> for suggestions."
|
||||||
key: "host1"
|
}
|
||||||
value: {
|
tf_data_stats: {
|
||||||
iterator_metadata: {
|
key: "host1"
|
||||||
key: 123,
|
value: {
|
||||||
value: {
|
iterator_metadata: {
|
||||||
id: 123
|
key: 123,
|
||||||
name: "Prefetch"
|
value: {
|
||||||
long_name: "Iterator::Prefetch"
|
id: 123
|
||||||
is_async: true
|
name: "Prefetch"
|
||||||
}
|
long_name: "Iterator::Prefetch"
|
||||||
|
is_async: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iterator_metadata: {
|
||||||
|
key: 456,
|
||||||
|
value: {
|
||||||
|
id: 456
|
||||||
|
parent_id: 123
|
||||||
|
name: "Range"
|
||||||
|
long_name: "Iterator::Prefetch::Range"
|
||||||
|
is_async: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
input_pipelines {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
metadata { id: 123 type: HOST name: "Host:0" }
|
||||||
|
avg_latency_ps: 60000000
|
||||||
|
min_latency_ps: 20000000
|
||||||
|
max_latency_ps: 100000000
|
||||||
|
num_slow_calls: 1
|
||||||
|
stats {
|
||||||
|
bottleneck_iterator_id: 456
|
||||||
|
iterator_stats {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
id: 123
|
||||||
|
start_time_ps: 0
|
||||||
|
duration_ps: 100000000
|
||||||
|
self_time_ps: 20000000
|
||||||
|
is_blocking: true
|
||||||
|
num_calls: 1
|
||||||
}
|
}
|
||||||
iterator_metadata: {
|
}
|
||||||
key: 456,
|
iterator_stats {
|
||||||
value: {
|
key: 456,
|
||||||
id: 456
|
value: {
|
||||||
parent_id: 123
|
id: 456
|
||||||
name: "Range"
|
start_time_ps: 0
|
||||||
long_name: "Iterator::Prefetch::Range"
|
duration_ps: 80000000
|
||||||
is_async: false
|
self_time_ps: 80000000
|
||||||
}
|
is_blocking: true
|
||||||
}
|
num_calls: 1
|
||||||
input_pipelines {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
metadata { id: 123 type: HOST name: "Host:0" }
|
|
||||||
avg_latency_ps: 60000000
|
|
||||||
min_latency_ps: 20000000
|
|
||||||
max_latency_ps: 100000000
|
|
||||||
num_slow_calls: 1
|
|
||||||
stats {
|
|
||||||
bottleneck_iterator_id: 456
|
|
||||||
iterator_stats {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
id: 123
|
|
||||||
start_time_ps: 0
|
|
||||||
duration_ps: 100000000
|
|
||||||
self_time_ps: 20000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
iterator_stats {
|
|
||||||
key: 456,
|
|
||||||
value: {
|
|
||||||
id: 456
|
|
||||||
start_time_ps: 0
|
|
||||||
duration_ps: 80000000
|
|
||||||
self_time_ps: 80000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats {
|
|
||||||
bottleneck_iterator_id: 123
|
|
||||||
iterator_stats {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
id: 123
|
|
||||||
start_time_ps: 200000000
|
|
||||||
duration_ps: 20000000
|
|
||||||
self_time_ps: 20000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
iterator_stats {
|
|
||||||
key: 456,
|
|
||||||
value: {
|
|
||||||
id: 456
|
|
||||||
start_time_ps: 100000000
|
|
||||||
duration_ps: 80000000
|
|
||||||
self_time_ps: 80000000
|
|
||||||
is_blocking: false
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)pb"));
|
stats {
|
||||||
|
bottleneck_iterator_id: 123
|
||||||
|
iterator_stats {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
id: 123
|
||||||
|
start_time_ps: 200000000
|
||||||
|
duration_ps: 20000000
|
||||||
|
self_time_ps: 20000000
|
||||||
|
is_blocking: true
|
||||||
|
num_calls: 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iterator_stats {
|
||||||
|
key: 456,
|
||||||
|
value: {
|
||||||
|
id: 456
|
||||||
|
start_time_ps: 100000000
|
||||||
|
duration_ps: 80000000
|
||||||
|
self_time_ps: 80000000
|
||||||
|
is_blocking: false
|
||||||
|
num_calls: 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
is_input_bound: true
|
||||||
|
summary: "Your profile has a tf.data input pipeline slower than 50 us. Below shows a bottleneck in the slow input pipeline and a suggestion on how to fix it."
|
||||||
|
)pb"));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(XPlaneToTfDataStatsTest, DeviceInputPipeline) {
|
TEST(XPlaneToTfDataStatsTest, DeviceInputPipeline) {
|
||||||
@ -205,82 +209,84 @@ TEST(XPlaneToTfDataStatsTest, DeviceInputPipeline) {
|
|||||||
builder.Add("host1", &host_plane);
|
builder.Add("host1", &host_plane);
|
||||||
builder.Finalize();
|
builder.Finalize();
|
||||||
// Device input pipeline is not considered for bottleneck analysis.
|
// Device input pipeline is not considered for bottleneck analysis.
|
||||||
EXPECT_THAT(combined_tf_data_stats, EqualsProto(R"pb(
|
EXPECT_THAT(
|
||||||
bottleneck_analysis: {}
|
combined_tf_data_stats, EqualsProto(R"pb(
|
||||||
tf_data_stats: {
|
bottleneck_analysis: {}
|
||||||
key: "host1"
|
tf_data_stats: {
|
||||||
value: {
|
key: "host1"
|
||||||
iterator_metadata: {
|
value: {
|
||||||
key: 123,
|
iterator_metadata: {
|
||||||
value: {
|
key: 123,
|
||||||
id: 123
|
value: {
|
||||||
name: "Prefetch"
|
id: 123
|
||||||
long_name: "Iterator::Prefetch"
|
name: "Prefetch"
|
||||||
is_async: true
|
long_name: "Iterator::Prefetch"
|
||||||
}
|
is_async: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iterator_metadata: {
|
||||||
|
key: 456,
|
||||||
|
value: {
|
||||||
|
id: 456
|
||||||
|
parent_id: 123
|
||||||
|
name: "Generator"
|
||||||
|
long_name: "Iterator::Prefetch::Generator"
|
||||||
|
is_async: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
input_pipelines {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
metadata { id: 123 type: DEVICE name: "Device:0" }
|
||||||
|
avg_latency_ps: 65000000
|
||||||
|
min_latency_ps: 30000000
|
||||||
|
max_latency_ps: 100000000
|
||||||
|
num_slow_calls: 1
|
||||||
|
stats {
|
||||||
|
bottleneck_iterator_id: 456
|
||||||
|
iterator_stats {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
id: 123
|
||||||
|
start_time_ps: 100000000
|
||||||
|
duration_ps: 100000000
|
||||||
|
self_time_ps: 20000000
|
||||||
|
is_blocking: true
|
||||||
|
num_calls: 1
|
||||||
}
|
}
|
||||||
iterator_metadata: {
|
}
|
||||||
key: 456,
|
iterator_stats {
|
||||||
value: {
|
key: 456,
|
||||||
id: 456
|
value: {
|
||||||
parent_id: 123
|
id: 456
|
||||||
name: "Generator"
|
start_time_ps: 100000000
|
||||||
long_name: "Iterator::Prefetch::Generator"
|
duration_ps: 80000000
|
||||||
is_async: false
|
self_time_ps: 80000000
|
||||||
}
|
is_blocking: true
|
||||||
}
|
num_calls: 1
|
||||||
input_pipelines {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
metadata { id: 123 type: DEVICE name: "Device:0" }
|
|
||||||
avg_latency_ps: 65000000
|
|
||||||
min_latency_ps: 30000000
|
|
||||||
max_latency_ps: 100000000
|
|
||||||
num_slow_calls: 1
|
|
||||||
stats {
|
|
||||||
bottleneck_iterator_id: 456
|
|
||||||
iterator_stats {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
id: 123
|
|
||||||
start_time_ps: 100000000
|
|
||||||
duration_ps: 100000000
|
|
||||||
self_time_ps: 20000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
iterator_stats {
|
|
||||||
key: 456,
|
|
||||||
value: {
|
|
||||||
id: 456
|
|
||||||
start_time_ps: 100000000
|
|
||||||
duration_ps: 80000000
|
|
||||||
self_time_ps: 80000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats {
|
|
||||||
bottleneck_iterator_id: 123
|
|
||||||
iterator_stats {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
id: 123
|
|
||||||
start_time_ps: 0
|
|
||||||
duration_ps: 30000000
|
|
||||||
self_time_ps: 30000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)pb"));
|
stats {
|
||||||
|
bottleneck_iterator_id: 123
|
||||||
|
iterator_stats {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
id: 123
|
||||||
|
start_time_ps: 0
|
||||||
|
duration_ps: 30000000
|
||||||
|
self_time_ps: 30000000
|
||||||
|
is_blocking: true
|
||||||
|
num_calls: 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
summary: "No tf.data activitiy captured in your profile. If your job uses tf.data, try to capture a longer profile."
|
||||||
|
)pb"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test with the following example dataset:
|
// Test with the following example dataset:
|
||||||
@ -325,74 +331,78 @@ TEST(XPlaneToTfDataStatsTest, MapAndBatch) {
|
|||||||
CombinedTfDataStatsBuilder builder(&combined_tf_data_stats);
|
CombinedTfDataStatsBuilder builder(&combined_tf_data_stats);
|
||||||
builder.Add("host1", &host_plane);
|
builder.Add("host1", &host_plane);
|
||||||
builder.Finalize();
|
builder.Finalize();
|
||||||
EXPECT_THAT(combined_tf_data_stats, EqualsProto(R"pb(
|
EXPECT_THAT(
|
||||||
bottleneck_analysis: {
|
combined_tf_data_stats, EqualsProto(R"pb(
|
||||||
host: "host1"
|
bottleneck_analysis: {
|
||||||
input_pipeline: "Host:0"
|
host: "host1"
|
||||||
max_latency_ps: 100000000
|
input_pipeline: "Host:0"
|
||||||
iterator_name: "Range"
|
max_latency_ps: 100000000
|
||||||
iterator_long_name: "Iterator::MapAndBatch::Range"
|
iterator_name: "Range"
|
||||||
}
|
iterator_long_name: "Iterator::MapAndBatch::Range"
|
||||||
tf_data_stats: {
|
suggestion: "See <a href=\"https://www.tensorflow.org/guide/data_performance_analysis\" target=\"_blank\">this</a> for suggestions."
|
||||||
key: "host1"
|
}
|
||||||
value: {
|
tf_data_stats: {
|
||||||
iterator_metadata: {
|
key: "host1"
|
||||||
key: 123,
|
value: {
|
||||||
value: {
|
iterator_metadata: {
|
||||||
id: 123
|
key: 123,
|
||||||
name: "MapAndBatch"
|
value: {
|
||||||
long_name: "Iterator::MapAndBatch"
|
id: 123
|
||||||
is_async: true
|
name: "MapAndBatch"
|
||||||
}
|
long_name: "Iterator::MapAndBatch"
|
||||||
|
is_async: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iterator_metadata: {
|
||||||
|
key: 456,
|
||||||
|
value: {
|
||||||
|
id: 456
|
||||||
|
parent_id: 123
|
||||||
|
name: "Range"
|
||||||
|
long_name: "Iterator::MapAndBatch::Range"
|
||||||
|
is_async: false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
input_pipelines {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
metadata { id: 123 type: HOST name: "Host:0" }
|
||||||
|
avg_latency_ps: 100000000
|
||||||
|
min_latency_ps: 100000000
|
||||||
|
max_latency_ps: 100000000
|
||||||
|
num_slow_calls: 1
|
||||||
|
stats {
|
||||||
|
bottleneck_iterator_id: 456
|
||||||
|
iterator_stats {
|
||||||
|
key: 123,
|
||||||
|
value: {
|
||||||
|
id: 123
|
||||||
|
start_time_ps: 0
|
||||||
|
duration_ps: 100000000
|
||||||
|
self_time_ps: 40000000
|
||||||
|
is_blocking: true
|
||||||
|
num_calls: 1
|
||||||
}
|
}
|
||||||
iterator_metadata: {
|
}
|
||||||
key: 456,
|
iterator_stats {
|
||||||
value: {
|
key: 456,
|
||||||
id: 456
|
value: {
|
||||||
parent_id: 123
|
id: 456
|
||||||
name: "Range"
|
start_time_ps: 0
|
||||||
long_name: "Iterator::MapAndBatch::Range"
|
duration_ps: 60000000
|
||||||
is_async: false
|
self_time_ps: 60000000
|
||||||
}
|
is_blocking: true
|
||||||
}
|
num_calls: 2
|
||||||
input_pipelines {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
metadata { id: 123 type: HOST name: "Host:0" }
|
|
||||||
avg_latency_ps: 100000000
|
|
||||||
min_latency_ps: 100000000
|
|
||||||
max_latency_ps: 100000000
|
|
||||||
num_slow_calls: 1
|
|
||||||
stats {
|
|
||||||
bottleneck_iterator_id: 456
|
|
||||||
iterator_stats {
|
|
||||||
key: 123,
|
|
||||||
value: {
|
|
||||||
id: 123
|
|
||||||
start_time_ps: 0
|
|
||||||
duration_ps: 100000000
|
|
||||||
self_time_ps: 40000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
iterator_stats {
|
|
||||||
key: 456,
|
|
||||||
value: {
|
|
||||||
id: 456
|
|
||||||
start_time_ps: 0
|
|
||||||
duration_ps: 60000000
|
|
||||||
self_time_ps: 60000000
|
|
||||||
is_blocking: true
|
|
||||||
num_calls: 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)pb"));
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
is_input_bound: true
|
||||||
|
summary: "Your profile has a tf.data input pipeline slower than 50 us. Below shows a bottleneck in the slow input pipeline and a suggestion on how to fix it."
|
||||||
|
)pb"));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -101,10 +101,16 @@ message TfDataBottleneckAnalysis {
|
|||||||
string iterator_name = 4;
|
string iterator_name = 4;
|
||||||
// Long name of the bottleneck iterator.
|
// Long name of the bottleneck iterator.
|
||||||
string iterator_long_name = 5;
|
string iterator_long_name = 5;
|
||||||
|
// Suggestion to resolve the bottleneck.
|
||||||
|
string suggestion = 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TfDataStats of all hosts.
|
// TfDataStats of all hosts.
|
||||||
message CombinedTfDataStats {
|
message CombinedTfDataStats {
|
||||||
|
// Whether it is input bound.
|
||||||
|
bool is_input_bound = 3;
|
||||||
|
// Summary of the analysis.
|
||||||
|
string summary = 4;
|
||||||
// Bottleneck analysis result.
|
// Bottleneck analysis result.
|
||||||
TfDataBottleneckAnalysis bottleneck_analysis = 1;
|
TfDataBottleneckAnalysis bottleneck_analysis = 1;
|
||||||
// TfDataStats per host.
|
// TfDataStats per host.
|
||||||
|
Loading…
Reference in New Issue
Block a user