diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc
index 83673458d21..89b4939f5d0 100644
--- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc
+++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc
@@ -752,5 +752,17 @@ std::string GetSummaryNextStep(absl::string_view input_classification,
   return summary_next_step;
 }
 
+double HostToDeviceTransferAsPercentOfInputTime(
+    const InputTimeBreakdown& breakdown) {
+  // Thanks to the scaling trick we did in GenerateHostResult(), we can
+  // estimate the host-to-device share of input time as the enqueue time
+  // divided by the sum of every component in the breakdown.
+  double total_input_time_us =
+      breakdown.demanded_file_read_us() + breakdown.advanced_file_read_us() +
+      breakdown.preprocessing_us() + breakdown.enqueue_us() +
+      breakdown.unclassified_non_enqueue_us();
+  return 100.0 * SafeDivide(breakdown.enqueue_us(), total_input_time_us);
+}
+
 }  // namespace profiler
 }  // namespace tensorflow
diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h
index 93b4df0b2c2..2191251ee88 100644
--- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h
+++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h
@@ -31,6 +31,17 @@ limitations under the License.
 namespace tensorflow {
 namespace profiler {
 
+// If the percent of input-time spent on host-to-device transfer is greater
+// than kHostToDeviceTimePercentAsSignificant, we should advise the user to
+// optimize this transfer. The value is a percentage, not a fraction.
+constexpr double kHostToDeviceTimePercentAsSignificant = 10.0;
+
+// If the percent of input-time spent on host-to-device transfer is greater
+// than kHostToDeviceTimePercentAsDominant, we should ONLY advise the user to
+// optimize this transfer; we won't bother to suggest optimization for
+// tf.data. The value is a percentage, not a fraction.
+constexpr double kHostToDeviceTimePercentAsDominant = 90.0;
+
 // Computes the summary of step time in milliseconds.
 StepSummary ComputeStepTimeSummaryInMs(
     const ::tensorflow::protobuf::RepeatedPtrField&
@@ -62,6 +73,11 @@ void OutputAnalysis(double output_percent, std::string* output_classification,
 string GetSummaryNextStep(absl::string_view input_classification,
                           const InputTimeBreakdown& breakdown);
 
+// Returns the percentage (100 * enqueue time / sum of all components of
+// `breakdown`) of the input time spent transferring data from host to device.
+double HostToDeviceTransferAsPercentOfInputTime(
+    const InputTimeBreakdown& breakdown);
+
 void AddErrorMessages(const OpStats& op_stats,
                       InputPipelineAnalysisResult* result);
 
diff --git a/tensorflow/core/profiler/protobuf/overview_page.proto b/tensorflow/core/profiler/protobuf/overview_page.proto
index 018aa759cc5..1590076d55f 100644
--- a/tensorflow/core/profiler/protobuf/overview_page.proto
+++ b/tensorflow/core/profiler/protobuf/overview_page.proto
@@ -81,6 +81,8 @@ message OverviewPageRecommendation {
   // A statement for input that recommends the next steps for investigating the
   // bottleneck.
   string statement = 2;
+  // A list of tips for tackling the input bottleneck.
+  repeated OverviewPageTip input_tips = 11;
   // A statement for output that recommends the next steps for investigating the
   // bottleneck.
   string output_statement = 9;