Consolidate error and warning message population.
1. Add a Diagnostics message that holds errors, warnings and info for diagnosing profiling anomalies. Replace the individual error and warning fields with the Diagnostics message. 2. Add unified PopulateStepDiagnostics and PopulateOverviewDiagnostics utilities in diagnostics.cc to be used by all tool converters. 3. Add a unified GenerateDiagnosticDatatable in TfStatsProcessor for all child tools. Add a similar generate_diagnostics_table for Python converters. PiperOrigin-RevId: 315356051 Change-Id: Ic1f1d6d43b3fec850ccb04c3184f5ba7bbd694af
This commit is contained in:
parent
d7da550fc7
commit
de901d9be9
@ -100,7 +100,7 @@ cc_library(
|
||||
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:errors",
|
||||
"//tensorflow/core/profiler/utils:diagnostics",
|
||||
"//tensorflow/core/profiler/utils:html_utils",
|
||||
"//tensorflow/core/profiler/utils:math_utils",
|
||||
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
|
||||
@ -125,7 +125,7 @@ cc_library(
|
||||
"//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:errors",
|
||||
"//tensorflow/core/profiler/utils:diagnostics",
|
||||
"//tensorflow/core/profiler/utils:event_span",
|
||||
"//tensorflow/core/profiler/utils:html_utils",
|
||||
"//tensorflow/core/profiler/utils:math_utils",
|
||||
|
@ -36,7 +36,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/errors.h"
|
||||
#include "tensorflow/core/profiler/utils/diagnostics.h"
|
||||
#include "tensorflow/core/profiler/utils/event_span.h"
|
||||
#include "tensorflow/core/profiler/utils/html_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||
@ -552,23 +552,12 @@ StepSummary ComputeStepTimeSummaryInMs(
|
||||
return GetStepSummaryForSampleStats(total_step_stats_in_ms);
|
||||
}
|
||||
|
||||
void AddErrorMessages(const OpStats& op_stats,
|
||||
InputPipelineAnalysisResult* result) {
|
||||
if (op_stats.step_db().use_incomplete_step()) {
|
||||
*result->add_error_messages() =
|
||||
absl::StrCat("WARNING: ", kErrorIncompleteStep);
|
||||
} else if (op_stats.step_db().step_sequence().empty()) {
|
||||
*result->add_error_messages() =
|
||||
absl::StrCat("WARNING: ", kErrorNoStepMarker);
|
||||
}
|
||||
}
|
||||
|
||||
InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
|
||||
const OpStats& op_stats, const HardwareType& hardware_type) {
|
||||
InputPipelineAnalysisResult result =
|
||||
ComputeGenericInputPipelineAnalysisResult(
|
||||
op_stats.step_db().step_sequence());
|
||||
AddErrorMessages(op_stats, &result);
|
||||
PopulateStepDiagnostics(op_stats, result.mutable_diagnostics());
|
||||
result.set_hardware_type(HardwareType_Name(hardware_type));
|
||||
GenerateHostResult(op_stats.host_op_metrics_db(), &result);
|
||||
|
||||
|
@ -30,7 +30,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/errors.h"
|
||||
#include "tensorflow/core/profiler/utils/diagnostics.h"
|
||||
#include "tensorflow/core/profiler/utils/html_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/math_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
|
||||
@ -175,7 +175,6 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
|
||||
op->set_flop_rate(
|
||||
SafeDivide(metrics->flops(), PicosToNanos(metrics->time_ps())));
|
||||
}
|
||||
SetRemarks(op_stats, &analysis);
|
||||
uint64 total_device_compute_ps =
|
||||
op_stats.device_op_metrics_db().precision_stats().compute_16bit_ps() +
|
||||
op_stats.device_op_metrics_db().precision_stats().compute_32bit_ps();
|
||||
@ -297,35 +296,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
|
||||
bottleneck.input_classification(), bottleneck.input_statement(), "",
|
||||
hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()),
|
||||
overview_page.mutable_recommendation());
|
||||
SetOverviewPageErrorMessage(op_stats, &overview_page);
|
||||
PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics());
|
||||
return overview_page;
|
||||
}
|
||||
|
||||
void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis) {
|
||||
if (op_stats.step_db().use_incomplete_step()) {
|
||||
analysis->set_remark_text(absl::StrCat("WARNING: ", kErrorIncompleteStep));
|
||||
analysis->set_remark_color("red");
|
||||
} else if (op_stats.step_db().step_sequence().empty()) {
|
||||
analysis->set_remark_text(absl::StrCat("WARNING: ", kErrorNoStepMarker));
|
||||
analysis->set_remark_color("red");
|
||||
} else {
|
||||
analysis->set_remark_text("");
|
||||
analysis->set_remark_color("black");
|
||||
}
|
||||
}
|
||||
|
||||
void SetOverviewPageErrorMessage(const OpStats& op_stats,
|
||||
OverviewPage* overview_page) {
|
||||
*overview_page->mutable_errors() = op_stats.errors();
|
||||
absl::c_sort(*overview_page->mutable_errors());
|
||||
if (overview_page->errors().empty()) {
|
||||
// Shows run-environment error only if there is no other existing error.
|
||||
if (op_stats.run_environment().device_type() != "CPU" &&
|
||||
op_stats.run_environment().device_core_count() <= 0) {
|
||||
*overview_page->add_errors() = std::string(kNoDeviceTraceCollected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
@ -48,17 +48,12 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats);
|
||||
OverviewPageRunEnvironment ComputeRunEnvironment(
|
||||
const RunEnvironment& run_environment);
|
||||
|
||||
void SetOverviewPageErrorMessage(const OpStats& op_stats,
|
||||
OverviewPage* overview_page);
|
||||
|
||||
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
|
||||
HardwareType hardware_type);
|
||||
|
||||
// Returns a html which provides tf-function related recommendation.
|
||||
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
|
||||
|
||||
void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis);
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -26,10 +26,20 @@ exports_files(
|
||||
visibility = ["//tensorflow/core:__pkg__"],
|
||||
)
|
||||
|
||||
tf_proto_library(
|
||||
name = "diagnostics_proto",
|
||||
srcs = ["diagnostics.proto"],
|
||||
cc_api_version = 2,
|
||||
visibility = [
|
||||
":friends",
|
||||
],
|
||||
)
|
||||
|
||||
tf_proto_library(
|
||||
name = "input_pipeline_proto",
|
||||
srcs = ["input_pipeline.proto"],
|
||||
cc_api_version = 2,
|
||||
protodeps = [":diagnostics_proto"],
|
||||
visibility = [
|
||||
":friends",
|
||||
],
|
||||
@ -39,7 +49,10 @@ tf_proto_library(
|
||||
name = "overview_page_proto",
|
||||
srcs = ["overview_page.proto"],
|
||||
cc_api_version = 2,
|
||||
protodeps = [":input_pipeline_proto"],
|
||||
protodeps = [
|
||||
":diagnostics_proto",
|
||||
":input_pipeline_proto",
|
||||
],
|
||||
visibility = [
|
||||
":friends",
|
||||
],
|
||||
|
11
tensorflow/core/profiler/protobuf/diagnostics.proto
Normal file
11
tensorflow/core/profiler/protobuf/diagnostics.proto
Normal file
@ -0,0 +1,11 @@
|
||||
// This proto describes the diagnostics for debugging profiling issues of
|
||||
// the TensorFlow profiler.
|
||||
syntax = "proto3";
|
||||
|
||||
package tensorflow.profiler;
|
||||
|
||||
message Diagnostics {
|
||||
repeated string info = 1;
|
||||
repeated string warnings = 2;
|
||||
repeated string errors = 3;
|
||||
}
|
@ -3,6 +3,7 @@ syntax = "proto3";
|
||||
package tensorflow.profiler;
|
||||
|
||||
import "google/protobuf/any.proto";
|
||||
import "tensorflow/core/profiler/protobuf/diagnostics.proto";
|
||||
|
||||
// Generic hardware bottleneck.
|
||||
message BottleneckAnalysis {
|
||||
@ -151,7 +152,7 @@ message InputPipelineAnalysisResult {
|
||||
// Breakdown of the step time. Can be unpacked into a
|
||||
// GenericStepTimeBreakdown.
|
||||
google.protobuf.Any step_time_breakdown = 8;
|
||||
// Error messages.
|
||||
repeated string error_messages = 10;
|
||||
reserved 1;
|
||||
// Error and warning messages for diagnosing profiling issues.
|
||||
Diagnostics diagnostics = 12;
|
||||
reserved 1, 10;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ syntax = "proto3";
|
||||
package tensorflow.profiler;
|
||||
|
||||
import "google/protobuf/any.proto";
|
||||
import "tensorflow/core/profiler/protobuf/diagnostics.proto";
|
||||
import "tensorflow/core/profiler/protobuf/input_pipeline.proto";
|
||||
|
||||
// Overview result for a TensorFlow Op.
|
||||
@ -168,7 +169,7 @@ message OverviewPage {
|
||||
OverviewPageAnalysis analysis = 3;
|
||||
// The recommendation made to the user.
|
||||
OverviewPageRecommendation recommendation = 4;
|
||||
// Errors.
|
||||
repeated string errors = 7;
|
||||
reserved 1, 5;
|
||||
// Error and warning messages for diagnosing profiling issues.
|
||||
Diagnostics diagnostics = 8;
|
||||
reserved 1, 5, 7;
|
||||
}
|
||||
|
@ -13,10 +13,14 @@ package_group(
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "errors",
|
||||
srcs = ["errors.cc"],
|
||||
hdrs = ["errors.h"],
|
||||
name = "diagnostics",
|
||||
srcs = ["diagnostics.cc"],
|
||||
hdrs = ["diagnostics.h"],
|
||||
deps = [
|
||||
"//tensorflow/core/profiler/protobuf:diagnostics_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
|
||||
"@com_google_absl//absl/algorithm:container",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/profiler/utils/errors.h"
|
||||
#include "tensorflow/core/profiler/utils/diagnostics.h"
|
||||
|
||||
#include "absl/algorithm/container.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
@ -38,5 +40,26 @@ const absl::string_view kNoDeviceTraceCollected =
|
||||
"run on the device when sampling was turned on. You could try the sampling"
|
||||
" again later.";
|
||||
|
||||
void PopulateStepDiagnostics(const OpStats& op_stats, Diagnostics* diag) {
|
||||
if (op_stats.step_db().use_incomplete_step()) {
|
||||
*diag->add_warnings() = std::string(kErrorIncompleteStep);
|
||||
} else if (op_stats.step_db().step_sequence().empty()) {
|
||||
*diag->add_warnings() = std::string(kErrorNoStepMarker);
|
||||
}
|
||||
}
|
||||
|
||||
void PopulateOverviewDiagnostics(const OpStats& op_stats, Diagnostics* diag) {
|
||||
*diag->mutable_errors() = op_stats.errors();
|
||||
absl::c_sort(*diag->mutable_errors());
|
||||
if (diag->errors().empty()) {
|
||||
// Shows run-environment error only if there is no other existing error.
|
||||
if (op_stats.run_environment().device_type() != "CPU" &&
|
||||
op_stats.run_environment().device_core_count() <= 0) {
|
||||
*diag->add_errors() = std::string(kNoDeviceTraceCollected);
|
||||
}
|
||||
}
|
||||
PopulateStepDiagnostics(op_stats, diag);
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
@ -17,6 +17,8 @@ limitations under the License.
|
||||
#define TENSORFLOW_CORE_PROFILER_UTILS_ERRORS_H_
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
@ -30,6 +32,10 @@ ABSL_CONST_INIT extern const absl::string_view kErrorNoStepMarker;
|
||||
|
||||
ABSL_CONST_INIT extern const absl::string_view kNoDeviceTraceCollected;
|
||||
|
||||
void PopulateStepDiagnostics(const OpStats& op_stats, Diagnostics* diag);
|
||||
|
||||
void PopulateOverviewDiagnostics(const OpStats& op_stats, Diagnostics* diag);
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
Loading…
x
Reference in New Issue
Block a user