Consolidate error and warning message population.

1. Add a diagnostic message which contains error, warning and info for diagnosing profiling anomalies. Replace individual error and warning fields by the diagnostic message.
2. Add unified PopulateStepDiagnostics and PopulateOverviewDiagnostics utilities in diagnostics.cc to be used by all tool converters.
3. Add a unified GenerateDiagnosticDatatable in TfStatsProcessor for all child tools. Add a similar generate_diagnostics_table for Python converters.

PiperOrigin-RevId: 315356051
Change-Id: Ic1f1d6d43b3fec850ccb04c3184f5ba7bbd694af
This commit is contained in:
A. Unique TensorFlower 2020-06-08 14:39:56 -07:00 committed by TensorFlower Gardener
parent d7da550fc7
commit de901d9be9
11 changed files with 76 additions and 60 deletions

View File

@ -100,7 +100,7 @@ cc_library(
"//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
"//tensorflow/core/profiler/utils:errors",
"//tensorflow/core/profiler/utils:diagnostics",
"//tensorflow/core/profiler/utils:html_utils",
"//tensorflow/core/profiler/utils:math_utils",
"//tensorflow/core/profiler/utils:op_metrics_db_utils",
@ -125,7 +125,7 @@ cc_library(
"//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
"//tensorflow/core/profiler/utils:errors",
"//tensorflow/core/profiler/utils:diagnostics",
"//tensorflow/core/profiler/utils:event_span",
"//tensorflow/core/profiler/utils:html_utils",
"//tensorflow/core/profiler/utils:math_utils",

View File

@ -36,7 +36,7 @@ limitations under the License.
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/errors.h"
#include "tensorflow/core/profiler/utils/diagnostics.h"
#include "tensorflow/core/profiler/utils/event_span.h"
#include "tensorflow/core/profiler/utils/html_utils.h"
#include "tensorflow/core/profiler/utils/math_utils.h"
@ -552,23 +552,12 @@ StepSummary ComputeStepTimeSummaryInMs(
return GetStepSummaryForSampleStats(total_step_stats_in_ms);
}
// Appends at most one step-related message, prefixed with "WARNING: ", to the
// error messages of `result`. The incomplete-step message takes precedence
// over the no-step-marker message.
void AddErrorMessages(const OpStats& op_stats,
                      InputPipelineAnalysisResult* result) {
  const auto& step_db = op_stats.step_db();
  if (step_db.use_incomplete_step()) {
    *result->add_error_messages() =
        absl::StrCat("WARNING: ", kErrorIncompleteStep);
    return;
  }
  if (step_db.step_sequence().empty()) {
    *result->add_error_messages() =
        absl::StrCat("WARNING: ", kErrorNoStepMarker);
  }
}
InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis(
const OpStats& op_stats, const HardwareType& hardware_type) {
InputPipelineAnalysisResult result =
ComputeGenericInputPipelineAnalysisResult(
op_stats.step_db().step_sequence());
AddErrorMessages(op_stats, &result);
PopulateStepDiagnostics(op_stats, result.mutable_diagnostics());
result.set_hardware_type(HardwareType_Name(hardware_type));
GenerateHostResult(op_stats.host_op_metrics_db(), &result);

View File

@ -30,7 +30,7 @@ limitations under the License.
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
#include "tensorflow/core/profiler/utils/errors.h"
#include "tensorflow/core/profiler/utils/diagnostics.h"
#include "tensorflow/core/profiler/utils/html_utils.h"
#include "tensorflow/core/profiler/utils/math_utils.h"
#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
@ -175,7 +175,6 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
op->set_flop_rate(
SafeDivide(metrics->flops(), PicosToNanos(metrics->time_ps())));
}
SetRemarks(op_stats, &analysis);
uint64 total_device_compute_ps =
op_stats.device_op_metrics_db().precision_stats().compute_16bit_ps() +
op_stats.device_op_metrics_db().precision_stats().compute_32bit_ps();
@ -297,35 +296,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
bottleneck.input_classification(), bottleneck.input_statement(), "",
hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()),
overview_page.mutable_recommendation());
SetOverviewPageErrorMessage(op_stats, &overview_page);
PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics());
return overview_page;
}
// Sets the remark text and color on `analysis` based on the step database:
// a red warning when an incomplete step is used or when the step sequence is
// empty, otherwise an empty remark in black.
void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis) {
  const auto& step_db = op_stats.step_db();
  if (step_db.use_incomplete_step()) {
    analysis->set_remark_text(absl::StrCat("WARNING: ", kErrorIncompleteStep));
    analysis->set_remark_color("red");
    return;
  }
  if (step_db.step_sequence().empty()) {
    analysis->set_remark_text(absl::StrCat("WARNING: ", kErrorNoStepMarker));
    analysis->set_remark_color("red");
    return;
  }
  // No step-related problem: clear the remark.
  analysis->set_remark_text("");
  analysis->set_remark_color("black");
}
// Replaces the errors of `overview_page` with the sorted errors recorded in
// `op_stats`. If that leaves no errors, adds the no-device-trace error when
// the run is not on CPU and reports no device cores.
void SetOverviewPageErrorMessage(const OpStats& op_stats,
                                 OverviewPage* overview_page) {
  auto* page_errors = overview_page->mutable_errors();
  *page_errors = op_stats.errors();
  absl::c_sort(*page_errors);
  // The run-environment error is shown only if there is no other error.
  if (!page_errors->empty()) return;
  const auto& run_env = op_stats.run_environment();
  if (run_env.device_type() != "CPU" && run_env.device_core_count() <= 0) {
    *overview_page->add_errors() = std::string(kNoDeviceTraceCollected);
  }
}
} // namespace profiler
} // namespace tensorflow

View File

@ -48,17 +48,12 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats);
OverviewPageRunEnvironment ComputeRunEnvironment(
const RunEnvironment& run_environment);
// Copies the sorted errors of `op_stats` into `overview_page`; if there are
// none, may add a no-device-trace error for non-CPU runs without device cores.
void SetOverviewPageErrorMessage(const OpStats& op_stats,
OverviewPage* overview_page);
// Builds an OverviewPage from `op_stats` for the given hardware type.
OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats,
HardwareType hardware_type);
// Returns HTML that provides a tf-function-related recommendation.
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db);
// Sets the remark text and color of `analysis`: a red warning for an
// incomplete step or an empty step sequence, otherwise an empty black remark.
void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis);
} // namespace profiler
} // namespace tensorflow

View File

@ -26,10 +26,20 @@ exports_files(
visibility = ["//tensorflow/core:__pkg__"],
)
# Proto library built from diagnostics.proto; visible to the ":friends" group.
tf_proto_library(
name = "diagnostics_proto",
srcs = ["diagnostics.proto"],
cc_api_version = 2,
visibility = [
":friends",
],
)
tf_proto_library(
name = "input_pipeline_proto",
srcs = ["input_pipeline.proto"],
cc_api_version = 2,
protodeps = [":diagnostics_proto"],
visibility = [
":friends",
],
@ -39,7 +49,10 @@ tf_proto_library(
name = "overview_page_proto",
srcs = ["overview_page.proto"],
cc_api_version = 2,
protodeps = [":input_pipeline_proto"],
protodeps = [
":diagnostics_proto",
":input_pipeline_proto",
],
visibility = [
":friends",
],

View File

@ -0,0 +1,11 @@
// This proto describes the diagnostics for debugging profiling issues of
// the TensorFlow profiler.
syntax = "proto3";
package tensorflow.profiler;
// Messages for diagnosing profiling issues, grouped by severity.
message Diagnostics {
// Informational messages.
repeated string info = 1;
// Warning messages.
repeated string warnings = 2;
// Error messages.
repeated string errors = 3;
}

View File

@ -3,6 +3,7 @@ syntax = "proto3";
package tensorflow.profiler;
import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";
// Generic hardware bottleneck.
message BottleneckAnalysis {
@ -151,7 +152,7 @@ message InputPipelineAnalysisResult {
// Breakdown of the step time. Can be unpacked into a
// GenericStepTimeBreakdown.
google.protobuf.Any step_time_breakdown = 8;
// Error messages.
repeated string error_messages = 10;
reserved 1;
// Error and warning messages for diagnosing profiling issues.
Diagnostics diagnostics = 12;
reserved 1, 10;
}

View File

@ -3,6 +3,7 @@ syntax = "proto3";
package tensorflow.profiler;
import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";
import "tensorflow/core/profiler/protobuf/input_pipeline.proto";
// Overview result for a TensorFlow Op.
@ -168,7 +169,7 @@ message OverviewPage {
OverviewPageAnalysis analysis = 3;
// The recommendation made to the user.
OverviewPageRecommendation recommendation = 4;
// Errors.
repeated string errors = 7;
reserved 1, 5;
// Error and warning messages for diagnosing profiling issues.
Diagnostics diagnostics = 8;
reserved 1, 5, 7;
}

View File

@ -13,10 +13,14 @@ package_group(
)
cc_library(
name = "errors",
srcs = ["errors.cc"],
hdrs = ["errors.h"],
name = "diagnostics",
srcs = ["diagnostics.cc"],
hdrs = ["diagnostics.h"],
deps = [
"//tensorflow/core/profiler/protobuf:diagnostics_proto_cc",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/strings",
],
)

View File

@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/errors.h"
#include "tensorflow/core/profiler/utils/diagnostics.h"
#include "absl/algorithm/container.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
namespace tensorflow {
namespace profiler {
@ -38,5 +40,26 @@ const absl::string_view kNoDeviceTraceCollected =
"run on the device when sampling was turned on. You could try the sampling"
" again later.";
// Adds at most one step-related warning to `diag`. The incomplete-step
// message takes precedence over the no-step-marker message, which is added
// only when the step sequence is empty.
void PopulateStepDiagnostics(const OpStats& op_stats, Diagnostics* diag) {
  const auto& step_db = op_stats.step_db();
  if (step_db.use_incomplete_step()) {
    *diag->add_warnings() = std::string(kErrorIncompleteStep);
    return;
  }
  if (step_db.step_sequence().empty()) {
    *diag->add_warnings() = std::string(kErrorNoStepMarker);
  }
}
// Fills `diag` for the overview page: replaces its errors with the sorted
// errors recorded in `op_stats`, adds the no-device-trace error when there is
// no other error and a non-CPU run reports no device cores, and finally adds
// the step-related warnings.
void PopulateOverviewDiagnostics(const OpStats& op_stats, Diagnostics* diag) {
  auto* diag_errors = diag->mutable_errors();
  *diag_errors = op_stats.errors();
  absl::c_sort(*diag_errors);

  const auto& run_env = op_stats.run_environment();
  const bool no_device_trace =
      run_env.device_type() != "CPU" && run_env.device_core_count() <= 0;
  // The run-environment error is shown only if there is no other error.
  if (diag_errors->empty() && no_device_trace) {
    *diag->add_errors() = std::string(kNoDeviceTraceCollected);
  }

  PopulateStepDiagnostics(op_stats, diag);
}
} // namespace profiler
} // namespace tensorflow

View File

@ -17,6 +17,8 @@ limitations under the License.
#define TENSORFLOW_CORE_PROFILER_UTILS_ERRORS_H_
#include "absl/strings/string_view.h"
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
namespace tensorflow {
namespace profiler {
@ -30,6 +32,10 @@ ABSL_CONST_INIT extern const absl::string_view kErrorNoStepMarker;
ABSL_CONST_INIT extern const absl::string_view kNoDeviceTraceCollected;
// Adds at most one step-related warning to `diag`: the incomplete-step message
// if the step database uses an incomplete step, otherwise the no-step-marker
// message if the step sequence is empty.
void PopulateStepDiagnostics(const OpStats& op_stats, Diagnostics* diag);
// Replaces the errors of `diag` with the sorted errors from `op_stats`, adds
// the no-device-trace error when there is no other error and a non-CPU run
// reports no device cores, then adds the step-related warnings.
void PopulateOverviewDiagnostics(const OpStats& op_stats, Diagnostics* diag);
} // namespace profiler
} // namespace tensorflow