Fix a crash when combining step databases among multiple GPU hosts.

PiperOrigin-RevId: 302683087 Change-Id: I3807b55b85efcc216ede6f7c2b439543a64b3b51
2020-03-24 09:36:35 -07:00 · 2020-03-24 09:36:35 -07:00 · e1afcc5feb
commit e1afcc5feb
parent 794cb200e5
4 changed files with 14 additions and 5 deletions
--- a/tensorflow/core/profiler/convert/BUILD
+++ b/tensorflow/core/profiler/convert/BUILD
@ -103,6 +103,7 @@ cc_library(
    hdrs = ["op_stats_to_input_pipeline_analysis.h"],
    deps = [
        ":op_metrics_to_record",
+        ":step_events_to_steps_db",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:logging",
--- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc
+++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc
@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/profiler/convert/op_metrics_to_record.h"
+#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h"
 #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
 #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
@ -163,8 +164,8 @@ InputPipelineAnalysisResult ComputeGenericInputPipelineAnalysisResult(
  Stat<double> input_summary_stats_in_percent;
  for (const auto& coreid_stepinfo_map : grouped_by_step) {
    // Iterates over each step.
-    const auto* ptr =
-        gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(), 0);
+    const auto* ptr = gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(),
+                                      kDefaultGpuLocalCoreId);
    if (ptr == nullptr) {
      // For generic hardware, all step-info is put under the default GPU core
      // id (kDefaultGpuLocalCoreId). A nullptr means there is no step at all.
--- a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc
+++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc
@ -22,6 +22,10 @@ limitations under the License.

 namespace tensorflow {
 namespace profiler {
+
+// Local core id should start from 1.
+const uint32 kDefaultGpuLocalCoreId = 1;
+
 namespace {

 // Converts from StepDetails to StepInfoResult.
@ -118,12 +122,13 @@ StepDatabaseResult ConvertStepEventsToStepDb(
    // When we generated StepEvents, we already put events from all device
    // cores and cpu threads on this host into a single event stream, therefore
    // we can't separate them anymore. All go under kDefaultGpuLocalCoreId.
-    (*per_core_step_info.mutable_step_info_per_core())[0] =
+    (*per_core_step_info.mutable_step_info_per_core())[kDefaultGpuLocalCoreId] =
        std::move(step_info);
    VLOG(2) << std::endl
            << "step_id: " << step << ", step_info:" << std::endl
-            << DebugStepInfo(
-                   (*per_core_step_info.mutable_step_info_per_core())[0]);
+            << DebugStepInfo((
+                   *per_core_step_info
+                        .mutable_step_info_per_core())[kDefaultGpuLocalCoreId]);
    // The remaining fields in PerCoreStepInfo are not filled.
    *step_db.add_step_sequence() = per_core_step_info;
  }
--- a/tensorflow/core/profiler/convert/step_events_to_steps_db.h
+++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.h
@ -22,6 +22,8 @@ limitations under the License.
 namespace tensorflow {
 namespace profiler {

+ABSL_CONST_INIT extern const uint32 kDefaultGpuLocalCoreId;
+
 // Converts from overlapped Step-Events to StepDatabaseResult.
 StepDatabaseResult ConvertStepEventsToStepDb(
    bool has_device, const StepEvents& overlapped_step_events);