diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index bd6ffa3f468..027d03ba152 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -103,6 +103,7 @@ cc_library( hdrs = ["op_stats_to_input_pipeline_analysis.h"], deps = [ ":op_metrics_to_record", + ":step_events_to_steps_db", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core/platform:logging", diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index 92f95cf1a0b..80da34ac277 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_metrics_to_record.h" +#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" @@ -163,8 +164,8 @@ InputPipelineAnalysisResult ComputeGenericInputPipelineAnalysisResult( Stat input_summary_stats_in_percent; for (const auto& coreid_stepinfo_map : grouped_by_step) { // Iterates over each step. - const auto* ptr = - gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(), 0); + const auto* ptr = gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(), + kDefaultGpuLocalCoreId); if (ptr == nullptr) { // For generic hardware, all step-info is put under core-0. If ptr // is nullptr, it means there is no step at all. 
diff --git a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc index d518c715a89..ed0d83ade2f 100644 --- a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc +++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc @@ -22,6 +22,10 @@ limitations under the License. namespace tensorflow { namespace profiler { + +// Default local core id for GPU step info; local core ids start from 1. +const uint32 kDefaultGpuLocalCoreId = 1; + namespace { // Converts from StepDetails to StepInfoResult. @@ -118,12 +122,13 @@ StepDatabaseResult ConvertStepEventsToStepDb( // When we generated StepEvents, we already put events from all device // cores and cpu threads on this host into a single event stream, therefore // we can't separate them anymore. Simply assigns all events to Core-0. - (*per_core_step_info.mutable_step_info_per_core())[0] = + (*per_core_step_info.mutable_step_info_per_core())[kDefaultGpuLocalCoreId] = std::move(step_info); VLOG(2) << std::endl << "step_id: " << step << ", step_info:" << std::endl - << DebugStepInfo( - (*per_core_step_info.mutable_step_info_per_core())[0]); + << DebugStepInfo(( + *per_core_step_info + .mutable_step_info_per_core())[kDefaultGpuLocalCoreId]); // The remaining fields in PerCoreStepInfo are not filled. *step_db.add_step_sequence() = per_core_step_info; } diff --git a/tensorflow/core/profiler/convert/step_events_to_steps_db.h b/tensorflow/core/profiler/convert/step_events_to_steps_db.h index 6090cd1dc8e..b3ea74e905f 100644 --- a/tensorflow/core/profiler/convert/step_events_to_steps_db.h +++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.h @@ -22,6 +22,8 @@ limitations under the License. namespace tensorflow { namespace profiler { +ABSL_CONST_INIT extern const uint32 kDefaultGpuLocalCoreId; + // Converts from overlapped Step-Events to StepDatabaseResult. StepDatabaseResult ConvertStepEventsToStepDb( bool has_device, const StepEvents& overlapped_step_events);