Fix a crash when combining step databases among multiple GPU hosts.

PiperOrigin-RevId: 302683087
Change-Id: I3807b55b85efcc216ede6f7c2b439543a64b3b51
This commit is contained in:
A. Unique TensorFlower 2020-03-24 09:36:35 -07:00 committed by TensorFlower Gardener
parent 794cb200e5
commit e1afcc5feb
4 changed files with 14 additions and 5 deletions

View File

@ -103,6 +103,7 @@ cc_library(
hdrs = ["op_stats_to_input_pipeline_analysis.h"],
deps = [
":op_metrics_to_record",
":step_events_to_steps_db",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/platform:logging",

View File

@ -30,6 +30,7 @@ limitations under the License.
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/convert/op_metrics_to_record.h"
#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
@ -163,8 +164,8 @@ InputPipelineAnalysisResult ComputeGenericInputPipelineAnalysisResult(
Stat<double> input_summary_stats_in_percent;
for (const auto& coreid_stepinfo_map : grouped_by_step) {
// Iterates over each step.
const auto* ptr =
gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(), 0);
const auto* ptr = gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(),
kDefaultGpuLocalCoreId);
if (ptr == nullptr) {
// For generic hardware, all step-info is put under kDefaultGpuLocalCoreId
// (core-1). If ptr is nullptr, it means there is no step at all.

View File

@ -22,6 +22,10 @@ limitations under the License.
namespace tensorflow {
namespace profiler {
// Local core id should start from 1.
const uint32 kDefaultGpuLocalCoreId = 1;
namespace {
// Converts from StepDetails to StepInfoResult.
@ -118,12 +122,13 @@ StepDatabaseResult ConvertStepEventsToStepDb(
// When we generated StepEvents, we already put events from all device
// cores and cpu threads on this host into a single event stream, therefore
// we can't separate them anymore. Simply assigns all events to
// kDefaultGpuLocalCoreId (core-1).
(*per_core_step_info.mutable_step_info_per_core())[0] =
(*per_core_step_info.mutable_step_info_per_core())[kDefaultGpuLocalCoreId] =
std::move(step_info);
VLOG(2) << std::endl
<< "step_id: " << step << ", step_info:" << std::endl
<< DebugStepInfo(
(*per_core_step_info.mutable_step_info_per_core())[0]);
<< DebugStepInfo((
*per_core_step_info
.mutable_step_info_per_core())[kDefaultGpuLocalCoreId]);
// The remaining fields in PerCoreStepInfo are not filled.
*step_db.add_step_sequence() = per_core_step_info;
}

View File

@ -22,6 +22,8 @@ limitations under the License.
namespace tensorflow {
namespace profiler {
ABSL_CONST_INIT extern const uint32 kDefaultGpuLocalCoreId;
// Converts from overlapped Step-Events to StepDatabaseResult.
StepDatabaseResult ConvertStepEventsToStepDb(
bool has_device, const StepEvents& overlapped_step_events);