Fix a crash when combining step databases among multiple GPU hosts.
PiperOrigin-RevId: 302683087 Change-Id: I3807b55b85efcc216ede6f7c2b439543a64b3b51
This commit is contained in:
parent
794cb200e5
commit
e1afcc5feb
@ -103,6 +103,7 @@ cc_library(
|
||||
hdrs = ["op_stats_to_input_pipeline_analysis.h"],
|
||||
deps = [
|
||||
":op_metrics_to_record",
|
||||
":step_events_to_steps_db",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core/platform:logging",
|
||||
|
@ -30,6 +30,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/op_metrics_to_record.h"
|
||||
#include "tensorflow/core/profiler/convert/step_events_to_steps_db.h"
|
||||
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||
@ -163,8 +164,8 @@ InputPipelineAnalysisResult ComputeGenericInputPipelineAnalysisResult(
|
||||
Stat<double> input_summary_stats_in_percent;
|
||||
for (const auto& coreid_stepinfo_map : grouped_by_step) {
|
||||
// Iterates over each step.
|
||||
const auto* ptr =
|
||||
gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(), 0);
|
||||
const auto* ptr = gtl::FindOrNull(coreid_stepinfo_map.step_info_per_core(),
|
||||
kDefaultGpuLocalCoreId);
|
||||
if (ptr == nullptr) {
|
||||
// For generic hardware, all step-info is put under kDefaultGpuLocalCoreId. If ptr
|
||||
// is nullptr, it means there is no step at all.
|
||||
|
@ -22,6 +22,10 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
// Local core id should start from 1.
|
||||
const uint32 kDefaultGpuLocalCoreId = 1;
|
||||
|
||||
namespace {
|
||||
|
||||
// Converts from StepDetails to StepInfoResult.
|
||||
@ -118,12 +122,13 @@ StepDatabaseResult ConvertStepEventsToStepDb(
|
||||
// When we generated StepEvents, we already put events from all device
|
||||
// cores and cpu threads on this host into a single event stream, therefore
|
||||
// we can't separate them anymore. Simply assigns all events to the default core (kDefaultGpuLocalCoreId).
|
||||
(*per_core_step_info.mutable_step_info_per_core())[0] =
|
||||
(*per_core_step_info.mutable_step_info_per_core())[kDefaultGpuLocalCoreId] =
|
||||
std::move(step_info);
|
||||
VLOG(2) << std::endl
|
||||
<< "step_id: " << step << ", step_info:" << std::endl
|
||||
<< DebugStepInfo(
|
||||
(*per_core_step_info.mutable_step_info_per_core())[0]);
|
||||
<< DebugStepInfo((
|
||||
*per_core_step_info
|
||||
.mutable_step_info_per_core())[kDefaultGpuLocalCoreId]);
|
||||
// The remaining fields in PerCoreStepInfo are not filled.
|
||||
*step_db.add_step_sequence() = per_core_step_info;
|
||||
}
|
||||
|
@ -22,6 +22,8 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
// Local core id under which ConvertStepEventsToStepDb() stores the step info
// of each step. Events from all device cores and CPU threads on a host are
// merged into a single stream, so they share this one id. Local core ids
// start from 1.
ABSL_CONST_INIT extern const uint32 kDefaultGpuLocalCoreId;
|
||||
|
||||
// Converts from overlapped Step-Events to StepDatabaseResult.
|
||||
StepDatabaseResult ConvertStepEventsToStepDb(
|
||||
bool has_device, const StepEvents& overlapped_step_events);
|
||||
|
Loading…
x
Reference in New Issue
Block a user