From 2229ae89c927b46355a15e8af22365d24afc25bf Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Fri, 19 Jun 2020 12:06:36 -0700 Subject: [PATCH] Use group_id as step_id. PiperOrigin-RevId: 317353238 Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7 --- tensorflow/core/profiler/convert/BUILD | 1 + .../convert/xplane_to_memory_profile.cc | 59 +++++++------------ .../convert/xplane_to_memory_profile.h | 1 + .../convert/xplane_to_memory_profile_test.cc | 3 +- .../profiler/protobuf/memory_profile.proto | 4 +- 5 files changed, 25 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index abf0176bf6f..06594b1aeaf 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -525,6 +525,7 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:memory_profile_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "//tensorflow/core/profiler/utils:group_events", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_test_utils", diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index d039ca8da32..d7104c2bbf5 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -42,6 +42,8 @@ namespace profiler { namespace { +constexpr int64 kInvalidStepId = -1; + // Index of the time-sorted memory_profile_snapshots list, and the // MemoryActivityMetadata proto it contains. using IndexMetaPair = std::pair; @@ -63,7 +65,7 @@ struct ActivityMetadata { int64 allocation_bytes = 0; uint64 address = 0; absl::string_view tf_op_name; - int64 step_id = -1; + int64 step_id = kInvalidStepId; absl::string_view region_type; int64 data_type = 0; absl::string_view tensor_shape; @@ -129,7 +131,6 @@ void UpdateProfileSummary(const AggregationStats& stats, int64 time_offset_ps, MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace); MemoryProfile memory_profile; - auto* step_count = memory_profile.mutable_step_count(); // Iterate over all XEvents in the XPlane, and add the XStats to a new // MemoryProfileSnapshot if the EventType is kMemoryAllocation or // kMemoryDeallocation. @@ -181,9 +182,8 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { case StatType::kTfOp: metadata.tf_op_name = stat.StrOrRefValue(); break; - case StatType::kStepId: + case StatType::kGroupId: metadata.step_id = stat.IntValue(); - if (metadata.step_id != 0) (*step_count)[metadata.step_id]++; break; case StatType::kRegionType: metadata.region_type = stat.StrOrRefValue(); @@ -214,40 +214,21 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { return memory_profile; } -// Sequentialize step ids for the memory profile. -void UpdateStepId(const tensorflow::protobuf::Map< - tensorflow::protobuf_int64 /*orig_step_id*/, - tensorflow::protobuf_int64 /*count*/>& step_count, - PerAllocatorMemoryProfile* memory_profile) { - // Map from original random step id to sequential step id. - absl::flat_hash_map step_map; - constexpr int kUnknownStep = -2; - constexpr double kStepFilterRatio = 0.1; // Magic number for filtering. - tensorflow::protobuf_int64 max_step_count = 0; - for (const auto& step_and_count : step_count) { - max_step_count = std::max(max_step_count, step_and_count.second); - } - // Filter out noisy and incomplete original step ids. - for (const auto& step_and_count : step_count) { - if (static_cast(step_and_count.second) / max_step_count > - kStepFilterRatio) { - step_map[step_and_count.first] = kUnknownStep; - } - } - - // Update the step ids in memory_profile for this allocator. - int64 step_id = -1; +// Fix invalid step ids of snapshots at the beginning/end of the profile or at +// the step boundaries. The snapshots with invalid step ids at the beginning get +// 0 for their step ids. Those at the step boundaries or at the end get the +// previous snapshot's step id + 1. +void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) { + int64 last_valid_step_id = -1; + // Snapshots are already sorted in time. for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) { DCHECK(snapshot.has_activity_metadata()); - // Convert the random step id to sequential step id. - int64 orig_step_id = snapshot.activity_metadata().step_id(); - if (step_map.contains(orig_step_id) && - step_map[orig_step_id] == kUnknownStep) { - step_map[orig_step_id] = ++step_id; + if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) { + snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1); + } else { + last_valid_step_id = snapshot.mutable_activity_metadata()->step_id(); } - snapshot.mutable_activity_metadata()->set_step_id(step_id); } - VLOG(2) << "Max sequential step id in profile: " << step_id; } // Update the MemoryActivityMetadata for each deallocation event by copying from @@ -481,14 +462,14 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots, return a.time_offset_ps() < b.time_offset_ps(); }); - UpdateStepId(memory_profile->step_count(), allocator_memory_profile); + UpdateStepId(allocator_memory_profile); UpdateDeallocation(allocator_memory_profile); - int64 peak_bytes_profile = allocator_memory_profile->profile_summary() - .peak_stats() - .peak_bytes_in_use(); int64 peak_step_id = - GetPeakMemoryStep(peak_bytes_profile, allocator_memory_profile); + GetPeakMemoryStep(allocator_memory_profile->profile_summary() + .peak_stats() + .peak_bytes_in_use(), + allocator_memory_profile); ProcessActiveAllocations(peak_step_id, allocator_memory_profile); SampleSnapshots(max_num_snapshots, snapshots); } diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.h b/tensorflow/core/profiler/convert/xplane_to_memory_profile.h index 873ac800aa5..6eddaeeec71 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.h +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.h @@ -25,6 +25,7 @@ namespace profiler { // Process the host threads XPlane and generate MemoryProfile result; at most // max_num_snapshots will be displayed on the UI. +// REQUIRED: host_plane should have been grouped by calling GroupTfEvents(). MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane, int64 max_num_snapshots = 1000); diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc index 5ddcbcfc75d..c334318dcfe 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/memory_profile.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/group_events.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_test_utils.h" @@ -84,11 +85,11 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) { {StatType::kRegionType, "temp"}, {StatType::kTensorShapes, "[1, 2]"}}); + tensorflow::profiler::GroupTfEvents(&space, nullptr); MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane); EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1); EXPECT_EQ(memory_profile.num_hosts(), 1); EXPECT_EQ(memory_profile.memory_ids_size(), 1); - EXPECT_EQ(memory_profile.step_count().size(), 1); EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first, "GPU_0_bfc"); const auto& allocator_memory_profile = diff --git a/tensorflow/core/profiler/protobuf/memory_profile.proto b/tensorflow/core/profiler/protobuf/memory_profile.proto index 7a5272c60b2..4d492a56255 100644 --- a/tensorflow/core/profiler/protobuf/memory_profile.proto +++ b/tensorflow/core/profiler/protobuf/memory_profile.proto @@ -122,7 +122,5 @@ message MemoryProfile { // Ids for profiled memory allocators, used to populate memory selection list // at front end. repeated string memory_ids = 3; - // Map of original random int64 step id to the count of memory activity events - // assigned with it. - map step_count = 4; + reserved 4; }