Use group_id as step_id.

PiperOrigin-RevId: 317353238
Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7
This commit is contained in:
Jiho Choi 2020-06-19 12:06:36 -07:00 committed by TensorFlower Gardener
parent a8456eae42
commit 2229ae89c9
5 changed files with 25 additions and 43 deletions

View File

@ -525,6 +525,7 @@ tf_cc_test(
"//tensorflow/core:test_main", "//tensorflow/core:test_main",
"//tensorflow/core/profiler/protobuf:memory_profile_proto_cc", "//tensorflow/core/profiler/protobuf:memory_profile_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:group_events",
"//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_builder",
"//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_schema",
"//tensorflow/core/profiler/utils:xplane_test_utils", "//tensorflow/core/profiler/utils:xplane_test_utils",

View File

@ -42,6 +42,8 @@ namespace profiler {
namespace { namespace {
constexpr int64 kInvalidStepId = -1;
// Index of the time-sorted memory_profile_snapshots list, and the // Index of the time-sorted memory_profile_snapshots list, and the
// MemoryActivityMetadata proto it contains. // MemoryActivityMetadata proto it contains.
using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>; using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>;
@ -63,7 +65,7 @@ struct ActivityMetadata {
int64 allocation_bytes = 0; int64 allocation_bytes = 0;
uint64 address = 0; uint64 address = 0;
absl::string_view tf_op_name; absl::string_view tf_op_name;
int64 step_id = -1; int64 step_id = kInvalidStepId;
absl::string_view region_type; absl::string_view region_type;
int64 data_type = 0; int64 data_type = 0;
absl::string_view tensor_shape; absl::string_view tensor_shape;
@ -129,7 +131,6 @@ void UpdateProfileSummary(const AggregationStats& stats, int64 time_offset_ps,
MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace); XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace);
MemoryProfile memory_profile; MemoryProfile memory_profile;
auto* step_count = memory_profile.mutable_step_count();
// Iterate over all XEvents in the XPlane, and add the XStats to a new // Iterate over all XEvents in the XPlane, and add the XStats to a new
// MemoryProfileSnapshot if the EventType is kMemoryAllocation or // MemoryProfileSnapshot if the EventType is kMemoryAllocation or
// kMemoryDeallocation. // kMemoryDeallocation.
@ -181,9 +182,8 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
case StatType::kTfOp: case StatType::kTfOp:
metadata.tf_op_name = stat.StrOrRefValue(); metadata.tf_op_name = stat.StrOrRefValue();
break; break;
case StatType::kStepId: case StatType::kGroupId:
metadata.step_id = stat.IntValue(); metadata.step_id = stat.IntValue();
if (metadata.step_id != 0) (*step_count)[metadata.step_id]++;
break; break;
case StatType::kRegionType: case StatType::kRegionType:
metadata.region_type = stat.StrOrRefValue(); metadata.region_type = stat.StrOrRefValue();
@ -214,40 +214,21 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
return memory_profile; return memory_profile;
} }
// Sequentialize step ids for the memory profile. // Fix invalid step ids of snapshots at the beginning/end of the profile or at
void UpdateStepId(const tensorflow::protobuf::Map< // the step boundaries. The snapshots with invalid step ids at the beginning get
tensorflow::protobuf_int64 /*orig_step_id*/, // 0 for their step ids. Those at the step boundaries or at the end get the
tensorflow::protobuf_int64 /*count*/>& step_count, // previous snapshot's step id + 1.
PerAllocatorMemoryProfile* memory_profile) { void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) {
// Map from original random step id to sequential step id. int64 last_valid_step_id = -1;
absl::flat_hash_map<int64 /*orig_step_id*/, int64 /*step_id*/> step_map; // Snapshots are already sorted in time.
constexpr int kUnknownStep = -2;
constexpr double kStepFilterRatio = 0.1; // Magic number for filtering.
tensorflow::protobuf_int64 max_step_count = 0;
for (const auto& step_and_count : step_count) {
max_step_count = std::max(max_step_count, step_and_count.second);
}
// Filter out noisy and incomplete original step ids.
for (const auto& step_and_count : step_count) {
if (static_cast<double>(step_and_count.second) / max_step_count >
kStepFilterRatio) {
step_map[step_and_count.first] = kUnknownStep;
}
}
// Update the step ids in memory_profile for this allocator.
int64 step_id = -1;
for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) { for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
DCHECK(snapshot.has_activity_metadata()); DCHECK(snapshot.has_activity_metadata());
// Convert the random step id to sequential step id. if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) {
int64 orig_step_id = snapshot.activity_metadata().step_id(); snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1);
if (step_map.contains(orig_step_id) && } else {
step_map[orig_step_id] == kUnknownStep) { last_valid_step_id = snapshot.mutable_activity_metadata()->step_id();
step_map[orig_step_id] = ++step_id;
} }
snapshot.mutable_activity_metadata()->set_step_id(step_id);
} }
VLOG(2) << "Max sequential step id in profile: " << step_id;
} }
// Update the MemoryActivityMetadata for each deallocation event by copying from // Update the MemoryActivityMetadata for each deallocation event by copying from
@ -481,14 +462,14 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots,
return a.time_offset_ps() < b.time_offset_ps(); return a.time_offset_ps() < b.time_offset_ps();
}); });
UpdateStepId(memory_profile->step_count(), allocator_memory_profile); UpdateStepId(allocator_memory_profile);
UpdateDeallocation(allocator_memory_profile); UpdateDeallocation(allocator_memory_profile);
int64 peak_bytes_profile = allocator_memory_profile->profile_summary()
.peak_stats()
.peak_bytes_in_use();
int64 peak_step_id = int64 peak_step_id =
GetPeakMemoryStep(peak_bytes_profile, allocator_memory_profile); GetPeakMemoryStep(allocator_memory_profile->profile_summary()
.peak_stats()
.peak_bytes_in_use(),
allocator_memory_profile);
ProcessActiveAllocations(peak_step_id, allocator_memory_profile); ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
SampleSnapshots(max_num_snapshots, snapshots); SampleSnapshots(max_num_snapshots, snapshots);
} }

View File

@ -25,6 +25,7 @@ namespace profiler {
// Process the host threads XPlane and generate MemoryProfile result; at most // Process the host threads XPlane and generate MemoryProfile result; at most
// max_num_snapshots will be displayed on the UI. // max_num_snapshots will be displayed on the UI.
// REQUIRED: host_plane should have been grouped by calling GroupTfEvents().
MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane, MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane,
int64 max_num_snapshots = 1000); int64 max_num_snapshots = 1000);

View File

@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h" #include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/group_events.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_test_utils.h" #include "tensorflow/core/profiler/utils/xplane_test_utils.h"
@ -84,11 +85,11 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) {
{StatType::kRegionType, "temp"}, {StatType::kRegionType, "temp"},
{StatType::kTensorShapes, "[1, 2]"}}); {StatType::kTensorShapes, "[1, 2]"}});
tensorflow::profiler::GroupTfEvents(&space, nullptr);
MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane); MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1); EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1);
EXPECT_EQ(memory_profile.num_hosts(), 1); EXPECT_EQ(memory_profile.num_hosts(), 1);
EXPECT_EQ(memory_profile.memory_ids_size(), 1); EXPECT_EQ(memory_profile.memory_ids_size(), 1);
EXPECT_EQ(memory_profile.step_count().size(), 1);
EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first, EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first,
"GPU_0_bfc"); "GPU_0_bfc");
const auto& allocator_memory_profile = const auto& allocator_memory_profile =

View File

@ -122,7 +122,5 @@ message MemoryProfile {
// Ids for profiled memory allocators, used to populate memory selection list // Ids for profiled memory allocators, used to populate memory selection list
// at front end. // at front end.
repeated string memory_ids = 3; repeated string memory_ids = 3;
// Map of original random int64 step id to the count of memory activity events reserved 4;
// assigned with it.
map<int64 /*orig_step_id*/, int64 /*count*/> step_count = 4;
} }