Use group_id as step_id.

PiperOrigin-RevId: 317353238
Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7
This commit is contained in:
Jiho Choi 2020-06-19 12:06:36 -07:00 committed by TensorFlower Gardener
parent a8456eae42
commit 2229ae89c9
5 changed files with 25 additions and 43 deletions

View File

@ -525,6 +525,7 @@ tf_cc_test(
"//tensorflow/core:test_main",
"//tensorflow/core/profiler/protobuf:memory_profile_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:group_events",
"//tensorflow/core/profiler/utils:xplane_builder",
"//tensorflow/core/profiler/utils:xplane_schema",
"//tensorflow/core/profiler/utils:xplane_test_utils",

View File

@ -42,6 +42,8 @@ namespace profiler {
namespace {
constexpr int64 kInvalidStepId = -1;
// Index of the time-sorted memory_profile_snapshots list, and the
// MemoryActivityMetadata proto it contains.
using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>;
@ -63,7 +65,7 @@ struct ActivityMetadata {
int64 allocation_bytes = 0;
uint64 address = 0;
absl::string_view tf_op_name;
int64 step_id = -1;
int64 step_id = kInvalidStepId;
absl::string_view region_type;
int64 data_type = 0;
absl::string_view tensor_shape;
@ -129,7 +131,6 @@ void UpdateProfileSummary(const AggregationStats& stats, int64 time_offset_ps,
MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace);
MemoryProfile memory_profile;
auto* step_count = memory_profile.mutable_step_count();
// Iterate over all XEvents in the XPlane, and add the XStats to a new
// MemoryProfileSnapshot if the EventType is kMemoryAllocation or
// kMemoryDeallocation.
@ -181,9 +182,8 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
case StatType::kTfOp:
metadata.tf_op_name = stat.StrOrRefValue();
break;
case StatType::kStepId:
case StatType::kGroupId:
metadata.step_id = stat.IntValue();
if (metadata.step_id != 0) (*step_count)[metadata.step_id]++;
break;
case StatType::kRegionType:
metadata.region_type = stat.StrOrRefValue();
@ -214,40 +214,21 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
return memory_profile;
}
// Sequentialize step ids for the memory profile.
void UpdateStepId(const tensorflow::protobuf::Map<
tensorflow::protobuf_int64 /*orig_step_id*/,
tensorflow::protobuf_int64 /*count*/>& step_count,
PerAllocatorMemoryProfile* memory_profile) {
// Map from original random step id to sequential step id.
absl::flat_hash_map<int64 /*orig_step_id*/, int64 /*step_id*/> step_map;
constexpr int kUnknownStep = -2;
constexpr double kStepFilterRatio = 0.1; // Magic number for filtering.
tensorflow::protobuf_int64 max_step_count = 0;
for (const auto& step_and_count : step_count) {
max_step_count = std::max(max_step_count, step_and_count.second);
}
// Filter out noisy and incomplete original step ids.
for (const auto& step_and_count : step_count) {
if (static_cast<double>(step_and_count.second) / max_step_count >
kStepFilterRatio) {
step_map[step_and_count.first] = kUnknownStep;
}
}
// Update the step ids in memory_profile for this allocator.
int64 step_id = -1;
// Fix invalid step ids of snapshots at the beginning/end of the profile or at
// the step boundaries. The snapshots with invalid step ids at the beginning get
// 0 for their step ids. Those at the step boundaries or at the end get the most
// recent valid step id + 1 (consecutive invalid snapshots share the same id).
void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) {
int64 last_valid_step_id = -1;
// Snapshots are already sorted in time.
for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
DCHECK(snapshot.has_activity_metadata());
// Convert the random step id to sequential step id.
int64 orig_step_id = snapshot.activity_metadata().step_id();
if (step_map.contains(orig_step_id) &&
step_map[orig_step_id] == kUnknownStep) {
step_map[orig_step_id] = ++step_id;
if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) {
snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1);
} else {
last_valid_step_id = snapshot.mutable_activity_metadata()->step_id();
}
snapshot.mutable_activity_metadata()->set_step_id(step_id);
}
VLOG(2) << "Max sequential step id in profile: " << step_id;
}
// Update the MemoryActivityMetadata for each deallocation event by copying from
@ -481,14 +462,14 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots,
return a.time_offset_ps() < b.time_offset_ps();
});
UpdateStepId(memory_profile->step_count(), allocator_memory_profile);
UpdateStepId(allocator_memory_profile);
UpdateDeallocation(allocator_memory_profile);
int64 peak_bytes_profile = allocator_memory_profile->profile_summary()
.peak_stats()
.peak_bytes_in_use();
int64 peak_step_id =
GetPeakMemoryStep(peak_bytes_profile, allocator_memory_profile);
GetPeakMemoryStep(allocator_memory_profile->profile_summary()
.peak_stats()
.peak_bytes_in_use(),
allocator_memory_profile);
ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
SampleSnapshots(max_num_snapshots, snapshots);
}

View File

@ -25,6 +25,7 @@ namespace profiler {
// Process the host threads XPlane and generate MemoryProfile result; at most
// max_num_snapshots will be displayed on the UI.
// REQUIRED: host_plane should have been grouped by calling GroupTfEvents().
MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane,
int64 max_num_snapshots = 1000);

View File

@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/group_events.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_test_utils.h"
@ -84,11 +85,11 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) {
{StatType::kRegionType, "temp"},
{StatType::kTensorShapes, "[1, 2]"}});
tensorflow::profiler::GroupTfEvents(&space, nullptr);
MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1);
EXPECT_EQ(memory_profile.num_hosts(), 1);
EXPECT_EQ(memory_profile.memory_ids_size(), 1);
EXPECT_EQ(memory_profile.step_count().size(), 1);
EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first,
"GPU_0_bfc");
const auto& allocator_memory_profile =

View File

@ -122,7 +122,5 @@ message MemoryProfile {
// Ids for profiled memory allocators, used to populate the memory selection
// list at the front end.
repeated string memory_ids = 3;
// Map of original random int64 step id to the count of memory activity events
// assigned with it.
map<int64 /*orig_step_id*/, int64 /*count*/> step_count = 4;
reserved 4;
}