Use group_id as step_id.
PiperOrigin-RevId: 317353238 Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7
This commit is contained in:
parent
a8456eae42
commit
2229ae89c9
@ -525,6 +525,7 @@ tf_cc_test(
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core/profiler/protobuf:memory_profile_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:group_events",
|
||||
"//tensorflow/core/profiler/utils:xplane_builder",
|
||||
"//tensorflow/core/profiler/utils:xplane_schema",
|
||||
"//tensorflow/core/profiler/utils:xplane_test_utils",
|
||||
|
@ -42,6 +42,8 @@ namespace profiler {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int64 kInvalidStepId = -1;
|
||||
|
||||
// Index of the time-sorted memory_profile_snapshots list, and the
|
||||
// MemoryActivityMetadata proto it contains.
|
||||
using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>;
|
||||
@ -63,7 +65,7 @@ struct ActivityMetadata {
|
||||
int64 allocation_bytes = 0;
|
||||
uint64 address = 0;
|
||||
absl::string_view tf_op_name;
|
||||
int64 step_id = -1;
|
||||
int64 step_id = kInvalidStepId;
|
||||
absl::string_view region_type;
|
||||
int64 data_type = 0;
|
||||
absl::string_view tensor_shape;
|
||||
@ -129,7 +131,6 @@ void UpdateProfileSummary(const AggregationStats& stats, int64 time_offset_ps,
|
||||
MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
|
||||
XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace);
|
||||
MemoryProfile memory_profile;
|
||||
auto* step_count = memory_profile.mutable_step_count();
|
||||
// Iterate over all XEvents in the XPlane, and add the XStats to a new
|
||||
// MemoryProfileSnapshot if the EventType is kMemoryAllocation or
|
||||
// kMemoryDeallocation.
|
||||
@ -181,9 +182,8 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
|
||||
case StatType::kTfOp:
|
||||
metadata.tf_op_name = stat.StrOrRefValue();
|
||||
break;
|
||||
case StatType::kStepId:
|
||||
case StatType::kGroupId:
|
||||
metadata.step_id = stat.IntValue();
|
||||
if (metadata.step_id != 0) (*step_count)[metadata.step_id]++;
|
||||
break;
|
||||
case StatType::kRegionType:
|
||||
metadata.region_type = stat.StrOrRefValue();
|
||||
@ -214,40 +214,21 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
|
||||
return memory_profile;
|
||||
}
|
||||
|
||||
// Sequentialize step ids for the memory profile.
|
||||
void UpdateStepId(const tensorflow::protobuf::Map<
|
||||
tensorflow::protobuf_int64 /*orig_step_id*/,
|
||||
tensorflow::protobuf_int64 /*count*/>& step_count,
|
||||
PerAllocatorMemoryProfile* memory_profile) {
|
||||
// Map from original random step id to sequential step id.
|
||||
absl::flat_hash_map<int64 /*orig_step_id*/, int64 /*step_id*/> step_map;
|
||||
constexpr int kUnknownStep = -2;
|
||||
constexpr double kStepFilterRatio = 0.1; // Magic number for filtering.
|
||||
tensorflow::protobuf_int64 max_step_count = 0;
|
||||
for (const auto& step_and_count : step_count) {
|
||||
max_step_count = std::max(max_step_count, step_and_count.second);
|
||||
}
|
||||
// Filter out noisy and incomplete original step ids.
|
||||
for (const auto& step_and_count : step_count) {
|
||||
if (static_cast<double>(step_and_count.second) / max_step_count >
|
||||
kStepFilterRatio) {
|
||||
step_map[step_and_count.first] = kUnknownStep;
|
||||
}
|
||||
}
|
||||
|
||||
// Update the step ids in memory_profile for this allocator.
|
||||
int64 step_id = -1;
|
||||
// Fix invalid step ids of snapshots at the beginning/end of the profile or at
|
||||
// the step boundaries. The snapshots with invalid step ids at the beginning get
|
||||
// 0 for their step ids. Those at the step boundaries or at the end get the
|
||||
// previous snapshot's step id + 1.
|
||||
void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) {
|
||||
int64 last_valid_step_id = -1;
|
||||
// Snapshots are already sorted in time.
|
||||
for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
|
||||
DCHECK(snapshot.has_activity_metadata());
|
||||
// Convert the random step id to sequential step id.
|
||||
int64 orig_step_id = snapshot.activity_metadata().step_id();
|
||||
if (step_map.contains(orig_step_id) &&
|
||||
step_map[orig_step_id] == kUnknownStep) {
|
||||
step_map[orig_step_id] = ++step_id;
|
||||
if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) {
|
||||
snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1);
|
||||
} else {
|
||||
last_valid_step_id = snapshot.mutable_activity_metadata()->step_id();
|
||||
}
|
||||
snapshot.mutable_activity_metadata()->set_step_id(step_id);
|
||||
}
|
||||
VLOG(2) << "Max sequential step id in profile: " << step_id;
|
||||
}
|
||||
|
||||
// Update the MemoryActivityMetadata for each deallocation event by copying from
|
||||
@ -481,14 +462,14 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots,
|
||||
return a.time_offset_ps() < b.time_offset_ps();
|
||||
});
|
||||
|
||||
UpdateStepId(memory_profile->step_count(), allocator_memory_profile);
|
||||
UpdateStepId(allocator_memory_profile);
|
||||
UpdateDeallocation(allocator_memory_profile);
|
||||
|
||||
int64 peak_bytes_profile = allocator_memory_profile->profile_summary()
|
||||
.peak_stats()
|
||||
.peak_bytes_in_use();
|
||||
int64 peak_step_id =
|
||||
GetPeakMemoryStep(peak_bytes_profile, allocator_memory_profile);
|
||||
GetPeakMemoryStep(allocator_memory_profile->profile_summary()
|
||||
.peak_stats()
|
||||
.peak_bytes_in_use(),
|
||||
allocator_memory_profile);
|
||||
ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
|
||||
SampleSnapshots(max_num_snapshots, snapshots);
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ namespace profiler {
|
||||
|
||||
// Process the host threads XPlane and generate MemoryProfile result; at most
|
||||
// max_num_snapshots snapshots will be displayed on the UI.
|
||||
// REQUIRED: host_plane should have been grouped by calling GroupTfEvents().
|
||||
MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane,
|
||||
int64 max_num_snapshots = 1000);
|
||||
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/group_events.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_builder.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_test_utils.h"
|
||||
@ -84,11 +85,11 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) {
|
||||
{StatType::kRegionType, "temp"},
|
||||
{StatType::kTensorShapes, "[1, 2]"}});
|
||||
|
||||
tensorflow::profiler::GroupTfEvents(&space, nullptr);
|
||||
MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
|
||||
EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1);
|
||||
EXPECT_EQ(memory_profile.num_hosts(), 1);
|
||||
EXPECT_EQ(memory_profile.memory_ids_size(), 1);
|
||||
EXPECT_EQ(memory_profile.step_count().size(), 1);
|
||||
EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first,
|
||||
"GPU_0_bfc");
|
||||
const auto& allocator_memory_profile =
|
||||
|
@ -122,7 +122,5 @@ message MemoryProfile {
|
||||
// Ids for profiled memory allocators, used to populate memory selection list
|
||||
// at front end.
|
||||
repeated string memory_ids = 3;
|
||||
// Map of original random int64 step id to the count of memory activity events
|
||||
// assigned with it.
|
||||
map<int64 /*orig_step_id*/, int64 /*count*/> step_count = 4;
|
||||
reserved 4;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user