[Profiler] Fix a bug in the memory profiler that caused TF ops to disappear from the memory breakdown table.

PiperOrigin-RevId: 325921811
Change-Id: I22877b935fa9edcbd66be5489c0b7da29d0276fc
A. Unique TensorFlower 2020-08-10 17:23:57 -07:00 committed by TensorFlower Gardener
parent f621eebcff
commit 36d55f1c56
2 changed files with 81 additions and 14 deletions
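
Why the ops disappeared, as the new code below suggests: each ActiveAllocation row in the breakdown table refers to a MemoryProfileSnapshot by index. The old SampleSnapshots sorted and truncated the snapshot list without updating those indices, so after sampling an allocation's snapshot_index could point at the wrong snapshot, or past the end of the list, and its TF op dropped out of the table. A minimal sketch of the failure mode, using simplified stand-in types rather than the real protos:

#include <cstdint>
#include <vector>

// Stand-ins for the real protos (illustrative only).
struct Snapshot { int64_t time_offset_ps; };
struct Allocation { int64_t snapshot_index; };

int main() {
  std::vector<Snapshot> snapshots = {{10}, {20}, {30}, {40}};
  Allocation alloc{3};  // Refers to snapshots[3].

  // Old behavior: sample down to two snapshots, leaving alloc untouched.
  snapshots.resize(2);

  // alloc.snapshot_index is still 3, but only indices 0..1 exist now, so
  // the allocation no longer resolves to a snapshot. The fix keeps the
  // referenced snapshots in the sample and remaps every index afterwards.
  return 0;
}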


@@ -514,6 +514,7 @@ cc_library(
         "//tensorflow/core/profiler/utils:xplane_visitor",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:optional",


@@ -24,11 +24,13 @@ limitations under the License.
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
@@ -424,23 +426,86 @@ void ProcessActiveAllocations(int64 peak_bytes_profile_step_id,
           << memory_profile->active_allocations_size();
 }
 
+struct Sample {
+  int64 orig_index;  // original index to the snapshot.
+  MemoryProfileSnapshot* snapshot;
+};
+
+// This function samples max_num_snapshots from snapshots. We first keep the
+// snapshots referenced by active_allocations in the samples. After this, if
+// there is still room for more samples, we pick more from snapshots into the
+// samples. Then, we sort the samples in time (so that they can be correctly
+// displayed on the timeline). Finally, we need to adjust the original indices
+// (to snapshots) in active_allocations to the new indices in the samples.
 void SampleSnapshots(
     int64 max_num_snapshots,
-    protobuf::RepeatedPtrField<MemoryProfileSnapshot>* snapshots) {
+    protobuf::RepeatedPtrField<MemoryProfileSnapshot>* snapshots,
+    protobuf::RepeatedPtrField<ActiveAllocation>* active_allocations) {
   if (snapshots->size() <= max_num_snapshots) return;
-  absl::c_partial_sort(
-      *snapshots, snapshots->begin() + max_num_snapshots,
-      [](const MemoryProfileSnapshot& a, const MemoryProfileSnapshot& b) {
-        return a.aggregation_stats().free_memory_bytes() <
-               b.aggregation_stats().free_memory_bytes();
-      });
-  snapshots->erase(snapshots->begin() + max_num_snapshots, snapshots->end());
-  // Sort the memory_profile_snapshots by time_offset_ps (ascending) after
-  // sampling.
-  absl::c_sort(*snapshots, [](const MemoryProfileSnapshot& a,
-                              const MemoryProfileSnapshot& b) {
-    return a.time_offset_ps() < b.time_offset_ps();
+  std::vector<Sample> samples;
+
+  // First, puts the snapshots referenced by active_allocations in samples[].
+  absl::flat_hash_set<int64> allocation_snapshot_indices;
+  for (const auto& allocation : *active_allocations) {
+    auto orig_index = allocation.snapshot_index();
+    if (orig_index < 0) continue;
+    allocation_snapshot_indices.insert(orig_index);
+    samples.push_back({orig_index, &(*snapshots)[orig_index]});
+    if (allocation_snapshot_indices.size() >= max_num_snapshots) break;
+  }
+
+  // Second, extracts remaining samples from snapshots.
+  int64 num_samples_remained =
+      max_num_snapshots - allocation_snapshot_indices.size();
+  if (num_samples_remained > 0) {
+    std::vector<Sample> remaining;
+    for (int64 i = 0; i < snapshots->size(); i++) {
+      if (allocation_snapshot_indices.contains(i)) continue;
+      // snapshots[i] is not yet sampled; put it in remaining[] for further
+      // consideration.
+      remaining.push_back({i, &(*snapshots)[i]});
+    }
+    // Moves the num_samples_remained snapshots with least free bytes to the
+    // beginning of remaining[].
+    absl::c_partial_sort(
+        remaining, remaining.begin() + num_samples_remained,
+        [](const Sample& a, const Sample& b) {
+          return a.snapshot->aggregation_stats().free_memory_bytes() <
+                 b.snapshot->aggregation_stats().free_memory_bytes();
+        });
+    // Copies the first num_samples_remained in remaining[] to samples[].
+    for (int64 i = 0; i < num_samples_remained; i++)
+      samples.push_back(remaining[i]);
+  }
+
+  // Third, sorts samples[] in ascending order of time_offset_ps.
+  absl::c_sort(samples, [](const Sample& a, const Sample& b) {
+    return a.snapshot->time_offset_ps() < b.snapshot->time_offset_ps();
   });
+
+  // Fourth, constructs a map from the original snapshot index to samples
+  // index.
+  absl::flat_hash_map</*original=*/int64, /*new=*/int64> index_map;
+  for (int64 i = 0; i < samples.size(); i++) {
+    index_map[samples[i].orig_index] = i;
+  }
+
+  // Fifth, changes the original snapshot indices in active_allocations to the
+  // sample indices.
+  for (auto& allocation : *active_allocations) {
+    auto orig_index = allocation.snapshot_index();
+    if (orig_index < 0) continue;
+    auto new_index = gtl::FindWithDefault(index_map, orig_index, -1);
+    allocation.set_snapshot_index(new_index);
+  }
+
+  // Sixth, replaces *snapshots by samples[].
+  protobuf::RepeatedPtrField<MemoryProfileSnapshot> new_snapshots;
+  new_snapshots.Reserve(samples.size());
+  for (const auto& sample : samples) {
+    *new_snapshots.Add() = std::move(*sample.snapshot);
+  }
+  *snapshots = std::move(new_snapshots);
 }
 
 // Post-process the memory profile to correctly update proto fields, and break
@@ -478,7 +543,8 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots,
             .peak_bytes_in_use(),
         allocator_memory_profile);
     ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
-    SampleSnapshots(max_num_snapshots, snapshots);
+    SampleSnapshots(max_num_snapshots, snapshots,
+                    allocator_memory_profile->mutable_active_allocations());
   }
 }
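
For readers who want the algorithm without the protobuf plumbing, here is a compact, self-contained sketch of the same six steps over plain structs. The std:: containers stand in for the protobuf repeated fields, all names are illustrative, and a small guard against duplicate snapshot references is added for clarity:

#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

struct Snapshot {
  int64_t time_offset_ps;
  int64_t free_memory_bytes;
};
struct Allocation {
  int64_t snapshot_index;  // -1 means "no snapshot".
};

void SampleSnapshots(int64_t max_num_snapshots,
                     std::vector<Snapshot>* snapshots,
                     std::vector<Allocation>* active_allocations) {
  if (static_cast<int64_t>(snapshots->size()) <= max_num_snapshots) return;
  struct Sample { int64_t orig_index; const Snapshot* snapshot; };
  std::vector<Sample> samples;

  // 1) Keep every snapshot referenced by an active allocation.
  std::unordered_set<int64_t> kept;
  for (const auto& alloc : *active_allocations) {
    if (alloc.snapshot_index < 0) continue;
    if (kept.insert(alloc.snapshot_index).second)
      samples.push_back({alloc.snapshot_index,
                         &(*snapshots)[alloc.snapshot_index]});
    if (static_cast<int64_t>(kept.size()) >= max_num_snapshots) break;
  }

  // 2) Fill the remaining budget with the lowest-free-memory snapshots.
  int64_t budget = max_num_snapshots - static_cast<int64_t>(kept.size());
  if (budget > 0) {
    std::vector<Sample> rest;
    for (int64_t i = 0; i < static_cast<int64_t>(snapshots->size()); ++i)
      if (!kept.count(i)) rest.push_back({i, &(*snapshots)[i]});
    std::partial_sort(rest.begin(), rest.begin() + budget, rest.end(),
                      [](const Sample& a, const Sample& b) {
                        return a.snapshot->free_memory_bytes <
                               b.snapshot->free_memory_bytes;
                      });
    rest.resize(budget);
    samples.insert(samples.end(), rest.begin(), rest.end());
  }

  // 3) Restore timeline order.
  std::sort(samples.begin(), samples.end(),
            [](const Sample& a, const Sample& b) {
              return a.snapshot->time_offset_ps < b.snapshot->time_offset_ps;
            });

  // 4) + 5) Remap allocation indices; unsampled snapshots map to -1.
  std::unordered_map<int64_t, int64_t> index_map;
  for (int64_t i = 0; i < static_cast<int64_t>(samples.size()); ++i)
    index_map[samples[i].orig_index] = i;
  for (auto& alloc : *active_allocations) {
    if (alloc.snapshot_index < 0) continue;
    auto it = index_map.find(alloc.snapshot_index);
    alloc.snapshot_index = (it == index_map.end()) ? -1 : it->second;
  }

  // 6) Replace the snapshot list with the sampled, time-ordered subset.
  std::vector<Snapshot> sampled;
  sampled.reserve(samples.size());
  for (const auto& s : samples) sampled.push_back(*s.snapshot);
  *snapshots = std::move(sampled);
}

The key invariant, mirrored from the real change, is that indices are remapped (or invalidated with -1) before the snapshot list is replaced, so every surviving allocation row still resolves to the snapshot it was recorded against.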