Create ConvertXPlaneToMemoryProfile function for OSS TF memory profiler.
PiperOrigin-RevId: 309666664
Change-Id: If89abf5cf2dca667e7c712ecde0d086e793cfc1b
parent 078aa26e41
commit 308bc07737
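
For orientation, a minimal sketch of how the new entry point is consumed. GetHostMemoryProfile is a hypothetical wrapper, not part of the commit; FindPlaneWithName and kHostThreads come from the profiler utils, mirroring the xplane_to_profile_response change further below.

#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"

namespace tensorflow {
namespace profiler {

// Hypothetical helper: locate the host-threads plane in a captured XSpace and
// convert its memory allocation/deallocation events into a MemoryProfile.
// Returns an empty profile when no host trace was captured.
MemoryProfile GetHostMemoryProfile(const XSpace& xspace) {
  const XPlane* host_plane = FindPlaneWithName(xspace, kHostThreads);
  if (host_plane == nullptr) return MemoryProfile();
  return ConvertXPlaneToMemoryProfile(*host_plane);
}

}  // namespace profiler
}  // namespace tensorflow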
@@ -256,6 +256,7 @@ cc_library(
        ":op_stats_to_overview_page",
        ":op_stats_to_tf_stats",
        ":trace_events_to_json",
        ":xplane_to_memory_profile",
        ":xplane_to_op_stats",
        ":xplane_to_trace_events",
        "//tensorflow/core:lib",
@@ -263,11 +264,14 @@ cc_library(
        "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc",
        "//tensorflow/core/profiler/protobuf:input_pipeline_proto_cc",
        "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc",
        "//tensorflow/core/profiler/protobuf:memory_profile_proto_cc",
        "//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
        "//tensorflow/core/profiler/protobuf:overview_page_proto_cc",
        "//tensorflow/core/profiler/protobuf:tf_stats_proto_cc",
        "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
        "//tensorflow/core/profiler/rpc/client:save_profile",
        "//tensorflow/core/profiler/utils:xplane_schema",
        "//tensorflow/core/profiler/utils:xplane_utils",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
    ],
@@ -414,3 +418,37 @@ tf_cc_test(
        "//tensorflow/core/profiler/utils:xplane_utils",
    ],
)

cc_library(
    name = "xplane_to_memory_profile",
    srcs = ["xplane_to_memory_profile.cc"],
    hdrs = ["xplane_to_memory_profile.h"],
    deps = [
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:protobuf",
        "//tensorflow/core/profiler/protobuf:memory_profile_proto_cc",
        "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
        "//tensorflow/core/profiler/utils:tf_xplane_visitor",
        "//tensorflow/core/profiler/utils:xplane_schema",
        "//tensorflow/core/profiler/utils:xplane_utils",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
    ],
)

tf_cc_test(
    name = "xplane_to_memory_profile_test",
    size = "small",
    srcs = ["xplane_to_memory_profile_test.cc"],
    deps = [
        ":xplane_to_memory_profile",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/profiler/utils:xplane_builder",
        "//tensorflow/core/profiler/utils:xplane_schema",
        "//tensorflow/core/profiler/utils:xplane_utils",
    ],
)
tensorflow/core/profiler/convert/xplane_to_memory_profile.cc (new file, 467 lines)
@@ -0,0 +1,467 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"

#include <algorithm>
#include <cstddef>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"

namespace tensorflow {
namespace profiler {

namespace {

// Index of the time-sorted memory_profile_snapshots list, and the
// MemoryActivityMetadata proto it contains.
using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>;

// Aggregated memory stats from an allocator. Temporary container to fill
// MemoryAggregationStats.
struct AggregationStats {
  int64 bytes_reserved = 0;
  int64 bytes_allocated = 0;
  int64 bytes_available = 0;
  double fragmentation = 0;
  int64 peak_bytes_in_use = 0;
};

// Metadata associated with each memory allocation/deallocation activity.
// Temporary container to fill MemoryActivityMetadata.
struct ActivityMetadata {
  int64 requested_bytes = 0;
  int64 allocation_bytes = 0;
  uint64 address = 0;
  absl::string_view tf_op_name;
  int64 step_id = -1;
  absl::string_view region_type;
  int64 data_type = 0;
  absl::string_view tensor_shape;
};

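// Note: the string_view fields above alias data owned by the XPlane, so these
// temporary structs must not outlive the host trace; FillActivityMetadata
// below copies them into owned std::string proto fields right away.
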
bool IsMemoryAllocation(int64 event_type) {
  return event_type == HostEventType::kMemoryAllocation;
}

bool IsMemoryDeallocation(int64 event_type) {
  return event_type == HostEventType::kMemoryDeallocation;
}

void FillAggregationStats(const AggregationStats& src,
                          MemoryAggregationStats* dst) {
  dst->set_stack_reserved_bytes(src.bytes_reserved);
  dst->set_heap_allocated_bytes(src.bytes_allocated);
  dst->set_free_memory_bytes(src.bytes_available);
  dst->set_fragmentation(src.fragmentation);
  dst->set_peak_bytes_in_use(src.peak_bytes_in_use);
}

void FillActivityMetadata(int64 event_type, const ActivityMetadata& src,
                          MemoryActivityMetadata* dst) {
  if (IsMemoryAllocation(event_type)) {
    dst->set_memory_activity(ALLOCATION);
  } else if (IsMemoryDeallocation(event_type)) {
    dst->set_memory_activity(DEALLOCATION);
  }
  dst->set_requested_bytes(src.requested_bytes);
  dst->set_allocation_bytes(src.allocation_bytes);
  dst->set_address(src.address);
  dst->set_tf_op_name(std::string(src.tf_op_name));
  dst->set_step_id(src.step_id);
  dst->set_region_type(std::string(src.region_type));
  dst->set_data_type(tensorflow::DataTypeString(
      static_cast<tensorflow::DataType>(src.data_type)));
  dst->set_tensor_shape(std::string(src.tensor_shape));
}

void UpdateProfileSummary(const AggregationStats& stats, int64 time_offset_ps,
                          MemoryProfileSummary* summary) {
  // Update the peak memory usage over allocator's lifetime.
  summary->set_peak_bytes_usage_lifetime(stats.peak_bytes_in_use);
  MemoryAggregationStats* peak_stats = summary->mutable_peak_stats();
  // If we reach (or stay at) peak memory usage within the profiling window,
  // update memory profile summary.
  if (stats.bytes_reserved + stats.bytes_allocated >=
      peak_stats->peak_bytes_in_use()) {
    peak_stats->set_peak_bytes_in_use(stats.bytes_reserved +
                                      stats.bytes_allocated);
    peak_stats->set_stack_reserved_bytes(stats.bytes_reserved);
    peak_stats->set_heap_allocated_bytes(stats.bytes_allocated);
    peak_stats->set_free_memory_bytes(stats.bytes_available);
    peak_stats->set_fragmentation(stats.fragmentation);
    summary->set_peak_stats_time_ps(time_offset_ps);
    summary->set_memory_capacity(stats.bytes_reserved + stats.bytes_allocated +
                                 stats.bytes_available);
  }
}

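// Note on UpdateProfileSummary: the ">=" comparison means that if the same
// peak (stack + heap) total recurs within the profiling window, the latest
// occurrence wins. With the test data below, the per-snapshot totals are
// 5000, 4744, and 7000 bytes, so peak_stats_time_ps lands on the third
// event's offset (70000 ps).
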
// Generate memory profile proto by processing host trace XPlane.
MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
  XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace);
  MemoryProfile memory_profile;
  auto* step_count = memory_profile.mutable_step_count();
  // Iterate over all XEvents in the XPlane, and add the XStats to a new
  // MemoryProfileSnapshot if the EventType is kMemoryAllocation or
  // kMemoryDeallocation.
  plane.ForEachLine([&](const XLineVisitor& line) {
    line.ForEachEvent([&](const XEventVisitor& event) {
      int64 event_type = event.Type().value_or(kUnknownHostEventType);
      if (!(IsMemoryAllocation(event_type) ||
            IsMemoryDeallocation(event_type))) {
        return;
      }

      AggregationStats stats;
      ActivityMetadata metadata;
      std::string memory_id;
      event.ForEachStat([&](const XStatVisitor& stat) {
        if (stat.Type() == StatType::kIndexOnHost ||
            stat.Type() == StatType::kDeviceOrdinal) {
          memory_id = absl::StrFormat("%d", stat.IntValue());
        } else if (stat.Type() == StatType::kAllocatorName) {
          memory_id = stat.ToString();
        } else if (stat.Type() == StatType::kBytesReserved) {
          stats.bytes_reserved = stat.IntValue();
        } else if (stat.Type() == StatType::kBytesAllocated) {
          stats.bytes_allocated = stat.IntValue();
        } else if (stat.Type() == StatType::kBytesAvailable) {
          stats.bytes_available = stat.IntValue();
        } else if (stat.Type() == StatType::kFragmentation) {
          stats.fragmentation = stat.DoubleValue();
        } else if (stat.Type() == StatType::kPeakBytesInUse) {
          stats.peak_bytes_in_use = stat.IntValue();
        } else if (stat.Type() == StatType::kRequestedBytes) {
          metadata.requested_bytes = stat.IntValue();
        } else if (stat.Type() == StatType::kAllocationBytes) {
          metadata.allocation_bytes = stat.IntValue();
        } else if (stat.Type() == StatType::kAddress) {
          metadata.address = stat.IntValue();
        } else if (stat.Type() == StatType::kTfOp) {
          metadata.tf_op_name = stat.StrValue();
        } else if (stat.Type() == StatType::kStepId) {
          metadata.step_id = stat.IntValue();
          if (metadata.step_id != 0) (*step_count)[metadata.step_id]++;
        } else if (stat.Type() == StatType::kRegionType) {
          metadata.region_type = stat.StrValue();
        } else if (stat.Type() == StatType::kDataType) {
          metadata.data_type = stat.IntValue();
        } else if (stat.Type() == StatType::kTensorShapes) {
          metadata.tensor_shape = stat.StrValue();
        }
      });

      MemoryProfileSnapshot* snapshot =
          (*memory_profile.mutable_memory_profile_per_allocator())[memory_id]
              .add_memory_profile_snapshots();
      snapshot->set_time_offset_ps(event.OffsetPs());
      FillAggregationStats(stats, snapshot->mutable_aggregation_stats());
      FillActivityMetadata(event_type, metadata,
                           snapshot->mutable_activity_metadata());

      MemoryProfileSummary* summary =
          (*memory_profile.mutable_memory_profile_per_allocator())[memory_id]
              .mutable_profile_summary();
      UpdateProfileSummary(stats, event.OffsetPs(), summary);
    });
  });
  return memory_profile;
}

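// Note on GenerateMemoryProfile: each qualifying event yields exactly one
// MemoryProfileSnapshot, keyed by memory_id (the allocator name when
// kAllocatorName is present, otherwise the host index / device ordinal).
// Snapshots are appended in trace order here and sorted by time offset later
// in ProcessMemoryProfileProto.
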
// Sequentialize step ids for the memory profile.
void UpdateStepId(const tensorflow::protobuf::Map<
                      tensorflow::protobuf_int64 /*orig_step_id*/,
                      tensorflow::protobuf_int64 /*count*/>& step_count,
                  PerAllocatorMemoryProfile* memory_profile) {
  // Map from original random step id to sequential step id.
  absl::flat_hash_map<int64 /*orig_step_id*/, int64 /*step_id*/> step_map;
  constexpr int kUnknownStep = -2;
  constexpr double kStepFilterRatio = 0.1;  // Magic number for filtering.
  tensorflow::protobuf_int64 max_step_count = 0;
  for (const auto& step_and_count : step_count) {
    max_step_count = std::max(max_step_count, step_and_count.second);
  }
  // Filter out noisy and incomplete original step ids.
  for (const auto& step_and_count : step_count) {
    if (static_cast<double>(step_and_count.second) / max_step_count >
        kStepFilterRatio) {
      step_map[step_and_count.first] = kUnknownStep;
    }
  }

  // Update the step ids in memory_profile for this allocator.
  int64 step_id = -1;
  for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
    DCHECK(snapshot.has_activity_metadata());
    // Convert the random step id to sequential step id.
    int64 orig_step_id = snapshot.activity_metadata().step_id();
    if (step_map.contains(orig_step_id) &&
        step_map[orig_step_id] == kUnknownStep) {
      step_map[orig_step_id] = ++step_id;
    }
    snapshot.mutable_activity_metadata()->set_step_id(step_id);
  }
  VLOG(2) << "Max sequential step id in profile: " << step_id;
}

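// Note on UpdateStepId: the profiler records step ids as random int64 values.
// Ids whose event count exceeds kStepFilterRatio of the maximum count are
// treated as real steps and remapped, in time order, to sequential ids
// 0, 1, 2, ...; events from rarer (noisy or incomplete) ids simply inherit
// the current sequential id.
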
// Update the MemoryActivityMetadata for each deallocation event by copying
// from the matching allocation.
void UpdateDeallocation(PerAllocatorMemoryProfile* memory_profile) {
  absl::flat_hash_map<uint64 /*address*/, const MemoryActivityMetadata*>
      addr_metadata_map;
  for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
    // Match the deallocation with previous allocation based on address.
    uint64 address = snapshot.activity_metadata().address();
    if (snapshot.activity_metadata().memory_activity() == DEALLOCATION) {
      if (addr_metadata_map.contains(address)) {
        const MemoryActivityMetadata* alloc_meta = addr_metadata_map[address];
        snapshot.mutable_activity_metadata()->set_tf_op_name(
            alloc_meta->tf_op_name());
        snapshot.mutable_activity_metadata()->set_region_type(
            alloc_meta->region_type());
        snapshot.mutable_activity_metadata()->set_data_type(
            alloc_meta->data_type());
        snapshot.mutable_activity_metadata()->set_tensor_shape(
            alloc_meta->tensor_shape());
        // In case of subsequent (unexpected) deallocations to the same chunk
        // address, leave the metadata as it is (empty or already captured).
        addr_metadata_map.erase(address);
      } else {
        VLOG(2)
            << "Can't find matching memory allocation for this deallocation: "
            << snapshot.DebugString();
      }
    } else if (!addr_metadata_map.contains(address)) {  // Allocation.
      addr_metadata_map[address] = &snapshot.activity_metadata();
    } else {
      VLOG(2) << "There are two allocations recorded for the same address: "
              << address
              << ". The later allocation event is: " << snapshot.DebugString();
    }
  }
  VLOG(2) << "Number of allocations that cannot find matching deallocations: "
          << addr_metadata_map.size();
}

// Return the step id for the peak memory usage data point.
int64 GetPeakMemoryStep(int64 peak_bytes_profile,
                        const PerAllocatorMemoryProfile* memory_profile) {
  int64 peak_bytes_profile_step_id = 0;
  for (const auto& snapshot : memory_profile->memory_profile_snapshots()) {
    // Get the step id of the peak memory usage.
    if (peak_bytes_profile ==
        snapshot.aggregation_stats().heap_allocated_bytes() +
            snapshot.aggregation_stats().stack_reserved_bytes()) {
      DCHECK(snapshot.has_activity_metadata());
      peak_bytes_profile_step_id = snapshot.activity_metadata().step_id();
    }
  }
  return peak_bytes_profile_step_id;
}

// Functor that compares (index, metadata) pair to sort in the order of
// allocation bytes and requested bytes (descending), as well as TF Op name,
// region type, data type, and tensor shape (ascending).
struct MetadataComparator {
  bool operator()(const IndexMetaPair& a, const IndexMetaPair& b) const {
    const MemoryActivityMetadata* a_meta = a.second;
    const MemoryActivityMetadata* b_meta = b.second;
    DCHECK_NE(a_meta, nullptr);
    DCHECK_NE(b_meta, nullptr);

    auto lhs =
        std::make_tuple(-a_meta->allocation_bytes(), -a_meta->requested_bytes(),
                        a_meta->tf_op_name(), a_meta->region_type(),
                        a_meta->data_type(), a_meta->tensor_shape());
    auto rhs =
        std::make_tuple(-b_meta->allocation_bytes(), -b_meta->requested_bytes(),
                        b_meta->tf_op_name(), b_meta->region_type(),
                        b_meta->data_type(), b_meta->tensor_shape());
    return lhs < rhs;
  }
};

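// Note on MetadataComparator: negating the byte counts inside the tuples turns
// std::tuple's lexicographic operator< into "descending by allocation_bytes,
// then by requested_bytes, then ascending by op name / region / type / shape"
// without writing each comparison out by hand.
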
// If applicable, add items into active_allocs vector and special_allocations
// proto for the unmapped memory usage (in heap) and stack reservation at peak.
void InsertSpecialAllocations(int64 unmapped_allocation_bytes, int64 step_id,
                              PerAllocatorMemoryProfile* memory_profile,
                              std::vector<IndexMetaPair>* active_allocs) {
  int index = 0;
  if (unmapped_allocation_bytes > 0) {
    MemoryActivityMetadata* special_allocation =
        memory_profile->add_special_allocations();
    FillActivityMetadata(
        HostEventType::kMemoryAllocation,
        {unmapped_allocation_bytes, unmapped_allocation_bytes, 0,
         "preallocated/unknown", step_id, "persist", 0, "unknown"},
        special_allocation);
    active_allocs->push_back({--index, special_allocation});
  }
  int64 stack_bytes =
      memory_profile->profile_summary().peak_stats().stack_reserved_bytes();
  if (stack_bytes > 0) {
    MemoryActivityMetadata* special_allocation =
        memory_profile->add_special_allocations();
    FillActivityMetadata(
        HostEventType::kMemoryAllocation,
        {stack_bytes, stack_bytes, 0, "stack", step_id, "stack", 0, "unknown"},
        special_allocation);
    active_allocs->push_back({--index, special_allocation});
  }
}

bool operator==(const IndexMetaPair& a, const IndexMetaPair& b) {
  const MemoryActivityMetadata* a_meta = a.second;
  const MemoryActivityMetadata* b_meta = b.second;
  return a_meta->allocation_bytes() == b_meta->allocation_bytes() &&
         a_meta->requested_bytes() == b_meta->requested_bytes() &&
         a_meta->tf_op_name() == b_meta->tf_op_name() &&
         a_meta->region_type() == b_meta->region_type() &&
         a_meta->data_type() == b_meta->data_type() &&
         a_meta->tensor_shape() == b_meta->tensor_shape();
}

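// Note: operator== compares exactly the six fields ordered by
// MetadataComparator, so identical allocations end up adjacent after sorting
// and can be merged by the occurrence-counting loop in
// ProcessActiveAllocations.
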
// Generate the memory breakdown table of active allocations at the peak usage
// (within profiling window) and fill each ActiveAllocation proto (i.e. a row).
void ProcessActiveAllocations(int64 peak_bytes_profile_step_id,
                              PerAllocatorMemoryProfile* memory_profile) {
  int64 unmapped_allocation_bytes =
      memory_profile->profile_summary().peak_stats().heap_allocated_bytes();
  int64 unmapped_deallocation_bytes = 0;
  absl::flat_hash_map<int64 /*address*/, IndexMetaPair> active_alloc_map;
  // Only account for the memory activities in the step that includes peak
  // memory usage.
  for (int i = 0; i < memory_profile->memory_profile_snapshots_size(); i++) {
    const auto& snapshot = memory_profile->memory_profile_snapshots().at(i);
    DCHECK(snapshot.has_activity_metadata());
    const MemoryActivityMetadata& metadata = snapshot.activity_metadata();
    if (snapshot.time_offset_ps() >
        memory_profile->profile_summary().peak_stats_time_ps())
      break;
    if (metadata.step_id() != peak_bytes_profile_step_id) continue;

    if (metadata.memory_activity() == ALLOCATION) {
      active_alloc_map[metadata.address()] = {i, &metadata};
      unmapped_allocation_bytes -= metadata.allocation_bytes();
    } else {
      DCHECK_EQ(metadata.memory_activity(), DEALLOCATION);
      if (active_alloc_map.contains(metadata.address())) {
        active_alloc_map.erase(metadata.address());
      } else {
        unmapped_deallocation_bytes += metadata.allocation_bytes();
      }
      unmapped_allocation_bytes += metadata.allocation_bytes();
    }
  }
  // This separates persistent memory from memory that was allocated in
  // earlier steps but freed within this step.
  unmapped_allocation_bytes -= unmapped_deallocation_bytes;

  VLOG(2) << "unmapped_allocation_bytes=" << unmapped_allocation_bytes
          << ", unmapped_deallocation_bytes=" << unmapped_deallocation_bytes;

  // Using pair of (index, MemoryActivityMetadata*) so that we can sort by the
  // metadata, and fetch metadata by indexing the time-sorted snapshots at
  // frontend.
  std::vector<IndexMetaPair> active_allocs;
  for (const auto& address_and_index_meta : active_alloc_map) {
    active_allocs.push_back(address_and_index_meta.second);
  }

  InsertSpecialAllocations(unmapped_allocation_bytes,
                           peak_bytes_profile_step_id, memory_profile,
                           &active_allocs);

  std::sort(active_allocs.begin(), active_allocs.end(), MetadataComparator());

  // Fill the sorted active_allocations proto messages at peak memory usage.
  // Merge identical allocations and show occurrences.
  for (int i = 0; i < active_allocs.size(); i++) {
    ActiveAllocation* allocation = memory_profile->add_active_allocations();
    allocation->set_snapshot_index(active_allocs[i].first);
    if (active_allocs[i].first < 0) {
      allocation->set_special_index(-active_allocs[i].first - 1);
    } else {
      allocation->set_special_index(-1);
    }
    allocation->set_num_occurrences(1);
    while (i < active_allocs.size() - 1 &&
           active_allocs[i] == active_allocs[i + 1]) {
      allocation->set_num_occurrences(allocation->num_occurrences() + 1);
      i++;
    }
  }

  VLOG(2) << "Distinctive active allocation count="
          << memory_profile->active_allocations_size();
}

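// Note on indices: rows created by InsertSpecialAllocations carry negative
// snapshot_index values (-1, -2, ...); ProcessActiveAllocations exposes them
// to the frontend as special_index 0, 1, ... via special_index = -index - 1,
// while rows backed by real snapshots keep special_index == -1.
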
// Post-process the memory profile to correctly update proto fields, and break
// down peak memory usage for each allocator.
void ProcessMemoryProfileProto(MemoryProfile* memory_profile) {
  memory_profile->set_num_hosts(1);
  // Add sorted memory ids within memory profile data to the selection list.
  for (const auto& id_and_allocator_profile :
       memory_profile->memory_profile_per_allocator()) {
    if (!id_and_allocator_profile.second.memory_profile_snapshots().empty()) {
      memory_profile->add_memory_ids(id_and_allocator_profile.first);
    }
  }
  absl::c_sort(*memory_profile->mutable_memory_ids());

  for (auto& id_and_allocator_profile :
       *memory_profile->mutable_memory_profile_per_allocator()) {
    PerAllocatorMemoryProfile* allocator_memory_profile =
        &id_and_allocator_profile.second;
    // Sort the memory_profile_snapshots by time_offset_ps (ascending) in
    // proto.
    absl::c_sort(
        *allocator_memory_profile->mutable_memory_profile_snapshots(),
        [](const MemoryProfileSnapshot& a, const MemoryProfileSnapshot& b) {
          return a.time_offset_ps() < b.time_offset_ps();
        });

    UpdateStepId(memory_profile->step_count(), allocator_memory_profile);
    UpdateDeallocation(allocator_memory_profile);

    int64 peak_bytes_profile = allocator_memory_profile->profile_summary()
                                   .peak_stats()
                                   .peak_bytes_in_use();
    int64 peak_step_id =
        GetPeakMemoryStep(peak_bytes_profile, allocator_memory_profile);
    ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
  }
}

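// Processing order matters here: snapshots are time-sorted first, so that
// UpdateStepId assigns sequential ids in chronological order and
// UpdateDeallocation can match each deallocation against the most recent
// allocation at the same address.
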
}  // namespace

MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane) {
  MemoryProfile memory_profile = GenerateMemoryProfile(&host_plane);
  ProcessMemoryProfileProto(&memory_profile);
  return memory_profile;
}

}  // namespace profiler
}  // namespace tensorflow
tensorflow/core/profiler/convert/xplane_to_memory_profile.h (new file, 31 lines)
@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_MEMORY_PROFILE_H_
#define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_MEMORY_PROFILE_H_

#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"

namespace tensorflow {
namespace profiler {

// Process the host threads XPlane and generate MemoryProfile result.
MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane);

}  // namespace profiler
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_MEMORY_PROFILE_H_
tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc (new file, 117 lines)
@@ -0,0 +1,117 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"

#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"

namespace tensorflow {
namespace profiler {
namespace {

// Tests with a sample profile with multiple memory allocation and deallocation
// activities within one memory allocator captured in host trace.
TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) {
  XSpace space;
  XPlane* host_plane = space.add_planes();
  XPlaneBuilder host_plane_builder(host_plane);
  host_plane_builder.SetName(kHostThreads);
  host_plane_builder.ReserveLines(1);

  auto tf_executor_thread = host_plane_builder.GetOrCreateLine(0);
  CreateXEventWithIntAndStringViewMetadataValue(
      &host_plane_builder, &tf_executor_thread, "MemoryAllocation", 40000, 1000,
      {{StatType::kBytesReserved, 2000},
       {StatType::kBytesAllocated, 3000},
       {StatType::kBytesAvailable, 5000},
       {StatType::kPeakBytesInUse, 8500},
       {StatType::kRequestedBytes, 200},
       {StatType::kAllocationBytes, 256},
       {StatType::kAddress, 222333},
       {StatType::kStepId, -93746},
       {StatType::kDataType, 1}},
      {{StatType::kAllocatorName, "GPU_0_bfc"},
       {StatType::kTfOp, "foo/bar"},
       {StatType::kRegionType, "output"},
       {StatType::kTensorShapes, "[3, 3, 512, 512]"}});

  CreateXEventWithIntAndStringViewMetadataValue(
      &host_plane_builder, &tf_executor_thread, "MemoryDeallocation", 50000,
      1000,
      {{StatType::kBytesReserved, 2000},
       {StatType::kBytesAllocated, 2744},
       {StatType::kBytesAvailable, 5256},
       {StatType::kPeakBytesInUse, 8500},
       {StatType::kRequestedBytes, 200},
       {StatType::kAllocationBytes, 256},
       {StatType::kAddress, 222333},
       {StatType::kStepId, 0},
       {StatType::kDataType, 0}},
      {{StatType::kAllocatorName, "GPU_0_bfc"},
       {StatType::kRegionType, ""},
       {StatType::kTensorShapes, ""}});

  CreateXEventWithIntAndStringViewMetadataValue(
      &host_plane_builder, &tf_executor_thread, "MemoryAllocation", 70000, 1000,
      {{StatType::kBytesReserved, 2000},
       {StatType::kBytesAllocated, 5000},
       {StatType::kBytesAvailable, 3000},
       {StatType::kPeakBytesInUse, 9500},
       {StatType::kRequestedBytes, 300},
       {StatType::kAllocationBytes, 300},
       {StatType::kAddress, 345678},
       {StatType::kStepId, -93746},
       {StatType::kDataType, 9}},
      {{StatType::kAllocatorName, "GPU_0_bfc"},
       {StatType::kTfOp, "mul_grad/Sum"},
       {StatType::kRegionType, "temp"},
       {StatType::kTensorShapes, "[1, 2]"}});

  MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
  EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1);
  EXPECT_EQ(memory_profile.num_hosts(), 1);
  EXPECT_EQ(memory_profile.memory_ids_size(), 1);
  EXPECT_EQ(memory_profile.step_count().size(), 1);
  EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first,
            "GPU_0_bfc");
  const auto& allocator_memory_profile =
      memory_profile.memory_profile_per_allocator().begin()->second;
  EXPECT_EQ(
      allocator_memory_profile.profile_summary().peak_bytes_usage_lifetime(),
      9500);
  EXPECT_EQ(allocator_memory_profile.profile_summary()
                .peak_stats()
                .peak_bytes_in_use(),
            7000);
  EXPECT_EQ(allocator_memory_profile.profile_summary().peak_stats_time_ps(),
            70000);
  EXPECT_EQ(allocator_memory_profile.memory_profile_snapshots_size(), 3);
  EXPECT_EQ(allocator_memory_profile.active_allocations_size(), 3);
  EXPECT_EQ(
      allocator_memory_profile.active_allocations().at(2).snapshot_index(), 2);
  EXPECT_EQ(allocator_memory_profile.special_allocations_size(), 2);
  EXPECT_EQ(allocator_memory_profile.special_allocations().at(1).tf_op_name(),
            "stack");
  EXPECT_EQ(
      allocator_memory_profile.special_allocations().at(1).allocation_bytes(),
      2000);
}

}  // namespace
}  // namespace profiler
}  // namespace tensorflow
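
The expected values follow directly from the three synthetic events:

// Snapshot totals (stack_reserved + heap_allocated):
//   t=40000 ps: 2000 + 3000 = 5000
//   t=50000 ps: 2000 + 2744 = 4744   (256-byte deallocation)
//   t=70000 ps: 2000 + 5000 = 7000   <- peak within the profiling window
// Hence peak_stats().peak_bytes_in_use() == 7000 at peak_stats_time_ps 70000,
// while peak_bytes_usage_lifetime() == 9500 comes straight from the largest
// kPeakBytesInUse stat reported by the allocator.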
tensorflow/core/profiler/convert/xplane_to_profile_response.cc
@@ -21,17 +21,21 @@ limitations under the License.
#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h"
#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h"
#include "tensorflow/core/profiler/convert/trace_events_to_json.h"
#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h"
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
#include "tensorflow/core/profiler/convert/xplane_to_trace_events.h"
#include "tensorflow/core/profiler/profiler_service.pb.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h"
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/memory_profile.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h"
#include "tensorflow/core/profiler/protobuf/tf_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/rpc/client/save_profile.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"

namespace tensorflow {
namespace profiler {
@@ -42,6 +46,7 @@ const absl::string_view kTensorflowStats = "tensorflow_stats";
const absl::string_view kInputPipeline = "input_pipeline";
const absl::string_view kOverviewPage = "overview_page";
const absl::string_view kKernelStats = "kernel_stats";
const absl::string_view kMemoryProfile = "memory_profile";

HardwareType HardwareTypeFromRunEnvironment(const RunEnvironment& run_env) {
  if (run_env.device_type() == "GPU") return HardwareType::GPU;
@@ -107,6 +112,12 @@ Status ConvertXSpaceToProfileResponse(const XSpace& xspace,
  if (tools.contains(kKernelStats)) {
    AddToolData(ToolName(kKernelStats), op_stats.kernel_stats_db(), response);
  }
  if (tools.contains(kMemoryProfile)) {
    if (const XPlane* host_plane = FindPlaneWithName(xspace, kHostThreads)) {
      MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
      AddToolData(ToolName(kMemoryProfile), memory_profile, response);
    }
  }
  return Status::OK();
}

tensorflow/core/profiler/protobuf/memory_profile.proto
@@ -122,4 +122,7 @@ message MemoryProfile {
  // Ids for profiled memory allocators, used to populate memory selection list
  // at front end.
  repeated string memory_ids = 3;
  // Map of original random int64 step id to the count of memory activity
  // events assigned with it.
  map<int64 /*orig_step_id*/, int64 /*count*/> step_count = 4;
}

tensorflow/core/profiler/utils/xplane_utils.cc
@@ -171,6 +171,29 @@ XEventBuilder CreateXEventWithStringViewMetadataValue(
  return event_builder;
}

XEventBuilder CreateXEventWithIntAndStringViewMetadataValue(
    XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
    absl::string_view event_name, int64 offset_ps, int64 duration_ps,
    const absl::flat_hash_map<StatType, int64 /*stat_value*/>& int_stats,
    const absl::flat_hash_map<StatType, absl::string_view /*stat_value*/>&
        str_stats) {
  auto event_builder = line_builder->AddEvent(
      *plane_builder->GetOrCreateEventMetadata(event_name));
  event_builder.SetOffsetPs(offset_ps);
  event_builder.SetDurationPs(duration_ps);
  for (const auto& stat_type_and_value : int_stats) {
    event_builder.AddStatValue(*plane_builder->GetOrCreateStatMetadata(
                                   GetStatTypeStr(stat_type_and_value.first)),
                               stat_type_and_value.second);
  }
  for (const auto& stat_type_and_value : str_stats) {
    event_builder.AddStatValue(*plane_builder->GetOrCreateStatMetadata(
                                   GetStatTypeStr(stat_type_and_value.first)),
                               stat_type_and_value.second);
  }
  return event_builder;
}

void RemovePlaneWithName(XSpace* space, absl::string_view name) {
  auto* planes = space->mutable_planes();
  planes->erase(
tensorflow/core/profiler/utils/xplane_utils.h
@@ -63,6 +63,14 @@ XEventBuilder CreateXEventWithStringViewMetadataValue(
    const absl::flat_hash_map<StatType, absl::string_view /*stat_value*/>&
        stats);

// Creates an XEvent with int64 and string stats.
XEventBuilder CreateXEventWithIntAndStringViewMetadataValue(
    XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
    absl::string_view event_name, int64 offset_ps, int64 duration_ps,
    const absl::flat_hash_map<StatType, int64 /*stat_value*/>& int_stats,
    const absl::flat_hash_map<StatType, absl::string_view /*stat_value*/>&
        str_stats);

void RemovePlaneWithName(XSpace* space, absl::string_view name);
void RemoveEmptyPlanes(XSpace* space);
void RemoveEmptyLines(XPlane* plane);