From 81e97f2832cd2d7fb71f91d0e8258ef17031f8ea Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Mon, 8 Feb 2021 21:23:54 -0800
Subject: [PATCH] Add space for reporting memory bandwidth statistics

PiperOrigin-RevId: 356421433
Change-Id: I52f9d8db3a2fc8209a247f300e79287fd9fa2555
---
 tensorflow/core/kernels/data/BUILD           |  1 +
 tensorflow/core/kernels/data/iterator_ops.cc | 17 ++++++++++++++---
 tensorflow/core/platform/default/port.cc     |  2 +-
 tensorflow/core/platform/mem.h               |  1 +
 tensorflow/core/platform/windows/port.cc     |  2 +-
 5 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index e5adb2dfba2..448a5def807 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -463,6 +463,7 @@ tf_kernel_library(
         "//tensorflow/core:session_options",
         "//tensorflow/core/kernels:ops_util",
         "//tensorflow/core/profiler/lib:traceme",
+        "//tensorflow/core/profiler/lib:traceme_encode",
         "@com_google_absl//absl/memory",
     ],
 )
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index f3d2382db1f..f79d8e7d8c7 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -47,10 +47,12 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/casts.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/refcount.h"
 #include "tensorflow/core/platform/resource.h"
 #include "tensorflow/core/profiler/lib/traceme.h"
+#include "tensorflow/core/profiler/lib/traceme_encode.h"
 #include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
@@ -965,9 +967,18 @@ void RecordElementSize(const std::vector<Tensor> element,
 Status IteratorGetNextOp::DoCompute(OpKernelContext* ctx) {
   profiler::TraceMe traceme(
       [&] {
-        return strings::StrCat(
-            "IteratorGetNextOp::DoCompute#id=", ctx->step_id(),
-            ",iter_num=", ctx->frame_iter().iter_id, "#");
+        int64 mem_bw = port::GetMemoryInfo().bw_used;
+
+        if (mem_bw != INT64_MAX) {
+          return profiler::TraceMeEncode(
+              "IteratorGetNextOp::DoCompute",
+              {{"id", ctx->step_id()},
+               {"iter_num", ctx->frame_iter().iter_id},
+               {"mem_bw_used_megabytes_per_sec", mem_bw}});
+        }
+        return profiler::TraceMeEncode(
+            "IteratorGetNextOp::DoCompute",
+            {{"id", ctx->step_id()}, {"iter_num", ctx->frame_iter().iter_id}});
       },
       profiler::kInfo);
   tensorflow::ResourceTagger tag(kTFDataResourceTag,
diff --git a/tensorflow/core/platform/default/port.cc b/tensorflow/core/platform/default/port.cc
index e25ed074844..6e82c67be99 100644
--- a/tensorflow/core/platform/default/port.cc
+++ b/tensorflow/core/platform/default/port.cc
@@ -357,7 +357,7 @@ double NominalCPUFrequency() {
 }
 
 MemoryInfo GetMemoryInfo() {
-  MemoryInfo mem_info = {INT64_MAX, INT64_MAX};
+  MemoryInfo mem_info = {INT64_MAX, INT64_MAX, INT64_MAX};
 #if defined(__linux__) && !defined(__ANDROID__)
   struct sysinfo info;
   int err = sysinfo(&info);
diff --git a/tensorflow/core/platform/mem.h b/tensorflow/core/platform/mem.h
index 27ad3574182..e01d4953c62 100644
--- a/tensorflow/core/platform/mem.h
+++ b/tensorflow/core/platform/mem.h
@@ -62,6 +62,7 @@ std::size_t MallocExtension_GetAllocatedSize(const void* p);
 struct MemoryInfo {
   int64 total = 0;
   int64 free = 0;
+  int64 bw_used = 0;  // memory bandwidth used across all CPUs (in MB/second)
 };
 
 // Retrieves the host memory information. If any of the fields in the returned
diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc
index 16b5a328256..256f525a38d 100644
--- a/tensorflow/core/platform/windows/port.cc
+++ b/tensorflow/core/platform/windows/port.cc
@@ -192,7 +192,7 @@ double NominalCPUFrequency() {
 }
 
 MemoryInfo GetMemoryInfo() {
-  MemoryInfo mem_info = {INT64_MAX, INT64_MAX};
+  MemoryInfo mem_info = {INT64_MAX, INT64_MAX, INT64_MAX};
   MEMORYSTATUSEX statex;
   statex.dwLength = sizeof(statex);
   if (GlobalMemoryStatusEx(&statex)) {