Split memory bandwidth reporting into its own function, GetMemoryBandwidthInfo(), so that callers who only need memory usage information do not incur the additional calls that bandwidth reporting requires.

PiperOrigin-RevId: 357122076 Change-Id: Ia23cbb2e06c36cf23a8b0f3f31ffb9d62d780b56
2021-02-11 21:41:51 -08:00 · 2021-02-11 21:41:51 -08:00 · 2ad0a33259
commit 2ad0a33259
parent 0f270839dc
5 changed files with 33 additions and 3 deletions
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@ -463,6 +463,7 @@ tf_kernel_library(
        "//tensorflow/core:session_options",
        "//tensorflow/core/kernels:ops_util",
        "//tensorflow/core/profiler/lib:traceme",
+        "//tensorflow/core/profiler/lib:traceme_encode",
        "@com_google_absl//absl/memory",
    ],
 )
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@ -47,10 +47,12 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/casts.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/refcount.h"
 #include "tensorflow/core/platform/resource.h"
 #include "tensorflow/core/profiler/lib/traceme.h"
+#include "tensorflow/core/profiler/lib/traceme_encode.h"
 #include "tensorflow/core/public/session_options.h"

 namespace tensorflow {
@ -965,9 +967,18 @@ void RecordElementSize(const std::vector<Tensor> element,
 Status IteratorGetNextOp::DoCompute(OpKernelContext* ctx) {
  profiler::TraceMe traceme(
      [&] {
-        return strings::StrCat(
-            "IteratorGetNextOp::DoCompute#id=", ctx->step_id(),
-            ",iter_num=", ctx->frame_iter().iter_id, "#");
+        int64 mem_bw = port::GetMemoryBandwidthInfo().bw_used;
+
+        if (mem_bw != INT64_MAX) {
+          return profiler::TraceMeEncode(
+              "IteratorGetNextOp::DoCompute",
+              {{"id", ctx->step_id()},
+               {"iter_num", ctx->frame_iter().iter_id},
+               {"mem_bw_used_megabytes_per_sec", mem_bw}});
+        }
+        return profiler::TraceMeEncode(
+            "IteratorGetNextOp::DoCompute",
+            {{"id", ctx->step_id()}, {"iter_num", ctx->frame_iter().iter_id}});
      },
      profiler::kInfo);
  tensorflow::ResourceTagger tag(kTFDataResourceTag,
--- a/tensorflow/core/platform/default/port.cc
+++ b/tensorflow/core/platform/default/port.cc
@ -369,5 +369,10 @@ MemoryInfo GetMemoryInfo() {
  return mem_info;
 }

+MemoryBandwidthInfo GetMemoryBandwidthInfo() {
+  MemoryBandwidthInfo membw_info = {INT64_MAX};
+  return membw_info;
+}
+
 }  // namespace port
 }  // namespace tensorflow
--- a/tensorflow/core/platform/mem.h
+++ b/tensorflow/core/platform/mem.h
@ -64,11 +64,19 @@ struct MemoryInfo {
  int64 free = 0;
 };

+struct MemoryBandwidthInfo {
+  int64 bw_used = 0;  // memory bandwidth used across all CPU (in MBs/second)
+};
+
 // Retrieves the host memory information. If any of the fields in the returned
 // MemoryInfo structure is INT64_MAX, it means such information is not
 // available.
 MemoryInfo GetMemoryInfo();

+// Retrieves the host memory bandwidth information. If any field in the returned
+// structure is INT64_MAX, it means such information is not available.
+MemoryBandwidthInfo GetMemoryBandwidthInfo();
+
 // Returns the amount of RAM available in bytes, or INT64_MAX if unknown.
 static inline int64 AvailableRam() { return GetMemoryInfo().free; }

--- a/tensorflow/core/platform/windows/port.cc
+++ b/tensorflow/core/platform/windows/port.cc
@ -202,6 +202,11 @@ MemoryInfo GetMemoryInfo() {
  return mem_info;
 }

+MemoryBandwidthInfo GetMemoryBandwidthInfo() {
+  MemoryBandwidthInfo membw_info = {INT64_MAX};
+  return membw_info;
+}
+
 int NumHyperthreadsPerCore() {
  static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
  return (ht_per_core > 0) ? ht_per_core : 1;