From 3bb7d689d301cbbb5cd752f1ea3f19c7bdbc99df Mon Sep 17 00:00:00 2001
From: Jiho Choi
Date: Wed, 27 May 2020 14:57:12 -0700
Subject: [PATCH] Populate the memory access breakdown information in OpMetrics.

PiperOrigin-RevId: 313469967
Change-Id: Ib8783f7a4a81cbda79a32410440866946972a1f2
---
 tensorflow/core/profiler/utils/op_utils.cc | 15 ++++++++-------
 tensorflow/core/profiler/utils/op_utils.h  |  6 +++++-
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/profiler/utils/op_utils.cc b/tensorflow/core/profiler/utils/op_utils.cc
index 921e0617902..75789bc1071 100644
--- a/tensorflow/core/profiler/utils/op_utils.cc
+++ b/tensorflow/core/profiler/utils/op_utils.cc
@@ -63,13 +63,11 @@ void HostOpMetricsDbBuilder::UpdateHostInfeedEnqInfo(
       start_timestamp_ps_diff);
 }
 
-void DeviceOpMetricsDbBuilder::EnterOp(uint64 program_id,
-                                       absl::string_view name,
-                                       absl::string_view category,
-                                       absl::string_view provenance,
-                                       bool is_eager, uint64 occurrences,
-                                       uint64 time_ps, uint64 children_time_ps,
-                                       int64 flops, int64 bytes_accessed) {
+void DeviceOpMetricsDbBuilder::EnterOp(
+    uint64 program_id, absl::string_view name, absl::string_view category,
+    absl::string_view provenance, bool is_eager, uint64 occurrences,
+    uint64 time_ps, uint64 children_time_ps, int64 flops, int64 bytes_accessed,
+    const std::vector<OpMetrics::MemoryAccessed>& memory_accessed_breakdown) {
   uint64 self_time_ps = time_ps - children_time_ps;
   DCHECK_GE(time_ps, self_time_ps);
   OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(program_id, name);
@@ -89,6 +87,9 @@ void DeviceOpMetricsDbBuilder::EnterOp(uint64 program_id,
       op_metrics->bytes_accessed() +
       GetCappedPerf(bytes_accessed * occurrences, self_time_ps,
                     peak_hbm_bw_giga_bytes_per_second_ / 1000));
+  for (const auto& memory_accessed : memory_accessed_breakdown) {
+    *op_metrics->add_memory_accessed_breakdown() = memory_accessed;
+  }
   db()->set_total_op_time_ps(db()->total_op_time_ps() + self_time_ps);
 }
 
diff --git a/tensorflow/core/profiler/utils/op_utils.h b/tensorflow/core/profiler/utils/op_utils.h
index f94328d1b8d..9c9762853b8 100644
--- a/tensorflow/core/profiler/utils/op_utils.h
+++ b/tensorflow/core/profiler/utils/op_utils.h
@@ -69,10 +69,14 @@ class DeviceOpMetricsDbBuilder : public OpMetricsDbBuilder {
   // picoseconds.
   // flops = the number of floating-point operations computed.
   // bytes_accessed = the sum of bytes read and bytes written by this OP.
+  // memory_accessed_breakdown = the breakdown of memory accessed by operation
+  // type and memory space.
   void EnterOp(uint64 program_id, absl::string_view name,
                absl::string_view category, absl::string_view provenance,
                bool is_eager, uint64 occurrences, uint64 time_ps,
-               uint64 children_time_ps, int64 flops, int64 bytes_accessed);
+               uint64 children_time_ps, int64 flops, int64 bytes_accessed,
+               const std::vector<OpMetrics::MemoryAccessed>&
+                   memory_accessed_breakdown = {});
 
  protected:
   // Peak performance of a TensorCore or a GPU in TFLOP/s.