Populate the memory access breakdown information in OpMetrics.
PiperOrigin-RevId: 313469967
Change-Id: Ib8783f7a4a81cbda79a32410440866946972a1f2
This commit is contained in:
parent
b3925cc9b6
commit
3bb7d689d3
@ -63,13 +63,11 @@ void HostOpMetricsDbBuilder::UpdateHostInfeedEnqInfo(
|
|||||||
start_timestamp_ps_diff);
|
start_timestamp_ps_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeviceOpMetricsDbBuilder::EnterOp(uint64 program_id,
|
void DeviceOpMetricsDbBuilder::EnterOp(
|
||||||
absl::string_view name,
|
uint64 program_id, absl::string_view name, absl::string_view category,
|
||||||
absl::string_view category,
|
absl::string_view provenance, bool is_eager, uint64 occurrences,
|
||||||
absl::string_view provenance,
|
uint64 time_ps, uint64 children_time_ps, int64 flops, int64 bytes_accessed,
|
||||||
bool is_eager, uint64 occurrences,
|
const std::vector<OpMetrics::MemoryAccessed>& memory_accessed_breakdown) {
|
||||||
uint64 time_ps, uint64 children_time_ps,
|
|
||||||
int64 flops, int64 bytes_accessed) {
|
|
||||||
uint64 self_time_ps = time_ps - children_time_ps;
|
uint64 self_time_ps = time_ps - children_time_ps;
|
||||||
DCHECK_GE(time_ps, self_time_ps);
|
DCHECK_GE(time_ps, self_time_ps);
|
||||||
OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(program_id, name);
|
OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(program_id, name);
|
||||||
@ -89,6 +87,9 @@ void DeviceOpMetricsDbBuilder::EnterOp(uint64 program_id,
|
|||||||
op_metrics->bytes_accessed() +
|
op_metrics->bytes_accessed() +
|
||||||
GetCappedPerf(bytes_accessed * occurrences, self_time_ps,
|
GetCappedPerf(bytes_accessed * occurrences, self_time_ps,
|
||||||
peak_hbm_bw_giga_bytes_per_second_ / 1000));
|
peak_hbm_bw_giga_bytes_per_second_ / 1000));
|
||||||
|
for (const auto& memory_accessed : memory_accessed_breakdown) {
|
||||||
|
*op_metrics->add_memory_accessed_breakdown() = memory_accessed;
|
||||||
|
}
|
||||||
db()->set_total_op_time_ps(db()->total_op_time_ps() + self_time_ps);
|
db()->set_total_op_time_ps(db()->total_op_time_ps() + self_time_ps);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,10 +69,14 @@ class DeviceOpMetricsDbBuilder : public OpMetricsDbBuilder {
|
|||||||
// picoseconds.
|
// picoseconds.
|
||||||
// flops = the number of floating-point operations computed.
|
// flops = the number of floating-point operations computed.
|
||||||
// bytes_accessed = the sum of bytes read and bytes written by this OP.
|
// bytes_accessed = the sum of bytes read and bytes written by this OP.
|
||||||
|
// memory_accessed_breakdown = the breakdown of memory accessed by operation
|
||||||
|
// type and memory space.
|
||||||
void EnterOp(uint64 program_id, absl::string_view name,
|
void EnterOp(uint64 program_id, absl::string_view name,
|
||||||
absl::string_view category, absl::string_view provenance,
|
absl::string_view category, absl::string_view provenance,
|
||||||
bool is_eager, uint64 occurrences, uint64 time_ps,
|
bool is_eager, uint64 occurrences, uint64 time_ps,
|
||||||
uint64 children_time_ps, int64 flops, int64 bytes_accessed);
|
uint64 children_time_ps, int64 flops, int64 bytes_accessed,
|
||||||
|
const std::vector<OpMetrics::MemoryAccessed>&
|
||||||
|
memory_accessed_breakdown = {});
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// Peak performance of a TensorCore or a GPU in TFLOP/s.
|
// Peak performance of a TensorCore or a GPU in TFLOP/s.
|
||||||
|
Loading…
Reference in New Issue
Block a user