Populate the memory access breakdown information in OpMetrics.
PiperOrigin-RevId: 313469967
Change-Id: Ib8783f7a4a81cbda79a32410440866946972a1f2
This commit is contained in:
parent
b3925cc9b6
commit
3bb7d689d3
@ -63,13 +63,11 @@ void HostOpMetricsDbBuilder::UpdateHostInfeedEnqInfo(
|
||||
start_timestamp_ps_diff);
|
||||
}
|
||||
|
||||
void DeviceOpMetricsDbBuilder::EnterOp(uint64 program_id,
|
||||
absl::string_view name,
|
||||
absl::string_view category,
|
||||
absl::string_view provenance,
|
||||
bool is_eager, uint64 occurrences,
|
||||
uint64 time_ps, uint64 children_time_ps,
|
||||
int64 flops, int64 bytes_accessed) {
|
||||
void DeviceOpMetricsDbBuilder::EnterOp(
|
||||
uint64 program_id, absl::string_view name, absl::string_view category,
|
||||
absl::string_view provenance, bool is_eager, uint64 occurrences,
|
||||
uint64 time_ps, uint64 children_time_ps, int64 flops, int64 bytes_accessed,
|
||||
const std::vector<OpMetrics::MemoryAccessed>& memory_accessed_breakdown) {
|
||||
uint64 self_time_ps = time_ps - children_time_ps;
|
||||
DCHECK_GE(time_ps, self_time_ps);
|
||||
OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(program_id, name);
|
||||
@ -89,6 +87,9 @@ void DeviceOpMetricsDbBuilder::EnterOp(uint64 program_id,
|
||||
op_metrics->bytes_accessed() +
|
||||
GetCappedPerf(bytes_accessed * occurrences, self_time_ps,
|
||||
peak_hbm_bw_giga_bytes_per_second_ / 1000));
|
||||
for (const auto& memory_accessed : memory_accessed_breakdown) {
|
||||
*op_metrics->add_memory_accessed_breakdown() = memory_accessed;
|
||||
}
|
||||
db()->set_total_op_time_ps(db()->total_op_time_ps() + self_time_ps);
|
||||
}
|
||||
|
||||
|
@ -69,10 +69,14 @@ class DeviceOpMetricsDbBuilder : public OpMetricsDbBuilder {
|
||||
// picoseconds.
|
||||
// flops = the number of floating-point operations computed.
|
||||
// bytes_accessed = the sum of bytes read and bytes written by this OP.
|
||||
// memory_accessed_breakdown = the breakdown of memory accessed by operation
|
||||
// type and memory space.
|
||||
void EnterOp(uint64 program_id, absl::string_view name,
|
||||
absl::string_view category, absl::string_view provenance,
|
||||
bool is_eager, uint64 occurrences, uint64 time_ps,
|
||||
uint64 children_time_ps, int64 flops, int64 bytes_accessed);
|
||||
uint64 children_time_ps, int64 flops, int64 bytes_accessed,
|
||||
const std::vector<OpMetrics::MemoryAccessed>&
|
||||
memory_accessed_breakdown = {});
|
||||
|
||||
protected:
|
||||
// Peak performance of a TensorCore or a GPU in TFLOP/s.
|
||||
|
Loading…
Reference in New Issue
Block a user