From b0594e1b82180efe5b1d0558b4410137f3974b93 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 20 Apr 2017 21:32:47 -0800 Subject: [PATCH] [XLA] Fixes some div-by-zero bugs. Change: 153795265 --- .../xla/service/hlo_execution_profile.cc | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index 447892c8dec..9e25f1aceb1 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -70,6 +70,7 @@ string HloExecutionProfile::ToString( string result; const int64 total_cycles = total_cycles_executed(computation); double clock_rate_ghz = device_description.clock_rate_ghz(); + CHECK_GE(clock_rate_ghz, 1e-9); const auto cycles_to_microseconds = [&](double cycles) { return cycles / clock_rate_ghz / 1000.0; @@ -80,14 +81,19 @@ string HloExecutionProfile::ToString( double nsecs = cycles / clock_rate_ghz; string bytes_per_sec; string bytes_per_cycle; - if (bytes_accessed >= 0) { + if (cycles <= 0 || bytes_accessed < 0) { + bytes_per_sec = ""; + bytes_per_cycle = ""; + } else { bytes_per_sec = tensorflow::strings::HumanReadableNumBytes( bytes_accessed / (nsecs / 1e9)); bytes_per_cycle = tensorflow::strings::HumanReadableNumBytes(bytes_accessed / cycles); - } else { - bytes_per_sec = ""; - bytes_per_cycle = ""; + } + + double cycles_percent = 0; + if (total_cycles > 0) { + cycles_percent = cycles / static_cast<double>(total_cycles) * 100; } tensorflow::strings::StrAppend( @@ -97,8 +103,7 @@ string HloExecutionProfile::ToString( ":: " "%12s/cycle :: " "%s", - cycles, cycles / static_cast<double>(total_cycles) * 100, - cycles_to_microseconds(cycles), + cycles, cycles_percent, cycles_to_microseconds(cycles), flops <= 0 ? 
"" : HumanReadableNumFlops(flops, nsecs).c_str(), bytes_per_sec.c_str(), bytes_per_cycle.c_str(), name.c_str())); }; @@ -114,26 +119,30 @@ string HloExecutionProfile::ToString( for (const auto& item : items) { const HloInstruction* hlo = item.first; tensorflow::strings::StrAppend(&result, "\n\t"); - int64 flops = hlo == nullptr ? -1 : cost_analysis.flop_count(*hlo); - int64 bytes_accessed = - hlo == nullptr ? -1 : cost_analysis.bytes_accessed(*hlo); - string display = hlo == nullptr ? "" : hlo->ToString(); + const int64 flops = (hlo == nullptr) ? -1 : cost_analysis.flop_count(*hlo); + const int64 bytes_accessed = + (hlo == nullptr) ? -1 : cost_analysis.bytes_accessed(*hlo); + const string display = (hlo == nullptr) ? "" : hlo->ToString(); append_item(item.second, flops, bytes_accessed, display); } - MetricTableReport table; - table.SetMetricName("microseconds"); - table.SetEntryName("ops"); - table.SetShowCategoryTable(); - for (const auto& item : items) { - MetricTableReport::Entry entry; - entry.text = item.first->ToString(); - entry.short_text = item.first->ToString(/*compact_operands=*/true); - entry.category_text = item.first->ToCategory(); - entry.metric = cycles_to_microseconds(item.second); - table.AddEntry(std::move(entry)); + if (total_cycles <= 0) { + result += "****** 0 total cycles ******\n"; + } else { + MetricTableReport table; + table.SetMetricName("microseconds"); + table.SetEntryName("ops"); + table.SetShowCategoryTable(); + for (const auto& item : items) { + MetricTableReport::Entry entry; + entry.text = item.first->ToString(); + entry.short_text = item.first->ToString(/*compact_operands=*/true); + entry.category_text = item.first->ToCategory(); + entry.metric = cycles_to_microseconds(item.second); + table.AddEntry(std::move(entry)); + } + result += table.MakeReport(cycles_to_microseconds(total_cycles)); } - result += table.MakeReport(cycles_to_microseconds(total_cycles)); return result; }