diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 027d03ba152..dc73ed66bad 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -37,6 +37,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", + "//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:time_utils", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", diff --git a/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc b/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc index 4ce14f54d47..23561169c4e 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc @@ -57,7 +57,7 @@ TfStatsTable GenerateTfStatsTable(const OpMetricsDb& host_tf_metrics_db, } double total_device_time_us = PicosToMicros(total_device_time_ps); for (const OpMetrics* metrics : SortedOpMetricsDb(device_tf_metrics_db)) { - if (exclude_idle && metrics->category() == "IDLE") continue; + if (exclude_idle && IsIdleOp(*metrics)) continue; TfStatsRecord* record = tf_stats_table.add_tf_stats_record(); *record = ConvertOpMetricsToTfStatsRecord( /*on_device=*/true, *metrics, ridge_point); @@ -73,7 +73,7 @@ TfStatsTable GenerateTfStatsTable(const OpMetricsDb& host_tf_metrics_db, double total_host_time_us = PicosToMicros(total_host_time_ps); for (const OpMetrics* metrics : tensorflow::profiler::SortedOpMetricsDb(host_tf_metrics_db)) { - if (exclude_idle && metrics->category() == "IDLE") continue; + if (exclude_idle && IsIdleOp(*metrics)) continue; TfStatsRecord* record = tf_stats_table.add_tf_stats_record(); *record = ConvertOpMetricsToTfStatsRecord( /*on_device=*/false, *metrics, ridge_point); diff --git a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc index 3c8d5525370..3ddc5227038 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/core/platform/test.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" @@ -85,7 +86,7 @@ TEST(ConvertXPlaneToOpMetricsDb, HostOpMetricsDb) { EXPECT_EQ(NanosToPicos(kTfOp1DurationNs) * 2, op_1.time_ps()); const OpMetrics& idle = op_metrics.metrics_db().at(1); - EXPECT_EQ("IDLE", idle.name()); + EXPECT_EQ(kIdle, idle.name()); // Idle time is the gap between Op2 start and the end of Op1, which is 2000ns. EXPECT_EQ(NanosToPicos(2000), idle.time_ps()); @@ -149,7 +150,7 @@ TEST(ConvertXPlaneToOpMetricsDb, DeviceOpMetricsDb) { EXPECT_EQ(NanosToPicos(kTfOp2DurationNs), op_2.time_ps()); const OpMetrics& idle = op_metrics.metrics_db().at(2); - EXPECT_EQ("IDLE", idle.name()); + EXPECT_EQ(kIdle, idle.name()); // GPU is always busy in this example. EXPECT_EQ(NanosToPicos(0), idle.time_ps()); } diff --git a/tensorflow/core/profiler/utils/op_metrics_db_utils.cc b/tensorflow/core/profiler/utils/op_metrics_db_utils.cc index dee33f1d1ce..07d1be230f0 100644 --- a/tensorflow/core/profiler/utils/op_metrics_db_utils.cc +++ b/tensorflow/core/profiler/utils/op_metrics_db_utils.cc @@ -23,6 +23,9 @@ limitations under the License. namespace tensorflow { namespace profiler { + +const absl::string_view kIdle = "IDLE"; + namespace { class DeviceTfOpMetricsDbBuilder : public OpMetricsDbBuilder { @@ -85,9 +88,9 @@ uint64 IdleTimePs(const OpMetricsDb& metrics_db) { void AddIdleOp(OpMetricsDb* db) { uint64 idle_time_ps = IdleTimePs(*db); OpMetrics* metrics = db->add_metrics_db(); - metrics->set_name("IDLE"); - metrics->set_category("IDLE"); - metrics->set_occurrences(1); + metrics->set_name(string(kIdle)); + metrics->set_category(string(kIdle)); + metrics->set_occurrences(0); metrics->set_time_ps(idle_time_ps); metrics->set_self_time_ps(idle_time_ps); } @@ -102,9 +105,9 @@ OpMetricsDb CreateTfMetricsDbFromDeviceOpMetricsDb( builder.UpdateTfOpMetricsWithDeviceOpMetrics(tf_op.name, tf_op.type, device_op_metrics); } else { - DCHECK_EQ(device_op_metrics.name(), "IDLE"); + DCHECK(IsIdleOp(device_op_metrics)); if (with_idle) { - builder.UpdateTfOpMetricsWithDeviceOpMetrics("IDLE", "IDLE", + builder.UpdateTfOpMetricsWithDeviceOpMetrics(kIdle, kIdle, device_op_metrics); } } diff --git a/tensorflow/core/profiler/utils/op_metrics_db_utils.h b/tensorflow/core/profiler/utils/op_metrics_db_utils.h index a1f1a045cdd..7cb776abfe7 100644 --- a/tensorflow/core/profiler/utils/op_metrics_db_utils.h +++ b/tensorflow/core/profiler/utils/op_metrics_db_utils.h @@ -25,6 +25,10 @@ limitations under the License. namespace tensorflow { namespace profiler { + +// The name of OpMetrics to represent the idle time. +ABSL_CONST_INIT extern const absl::string_view kIdle; + // Helps build an op metrics database (borrowed). // Enables fast lookup of existing ops and prevents the creation of duplicate // ops. It is the user's responsibility to ensure an op metrics database @@ -67,6 +71,11 @@ uint64 IdleTimePs(const OpMetricsDb& metrics_db); // must have been set. void AddIdleOp(OpMetricsDb* db); +// Returns true if the given metrics represents idle time. +inline bool IsIdleOp(const OpMetrics& metrics) { + return metrics.name() == kIdle; +} + // Converts from the device op metrics to Tf-op metrics. OpMetricsDb CreateTfMetricsDbFromDeviceOpMetricsDb( const OpMetricsDb& device_op_metrics_db, bool with_idle = true);