Add XSpace to OpStats converter for GPU.

PiperOrigin-RevId: 291187224
Change-Id: Ibcbaeaf1cc4fc8d2fbf2be77bba0145ce0b1e4ae
This commit is contained in:
A. Unique TensorFlower 2020-01-23 09:52:28 -08:00 committed by TensorFlower Gardener
parent e9b0344dfa
commit c3a2c28c9c
9 changed files with 336 additions and 1 deletions

View File

@ -27,6 +27,21 @@ cc_library(
],
)
# Unit test for the XPlane -> OpMetricsDb converters.
tf_cc_test(
    name = "xplane_to_op_metrics_db_test",
    size = "small",
    srcs = ["xplane_to_op_metrics_db_test.cc"],
    deps = [
        ":xplane_to_op_metrics_db",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
        "//tensorflow/core/profiler/utils:time_utils",
        "//tensorflow/core/profiler/utils:xplane_builder",
        "//tensorflow/core/profiler/utils:xplane_schema",
        # The test source calls absl::StrCat and takes absl::string_view, so
        # depend on the strings library directly rather than transitively.
        "@com_google_absl//absl/strings",
    ],
)
cc_library(
name = "run_metadata_to_trace_events",
srcs = ["run_metadata_to_trace_events.cc"],
@ -151,10 +166,31 @@ cc_library(
hdrs = ["xplane_to_op_stats.h"],
deps = [
":xplane_to_op_metrics_db",
"//tensorflow/core:lib",
"//tensorflow/core/profiler/protobuf:hardware_types_proto_cc",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:hardware_type_utils",
"//tensorflow/core/profiler/utils:xplane_schema",
"//tensorflow/core/profiler/utils:xplane_utils",
"//tensorflow/core/profiler/utils:xplane_visitor",
],
)
# Unit test for the XSpace -> OpStats converter.
tf_cc_test(
    name = "xplane_to_op_stats_test",
    size = "small",
    srcs = ["xplane_to_op_stats_test.cc"],
    deps = [
        ":xplane_to_op_stats",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/profiler/utils:xplane_builder",
        "//tensorflow/core/profiler/utils:xplane_schema",
        # The test source builds plane names and stat values with
        # absl::StrCat, so depend on the strings library directly.
        "@com_google_absl//absl/strings",
    ],
)

View File

@ -219,6 +219,7 @@ OpMetricsDb ConvertDeviceTraceXPlaneToOpMetricsDb(
});
});
result.set_total_time_ps(last_op_offset_ps - first_op_offset_ps);
AddIdleOp(&result);
return result;
}

View File

@ -0,0 +1,159 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/utils/time_utils.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
namespace tensorflow {
namespace profiler {
namespace {
// Appends one event named `tf_op_fullname` to `line`, creating the event
// metadata on `plane` if needed, and sets its start timestamp and duration.
// When `on_device` is true, the event additionally gets a "level 0" stat whose
// value is parsed from `tf_op_fullname`.
void AddTensorFlowOpEvent(absl::string_view tf_op_fullname,
                          int64 start_timestamp_ns, int64 duration_ns,
                          bool on_device, XPlaneBuilder* plane,
                          XLineBuilder* line) {
  const auto& op_metadata = *plane->GetOrCreateEventMetadata(tf_op_fullname);
  XEventBuilder event = line->AddEvent(op_metadata);
  event.SetTimestampNs(start_timestamp_ns);
  event.SetDurationNs(duration_ns);
  if (on_device) {
    // Only device events carry the op name as a stat.
    event.ParseAndAddStatValue(*plane->GetOrCreateStatMetadata("level 0"),
                               tf_op_fullname);
  }
}
// Assigns both the id and the name of the plane wrapped by `plane`.
void SetXPlaneNameAndId(absl::string_view name, int64 id,
                        XPlaneBuilder* plane) {
  XPlaneBuilder& builder = *plane;
  builder.SetId(id);
  builder.SetName(name);
}
// Builds a host-threads plane with two threads (Op1 on both, Op2 on the
// second) and verifies the totals and per-op entries of the OpMetricsDb
// produced by ConvertHostThreadsXPlaneToOpMetricsDb.
TEST(ConvertXPlaneToOpMetricsDb, HostOpMetricsDb) {
  static constexpr char kTfOp1[] = "TfOp1";
  static constexpr char kTfOp2[] = "TfOp2";
  constexpr int64 kTfOp1StartNs = 100000;
  constexpr int64 kTfOp1DurationNs = 8000;
  constexpr int64 kTfOp2StartNs = 110000;
  constexpr int64 kTfOp2DurationNs = 10000;

  XPlane xplane;
  XPlaneBuilder host_plane(&xplane);
  SetXPlaneNameAndId(kHostThreads, /*id=*/0, &host_plane);

  // Thread 1 runs Op1 only.
  XLineBuilder thread1 = host_plane.GetOrCreateLine(/*line_id=*/10);
  AddTensorFlowOpEvent(absl::StrCat(kTfOp1, ":", kTfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/false, &host_plane,
                       &thread1);

  // Thread 2 runs Op1 followed by Op2.
  XLineBuilder thread2 = host_plane.GetOrCreateLine(/*line_id=*/20);
  AddTensorFlowOpEvent(absl::StrCat(kTfOp1, ":", kTfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/false, &host_plane,
                       &thread2);
  AddTensorFlowOpEvent(absl::StrCat(kTfOp2, ":", kTfOp2), kTfOp2StartNs,
                       kTfOp2DurationNs, /*on_device=*/false, &host_plane,
                       &thread2);

  OpMetricsDb op_metrics = ConvertHostThreadsXPlaneToOpMetricsDb(xplane);

  // Op1, Op2, Idle. The entries are indexed below, so abort the test on a size
  // mismatch instead of reading out of range.
  ASSERT_EQ(3, op_metrics.metrics_db_size());

  uint64 total_op_duration =
      NanosToPicos(kTfOp1DurationNs * 2 + kTfOp2DurationNs);
  EXPECT_EQ(total_op_duration, op_metrics.total_op_time_ps());
  uint64 total_duration = NanosToPicos(kTfOp2StartNs - kTfOp1StartNs +
                                       kTfOp2DurationNs + kTfOp1DurationNs);
  EXPECT_EQ(total_duration, op_metrics.total_time_ps());

  // Verifies OpMetricsDb is built correctly.
  const OpMetrics& op_1 = op_metrics.metrics_db().at(0);
  EXPECT_EQ(kTfOp1, op_1.name());
  EXPECT_EQ(kTfOp1, op_1.category());
  EXPECT_EQ(2, op_1.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp1DurationNs) * 2, op_1.time_ps());

  const OpMetrics& idle = op_metrics.metrics_db().at(1);
  EXPECT_EQ("IDLE", idle.name());
  // Idle time is the gap between the end of Op1 and the start of Op2, which is
  // 2000ns.
  EXPECT_EQ(NanosToPicos(2000), idle.time_ps());

  const OpMetrics& op_2 = op_metrics.metrics_db().at(2);
  EXPECT_EQ(kTfOp2, op_2.name());
  EXPECT_EQ(kTfOp2, op_2.category());
  EXPECT_EQ(1, op_2.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp2DurationNs), op_2.time_ps());
}
// Builds a GPU plane with two streams (Op1 on both, Op2 on the second) and
// verifies the totals and per-op entries of the OpMetricsDb produced by
// ConvertDeviceTraceXPlaneToOpMetricsDb.
TEST(ConvertXPlaneToOpMetricsDb, DeviceOpMetricsDb) {
  static constexpr char kTfOp1[] = "TfOp1";
  static constexpr char kTfOp2[] = "TfOp2";
  constexpr int64 kTfOp1StartNs = 100000;
  constexpr int64 kTfOp1DurationNs = 8000;
  constexpr int64 kTfOp2StartNs = 110000;
  constexpr int64 kTfOp2DurationNs = 10000;

  XPlane xplane;
  XPlaneBuilder device_plane(&xplane);
  SetXPlaneNameAndId(absl::StrCat(kGpuPlanePrefix, ":0"), /*id=*/1,
                     &device_plane);

  // Stream 1 runs Op1 only.
  XLineBuilder stream1 = device_plane.GetOrCreateLine(/*line_id=*/10);
  AddTensorFlowOpEvent(absl::StrCat(kTfOp1, ":", kTfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/true, &device_plane,
                       &stream1);

  // Stream 2 runs Op1 followed by Op2.
  XLineBuilder stream2 = device_plane.GetOrCreateLine(/*line_id=*/20);
  AddTensorFlowOpEvent(absl::StrCat(kTfOp1, ":", kTfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/true, &device_plane,
                       &stream2);
  AddTensorFlowOpEvent(absl::StrCat(kTfOp2, ":", kTfOp2), kTfOp2StartNs,
                       kTfOp2DurationNs, /*on_device=*/true, &device_plane,
                       &stream2);

  OpMetricsDb op_metrics = ConvertDeviceTraceXPlaneToOpMetricsDb(
      xplane, /*peak_tera_flops_per_second=*/0,
      /*peak_hbm_bw_giga_bytes_per_second=*/0);

  // Op1, Op2, Idle. The entries are indexed below, so abort the test on a size
  // mismatch instead of reading out of range.
  ASSERT_EQ(3, op_metrics.metrics_db_size());

  uint64 total_op_duration =
      NanosToPicos(kTfOp1DurationNs * 2 + kTfOp2DurationNs);
  EXPECT_EQ(total_op_duration, op_metrics.total_op_time_ps());
  // For device, the total_duration for each device is the total duration merged
  // from all GPU streams, which is from 100000 to 120000.
  uint64 total_duration =
      NanosToPicos(kTfOp2StartNs + kTfOp2DurationNs - kTfOp1StartNs);
  EXPECT_EQ(total_duration, op_metrics.total_time_ps());

  // Verifies OpMetricsDb is built correctly.
  const OpMetrics& op_1 = op_metrics.metrics_db().at(0);
  EXPECT_EQ(kTfOp1, op_1.name());
  EXPECT_EQ(kTfOp1, op_1.category());
  EXPECT_EQ(2, op_1.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp1DurationNs) * 2, op_1.time_ps());

  const OpMetrics& op_2 = op_metrics.metrics_db().at(1);
  EXPECT_EQ(kTfOp2, op_2.name());
  EXPECT_EQ(kTfOp2, op_2.category());
  EXPECT_EQ(1, op_2.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp2DurationNs), op_2.time_ps());

  const OpMetrics& idle = op_metrics.metrics_db().at(2);
  EXPECT_EQ("IDLE", idle.name());
  // GPU is always busy in this example.
  EXPECT_EQ(NanosToPicos(0), idle.time_ps());
}
} // namespace
} // namespace profiler
} // namespace tensorflow

View File

@ -15,19 +15,77 @@ limitations under the License.
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
namespace tensorflow {
namespace profiler {
namespace {
// Builds a DeviceCapabilities proto from the device-capability stats found on
// `device_plane`. A stat that is absent leaves its field at the proto default.
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
  XPlaneVisitor visitor(&device_plane);
  DeviceCapabilities capabilities;

  // The stat is in kHz while the proto field is in GHz.
  const auto clock_khz_stat = visitor.GetStats(kDevCapClockRateKHz);
  if (clock_khz_stat) {
    capabilities.set_clock_rate_in_ghz(clock_khz_stat->int64_value() /
                                       1000000.0);
  }

  const auto core_count_stat = visitor.GetStats(kDevCapCoreCount);
  if (core_count_stat) {
    capabilities.set_num_cores(core_count_stat->int64_value());
  }

  // Memory bandwidth is kept in bytes/s.
  const auto bandwidth_stat = visitor.GetStats(kDevCapMemoryBandwidth);
  if (bandwidth_stat) {
    capabilities.set_memory_bandwidth(bandwidth_stat->int64_value());
  }

  const auto memory_size_stat = visitor.GetStats(kDevCapMemorySize);
  if (memory_size_stat) {
    capabilities.set_memory_size_in_bytes(memory_size_stat->uint64_value());
  }

  const auto cc_major_stat = visitor.GetStats(kDevCapComputeCapMajor);
  if (cc_major_stat) {
    capabilities.mutable_compute_capability()->set_major(
        cc_major_stat->int64_value());
  }

  const auto cc_minor_stat = visitor.GetStats(kDevCapComputeCapMinor);
  if (cc_minor_stat) {
    capabilities.mutable_compute_capability()->set_minor(
        cc_minor_stat->int64_value());
  }

  return capabilities;
}
// Derives the performance environment (peak compute throughput, peak memory
// bandwidth and ridge point) from the capability stats on `device_plane`.
//
// The ridge point is peak_tera_flops_per_second * 1000 divided by
// peak_hbm_bw_giga_bytes_per_second. If the plane carries no memory-bandwidth
// stat the peak bandwidth is 0; in that case the ridge point is left at its
// default value rather than being set to inf/NaN by a division by zero.
PerfEnv GetPerfEnvFromXPlane(const XPlane& device_plane) {
  PerfEnv result;
  DeviceCapabilities cap = GetDeviceCapFromXPlane(device_plane);
  // NOTE(review): the division by 1000 presumably converts a per-SM GFLOP/s
  // figure to TFLOP/s -- confirm against GetFlopMaxThroughputPerSM.
  result.set_peak_tera_flops_per_second(GetFlopMaxThroughputPerSM(cap) / 1000 *
                                        cap.num_cores());
  // memory_bandwidth is in bytes/s; the PerfEnv field is in GB/s.
  result.set_peak_hbm_bw_giga_bytes_per_second(cap.memory_bandwidth() / 1e9);
  const double peak_bw_gbps = result.peak_hbm_bw_giga_bytes_per_second();
  if (peak_bw_gbps > 0) {
    result.set_ridge_point(result.peak_tera_flops_per_second() * 1000 /
                           peak_bw_gbps);
  }
  return result;
}
} // namespace
// Converts the host-threads plane and every GPU plane in `space` into an
// OpStats proto. Host ops populate host_op_metrics_db; per-device databases are
// merged into device_op_metrics_db. The perf env is taken from the first GPU
// plane encountered and reused for all subsequent planes.
OpStats ConvertXSpaceToOpStats(const XSpace& space) {
  OpStats op_stats;

  // Host side.
  const XPlane* host_plane = FindPlaneWithName(space, kHostThreads);
  if (host_plane) {
    *op_stats.mutable_host_op_metrics_db() =
        ConvertHostThreadsXPlaneToOpMetricsDb(*host_plane);
  }

  // Device side.
  OpMetricsDbCombiner combiner(op_stats.mutable_device_op_metrics_db());
  const auto gpu_planes = FindPlanesWithPrefix(space, kGpuPlanePrefix);
  for (const XPlane* gpu_plane : gpu_planes) {
    if (!op_stats.has_perf_env()) {
      *op_stats.mutable_perf_env() = GetPerfEnvFromXPlane(*gpu_plane);
    }
    const PerfEnv& env = op_stats.perf_env();
    OpMetricsDb plane_metrics_db = ConvertDeviceTraceXPlaneToOpMetricsDb(
        *gpu_plane, env.peak_tera_flops_per_second(),
        env.peak_hbm_bw_giga_bytes_per_second());
    combiner.Combine(plane_metrics_db);
  }

  return op_stats;
}

View File

@ -0,0 +1,63 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
namespace tensorflow {
namespace profiler {
namespace {
// Verifies that ConvertXSpaceToOpStats derives the PerfEnv from the capability
// stats attached to a GPU plane.
TEST(ConvertXPlaneToOpStats, PerfEnv) {
  constexpr double kMaxError = 0.01;
  constexpr int kClockRateKHz = 1530000;
  constexpr int kCoreCount = 80;
  constexpr uint64 kMemoryBandwidthBytesPerSecond = 900 * 1e9;
  // Volta.
  constexpr int kComputeCapMajor = 7;
  constexpr int kComputeCapMinor = 0;

  XSpace xspace;
  XPlaneBuilder device_plane(xspace.add_planes());
  device_plane.SetName(absl::StrCat(kGpuPlanePrefix, ":0"));

  // Attaches one capability stat, parsed from its textual value, to the plane.
  const auto add_stat = [&device_plane](absl::string_view stat_name,
                                        absl::string_view stat_value) {
    device_plane.ParseAndAddStatValue(
        *device_plane.GetOrCreateStatMetadata(stat_name), stat_value);
  };
  add_stat("clock_rate", absl::StrCat(kClockRateKHz));
  add_stat("core_count", absl::StrCat(kCoreCount));
  add_stat("memory_bandwidth", absl::StrCat(kMemoryBandwidthBytesPerSecond));
  add_stat("compute_cap_major", absl::StrCat(kComputeCapMajor));
  add_stat("compute_cap_minor", absl::StrCat(kComputeCapMinor));

  const OpStats op_stats = ConvertXSpaceToOpStats(xspace);
  const PerfEnv& env = op_stats.perf_env();
  EXPECT_NEAR(141, env.peak_tera_flops_per_second(), kMaxError);
  EXPECT_NEAR(900, env.peak_hbm_bw_giga_bytes_per_second(), kMaxError);
  EXPECT_NEAR(156.67, env.ridge_point(), kMaxError);
}
} // namespace
} // namespace profiler
} // namespace tensorflow

View File

@ -25,6 +25,6 @@ message DeviceCapabilities {
double clock_rate_in_ghz = 1;
uint32 num_cores = 2;
uint64 memory_size_in_bytes = 3;
uint64 memory_bandwidth = 4;
uint64 memory_bandwidth = 4; // Bytes/s.
CudaComputeCapability compute_capability = 5;
}

View File

@ -76,6 +76,7 @@ double IdleTimeRatio(const OpMetricsDb& metrics_db) {
}
// Returns the idle time in picoseconds: total time minus total op time,
// clamped at 0 when the op time is not smaller than the total time.
uint64 IdleTimePs(const OpMetricsDb& metrics_db) {
  const auto total_ps = metrics_db.total_time_ps();
  const auto busy_ps = metrics_db.total_op_time_ps();
  if (total_ps > busy_ps) {
    return total_ps - busy_ps;
  }
  return 0;
}

View File

@ -14,6 +14,8 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/xplane_utils.h"
#include "absl/strings/match.h"
namespace tensorflow {
namespace profiler {
@ -24,6 +26,15 @@ const XPlane* FindPlaneWithName(const XSpace& space, absl::string_view name) {
return nullptr;
}
std::vector<const XPlane*> FindPlanesWithPrefix(const XSpace& space,
absl::string_view prefix) {
std::vector<const XPlane*> result;
for (const XPlane& plane : space.planes()) {
if (absl::StartsWith(plane.name(), prefix)) result.push_back(&plane);
}
return result;
}
XPlane* GetOrCreatePlane(XSpace* space, absl::string_view name) {
for (XPlane& plane : *space->mutable_planes()) {
if (plane.name() == name) return &plane;

View File

@ -15,6 +15,8 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_UTILS_H_
#define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_UTILS_H_
#include <vector>
#include "absl/strings/string_view.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
@ -24,6 +26,10 @@ namespace profiler {
// Returns the plane with the given name or nullptr if not found.
const XPlane* FindPlaneWithName(const XSpace& space, absl::string_view name);
// Returns all the planes with a given prefix.
std::vector<const XPlane*> FindPlanesWithPrefix(const XSpace& space,
absl::string_view prefix);
// Returns the plane with the given name, creating it if necessary.
XPlane* GetOrCreatePlane(XSpace* space, absl::string_view name);