Add XSpace to OpStats converter for GPU.
PiperOrigin-RevId: 291187224 Change-Id: Ibcbaeaf1cc4fc8d2fbf2be77bba0145ce0b1e4ae
This commit is contained in:
parent
e9b0344dfa
commit
c3a2c28c9c
@ -27,6 +27,21 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
# Unit tests for the XPlane -> OpMetricsDb converters (host threads and device
# trace).
tf_cc_test(
    name = "xplane_to_op_metrics_db_test",
    size = "small",
    srcs = ["xplane_to_op_metrics_db_test.cc"],
    deps = [
        ":xplane_to_op_metrics_db",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc",
        "//tensorflow/core/profiler/utils:time_utils",
        "//tensorflow/core/profiler/utils:xplane_builder",
        "//tensorflow/core/profiler/utils:xplane_schema",
        # The test source uses absl::StrCat and absl::string_view directly, so
        # depend on them explicitly instead of relying on a transitive dep.
        "@com_google_absl//absl/strings",
    ],
)
|
||||
|
||||
cc_library(
|
||||
name = "run_metadata_to_trace_events",
|
||||
srcs = ["run_metadata_to_trace_events.cc"],
|
||||
@ -151,10 +166,31 @@ cc_library(
|
||||
hdrs = ["xplane_to_op_stats.h"],
|
||||
deps = [
|
||||
":xplane_to_op_metrics_db",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core/profiler/protobuf:hardware_types_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:hardware_type_utils",
|
||||
"//tensorflow/core/profiler/utils:xplane_schema",
|
||||
"//tensorflow/core/profiler/utils:xplane_utils",
|
||||
"//tensorflow/core/profiler/utils:xplane_visitor",
|
||||
],
|
||||
)
|
||||
|
||||
# Unit tests for the XSpace -> OpStats converter.
tf_cc_test(
    name = "xplane_to_op_stats_test",
    size = "small",
    srcs = ["xplane_to_op_stats_test.cc"],
    deps = [
        ":xplane_to_op_stats",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/profiler/utils:xplane_builder",
        "//tensorflow/core/profiler/utils:xplane_schema",
        # The test source calls absl::StrCat directly, so depend on it
        # explicitly instead of relying on a transitive dep.
        "@com_google_absl//absl/strings",
    ],
)
|
||||
|
||||
|
@ -219,6 +219,7 @@ OpMetricsDb ConvertDeviceTraceXPlaneToOpMetricsDb(
|
||||
});
|
||||
});
|
||||
result.set_total_time_ps(last_op_offset_ps - first_op_offset_ps);
|
||||
AddIdleOp(&result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
159
tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc
Normal file
159
tensorflow/core/profiler/convert/xplane_to_op_metrics_db_test.cc
Normal file
@ -0,0 +1,159 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
|
||||
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/time_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_builder.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
namespace {
|
||||
|
||||
// Appends one event named `tf_op_fullname` to `line`, starting at
// `start_timestamp_ns` and lasting `duration_ns`. The event metadata is
// looked up (or created) on `plane`. When `on_device` is true the event also
// gets a "level 0" stat whose value is `tf_op_fullname`.
void AddTensorFlowOpEvent(absl::string_view tf_op_fullname,
                          int64 start_timestamp_ns, int64 duration_ns,
                          bool on_device, XPlaneBuilder* plane,
                          XLineBuilder* line) {
  const auto& event_metadata = *plane->GetOrCreateEventMetadata(tf_op_fullname);
  XEventBuilder op_event = line->AddEvent(event_metadata);
  op_event.SetTimestampNs(start_timestamp_ns);
  op_event.SetDurationNs(duration_ns);

  // Only device events carry the "level 0" stat.
  if (on_device) {
    const auto& level0_metadata = *plane->GetOrCreateStatMetadata("level 0");
    op_event.ParseAndAddStatValue(level0_metadata, tf_op_fullname);
  }
}
|
||||
|
||||
// Convenience helper for the tests below: stamps `plane` with the given
// `name` and `id` in a single call.
void SetXPlaneNameAndId(absl::string_view name, int64 id,
                        XPlaneBuilder* plane) {
  XPlaneBuilder& target = *plane;
  target.SetName(name);
  target.SetId(id);
}
|
||||
|
||||
// Builds a host-threads plane with two lines (TfOp1 on the first; TfOp1 and
// TfOp2 on the second) and checks the OpMetricsDb produced by
// ConvertHostThreadsXPlaneToOpMetricsDb.
TEST(ConvertXPlaneToOpMetricsDb, HostOpMetricsDb) {
  static constexpr char TfOp1[] = "TfOp1";
  static constexpr char TfOp2[] = "TfOp2";
  constexpr int64 kTfOp1StartNs = 100000;
  constexpr int64 kTfOp1DurationNs = 8000;
  constexpr int64 kTfOp2StartNs = 110000;
  constexpr int64 kTfOp2DurationNs = 10000;

  XPlane xplane;
  XPlaneBuilder host_plane(&xplane);
  SetXPlaneNameAndId(kHostThreads, /*id=*/0, &host_plane);
  XLineBuilder thread1 = host_plane.GetOrCreateLine(/*line_id=*/10);
  AddTensorFlowOpEvent(absl::StrCat(TfOp1, ":", TfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/false, &host_plane,
                       &thread1);
  XLineBuilder thread2 = host_plane.GetOrCreateLine(/*line_id=*/20);
  AddTensorFlowOpEvent(absl::StrCat(TfOp1, ":", TfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/false, &host_plane,
                       &thread2);
  AddTensorFlowOpEvent(absl::StrCat(TfOp2, ":", TfOp2), kTfOp2StartNs,
                       kTfOp2DurationNs, /*on_device=*/false, &host_plane,
                       &thread2);

  OpMetricsDb op_metrics = ConvertHostThreadsXPlaneToOpMetricsDb(xplane);
  // Expected entries, in order: Op1, Idle, Op2. This is a hard assertion
  // because the indexed accesses below are only valid when all three entries
  // exist.
  ASSERT_EQ(3, op_metrics.metrics_db_size());
  uint64 total_op_duration =
      NanosToPicos(kTfOp1DurationNs * 2 + kTfOp2DurationNs);
  EXPECT_EQ(total_op_duration, op_metrics.total_op_time_ps());
  // (Op2 end - Op1 start) + Op1 duration = 20000ns + 8000ns; presumably the
  // host total is the sum of each thread's span -- confirm against the
  // converter.
  uint64 total_duration = NanosToPicos(kTfOp2StartNs - kTfOp1StartNs +
                                       kTfOp2DurationNs + kTfOp1DurationNs);
  EXPECT_EQ(total_duration, op_metrics.total_time_ps());

  // Verifies OpMetricsDb is built correctly.
  const OpMetrics& op_1 = op_metrics.metrics_db().at(0);
  EXPECT_EQ(TfOp1, op_1.name());
  EXPECT_EQ(TfOp1, op_1.category());
  EXPECT_EQ(2, op_1.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp1DurationNs) * 2, op_1.time_ps());

  const OpMetrics& idle = op_metrics.metrics_db().at(1);
  EXPECT_EQ("IDLE", idle.name());
  // Idle time is the gap between the end of Op1 (108000ns) and the start of
  // Op2 (110000ns), which is 2000ns.
  EXPECT_EQ(NanosToPicos(2000), idle.time_ps());

  const OpMetrics& op_2 = op_metrics.metrics_db().at(2);
  EXPECT_EQ(TfOp2, op_2.name());
  EXPECT_EQ(TfOp2, op_2.category());
  EXPECT_EQ(1, op_2.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp2DurationNs), op_2.time_ps());
}
|
||||
|
||||
// Builds a GPU plane with two lines (TfOp1 on the first; TfOp1 and TfOp2 on
// the second) and checks the OpMetricsDb produced by
// ConvertDeviceTraceXPlaneToOpMetricsDb.
TEST(ConvertXPlaneToOpMetricsDb, DeviceOpMetricsDb) {
  static constexpr char TfOp1[] = "TfOp1";
  static constexpr char TfOp2[] = "TfOp2";
  constexpr int64 kTfOp1StartNs = 100000;
  constexpr int64 kTfOp1DurationNs = 8000;
  constexpr int64 kTfOp2StartNs = 110000;
  constexpr int64 kTfOp2DurationNs = 10000;

  XPlane xplane;
  XPlaneBuilder device_plane(&xplane);
  SetXPlaneNameAndId(absl::StrCat(kGpuPlanePrefix, ":0"), /*id=*/1,
                     &device_plane);
  XLineBuilder stream1 = device_plane.GetOrCreateLine(/*line_id=*/10);
  AddTensorFlowOpEvent(absl::StrCat(TfOp1, ":", TfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/true, &device_plane,
                       &stream1);
  XLineBuilder stream2 = device_plane.GetOrCreateLine(/*line_id=*/20);
  AddTensorFlowOpEvent(absl::StrCat(TfOp1, ":", TfOp1), kTfOp1StartNs,
                       kTfOp1DurationNs, /*on_device=*/true, &device_plane,
                       &stream2);
  AddTensorFlowOpEvent(absl::StrCat(TfOp2, ":", TfOp2), kTfOp2StartNs,
                       kTfOp2DurationNs, /*on_device=*/true, &device_plane,
                       &stream2);

  OpMetricsDb op_metrics = ConvertDeviceTraceXPlaneToOpMetricsDb(
      xplane, /*peak_tera_flops_per_second=*/0,
      /*peak_hbm_bw_giga_bytes_per_second=*/0);

  // Expected entries, in order: Op1, Op2, Idle. This is a hard assertion
  // because the indexed accesses below are only valid when all three entries
  // exist.
  ASSERT_EQ(3, op_metrics.metrics_db_size());
  uint64 total_op_duration =
      NanosToPicos(kTfOp1DurationNs * 2 + kTfOp2DurationNs);
  EXPECT_EQ(total_op_duration, op_metrics.total_op_time_ps());
  // For device, the total_duration for each device is the total duration merged
  // from all GPU streams, which is from 100000 to 120000.
  uint64 total_duration =
      NanosToPicos(kTfOp2StartNs + kTfOp2DurationNs - kTfOp1StartNs);
  EXPECT_EQ(total_duration, op_metrics.total_time_ps());

  // Verifies OpMetricsDb is built correctly.
  const OpMetrics& op_1 = op_metrics.metrics_db().at(0);
  EXPECT_EQ(TfOp1, op_1.name());
  EXPECT_EQ(TfOp1, op_1.category());
  EXPECT_EQ(2, op_1.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp1DurationNs) * 2, op_1.time_ps());

  const OpMetrics& op_2 = op_metrics.metrics_db().at(1);
  EXPECT_EQ(TfOp2, op_2.name());
  EXPECT_EQ(TfOp2, op_2.category());
  EXPECT_EQ(1, op_2.occurrences());
  EXPECT_EQ(NanosToPicos(kTfOp2DurationNs), op_2.time_ps());

  const OpMetrics& idle = op_metrics.metrics_db().at(2);
  EXPECT_EQ("IDLE", idle.name());
  // The summed op time (26000ns) exceeds the merged total time (20000ns), so
  // the expected idle time is zero: the GPU is always busy in this example.
  EXPECT_EQ(NanosToPicos(0), idle.time_ps());
}
|
||||
|
||||
} // namespace
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
@ -15,19 +15,77 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
|
||||
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
|
||||
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
namespace {
|
||||
|
||||
// Assembles a DeviceCapabilities proto from the device-capability stats
// attached to `device_plane`. Each field is written only when the matching
// stat is found on the plane; fields without a stat keep their proto
// defaults.
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
  XPlaneVisitor visitor(&device_plane);
  DeviceCapabilities capabilities;

  // The stat is in kHz; the proto field is in GHz.
  if (auto khz_stat = visitor.GetStats(kDevCapClockRateKHz)) {
    const double clock_rate_ghz = khz_stat->int64_value() / 1000000.0;
    capabilities.set_clock_rate_in_ghz(clock_rate_ghz);
  }

  if (auto cores_stat = visitor.GetStats(kDevCapCoreCount)) {
    capabilities.set_num_cores(cores_stat->int64_value());
  }

  // Memory bandwidth is stored in bytes/s.
  if (auto bandwidth_stat = visitor.GetStats(kDevCapMemoryBandwidth)) {
    capabilities.set_memory_bandwidth(bandwidth_stat->int64_value());
  }

  // NOTE(review): this is the only stat read through uint64_value(); every
  // other one uses int64_value(). Confirm the producer really records the
  // memory size as an unsigned value.
  if (auto mem_size_stat = visitor.GetStats(kDevCapMemorySize)) {
    capabilities.set_memory_size_in_bytes(mem_size_stat->uint64_value());
  }

  // The compute-capability submessage is only created when at least one of
  // its stats exists.
  if (auto major_stat = visitor.GetStats(kDevCapComputeCapMajor)) {
    capabilities.mutable_compute_capability()->set_major(
        major_stat->int64_value());
  }
  if (auto minor_stat = visitor.GetStats(kDevCapComputeCapMinor)) {
    capabilities.mutable_compute_capability()->set_minor(
        minor_stat->int64_value());
  }

  return capabilities;
}
|
||||
|
||||
// Derives the PerfEnv (peak compute throughput, peak memory bandwidth and
// ridge point) from the device capabilities recorded on `device_plane`.
//
// The ridge point is peak_tera_flops_per_second * 1000 divided by
// peak_hbm_bw_giga_bytes_per_second. When the plane carries no memory
// bandwidth stat the bandwidth is 0; in that case the ridge point is left at
// its proto default instead of storing the inf/NaN a division by zero would
// produce.
PerfEnv GetPerfEnvFromXPlane(const XPlane& device_plane) {
  PerfEnv result;
  DeviceCapabilities cap = GetDeviceCapFromXPlane(device_plane);
  // Presumably GetFlopMaxThroughputPerSM reports GFLOP/s per core, hence the
  // division by 1000 to get TFLOP/s -- confirm against hardware_type_utils.
  result.set_peak_tera_flops_per_second(GetFlopMaxThroughputPerSM(cap) / 1000 *
                                        cap.num_cores());
  // cap.memory_bandwidth() is in bytes/s; convert to GB/s.
  result.set_peak_hbm_bw_giga_bytes_per_second(cap.memory_bandwidth() / 1e9);

  const double peak_bw_giga_bytes_per_second =
      result.peak_hbm_bw_giga_bytes_per_second();
  if (peak_bw_giga_bytes_per_second > 0) {
    result.set_ridge_point(result.peak_tera_flops_per_second() * 1000 /
                           peak_bw_giga_bytes_per_second);
  }
  return result;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Converts `space` into an OpStats proto:
//  - the host-threads plane, if present, fills host_op_metrics_db;
//  - every plane whose name starts with the GPU prefix is converted and
//    merged into device_op_metrics_db;
//  - perf_env is computed from the first GPU plane encountered and reused
//    for all the others.
OpStats ConvertXSpaceToOpStats(const XSpace& space) {
  OpStats result;

  // Host side.
  const XPlane* host_plane = FindPlaneWithName(space, kHostThreads);
  if (host_plane != nullptr) {
    *result.mutable_host_op_metrics_db() =
        ConvertHostThreadsXPlaneToOpMetricsDb(*host_plane);
  }

  // Device side: accumulate all GPU planes into one database.
  OpMetricsDbCombiner device_db_combiner(
      result.mutable_device_op_metrics_db());
  const auto gpu_planes = FindPlanesWithPrefix(space, kGpuPlanePrefix);
  for (const XPlane* gpu_plane : gpu_planes) {
    // Only the first device plane determines the performance environment.
    if (!result.has_perf_env()) {
      *result.mutable_perf_env() = GetPerfEnvFromXPlane(*gpu_plane);
    }
    const PerfEnv& env = result.perf_env();
    const double peak_tera_flops = env.peak_tera_flops_per_second();
    const double peak_hbm_bw = env.peak_hbm_bw_giga_bytes_per_second();
    OpMetricsDb plane_db = ConvertDeviceTraceXPlaneToOpMetricsDb(
        *gpu_plane, peak_tera_flops, peak_hbm_bw);
    device_db_combiner.Combine(plane_db);
  }

  return result;
}
|
||||
|
||||
|
63
tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc
Normal file
63
tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc
Normal file
@ -0,0 +1,63 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
|
||||
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_builder.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
namespace {
|
||||
|
||||
// Attaches device-capability stats to a single GPU plane and checks the
// PerfEnv that ConvertXSpaceToOpStats derives from them.
TEST(ConvertXPlaneToOpStats, PerfEnv) {
  XSpace xspace;
  constexpr double kMaxError = 0.01;
  constexpr int kClockRateKHz = 1530000;
  constexpr int kCoreCount = 80;
  constexpr uint64 kMemoryBandwidthBytesPerSecond = 900 * 1e9;
  // Volta.
  constexpr int kComputeCapMajor = 7;
  constexpr int kComputeCapMinor = 0;

  XPlaneBuilder device_plane(xspace.add_planes());
  device_plane.SetName(absl::StrCat(kGpuPlanePrefix, ":0"));

  // Records one plane-level stat called `stat_name` with the textual `value`.
  const auto add_plane_stat = [&device_plane](absl::string_view stat_name,
                                              absl::string_view value) {
    device_plane.ParseAndAddStatValue(
        *device_plane.GetOrCreateStatMetadata(stat_name), value);
  };
  add_plane_stat("clock_rate", absl::StrCat(kClockRateKHz));
  add_plane_stat("core_count", absl::StrCat(kCoreCount));
  add_plane_stat("memory_bandwidth",
                 absl::StrCat(kMemoryBandwidthBytesPerSecond));
  add_plane_stat("compute_cap_major", absl::StrCat(kComputeCapMajor));
  add_plane_stat("compute_cap_minor", absl::StrCat(kComputeCapMinor));

  OpStats op_stats = ConvertXSpaceToOpStats(xspace);
  const PerfEnv& perf_env = op_stats.perf_env();
  EXPECT_NEAR(141, perf_env.peak_tera_flops_per_second(), kMaxError);
  EXPECT_NEAR(900, perf_env.peak_hbm_bw_giga_bytes_per_second(), kMaxError);
  // 141 TFLOP/s * 1000 / 900 GB/s.
  EXPECT_NEAR(156.67, perf_env.ridge_point(), kMaxError);
}
|
||||
|
||||
} // namespace
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
@ -25,6 +25,6 @@ message DeviceCapabilities {
|
||||
double clock_rate_in_ghz = 1;
|
||||
uint32 num_cores = 2;
|
||||
uint64 memory_size_in_bytes = 3;
|
||||
uint64 memory_bandwidth = 4;
|
||||
uint64 memory_bandwidth = 4; // Bytes/s.
|
||||
CudaComputeCapability compute_capability = 5;
|
||||
}
|
||||
|
@ -76,6 +76,7 @@ double IdleTimeRatio(const OpMetricsDb& metrics_db) {
|
||||
}
|
||||
|
||||
// Returns total_time_ps minus total_op_time_ps for `metrics_db`, or 0 when
// the op time is at least the total time (so the subtraction never goes
// below zero).
uint64 IdleTimePs(const OpMetricsDb& metrics_db) {
  const auto total_ps = metrics_db.total_time_ps();
  const auto busy_ps = metrics_db.total_op_time_ps();
  return total_ps > busy_ps ? total_ps - busy_ps : 0;
}
|
||||
|
||||
|
@ -14,6 +14,8 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/core/profiler/utils/xplane_utils.h"
|
||||
|
||||
#include "absl/strings/match.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
@ -24,6 +26,15 @@ const XPlane* FindPlaneWithName(const XSpace& space, absl::string_view name) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<const XPlane*> FindPlanesWithPrefix(const XSpace& space,
|
||||
absl::string_view prefix) {
|
||||
std::vector<const XPlane*> result;
|
||||
for (const XPlane& plane : space.planes()) {
|
||||
if (absl::StartsWith(plane.name(), prefix)) result.push_back(&plane);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
XPlane* GetOrCreatePlane(XSpace* space, absl::string_view name) {
|
||||
for (XPlane& plane : *space->mutable_planes()) {
|
||||
if (plane.name() == name) return &plane;
|
||||
|
@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_UTILS_H_
|
||||
#define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_UTILS_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
|
||||
@ -24,6 +26,10 @@ namespace profiler {
|
||||
// Returns the plane with the given name or nullptr if not found.
|
||||
const XPlane* FindPlaneWithName(const XSpace& space, absl::string_view name);
|
||||
|
||||
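// Returns all the planes in `space` whose name starts with the given prefix,
// in the order they appear; the result is empty if no plane matches.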
|
||||
std::vector<const XPlane*> FindPlanesWithPrefix(const XSpace& space,
|
||||
absl::string_view prefix);
|
||||
|
||||
// Returns the plane with the given name, creating it if necessary.
|
||||
XPlane* GetOrCreatePlane(XSpace* space, absl::string_view name);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user