Serialize the device capabilities of GPU devices.

PiperOrigin-RevId: 289189439
Change-Id: Ie655ea1129344e3b3f22262fa286a61150169599
This commit is contained in:
A. Unique TensorFlower 2020-01-10 16:42:21 -08:00 committed by TensorFlower Gardener
parent 20b5cbd0ca
commit a28a77c0dd
6 changed files with 189 additions and 80 deletions

View File

@ -126,6 +126,14 @@ void CreateXEvent(const CuptiTracerEvent& event, uint64 offset_ns,
}
}
}
// Reads one integer attribute of `device` via cuDeviceGetAttribute.
// Returns the value the driver wrote, or absl::nullopt when the call reports
// anything other than CUDA_SUCCESS.
absl::optional<int> GetDeviceAttribute(CUdevice device,
                                       CUdevice_attribute attrib) {
  int attribute_value = 0;
  if (cuDeviceGetAttribute(&attribute_value, attrib, device) != CUDA_SUCCESS) {
    return absl::nullopt;
  }
  return attribute_value;
}
} // namespace
// CuptiTraceCollectorImpl stores the CuptiTracerEvents from CuptiTracer and
@ -180,6 +188,8 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
XPlaneBuilder device_plane(GetOrCreatePlane(space, name));
per_device_collector_[device_ordinal].Flush(
start_walltime_ns_, start_gpu_ns_, &device_plane, &host_plane);
per_device_collector_[device_ordinal].GetDeviceCapabilities(
device_ordinal, &device_plane);
}
}
@ -318,6 +328,70 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
}
}
// Looks up the CUDA device for `device_ordinal` and attaches its capabilities
// to `device_plane` as plane-level stats: clock rate, multiprocessor count,
// derived memory bandwidth, total memory size, and compute capability
// major/minor. Each stat is added only if its underlying driver query
// succeeds; if the device handle itself cannot be obtained nothing is added.
void GetDeviceCapabilities(int32 device_ordinal,
                           XPlaneBuilder* device_plane) {
  CUdevice cu_device;
  const CUresult lookup_status = cuDeviceGet(&cu_device, device_ordinal);
  if (lookup_status != CUDA_SUCCESS) return;

  // Resolves (creating on first use) the stat metadata entry for `type`.
  auto metadata_for = [device_plane](StatType type) -> const XStatMetadata& {
    return *device_plane->GetOrCreateStatMetadata(GetStatTypeStr(type));
  };

  const auto sm_clock_khz =
      GetDeviceAttribute(cu_device, CU_DEVICE_ATTRIBUTE_CLOCK_RATE);
  if (sm_clock_khz.has_value()) {
    device_plane->AddStatValue(metadata_for(StatType::kDevCapClockRateKHz),
                               *sm_clock_khz);
  }

  const auto multiprocessor_count =
      GetDeviceAttribute(cu_device, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT);
  if (multiprocessor_count.has_value()) {
    device_plane->AddStatValue(metadata_for(StatType::kDevCapCoreCount),
                               *multiprocessor_count);
  }

  const auto dram_clock_khz =
      GetDeviceAttribute(cu_device, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE);
  const auto dram_bus_bits = GetDeviceAttribute(
      cu_device, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH);
  if (dram_clock_khz.has_value() && dram_bus_bits.has_value()) {
    // The factor of 2 accounts for double data rate: HBM transfers two bits
    // per data lane per clock. kHz * 1000 gives Hz and bits / 8 gives bytes.
    // The leading 2ULL keeps the whole product in unsigned 64-bit arithmetic.
    const auto bandwidth =
        2ULL * (*dram_clock_khz) * 1000 * (*dram_bus_bits) / 8;
    device_plane->AddStatValue(metadata_for(StatType::kDevCapMemoryBandwidth),
                               bandwidth);
  }

  size_t device_memory_bytes = 0;
  if (cuDeviceTotalMem(&device_memory_bytes, cu_device) == CUDA_SUCCESS) {
    device_plane->AddStatValue(metadata_for(StatType::kDevCapMemorySize),
                               static_cast<uint64>(device_memory_bytes));
  }

  const auto cc_major = GetDeviceAttribute(
      cu_device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR);
  if (cc_major.has_value()) {
    device_plane->AddStatValue(metadata_for(StatType::kDevCapComputeCapMajor),
                               *cc_major);
  }

  const auto cc_minor = GetDeviceAttribute(
      cu_device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR);
  if (cc_minor.has_value()) {
    device_plane->AddStatValue(metadata_for(StatType::kDevCapComputeCapMinor),
                               *cc_minor);
  }
}
absl::Mutex mutex;
std::string stream_device GUARDED_BY(mutex);
std::string memcpy_device GUARDED_BY(mutex);

View File

@ -12,7 +12,7 @@ message XSpace {
// An XPlane is a container of parallel timelines (XLines), generated by a
// profiling source or by post-processing one or more XPlanes.
// Next ID: 6
// Next ID: 7
message XPlane {
int64 id = 1;
@ -30,6 +30,9 @@ message XPlane {
// XStatMetadata map, each entry uses the XStatMetadata.id as key. This map
// should be used for stats that share the same ID over the whole XPlane.
map<int64, XStatMetadata> stat_metadata = 5;
// XStats associated with this plane, e.g. device capabilities.
repeated XStat stats = 6;
}
// An XLine is a timeline of trace events (XEvents).

View File

@ -14,13 +14,13 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "absl/strings/numbers.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
namespace tensorflow {
namespace profiler {
XPlaneBuilder::XPlaneBuilder(XPlane* plane) : plane_(plane) {
XPlaneBuilder::XPlaneBuilder(XPlane* plane)
: XStatsBuilder<XPlane>(plane), plane_(plane) {
for (auto& iter : *plane->mutable_event_metadata()) {
last_event_metadata_id_ =
std::max<int64>(last_event_metadata_id_, iter.second.id());
@ -95,27 +95,5 @@ XEventBuilder XLineBuilder::AddEvent(const XEventMetadata& metadata) {
return XEventBuilder(line_, event);
}
XStat* XEventBuilder::AddStat(const XStatMetadata& metadata) {
XStat* stat = event_->add_stats();
stat->set_metadata_id(metadata.id());
return stat;
}
void XEventBuilder::ParseAndAddStatValue(const XStatMetadata& metadata,
absl::string_view value) {
int64 int_value;
uint64 uint_value;
double double_value;
if (absl::SimpleAtoi(value, &int_value)) {
AddStatValue(metadata, int_value);
} else if (absl::SimpleAtoi(value, &uint_value)) {
AddStatValue(metadata, uint_value);
} else if (absl::SimpleAtod(value, &double_value)) {
AddStatValue(metadata, double_value);
} else {
AddStatValue(metadata, value);
}
}
} // namespace profiler
} // namespace tensorflow

View File

@ -16,6 +16,7 @@ limitations under the License.
#define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_BUILDER_H_
#include "absl/container/flat_hash_map.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
@ -25,10 +26,71 @@ limitations under the License.
namespace tensorflow {
namespace profiler {
class XEventBuilder {
template <class T>
class XStatsBuilder {
public:
explicit XStatsBuilder(T* stats_owner) : stats_owner_(stats_owner) {}
void AddStatValue(const XStatMetadata& metadata, uint32 value) {
AddStat(metadata)->set_uint64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, uint64 value) {
AddStat(metadata)->set_uint64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, int32 value) {
AddStat(metadata)->set_int64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, int64 value) {
AddStat(metadata)->set_int64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, double value) {
AddStat(metadata)->set_double_value(value);
}
void AddStatValue(const XStatMetadata& metadata, absl::string_view value) {
AddStat(metadata)->set_str_value(string(value));
}
void AddStatValue(const XStatMetadata& metadata, string&& value) {
AddStat(metadata)->set_str_value(std::move(value));
}
void AddStat(const XStatMetadata& metadata, const XStat& stat) {
DCHECK_EQ(metadata.id(), stat.metadata_id());
*stats_owner_->add_stats() = stat;
}
void ParseAndAddStatValue(const XStatMetadata& metadata,
absl::string_view value) {
int64 int_value;
uint64 uint_value;
double double_value;
if (absl::SimpleAtoi(value, &int_value)) {
AddStatValue(metadata, int_value);
} else if (absl::SimpleAtoi(value, &uint_value)) {
AddStatValue(metadata, uint_value);
} else if (absl::SimpleAtod(value, &double_value)) {
AddStatValue(metadata, double_value);
} else {
AddStatValue(metadata, value);
}
}
void ReserveStats(size_t num_stats) {
stats_owner_->mutable_stats()->Reserve(num_stats);
}
private:
XStat* AddStat(const XStatMetadata& metadata) {
XStat* stat = stats_owner_->add_stats();
stat->set_metadata_id(metadata.id());
return stat;
}
T* stats_owner_;
};
class XEventBuilder : public XStatsBuilder<XEvent> {
public:
XEventBuilder(const XLine* line, XEvent* event)
: line_(line), event_(event) {}
: XStatsBuilder<XEvent>(event), line_(line), event_(event) {}
void SetOffsetPs(int64 offset_ps) { event_->set_offset_ps(offset_ps); }
@ -55,43 +117,7 @@ class XEventBuilder {
event_->offset_ps());
}
void ReserveStats(size_t num_stats) {
event_->mutable_stats()->Reserve(num_stats);
}
void AddStatValue(const XStatMetadata& metadata, uint32 value) {
AddStat(metadata)->set_uint64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, uint64 value) {
AddStat(metadata)->set_uint64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, int32 value) {
AddStat(metadata)->set_int64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, int64 value) {
AddStat(metadata)->set_int64_value(value);
}
void AddStatValue(const XStatMetadata& metadata, double value) {
AddStat(metadata)->set_double_value(value);
}
void AddStatValue(const XStatMetadata& metadata, absl::string_view value) {
AddStat(metadata)->set_str_value(string(value));
}
void AddStatValue(const XStatMetadata& metadata, string&& value) {
AddStat(metadata)->set_str_value(std::move(value));
}
void ParseAndAddStatValue(const XStatMetadata& metadata,
absl::string_view value);
void AddStat(const XStatMetadata& metadata, const XStat& stat) {
DCHECK_EQ(metadata.id(), stat.metadata_id());
*event_->add_stats() = stat;
}
private:
XStat* AddStat(const XStatMetadata& metadata);
const XLine* line_;
XEvent* event_;
};
@ -126,7 +152,7 @@ class XLineBuilder {
// Provides methods to build an XPlane.
// NOTE: avoid using two builders to wrap the same XPlane.
class XPlaneBuilder {
class XPlaneBuilder : public XStatsBuilder<XPlane> {
public:
explicit XPlaneBuilder(XPlane* plane);

View File

@ -64,22 +64,43 @@ static_assert(sizeof(kHostEventTypeMetadataMap) / sizeof(absl::string_view) ==
"Mismatch between enum and string map.");
static const absl::string_view kStatTypeStrMap[] = {
"UnknownStatType", "id",
"parent_step_id", "function_step_id",
"device_ordinal", "chip_ordinal",
"node_ordinal", "model_id",
"queue_addr", "request_id",
"run_id", "graph_type",
"step_num", "iter_num",
"index_on_host", "bytes_reserved",
"bytes_allocated", "bytes_available",
"fragmentation", "device_id",
"context_id", "correlation_id",
"memcpy_details", "memalloc_details",
"kernel_details", "group_id",
"step_name", "level 0",
"tf_op", "hlo_op",
"UnknownStatType",
"id",
"parent_step_id",
"function_step_id",
"device_ordinal",
"chip_ordinal",
"node_ordinal",
"model_id",
"queue_addr",
"request_id",
"run_id",
"graph_type",
"step_num",
"iter_num",
"index_on_host",
"bytes_reserved",
"bytes_allocated",
"bytes_available",
"fragmentation",
"device_id",
"context_id",
"correlation_id",
"memcpy_details",
"memalloc_details",
"kernel_details",
"group_id",
"step_name",
"level 0",
"tf_op",
"hlo_op",
"hlo_module",
"clock_rate",
"core_count",
"memory_bandwidth",
"memory_size",
"compute_cap_major",
"compute_cap_minor",
};
static_assert(sizeof(kStatTypeStrMap) / sizeof(absl::string_view) ==

View File

@ -98,7 +98,14 @@ enum StatType {
kTfOp,
kHloOp,
kHloModule,
kLastStatType = kHloModule,
// Device capability related.
kDevCapClockRateKHz,
kDevCapCoreCount,
kDevCapMemoryBandwidth,
kDevCapMemorySize,
kDevCapComputeCapMajor,
kDevCapComputeCapMinor,
kLastStatType = kDevCapComputeCapMinor,
};
absl::Span<const absl::string_view> GetHostEventTypeStrMap();