serialize the device capability for gpu device.
PiperOrigin-RevId: 289189439 Change-Id: Ie655ea1129344e3b3f22262fa286a61150169599
This commit is contained in:
parent
20b5cbd0ca
commit
a28a77c0dd
@ -126,6 +126,14 @@ void CreateXEvent(const CuptiTracerEvent& event, uint64 offset_ns,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads one attribute of `device` through cuDeviceGetAttribute.
// Returns the attribute value when the driver call reports CUDA_SUCCESS and
// absl::nullopt for any other result code.
absl::optional<int> GetDeviceAttribute(CUdevice device,
                                       CUdevice_attribute attrib) {
  int value;
  if (cuDeviceGetAttribute(&value, attrib, device) == CUDA_SUCCESS) {
    return value;
  }
  return absl::nullopt;
}
|
||||
} // namespace
|
||||
|
||||
// CuptiTraceCollectorImpl stores the CuptiTracerEvents from CuptiTracer and
|
||||
@ -180,6 +188,8 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
|
||||
XPlaneBuilder device_plane(GetOrCreatePlane(space, name));
|
||||
per_device_collector_[device_ordinal].Flush(
|
||||
start_walltime_ns_, start_gpu_ns_, &device_plane, &host_plane);
|
||||
per_device_collector_[device_ordinal].GetDeviceCapabilities(
|
||||
device_ordinal, &device_plane);
|
||||
}
|
||||
}
|
||||
|
||||
@ -318,6 +328,70 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
|
||||
}
|
||||
}
|
||||
|
||||
// Looks up the device with ordinal `device_ordinal` via cuDeviceGet and
// records its capabilities as stats on `device_plane`: clock rate, core
// (multiprocessor) count, memory bandwidth, memory size and compute
// capability major/minor. Each stat is added only if the corresponding driver
// query succeeds; if the device handle cannot be obtained nothing is added.
void GetDeviceCapabilities(int32 device_ordinal,
                           XPlaneBuilder* device_plane) {
  CUdevice device;
  const bool device_found =
      cuDeviceGet(&device, device_ordinal) == CUDA_SUCCESS;
  if (!device_found) return;

  // Core clock rate, stored under the kDevCapClockRateKHz stat.
  const auto sm_clock_khz =
      GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_CLOCK_RATE);
  if (sm_clock_khz.has_value()) {
    device_plane->AddStatValue(
        *device_plane->GetOrCreateStatMetadata(
            GetStatTypeStr(StatType::kDevCapClockRateKHz)),
        *sm_clock_khz);
  }

  // Number of multiprocessors, reported as the core count.
  const auto sm_count =
      GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT);
  if (sm_count.has_value()) {
    device_plane->AddStatValue(
        *device_plane->GetOrCreateStatMetadata(
            GetStatTypeStr(StatType::kDevCapCoreCount)),
        *sm_count);
  }

  // Memory bandwidth is derived from two attributes, so both must be present.
  const auto dram_clock_khz =
      GetDeviceAttribute(device, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE);
  const auto bus_width_bits = GetDeviceAttribute(
      device, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH);
  if (dram_clock_khz.has_value() && bus_width_bits.has_value()) {
    // The factor of 2 assumes double-data-rate memory (two bits per data lane
    // per clock). `* 1000` converts the kHz clock to Hz and `/ 8` converts
    // bits to bytes. The leading 2ULL keeps the whole product in 64-bit
    // unsigned arithmetic.
    const auto bandwidth =
        2ULL * (*dram_clock_khz) * 1000 * (*bus_width_bits) / 8;
    device_plane->AddStatValue(
        *device_plane->GetOrCreateStatMetadata(
            GetStatTypeStr(StatType::kDevCapMemoryBandwidth)),
        bandwidth);
  }

  // Total device memory as reported by cuDeviceTotalMem.
  size_t total_mem = 0;
  if (cuDeviceTotalMem(&total_mem, device) == CUDA_SUCCESS) {
    device_plane->AddStatValue(
        *device_plane->GetOrCreateStatMetadata(
            GetStatTypeStr(StatType::kDevCapMemorySize)),
        static_cast<uint64>(total_mem));
  }

  // Compute capability, major then minor, each recorded independently.
  const auto cc_major = GetDeviceAttribute(
      device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR);
  if (cc_major.has_value()) {
    device_plane->AddStatValue(
        *device_plane->GetOrCreateStatMetadata(
            GetStatTypeStr(StatType::kDevCapComputeCapMajor)),
        *cc_major);
  }
  const auto cc_minor = GetDeviceAttribute(
      device, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR);
  if (cc_minor.has_value()) {
    device_plane->AddStatValue(
        *device_plane->GetOrCreateStatMetadata(
            GetStatTypeStr(StatType::kDevCapComputeCapMinor)),
        *cc_minor);
  }
}
|
||||
|
||||
absl::Mutex mutex;
|
||||
std::string stream_device GUARDED_BY(mutex);
|
||||
std::string memcpy_device GUARDED_BY(mutex);
|
||||
|
@ -12,7 +12,7 @@ message XSpace {
|
||||
|
||||
// An XPlane is a container of parallel timelines (XLines), generated by a
|
||||
// profiling source or by post-processing one or more XPlanes.
|
||||
// Next ID: 6
|
||||
// Next ID: 7
|
||||
message XPlane {
|
||||
int64 id = 1;
|
||||
|
||||
@ -30,6 +30,9 @@ message XPlane {
|
||||
// XStatMetadata map, each entry uses the XStatMetadata.id as key. This map
|
||||
// should be used for stats that share the same ID over the whole XPlane.
|
||||
map<int64, XStatMetadata> stat_metadata = 5;
|
||||
|
||||
// XStats associated with this plane, e.g. device capabilities.
|
||||
repeated XStat stats = 6;
|
||||
}
|
||||
|
||||
// An XLine is a timeline of trace events (XEvents).
|
||||
|
@ -14,13 +14,13 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/core/profiler/utils/xplane_builder.h"
|
||||
|
||||
#include "absl/strings/numbers.h"
|
||||
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
XPlaneBuilder::XPlaneBuilder(XPlane* plane) : plane_(plane) {
|
||||
XPlaneBuilder::XPlaneBuilder(XPlane* plane)
|
||||
: XStatsBuilder<XPlane>(plane), plane_(plane) {
|
||||
for (auto& iter : *plane->mutable_event_metadata()) {
|
||||
last_event_metadata_id_ =
|
||||
std::max<int64>(last_event_metadata_id_, iter.second.id());
|
||||
@ -95,27 +95,5 @@ XEventBuilder XLineBuilder::AddEvent(const XEventMetadata& metadata) {
|
||||
return XEventBuilder(line_, event);
|
||||
}
|
||||
|
||||
// Appends a new XStat to the wrapped event, sets its metadata id to the id of
// `metadata`, and returns the new stat so the caller can fill in its value.
XStat* XEventBuilder::AddStat(const XStatMetadata& metadata) {
  XStat* const new_stat = event_->add_stats();
  new_stat->set_metadata_id(metadata.id());
  return new_stat;
}
|
||||
|
||||
void XEventBuilder::ParseAndAddStatValue(const XStatMetadata& metadata,
|
||||
absl::string_view value) {
|
||||
int64 int_value;
|
||||
uint64 uint_value;
|
||||
double double_value;
|
||||
if (absl::SimpleAtoi(value, &int_value)) {
|
||||
AddStatValue(metadata, int_value);
|
||||
} else if (absl::SimpleAtoi(value, &uint_value)) {
|
||||
AddStatValue(metadata, uint_value);
|
||||
} else if (absl::SimpleAtod(value, &double_value)) {
|
||||
AddStatValue(metadata, double_value);
|
||||
} else {
|
||||
AddStatValue(metadata, value);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
@ -16,6 +16,7 @@ limitations under the License.
|
||||
#define TENSORFLOW_CORE_PROFILER_UTILS_XPLANE_BUILDER_H_
|
||||
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/strings/numbers.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
@ -25,10 +26,71 @@ limitations under the License.
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
class XEventBuilder {
|
||||
template <class T>
|
||||
class XStatsBuilder {
|
||||
public:
|
||||
explicit XStatsBuilder(T* stats_owner) : stats_owner_(stats_owner) {}
|
||||
|
||||
void AddStatValue(const XStatMetadata& metadata, uint32 value) {
|
||||
AddStat(metadata)->set_uint64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, uint64 value) {
|
||||
AddStat(metadata)->set_uint64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, int32 value) {
|
||||
AddStat(metadata)->set_int64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, int64 value) {
|
||||
AddStat(metadata)->set_int64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, double value) {
|
||||
AddStat(metadata)->set_double_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, absl::string_view value) {
|
||||
AddStat(metadata)->set_str_value(string(value));
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, string&& value) {
|
||||
AddStat(metadata)->set_str_value(std::move(value));
|
||||
}
|
||||
|
||||
void AddStat(const XStatMetadata& metadata, const XStat& stat) {
|
||||
DCHECK_EQ(metadata.id(), stat.metadata_id());
|
||||
*stats_owner_->add_stats() = stat;
|
||||
}
|
||||
|
||||
void ParseAndAddStatValue(const XStatMetadata& metadata,
|
||||
absl::string_view value) {
|
||||
int64 int_value;
|
||||
uint64 uint_value;
|
||||
double double_value;
|
||||
if (absl::SimpleAtoi(value, &int_value)) {
|
||||
AddStatValue(metadata, int_value);
|
||||
} else if (absl::SimpleAtoi(value, &uint_value)) {
|
||||
AddStatValue(metadata, uint_value);
|
||||
} else if (absl::SimpleAtod(value, &double_value)) {
|
||||
AddStatValue(metadata, double_value);
|
||||
} else {
|
||||
AddStatValue(metadata, value);
|
||||
}
|
||||
}
|
||||
void ReserveStats(size_t num_stats) {
|
||||
stats_owner_->mutable_stats()->Reserve(num_stats);
|
||||
}
|
||||
|
||||
private:
|
||||
XStat* AddStat(const XStatMetadata& metadata) {
|
||||
XStat* stat = stats_owner_->add_stats();
|
||||
stat->set_metadata_id(metadata.id());
|
||||
return stat;
|
||||
}
|
||||
|
||||
T* stats_owner_;
|
||||
};
|
||||
|
||||
class XEventBuilder : public XStatsBuilder<XEvent> {
|
||||
public:
|
||||
XEventBuilder(const XLine* line, XEvent* event)
|
||||
: line_(line), event_(event) {}
|
||||
: XStatsBuilder<XEvent>(event), line_(line), event_(event) {}
|
||||
|
||||
void SetOffsetPs(int64 offset_ps) { event_->set_offset_ps(offset_ps); }
|
||||
|
||||
@ -55,43 +117,7 @@ class XEventBuilder {
|
||||
event_->offset_ps());
|
||||
}
|
||||
|
||||
void ReserveStats(size_t num_stats) {
|
||||
event_->mutable_stats()->Reserve(num_stats);
|
||||
}
|
||||
|
||||
void AddStatValue(const XStatMetadata& metadata, uint32 value) {
|
||||
AddStat(metadata)->set_uint64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, uint64 value) {
|
||||
AddStat(metadata)->set_uint64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, int32 value) {
|
||||
AddStat(metadata)->set_int64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, int64 value) {
|
||||
AddStat(metadata)->set_int64_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, double value) {
|
||||
AddStat(metadata)->set_double_value(value);
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, absl::string_view value) {
|
||||
AddStat(metadata)->set_str_value(string(value));
|
||||
}
|
||||
void AddStatValue(const XStatMetadata& metadata, string&& value) {
|
||||
AddStat(metadata)->set_str_value(std::move(value));
|
||||
}
|
||||
|
||||
void ParseAndAddStatValue(const XStatMetadata& metadata,
|
||||
absl::string_view value);
|
||||
|
||||
void AddStat(const XStatMetadata& metadata, const XStat& stat) {
|
||||
DCHECK_EQ(metadata.id(), stat.metadata_id());
|
||||
*event_->add_stats() = stat;
|
||||
}
|
||||
|
||||
private:
|
||||
XStat* AddStat(const XStatMetadata& metadata);
|
||||
|
||||
const XLine* line_;
|
||||
XEvent* event_;
|
||||
};
|
||||
@ -126,7 +152,7 @@ class XLineBuilder {
|
||||
|
||||
// Provides methods to build an XPlane.
|
||||
// NOTE: avoid using two builders to wrap the same XPlane.
|
||||
class XPlaneBuilder {
|
||||
class XPlaneBuilder : public XStatsBuilder<XPlane> {
|
||||
public:
|
||||
explicit XPlaneBuilder(XPlane* plane);
|
||||
|
||||
|
@ -64,22 +64,43 @@ static_assert(sizeof(kHostEventTypeMetadataMap) / sizeof(absl::string_view) ==
|
||||
"Mismatch between enum and string map.");
|
||||
|
||||
static const absl::string_view kStatTypeStrMap[] = {
|
||||
"UnknownStatType", "id",
|
||||
"parent_step_id", "function_step_id",
|
||||
"device_ordinal", "chip_ordinal",
|
||||
"node_ordinal", "model_id",
|
||||
"queue_addr", "request_id",
|
||||
"run_id", "graph_type",
|
||||
"step_num", "iter_num",
|
||||
"index_on_host", "bytes_reserved",
|
||||
"bytes_allocated", "bytes_available",
|
||||
"fragmentation", "device_id",
|
||||
"context_id", "correlation_id",
|
||||
"memcpy_details", "memalloc_details",
|
||||
"kernel_details", "group_id",
|
||||
"step_name", "level 0",
|
||||
"tf_op", "hlo_op",
|
||||
"UnknownStatType",
|
||||
"id",
|
||||
"parent_step_id",
|
||||
"function_step_id",
|
||||
"device_ordinal",
|
||||
"chip_ordinal",
|
||||
"node_ordinal",
|
||||
"model_id",
|
||||
"queue_addr",
|
||||
"request_id",
|
||||
"run_id",
|
||||
"graph_type",
|
||||
"step_num",
|
||||
"iter_num",
|
||||
"index_on_host",
|
||||
"bytes_reserved",
|
||||
"bytes_allocated",
|
||||
"bytes_available",
|
||||
"fragmentation",
|
||||
"device_id",
|
||||
"context_id",
|
||||
"correlation_id",
|
||||
"memcpy_details",
|
||||
"memalloc_details",
|
||||
"kernel_details",
|
||||
"group_id",
|
||||
"step_name",
|
||||
"level 0",
|
||||
"tf_op",
|
||||
"hlo_op",
|
||||
"hlo_module",
|
||||
"clock_rate",
|
||||
"core_count",
|
||||
"memory_bandwidth",
|
||||
"memory_size",
|
||||
"compute_cap_major",
|
||||
"compute_cap_minor",
|
||||
};
|
||||
|
||||
static_assert(sizeof(kStatTypeStrMap) / sizeof(absl::string_view) ==
|
||||
|
@ -98,7 +98,14 @@ enum StatType {
|
||||
kTfOp,
|
||||
kHloOp,
|
||||
kHloModule,
|
||||
kLastStatType = kHloModule,
|
||||
// Device capability related.
|
||||
kDevCapClockRateKHz,
|
||||
kDevCapCoreCount,
|
||||
kDevCapMemoryBandwidth,
|
||||
kDevCapMemorySize,
|
||||
kDevCapComputeCapMajor,
|
||||
kDevCapComputeCapMinor,
|
||||
kLastStatType = kDevCapComputeCapMinor,
|
||||
};
|
||||
|
||||
absl::Span<const absl::string_view> GetHostEventTypeStrMap();
|
||||
|
Loading…
Reference in New Issue
Block a user