Move the libcupti interface and wrapper to open source to unify the internal and external device tracers.

PiperOrigin-RevId: 260779108
This commit is contained in:
A. Unique TensorFlower 2019-07-30 13:09:13 -07:00 committed by TensorFlower Gardener
parent 2641108b39
commit 5944bf515a
4 changed files with 631 additions and 0 deletions

View File

@ -7,3 +7,34 @@ alias(
name = "device_tracer",
actual = "//tensorflow/core:device_tracer",
)
load(
"//tensorflow:tensorflow.bzl",
"tf_copts",
"tf_cuda_library",
"if_cuda_is_configured_compat",
)
# Header-only target (no srcs) that publishes cupti_interface.h.
# NOTE(review): the header list is wrapped in if_cuda_is_configured_compat, so
# it is presumably empty when CUDA is not configured -- confirm against
# //tensorflow:tensorflow.bzl.
tf_cuda_library(
name = "cupti_interface",
hdrs = if_cuda_is_configured_compat(["cupti_interface.h"]),
copts = tf_copts(),
visibility = ["//visibility:public"],
deps = [
"//tensorflow/core:platform_base",
"//tensorflow/stream_executor/cuda:cupti_stub",
"@com_google_absl//absl/base:core_headers",
],
)
# Target for cupti_wrapper.{h,cc}; depends on :cupti_interface and on the
# cupti_stub target.
# NOTE(review): srcs and hdrs are both wrapped in if_cuda_is_configured_compat,
# so the target presumably compiles nothing when CUDA is not configured --
# confirm against //tensorflow:tensorflow.bzl.
tf_cuda_library(
name = "cupti_wrapper",
srcs = if_cuda_is_configured_compat(["cupti_wrapper.cc"]),
hdrs = if_cuda_is_configured_compat(["cupti_wrapper.h"]),
copts = tf_copts(),
visibility = ["//visibility:public"],
deps = [
":cupti_interface",
"//tensorflow/stream_executor/cuda:cupti_stub",
],
)

View File

@ -0,0 +1,193 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_INTERFACE_H_
#define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_INTERFACE_H_
#include <stddef.h>
#include <stdint.h>
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
#include "third_party/gpus/cuda/include/cuda.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
namespace profiler {
// Abstract wrapper interface over the CUPTI API functions. It exists so that
// CUPTI calls can easily be replaced by a mock object. Members are declared in
// this order: activity related APIs, callback related APIs, event APIs, and
// metric APIs. Inside each category the order of the original CUPTI
// documentation is followed.
class CuptiInterface {
 public:
  CuptiInterface() = default;
  virtual ~CuptiInterface() = default;

  // ---------------------------------------------------------------------------
  // CUPTI activity API
  // ---------------------------------------------------------------------------
  virtual CUptiResult ActivityDisable(CUpti_ActivityKind kind) = 0;

  virtual CUptiResult ActivityEnable(CUpti_ActivityKind kind) = 0;

  virtual CUptiResult ActivityFlushAll(uint32_t flag) = 0;

  virtual CUptiResult ActivityGetNextRecord(uint8_t* buffer,
                                            size_t valid_buffer_size_bytes,
                                            CUpti_Activity** record) = 0;

  virtual CUptiResult ActivityGetNumDroppedRecords(CUcontext context,
                                                   uint32_t stream_id,
                                                   size_t* dropped) = 0;

  virtual CUptiResult ActivityRegisterCallbacks(
      CUpti_BuffersCallbackRequestFunc func_buffer_requested,
      CUpti_BuffersCallbackCompleteFunc func_buffer_completed) = 0;

  // NOTE(review): this is the only declaration spelled with `uint32` rather
  // than `uint32_t`; presumably the two name the same type -- confirm before
  // unifying.
  virtual CUptiResult GetDeviceId(CUcontext context, uint32* deviceId) = 0;

  virtual CUptiResult GetTimestamp(uint64_t* timestamp) = 0;

  virtual CUptiResult Finalize() = 0;

  // ---------------------------------------------------------------------------
  // CUPTI callback API
  // ---------------------------------------------------------------------------
  virtual CUptiResult EnableCallback(uint32_t enable,
                                     CUpti_SubscriberHandle subscriber,
                                     CUpti_CallbackDomain domain,
                                     CUpti_CallbackId cbid) = 0;

  virtual CUptiResult EnableDomain(uint32_t enable,
                                   CUpti_SubscriberHandle subscriber,
                                   CUpti_CallbackDomain domain) = 0;

  virtual CUptiResult Subscribe(CUpti_SubscriberHandle* subscriber,
                                CUpti_CallbackFunc callback,
                                void* userdata) = 0;

  virtual CUptiResult Unsubscribe(CUpti_SubscriberHandle subscriber) = 0;

  // ---------------------------------------------------------------------------
  // CUPTI event API
  // ---------------------------------------------------------------------------
  virtual CUptiResult DeviceEnumEventDomains(
      CUdevice device, size_t* array_size_bytes,
      CUpti_EventDomainID* domain_array) = 0;

  virtual CUptiResult DeviceGetEventDomainAttribute(
      CUdevice device, CUpti_EventDomainID event_domain,
      CUpti_EventDomainAttribute attrib, size_t* value_size, void* value) = 0;

  virtual CUptiResult DisableKernelReplayMode(CUcontext context) = 0;

  virtual CUptiResult EnableKernelReplayMode(CUcontext context) = 0;

  virtual CUptiResult DeviceGetNumEventDomains(CUdevice device,
                                               uint32_t* num_domains) = 0;

  virtual CUptiResult EventDomainEnumEvents(CUpti_EventDomainID event_domain,
                                            size_t* array_size_bytes,
                                            CUpti_EventID* event_array) = 0;

  virtual CUptiResult EventDomainGetNumEvents(CUpti_EventDomainID event_domain,
                                              uint32_t* num_events) = 0;

  virtual CUptiResult EventGetAttribute(CUpti_EventID event,
                                        CUpti_EventAttribute attrib,
                                        size_t* value_size, void* value) = 0;

  virtual CUptiResult EventGetIdFromName(CUdevice device,
                                         const char* event_name,
                                         CUpti_EventID* event) = 0;

  virtual CUptiResult EventGroupDisable(CUpti_EventGroup event_group) = 0;

  virtual CUptiResult EventGroupEnable(CUpti_EventGroup event_group) = 0;

  virtual CUptiResult EventGroupGetAttribute(CUpti_EventGroup event_group,
                                             CUpti_EventGroupAttribute attrib,
                                             size_t* value_size,
                                             void* value) = 0;

  virtual CUptiResult EventGroupReadEvent(CUpti_EventGroup event_group,
                                          CUpti_ReadEventFlags flags,
                                          CUpti_EventID event,
                                          size_t* event_value_buffer_size_bytes,
                                          uint64_t* eventValueBuffer) = 0;

  virtual CUptiResult EventGroupSetAttribute(CUpti_EventGroup event_group,
                                             CUpti_EventGroupAttribute attrib,
                                             size_t value_size,
                                             void* value) = 0;

  virtual CUptiResult EventGroupSetsCreate(
      CUcontext context, size_t event_id_array_size_bytes,
      CUpti_EventID* event_id_array,
      CUpti_EventGroupSets** event_group_passes) = 0;

  virtual CUptiResult EventGroupSetsDestroy(
      CUpti_EventGroupSets* event_group_sets) = 0;

  // ---------------------------------------------------------------------------
  // CUPTI metric API
  // ---------------------------------------------------------------------------
  virtual CUptiResult DeviceEnumMetrics(CUdevice device, size_t* arraySizeBytes,
                                        CUpti_MetricID* metricArray) = 0;

  virtual CUptiResult DeviceGetNumMetrics(CUdevice device,
                                          uint32_t* num_metrics) = 0;

  virtual CUptiResult MetricGetIdFromName(CUdevice device,
                                          const char* metric_name,
                                          CUpti_MetricID* metric) = 0;

  virtual CUptiResult MetricGetNumEvents(CUpti_MetricID metric,
                                         uint32_t* num_events) = 0;

  virtual CUptiResult MetricEnumEvents(CUpti_MetricID metric,
                                       size_t* event_id_array_size_bytes,
                                       CUpti_EventID* event_id_array) = 0;

  virtual CUptiResult MetricGetAttribute(CUpti_MetricID metric,
                                         CUpti_MetricAttribute attrib,
                                         size_t* value_size, void* value) = 0;

  virtual CUptiResult MetricGetValue(CUdevice device, CUpti_MetricID metric,
                                     size_t event_id_array_size_bytes,
                                     CUpti_EventID* event_id_array,
                                     size_t event_value_array_size_bytes,
                                     uint64_t* event_value_array,
                                     uint64_t time_duration,
                                     CUpti_MetricValue* metric_value) = 0;

  virtual CUptiResult GetResultString(CUptiResult result, const char** str) = 0;

  // ---------------------------------------------------------------------------
  // Interface maintenance functions. These are not CUPTI calls themselves, but
  // are required for implementing an error resilient layer over the CUPTI API.
  // ---------------------------------------------------------------------------

  // Performs any clean up work that is required each time a profile session
  // is done. It can therefore be called multiple times during the lifetime of
  // the process.
  virtual void CleanUp() = 0;

  // Whether the CUPTI API is currently disabled due to unrecoverable errors.
  // While disabled, all subsequent calls fail immediately without being
  // forwarded to the CUPTI library.
  virtual bool Disabled() const = 0;

 private:
  TF_DISALLOW_COPY_AND_ASSIGN(CuptiInterface);
};
} // namespace profiler
} // namespace tensorflow
#endif // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_INTERFACE_H_

View File

@ -0,0 +1,232 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/internal/gpu/cupti_wrapper.h"
#include <type_traits>
namespace tensorflow {
namespace profiler {

// Every CuptiWrapper member defined below passes its arguments, unchanged, to
// the global CUPTI function it is named after and returns that call's result.

// CUPTI activity API

CUptiResult CuptiWrapper::ActivityDisable(CUpti_ActivityKind kind) {
  return ::cuptiActivityDisable(kind);
}

CUptiResult CuptiWrapper::ActivityEnable(CUpti_ActivityKind kind) {
  return ::cuptiActivityEnable(kind);
}

CUptiResult CuptiWrapper::ActivityFlushAll(uint32_t flag) {
  return ::cuptiActivityFlushAll(flag);
}

CUptiResult CuptiWrapper::ActivityGetNextRecord(
    uint8_t* buffer, size_t valid_buffer_size_bytes, CUpti_Activity** record) {
  return ::cuptiActivityGetNextRecord(buffer, valid_buffer_size_bytes, record);
}

CUptiResult CuptiWrapper::ActivityGetNumDroppedRecords(CUcontext context,
                                                       uint32_t stream_id,
                                                       size_t* dropped) {
  return ::cuptiActivityGetNumDroppedRecords(context, stream_id, dropped);
}

CUptiResult CuptiWrapper::ActivityRegisterCallbacks(
    CUpti_BuffersCallbackRequestFunc func_buffer_requested,
    CUpti_BuffersCallbackCompleteFunc func_buffer_completed) {
  return ::cuptiActivityRegisterCallbacks(func_buffer_requested,
                                          func_buffer_completed);
}

CUptiResult CuptiWrapper::GetDeviceId(CUcontext context, uint32* deviceId) {
  return ::cuptiGetDeviceId(context, deviceId);
}

CUptiResult CuptiWrapper::GetTimestamp(uint64_t* timestamp) {
  return ::cuptiGetTimestamp(timestamp);
}

CUptiResult CuptiWrapper::Finalize() {
  return ::cuptiFinalize();
}

// CUPTI callback API

CUptiResult CuptiWrapper::EnableCallback(uint32_t enable,
                                         CUpti_SubscriberHandle subscriber,
                                         CUpti_CallbackDomain domain,
                                         CUpti_CallbackId cbid) {
  return ::cuptiEnableCallback(enable, subscriber, domain, cbid);
}

CUptiResult CuptiWrapper::EnableDomain(uint32_t enable,
                                       CUpti_SubscriberHandle subscriber,
                                       CUpti_CallbackDomain domain) {
  return ::cuptiEnableDomain(enable, subscriber, domain);
}

CUptiResult CuptiWrapper::Subscribe(CUpti_SubscriberHandle* subscriber,
                                    CUpti_CallbackFunc callback,
                                    void* userdata) {
  return ::cuptiSubscribe(subscriber, callback, userdata);
}

CUptiResult CuptiWrapper::Unsubscribe(CUpti_SubscriberHandle subscriber) {
  return ::cuptiUnsubscribe(subscriber);
}

// CUPTI event API

CUptiResult CuptiWrapper::DeviceEnumEventDomains(
    CUdevice device, size_t* array_size_bytes,
    CUpti_EventDomainID* domain_array) {
  return ::cuptiDeviceEnumEventDomains(device, array_size_bytes, domain_array);
}

CUptiResult CuptiWrapper::DeviceGetEventDomainAttribute(
    CUdevice device, CUpti_EventDomainID event_domain,
    CUpti_EventDomainAttribute attrib, size_t* value_size, void* value) {
  return ::cuptiDeviceGetEventDomainAttribute(device, event_domain, attrib,
                                              value_size, value);
}

CUptiResult CuptiWrapper::DisableKernelReplayMode(CUcontext context) {
  return ::cuptiDisableKernelReplayMode(context);
}

CUptiResult CuptiWrapper::EnableKernelReplayMode(CUcontext context) {
  return ::cuptiEnableKernelReplayMode(context);
}

CUptiResult CuptiWrapper::DeviceGetNumEventDomains(CUdevice device,
                                                   uint32_t* num_domains) {
  return ::cuptiDeviceGetNumEventDomains(device, num_domains);
}

CUptiResult CuptiWrapper::EventDomainEnumEvents(
    CUpti_EventDomainID event_domain, size_t* array_size_bytes,
    CUpti_EventID* event_array) {
  return ::cuptiEventDomainEnumEvents(event_domain, array_size_bytes,
                                      event_array);
}

CUptiResult CuptiWrapper::EventDomainGetNumEvents(
    CUpti_EventDomainID event_domain, uint32_t* num_events) {
  return ::cuptiEventDomainGetNumEvents(event_domain, num_events);
}

CUptiResult CuptiWrapper::EventGetAttribute(CUpti_EventID event,
                                            CUpti_EventAttribute attrib,
                                            size_t* value_size, void* value) {
  return ::cuptiEventGetAttribute(event, attrib, value_size, value);
}

CUptiResult CuptiWrapper::EventGetIdFromName(CUdevice device,
                                             const char* event_name,
                                             CUpti_EventID* event) {
  return ::cuptiEventGetIdFromName(device, event_name, event);
}

CUptiResult CuptiWrapper::EventGroupDisable(CUpti_EventGroup event_group) {
  return ::cuptiEventGroupDisable(event_group);
}

CUptiResult CuptiWrapper::EventGroupEnable(CUpti_EventGroup event_group) {
  return ::cuptiEventGroupEnable(event_group);
}

CUptiResult CuptiWrapper::EventGroupGetAttribute(
    CUpti_EventGroup event_group, CUpti_EventGroupAttribute attrib,
    size_t* value_size, void* value) {
  return ::cuptiEventGroupGetAttribute(event_group, attrib, value_size, value);
}

CUptiResult CuptiWrapper::EventGroupReadEvent(
    CUpti_EventGroup event_group, CUpti_ReadEventFlags flags,
    CUpti_EventID event, size_t* event_value_buffer_size_bytes,
    uint64_t* event_value_buffer) {
  return ::cuptiEventGroupReadEvent(event_group, flags, event,
                                    event_value_buffer_size_bytes,
                                    event_value_buffer);
}

CUptiResult CuptiWrapper::EventGroupSetAttribute(
    CUpti_EventGroup event_group, CUpti_EventGroupAttribute attrib,
    size_t value_size, void* value) {
  return ::cuptiEventGroupSetAttribute(event_group, attrib, value_size, value);
}

CUptiResult CuptiWrapper::EventGroupSetsCreate(
    CUcontext context, size_t event_id_array_size_bytes,
    CUpti_EventID* event_id_array, CUpti_EventGroupSets** event_group_passes) {
  return ::cuptiEventGroupSetsCreate(context, event_id_array_size_bytes,
                                     event_id_array, event_group_passes);
}

CUptiResult CuptiWrapper::EventGroupSetsDestroy(
    CUpti_EventGroupSets* event_group_sets) {
  return ::cuptiEventGroupSetsDestroy(event_group_sets);
}

// CUPTI metric API

CUptiResult CuptiWrapper::DeviceEnumMetrics(CUdevice device,
                                            size_t* arraySizeBytes,
                                            CUpti_MetricID* metricArray) {
  return ::cuptiDeviceEnumMetrics(device, arraySizeBytes, metricArray);
}

CUptiResult CuptiWrapper::DeviceGetNumMetrics(CUdevice device,
                                              uint32_t* num_metrics) {
  return ::cuptiDeviceGetNumMetrics(device, num_metrics);
}

CUptiResult CuptiWrapper::MetricGetIdFromName(CUdevice device,
                                              const char* metric_name,
                                              CUpti_MetricID* metric) {
  return ::cuptiMetricGetIdFromName(device, metric_name, metric);
}

CUptiResult CuptiWrapper::MetricGetNumEvents(CUpti_MetricID metric,
                                             uint32_t* num_events) {
  return ::cuptiMetricGetNumEvents(metric, num_events);
}

CUptiResult CuptiWrapper::MetricEnumEvents(CUpti_MetricID metric,
                                           size_t* event_id_array_size_bytes,
                                           CUpti_EventID* event_id_array) {
  return ::cuptiMetricEnumEvents(metric, event_id_array_size_bytes,
                                 event_id_array);
}

CUptiResult CuptiWrapper::MetricGetAttribute(CUpti_MetricID metric,
                                             CUpti_MetricAttribute attrib,
                                             size_t* value_size, void* value) {
  return ::cuptiMetricGetAttribute(metric, attrib, value_size, value);
}

CUptiResult CuptiWrapper::MetricGetValue(CUdevice device, CUpti_MetricID metric,
                                         size_t event_id_array_size_bytes,
                                         CUpti_EventID* event_id_array,
                                         size_t event_value_array_size_bytes,
                                         uint64_t* event_value_array,
                                         uint64_t time_duration,
                                         CUpti_MetricValue* metric_value) {
  return ::cuptiMetricGetValue(device, metric, event_id_array_size_bytes,
                               event_id_array, event_value_array_size_bytes,
                               event_value_array, time_duration, metric_value);
}

CUptiResult CuptiWrapper::GetResultString(CUptiResult result,
                                          const char** str) {
  return ::cuptiGetResultString(result, str);
}

}  // namespace profiler
}  // namespace tensorflow

View File

@ -0,0 +1,175 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_
#define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_
#include <stddef.h>
#include <stdint.h>
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
#include "third_party/gpus/cuda/include/cuda.h"
#include "tensorflow/core/profiler/internal/gpu/cupti_interface.h"
namespace tensorflow {
namespace profiler {
class CuptiWrapper : public tensorflow::profiler::CuptiInterface {
public:
CuptiWrapper() {}
~CuptiWrapper() override {}
// CUPTI activity API
CUptiResult ActivityDisable(CUpti_ActivityKind kind) override;
CUptiResult ActivityEnable(CUpti_ActivityKind kind) override;
CUptiResult ActivityFlushAll(uint32_t flag) override;
CUptiResult ActivityGetNextRecord(uint8_t* buffer,
size_t valid_buffer_size_bytes,
CUpti_Activity** record) override;
CUptiResult ActivityGetNumDroppedRecords(CUcontext context,
uint32_t stream_id,
size_t* dropped) override;
CUptiResult ActivityRegisterCallbacks(
CUpti_BuffersCallbackRequestFunc func_buffer_requested,
CUpti_BuffersCallbackCompleteFunc func_buffer_completed) override;
CUptiResult GetDeviceId(CUcontext context, uint32* deviceId) override;
CUptiResult GetTimestamp(uint64_t* timestamp) override;
// cuptiFinalize is only defined in CUDA8 and above.
// To enable it in CUDA8, the environment variable CUPTI_ENABLE_FINALIZE must
// be set to 1.
CUptiResult Finalize() override;
// CUPTI callback API
CUptiResult EnableCallback(uint32_t enable, CUpti_SubscriberHandle subscriber,
CUpti_CallbackDomain domain,
CUpti_CallbackId cbid) override;
CUptiResult EnableDomain(uint32_t enable, CUpti_SubscriberHandle subscriber,
CUpti_CallbackDomain domain) override;
CUptiResult Subscribe(CUpti_SubscriberHandle* subscriber,
CUpti_CallbackFunc callback, void* userdata) override;
CUptiResult Unsubscribe(CUpti_SubscriberHandle subscriber) override;
// CUPTI event API
CUptiResult DeviceEnumEventDomains(
CUdevice device, size_t* array_size_bytes,
CUpti_EventDomainID* domain_array) override;
CUptiResult DeviceGetEventDomainAttribute(CUdevice device,
CUpti_EventDomainID event_domain,
CUpti_EventDomainAttribute attrib,
size_t* value_size,
void* value) override;
CUptiResult DisableKernelReplayMode(CUcontext context) override;
CUptiResult EnableKernelReplayMode(CUcontext context) override;
CUptiResult DeviceGetNumEventDomains(CUdevice device,
uint32_t* num_domains) override;
CUptiResult EventDomainEnumEvents(CUpti_EventDomainID event_domain,
size_t* array_size_bytes,
CUpti_EventID* event_array) override;
CUptiResult EventDomainGetNumEvents(CUpti_EventDomainID event_domain,
uint32_t* num_events) override;
CUptiResult EventGetAttribute(CUpti_EventID event,
CUpti_EventAttribute attrib, size_t* value_size,
void* value) override;
CUptiResult EventGetIdFromName(CUdevice device, const char* event_name,
CUpti_EventID* event) override;
CUptiResult EventGroupDisable(CUpti_EventGroup event_group) override;
CUptiResult EventGroupEnable(CUpti_EventGroup event_group) override;
CUptiResult EventGroupGetAttribute(CUpti_EventGroup event_group,
CUpti_EventGroupAttribute attrib,
size_t* value_size, void* value) override;
CUptiResult EventGroupReadEvent(CUpti_EventGroup event_group,
CUpti_ReadEventFlags flags,
CUpti_EventID event,
size_t* event_value_buffer_size_bytes,
uint64_t* event_value_buffer) override;
CUptiResult EventGroupSetAttribute(CUpti_EventGroup event_group,
CUpti_EventGroupAttribute attrib,
size_t value_size, void* value) override;
CUptiResult EventGroupSetsCreate(
CUcontext context, size_t event_id_array_size_bytes,
CUpti_EventID* event_id_array,
CUpti_EventGroupSets** event_group_passes) override;
CUptiResult EventGroupSetsDestroy(
CUpti_EventGroupSets* event_group_sets) override;
// CUPTI metric API
CUptiResult DeviceEnumMetrics(CUdevice device, size_t* arraySizeBytes,
CUpti_MetricID* metricArray) override;
CUptiResult DeviceGetNumMetrics(CUdevice device,
uint32_t* num_metrics) override;
CUptiResult MetricGetIdFromName(CUdevice device, const char* metric_name,
CUpti_MetricID* metric) override;
CUptiResult MetricGetNumEvents(CUpti_MetricID metric,
uint32_t* num_events) override;
CUptiResult MetricEnumEvents(CUpti_MetricID metric,
size_t* event_id_array_size_bytes,
CUpti_EventID* event_id_array) override;
CUptiResult MetricGetAttribute(CUpti_MetricID metric,
CUpti_MetricAttribute attrib,
size_t* value_size, void* value) override;
CUptiResult MetricGetValue(CUdevice device, CUpti_MetricID metric,
size_t event_id_array_size_bytes,
CUpti_EventID* event_id_array,
size_t event_value_array_size_bytes,
uint64_t* event_value_array,
uint64_t time_duration,
CUpti_MetricValue* metric_value) override;
CUptiResult GetResultString(CUptiResult result, const char** str) override;
void CleanUp() override {}
bool Disabled() const override { return false; }
private:
TF_DISALLOW_COPY_AND_ASSIGN(CuptiWrapper);
};
} // namespace profiler
} // namespace tensorflow
#endif // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_