Move the libcupti interface and wrapper to open source to unify the internal and external device tracers.
PiperOrigin-RevId: 260779108
This commit is contained in:
parent
2641108b39
commit
5944bf515a
@ -7,3 +7,34 @@ alias(
|
||||
name = "device_tracer",
|
||||
actual = "//tensorflow/core:device_tracer",
|
||||
)
|
||||
|
||||
load(
|
||||
"//tensorflow:tensorflow.bzl",
|
||||
"tf_copts",
|
||||
"tf_cuda_library",
|
||||
"if_cuda_is_configured_compat",
|
||||
)
|
||||
|
||||
# Library target exposing cupti_interface.h, the abstract CuptiInterface class.
# The header list is routed through if_cuda_is_configured_compat (loaded from
# tensorflow.bzl; presumably yields an empty list when CUDA is not configured
# -- confirm in tensorflow.bzl).
tf_cuda_library(
    name = "cupti_interface",
    hdrs = if_cuda_is_configured_compat(["cupti_interface.h"]),
    copts = tf_copts(),
    visibility = ["//visibility:public"],
    deps = [
        "//tensorflow/core:platform_base",
        "//tensorflow/stream_executor/cuda:cupti_stub",
        "@com_google_absl//absl/base:core_headers",
    ],
)
|
||||
|
||||
# Library target for CuptiWrapper (cupti_wrapper.h / cupti_wrapper.cc), the
# CuptiInterface implementation built on top of :cupti_interface. Sources and
# headers are both routed through if_cuda_is_configured_compat (loaded from
# tensorflow.bzl; presumably empty when CUDA is not configured -- confirm).
tf_cuda_library(
    name = "cupti_wrapper",
    srcs = if_cuda_is_configured_compat(["cupti_wrapper.cc"]),
    hdrs = if_cuda_is_configured_compat(["cupti_wrapper.h"]),
    copts = tf_copts(),
    visibility = ["//visibility:public"],
    deps = [
        ":cupti_interface",
        "//tensorflow/stream_executor/cuda:cupti_stub",
    ],
)
|
||||
|
193
tensorflow/core/profiler/internal/gpu/cupti_interface.h
Normal file
193
tensorflow/core/profiler/internal/gpu/cupti_interface.h
Normal file
@ -0,0 +1,193 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_INTERFACE_H_
|
||||
#define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_INTERFACE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
// Provides a wrapper interface to every single CUPTI API function. This class
|
||||
// is needed to create an easy mock object for CUPTI API calls. All member
|
||||
// functions are defined in the following order: activity related APIs, callback
|
||||
// related APIs, Event APIs, and metric APIs. Within each category, we follow
|
||||
// the order in the original CUPTI documentation.
|
||||
class CuptiInterface {
|
||||
public:
|
||||
CuptiInterface() {}
|
||||
|
||||
virtual ~CuptiInterface() {}
|
||||
|
||||
// CUPTI activity API
|
||||
virtual CUptiResult ActivityDisable(CUpti_ActivityKind kind) = 0;
|
||||
|
||||
virtual CUptiResult ActivityEnable(CUpti_ActivityKind kind) = 0;
|
||||
|
||||
virtual CUptiResult ActivityFlushAll(uint32_t flag) = 0;
|
||||
|
||||
virtual CUptiResult ActivityGetNextRecord(uint8_t* buffer,
|
||||
size_t valid_buffer_size_bytes,
|
||||
CUpti_Activity** record) = 0;
|
||||
|
||||
virtual CUptiResult ActivityGetNumDroppedRecords(CUcontext context,
|
||||
uint32_t stream_id,
|
||||
size_t* dropped) = 0;
|
||||
|
||||
virtual CUptiResult ActivityRegisterCallbacks(
|
||||
CUpti_BuffersCallbackRequestFunc func_buffer_requested,
|
||||
CUpti_BuffersCallbackCompleteFunc func_buffer_completed) = 0;
|
||||
|
||||
virtual CUptiResult GetDeviceId(CUcontext context, uint32* deviceId) = 0;
|
||||
|
||||
virtual CUptiResult GetTimestamp(uint64_t* timestamp) = 0;
|
||||
|
||||
virtual CUptiResult Finalize() = 0;
|
||||
|
||||
// CUPTI callback API
|
||||
virtual CUptiResult EnableCallback(uint32_t enable,
|
||||
CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain,
|
||||
CUpti_CallbackId cbid) = 0;
|
||||
|
||||
virtual CUptiResult EnableDomain(uint32_t enable,
|
||||
CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain) = 0;
|
||||
|
||||
virtual CUptiResult Subscribe(CUpti_SubscriberHandle* subscriber,
|
||||
CUpti_CallbackFunc callback,
|
||||
void* userdata) = 0;
|
||||
|
||||
virtual CUptiResult Unsubscribe(CUpti_SubscriberHandle subscriber) = 0;
|
||||
|
||||
// CUPTI event API
|
||||
virtual CUptiResult DeviceEnumEventDomains(
|
||||
CUdevice device, size_t* array_size_bytes,
|
||||
CUpti_EventDomainID* domain_array) = 0;
|
||||
|
||||
virtual CUptiResult DeviceGetEventDomainAttribute(
|
||||
CUdevice device, CUpti_EventDomainID event_domain,
|
||||
CUpti_EventDomainAttribute attrib, size_t* value_size, void* value) = 0;
|
||||
|
||||
virtual CUptiResult DisableKernelReplayMode(CUcontext context) = 0;
|
||||
|
||||
virtual CUptiResult EnableKernelReplayMode(CUcontext context) = 0;
|
||||
|
||||
virtual CUptiResult DeviceGetNumEventDomains(CUdevice device,
|
||||
uint32_t* num_domains) = 0;
|
||||
|
||||
virtual CUptiResult EventDomainEnumEvents(CUpti_EventDomainID event_domain,
|
||||
size_t* array_size_bytes,
|
||||
CUpti_EventID* event_array) = 0;
|
||||
|
||||
virtual CUptiResult EventDomainGetNumEvents(CUpti_EventDomainID event_domain,
|
||||
uint32_t* num_events) = 0;
|
||||
|
||||
virtual CUptiResult EventGetAttribute(CUpti_EventID event,
|
||||
CUpti_EventAttribute attrib,
|
||||
size_t* value_size, void* value) = 0;
|
||||
|
||||
virtual CUptiResult EventGetIdFromName(CUdevice device,
|
||||
const char* event_name,
|
||||
CUpti_EventID* event) = 0;
|
||||
|
||||
virtual CUptiResult EventGroupDisable(CUpti_EventGroup event_group) = 0;
|
||||
|
||||
virtual CUptiResult EventGroupEnable(CUpti_EventGroup event_group) = 0;
|
||||
|
||||
virtual CUptiResult EventGroupGetAttribute(CUpti_EventGroup event_group,
|
||||
CUpti_EventGroupAttribute attrib,
|
||||
size_t* value_size,
|
||||
void* value) = 0;
|
||||
|
||||
virtual CUptiResult EventGroupReadEvent(CUpti_EventGroup event_group,
|
||||
CUpti_ReadEventFlags flags,
|
||||
CUpti_EventID event,
|
||||
size_t* event_value_buffer_size_bytes,
|
||||
uint64_t* eventValueBuffer) = 0;
|
||||
|
||||
virtual CUptiResult EventGroupSetAttribute(CUpti_EventGroup event_group,
|
||||
CUpti_EventGroupAttribute attrib,
|
||||
size_t value_size,
|
||||
void* value) = 0;
|
||||
|
||||
virtual CUptiResult EventGroupSetsCreate(
|
||||
CUcontext context, size_t event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array,
|
||||
CUpti_EventGroupSets** event_group_passes) = 0;
|
||||
|
||||
virtual CUptiResult EventGroupSetsDestroy(
|
||||
CUpti_EventGroupSets* event_group_sets) = 0;
|
||||
|
||||
// CUPTI metric API
|
||||
virtual CUptiResult DeviceEnumMetrics(CUdevice device, size_t* arraySizeBytes,
|
||||
CUpti_MetricID* metricArray) = 0;
|
||||
|
||||
virtual CUptiResult DeviceGetNumMetrics(CUdevice device,
|
||||
uint32_t* num_metrics) = 0;
|
||||
|
||||
virtual CUptiResult MetricGetIdFromName(CUdevice device,
|
||||
const char* metric_name,
|
||||
CUpti_MetricID* metric) = 0;
|
||||
|
||||
virtual CUptiResult MetricGetNumEvents(CUpti_MetricID metric,
|
||||
uint32_t* num_events) = 0;
|
||||
|
||||
virtual CUptiResult MetricEnumEvents(CUpti_MetricID metric,
|
||||
size_t* event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array) = 0;
|
||||
|
||||
virtual CUptiResult MetricGetAttribute(CUpti_MetricID metric,
|
||||
CUpti_MetricAttribute attrib,
|
||||
size_t* value_size, void* value) = 0;
|
||||
|
||||
virtual CUptiResult MetricGetValue(CUdevice device, CUpti_MetricID metric,
|
||||
size_t event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array,
|
||||
size_t event_value_array_size_bytes,
|
||||
uint64_t* event_value_array,
|
||||
uint64_t time_duration,
|
||||
CUpti_MetricValue* metric_value) = 0;
|
||||
|
||||
virtual CUptiResult GetResultString(CUptiResult result, const char** str) = 0;
|
||||
|
||||
// Interface maintenance functions. Not directly related to CUPTI, but
|
||||
// required for implementing an error resilient layer over CUPTI API.
|
||||
|
||||
// Performance any clean up work that is required each time profile session
|
||||
// is done. Therefore this can be called multiple times during process life
|
||||
// time.
|
||||
virtual void CleanUp() = 0;
|
||||
|
||||
// Whether CUPTI API is currently disabled due to unrecoverable errors.
|
||||
// All subsequent calls will fail immediately without forwarding calls to
|
||||
// CUPTI library.
|
||||
virtual bool Disabled() const = 0;
|
||||
|
||||
private:
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(CuptiInterface);
|
||||
};
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_INTERFACE_H_
|
232
tensorflow/core/profiler/internal/gpu/cupti_wrapper.cc
Normal file
232
tensorflow/core/profiler/internal/gpu/cupti_wrapper.cc
Normal file
@ -0,0 +1,232 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/profiler/internal/gpu/cupti_wrapper.h"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
CUptiResult CuptiWrapper::ActivityDisable(CUpti_ActivityKind kind) {
|
||||
return cuptiActivityDisable(kind);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::ActivityEnable(CUpti_ActivityKind kind) {
|
||||
return cuptiActivityEnable(kind);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::ActivityFlushAll(uint32_t flag) {
|
||||
return cuptiActivityFlushAll(flag);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::ActivityGetNextRecord(uint8_t* buffer,
|
||||
size_t valid_buffer_size_bytes,
|
||||
CUpti_Activity** record) {
|
||||
return cuptiActivityGetNextRecord(buffer, valid_buffer_size_bytes, record);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::ActivityGetNumDroppedRecords(CUcontext context,
|
||||
uint32_t stream_id,
|
||||
size_t* dropped) {
|
||||
return cuptiActivityGetNumDroppedRecords(context, stream_id, dropped);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::ActivityRegisterCallbacks(
|
||||
CUpti_BuffersCallbackRequestFunc func_buffer_requested,
|
||||
CUpti_BuffersCallbackCompleteFunc func_buffer_completed) {
|
||||
return cuptiActivityRegisterCallbacks(func_buffer_requested,
|
||||
func_buffer_completed);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::GetDeviceId(CUcontext context, uint32* deviceId) {
|
||||
return cuptiGetDeviceId(context, deviceId);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::GetTimestamp(uint64_t* timestamp) {
|
||||
return cuptiGetTimestamp(timestamp);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::Finalize() { return cuptiFinalize(); }
|
||||
|
||||
CUptiResult CuptiWrapper::EnableCallback(uint32_t enable,
|
||||
CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain,
|
||||
CUpti_CallbackId cbid) {
|
||||
return cuptiEnableCallback(enable, subscriber, domain, cbid);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EnableDomain(uint32_t enable,
|
||||
CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain) {
|
||||
return cuptiEnableDomain(enable, subscriber, domain);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::Subscribe(CUpti_SubscriberHandle* subscriber,
|
||||
CUpti_CallbackFunc callback,
|
||||
void* userdata) {
|
||||
return cuptiSubscribe(subscriber, callback, userdata);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::Unsubscribe(CUpti_SubscriberHandle subscriber) {
|
||||
return cuptiUnsubscribe(subscriber);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::DeviceEnumEventDomains(
|
||||
CUdevice device, size_t* array_size_bytes,
|
||||
CUpti_EventDomainID* domain_array) {
|
||||
return cuptiDeviceEnumEventDomains(device, array_size_bytes, domain_array);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::DeviceGetEventDomainAttribute(
|
||||
CUdevice device, CUpti_EventDomainID event_domain,
|
||||
CUpti_EventDomainAttribute attrib, size_t* value_size, void* value) {
|
||||
return cuptiDeviceGetEventDomainAttribute(device, event_domain, attrib,
|
||||
value_size, value);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::DisableKernelReplayMode(CUcontext context) {
|
||||
return cuptiDisableKernelReplayMode(context);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EnableKernelReplayMode(CUcontext context) {
|
||||
return cuptiEnableKernelReplayMode(context);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::DeviceGetNumEventDomains(CUdevice device,
|
||||
uint32_t* num_domains) {
|
||||
return cuptiDeviceGetNumEventDomains(device, num_domains);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventDomainEnumEvents(
|
||||
CUpti_EventDomainID event_domain, size_t* array_size_bytes,
|
||||
CUpti_EventID* event_array) {
|
||||
return cuptiEventDomainEnumEvents(event_domain, array_size_bytes,
|
||||
event_array);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventDomainGetNumEvents(
|
||||
CUpti_EventDomainID event_domain, uint32_t* num_events) {
|
||||
return cuptiEventDomainGetNumEvents(event_domain, num_events);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGetAttribute(CUpti_EventID event,
|
||||
CUpti_EventAttribute attrib,
|
||||
size_t* value_size, void* value) {
|
||||
return cuptiEventGetAttribute(event, attrib, value_size, value);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGetIdFromName(CUdevice device,
|
||||
const char* event_name,
|
||||
CUpti_EventID* event) {
|
||||
return cuptiEventGetIdFromName(device, event_name, event);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGroupDisable(CUpti_EventGroup event_group) {
|
||||
return cuptiEventGroupDisable(event_group);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGroupEnable(CUpti_EventGroup event_group) {
|
||||
return cuptiEventGroupEnable(event_group);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGroupGetAttribute(
|
||||
CUpti_EventGroup event_group, CUpti_EventGroupAttribute attrib,
|
||||
size_t* value_size, void* value) {
|
||||
return cuptiEventGroupGetAttribute(event_group, attrib, value_size, value);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGroupReadEvent(
|
||||
CUpti_EventGroup event_group, CUpti_ReadEventFlags flags,
|
||||
CUpti_EventID event, size_t* event_value_buffer_size_bytes,
|
||||
uint64_t* event_value_buffer) {
|
||||
return cuptiEventGroupReadEvent(event_group, flags, event,
|
||||
event_value_buffer_size_bytes,
|
||||
event_value_buffer);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGroupSetAttribute(
|
||||
CUpti_EventGroup event_group, CUpti_EventGroupAttribute attrib,
|
||||
size_t value_size, void* value) {
|
||||
return cuptiEventGroupSetAttribute(event_group, attrib, value_size, value);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGroupSetsCreate(
|
||||
CUcontext context, size_t event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array, CUpti_EventGroupSets** event_group_passes) {
|
||||
return cuptiEventGroupSetsCreate(context, event_id_array_size_bytes,
|
||||
event_id_array, event_group_passes);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::EventGroupSetsDestroy(
|
||||
CUpti_EventGroupSets* event_group_sets) {
|
||||
return cuptiEventGroupSetsDestroy(event_group_sets);
|
||||
}
|
||||
|
||||
// CUPTI metric API
|
||||
CUptiResult CuptiWrapper::DeviceEnumMetrics(CUdevice device,
|
||||
size_t* arraySizeBytes,
|
||||
CUpti_MetricID* metricArray) {
|
||||
return cuptiDeviceEnumMetrics(device, arraySizeBytes, metricArray);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::DeviceGetNumMetrics(CUdevice device,
|
||||
uint32_t* num_metrics) {
|
||||
return cuptiDeviceGetNumMetrics(device, num_metrics);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::MetricGetIdFromName(CUdevice device,
|
||||
const char* metric_name,
|
||||
CUpti_MetricID* metric) {
|
||||
return cuptiMetricGetIdFromName(device, metric_name, metric);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::MetricGetNumEvents(CUpti_MetricID metric,
|
||||
uint32_t* num_events) {
|
||||
return cuptiMetricGetNumEvents(metric, num_events);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::MetricEnumEvents(CUpti_MetricID metric,
|
||||
size_t* event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array) {
|
||||
return cuptiMetricEnumEvents(metric, event_id_array_size_bytes,
|
||||
event_id_array);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::MetricGetAttribute(CUpti_MetricID metric,
|
||||
CUpti_MetricAttribute attrib,
|
||||
size_t* value_size, void* value) {
|
||||
return cuptiMetricGetAttribute(metric, attrib, value_size, value);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::MetricGetValue(CUdevice device, CUpti_MetricID metric,
|
||||
size_t event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array,
|
||||
size_t event_value_array_size_bytes,
|
||||
uint64_t* event_value_array,
|
||||
uint64_t time_duration,
|
||||
CUpti_MetricValue* metric_value) {
|
||||
return cuptiMetricGetValue(device, metric, event_id_array_size_bytes,
|
||||
event_id_array, event_value_array_size_bytes,
|
||||
event_value_array, time_duration, metric_value);
|
||||
}
|
||||
|
||||
CUptiResult CuptiWrapper::GetResultString(CUptiResult result,
|
||||
const char** str) {
|
||||
return cuptiGetResultString(result, str);
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
175
tensorflow/core/profiler/internal/gpu/cupti_wrapper.h
Normal file
175
tensorflow/core/profiler/internal/gpu/cupti_wrapper.h
Normal file
@ -0,0 +1,175 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_
|
||||
#define TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "tensorflow/core/profiler/internal/gpu/cupti_interface.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
class CuptiWrapper : public tensorflow::profiler::CuptiInterface {
|
||||
public:
|
||||
CuptiWrapper() {}
|
||||
|
||||
~CuptiWrapper() override {}
|
||||
|
||||
// CUPTI activity API
|
||||
CUptiResult ActivityDisable(CUpti_ActivityKind kind) override;
|
||||
|
||||
CUptiResult ActivityEnable(CUpti_ActivityKind kind) override;
|
||||
|
||||
CUptiResult ActivityFlushAll(uint32_t flag) override;
|
||||
|
||||
CUptiResult ActivityGetNextRecord(uint8_t* buffer,
|
||||
size_t valid_buffer_size_bytes,
|
||||
CUpti_Activity** record) override;
|
||||
|
||||
CUptiResult ActivityGetNumDroppedRecords(CUcontext context,
|
||||
uint32_t stream_id,
|
||||
size_t* dropped) override;
|
||||
|
||||
CUptiResult ActivityRegisterCallbacks(
|
||||
CUpti_BuffersCallbackRequestFunc func_buffer_requested,
|
||||
CUpti_BuffersCallbackCompleteFunc func_buffer_completed) override;
|
||||
|
||||
CUptiResult GetDeviceId(CUcontext context, uint32* deviceId) override;
|
||||
|
||||
CUptiResult GetTimestamp(uint64_t* timestamp) override;
|
||||
|
||||
// cuptiFinalize is only defined in CUDA8 and above.
|
||||
// To enable it in CUDA8, the environment variable CUPTI_ENABLE_FINALIZE must
|
||||
// be set to 1.
|
||||
CUptiResult Finalize() override;
|
||||
|
||||
// CUPTI callback API
|
||||
CUptiResult EnableCallback(uint32_t enable, CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain,
|
||||
CUpti_CallbackId cbid) override;
|
||||
|
||||
CUptiResult EnableDomain(uint32_t enable, CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain) override;
|
||||
|
||||
CUptiResult Subscribe(CUpti_SubscriberHandle* subscriber,
|
||||
CUpti_CallbackFunc callback, void* userdata) override;
|
||||
|
||||
CUptiResult Unsubscribe(CUpti_SubscriberHandle subscriber) override;
|
||||
|
||||
// CUPTI event API
|
||||
CUptiResult DeviceEnumEventDomains(
|
||||
CUdevice device, size_t* array_size_bytes,
|
||||
CUpti_EventDomainID* domain_array) override;
|
||||
|
||||
CUptiResult DeviceGetEventDomainAttribute(CUdevice device,
|
||||
CUpti_EventDomainID event_domain,
|
||||
CUpti_EventDomainAttribute attrib,
|
||||
size_t* value_size,
|
||||
void* value) override;
|
||||
|
||||
CUptiResult DisableKernelReplayMode(CUcontext context) override;
|
||||
|
||||
CUptiResult EnableKernelReplayMode(CUcontext context) override;
|
||||
|
||||
CUptiResult DeviceGetNumEventDomains(CUdevice device,
|
||||
uint32_t* num_domains) override;
|
||||
|
||||
CUptiResult EventDomainEnumEvents(CUpti_EventDomainID event_domain,
|
||||
size_t* array_size_bytes,
|
||||
CUpti_EventID* event_array) override;
|
||||
|
||||
CUptiResult EventDomainGetNumEvents(CUpti_EventDomainID event_domain,
|
||||
uint32_t* num_events) override;
|
||||
|
||||
CUptiResult EventGetAttribute(CUpti_EventID event,
|
||||
CUpti_EventAttribute attrib, size_t* value_size,
|
||||
void* value) override;
|
||||
|
||||
CUptiResult EventGetIdFromName(CUdevice device, const char* event_name,
|
||||
CUpti_EventID* event) override;
|
||||
|
||||
CUptiResult EventGroupDisable(CUpti_EventGroup event_group) override;
|
||||
|
||||
CUptiResult EventGroupEnable(CUpti_EventGroup event_group) override;
|
||||
|
||||
CUptiResult EventGroupGetAttribute(CUpti_EventGroup event_group,
|
||||
CUpti_EventGroupAttribute attrib,
|
||||
size_t* value_size, void* value) override;
|
||||
|
||||
CUptiResult EventGroupReadEvent(CUpti_EventGroup event_group,
|
||||
CUpti_ReadEventFlags flags,
|
||||
CUpti_EventID event,
|
||||
size_t* event_value_buffer_size_bytes,
|
||||
uint64_t* event_value_buffer) override;
|
||||
|
||||
CUptiResult EventGroupSetAttribute(CUpti_EventGroup event_group,
|
||||
CUpti_EventGroupAttribute attrib,
|
||||
size_t value_size, void* value) override;
|
||||
|
||||
CUptiResult EventGroupSetsCreate(
|
||||
CUcontext context, size_t event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array,
|
||||
CUpti_EventGroupSets** event_group_passes) override;
|
||||
|
||||
CUptiResult EventGroupSetsDestroy(
|
||||
CUpti_EventGroupSets* event_group_sets) override;
|
||||
|
||||
// CUPTI metric API
|
||||
CUptiResult DeviceEnumMetrics(CUdevice device, size_t* arraySizeBytes,
|
||||
CUpti_MetricID* metricArray) override;
|
||||
|
||||
CUptiResult DeviceGetNumMetrics(CUdevice device,
|
||||
uint32_t* num_metrics) override;
|
||||
|
||||
CUptiResult MetricGetIdFromName(CUdevice device, const char* metric_name,
|
||||
CUpti_MetricID* metric) override;
|
||||
|
||||
CUptiResult MetricGetNumEvents(CUpti_MetricID metric,
|
||||
uint32_t* num_events) override;
|
||||
|
||||
CUptiResult MetricEnumEvents(CUpti_MetricID metric,
|
||||
size_t* event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array) override;
|
||||
|
||||
CUptiResult MetricGetAttribute(CUpti_MetricID metric,
|
||||
CUpti_MetricAttribute attrib,
|
||||
size_t* value_size, void* value) override;
|
||||
|
||||
CUptiResult MetricGetValue(CUdevice device, CUpti_MetricID metric,
|
||||
size_t event_id_array_size_bytes,
|
||||
CUpti_EventID* event_id_array,
|
||||
size_t event_value_array_size_bytes,
|
||||
uint64_t* event_value_array,
|
||||
uint64_t time_duration,
|
||||
CUpti_MetricValue* metric_value) override;
|
||||
|
||||
CUptiResult GetResultString(CUptiResult result, const char** str) override;
|
||||
|
||||
void CleanUp() override {}
|
||||
bool Disabled() const override { return false; }
|
||||
|
||||
private:
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(CuptiWrapper);
|
||||
};
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_PROFILER_INTERNAL_GPU_CUPTI_WRAPPER_H_
|
Loading…
Reference in New Issue
Block a user