diff --git a/tensorflow/core/platform/cupti_wrapper.h b/tensorflow/core/platform/cupti_wrapper.h deleted file mode 100644 index 9a17ab60c0d..00000000000 --- a/tensorflow/core/platform/cupti_wrapper.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_PLATFORM_CUPTI_WRAPPER_H_ -#define TENSORFLOW_CORE_PLATFORM_CUPTI_WRAPPER_H_ - -#include "tensorflow/core/platform/platform.h" - -#if defined(PLATFORM_GOOGLE) -#include "tensorflow/core/platform/google/cupti_wrapper.h" -#else -#include "tensorflow/core/platform/default/gpu/cupti_wrapper.h" -#endif - -#endif // TENSORFLOW_CORE_PLATFORM_CUPTI_WRAPPER_H_ diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index bd35e64ef47..6be5240cbfb 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -578,7 +578,7 @@ def tf_protos_grappler(): def tf_additional_cupti_wrapper_deps(): return [ - "//tensorflow/core/platform/default/gpu:cupti_wrapper", + "//tensorflow/stream_executor/cuda:cupti_stub", "@com_google_absl//absl/base", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", diff --git a/tensorflow/core/platform/default/device_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc index fdd934cdcaf..340d2d93720 100644 --- a/tensorflow/core/platform/default/device_tracer.cc +++ b/tensorflow/core/platform/default/device_tracer.cc @@ -25,13 +25,13 @@ limitations under the License. #include "absl/container/node_hash_map.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" +#include "cuda/extras/CUPTI/include/cupti.h" #include "tensorflow/core/common_runtime/step_stats_collector.h" #include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" -#include "tensorflow/core/platform/cupti_wrapper.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mem.h" @@ -47,10 +47,7 @@ Status ToStatus(CUptiResult result) { return Status::OK(); } const char* str = nullptr; - if (auto wrapper = - absl::make_unique()) { - wrapper->GetResultString(result, &str); - } + cuptiGetResultString(result, &str); return errors::Unavailable("CUPTI error: ", str ? str : ""); } @@ -164,13 +161,11 @@ class CudaEventRecorder { // and after kernel launches and memory copies. class CuptiCallbackHook { public: - CuptiCallbackHook() - : cupti_wrapper_(new perftools::gputools::profiler::CuptiWrapper()), - subscriber_(nullptr) {} + CuptiCallbackHook() : subscriber_(nullptr) {} Status Enable(CudaEventRecorder* recorder) { - TF_RETURN_IF_ERROR(ToStatus( - cupti_wrapper_->Subscribe(&subscriber_, &CuptiCallback, recorder))); + TF_RETURN_IF_ERROR( + ToStatus(cuptiSubscribe(&subscriber_, &CuptiCallback, recorder))); for (auto cbid : {CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel, CUPTI_DRIVER_TRACE_CBID_cuMemcpy, CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync, @@ -180,15 +175,13 @@ class CuptiCallbackHook { CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2, CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD_v2, CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2}) { - TF_RETURN_IF_ERROR(ToStatus(cupti_wrapper_->EnableCallback( + TF_RETURN_IF_ERROR(ToStatus(cuptiEnableCallback( /*enable=*/1, subscriber_, CUPTI_CB_DOMAIN_DRIVER_API, cbid))); } return Status::OK(); } - ~CuptiCallbackHook() { - LogIfError(ToStatus(cupti_wrapper_->Unsubscribe(subscriber_))); - } + ~CuptiCallbackHook() { LogIfError(ToStatus(cuptiUnsubscribe(subscriber_))); } private: static void CUPTIAPI CuptiCallback(void* userdata, @@ -312,7 +305,6 @@ class CuptiCallbackHook { } } - std::unique_ptr cupti_wrapper_; CUpti_SubscriberHandle subscriber_; }; } // namespace diff --git a/tensorflow/core/platform/default/gpu/BUILD b/tensorflow/core/platform/default/gpu/BUILD deleted file mode 100644 index 3965c7d2ec6..00000000000 --- a/tensorflow/core/platform/default/gpu/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -load( - "//tensorflow:tensorflow.bzl", - "tf_copts", - "tf_cuda_library", -) - -tf_cuda_library( - name = "cupti_wrapper", - srcs = [ - "cupti_wrapper.cc", - ], - hdrs = [ - "cupti_wrapper.h", - ], - copts = tf_copts(), - cuda_deps = [ - "//tensorflow/core:stream_executor", - "@local_config_cuda//cuda:cupti_headers", - ], - data = ["@local_config_cuda//cuda:cupti_dsos"], - visibility = ["//visibility:public"], -) diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc b/tensorflow/core/platform/default/gpu/cupti_wrapper.cc deleted file mode 100644 index 671d8cf8bf9..00000000000 --- a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/platform/default/gpu/cupti_wrapper.h" - -#if GOOGLE_CUDA - -#include - -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/platform/stream_executor.h" - -namespace perftools { -namespace gputools { -namespace profiler { - -namespace dynload { - -#define LIBCUPTI_WRAP(__name) \ - struct DynLoadShim__##__name { \ - static const char* kName; \ - using FuncPointerT = std::add_pointer::type; \ - template \ - CUptiResult operator()(Args... args) { \ - static auto fn = []() -> FuncPointerT { \ - auto handle_or = \ - stream_executor::internal::CachedDsoLoader::GetCuptiDsoHandle(); \ - if (!handle_or.ok()) return nullptr; \ - void* symbol; \ - stream_executor::port::Env::Default() \ - ->GetSymbolFromLibrary(handle_or.ValueOrDie(), kName, &symbol) \ - .IgnoreError(); \ - return reinterpret_cast(symbol); \ - }(); \ - if (fn == nullptr) return CUPTI_ERROR_UNKNOWN; \ - return fn(args...); \ - } \ - } __name; \ - const char* DynLoadShim__##__name::kName = #__name; - -LIBCUPTI_WRAP(cuptiActivityDisable); -LIBCUPTI_WRAP(cuptiActivityEnable); -LIBCUPTI_WRAP(cuptiActivityFlushAll); -LIBCUPTI_WRAP(cuptiActivityGetNextRecord); -LIBCUPTI_WRAP(cuptiActivityGetNumDroppedRecords); -LIBCUPTI_WRAP(cuptiActivityRegisterCallbacks); -LIBCUPTI_WRAP(cuptiGetTimestamp); -LIBCUPTI_WRAP(cuptiEnableCallback); -LIBCUPTI_WRAP(cuptiEnableDomain); -LIBCUPTI_WRAP(cuptiSubscribe); -LIBCUPTI_WRAP(cuptiUnsubscribe); -LIBCUPTI_WRAP(cuptiGetResultString); - -} // namespace dynload - -CUptiResult CuptiWrapper::ActivityDisable(CUpti_ActivityKind kind) { - return dynload::cuptiActivityDisable(kind); -} - -CUptiResult CuptiWrapper::ActivityEnable(CUpti_ActivityKind kind) { - return dynload::cuptiActivityEnable(kind); -} - -CUptiResult CuptiWrapper::ActivityFlushAll(uint32_t flag) { - return dynload::cuptiActivityFlushAll(flag); -} - -CUptiResult CuptiWrapper::ActivityGetNextRecord(uint8_t* buffer, - size_t valid_buffer_size_bytes, - CUpti_Activity** record) { - return dynload::cuptiActivityGetNextRecord(buffer, valid_buffer_size_bytes, - record); -} - -CUptiResult CuptiWrapper::ActivityGetNumDroppedRecords(CUcontext context, - uint32_t stream_id, - size_t* dropped) { - return dynload::cuptiActivityGetNumDroppedRecords(context, stream_id, - dropped); -} - -CUptiResult CuptiWrapper::ActivityRegisterCallbacks( - CUpti_BuffersCallbackRequestFunc func_buffer_requested, - CUpti_BuffersCallbackCompleteFunc func_buffer_completed) { - return dynload::cuptiActivityRegisterCallbacks(func_buffer_requested, - func_buffer_completed); -} - -CUptiResult CuptiWrapper::GetTimestamp(uint64_t* timestamp) { - return dynload::cuptiGetTimestamp(timestamp); -} - -CUptiResult CuptiWrapper::EnableCallback(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid) { - return dynload::cuptiEnableCallback(enable, subscriber, domain, cbid); -} - -CUptiResult CuptiWrapper::EnableDomain(uint32_t enable, - CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain) { - return dynload::cuptiEnableDomain(enable, subscriber, domain); -} - -CUptiResult CuptiWrapper::Subscribe(CUpti_SubscriberHandle* subscriber, - CUpti_CallbackFunc callback, - void* userdata) { - return dynload::cuptiSubscribe(subscriber, callback, userdata); -} - -CUptiResult CuptiWrapper::Unsubscribe(CUpti_SubscriberHandle subscriber) { - return dynload::cuptiUnsubscribe(subscriber); -} - -CUptiResult CuptiWrapper::GetResultString(CUptiResult result, - const char** str) { - return dynload::cuptiGetResultString(result, str); -} - -} // namespace profiler -} // namespace gputools -} // namespace perftools - -#endif // GOOGLE_CUDA diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.h b/tensorflow/core/platform/default/gpu/cupti_wrapper.h deleted file mode 100644 index b35a5ab4c31..00000000000 --- a/tensorflow/core/platform/default/gpu/cupti_wrapper.h +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_CUPTI_WRAPPER_H_ -#define TENSORFLOW_CORE_PLATFORM_DEFAULT_CUPTI_WRAPPER_H_ - -#if GOOGLE_CUDA - -#include -#include -#if defined(WIN32) -#include "extras/CUPTI/include/cupti.h" -#else -#include "cupti.h" -#endif -namespace perftools { -namespace gputools { -namespace profiler { - -// Wraps the CUPTI API so that we can dynamically load the library. -class CuptiWrapper { - public: - CuptiWrapper() {} - - // CUPTI activity API - CUptiResult ActivityDisable(CUpti_ActivityKind kind); - - CUptiResult ActivityEnable(CUpti_ActivityKind kind); - - CUptiResult ActivityFlushAll(uint32_t flag); - - CUptiResult ActivityGetNextRecord(uint8_t* buffer, - size_t valid_buffer_size_bytes, - CUpti_Activity** record); - - CUptiResult ActivityGetNumDroppedRecords(CUcontext context, - uint32_t stream_id, size_t* dropped); - - CUptiResult ActivityRegisterCallbacks( - CUpti_BuffersCallbackRequestFunc func_buffer_requested, - CUpti_BuffersCallbackCompleteFunc func_buffer_completed); - - CUptiResult GetDeviceId(CUcontext context, uint32_t* deviceId); - - CUptiResult GetTimestamp(uint64_t* timestamp); - - // CUPTI callback API - CUptiResult EnableCallback(uint32_t enable, CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid); - - CUptiResult EnableDomain(uint32_t enable, CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain); - - CUptiResult Subscribe(CUpti_SubscriberHandle* subscriber, - CUpti_CallbackFunc callback, void* userdata); - - CUptiResult Unsubscribe(CUpti_SubscriberHandle subscriber); - - CUptiResult GetResultString(CUptiResult result, const char** str); -}; - -} // namespace profiler -} // namespace gputools -} // namespace perftools - -#endif // GOOGLE_CUDA - -#endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_CUPTI_WRAPPER_H_ diff --git a/tensorflow/stream_executor/build_defs.bzl b/tensorflow/stream_executor/build_defs.bzl index 575ff639e75..469f5511e99 100644 --- a/tensorflow/stream_executor/build_defs.bzl +++ b/tensorflow/stream_executor/build_defs.bzl @@ -13,6 +13,9 @@ def tf_additional_cuda_driver_deps(): def tf_additional_cudnn_plugin_deps(): return [] +def tf_additional_cupti_stub_data(): + return ["@local_config_cuda//cuda:cupti_dsos"] + # Returns whether any GPU backend is configuered. def if_gpu_is_configured(x): if cuda_is_configured() or rocm_is_configured(): diff --git a/tensorflow/stream_executor/cuda/BUILD b/tensorflow/stream_executor/cuda/BUILD index 42d37424b25..ff5dc735018 100644 --- a/tensorflow/stream_executor/cuda/BUILD +++ b/tensorflow/stream_executor/cuda/BUILD @@ -10,6 +10,7 @@ load( "tf_additional_cuda_driver_deps", "tf_additional_cuda_platform_deps", "tf_additional_cudnn_plugin_deps", + "tf_additional_cupti_stub_data", ) load("//tensorflow:tensorflow.bzl", "tf_copts") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured") @@ -337,6 +338,18 @@ cc_library( alwayslink = True, ) +cc_library( + name = "cupti_stub", + srcs = if_cuda_is_configured(["cupti_stub.cc"]), + data = if_cuda_is_configured(tf_additional_cupti_stub_data()), + textual_hdrs = ["cupti_10_0.inc"], + deps = if_cuda_is_configured([ + "@local_config_cuda//cuda:cupti_headers", + "//tensorflow/stream_executor/lib", + "//tensorflow/stream_executor/platform:dso_loader", + ]), +) + cc_library( name = "cuda_kernel", srcs = if_cuda_is_configured(["cuda_kernel.cc"]), diff --git a/tensorflow/stream_executor/cuda/cupti_10_0.inc b/tensorflow/stream_executor/cuda/cupti_10_0.inc new file mode 100644 index 00000000000..53a06cd8f15 --- /dev/null +++ b/tensorflow/stream_executor/cuda/cupti_10_0.inc @@ -0,0 +1,753 @@ +// Auto-generated, do not edit. + +extern "C" { +CUptiResult CUPTIAPI cuptiGetResultString(CUptiResult result, + const char** str) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUptiResult, const char**); + static auto func_ptr = LoadSymbol("cuptiGetResultString"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(result, str); +} + +CUptiResult CUPTIAPI cuptiGetVersion(uint32_t* version) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint32_t*); + static auto func_ptr = LoadSymbol("cuptiGetVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(version); +} + +CUptiResult CUPTIAPI cuptiSupportedDomains(size_t* domainCount, + CUpti_DomainTable* domainTable) { + using FuncPtr = CUptiResult(CUPTIAPI*)(size_t*, CUpti_DomainTable*); + static auto func_ptr = LoadSymbol("cuptiSupportedDomains"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(domainCount, domainTable); +} + +CUptiResult CUPTIAPI cuptiSubscribe(CUpti_SubscriberHandle* subscriber, + CUpti_CallbackFunc callback, + void* userdata) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_SubscriberHandle*, + CUpti_CallbackFunc, void*); + static auto func_ptr = LoadSymbol("cuptiSubscribe"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(subscriber, callback, userdata); +} + +CUptiResult CUPTIAPI cuptiUnsubscribe(CUpti_SubscriberHandle subscriber) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_SubscriberHandle); + static auto func_ptr = LoadSymbol("cuptiUnsubscribe"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(subscriber); +} + +CUptiResult CUPTIAPI cuptiGetCallbackState(uint32_t* enable, + CUpti_SubscriberHandle subscriber, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(uint32_t*, CUpti_SubscriberHandle, + CUpti_CallbackDomain, CUpti_CallbackId); + static auto func_ptr = LoadSymbol("cuptiGetCallbackState"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(enable, subscriber, domain, cbid); +} + +CUptiResult CUPTIAPI cuptiEnableCallback(uint32_t enable, + CUpti_SubscriberHandle subscriber, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid) { + using FuncPtr = CUptiResult(CUPTIAPI*)( + uint32_t, CUpti_SubscriberHandle, CUpti_CallbackDomain, CUpti_CallbackId); + static auto func_ptr = LoadSymbol("cuptiEnableCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(enable, subscriber, domain, cbid); +} + +CUptiResult CUPTIAPI cuptiEnableDomain(uint32_t enable, + CUpti_SubscriberHandle subscriber, + CUpti_CallbackDomain domain) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint32_t, CUpti_SubscriberHandle, + CUpti_CallbackDomain); + static auto func_ptr = LoadSymbol("cuptiEnableDomain"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(enable, subscriber, domain); +} + +CUptiResult CUPTIAPI cuptiEnableAllDomains(uint32_t enable, + CUpti_SubscriberHandle subscriber) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint32_t, CUpti_SubscriberHandle); + static auto func_ptr = LoadSymbol("cuptiEnableAllDomains"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(enable, subscriber); +} + +CUptiResult CUPTIAPI cuptiGetCallbackName(CUpti_CallbackDomain domain, + uint32_t cbid, const char** name) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_CallbackDomain, uint32_t, const char**); + static auto func_ptr = LoadSymbol("cuptiGetCallbackName"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(domain, cbid, name); +} + +CUptiResult CUPTIAPI +cuptiSetEventCollectionMode(CUcontext context, CUpti_EventCollectionMode mode) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, CUpti_EventCollectionMode); + static auto func_ptr = LoadSymbol("cuptiSetEventCollectionMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, mode); +} + +CUptiResult CUPTIAPI cuptiDeviceGetAttribute(CUdevice device, + CUpti_DeviceAttribute attrib, + size_t* valueSize, void* value) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUdevice, CUpti_DeviceAttribute, size_t*, void*); + static auto func_ptr = LoadSymbol("cuptiDeviceGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, attrib, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiDeviceGetTimestamp(CUcontext context, + uint64_t* timestamp) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, uint64_t*); + static auto func_ptr = LoadSymbol("cuptiDeviceGetTimestamp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, timestamp); +} + +CUptiResult CUPTIAPI cuptiDeviceGetNumEventDomains(CUdevice device, + uint32_t* numDomains) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUdevice, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiDeviceGetNumEventDomains"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, numDomains); +} + +CUptiResult CUPTIAPI cuptiDeviceEnumEventDomains( + CUdevice device, size_t* arraySizeBytes, CUpti_EventDomainID* domainArray) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUdevice, size_t*, CUpti_EventDomainID*); + static auto func_ptr = LoadSymbol("cuptiDeviceEnumEventDomains"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, arraySizeBytes, domainArray); +} + +CUptiResult CUPTIAPI cuptiDeviceGetEventDomainAttribute( + CUdevice device, CUpti_EventDomainID eventDomain, + CUpti_EventDomainAttribute attrib, size_t* valueSize, void* value) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUdevice, CUpti_EventDomainID, + CUpti_EventDomainAttribute, size_t*, void*); + static auto func_ptr = + LoadSymbol("cuptiDeviceGetEventDomainAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, eventDomain, attrib, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiGetNumEventDomains(uint32_t* numDomains) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint32_t*); + static auto func_ptr = LoadSymbol("cuptiGetNumEventDomains"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numDomains); +} + +CUptiResult CUPTIAPI cuptiEnumEventDomains(size_t* arraySizeBytes, + CUpti_EventDomainID* domainArray) { + using FuncPtr = CUptiResult(CUPTIAPI*)(size_t*, CUpti_EventDomainID*); + static auto func_ptr = LoadSymbol("cuptiEnumEventDomains"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(arraySizeBytes, domainArray); +} + +CUptiResult CUPTIAPI cuptiEventDomainGetAttribute( + CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, + size_t* valueSize, void* value) { + using FuncPtr = CUptiResult(CUPTIAPI*)( + CUpti_EventDomainID, CUpti_EventDomainAttribute, size_t*, void*); + static auto func_ptr = LoadSymbol("cuptiEventDomainGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventDomain, attrib, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiEventDomainGetNumEvents( + CUpti_EventDomainID eventDomain, uint32_t* numEvents) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventDomainID, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiEventDomainGetNumEvents"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventDomain, numEvents); +} + +CUptiResult CUPTIAPI cuptiEventDomainEnumEvents(CUpti_EventDomainID eventDomain, + size_t* arraySizeBytes, + CUpti_EventID* eventArray) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_EventDomainID, size_t*, CUpti_EventID*); + static auto func_ptr = LoadSymbol("cuptiEventDomainEnumEvents"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventDomain, arraySizeBytes, eventArray); +} + +CUptiResult CUPTIAPI cuptiEventGetAttribute(CUpti_EventID event, + CUpti_EventAttribute attrib, + size_t* valueSize, void* value) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventID, CUpti_EventAttribute, + size_t*, void*); + static auto func_ptr = LoadSymbol("cuptiEventGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event, attrib, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiEventGetIdFromName(CUdevice device, + const char* eventName, + CUpti_EventID* event) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUdevice, const char*, CUpti_EventID*); + static auto func_ptr = LoadSymbol("cuptiEventGetIdFromName"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, eventName, event); +} + +CUptiResult CUPTIAPI cuptiEventGroupCreate(CUcontext context, + CUpti_EventGroup* eventGroup, + uint32_t flags) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUcontext, CUpti_EventGroup*, uint32_t); + static auto func_ptr = LoadSymbol("cuptiEventGroupCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, eventGroup, flags); +} + +CUptiResult CUPTIAPI cuptiEventGroupDestroy(CUpti_EventGroup eventGroup) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup); + static auto func_ptr = LoadSymbol("cuptiEventGroupDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup); +} + +CUptiResult CUPTIAPI cuptiEventGroupGetAttribute( + CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, + size_t* valueSize, void* value) { + using FuncPtr = CUptiResult(CUPTIAPI*)( + CUpti_EventGroup, CUpti_EventGroupAttribute, size_t*, void*); + static auto func_ptr = LoadSymbol("cuptiEventGroupGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup, attrib, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiEventGroupSetAttribute( + CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, + size_t valueSize, void* value) { + using FuncPtr = CUptiResult(CUPTIAPI*)( + CUpti_EventGroup, CUpti_EventGroupAttribute, size_t, void*); + static auto func_ptr = LoadSymbol("cuptiEventGroupSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup, attrib, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiEventGroupAddEvent(CUpti_EventGroup eventGroup, + CUpti_EventID event) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup, CUpti_EventID); + static auto func_ptr = LoadSymbol("cuptiEventGroupAddEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup, event); +} + +CUptiResult CUPTIAPI cuptiEventGroupRemoveEvent(CUpti_EventGroup eventGroup, + CUpti_EventID event) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup, CUpti_EventID); + static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup, event); +} + +CUptiResult CUPTIAPI +cuptiEventGroupRemoveAllEvents(CUpti_EventGroup eventGroup) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup); + static auto func_ptr = LoadSymbol("cuptiEventGroupRemoveAllEvents"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup); +} + +CUptiResult CUPTIAPI +cuptiEventGroupResetAllEvents(CUpti_EventGroup eventGroup) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup); + static auto func_ptr = LoadSymbol("cuptiEventGroupResetAllEvents"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup); +} + +CUptiResult CUPTIAPI cuptiEventGroupEnable(CUpti_EventGroup eventGroup) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup); + static auto func_ptr = LoadSymbol("cuptiEventGroupEnable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup); +} + +CUptiResult CUPTIAPI cuptiEventGroupDisable(CUpti_EventGroup eventGroup) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup); + static auto func_ptr = LoadSymbol("cuptiEventGroupDisable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup); +} + +CUptiResult CUPTIAPI cuptiEventGroupReadEvent(CUpti_EventGroup eventGroup, + CUpti_ReadEventFlags flags, + CUpti_EventID event, + size_t* eventValueBufferSizeBytes, + uint64_t* eventValueBuffer) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroup, CUpti_ReadEventFlags, + CUpti_EventID, size_t*, uint64_t*); + static auto func_ptr = LoadSymbol("cuptiEventGroupReadEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup, flags, event, eventValueBufferSizeBytes, + eventValueBuffer); +} + +CUptiResult CUPTIAPI cuptiEventGroupReadAllEvents( + CUpti_EventGroup eventGroup, CUpti_ReadEventFlags flags, + size_t* eventValueBufferSizeBytes, uint64_t* eventValueBuffer, + size_t* eventIdArraySizeBytes, CUpti_EventID* eventIdArray, + size_t* numEventIdsRead) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_EventGroup, CUpti_ReadEventFlags, size_t*, + uint64_t*, size_t*, CUpti_EventID*, size_t*); + static auto func_ptr = LoadSymbol("cuptiEventGroupReadAllEvents"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroup, flags, eventValueBufferSizeBytes, + eventValueBuffer, eventIdArraySizeBytes, eventIdArray, + numEventIdsRead); +} + +CUptiResult CUPTIAPI cuptiEventGroupSetsCreate( + CUcontext context, size_t eventIdArraySizeBytes, + CUpti_EventID* eventIdArray, CUpti_EventGroupSets** eventGroupPasses) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, size_t, CUpti_EventID*, + CUpti_EventGroupSets**); + static auto func_ptr = LoadSymbol("cuptiEventGroupSetsCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, eventIdArraySizeBytes, eventIdArray, + eventGroupPasses); +} + +CUptiResult CUPTIAPI +cuptiEventGroupSetsDestroy(CUpti_EventGroupSets* eventGroupSets) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroupSets*); + static auto func_ptr = LoadSymbol("cuptiEventGroupSetsDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroupSets); +} + +CUptiResult CUPTIAPI +cuptiEventGroupSetEnable(CUpti_EventGroupSet* eventGroupSet) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroupSet*); + static auto func_ptr = LoadSymbol("cuptiEventGroupSetEnable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroupSet); +} + +CUptiResult CUPTIAPI +cuptiEventGroupSetDisable(CUpti_EventGroupSet* eventGroupSet) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_EventGroupSet*); + static auto func_ptr = LoadSymbol("cuptiEventGroupSetDisable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(eventGroupSet); +} + +CUptiResult CUPTIAPI cuptiEnableKernelReplayMode(CUcontext context) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext); + static auto func_ptr = LoadSymbol("cuptiEnableKernelReplayMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context); +} + +CUptiResult CUPTIAPI cuptiDisableKernelReplayMode(CUcontext context) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext); + static auto func_ptr = LoadSymbol("cuptiDisableKernelReplayMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context); +} + +CUptiResult CUPTIAPI cuptiKernelReplaySubscribeUpdate( + CUpti_KernelReplayUpdateFunc updateFunc, void* customData) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_KernelReplayUpdateFunc, void*); + static auto func_ptr = + LoadSymbol("cuptiKernelReplaySubscribeUpdate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(updateFunc, customData); +} + +CUptiResult CUPTIAPI cuptiGetNumMetrics(uint32_t* numMetrics) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint32_t*); + static auto func_ptr = LoadSymbol("cuptiGetNumMetrics"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numMetrics); +} + +CUptiResult CUPTIAPI cuptiEnumMetrics(size_t* arraySizeBytes, + CUpti_MetricID* metricArray) { + using FuncPtr = CUptiResult(CUPTIAPI*)(size_t*, CUpti_MetricID*); + static auto func_ptr = LoadSymbol("cuptiEnumMetrics"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(arraySizeBytes, metricArray); +} + +CUptiResult CUPTIAPI cuptiDeviceGetNumMetrics(CUdevice device, + uint32_t* numMetrics) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUdevice, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiDeviceGetNumMetrics"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, numMetrics); +} + +CUptiResult CUPTIAPI cuptiDeviceEnumMetrics(CUdevice device, + size_t* arraySizeBytes, + CUpti_MetricID* metricArray) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUdevice, size_t*, CUpti_MetricID*); + static auto func_ptr = LoadSymbol("cuptiDeviceEnumMetrics"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, arraySizeBytes, metricArray); +} + +CUptiResult CUPTIAPI cuptiMetricGetAttribute(CUpti_MetricID metric, + CUpti_MetricAttribute attrib, + size_t* valueSize, void* value) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_MetricID, CUpti_MetricAttribute, + size_t*, void*); + static auto func_ptr = LoadSymbol("cuptiMetricGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(metric, attrib, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiMetricGetIdFromName(CUdevice device, + const char* metricName, + CUpti_MetricID* metric) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUdevice, const char*, CUpti_MetricID*); + static auto func_ptr = LoadSymbol("cuptiMetricGetIdFromName"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, metricName, metric); +} + +CUptiResult CUPTIAPI cuptiMetricGetNumEvents(CUpti_MetricID metric, + uint32_t* numEvents) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_MetricID, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiMetricGetNumEvents"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(metric, numEvents); +} + +CUptiResult CUPTIAPI cuptiMetricEnumEvents(CUpti_MetricID metric, + size_t* eventIdArraySizeBytes, + CUpti_EventID* eventIdArray) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_MetricID, size_t*, CUpti_EventID*); + static auto func_ptr = LoadSymbol("cuptiMetricEnumEvents"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(metric, eventIdArraySizeBytes, eventIdArray); +} + +CUptiResult CUPTIAPI cuptiMetricGetNumProperties(CUpti_MetricID metric, + uint32_t* numProp) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_MetricID, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiMetricGetNumProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(metric, numProp); +} + +CUptiResult CUPTIAPI +cuptiMetricEnumProperties(CUpti_MetricID metric, size_t* propIdArraySizeBytes, + CUpti_MetricPropertyID* propIdArray) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_MetricID, size_t*, CUpti_MetricPropertyID*); + static auto func_ptr = LoadSymbol("cuptiMetricEnumProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(metric, propIdArraySizeBytes, propIdArray); +} + +CUptiResult CUPTIAPI +cuptiMetricGetRequiredEventGroupSets(CUcontext context, CUpti_MetricID metric, + CUpti_EventGroupSets** eventGroupSets) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUcontext, CUpti_MetricID, CUpti_EventGroupSets**); + static auto func_ptr = + LoadSymbol("cuptiMetricGetRequiredEventGroupSets"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, metric, eventGroupSets); +} + +CUptiResult CUPTIAPI cuptiMetricCreateEventGroupSets( + CUcontext context, size_t metricIdArraySizeBytes, + CUpti_MetricID* metricIdArray, CUpti_EventGroupSets** eventGroupPasses) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, size_t, CUpti_MetricID*, + CUpti_EventGroupSets**); + static auto func_ptr = LoadSymbol("cuptiMetricCreateEventGroupSets"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, metricIdArraySizeBytes, metricIdArray, + eventGroupPasses); +} + +CUptiResult CUPTIAPI cuptiMetricGetValue(CUdevice device, CUpti_MetricID metric, + size_t eventIdArraySizeBytes, + CUpti_EventID* eventIdArray, + size_t eventValueArraySizeBytes, + uint64_t* eventValueArray, + uint64_t timeDuration, + CUpti_MetricValue* metricValue) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUdevice, CUpti_MetricID, size_t, CUpti_EventID*, + size_t, uint64_t*, uint64_t, CUpti_MetricValue*); + static auto func_ptr = LoadSymbol("cuptiMetricGetValue"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, metric, eventIdArraySizeBytes, eventIdArray, + eventValueArraySizeBytes, eventValueArray, timeDuration, + metricValue); +} + +CUptiResult CUPTIAPI cuptiMetricGetValue2( + CUpti_MetricID metric, size_t eventIdArraySizeBytes, + CUpti_EventID* eventIdArray, size_t eventValueArraySizeBytes, + uint64_t* eventValueArray, size_t propIdArraySizeBytes, + CUpti_MetricPropertyID* propIdArray, size_t propValueArraySizeBytes, + uint64_t* propValueArray, CUpti_MetricValue* metricValue) { + using FuncPtr = CUptiResult(CUPTIAPI*)( + CUpti_MetricID, size_t, CUpti_EventID*, size_t, uint64_t*, size_t, + CUpti_MetricPropertyID*, size_t, uint64_t*, CUpti_MetricValue*); + static auto func_ptr = LoadSymbol("cuptiMetricGetValue2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(metric, eventIdArraySizeBytes, eventIdArray, + eventValueArraySizeBytes, eventValueArray, + propIdArraySizeBytes, propIdArray, propValueArraySizeBytes, + propValueArray, metricValue); +} + +CUptiResult CUPTIAPI cuptiGetTimestamp(uint64_t* timestamp) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint64_t*); + static auto func_ptr = LoadSymbol("cuptiGetTimestamp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(timestamp); +} + +CUptiResult CUPTIAPI cuptiGetContextId(CUcontext context, uint32_t* contextId) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiGetContextId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, contextId); +} + +CUptiResult CUPTIAPI cuptiGetStreamId(CUcontext context, CUstream stream, + uint32_t* streamId) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, CUstream, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiGetStreamId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, stream, streamId); +} + +CUptiResult CUPTIAPI cuptiGetStreamIdEx(CUcontext context, CUstream stream, + uint8_t perThreadStream, + uint32_t* streamId) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUcontext, CUstream, uint8_t, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiGetStreamIdEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, stream, perThreadStream, streamId); +} + +CUptiResult CUPTIAPI cuptiGetDeviceId(CUcontext context, uint32_t* deviceId) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, uint32_t*); + static auto func_ptr = LoadSymbol("cuptiGetDeviceId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, deviceId); +} + +CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_ActivityKind); + static auto func_ptr = LoadSymbol("cuptiActivityEnable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(kind); +} + +CUptiResult CUPTIAPI cuptiActivityDisable(CUpti_ActivityKind kind) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_ActivityKind); + static auto func_ptr = LoadSymbol("cuptiActivityDisable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(kind); +} + +CUptiResult CUPTIAPI cuptiActivityEnableContext(CUcontext context, + CUpti_ActivityKind kind) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, CUpti_ActivityKind); + static auto func_ptr = LoadSymbol("cuptiActivityEnableContext"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, kind); +} + +CUptiResult CUPTIAPI cuptiActivityDisableContext(CUcontext context, + CUpti_ActivityKind kind) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, CUpti_ActivityKind); + static auto func_ptr = LoadSymbol("cuptiActivityDisableContext"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, kind); +} + +CUptiResult CUPTIAPI cuptiActivityGetNumDroppedRecords(CUcontext context, + uint32_t streamId, + size_t* dropped) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, uint32_t, size_t*); + static auto func_ptr = + LoadSymbol("cuptiActivityGetNumDroppedRecords"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, streamId, dropped); +} + +CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t* buffer, + size_t validBufferSizeBytes, + CUpti_Activity** record) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint8_t*, size_t, CUpti_Activity**); + static auto func_ptr = LoadSymbol("cuptiActivityGetNextRecord"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(buffer, validBufferSizeBytes, record); +} + +CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks( + CUpti_BuffersCallbackRequestFunc funcBufferRequested, + CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_BuffersCallbackRequestFunc, + CUpti_BuffersCallbackCompleteFunc); + static auto func_ptr = LoadSymbol("cuptiActivityRegisterCallbacks"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(funcBufferRequested, funcBufferCompleted); +} + +CUptiResult CUPTIAPI cuptiActivityFlush(CUcontext context, uint32_t streamId, + uint32_t flag) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUcontext, uint32_t, uint32_t); + static auto func_ptr = LoadSymbol("cuptiActivityFlush"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, streamId, flag); +} + +CUptiResult CUPTIAPI cuptiActivityFlushAll(uint32_t flag) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint32_t); + static auto func_ptr = LoadSymbol("cuptiActivityFlushAll"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(flag); +} + +CUptiResult CUPTIAPI cuptiActivityGetAttribute(CUpti_ActivityAttribute attr, + size_t* valueSize, void* value) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_ActivityAttribute, size_t*, void*); + static auto func_ptr = LoadSymbol("cuptiActivityGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attr, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiActivitySetAttribute(CUpti_ActivityAttribute attr, + size_t* valueSize, void* value) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_ActivityAttribute, size_t*, void*); + static auto func_ptr = LoadSymbol("cuptiActivitySetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attr, valueSize, value); +} + +CUptiResult CUPTIAPI cuptiActivityConfigureUnifiedMemoryCounter( + CUpti_ActivityUnifiedMemoryCounterConfig* config, uint32_t count) { + using FuncPtr = CUptiResult(CUPTIAPI*)( + CUpti_ActivityUnifiedMemoryCounterConfig*, uint32_t); + static auto func_ptr = + LoadSymbol("cuptiActivityConfigureUnifiedMemoryCounter"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(config, count); +} + +CUptiResult CUPTIAPI +cuptiGetAutoBoostState(CUcontext context, CUpti_ActivityAutoBoostState* state) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUcontext, CUpti_ActivityAutoBoostState*); + static auto func_ptr = LoadSymbol("cuptiGetAutoBoostState"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(context, state); +} + +CUptiResult CUPTIAPI cuptiActivityConfigurePCSampling( + CUcontext ctx, CUpti_ActivityPCSamplingConfig* config) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUcontext, CUpti_ActivityPCSamplingConfig*); + static auto func_ptr = + LoadSymbol("cuptiActivityConfigurePCSampling"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctx, config); +} + +CUptiResult CUPTIAPI cuptiGetLastError(void) { + using FuncPtr = CUptiResult(CUPTIAPI*)(); + static auto func_ptr = LoadSymbol("cuptiGetLastError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +CUptiResult CUPTIAPI cuptiSetThreadIdType(CUpti_ActivityThreadIdType type) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_ActivityThreadIdType); + static auto func_ptr = LoadSymbol("cuptiSetThreadIdType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(type); +} + +CUptiResult CUPTIAPI cuptiGetThreadIdType(CUpti_ActivityThreadIdType* type) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUpti_ActivityThreadIdType*); + static auto func_ptr = LoadSymbol("cuptiGetThreadIdType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(type); +} + +CUptiResult CUPTIAPI cuptiComputeCapabilitySupported(int major, int minor, + int* support) { + using FuncPtr = CUptiResult(CUPTIAPI*)(int, int, int*); + static auto func_ptr = LoadSymbol("cuptiComputeCapabilitySupported"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(major, minor, support); +} + +CUptiResult CUPTIAPI cuptiDeviceSupported(CUdevice dev, int* support) { + using FuncPtr = CUptiResult(CUPTIAPI*)(CUdevice, int*); + static auto func_ptr = LoadSymbol("cuptiDeviceSupported"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dev, support); +} + +CUptiResult CUPTIAPI cuptiFinalize(void) { + using FuncPtr = CUptiResult(CUPTIAPI*)(); + static auto func_ptr = LoadSymbol("cuptiFinalize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +CUptiResult CUPTIAPI cuptiActivityPushExternalCorrelationId( + CUpti_ExternalCorrelationKind kind, uint64_t id) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_ExternalCorrelationKind, uint64_t); + static auto func_ptr = + LoadSymbol("cuptiActivityPushExternalCorrelationId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(kind, id); +} + +CUptiResult CUPTIAPI cuptiActivityPopExternalCorrelationId( + CUpti_ExternalCorrelationKind kind, uint64_t* lastId) { + using FuncPtr = + CUptiResult(CUPTIAPI*)(CUpti_ExternalCorrelationKind, uint64_t*); + static auto func_ptr = + LoadSymbol("cuptiActivityPopExternalCorrelationId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(kind, lastId); +} + +CUptiResult CUPTIAPI cuptiActivityEnableLatencyTimestamps(uint8_t enable) { + using FuncPtr = CUptiResult(CUPTIAPI*)(uint8_t); + static auto func_ptr = + LoadSymbol("cuptiActivityEnableLatencyTimestamps"); + return func_ptr(enable); +} +} // extern "C" diff --git a/tensorflow/stream_executor/cuda/cupti_stub.cc b/tensorflow/stream_executor/cuda/cupti_stub.cc new file mode 100644 index 00000000000..ef883f9bf98 --- /dev/null +++ b/tensorflow/stream_executor/cuda/cupti_stub.cc @@ -0,0 +1,59 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "cuda/extras/CUPTI/include/cupti.h" +// IWYU pragma: no_include "perftools/gputools/executor/stream_executor.h" +#include "tensorflow/stream_executor/lib/env.h" +#include "tensorflow/stream_executor/platform/dso_loader.h" + +// Implements the CUPTI API by forwarding to CUPTI loaded from the DSO. + +namespace { +// Returns DSO handle or null if loading the DSO fails. +void* GetDsoHandle() { +#ifdef PLATFORM_GOOGLE + return nullptr; +#else + static auto handle = []() -> void* { + auto handle_or = stream_executor::internal::DsoLoader::GetCuptiDsoHandle(); + if (!handle_or.ok()) return nullptr; + return handle_or.ValueOrDie(); + }(); + return handle; +#endif +} + +template +T LoadSymbol(const char* symbol_name) { + void* symbol = nullptr; + if (auto handle = GetDsoHandle()) { + stream_executor::port::Env::Default() + ->GetSymbolFromLibrary(handle, symbol_name, &symbol) + .IgnoreError(); + } + return reinterpret_cast(symbol); +} + +CUptiResult GetSymbolNotFoundError() { return CUPTI_ERROR_UNKNOWN; } +} // namespace + +// For now we only need one stub implementation. We will need to generate +// a new file when CUPTI breaks backwards compatibility (has not been the case +// for quite a while) or if we want to use functionality introduced in a new +// version. +// +// Calling a function that is not yet available in the loaded CUPTI version will +// return CUPTI_ERROR_UNKNOWN. +#include "tensorflow/stream_executor/cuda/cupti_10_0.inc"