From 642db2faf55e3ca7acd06ea236e9d47f63190718 Mon Sep 17 00:00:00 2001
From: Anna R
Date: Tue, 11 Aug 2020 19:56:45 -0700
Subject: [PATCH] Remove SharedMemoryConfig since it is not used anywhere.

PiperOrigin-RevId: 326154532
Change-Id: I13be21f577226c48c7e0d5bfc7efb787f0422e85
---
 .../xla/service/interpreter/executor.h        | 10 --
 tensorflow/stream_executor/BUILD              | 12 ---
 .../stream_executor/cuda/cuda_gpu_executor.cc | 98 ++++++-------------
 tensorflow/stream_executor/gpu/gpu_executor.h |  4 -
 .../stream_executor/host/host_gpu_executor.h  | 14 ---
 .../stream_executor/rocm/rocm_gpu_executor.cc | 71 +++-----------
 .../stream_executor/shared_memory_config.h    | 34 -------
 .../stream_executor_internal.h                |  4 -
 .../stream_executor/stream_executor_pimpl.cc  | 19 +---
 .../stream_executor/stream_executor_pimpl.h   | 23 ++---
 tensorflow/stream_executor/tpu/tpu_executor.h | 12 +--
 11 files changed, 54 insertions(+), 247 deletions(-)
 delete mode 100644 tensorflow/stream_executor/shared_memory_config.h

diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h
index 9e4bdeb2b2d..9416b11a07e 100644
--- a/tensorflow/compiler/xla/service/interpreter/executor.h
+++ b/tensorflow/compiler/xla/service/interpreter/executor.h
@@ -38,7 +38,6 @@ limitations under the License.
 #include "tensorflow/stream_executor/launch_dim.h"
 #include "tensorflow/stream_executor/plugin.h"
 #include "tensorflow/stream_executor/rng.h"
-#include "tensorflow/stream_executor/shared_memory_config.h"
 #include "tensorflow/stream_executor/stream.h"
 #include "tensorflow/stream_executor/stream_executor.h"
 #include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -182,15 +181,6 @@ class XlaInterpreterExecutor : public internal::StreamExecutorInterface {
     return true;
   }
 
-  SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
-    return SharedMemoryConfig::kDefault;
-  }
-
-  port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override {
-    return port::Status{port::error::UNIMPLEMENTED,
-                        "Shared memory not supported"};
-  }
-
   std::unique_ptr<internal::EventInterface> CreateEventImplementation()
       override {
     return nullptr;
diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index 871576f6cef..22aa60a70a4 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -67,7 +67,6 @@ cc_library(
         "plugin.h",
         "plugin_registry.h",
         "rng.h",
-        "shared_memory_config.h",
         "stream_executor_pimpl.h",
         "temporary_device_memory.h",
         "temporary_memory_manager.h",
@@ -123,7 +122,6 @@ cc_library(
         "multi_platform_manager.h",
         "platform.h",
         "plugin_registry.h",
-        "shared_memory_config.h",
        "stream_executor.h",
        "stream_executor_internal.h",
        "timer.h",
@@ -173,11 +171,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "shared_memory_config",
-    hdrs = ["shared_memory_config.h"],
-)
-
 # Aliases for backwards compatibility.
 alias(
     name = "stream_header",
@@ -343,7 +336,6 @@ cc_library(
         "kernel_cache_config.h",
         "kernel_spec.h",
         "platform.h",
-        "shared_memory_config.h",
         "stream.h",
         "stream_executor_internal.h",
         "trace_listener.h",
@@ -455,7 +447,6 @@ cc_library(
         "stream_executor_internal.cc",
     ],
     hdrs = [
-        "shared_memory_config.h",
        "stream_executor_internal.h",
     ],
     deps = [
@@ -484,7 +475,6 @@ cc_library(
         "dnn.h",
         "kernel.h",
         "kernel_cache_config.h",
-        "shared_memory_config.h",
         "stream_executor_pimpl.h",
     ],
     visibility = ["//visibility:public"],
@@ -569,7 +559,6 @@ cc_library(
         "plugin.h",
         "plugin_registry.h",
         "rng.h",
-        "shared_memory_config.h",
         "stream.h",
         "stream_executor.h",
         "stream_executor_internal.h",
@@ -619,7 +608,6 @@ cc_library(
         "plugin.h",
         "plugin_registry.h",
         "rng.h",
-        "shared_memory_config.h",
         "stream.h",
         "stream_executor.h",
         "stream_executor_internal.h",
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 79a027f1255..d649d00ded9 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -101,12 +101,12 @@ static GpuTimer* AsGpuTimer(Timer* timer) {
 // N.B. we must lose constness in order to pass a suitable type to the existing
 // libcuda APIs, so the caller should take care to only pass the result of const
 // GPU memory conversions to libcuda functions which will honor constness.
-static CUdeviceptr AsCudaDevicePtr(const DeviceMemoryBase &gpu_mem) {
+static CUdeviceptr AsCudaDevicePtr(const DeviceMemoryBase& gpu_mem) {
   return reinterpret_cast<CUdeviceptr>(gpu_mem.opaque());
 }
 
 // See description on const version above.
-static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase *gpu_mem) {
+static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase* gpu_mem) {
   return AsCudaDevicePtr(*gpu_mem);
 }
 
@@ -225,11 +225,11 @@ port::Status GpuExecutor::LoadModuleFromCuBin(const char* cubin,
   if (*module == nullptr) {
     TF_RETURN_IF_ERROR(GpuDriver::LoadCubin(context_, cubin, module));
     module_refcount = 1;
-    VLOG(3) << "Loaded CUBIN " << static_cast<const void *>(cubin)
+    VLOG(3) << "Loaded CUBIN " << static_cast<const void*>(cubin)
             << " as module " << *module;
   } else {
     ++module_refcount;
-    VLOG(3) << "CUBIN " << static_cast<const void *>(cubin)
+    VLOG(3) << "CUBIN " << static_cast<const void*>(cubin)
             << " is already loaded as module " << *module;
   }
   gpu_binary_to_module_[cubin] = {*module, module_refcount};
@@ -242,12 +242,12 @@ port::Status GpuExecutor::LoadModuleFromPtx(const char* ptx, CUmodule* module) {
   if (*module == nullptr) {
     TF_RETURN_IF_ERROR(GpuDriver::LoadPtx(context_, ptx, module));
-    VLOG(3) << "Loaded PTX " << static_cast<const void *>(ptx) << " as module "
+    VLOG(3) << "Loaded PTX " << static_cast<const void*>(ptx) << " as module "
             << *module;
     module_refcount = 1;
   } else {
     ++module_refcount;
-    VLOG(3) << "PTX " << static_cast<const void *>(ptx)
+    VLOG(3) << "PTX " << static_cast<const void*>(ptx)
             << " is already loaded as module " << module;
   }
   gpu_binary_to_module_[ptx] = {*module, module_refcount};
@@ -271,7 +271,7 @@ port::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec,
   if (spec.has_cuda_cubin_in_memory()) {
     absl::MutexLock lock{&in_memory_modules_mu_};
     kernelname = &spec.cuda_cubin_in_memory().kernelname();
-    const char *cubin = spec.cuda_cubin_in_memory().bytes();
+    const char* cubin = spec.cuda_cubin_in_memory().bytes();
     TF_RETURN_IF_ERROR(LoadModuleFromCuBin(cubin, &module));
     kernel_to_gpu_binary_[kernel] = cubin;
   } else if (spec.has_cuda_ptx_in_memory()) {
@@ -281,7 +281,7 @@ port::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec,
       return port::InternalError("Compute capability not set");
port::InternalError("Compute capability not set"); } - const char *ptx = spec.cuda_ptx_in_memory().text(cc_major_, cc_minor_); + const char* ptx = spec.cuda_ptx_in_memory().text(cc_major_, cc_minor_); if (ptx == nullptr) { ptx = spec.cuda_ptx_in_memory().default_text(); } @@ -318,8 +318,8 @@ bool GpuExecutor::UnloadGpuBinary(const void* gpu_binary) { VLOG(3) << "No loaded CUDA module for " << gpu_binary; return false; } - auto &module = module_it->second.first; - auto &refcount = module_it->second.second; + auto& module = module_it->second.first; + auto& refcount = module_it->second.second; VLOG(3) << "Found CUDA module " << module << " with refcount " << refcount; if (--refcount == 0) { VLOG(3) << "Unloading CUDA module " << module; @@ -355,8 +355,8 @@ port::Status GpuExecutor::LoadModule(const MultiModuleLoaderSpec& spec, TF_RETURN_IF_ERROR(LoadModuleFromCuBin( reinterpret_cast(spec.cuda_cubin_in_memory().data()), &cu_module)); - *module_handle = ModuleHandle(const_cast( - static_cast(spec.cuda_cubin_in_memory().data()))); + *module_handle = ModuleHandle(const_cast( + static_cast(spec.cuda_cubin_in_memory().data()))); return port::Status::OK(); } else if (spec.has_cuda_ptx_in_memory()) { if (cc_major_ == 0 && cc_minor_ == 0) { @@ -370,15 +370,15 @@ port::Status GpuExecutor::LoadModule(const MultiModuleLoaderSpec& spec, absl::MutexLock lock{&in_memory_modules_mu_}; TF_RETURN_IF_ERROR( LoadModuleFromPtx(spec.cuda_ptx_in_memory(), &cu_module)); - *module_handle = ModuleHandle(const_cast( - static_cast(spec.cuda_ptx_in_memory()))); + *module_handle = ModuleHandle( + const_cast(static_cast(spec.cuda_ptx_in_memory()))); return port::Status::OK(); } return port::InternalError("No method of loading CUDA module provided"); } bool GpuExecutor::UnloadModule(ModuleHandle module_handle) { - const char *gpu_binary = reinterpret_cast(module_handle.id()); + const char* gpu_binary = reinterpret_cast(module_handle.id()); absl::MutexLock lock{&in_memory_modules_mu_}; return UnloadGpuBinary(gpu_binary); } @@ -425,7 +425,7 @@ port::Status GpuExecutor::Launch(Stream* stream, const ThreadDim& thread_dims, cufunc, cuda_kernel->GetGpuCacheConfig())); } - void **kernel_params = const_cast(args.argument_addresses().data()); + void** kernel_params = const_cast(args.argument_addresses().data()); return GpuDriver::LaunchKernel( context_, cufunc, block_dims.x, block_dims.y, block_dims.z, thread_dims.x, @@ -454,7 +454,7 @@ void GpuExecutor::VlogOccupancyInfo(const KernelBase& kernel, return; } - const DeviceDescription &device_description = + const DeviceDescription& device_description = kernel.parent()->GetDeviceDescription(); const GpuKernel* cuda_kernel = AsGpuKernel(&kernel); @@ -522,7 +522,7 @@ DeviceMemoryBase GpuExecutor::Allocate(uint64 size, int64 memory_space) { void* GpuExecutor::GetSubBuffer(DeviceMemoryBase* mem, uint64 offset_bytes, uint64 size_bytes) { // offset and size are in bytes, so char* works as the pointer type. 
-  return reinterpret_cast<char *>(mem->opaque()) + offset_bytes;
+  return reinterpret_cast<char*>(mem->opaque()) + offset_bytes;
 }
 
 void GpuExecutor::Deallocate(DeviceMemoryBase* mem) {
@@ -662,8 +662,8 @@ bool GpuExecutor::HostCallback(Stream* stream,
 /* static */ void GpuExecutor::InternalHostCallback(CUstream stream,
                                                     CUresult status,
                                                     void* data) {
-  std::function<void()> *callback =
-      reinterpret_cast<std::function<void()> *>(data);
+  std::function<void()>* callback =
+      reinterpret_cast<std::function<void()>*>(data);
   (*callback)();
   delete callback;
 }
@@ -744,7 +744,7 @@ port::Status GpuExecutor::BlockHostUntilDone(Stream* stream) {
 }
 
 blas::BlasSupport* GpuExecutor::CreateBlas() {
-  PluginRegistry *registry = PluginRegistry::Instance();
+  PluginRegistry* registry = PluginRegistry::Instance();
   port::StatusOr<PluginRegistry::BlasFactory> status =
       registry->GetFactory<PluginRegistry::BlasFactory>(cuda::kCudaPlatformId,
                                                         plugin_config_.blas());
@@ -758,7 +758,7 @@ blas::BlasSupport* GpuExecutor::CreateBlas() {
 }
 
 dnn::DnnSupport* GpuExecutor::CreateDnn() {
-  PluginRegistry *registry = PluginRegistry::Instance();
+  PluginRegistry* registry = PluginRegistry::Instance();
   port::StatusOr<PluginRegistry::DnnFactory> status =
       registry->GetFactory<PluginRegistry::DnnFactory>(cuda::kCudaPlatformId,
                                                        plugin_config_.dnn());
@@ -772,7 +772,7 @@ dnn::DnnSupport* GpuExecutor::CreateDnn() {
 }
 
 fft::FftSupport* GpuExecutor::CreateFft() {
-  PluginRegistry *registry = PluginRegistry::Instance();
+  PluginRegistry* registry = PluginRegistry::Instance();
   port::StatusOr<PluginRegistry::FftFactory> status =
       registry->GetFactory<PluginRegistry::FftFactory>(cuda::kCudaPlatformId,
                                                        plugin_config_.fft());
@@ -786,7 +786,7 @@ fft::FftSupport* GpuExecutor::CreateFft() {
 }
 
 rng::RngSupport* GpuExecutor::CreateRng() {
-  PluginRegistry *registry = PluginRegistry::Instance();
+  PluginRegistry* registry = PluginRegistry::Instance();
   port::StatusOr<PluginRegistry::RngFactory> status =
       registry->GetFactory<PluginRegistry::RngFactory>(cuda::kCudaPlatformId,
                                                        plugin_config_.rng());
@@ -812,47 +812,6 @@ port::Status GpuExecutor::EnablePeerAccessTo(StreamExecutorInterface* other) {
   return GpuDriver::EnablePeerAccess(context_, cuda_other->context_);
 }
 
-SharedMemoryConfig GpuExecutor::GetDeviceSharedMemoryConfig() {
-  port::StatusOr<CUsharedconfig> cuda_config =
-      GpuDriver::ContextGetSharedMemConfig(context_);
-  if (!cuda_config.ok()) {
-    // Don't log; the failed call will log necessary output.
-    return SharedMemoryConfig::kDefault;
-  }
-
-  switch (cuda_config.ValueOrDie()) {
-    case CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE:
-      return SharedMemoryConfig::kDefault;
-    case CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE:
-      return SharedMemoryConfig::kFourByte;
-    case CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE:
-      return SharedMemoryConfig::kEightByte;
-    default:
-      LOG(FATAL) << "Invalid shared memory configuration returned: "
-                 << cuda_config.ValueOrDie();
-  }
-}
-
-port::Status GpuExecutor::SetDeviceSharedMemoryConfig(
-    SharedMemoryConfig config) {
-  CUsharedconfig cuda_config;
-  switch (config) {
-    case SharedMemoryConfig::kDefault:
-      cuda_config = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE;
-      break;
-    case SharedMemoryConfig::kFourByte:
-      cuda_config = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE;
-      break;
-    case SharedMemoryConfig::kEightByte:
-      cuda_config = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE;
-      break;
-    default:
-      LOG(FATAL) << "Invalid shared memory configuration specified: "
-                 << static_cast<int>(config);
-  }
-  return GpuDriver::ContextSetSharedMemConfig(context_, cuda_config);
-}
-
 bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
   return GpuDriver::GetDeviceMemoryInfo(context_, free, total);
 }
@@ -875,7 +834,7 @@ bool GpuExecutor::GetSymbol(const std::string& symbol_name,
     return lookup_in_module(it->second.first);
   }
 
-  for (auto &it : gpu_binary_to_module_) {
+  for (auto& it : gpu_binary_to_module_) {
     if (lookup_in_module(it.second.first)) {
       return true;
     }
@@ -963,7 +922,7 @@ static int TryToReadNumaNode(const std::string& pci_bus_id,
   // We have to use fopen/fread here so that the device properties can be
   // populated before InitGoogle procedure has been completed (at which point we
   // could use the file::* utilities).
-  FILE *file = fopen(filename.c_str(), "r");
+  FILE* file = fopen(filename.c_str(), "r");
   if (file == nullptr) {
     LOG(ERROR) << "could not open file to read NUMA node: " << filename
                << "\nYour kernel may have been built without NUMA support.";
@@ -980,8 +939,9 @@ static int TryToReadNumaNode(const std::string& pci_bus_id,
   if (port::safe_strto32(content, &value)) {
     if (value < 0) {  // See http://b/18228951 for details on this path.
       LOG(INFO) << "successful NUMA node read from SysFS had negative value ("
-                << value << "), but there must be at least one NUMA node"
-                   ", so returning NUMA node zero";
+                << value
+                << "), but there must be at least one NUMA node"
+                   ", so returning NUMA node zero";
       fclose(file);
       return 0;
     }
diff --git a/tensorflow/stream_executor/gpu/gpu_executor.h b/tensorflow/stream_executor/gpu/gpu_executor.h
index fc4ea0e0ab2..edc015c6126 100644
--- a/tensorflow/stream_executor/gpu/gpu_executor.h
+++ b/tensorflow/stream_executor/gpu/gpu_executor.h
@@ -188,10 +188,6 @@ class GpuExecutor : public internal::StreamExecutorInterface {
 
   bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override;
 
-  SharedMemoryConfig GetDeviceSharedMemoryConfig() override;
-
-  port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override;
-
   bool DeviceMemoryUsage(int64* free, int64* total) const override;
 
   // Searches for the symbol and returns a device pointer and size.
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h
index 9b896fe06f8..953f8ced47f 100644
--- a/tensorflow/stream_executor/host/host_gpu_executor.h
+++ b/tensorflow/stream_executor/host/host_gpu_executor.h
@@ -148,20 +148,6 @@ class HostExecutor : public internal::StreamExecutorInterface {
     return true;
   }
 
-  SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
-    LOG(INFO) << "Shared memory configuration is unsupported for host "
-              << "executors.";
-    return SharedMemoryConfig::kDefault;
-  }
-
-  port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override {
-    std::string error_msg{
-        "Shared memory configuration is unsupported for host "
-        "executors."};
-    LOG(INFO) << error_msg;
-    return port::Status(port::error::UNIMPLEMENTED, error_msg);
-  }
-
   bool SupportsBlas() const override;
   blas::BlasSupport *CreateBlas() override;
 
diff --git a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc
index fd3b5f19913..2a85cb820ed 100644
--- a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc
+++ b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc
@@ -720,47 +720,6 @@ port::Status GpuExecutor::EnablePeerAccessTo(StreamExecutorInterface* other) {
   return GpuDriver::EnablePeerAccess(context_, rocm_other->context_);
 }
 
-SharedMemoryConfig GpuExecutor::GetDeviceSharedMemoryConfig() {
-  port::StatusOr<hipSharedMemConfig> rocm_config =
-      GpuDriver::ContextGetSharedMemConfig(context_);
-  if (!rocm_config.ok()) {
-    // Don't log; the failed call will log necessary output.
-    return SharedMemoryConfig::kDefault;
-  }
-
-  switch (rocm_config.ValueOrDie()) {
-    case hipSharedMemBankSizeDefault:
-      return SharedMemoryConfig::kDefault;
-    case hipSharedMemBankSizeFourByte:
-      return SharedMemoryConfig::kFourByte;
-    case hipSharedMemBankSizeEightByte:
-      return SharedMemoryConfig::kEightByte;
-    default:
-      LOG(FATAL) << "Invalid shared memory configuration returned: "
-                 << rocm_config.ValueOrDie();
-  }
-}
-
-port::Status GpuExecutor::SetDeviceSharedMemoryConfig(
-    SharedMemoryConfig config) {
-  hipSharedMemConfig rocm_config;
-  switch (config) {
-    case SharedMemoryConfig::kDefault:
-      rocm_config = hipSharedMemBankSizeDefault;
-      break;
-    case SharedMemoryConfig::kFourByte:
-      rocm_config = hipSharedMemBankSizeFourByte;
-      break;
-    case SharedMemoryConfig::kEightByte:
-      rocm_config = hipSharedMemBankSizeEightByte;
-      break;
-    default:
-      LOG(FATAL) << "Invalid shared memory configuration specified: "
-                 << static_cast<int>(config);
-  }
-  return GpuDriver::ContextSetSharedMemConfig(context_, rocm_config);
-}
-
 bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
   return GpuDriver::GetDeviceMemoryInfo(context_, free, total);
 }
@@ -768,24 +727,24 @@ bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
 bool GpuExecutor::GetSymbol(const string& symbol_name,
                             ModuleHandle module_handle, void** mem,
                             size_t* bytes) {
-    absl::MutexLock lock{&in_memory_modules_mu_};
-    if (static_cast<bool>(module_handle)) {
-      auto it = gpu_binary_to_module_.find(module_handle.id());
-      CHECK(it != gpu_binary_to_module_.end());
-      if (GpuDriver::GetModuleSymbol(
-              context_, it->second.first, symbol_name.c_str(),
-              reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
-        return true;
-      }
+  absl::MutexLock lock{&in_memory_modules_mu_};
+  if (static_cast<bool>(module_handle)) {
+    auto it = gpu_binary_to_module_.find(module_handle.id());
+    CHECK(it != gpu_binary_to_module_.end());
+    if (GpuDriver::GetModuleSymbol(
+            context_, it->second.first, symbol_name.c_str(),
+            reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
+      return true;
     }
+  }
 
-    for (auto& it : gpu_binary_to_module_) {
-      if (GpuDriver::GetModuleSymbol(
-              context_, it.second.first, symbol_name.c_str(),
-              reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
-        return true;
-      }
+  for (auto& it : gpu_binary_to_module_) {
+    if (GpuDriver::GetModuleSymbol(
+            context_, it.second.first, symbol_name.c_str(),
+            reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
+      return true;
     }
+  }
 
   LOG(INFO) << "Failed to find symbol in any modules: " << symbol_name;
   return false;
diff --git a/tensorflow/stream_executor/shared_memory_config.h b/tensorflow/stream_executor/shared_memory_config.h
deleted file mode 100644
index 7cbeb3bcd91..00000000000
--- a/tensorflow/stream_executor/shared_memory_config.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This file defines a uniform interface to configuration options for shared
-// memory for supported devices. As with many StreamExecutor-supported features,
-// support for the options defined herein is device-dependent.
-#ifndef TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_
-#define TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_
-
-namespace stream_executor {
-
-// SharedMemoryConfig enum describes potential widths of shared memory banks for
-// a device or kernel.
-enum class SharedMemoryConfig {
-  kDefault,    // Use the device default configuration.
-  kFourByte,   // Sets shared memory banks to be four bytes wide.
-  kEightByte,  // Sets shared memory banks to be eight bytes wide.
-};
-
-}  // namespace stream_executor
-
-#endif  // TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_
diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h
index 408b4fc8207..437338085b3 100644
--- a/tensorflow/stream_executor/stream_executor_internal.h
+++ b/tensorflow/stream_executor/stream_executor_internal.h
@@ -44,7 +44,6 @@ limitations under the License.
#include "tensorflow/stream_executor/platform.h" #include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/plugin_registry.h" -#include "tensorflow/stream_executor/shared_memory_config.h" #include "tensorflow/stream_executor/trace_listener.h" namespace stream_executor { @@ -267,9 +266,6 @@ class StreamExecutorInterface { virtual int PlatformDeviceCount() = 0; virtual port::Status EnablePeerAccessTo(StreamExecutorInterface *other) = 0; virtual bool CanEnablePeerAccessTo(StreamExecutorInterface *other) = 0; - virtual SharedMemoryConfig GetDeviceSharedMemoryConfig() = 0; - virtual port::Status SetDeviceSharedMemoryConfig( - SharedMemoryConfig config) = 0; virtual int64 GetDeviceLoad() { return -1; } diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc index d23f1472e33..db4e8f9b694 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.cc +++ b/tensorflow/stream_executor/stream_executor_pimpl.cc @@ -230,23 +230,6 @@ port::Status StreamExecutor::EnablePeerAccessTo(StreamExecutor *other) { return implementation_->EnablePeerAccessTo(other->implementation_.get()); } -SharedMemoryConfig StreamExecutor::GetDeviceSharedMemoryConfig() { - return implementation_->GetDeviceSharedMemoryConfig(); -} - -port::Status StreamExecutor::SetDeviceSharedMemoryConfig( - SharedMemoryConfig config) { - if (config != SharedMemoryConfig::kDefault && - config != SharedMemoryConfig::kFourByte && - config != SharedMemoryConfig::kEightByte) { - std::string error_msg = absl::StrFormat( - "Invalid shared memory config specified: %d", static_cast(config)); - LOG(ERROR) << error_msg; - return port::Status(port::error::INVALID_ARGUMENT, error_msg); - } - return implementation_->SetDeviceSharedMemoryConfig(config); -} - const DeviceDescription &StreamExecutor::GetDeviceDescription() const { absl::MutexLock lock(&mu_); if (device_description_ != nullptr) { @@ -858,7 +841,7 @@ absl::optional StreamExecutor::GetAllocatorStats() { } template -void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&... args) { +void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&...args) { if (tracing_enabled_) { { // instance tracers held in a block to limit the lock lifetime. diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h index f7f69f78e89..b9b118ca42c 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.h +++ b/tensorflow/stream_executor/stream_executor_pimpl.h @@ -35,7 +35,6 @@ limitations under the License. #include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/rng.h" -#include "tensorflow/stream_executor/shared_memory_config.h" #include "tensorflow/stream_executor/stream.h" #include "tensorflow/stream_executor/stream_executor_internal.h" #include "tensorflow/stream_executor/trace_listener.h" @@ -54,8 +53,8 @@ struct AllocRecord { }; // Forward declaration of private friend class. -template +template class ScopedTracer; // A StreamExecutor manages a single device, in terms of executing work (kernel @@ -322,14 +321,6 @@ class StreamExecutor { // this is more an up-front test as to whether it's expressly forbidden. bool CanEnablePeerAccessTo(StreamExecutor *other); - // Gets the preferred shared memory configuration for the device to which this - // executor is bound. 
-  SharedMemoryConfig GetDeviceSharedMemoryConfig();
-
-  // Sets the preferred shared memory configuration for the device to which this
-  // executor is bound.
-  port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config);
-
   // Obtains metadata about the underlying device.
   // The value is cached on first use.
   const DeviceDescription &GetDeviceDescription() const;
@@ -507,12 +498,12 @@ class StreamExecutor {
   // To register a listener for all executors for a given platform, see
   // Platform::RegisterTraceListener().
   // Does not take ownership of listener.
-  void RegisterTraceListener(TraceListener* listener);
+  void RegisterTraceListener(TraceListener *listener);
 
   // Removes a TraceListener from this StreamExecutor instance.
   // Returns false (and logs) in cases where the argument listener was not
   // previously registered.
-  bool UnregisterTraceListener(TraceListener* listener);
+  bool UnregisterTraceListener(TraceListener *listener);
 
   // Return allocator statistics.
   absl::optional<AllocatorStats> GetAllocatorStats();
@@ -522,8 +513,8 @@ class StreamExecutor {
   StreamExecutorMemoryAllocator *GetAllocator() { return &allocator_; }
 
  private:
-  template <typename BeginCallT, typename CompleteCallT,
-            typename ReturnT, typename... BeginArgsT>
+  template <typename BeginCallT, typename CompleteCallT, typename ReturnT,
+            typename... BeginArgsT>
   friend class ScopedTracer;
   friend class Event;
   friend class Stream;
@@ -648,7 +639,7 @@ class StreamExecutor {
   // Calls the relevant TraceListener routine to begin tracing for the specified
   // asynchronous method.
   template <typename TraceCallT, typename... ArgsT>
-  void SubmitTrace(TraceCallT trace_call, ArgsT&&... args);
+  void SubmitTrace(TraceCallT trace_call, ArgsT &&...args);
 
   // Reader/writer lock for class-static StreamExecutor members.
   static absl::Mutex static_mu_;
diff --git a/tensorflow/stream_executor/tpu/tpu_executor.h b/tensorflow/stream_executor/tpu/tpu_executor.h
index faeae86da9b..2430a350463 100644
--- a/tensorflow/stream_executor/tpu/tpu_executor.h
+++ b/tensorflow/stream_executor/tpu/tpu_executor.h
@@ -96,8 +96,7 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
   void DequeueOutfeed(int32 outfeed_queue_index, absl::Span<uint8> bytes,
                       StatusCallback done);
 
-  Status EnqueueInfeed(int32 infeed_queue_index,
-                       absl::Span<const uint8> bytes);
+  Status EnqueueInfeed(int32 infeed_queue_index, absl::Span<const uint8> bytes);
 
   absl::optional<stream_executor::AllocatorStats> GetAllocatorStats() override;
 
@@ -175,10 +174,6 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
     LOG(FATAL) << "Not yet implemented";
   }
 
-  stream_executor::SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
-    LOG(FATAL) << "not yet implemented";
-  }
-
   void* GetSubBuffer(DeviceMemoryBase* parent, uint64 offset,
                      uint64 size) override {
     LOG(FATAL) << "not yet implemented";
@@ -197,10 +192,7 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
   bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override {
     LOG(FATAL) << "not yet implemented";
   }
-  Status SetDeviceSharedMemoryConfig(
-      stream_executor::SharedMemoryConfig config) override {
-    LOG(FATAL) << "not yet implemented";
-  }
+
   void* HostMemoryAllocate(uint64 size) override {
     LOG(FATAL) << "not yet implemented";
   }
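
Migration note: SharedMemoryConfig was a thin enum wrapper around the
context-wide shared memory bank-size setting of the CUDA driver (and the
matching hipSharedMemConfig values on ROCm), as the deleted switch statements
above show. Code that still needs this behavior can call the driver API that
GpuExecutor::SetDeviceSharedMemoryConfig used to wrap. Below is a minimal
standalone sketch, not part of this change: it assumes the CUDA driver API
(cuda.h) directly rather than StreamExecutor, and the check() helper is a
hypothetical convenience added only for illustration.

    #include <cuda.h>
    #include <cstdio>
    #include <cstdlib>

    // Aborts with the CUDA error name if a driver call fails.
    static void check(CUresult r, const char* what) {
      if (r != CUDA_SUCCESS) {
        const char* name = nullptr;
        cuGetErrorName(r, &name);
        std::fprintf(stderr, "%s failed: %s\n", what, name ? name : "unknown");
        std::abort();
      }
    }

    int main() {
      check(cuInit(0), "cuInit");
      CUdevice dev;
      check(cuDeviceGet(&dev, 0), "cuDeviceGet");
      CUcontext ctx;
      check(cuCtxCreate(&ctx, 0, dev), "cuCtxCreate");

      // Equivalent of SetDeviceSharedMemoryConfig(SharedMemoryConfig::kEightByte):
      // widen shared memory banks to eight bytes for kernels in this context.
      check(cuCtxSetSharedMemConfig(CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE),
            "cuCtxSetSharedMemConfig");

      // Equivalent of GetDeviceSharedMemoryConfig(): read the setting back.
      CUsharedconfig cfg;
      check(cuCtxGetSharedMemConfig(&cfg), "cuCtxGetSharedMemConfig");
      std::printf("shared memory bank size config: %d\n", static_cast<int>(cfg));

      check(cuCtxDestroy(ctx), "cuCtxDestroy");
      return 0;
    }

Since the setting only affected compute-capability 3.x (Kepler) parts and no
StreamExecutor client ever called the wrapper, dropping the abstraction rather
than porting it to every executor (host, TPU, interpreter) is the simpler
choice; callers with Kepler-specific tuning needs can use the driver directly
as sketched above.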