Remove SharedMemoryConfig since it is not used anywhere.

PiperOrigin-RevId: 326154532
Change-Id: I13be21f577226c48c7e0d5bfc7efb787f0422e85
Anna R 2020-08-11 19:56:45 -07:00 committed by TensorFlower Gardener
parent 9636571807
commit 642db2faf5
11 changed files with 54 additions and 247 deletions
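For context, SharedMemoryConfig was a three-value enum describing how wide a device's shared memory banks should be; each StreamExecutor backend translated it into its driver's native setting (CUsharedconfig on CUDA, hipSharedMemConfig on ROCm), as the hunks below show. A minimal standalone sketch of the pattern being deleted follows; DriverBankSize and ToDriverConfig are hypothetical stand-ins for the driver side so the sketch compiles on its own:

#include <cstdio>

// The enum being removed, copied from shared_memory_config.h (deleted below).
enum class SharedMemoryConfig {
  kDefault,    // Use the device default configuration.
  kFourByte,   // Sets shared memory banks to be four bytes wide.
  kEightByte,  // Sets shared memory banks to be eight bytes wide.
};

// Hypothetical stand-in for a driver-level enum such as CUsharedconfig.
enum class DriverBankSize { kDefault, kFourByte, kEightByte };

// Mirrors the switch in GpuExecutor::SetDeviceSharedMemoryConfig below.
DriverBankSize ToDriverConfig(SharedMemoryConfig config) {
  switch (config) {
    case SharedMemoryConfig::kDefault:
      return DriverBankSize::kDefault;
    case SharedMemoryConfig::kFourByte:
      return DriverBankSize::kFourByte;
    case SharedMemoryConfig::kEightByte:
      return DriverBankSize::kEightByte;
  }
  return DriverBankSize::kDefault;  // Unreachable for valid enum values.
}

int main() {
  // kEightByte maps to the driver's eight-byte bank size.
  std::printf("%d\n",
              static_cast<int>(ToDriverConfig(SharedMemoryConfig::kEightByte)));
}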

View File

@@ -38,7 +38,6 @@ limitations under the License.
#include "tensorflow/stream_executor/launch_dim.h"
#include "tensorflow/stream_executor/plugin.h"
#include "tensorflow/stream_executor/rng.h"
#include "tensorflow/stream_executor/shared_memory_config.h"
#include "tensorflow/stream_executor/stream.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -182,15 +181,6 @@ class XlaInterpreterExecutor : public internal::StreamExecutorInterface {
return true;
}
SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
return SharedMemoryConfig::kDefault;
}
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override {
return port::Status{port::error::UNIMPLEMENTED,
"Shared memory not supported"};
}
std::unique_ptr<internal::EventInterface> CreateEventImplementation()
override {
return nullptr;

View File

@@ -67,7 +67,6 @@ cc_library(
"plugin.h",
"plugin_registry.h",
"rng.h",
"shared_memory_config.h",
"stream_executor_pimpl.h",
"temporary_device_memory.h",
"temporary_memory_manager.h",
@@ -123,7 +122,6 @@ cc_library(
"multi_platform_manager.h",
"platform.h",
"plugin_registry.h",
"shared_memory_config.h",
"stream_executor.h",
"stream_executor_internal.h",
"timer.h",
@@ -173,11 +171,6 @@ cc_library(
],
)
cc_library(
name = "shared_memory_config",
hdrs = ["shared_memory_config.h"],
)
# Aliases for backwards compatibility.
alias(
name = "stream_header",
@@ -343,7 +336,6 @@ cc_library(
"kernel_cache_config.h",
"kernel_spec.h",
"platform.h",
"shared_memory_config.h",
"stream.h",
"stream_executor_internal.h",
"trace_listener.h",
@@ -455,7 +447,6 @@ cc_library(
"stream_executor_internal.cc",
],
hdrs = [
"shared_memory_config.h",
"stream_executor_internal.h",
],
deps = [
@@ -484,7 +475,6 @@ cc_library(
"dnn.h",
"kernel.h",
"kernel_cache_config.h",
"shared_memory_config.h",
"stream_executor_pimpl.h",
],
visibility = ["//visibility:public"],
@@ -569,7 +559,6 @@ cc_library(
"plugin.h",
"plugin_registry.h",
"rng.h",
"shared_memory_config.h",
"stream.h",
"stream_executor.h",
"stream_executor_internal.h",
@@ -619,7 +608,6 @@ cc_library(
"plugin.h",
"plugin_registry.h",
"rng.h",
"shared_memory_config.h",
"stream.h",
"stream_executor.h",
"stream_executor_internal.h",

View File

@@ -101,12 +101,12 @@ static GpuTimer* AsGpuTimer(Timer* timer) {
// N.B. we must lose constness in order to pass a suitable type to the existing
// libcuda APIs, so the caller should take care to only pass the result of const
// GPU memory conversions to libcuda functions which will honor constness.
static CUdeviceptr AsCudaDevicePtr(const DeviceMemoryBase &gpu_mem) {
static CUdeviceptr AsCudaDevicePtr(const DeviceMemoryBase& gpu_mem) {
return reinterpret_cast<CUdeviceptr>(gpu_mem.opaque());
}
// See description on const version above.
static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase *gpu_mem) {
static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase* gpu_mem) {
return AsCudaDevicePtr(*gpu_mem);
}
@@ -225,11 +225,11 @@ port::Status GpuExecutor::LoadModuleFromCuBin(const char* cubin,
if (*module == nullptr) {
TF_RETURN_IF_ERROR(GpuDriver::LoadCubin(context_, cubin, module));
module_refcount = 1;
VLOG(3) << "Loaded CUBIN " << static_cast<const void *>(cubin)
VLOG(3) << "Loaded CUBIN " << static_cast<const void*>(cubin)
<< " as module " << *module;
} else {
++module_refcount;
VLOG(3) << "CUBIN " << static_cast<const void *>(cubin)
VLOG(3) << "CUBIN " << static_cast<const void*>(cubin)
<< " is already loaded as module " << *module;
}
gpu_binary_to_module_[cubin] = {*module, module_refcount};
@@ -242,12 +242,12 @@ port::Status GpuExecutor::LoadModuleFromPtx(const char* ptx, CUmodule* module) {
if (*module == nullptr) {
TF_RETURN_IF_ERROR(GpuDriver::LoadPtx(context_, ptx, module));
VLOG(3) << "Loaded PTX " << static_cast<const void *>(ptx) << " as module "
VLOG(3) << "Loaded PTX " << static_cast<const void*>(ptx) << " as module "
<< *module;
module_refcount = 1;
} else {
++module_refcount;
VLOG(3) << "PTX " << static_cast<const void *>(ptx)
VLOG(3) << "PTX " << static_cast<const void*>(ptx)
<< " is already loaded as module " << module;
}
gpu_binary_to_module_[ptx] = {*module, module_refcount};
@@ -271,7 +271,7 @@ port::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec,
if (spec.has_cuda_cubin_in_memory()) {
absl::MutexLock lock{&in_memory_modules_mu_};
kernelname = &spec.cuda_cubin_in_memory().kernelname();
const char *cubin = spec.cuda_cubin_in_memory().bytes();
const char* cubin = spec.cuda_cubin_in_memory().bytes();
TF_RETURN_IF_ERROR(LoadModuleFromCuBin(cubin, &module));
kernel_to_gpu_binary_[kernel] = cubin;
} else if (spec.has_cuda_ptx_in_memory()) {
@@ -281,7 +281,7 @@ port::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec,
return port::InternalError("Compute capability not set");
}
const char *ptx = spec.cuda_ptx_in_memory().text(cc_major_, cc_minor_);
const char* ptx = spec.cuda_ptx_in_memory().text(cc_major_, cc_minor_);
if (ptx == nullptr) {
ptx = spec.cuda_ptx_in_memory().default_text();
}
@@ -318,8 +318,8 @@ bool GpuExecutor::UnloadGpuBinary(const void* gpu_binary) {
VLOG(3) << "No loaded CUDA module for " << gpu_binary;
return false;
}
auto &module = module_it->second.first;
auto &refcount = module_it->second.second;
auto& module = module_it->second.first;
auto& refcount = module_it->second.second;
VLOG(3) << "Found CUDA module " << module << " with refcount " << refcount;
if (--refcount == 0) {
VLOG(3) << "Unloading CUDA module " << module;
@@ -355,8 +355,8 @@ port::Status GpuExecutor::LoadModule(const MultiModuleLoaderSpec& spec,
TF_RETURN_IF_ERROR(LoadModuleFromCuBin(
reinterpret_cast<const char*>(spec.cuda_cubin_in_memory().data()),
&cu_module));
*module_handle = ModuleHandle(const_cast<void *>(
static_cast<const void *>(spec.cuda_cubin_in_memory().data())));
*module_handle = ModuleHandle(const_cast<void*>(
static_cast<const void*>(spec.cuda_cubin_in_memory().data())));
return port::Status::OK();
} else if (spec.has_cuda_ptx_in_memory()) {
if (cc_major_ == 0 && cc_minor_ == 0) {
@@ -370,15 +370,15 @@ port::Status GpuExecutor::LoadModule(const MultiModuleLoaderSpec& spec,
absl::MutexLock lock{&in_memory_modules_mu_};
TF_RETURN_IF_ERROR(
LoadModuleFromPtx(spec.cuda_ptx_in_memory(), &cu_module));
*module_handle = ModuleHandle(const_cast<void *>(
static_cast<const void *>(spec.cuda_ptx_in_memory())));
*module_handle = ModuleHandle(
const_cast<void*>(static_cast<const void*>(spec.cuda_ptx_in_memory())));
return port::Status::OK();
}
return port::InternalError("No method of loading CUDA module provided");
}
bool GpuExecutor::UnloadModule(ModuleHandle module_handle) {
const char *gpu_binary = reinterpret_cast<const char *>(module_handle.id());
const char* gpu_binary = reinterpret_cast<const char*>(module_handle.id());
absl::MutexLock lock{&in_memory_modules_mu_};
return UnloadGpuBinary(gpu_binary);
}
@@ -425,7 +425,7 @@ port::Status GpuExecutor::Launch(Stream* stream, const ThreadDim& thread_dims,
cufunc, cuda_kernel->GetGpuCacheConfig()));
}
void **kernel_params = const_cast<void **>(args.argument_addresses().data());
void** kernel_params = const_cast<void**>(args.argument_addresses().data());
return GpuDriver::LaunchKernel(
context_, cufunc, block_dims.x, block_dims.y, block_dims.z, thread_dims.x,
@@ -454,7 +454,7 @@ void GpuExecutor::VlogOccupancyInfo(const KernelBase& kernel,
return;
}
const DeviceDescription &device_description =
const DeviceDescription& device_description =
kernel.parent()->GetDeviceDescription();
const GpuKernel* cuda_kernel = AsGpuKernel(&kernel);
@@ -522,7 +522,7 @@ DeviceMemoryBase GpuExecutor::Allocate(uint64 size, int64 memory_space) {
void* GpuExecutor::GetSubBuffer(DeviceMemoryBase* mem, uint64 offset_bytes,
uint64 size_bytes) {
// offset and size are in bytes, so char* works as the pointer type.
return reinterpret_cast<char *>(mem->opaque()) + offset_bytes;
return reinterpret_cast<char*>(mem->opaque()) + offset_bytes;
}
void GpuExecutor::Deallocate(DeviceMemoryBase* mem) {
@@ -662,8 +662,8 @@ bool GpuExecutor::HostCallback(Stream* stream,
/* static */ void GpuExecutor::InternalHostCallback(CUstream stream,
CUresult status,
void* data) {
std::function<void()> *callback =
reinterpret_cast<std::function<void()> *>(data);
std::function<void()>* callback =
reinterpret_cast<std::function<void()>*>(data);
(*callback)();
delete callback;
}
@@ -744,7 +744,7 @@ port::Status GpuExecutor::BlockHostUntilDone(Stream* stream) {
}
blas::BlasSupport* GpuExecutor::CreateBlas() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::BlasFactory> status =
registry->GetFactory<PluginRegistry::BlasFactory>(cuda::kCudaPlatformId,
plugin_config_.blas());
@@ -758,7 +758,7 @@ blas::BlasSupport* GpuExecutor::CreateBlas() {
}
dnn::DnnSupport* GpuExecutor::CreateDnn() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::DnnFactory> status =
registry->GetFactory<PluginRegistry::DnnFactory>(cuda::kCudaPlatformId,
plugin_config_.dnn());
@@ -772,7 +772,7 @@ dnn::DnnSupport* GpuExecutor::CreateDnn() {
}
fft::FftSupport* GpuExecutor::CreateFft() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::FftFactory> status =
registry->GetFactory<PluginRegistry::FftFactory>(cuda::kCudaPlatformId,
plugin_config_.fft());
@@ -786,7 +786,7 @@ fft::FftSupport* GpuExecutor::CreateFft() {
}
rng::RngSupport* GpuExecutor::CreateRng() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::RngFactory> status =
registry->GetFactory<PluginRegistry::RngFactory>(cuda::kCudaPlatformId,
plugin_config_.rng());
@@ -812,47 +812,6 @@ port::Status GpuExecutor::EnablePeerAccessTo(StreamExecutorInterface* other) {
return GpuDriver::EnablePeerAccess(context_, cuda_other->context_);
}
SharedMemoryConfig GpuExecutor::GetDeviceSharedMemoryConfig() {
port::StatusOr<CUsharedconfig> cuda_config =
GpuDriver::ContextGetSharedMemConfig(context_);
if (!cuda_config.ok()) {
// Don't log; the failed call will log necessary output.
return SharedMemoryConfig::kDefault;
}
switch (cuda_config.ValueOrDie()) {
case CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE:
return SharedMemoryConfig::kDefault;
case CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE:
return SharedMemoryConfig::kFourByte;
case CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE:
return SharedMemoryConfig::kEightByte;
default:
LOG(FATAL) << "Invalid shared memory configuration returned: "
<< cuda_config.ValueOrDie();
}
}
port::Status GpuExecutor::SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) {
CUsharedconfig cuda_config;
switch (config) {
case SharedMemoryConfig::kDefault:
cuda_config = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE;
break;
case SharedMemoryConfig::kFourByte:
cuda_config = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE;
break;
case SharedMemoryConfig::kEightByte:
cuda_config = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE;
break;
default:
LOG(FATAL) << "Invalid shared memory configuration specified: "
<< static_cast<int>(config);
}
return GpuDriver::ContextSetSharedMemConfig(context_, cuda_config);
}
bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
return GpuDriver::GetDeviceMemoryInfo(context_, free, total);
}
@@ -875,7 +834,7 @@ bool GpuExecutor::GetSymbol(const std::string& symbol_name,
return lookup_in_module(it->second.first);
}
for (auto &it : gpu_binary_to_module_) {
for (auto& it : gpu_binary_to_module_) {
if (lookup_in_module(it.second.first)) {
return true;
}
@@ -963,7 +922,7 @@ static int TryToReadNumaNode(const std::string& pci_bus_id,
// We have to use fopen/fread here so that the device properties can be
// populated before InitGoogle procedure has been completed (at which point we
// could use the file::* utilities).
FILE *file = fopen(filename.c_str(), "r");
FILE* file = fopen(filename.c_str(), "r");
if (file == nullptr) {
LOG(ERROR) << "could not open file to read NUMA node: " << filename
<< "\nYour kernel may have been built without NUMA support.";
@@ -980,8 +939,9 @@ static int TryToReadNumaNode(const std::string& pci_bus_id,
if (port::safe_strto32(content, &value)) {
if (value < 0) { // See http://b/18228951 for details on this path.
LOG(INFO) << "successful NUMA node read from SysFS had negative value ("
<< value << "), but there must be at least one NUMA node"
", so returning NUMA node zero";
<< value
<< "), but there must be at least one NUMA node"
", so returning NUMA node zero";
fclose(file);
return 0;
}

View File

@@ -188,10 +188,6 @@ class GpuExecutor : public internal::StreamExecutorInterface {
bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override;
SharedMemoryConfig GetDeviceSharedMemoryConfig() override;
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override;
bool DeviceMemoryUsage(int64* free, int64* total) const override;
// Search for the symbol and returns a device pointer and size.

View File

@@ -148,20 +148,6 @@ class HostExecutor : public internal::StreamExecutorInterface {
return true;
}
SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
LOG(INFO) << "Shared memory configuration is unsupported for host "
<< "executors.";
return SharedMemoryConfig::kDefault;
}
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override {
std::string error_msg{
"Shared memory configuration is unsupported for host "
"executors."};
LOG(INFO) << error_msg;
return port::Status(port::error::UNIMPLEMENTED, error_msg);
}
bool SupportsBlas() const override;
blas::BlasSupport *CreateBlas() override;

View File

@@ -720,47 +720,6 @@ port::Status GpuExecutor::EnablePeerAccessTo(StreamExecutorInterface* other) {
return GpuDriver::EnablePeerAccess(context_, rocm_other->context_);
}
SharedMemoryConfig GpuExecutor::GetDeviceSharedMemoryConfig() {
port::StatusOr<hipSharedMemConfig> rocm_config =
GpuDriver::ContextGetSharedMemConfig(context_);
if (!rocm_config.ok()) {
// Don't log; the failed call will log necessary output.
return SharedMemoryConfig::kDefault;
}
switch (rocm_config.ValueOrDie()) {
case hipSharedMemBankSizeDefault:
return SharedMemoryConfig::kDefault;
case hipSharedMemBankSizeFourByte:
return SharedMemoryConfig::kFourByte;
case hipSharedMemBankSizeEightByte:
return SharedMemoryConfig::kEightByte;
default:
LOG(FATAL) << "Invalid shared memory configuration returned: "
<< rocm_config.ValueOrDie();
}
}
port::Status GpuExecutor::SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) {
hipSharedMemConfig rocm_config;
switch (config) {
case SharedMemoryConfig::kDefault:
rocm_config = hipSharedMemBankSizeDefault;
break;
case SharedMemoryConfig::kFourByte:
rocm_config = hipSharedMemBankSizeFourByte;
break;
case SharedMemoryConfig::kEightByte:
rocm_config = hipSharedMemBankSizeEightByte;
break;
default:
LOG(FATAL) << "Invalid shared memory configuration specified: "
<< static_cast<int>(config);
}
return GpuDriver::ContextSetSharedMemConfig(context_, rocm_config);
}
bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
return GpuDriver::GetDeviceMemoryInfo(context_, free, total);
}
@@ -768,24 +727,24 @@ bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
bool GpuExecutor::GetSymbol(const string& symbol_name,
ModuleHandle module_handle, void** mem,
size_t* bytes) {
absl::MutexLock lock{&in_memory_modules_mu_};
if (static_cast<bool>(module_handle)) {
auto it = gpu_binary_to_module_.find(module_handle.id());
CHECK(it != gpu_binary_to_module_.end());
if (GpuDriver::GetModuleSymbol(
context_, it->second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
absl::MutexLock lock{&in_memory_modules_mu_};
if (static_cast<bool>(module_handle)) {
auto it = gpu_binary_to_module_.find(module_handle.id());
CHECK(it != gpu_binary_to_module_.end());
if (GpuDriver::GetModuleSymbol(
context_, it->second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
}
for (auto& it : gpu_binary_to_module_) {
if (GpuDriver::GetModuleSymbol(
context_, it.second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
for (auto& it : gpu_binary_to_module_) {
if (GpuDriver::GetModuleSymbol(
context_, it.second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
}
LOG(INFO) << "Falied to find symbol in any modules: " << symbol_name;
return false;

View File

@@ -1,34 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This file defines a uniform interface to configuration options for shared
// memory for supported devices. As with many StreamExecutor-supported features,
// support for the options defined herein is device-dependent.
#ifndef TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_
#define TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_
namespace stream_executor {
// SharedMemoryConfig enum describes potential widths of shared memory banks for
// a device or kernel.
enum class SharedMemoryConfig {
kDefault, // Use the device default configuration.
kFourByte, // Sets shared memory banks to be four bytes wide.
kEightByte, // Sets shared memory banks to be eight bytes wide.
};
} // namespace stream_executor
#endif // TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_

View File

@@ -44,7 +44,6 @@ limitations under the License.
#include "tensorflow/stream_executor/platform.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/plugin_registry.h"
#include "tensorflow/stream_executor/shared_memory_config.h"
#include "tensorflow/stream_executor/trace_listener.h"
namespace stream_executor {
@@ -267,9 +266,6 @@ class StreamExecutorInterface {
virtual int PlatformDeviceCount() = 0;
virtual port::Status EnablePeerAccessTo(StreamExecutorInterface *other) = 0;
virtual bool CanEnablePeerAccessTo(StreamExecutorInterface *other) = 0;
virtual SharedMemoryConfig GetDeviceSharedMemoryConfig() = 0;
virtual port::Status SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) = 0;
virtual int64 GetDeviceLoad() { return -1; }

View File

@@ -230,23 +230,6 @@ port::Status StreamExecutor::EnablePeerAccessTo(StreamExecutor *other) {
return implementation_->EnablePeerAccessTo(other->implementation_.get());
}
SharedMemoryConfig StreamExecutor::GetDeviceSharedMemoryConfig() {
return implementation_->GetDeviceSharedMemoryConfig();
}
port::Status StreamExecutor::SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) {
if (config != SharedMemoryConfig::kDefault &&
config != SharedMemoryConfig::kFourByte &&
config != SharedMemoryConfig::kEightByte) {
std::string error_msg = absl::StrFormat(
"Invalid shared memory config specified: %d", static_cast<int>(config));
LOG(ERROR) << error_msg;
return port::Status(port::error::INVALID_ARGUMENT, error_msg);
}
return implementation_->SetDeviceSharedMemoryConfig(config);
}
const DeviceDescription &StreamExecutor::GetDeviceDescription() const {
absl::MutexLock lock(&mu_);
if (device_description_ != nullptr) {
@@ -858,7 +841,7 @@ absl::optional<AllocatorStats> StreamExecutor::GetAllocatorStats() {
}
template <typename TraceCallT, typename... ArgsT>
void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&... args) {
void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&...args) {
if (tracing_enabled_) {
{
// instance tracers held in a block to limit the lock lifetime.

View File

@@ -35,7 +35,6 @@ limitations under the License.
#include "tensorflow/stream_executor/platform/logging.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/rng.h"
#include "tensorflow/stream_executor/shared_memory_config.h"
#include "tensorflow/stream_executor/stream.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"
#include "tensorflow/stream_executor/trace_listener.h"
@@ -54,8 +53,8 @@ struct AllocRecord {
};
// Forward declaration of private friend class.
template <typename BeginCallT, typename CompleteCallT,
typename ReturnT, typename... BeginArgsT>
template <typename BeginCallT, typename CompleteCallT, typename ReturnT,
typename... BeginArgsT>
class ScopedTracer;
// A StreamExecutor manages a single device, in terms of executing work (kernel
@@ -322,14 +321,6 @@ class StreamExecutor {
// this is more an up-front test as to whether it's expressly forbidden.
bool CanEnablePeerAccessTo(StreamExecutor *other);
// Gets the preferred shared memory configuration for the device to which this
// executor is bound.
SharedMemoryConfig GetDeviceSharedMemoryConfig();
// Sets the preferred shared memory configuration for the device to which this
// executor is bound.
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config);
// Obtains metadata about the underlying device.
// The value is cached on first use.
const DeviceDescription &GetDeviceDescription() const;
@@ -507,12 +498,12 @@ class StreamExecutor {
// To register a listener for all executors for a given platform, see
// Platform::RegisterTraceListener().
// Does not take ownership of listener.
void RegisterTraceListener(TraceListener* listener);
void RegisterTraceListener(TraceListener *listener);
// Removes a TraceListener from this StreamExecutor instance.
// Returns false (and logs) in cases where the argument listener was not
// previously registered.
bool UnregisterTraceListener(TraceListener* listener);
bool UnregisterTraceListener(TraceListener *listener);
// Return allocator statistics.
absl::optional<AllocatorStats> GetAllocatorStats();
@@ -522,8 +513,8 @@ class StreamExecutor {
StreamExecutorMemoryAllocator *GetAllocator() { return &allocator_; }
private:
template <typename BeginCallT, typename CompleteCallT,
typename ReturnT, typename... BeginArgsT>
template <typename BeginCallT, typename CompleteCallT, typename ReturnT,
typename... BeginArgsT>
friend class ScopedTracer;
friend class Event;
friend class Stream;
@@ -648,7 +639,7 @@ class StreamExecutor {
// Calls the relevant TraceListener routine to begin tracing for the specified
// asynchronous method.
template <typename TraceCallT, typename... ArgsT>
void SubmitTrace(TraceCallT trace_call, ArgsT&&... args);
void SubmitTrace(TraceCallT trace_call, ArgsT &&...args);
// Reader/writer lock for class-static StreamExecutor members.
static absl::Mutex static_mu_;

View File

@@ -96,8 +96,7 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
void DequeueOutfeed(int32 outfeed_queue_index, absl::Span<uint8> bytes,
StatusCallback done);
Status EnqueueInfeed(int32 infeed_queue_index,
absl::Span<const uint8> bytes);
Status EnqueueInfeed(int32 infeed_queue_index, absl::Span<const uint8> bytes);
absl::optional<stream_executor::AllocatorStats> GetAllocatorStats() override;
@@ -175,10 +174,6 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
LOG(FATAL) << "Not yet implemented";
}
stream_executor::SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
LOG(FATAL) << "not yet implemented";
}
void* GetSubBuffer(DeviceMemoryBase* parent, uint64 offset,
uint64 size) override {
LOG(FATAL) << "not yet implemented";
@@ -197,10 +192,7 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override {
LOG(FATAL) << "not yet implemented";
}
Status SetDeviceSharedMemoryConfig(
stream_executor::SharedMemoryConfig config) override {
LOG(FATAL) << "not yet implemented";
}
void* HostMemoryAllocate(uint64 size) override {
LOG(FATAL) << "not yet implemented";
}