Remove SharedMemoryConfig since it is not used anywhere.

PiperOrigin-RevId: 326154532
Change-Id: I13be21f577226c48c7e0d5bfc7efb787f0422e85
Anna R 2020-08-11 19:56:45 -07:00 committed by TensorFlower Gardener
parent 9636571807
commit 642db2faf5
11 changed files with 54 additions and 247 deletions
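For context, SharedMemoryConfig was a three-value enum describing how wide a device's shared memory banks should be; each StreamExecutor backend translated it into its driver's native setting (CUsharedconfig on CUDA, hipSharedMemConfig on ROCm), as the hunks below show. A minimal standalone sketch of the pattern being deleted follows; DriverBankSize and ToDriverConfig are hypothetical stand-ins for the driver side so the sketch compiles on its own:

#include <cstdio>

// The enum being removed, copied from shared_memory_config.h (deleted below).
enum class SharedMemoryConfig {
  kDefault,    // Use the device default configuration.
  kFourByte,   // Sets shared memory banks to be four bytes wide.
  kEightByte,  // Sets shared memory banks to be eight bytes wide.
};

// Hypothetical stand-in for a driver-level enum such as CUsharedconfig.
enum class DriverBankSize { kDefault, kFourByte, kEightByte };

// Mirrors the switch in GpuExecutor::SetDeviceSharedMemoryConfig below.
DriverBankSize ToDriverConfig(SharedMemoryConfig config) {
  switch (config) {
    case SharedMemoryConfig::kDefault:
      return DriverBankSize::kDefault;
    case SharedMemoryConfig::kFourByte:
      return DriverBankSize::kFourByte;
    case SharedMemoryConfig::kEightByte:
      return DriverBankSize::kEightByte;
  }
  return DriverBankSize::kDefault;  // Unreachable for valid enum values.
}

int main() {
  // kEightByte maps to the driver's eight-byte bank size.
  std::printf("%d\n",
              static_cast<int>(ToDriverConfig(SharedMemoryConfig::kEightByte)));
}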

View File

@@ -38,7 +38,6 @@ limitations under the License.
#include "tensorflow/stream_executor/launch_dim.h"
#include "tensorflow/stream_executor/plugin.h"
#include "tensorflow/stream_executor/rng.h"
#include "tensorflow/stream_executor/shared_memory_config.h"
#include "tensorflow/stream_executor/stream.h"
#include "tensorflow/stream_executor/stream_executor.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -182,15 +181,6 @@ class XlaInterpreterExecutor : public internal::StreamExecutorInterface {
return true;
}
SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
return SharedMemoryConfig::kDefault;
}
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override {
return port::Status{port::error::UNIMPLEMENTED,
"Shared memory not supported"};
}
std::unique_ptr<internal::EventInterface> CreateEventImplementation()
override {
return nullptr;

View File

@@ -67,7 +67,6 @@ cc_library(
"plugin.h",
"plugin_registry.h",
"rng.h",
"shared_memory_config.h",
"stream_executor_pimpl.h",
"temporary_device_memory.h",
"temporary_memory_manager.h",
@@ -123,7 +122,6 @@ cc_library(
"multi_platform_manager.h",
"platform.h",
"plugin_registry.h",
"shared_memory_config.h",
"stream_executor.h",
"stream_executor_internal.h",
"timer.h",
@@ -173,11 +171,6 @@ cc_library(
],
)
cc_library(
name = "shared_memory_config",
hdrs = ["shared_memory_config.h"],
)
# Aliases for backwards compatibility.
alias(
name = "stream_header",
@@ -343,7 +336,6 @@ cc_library(
"kernel_cache_config.h",
"kernel_spec.h",
"platform.h",
"shared_memory_config.h",
"stream.h",
"stream_executor_internal.h",
"trace_listener.h",
@@ -455,7 +447,6 @@ cc_library(
"stream_executor_internal.cc",
],
hdrs = [
"shared_memory_config.h",
"stream_executor_internal.h",
],
deps = [
@@ -484,7 +475,6 @@ cc_library(
"dnn.h",
"kernel.h",
"kernel_cache_config.h",
"shared_memory_config.h",
"stream_executor_pimpl.h",
],
visibility = ["//visibility:public"],
@@ -569,7 +559,6 @@ cc_library(
"plugin.h",
"plugin_registry.h",
"rng.h",
"shared_memory_config.h",
"stream.h",
"stream_executor.h",
"stream_executor_internal.h",
@@ -619,7 +608,6 @@ cc_library(
"plugin.h",
"plugin_registry.h",
"rng.h",
"shared_memory_config.h",
"stream.h",
"stream_executor.h",
"stream_executor_internal.h",

View File

@@ -101,12 +101,12 @@ static GpuTimer* AsGpuTimer(Timer* timer) {
// N.B. we must lose constness in order to pass a suitable type to the existing
// libcuda APIs, so the caller should take care to only pass the result of const
// GPU memory conversions to libcuda functions which will honor constness.
static CUdeviceptr AsCudaDevicePtr(const DeviceMemoryBase &gpu_mem) {
static CUdeviceptr AsCudaDevicePtr(const DeviceMemoryBase& gpu_mem) {
return reinterpret_cast<CUdeviceptr>(gpu_mem.opaque());
}
// See description on const version above.
static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase *gpu_mem) {
static CUdeviceptr AsCudaDevicePtr(DeviceMemoryBase* gpu_mem) {
return AsCudaDevicePtr(*gpu_mem);
}
@@ -225,11 +225,11 @@ port::Status GpuExecutor::LoadModuleFromCuBin(const char* cubin,
if (*module == nullptr) {
TF_RETURN_IF_ERROR(GpuDriver::LoadCubin(context_, cubin, module));
module_refcount = 1;
VLOG(3) << "Loaded CUBIN " << static_cast<const void *>(cubin)
VLOG(3) << "Loaded CUBIN " << static_cast<const void*>(cubin)
<< " as module " << *module;
} else {
++module_refcount;
VLOG(3) << "CUBIN " << static_cast<const void *>(cubin)
VLOG(3) << "CUBIN " << static_cast<const void*>(cubin)
<< " is already loaded as module " << *module;
}
gpu_binary_to_module_[cubin] = {*module, module_refcount};
@@ -242,12 +242,12 @@ port::Status GpuExecutor::LoadModuleFromPtx(const char* ptx, CUmodule* module) {
if (*module == nullptr) {
TF_RETURN_IF_ERROR(GpuDriver::LoadPtx(context_, ptx, module));
VLOG(3) << "Loaded PTX " << static_cast<const void *>(ptx) << " as module "
VLOG(3) << "Loaded PTX " << static_cast<const void*>(ptx) << " as module "
<< *module;
module_refcount = 1;
} else {
++module_refcount;
VLOG(3) << "PTX " << static_cast<const void *>(ptx)
VLOG(3) << "PTX " << static_cast<const void*>(ptx)
<< " is already loaded as module " << module;
}
gpu_binary_to_module_[ptx] = {*module, module_refcount};
@@ -271,7 +271,7 @@ port::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec,
if (spec.has_cuda_cubin_in_memory()) {
absl::MutexLock lock{&in_memory_modules_mu_};
kernelname = &spec.cuda_cubin_in_memory().kernelname();
const char *cubin = spec.cuda_cubin_in_memory().bytes();
const char* cubin = spec.cuda_cubin_in_memory().bytes();
TF_RETURN_IF_ERROR(LoadModuleFromCuBin(cubin, &module));
kernel_to_gpu_binary_[kernel] = cubin;
} else if (spec.has_cuda_ptx_in_memory()) {
@@ -281,7 +281,7 @@ port::Status GpuExecutor::GetKernel(const MultiKernelLoaderSpec& spec,
return port::InternalError("Compute capability not set");
}
const char *ptx = spec.cuda_ptx_in_memory().text(cc_major_, cc_minor_);
const char* ptx = spec.cuda_ptx_in_memory().text(cc_major_, cc_minor_);
if (ptx == nullptr) {
ptx = spec.cuda_ptx_in_memory().default_text();
}
@@ -318,8 +318,8 @@ bool GpuExecutor::UnloadGpuBinary(const void* gpu_binary) {
VLOG(3) << "No loaded CUDA module for " << gpu_binary;
return false;
}
auto &module = module_it->second.first;
auto &refcount = module_it->second.second;
auto& module = module_it->second.first;
auto& refcount = module_it->second.second;
VLOG(3) << "Found CUDA module " << module << " with refcount " << refcount;
if (--refcount == 0) {
VLOG(3) << "Unloading CUDA module " << module;
@@ -355,8 +355,8 @@ port::Status GpuExecutor::LoadModule(const MultiModuleLoaderSpec& spec,
TF_RETURN_IF_ERROR(LoadModuleFromCuBin(
reinterpret_cast<const char*>(spec.cuda_cubin_in_memory().data()),
&cu_module));
*module_handle = ModuleHandle(const_cast<void *>(
static_cast<const void *>(spec.cuda_cubin_in_memory().data())));
*module_handle = ModuleHandle(const_cast<void*>(
static_cast<const void*>(spec.cuda_cubin_in_memory().data())));
return port::Status::OK();
} else if (spec.has_cuda_ptx_in_memory()) {
if (cc_major_ == 0 && cc_minor_ == 0) {
@@ -370,15 +370,15 @@ port::Status GpuExecutor::LoadModule(const MultiModuleLoaderSpec& spec,
absl::MutexLock lock{&in_memory_modules_mu_};
TF_RETURN_IF_ERROR(
LoadModuleFromPtx(spec.cuda_ptx_in_memory(), &cu_module));
*module_handle = ModuleHandle(const_cast<void *>(
static_cast<const void *>(spec.cuda_ptx_in_memory())));
*module_handle = ModuleHandle(
const_cast<void*>(static_cast<const void*>(spec.cuda_ptx_in_memory())));
return port::Status::OK();
}
return port::InternalError("No method of loading CUDA module provided");
}
bool GpuExecutor::UnloadModule(ModuleHandle module_handle) {
const char *gpu_binary = reinterpret_cast<const char *>(module_handle.id());
const char* gpu_binary = reinterpret_cast<const char*>(module_handle.id());
absl::MutexLock lock{&in_memory_modules_mu_};
return UnloadGpuBinary(gpu_binary);
}
@@ -425,7 +425,7 @@ port::Status GpuExecutor::Launch(Stream* stream, const ThreadDim& thread_dims,
cufunc, cuda_kernel->GetGpuCacheConfig()));
}
void **kernel_params = const_cast<void **>(args.argument_addresses().data());
void** kernel_params = const_cast<void**>(args.argument_addresses().data());
return GpuDriver::LaunchKernel(
context_, cufunc, block_dims.x, block_dims.y, block_dims.z, thread_dims.x,
@@ -454,7 +454,7 @@ void GpuExecutor::VlogOccupancyInfo(const KernelBase& kernel,
return;
}
const DeviceDescription &device_description =
const DeviceDescription& device_description =
kernel.parent()->GetDeviceDescription();
const GpuKernel* cuda_kernel = AsGpuKernel(&kernel);
@@ -522,7 +522,7 @@ DeviceMemoryBase GpuExecutor::Allocate(uint64 size, int64 memory_space) {
void* GpuExecutor::GetSubBuffer(DeviceMemoryBase* mem, uint64 offset_bytes,
uint64 size_bytes) {
// offset and size are in bytes, so char* works as the pointer type.
return reinterpret_cast<char *>(mem->opaque()) + offset_bytes;
return reinterpret_cast<char*>(mem->opaque()) + offset_bytes;
}
void GpuExecutor::Deallocate(DeviceMemoryBase* mem) {
@@ -662,8 +662,8 @@ bool GpuExecutor::HostCallback(Stream* stream,
/* static */ void GpuExecutor::InternalHostCallback(CUstream stream,
CUresult status,
void* data) {
std::function<void()> *callback =
reinterpret_cast<std::function<void()> *>(data);
std::function<void()>* callback =
reinterpret_cast<std::function<void()>*>(data);
(*callback)();
delete callback;
}
@@ -744,7 +744,7 @@ port::Status GpuExecutor::BlockHostUntilDone(Stream* stream) {
}
blas::BlasSupport* GpuExecutor::CreateBlas() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::BlasFactory> status =
registry->GetFactory<PluginRegistry::BlasFactory>(cuda::kCudaPlatformId,
plugin_config_.blas());
@@ -758,7 +758,7 @@ blas::BlasSupport* GpuExecutor::CreateBlas() {
}
dnn::DnnSupport* GpuExecutor::CreateDnn() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::DnnFactory> status =
registry->GetFactory<PluginRegistry::DnnFactory>(cuda::kCudaPlatformId,
plugin_config_.dnn());
@@ -772,7 +772,7 @@ dnn::DnnSupport* GpuExecutor::CreateDnn() {
}
fft::FftSupport* GpuExecutor::CreateFft() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::FftFactory> status =
registry->GetFactory<PluginRegistry::FftFactory>(cuda::kCudaPlatformId,
plugin_config_.fft());
@@ -786,7 +786,7 @@ fft::FftSupport* GpuExecutor::CreateFft() {
}
rng::RngSupport* GpuExecutor::CreateRng() {
PluginRegistry *registry = PluginRegistry::Instance();
PluginRegistry* registry = PluginRegistry::Instance();
port::StatusOr<PluginRegistry::RngFactory> status =
registry->GetFactory<PluginRegistry::RngFactory>(cuda::kCudaPlatformId,
plugin_config_.rng());
@@ -812,47 +812,6 @@ port::Status GpuExecutor::EnablePeerAccessTo(StreamExecutorInterface* other) {
return GpuDriver::EnablePeerAccess(context_, cuda_other->context_);
}
SharedMemoryConfig GpuExecutor::GetDeviceSharedMemoryConfig() {
port::StatusOr<CUsharedconfig> cuda_config =
GpuDriver::ContextGetSharedMemConfig(context_);
if (!cuda_config.ok()) {
// Don't log; the failed call will log necessary output.
return SharedMemoryConfig::kDefault;
}
switch (cuda_config.ValueOrDie()) {
case CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE:
return SharedMemoryConfig::kDefault;
case CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE:
return SharedMemoryConfig::kFourByte;
case CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE:
return SharedMemoryConfig::kEightByte;
default:
LOG(FATAL) << "Invalid shared memory configuration returned: "
<< cuda_config.ValueOrDie();
}
}
port::Status GpuExecutor::SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) {
CUsharedconfig cuda_config;
switch (config) {
case SharedMemoryConfig::kDefault:
cuda_config = CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE;
break;
case SharedMemoryConfig::kFourByte:
cuda_config = CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE;
break;
case SharedMemoryConfig::kEightByte:
cuda_config = CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE;
break;
default:
LOG(FATAL) << "Invalid shared memory configuration specified: "
<< static_cast<int>(config);
}
return GpuDriver::ContextSetSharedMemConfig(context_, cuda_config);
}
bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
return GpuDriver::GetDeviceMemoryInfo(context_, free, total);
}
@@ -875,7 +834,7 @@ bool GpuExecutor::GetSymbol(const std::string& symbol_name,
return lookup_in_module(it->second.first);
}
for (auto &it : gpu_binary_to_module_) {
for (auto& it : gpu_binary_to_module_) {
if (lookup_in_module(it.second.first)) {
return true;
}
@@ -963,7 +922,7 @@ static int TryToReadNumaNode(const std::string& pci_bus_id,
// We have to use fopen/fread here so that the device properties can be
// populated before InitGoogle procedure has been completed (at which point we
// could use the file::* utilities).
FILE *file = fopen(filename.c_str(), "r");
FILE* file = fopen(filename.c_str(), "r");
if (file == nullptr) {
LOG(ERROR) << "could not open file to read NUMA node: " << filename
<< "\nYour kernel may have been built without NUMA support.";
@@ -980,8 +939,9 @@ static int TryToReadNumaNode(const std::string& pci_bus_id,
if (port::safe_strto32(content, &value)) {
if (value < 0) { // See http://b/18228951 for details on this path.
LOG(INFO) << "successful NUMA node read from SysFS had negative value ("
<< value << "), but there must be at least one NUMA node"
", so returning NUMA node zero";
<< value
<< "), but there must be at least one NUMA node"
", so returning NUMA node zero";
fclose(file);
return 0;
}

View File

@@ -188,10 +188,6 @@ class GpuExecutor : public internal::StreamExecutorInterface {
bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override;
SharedMemoryConfig GetDeviceSharedMemoryConfig() override;
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override;
bool DeviceMemoryUsage(int64* free, int64* total) const override;
// Search for the symbol and returns a device pointer and size.

View File

@@ -148,20 +148,6 @@ class HostExecutor : public internal::StreamExecutorInterface {
return true;
}
SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
LOG(INFO) << "Shared memory configuration is unsupported for host "
<< "executors.";
return SharedMemoryConfig::kDefault;
}
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override {
std::string error_msg{
"Shared memory configuration is unsupported for host "
"executors."};
LOG(INFO) << error_msg;
return port::Status(port::error::UNIMPLEMENTED, error_msg);
}
bool SupportsBlas() const override;
blas::BlasSupport *CreateBlas() override;

View File

@@ -720,47 +720,6 @@ port::Status GpuExecutor::EnablePeerAccessTo(StreamExecutorInterface* other) {
return GpuDriver::EnablePeerAccess(context_, rocm_other->context_);
}
SharedMemoryConfig GpuExecutor::GetDeviceSharedMemoryConfig() {
port::StatusOr<hipSharedMemConfig> rocm_config =
GpuDriver::ContextGetSharedMemConfig(context_);
if (!rocm_config.ok()) {
// Don't log; the failed call will log necessary output.
return SharedMemoryConfig::kDefault;
}
switch (rocm_config.ValueOrDie()) {
case hipSharedMemBankSizeDefault:
return SharedMemoryConfig::kDefault;
case hipSharedMemBankSizeFourByte:
return SharedMemoryConfig::kFourByte;
case hipSharedMemBankSizeEightByte:
return SharedMemoryConfig::kEightByte;
default:
LOG(FATAL) << "Invalid shared memory configuration returned: "
<< rocm_config.ValueOrDie();
}
}
port::Status GpuExecutor::SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) {
hipSharedMemConfig rocm_config;
switch (config) {
case SharedMemoryConfig::kDefault:
rocm_config = hipSharedMemBankSizeDefault;
break;
case SharedMemoryConfig::kFourByte:
rocm_config = hipSharedMemBankSizeFourByte;
break;
case SharedMemoryConfig::kEightByte:
rocm_config = hipSharedMemBankSizeEightByte;
break;
default:
LOG(FATAL) << "Invalid shared memory configuration specified: "
<< static_cast<int>(config);
}
return GpuDriver::ContextSetSharedMemConfig(context_, rocm_config);
}
bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
return GpuDriver::GetDeviceMemoryInfo(context_, free, total);
}
@@ -768,24 +727,24 @@ bool GpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const {
bool GpuExecutor::GetSymbol(const string& symbol_name,
ModuleHandle module_handle, void** mem,
size_t* bytes) {
absl::MutexLock lock{&in_memory_modules_mu_};
if (static_cast<bool>(module_handle)) {
auto it = gpu_binary_to_module_.find(module_handle.id());
CHECK(it != gpu_binary_to_module_.end());
if (GpuDriver::GetModuleSymbol(
context_, it->second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
absl::MutexLock lock{&in_memory_modules_mu_};
if (static_cast<bool>(module_handle)) {
auto it = gpu_binary_to_module_.find(module_handle.id());
CHECK(it != gpu_binary_to_module_.end());
if (GpuDriver::GetModuleSymbol(
context_, it->second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
}
for (auto& it : gpu_binary_to_module_) {
if (GpuDriver::GetModuleSymbol(
context_, it.second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
for (auto& it : gpu_binary_to_module_) {
if (GpuDriver::GetModuleSymbol(
context_, it.second.first, symbol_name.c_str(),
reinterpret_cast<hipDeviceptr_t*>(mem), bytes)) {
return true;
}
}
LOG(INFO) << "Falied to find symbol in any modules: " << symbol_name;
return false;

View File

@@ -1,34 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This file defines a uniform interface to configuration options for shared
// memory for supported devices. As with many StreamExecutor-supported features,
// support for the options defined herein is device-dependent.
#ifndef TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_
#define TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_
namespace stream_executor {
// SharedMemoryConfig enum describes potential widths of shared memory banks for
// a device or kernel.
enum class SharedMemoryConfig {
kDefault, // Use the device default configuration.
kFourByte, // Sets shared memory banks to be four bytes wide.
kEightByte, // Sets shared memory banks to be eight bytes wide.
};
} // namespace stream_executor
#endif // TENSORFLOW_STREAM_EXECUTOR_SHARED_MEMORY_CONFIG_H_

View File

@@ -44,7 +44,6 @@ limitations under the License.
#include "tensorflow/stream_executor/platform.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/plugin_registry.h"
#include "tensorflow/stream_executor/shared_memory_config.h"
#include "tensorflow/stream_executor/trace_listener.h"
namespace stream_executor {
@@ -267,9 +266,6 @@ class StreamExecutorInterface {
virtual int PlatformDeviceCount() = 0;
virtual port::Status EnablePeerAccessTo(StreamExecutorInterface *other) = 0;
virtual bool CanEnablePeerAccessTo(StreamExecutorInterface *other) = 0;
virtual SharedMemoryConfig GetDeviceSharedMemoryConfig() = 0;
virtual port::Status SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) = 0;
virtual int64 GetDeviceLoad() { return -1; }

View File

@@ -230,23 +230,6 @@ port::Status StreamExecutor::EnablePeerAccessTo(StreamExecutor *other) {
return implementation_->EnablePeerAccessTo(other->implementation_.get());
}
SharedMemoryConfig StreamExecutor::GetDeviceSharedMemoryConfig() {
return implementation_->GetDeviceSharedMemoryConfig();
}
port::Status StreamExecutor::SetDeviceSharedMemoryConfig(
SharedMemoryConfig config) {
if (config != SharedMemoryConfig::kDefault &&
config != SharedMemoryConfig::kFourByte &&
config != SharedMemoryConfig::kEightByte) {
std::string error_msg = absl::StrFormat(
"Invalid shared memory config specified: %d", static_cast<int>(config));
LOG(ERROR) << error_msg;
return port::Status(port::error::INVALID_ARGUMENT, error_msg);
}
return implementation_->SetDeviceSharedMemoryConfig(config);
}
const DeviceDescription &StreamExecutor::GetDeviceDescription() const {
absl::MutexLock lock(&mu_);
if (device_description_ != nullptr) {
@@ -858,7 +841,7 @@ absl::optional<AllocatorStats> StreamExecutor::GetAllocatorStats() {
}
template <typename TraceCallT, typename... ArgsT>
void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&... args) {
void StreamExecutor::SubmitTrace(TraceCallT trace_call, ArgsT &&...args) {
if (tracing_enabled_) {
{
// instance tracers held in a block to limit the lock lifetime.

View File

@@ -35,7 +35,6 @@ limitations under the License.
#include "tensorflow/stream_executor/platform/logging.h"
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/rng.h"
#include "tensorflow/stream_executor/shared_memory_config.h"
#include "tensorflow/stream_executor/stream.h"
#include "tensorflow/stream_executor/stream_executor_internal.h"
#include "tensorflow/stream_executor/trace_listener.h"
@@ -54,8 +53,8 @@ struct AllocRecord {
};
// Forward declaration of private friend class.
template <typename BeginCallT, typename CompleteCallT,
typename ReturnT, typename... BeginArgsT>
template <typename BeginCallT, typename CompleteCallT, typename ReturnT,
typename... BeginArgsT>
class ScopedTracer;
// A StreamExecutor manages a single device, in terms of executing work (kernel
@@ -322,14 +321,6 @@ class StreamExecutor {
// this is more an up-front test as to whether it's expressly forbidden.
bool CanEnablePeerAccessTo(StreamExecutor *other);
// Gets the preferred shared memory configuration for the device to which this
// executor is bound.
SharedMemoryConfig GetDeviceSharedMemoryConfig();
// Sets the preferred shared memory configuration for the device to which this
// executor is bound.
port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config);
// Obtains metadata about the underlying device.
// The value is cached on first use.
const DeviceDescription &GetDeviceDescription() const;
@@ -507,12 +498,12 @@ class StreamExecutor {
// To register a listener for all executors for a given platform, see
// Platform::RegisterTraceListener().
// Does not take ownership of listener.
void RegisterTraceListener(TraceListener* listener);
void RegisterTraceListener(TraceListener *listener);
// Removes a TraceListener from this StreamExecutor instance.
// Returns false (and logs) in cases where the argument listener was not
// previously registered.
bool UnregisterTraceListener(TraceListener* listener);
bool UnregisterTraceListener(TraceListener *listener);
// Return allocator statistics.
absl::optional<AllocatorStats> GetAllocatorStats();
@@ -522,8 +513,8 @@ class StreamExecutor {
StreamExecutorMemoryAllocator *GetAllocator() { return &allocator_; }
private:
template <typename BeginCallT, typename CompleteCallT,
typename ReturnT, typename... BeginArgsT>
template <typename BeginCallT, typename CompleteCallT, typename ReturnT,
typename... BeginArgsT>
friend class ScopedTracer;
friend class Event;
friend class Stream;
@@ -648,7 +639,7 @@ class StreamExecutor {
// Calls the relevant TraceListener routine to begin tracing for the specified
// asynchronous method.
template <typename TraceCallT, typename... ArgsT>
void SubmitTrace(TraceCallT trace_call, ArgsT&&... args);
void SubmitTrace(TraceCallT trace_call, ArgsT &&...args);
// Reader/writer lock for class-static StreamExecutor members.
static absl::Mutex static_mu_;

View File

@@ -96,8 +96,7 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
void DequeueOutfeed(int32 outfeed_queue_index, absl::Span<uint8> bytes,
StatusCallback done);
Status EnqueueInfeed(int32 infeed_queue_index,
absl::Span<const uint8> bytes);
Status EnqueueInfeed(int32 infeed_queue_index, absl::Span<const uint8> bytes);
absl::optional<stream_executor::AllocatorStats> GetAllocatorStats() override;
@@ -175,10 +174,6 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
LOG(FATAL) << "Not yet implemented";
}
stream_executor::SharedMemoryConfig GetDeviceSharedMemoryConfig() override {
LOG(FATAL) << "not yet implemented";
}
void* GetSubBuffer(DeviceMemoryBase* parent, uint64 offset,
uint64 size) override {
LOG(FATAL) << "not yet implemented";
@@ -197,10 +192,7 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override {
LOG(FATAL) << "not yet implemented";
}
Status SetDeviceSharedMemoryConfig(
stream_executor::SharedMemoryConfig config) override {
LOG(FATAL) << "not yet implemented";
}
void* HostMemoryAllocate(uint64 size) override {
LOG(FATAL) << "not yet implemented";
}