From f7d29c94dfda42b3531bddc911cf5b9fdd4bddab Mon Sep 17 00:00:00 2001
From: Ben Barsdell <benbarsdell@gmail.com>
Date: Mon, 6 Jul 2020 21:30:27 +1000
Subject: [PATCH] Remove duplicated code in conv and fft kernels

---
 tensorflow/core/kernels/conv_ops.cc    | 14 +-------
 tensorflow/core/kernels/conv_ops_gpu.h | 47 +-------------------------
 tensorflow/core/kernels/fft_ops.cc     | 16 ++-------
 3 files changed, 4 insertions(+), 73 deletions(-)

diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index b8c2671e7d2..fd8cad0d67c 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -619,19 +619,7 @@ template struct LaunchConv2DOp<GPUDevice, double>;
 
 int64 GetDnnWorkspaceLimit(const string& envvar_in_mb,
                            int64 default_value_in_bytes) {
-  const char* workspace_limit_in_mb_str = getenv(envvar_in_mb.c_str());
-  if (workspace_limit_in_mb_str != nullptr &&
-      strcmp(workspace_limit_in_mb_str, "") != 0) {
-    int64 scratch_limit_in_mb = -1;
-    if (strings::safe_strto64(workspace_limit_in_mb_str,
-                              &scratch_limit_in_mb)) {
-      return scratch_limit_in_mb * (1 << 20);
-    } else {
-      LOG(WARNING) << "Invalid value for env-var " << envvar_in_mb << ": "
-                   << workspace_limit_in_mb_str;
-    }
-  }
-  return default_value_in_bytes;
+  return GetWorkspaceLimit(envvar_in_mb, default_value_in_bytes);
 }
 
 // A dummy type to group forward convolution autotune results together.
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index 2e97d486b54..8beab722a64 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -48,52 +48,7 @@ int64 GetDnnWorkspaceLimit(const string& envvar_in_mb,
 // A class to provide scratch-space allocator for Stream-Executor Cudnn
 // callback. TensorFlow is responsible for releasing the temporary buffers after
 // the kernel finishes.
-class DnnScratchAllocator : public se::ScratchAllocator {
- public:
-  virtual ~DnnScratchAllocator() {}
-  DnnScratchAllocator(int64 memory_limit, OpKernelContext* context)
-      : memory_limit_(memory_limit), total_byte_size_(0), context_(context) {}
-  int64 GetMemoryLimitInBytes() override { return memory_limit_; }
-  se::port::StatusOr<se::DeviceMemory<uint8>> AllocateBytes(
-      int64 byte_size) override {
-    Tensor temporary_memory;
-    if (byte_size < 0) {
-      return se::port::Status{se::port::error::INVALID_ARGUMENT,
-                              "Requested negative byte size!"};
-    }
-    if (byte_size > memory_limit_) {
-      return se::port::Status{se::port::error::UNAVAILABLE,
-                              absl::StrCat("Requested memory size (", byte_size,
-                                           ") exceeds the max memory limit (",
-                                           memory_limit_, ").")};
-    }
-    AllocationAttributes allocation_attr;
-    allocation_attr.retry_on_failure = false;
-    Status allocation_status(context_->allocate_temp(
-        DT_UINT8, TensorShape({byte_size}), &temporary_memory,
-        AllocatorAttributes(), allocation_attr));
-    if (!allocation_status.ok()) {
-      return se::port::Status{
-          se::port::error::UNAVAILABLE,
-          absl::StrCat("Failed to allocate the requested memory size (",
-                       byte_size, ").")};
-    }
-    // Hold the reference of the allocated tensors until the end of the
-    // allocator.
-    allocated_tensors_.push_back(temporary_memory);
-    total_byte_size_ += byte_size;
-    return se::port::StatusOr<se::DeviceMemory<uint8>>(
-        AsDeviceMemory(temporary_memory.flat<uint8>().data(),
-                       temporary_memory.flat<uint8>().size()));
-  }
-  int64 TotalByteSize() { return total_byte_size_; }
-
- private:
-  int64 memory_limit_;
-  int64 total_byte_size_;
-  OpKernelContext* context_;
-  std::vector<Tensor> allocated_tensors_;
-};
+using DnnScratchAllocator = GpuScratchAllocator;
 
 // Encapsulate all the shape information that is used in both forward and
 // backward conv operations.
diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc
index 050b83980c6..f73cc29ecf0 100644
--- a/tensorflow/core/kernels/fft_ops.cc
+++ b/tensorflow/core/kernels/fft_ops.cc
@@ -31,6 +31,7 @@ limitations under the License.
 
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
+#include "tensorflow/core/kernels/gpu_utils.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
@@ -400,20 +401,7 @@ class CufftScratchAllocator : public se::ScratchAllocator {
 
 int64 GetCufftWorkspaceLimit(const string& envvar_in_mb,
                              int64 default_value_in_bytes) {
-  const char* workspace_limit_in_mb_str = getenv(envvar_in_mb.c_str());
-  if (workspace_limit_in_mb_str != nullptr &&
-      strcmp(workspace_limit_in_mb_str, "") != 0) {
-    int64 scratch_limit_in_mb = -1;
-    Status status = ReadInt64FromEnvVar(envvar_in_mb, default_value_in_bytes,
-                                        &scratch_limit_in_mb);
-    if (!status.ok()) {
-      LOG(WARNING) << "Invalid value for env-var " << envvar_in_mb << ": "
-                   << workspace_limit_in_mb_str;
-    } else {
-      return scratch_limit_in_mb * (1 << 20);
-    }
-  }
-  return default_value_in_bytes;
+  return GetWorkspaceLimit(envvar_in_mb, default_value_in_bytes);
 }
 
 class FFTGPUBase : public FFTBase {