Move the reusable class CudnnAllocatorInTemp into a separate file
parent
1ab863f591
commit
46aa1ca220
@@ -367,6 +367,16 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "cudnn_scratch_allocator",
+    srcs = ["util/cudnn_scratch_allocator.cc"],
+    hdrs = ["util/cudnn_scratch_allocator.h"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/stream_executor:scratch_allocator",
+    ],
+)
+
 filegroup(
     name = "util_port_hdrs",
     srcs = [
@@ -2885,6 +2895,7 @@ tf_cuda_library(
         "util/version_info.cc",
         "util/env_var.cc",
         "util/port.cc",
+        "util/cudnn_scratch_allocator.cc",
     ],
 ) + select({
     "//tensorflow:windows": [],
@@ -2298,6 +2298,7 @@ tf_kernel_library(
         "//tensorflow/core/util/ctc:ctc_loss_calculator_lib",
     ] + if_cuda([
         "//tensorflow/core:stream_executor",
+        "//tensorflow/core:cudnn_scratch_allocator",
     ]),
 )
 
@@ -32,6 +32,7 @@ limitations under the License.
 #if GOOGLE_CUDA
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/util/stream_executor_util.h"
+#include "tensorflow/core/util/cudnn_scratch_allocator.h"
 #endif  // GOOGLE_CUDA
 
 namespace tensorflow {
@@ -41,14 +42,11 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 using GPUDevice = Eigen::GpuDevice;
 
 namespace {
-using se::DeviceMemory;
 using se::Stream;
 using se::StreamExecutor;
-using se::ScratchAllocator;
 using se::dnn::CtcLossDescriptor;
 using se::dnn::RnnStateTensorDescriptor;
 using se::dnn::ToDataType;
-using se::port::StatusOr;
 
 template<typename T>
 void DoHistogram(OpKernelContext* ctx, const Tensor* labels_indices,
@@ -56,56 +54,11 @@ void DoHistogram(OpKernelContext* ctx, const Tensor* labels_indices,
                  std::vector<int> *labels_lengths) {
   const T* h_in = labels_indices->flat<T>().data();
   for(int i = 0; i < num_indices; i++) {
-    T key = h_in[i * 2];
+    const T& key = h_in[i * 2];
     (*labels_lengths)[key]++;
   }
 }
 
-// A helper to allocate temporary scratch memory for cudnnCTCLoss ops. It
-// takes the ownership of the underlying memory. The expectation is that the
-// memory should be alive for the span of the cudnnCTCLoss itself.
-template <typename T>
-class CudnnCtcLossAllocatorInTemp : public ScratchAllocator {
- public:
-  ~CudnnCtcLossAllocatorInTemp() override = default;
-
-  explicit CudnnCtcLossAllocatorInTemp(OpKernelContext* context)
-      : context_(context) {}
-
-  int64 GetMemoryLimitInBytes() override {
-    return std::numeric_limits<int64>::max();
-  }
-
-  StatusOr<DeviceMemory<uint8>> AllocateBytes(int64 byte_size) override {
-    Tensor temporary_memory;
-    const DataType tf_data_type = DataTypeToEnum<T>::v();
-    int64 allocate_count =
-        Eigen::divup(byte_size, static_cast<int64>(sizeof(T)));
-    Status allocation_status(context_->allocate_temp(
-        tf_data_type, TensorShape({allocate_count}), &temporary_memory));
-    if (!allocation_status.ok()) {
-      return allocation_status;
-    }
-    // Hold the reference of the allocated tensors until the end of the
-    // allocator.
-    allocated_tensors_.push_back(temporary_memory);
-    total_byte_size_ += byte_size;
-    return DeviceMemory<uint8>::MakeFromByteSize(
-        temporary_memory.template flat<T>().data(),
-        temporary_memory.template flat<T>().size() * sizeof(T));
-  }
-
-  int64 TotalByteSize() const { return total_byte_size_; }
-
-  Tensor get_allocated_tensor(int index) const {
-    return allocated_tensors_[index];
-  }
-
- private:
-  int64 total_byte_size_ = 0;
-  OpKernelContext* context_;  // not owned
-  std::vector<Tensor> allocated_tensors_;
-};
 }  // end namespace
 #endif  // GOOGLE_CUDA
 
@@ -389,7 +342,7 @@ class CTCLossOpGPU : public OpKernel {
     auto costs_data = StreamExecutorUtil::AsDeviceMemory<float>(*loss);
     auto grads_data = StreamExecutorUtil::AsDeviceMemory<float>(*gradient);
 
-    CudnnCtcLossAllocatorInTemp<uint8> workspace_allocator(ctx);
+    CudnnAllocatorInTemp workspace_allocator(ctx);
 
     Stream* stream = ctx->op_device_context()->stream();
     bool cudnn_launch_status =
tensorflow/core/util/cudnn_scratch_allocator.cc (new file, 57 lines)
@@ -0,0 +1,57 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/cudnn_scratch_allocator.h"
+
+namespace tensorflow {
+
+CudnnAllocatorInTemp::~CudnnAllocatorInTemp() {}
+
+CudnnAllocatorInTemp::CudnnAllocatorInTemp(OpKernelContext* context)
+    : context_(context) {}
+
+int64 CudnnAllocatorInTemp::GetMemoryLimitInBytes() {
+  return std::numeric_limits<int64>::max();
+}
+
+StatusOr<DeviceMemory<uint8>> CudnnAllocatorInTemp::AllocateBytes(
+    int64 byte_size) {
+  Tensor temporary_memory;
+  const DataType tf_data_type = DataTypeToEnum<uint8>::v();
+  int64 allocate_count =
+      Eigen::divup(byte_size, static_cast<int64>(sizeof(uint8)));
+  Status allocation_status(context_->allocate_temp(
+      tf_data_type, TensorShape({allocate_count}), &temporary_memory));
+  if (!allocation_status.ok()) {
+    return allocation_status;
+  }
+  // Hold the reference of the allocated tensors until the end of the
+  // allocator.
+  allocated_tensors_.push_back(temporary_memory);
+  total_byte_size_ += byte_size;
+  return DeviceMemory<uint8>::MakeFromByteSize(
+      temporary_memory.template flat<uint8>().data(),
+      temporary_memory.template flat<uint8>().size() * sizeof(uint8));
+}
+
+int64 CudnnAllocatorInTemp::TotalByteSize() const {
+  return total_byte_size_;
+}
+
+Tensor CudnnAllocatorInTemp::get_allocated_tensor(int index) const {
+  return allocated_tensors_[index];
+}
+
+}  // namespace tensorflow
tensorflow/core/util/cudnn_scratch_allocator.h (new file, 50 lines)
@@ -0,0 +1,50 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_CUDNN_SCRATCH_ALLOCATOR_H_
+#define TENSORFLOW_CORE_UTIL_CUDNN_SCRATCH_ALLOCATOR_H_
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/stream_executor/scratch_allocator.h"
+
+namespace tensorflow {
+
+using stream_executor::ScratchAllocator;
+using stream_executor::port::StatusOr;
+using stream_executor::DeviceMemory;
+
+// A helper to allocate temporary scratch memory for cuDNN ops. It takes
+// ownership of the underlying memory. The expectation is that the memory
+// stays alive for the span of the cuDNN call itself.
+class CudnnAllocatorInTemp : public ScratchAllocator {
+ public:
+  explicit CudnnAllocatorInTemp(OpKernelContext* context);
+  ~CudnnAllocatorInTemp() override;
+  int64 GetMemoryLimitInBytes() override;
+  StatusOr<DeviceMemory<uint8>> AllocateBytes(int64 byte_size) override;
+  int64 TotalByteSize() const;
+  Tensor get_allocated_tensor(int index) const;
+
+ private:
+  int64 total_byte_size_ = 0;
+  OpKernelContext* context_;  // not owned
+  std::vector<Tensor> allocated_tensors_;
+
+  SE_DISALLOW_COPY_AND_ASSIGN(CudnnAllocatorInTemp);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_UTIL_CUDNN_SCRATCH_ALLOCATOR_H_
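For readers skimming this change, here is a hypothetical usage sketch, not part of the commit: it assumes a CUDA build and an OpKernelContext* obtained inside an OpKernel's Compute() call, the RunWithCudnnScratch helper name is invented, and the comment marking the cuDNN launch stands in for whichever stream_executor call actually consumes the scratch memory. Only the CudnnAllocatorInTemp interface shown in the header above is taken from the diff; the StatusOr accessors follow the stream_executor conventions of this era.

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/util/cudnn_scratch_allocator.h"

namespace tensorflow {

// Hypothetical helper, for illustration only: requests cuDNN workspace whose
// lifetime is tied to the allocator, i.e. to this function invocation.
void RunWithCudnnScratch(OpKernelContext* ctx) {
  // The allocator keeps a reference to every Tensor it hands out, so it must
  // outlive the cuDNN launch that uses the scratch memory.
  CudnnAllocatorInTemp scratch_allocator(ctx);

  StatusOr<DeviceMemory<uint8>> scratch_or =
      scratch_allocator.AllocateBytes(/*byte_size=*/4096);
  OP_REQUIRES(ctx, scratch_or.ok(),
              errors::Internal("failed to allocate cuDNN workspace"));
  DeviceMemory<uint8> scratch = scratch_or.ValueOrDie();

  // ... hand `scratch` (or `&scratch_allocator` itself) to the cuDNN call ...

  // Bookkeeping exposed by the class, valid while the allocator is alive.
  DCHECK_EQ(scratch_allocator.TotalByteSize(), 4096);
  Tensor backing = scratch_allocator.get_allocated_tensor(0);
  (void)backing;  // Same device memory as `scratch`, viewed as a Tensor.
}

}  // namespace tensorflow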