diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 35bee2ed29c..9ae3836d6c4 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -400,11 +400,9 @@ cc_library( srcs = ["linear_storage.cc"], hdrs = ["linear_storage.h"], deps = [ - ":buffer", ":gpu_object", ":opencl_wrapper", ":tensor_type", - ":texture2d", ":util", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:status", diff --git a/tensorflow/lite/delegates/gpu/cl/buffer.cc b/tensorflow/lite/delegates/gpu/cl/buffer.cc index c59d27687fa..340c2a7f9ac 100644 --- a/tensorflow/lite/delegates/gpu/cl/buffer.cc +++ b/tensorflow/lite/delegates/gpu/cl/buffer.cc @@ -28,19 +28,10 @@ namespace { absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only, const void* data, CLContext* context, Buffer* result) { - cl_mem_flags flags = gpu_read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE; - if (data != nullptr) { - flags |= CL_MEM_COPY_HOST_PTR; - } - cl_int error_code; - cl_mem buffer = clCreateBuffer(context->context(), flags, size_in_bytes, - const_cast<void*>(data), &error_code); - if (!buffer) { - return absl::UnknownError( - absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", - CLErrorCodeToString(error_code))); - } - + cl_mem buffer; + RETURN_IF_ERROR(CreateCLBuffer(context->context(), size_in_bytes, + gpu_read_only, const_cast<void*>(data), + &buffer)); *result = Buffer(buffer, size_in_bytes); return absl::OkStatus(); @@ -185,28 +176,13 @@ absl::Status Buffer::GetGPUResources(const GPUObjectDescriptor* obj_ptr, absl::Status Buffer::CreateFromBufferDescriptor(const BufferDescriptor& desc, CLContext* context) { - cl_mem_flags flags = desc.memory_type == MemoryType::CONSTANT - ? CL_MEM_READ_ONLY - : CL_MEM_READ_WRITE; - if (!desc.data.empty()) { - flags |= CL_MEM_COPY_HOST_PTR; - } - cl_int error_code; + bool read_only = desc.memory_type == MemoryType::CONSTANT; + uint8_t* data_ptr = desc.data.empty() + ? nullptr + : const_cast<unsigned char*>(desc.data.data()); size_ = desc.size; - if (desc.data.empty()) { - buffer_ = clCreateBuffer(context->context(), flags, desc.size, nullptr, - &error_code); - } else { - buffer_ = clCreateBuffer(context->context(), flags, desc.size, - const_cast<unsigned char*>(desc.data.data()), - &error_code); - } - if (!buffer_) { - return absl::UnknownError( - absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); + return CreateCLBuffer(context->context(), desc.size, read_only, data_ptr, + &buffer_); } absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext* context, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h index 2ba576e2f1e..78dc2c82a3c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h @@ -127,12 +127,9 @@ absl::Status Conv3D::UploadData(const tflite::gpu::Tensor<OHWDI, T>& weights, ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; desc.element_type = definition_.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, biases, context, <)); - args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(biases); + args_.AddObject("biases", + absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index 08a1bc207d5..632896f8cd6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -178,16 +178,10 @@ absl::Status ConvBuffer1x1::UploadBiases( TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::BUFFER; desc.element_type = definition_.GetDataType(); - - tflite::gpu::Tensor<Linear, DataType::FLOAT32> bias = biases; - int channels = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z); - bias.shape = Linear(channels); - bias.data.resize(channels, 0.0f); - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, bias, context, <)); - args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4; + desc.UploadLinearData(biases, depth); + args_.AddObject("biases", + absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index 1ed900a2080..772af1d3d4a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -291,13 +291,9 @@ absl::Status CreateConvConstants(const CreationContext& creation_context, desc.storage_type = LinearStorageType::BUFFER; desc.element_type = definition.GetDataType(); desc.memory_type = MemoryType::CONSTANT; - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h index 1e490c972e7..35ee630e633 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h @@ -121,12 +121,9 @@ absl::Status ConvTexture::UploadData( TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition_.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, biases, context, <)); - args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(biases); + args_.AddObject("biases", + absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } @@ -144,12 +141,9 @@ absl::Status ConvTexture::UploadDataForWinograd4x4To6x6( TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition_.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, bias, context, <)); - args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(bias); + args_.AddObject("biases", + absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index f63b9db6007..0b02cb0f3bf 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -371,13 +371,9 @@ absl::Status CreateConvolutionTransposed( desc.storage_type = DeduceLinearStorageType(definition.GetPrimaryStorageType()); desc.element_type = definition.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc index 2b35080b1ab..df1e01deea8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc @@ -414,13 +414,9 @@ absl::Status CreateConvolutionTransposed3D( desc.storage_type = DeduceLinearStorageType(definition.GetPrimaryStorageType()); desc.element_type = definition.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc index 3e3a5a1f7f4..644e5ad09ea 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc @@ -353,13 +353,9 @@ absl::Status CreateConvolutionTransposed3x3( TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc index 4ecb23c318c..2b6d502e79f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc @@ -330,13 +330,9 @@ absl::Status CreateConvolutionTransposed4x4( TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index 4b4416751fb..5b4476a0a09 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -319,13 +319,9 @@ absl::Status CreateDepthwiseConvolution( desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } @@ -342,13 +338,9 @@ absl::Status CreateDepthwiseConvolution( desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index ec18fa9f6e2..69cc12740a6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -121,13 +121,9 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context, TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, attr.bias, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(attr.bias); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc index 1ca2e096a0e..7a29d5752fe 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc @@ -53,13 +53,10 @@ absl::Status CreatePReLU(const CreationContext& creation_context, desc.storage_type = DeduceLinearStorageType(definition.GetPrimaryStorageType()); desc.element_type = definition.GetPrimaryDataType(); + desc.UploadLinearData(*alpha); - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, *alpha, creation_context.context, <)); - result->args_.AddObject("alpha", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + result->args_.AddObject( + "alpha", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index ae738cce923..d8457c15d51 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -250,12 +250,9 @@ absl::Status Winograd4x4To36::UploadBt(CLContext* context) { TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition_.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, bt_aligned, context, <)); - args_.AddObject("bt", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(bt_aligned); + args_.AddObject("bt", + absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } @@ -456,11 +453,9 @@ absl::Status Winograd36To4x4::UploadAt(CLContext* context) { TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition_.GetDataType(); - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, at_aligned, context, <)); - args_.AddObject("at", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(at_aligned); + args_.AddObject("at", + absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return absl::OkStatus(); } @@ -509,12 +504,9 @@ absl::Status CreateWinograd36To4x4( TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); - LinearStorage lt; - RETURN_IF_ERROR( - CreateLinearStorage(desc, biases, creation_context.context, <)); - result->args_.AddObject("biases", AccessType::READ, - absl::make_unique<LinearStorage>(std::move(lt)), - absl::make_unique<TensorLinearDescriptor>(desc)); + desc.UploadLinearData(biases); + result->args_.AddObject( + "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); return result->UploadAt(creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/linear_storage.cc b/tensorflow/lite/delegates/gpu/cl/linear_storage.cc index 0ff17d0e3de..75920f4f8c5 100644 --- a/tensorflow/lite/delegates/gpu/cl/linear_storage.cc +++ b/tensorflow/lite/delegates/gpu/cl/linear_storage.cc @@ -23,6 +23,29 @@ namespace tflite { namespace gpu { namespace cl { +TensorLinearDescriptor::TensorLinearDescriptor(TensorLinearDescriptor&& desc) + : GPUObjectDescriptor(std::move(desc)), + storage_type(desc.storage_type), + element_type(desc.element_type), + memory_type(desc.memory_type), + size(desc.size), + data(std::move(desc.data)) {} + +TensorLinearDescriptor& TensorLinearDescriptor::operator=( + TensorLinearDescriptor&& desc) { + if (this != &desc) { + std::swap(storage_type, desc.storage_type); + std::swap(element_type, desc.element_type); + std::swap(memory_type, desc.memory_type); + std::swap(size, desc.size); + data = std::move(desc.data); + GPUObjectDescriptor::operator=(std::move(desc)); + } + return *this; +} + +void TensorLinearDescriptor::Release() { data.clear(); } + GPUResources TensorLinearDescriptor::GetGPUResources() const { GPUResources resources; resources.ints.push_back("length"); @@ -81,20 +104,60 @@ absl::Status TensorLinearDescriptor::PerformReadSelector( } } -LinearStorage::LinearStorage(int depth, LinearStorageType storage_type) - : depth_(depth), storage_type_(storage_type) {} +absl::Status TensorLinearDescriptor::CreateGPUObject( + CLContext* context, GPUObjectPtr* result) const { + LinearStorage gpu_storage; + RETURN_IF_ERROR(gpu_storage.CreateFromTensorLinearDescriptor(*this, context)); + *result = absl::make_unique<LinearStorage>(std::move(gpu_storage)); + return absl::OkStatus(); +} + +void TensorLinearDescriptor::UploadLinearData( + const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src, + int aligned_size) { + size = aligned_size == 0 ? DivideRoundUp(src.shape.v, 4) : aligned_size; + if (element_type == DataType::FLOAT32) { + data.resize(size * sizeof(float) * 4); + float* gpu_data = reinterpret_cast<float*>(data.data()); + for (int i = 0; i < size * 4; ++i) { + if (i < src.shape.v) { + gpu_data[i] = src.data[i]; + } else { + gpu_data[i] = 0.0f; + } + } + } else { + data.resize(size * sizeof(half) * 4); + half* gpu_data = reinterpret_cast<half*>(data.data()); + for (int i = 0; i < size * 4; ++i) { + if (i < src.shape.v) { + gpu_data[i] = src.data[i]; + } else { + gpu_data[i] = 0.0f; + } + } + } +} + +void LinearStorage::Release() { + if (memory_) { + clReleaseMemObject(memory_); + memory_ = nullptr; + } +} LinearStorage::LinearStorage(LinearStorage&& storage) : GPUObject(std::move(storage)), - texture_storage_(std::move(storage.texture_storage_)), - buffer_storage_(std::move(storage.buffer_storage_)), + memory_(storage.memory_), depth_(storage.depth_), - storage_type_(storage.storage_type_) {} + storage_type_(storage.storage_type_) { + storage.memory_ = nullptr; +} LinearStorage& LinearStorage::operator=(LinearStorage&& storage) { if (this != &storage) { - texture_storage_ = std::move(storage.texture_storage_); - buffer_storage_ = std::move(storage.buffer_storage_); + Release(); + std::swap(memory_, storage.memory_); std::swap(depth_, storage.depth_); std::swap(storage_type_, storage.storage_type_); GPUObject::operator=(std::move(storage)); @@ -115,14 +178,37 @@ absl::Status LinearStorage::GetGPUResources( resources->ints.push_back({"length", depth_}); if (storage_type_ == LinearStorageType::BUFFER) { - resources->buffers.push_back({"buffer", buffer_storage_.GetMemoryPtr()}); + resources->buffers.push_back({"buffer", memory_}); } else { - resources->images2d.push_back({"tex2d", texture_storage_.GetMemoryPtr()}); + resources->images2d.push_back({"tex2d", memory_}); } return absl::OkStatus(); } +absl::Status LinearStorage::CreateFromTensorLinearDescriptor( + const TensorLinearDescriptor& desc, CLContext* context) { + storage_type_ = desc.storage_type; + depth_ = desc.size; + uint8_t* data_ptr = desc.data.empty() + ? nullptr + : const_cast<unsigned char*>(desc.data.data()); + if (storage_type_ == LinearStorageType::BUFFER) { + bool read_only = desc.memory_type == MemoryType::CONSTANT; + uint8_t* data_ptr = desc.data.empty() + ? nullptr + : const_cast<unsigned char*>(desc.data.data()); + const int float4_size = desc.element_type == DataType::FLOAT32 + ? sizeof(float) * 4 + : sizeof(half) * 4; + return CreateCLBuffer(context->context(), depth_ * float4_size, read_only, + data_ptr, &memory_); + } else { + return CreateFloatRGBAImage2D(context->context(), depth_, 1, + desc.element_type, data_ptr, &memory_); + } +} + LinearStorageType DeduceLinearStorageType( TensorStorageType tensor_storage_type) { if (tensor_storage_type == TensorStorageType::BUFFER) { @@ -132,24 +218,6 @@ LinearStorageType DeduceLinearStorageType( } } -absl::Status CreateLinearStorage(LinearStorageType storage_type, - DataType data_type, int size, void* data, - CLContext* context, LinearStorage* result) { - if (storage_type == LinearStorageType::BUFFER) { - const int float4_size = - data_type == DataType::FLOAT32 ? sizeof(float4) : sizeof(half4); - *result = LinearStorage(size, LinearStorageType::BUFFER); - RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * size, data, context, - &result->buffer_storage_)); - return absl::OkStatus(); - } else { - *result = LinearStorage(size, LinearStorageType::TEXTURE_2D); - RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, size, 1, data, context, - &result->texture_storage_)); - return absl::OkStatus(); - } -} - } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/linear_storage.h b/tensorflow/lite/delegates/gpu/cl/linear_storage.h index b69f76b9c1a..37e7f12dfb3 100644 --- a/tensorflow/lite/delegates/gpu/cl/linear_storage.h +++ b/tensorflow/lite/delegates/gpu/cl/linear_storage.h @@ -21,11 +21,9 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "absl/types/span.h" -#include "tensorflow/lite/delegates/gpu/cl/buffer.h" #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" -#include "tensorflow/lite/delegates/gpu/cl/texture2d.h" #include "tensorflow/lite/delegates/gpu/cl/util.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -42,6 +40,20 @@ struct TensorLinearDescriptor : public GPUObjectDescriptor { DataType element_type; // FLOAT32 or FLOAT16 MemoryType memory_type = MemoryType::GLOBAL; // applicable for BUFFER + // optional + int size = 0; + std::vector<uint8_t> data; + + TensorLinearDescriptor() = default; + TensorLinearDescriptor(const TensorLinearDescriptor&) = default; + TensorLinearDescriptor& operator=(const TensorLinearDescriptor&) = default; + TensorLinearDescriptor(TensorLinearDescriptor&& desc); + TensorLinearDescriptor& operator=(TensorLinearDescriptor&& desc); + + void UploadLinearData( + const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src, + int aligned_size = 0); + absl::Status PerformSelector(const std::string& selector, const std::vector<std::string>& args, const std::vector<std::string>& template_args, @@ -50,6 +62,10 @@ struct TensorLinearDescriptor : public GPUObjectDescriptor { GPUResources GetGPUResources() const override; absl::Status PerformReadSelector(const std::vector<std::string>& args, std::string* result) const; + + absl::Status CreateGPUObject(CLContext* context, + GPUObjectPtr* result) const override; + void Release() override; }; LinearStorageType DeduceLinearStorageType( @@ -60,8 +76,7 @@ LinearStorageType DeduceLinearStorageType( class LinearStorage : public GPUObject { public: LinearStorage() {} - - virtual ~LinearStorage() {} + ~LinearStorage() override { Release(); } // Move only LinearStorage(LinearStorage&& storage); @@ -72,46 +87,17 @@ class LinearStorage : public GPUObject { absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr, GPUResourcesWithValue* resources) const override; + absl::Status CreateFromTensorLinearDescriptor( + const TensorLinearDescriptor& desc, CLContext* context); + private: - friend absl::Status CreateLinearStorage(LinearStorageType storage_type, - DataType data_type, int size, - void* data, CLContext* context, - LinearStorage* result); - - LinearStorage(int depth, LinearStorageType storage_type); - - Texture2D texture_storage_; - Buffer buffer_storage_; + void Release(); + cl_mem memory_ = nullptr; int depth_; LinearStorageType storage_type_; }; -absl::Status CreateLinearStorage(LinearStorageType storage_type, - DataType data_type, int size, void* data, - CLContext* context, LinearStorage* result); - -template <DataType T> -absl::Status CreateLinearStorage(const TensorLinearDescriptor& descriptor, - const tflite::gpu::Tensor<Linear, T>& tensor, - CLContext* context, LinearStorage* result) { - const int depth = DivideRoundUp(tensor.shape.v, 4); - if (descriptor.element_type == DataType::FLOAT32) { - std::vector<float4> gpu_data(depth); - CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data)); - RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type, - descriptor.element_type, depth, - gpu_data.data(), context, result)); - } else { - std::vector<half4> gpu_data(depth); - CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data)); - RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type, - descriptor.element_type, depth, - gpu_data.data(), context, result)); - } - return absl::OkStatus(); -} - } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/texture2d.cc b/tensorflow/lite/delegates/gpu/cl/texture2d.cc index 0fb1e06fe89..28d26f03260 100644 --- a/tensorflow/lite/delegates/gpu/cl/texture2d.cc +++ b/tensorflow/lite/delegates/gpu/cl/texture2d.cc @@ -21,39 +21,14 @@ namespace cl { namespace { // Creates new 4-channel 2D texture with cl_channel_type elements -absl::Status CreateTexture2D(int width, int height, cl_channel_type type, - void* data, CLContext* context, - Texture2D* result) { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = width; - desc.image_height = height; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = type; - - cl_mem_flags flags = CL_MEM_READ_WRITE; - if (data != nullptr) { - flags |= CL_MEM_COPY_HOST_PTR; - } - - cl_int error_code; - cl_mem texture = CreateImage2DLegacy(context->context(), flags, &format, - &desc, data, &error_code); - if (error_code != CL_SUCCESS) { - return absl::UnknownError( - absl::StrCat("Failed to create 2D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = Texture2D(texture, width, height, type); +absl::Status CreateTexture2D(int width, int height, DataType type, void* data, + CLContext* context, Texture2D* result) { + cl_mem texture; + RETURN_IF_ERROR(CreateFloatRGBAImage2D(context->context(), width, height, + type, data, &texture)); + cl_channel_type channel_type = + type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT; + *result = Texture2D(texture, width, height, channel_type); return absl::OkStatus(); } @@ -94,7 +69,7 @@ absl::Status Texture2DDescriptor::PerformSelector( return PerformReadSelector(args, result); } else { return absl::NotFoundError(absl::StrCat( - "TensorLinearDescriptor don't have selector with name - ", selector)); + "Texture2DDescriptor don't have selector with name - ", selector)); } } @@ -167,79 +142,41 @@ absl::Status Texture2D::GetGPUResources( } absl::Status Texture2D::CreateFromTexture2DDescriptor( - const Texture2DDescriptor& tex_desc, CLContext* context) { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = tex_desc.size.x; - desc.image_height = tex_desc.size.y; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = - tex_desc.element_type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT; - - cl_mem_flags flags = CL_MEM_READ_WRITE; - if (!tex_desc.data.empty()) { - flags |= CL_MEM_COPY_HOST_PTR; - } - - cl_int error_code; - width_ = tex_desc.size.x; - height_ = tex_desc.size.y; - channel_type_ = format.image_channel_data_type; - if (tex_desc.data.empty()) { - texture_ = CreateImage2DLegacy(context->context(), flags, &format, &desc, - nullptr, &error_code); - } else { - texture_ = CreateImage2DLegacy( - context->context(), flags, &format, &desc, - const_cast<unsigned char*>(tex_desc.data.data()), &error_code); - } - if (error_code != CL_SUCCESS) { - return absl::UnknownError( - absl::StrCat("Failed to create 2D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); + const Texture2DDescriptor& desc, CLContext* context) { + width_ = desc.size.x; + height_ = desc.size.y; + channel_type_ = + desc.element_type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT; + uint8_t* data_ptr = desc.data.empty() + ? nullptr + : const_cast<unsigned char*>(desc.data.data()); + return CreateFloatRGBAImage2D(context->context(), desc.size.x, desc.size.y, + desc.element_type, data_ptr, &texture_); } // Creates new 4-channel 2D texture with f32 elements absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext* context, Texture2D* result) { - return CreateTexture2D(width, height, CL_FLOAT, nullptr, context, result); + return CreateTexture2D(width, height, DataType::FLOAT32, nullptr, context, + result); } // Creates new 4-channel 2D texture with f16 elements absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext* context, Texture2D* result) { - return CreateTexture2D(width, height, CL_HALF_FLOAT, nullptr, context, + return CreateTexture2D(width, height, DataType::FLOAT16, nullptr, context, result); } absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext* context, Texture2D* result) { - if (type == DataType::FLOAT32) { - return CreateTexture2D(width, height, CL_FLOAT, nullptr, context, result); - } else { - return CreateTexture2D(width, height, CL_HALF_FLOAT, nullptr, context, - result); - } + return CreateTexture2D(width, height, type, nullptr, context, result); } absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void* data, CLContext* context, Texture2D* result) { - if (type == DataType::FLOAT32) { - return CreateTexture2D(width, height, CL_FLOAT, data, context, result); - } else { - return CreateTexture2D(width, height, CL_HALF_FLOAT, data, context, result); - } + return CreateTexture2D(width, height, type, data, context, result); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/util.cc b/tensorflow/lite/delegates/gpu/cl/util.cc index ac996d8ffa6..199e0129968 100644 --- a/tensorflow/lite/delegates/gpu/cl/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/util.cc @@ -168,6 +168,56 @@ int ChannelTypeToSizeInBytes(cl_channel_type type) { bool OpenCLSupported() { return LoadOpenCL().ok(); } +absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, + bool read_only, void* data, cl_mem* result) { + cl_mem_flags flags = read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE; + if (data) { + flags |= CL_MEM_COPY_HOST_PTR; + } + cl_int error_code; + *result = clCreateBuffer(context, flags, size_in_bytes, data, &error_code); + if (!*result) { + return absl::UnknownError( + absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", + CLErrorCodeToString(error_code))); + } + return absl::OkStatus(); +} + +absl::Status CreateFloatRGBAImage2D(cl_context context, int width, int height, + DataType type, void* data, cl_mem* result) { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_depth = 0; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + desc.buffer = nullptr; + + cl_image_format format; + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = + type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT; + + cl_mem_flags flags = CL_MEM_READ_WRITE; + if (data) { + flags |= CL_MEM_COPY_HOST_PTR; + } + + cl_int error_code; + *result = + CreateImage2DLegacy(context, flags, &format, &desc, data, &error_code); + if (error_code != CL_SUCCESS) { + return absl::UnknownError( + absl::StrCat("Failed to create 2D texture (clCreateImage): ", + CLErrorCodeToString(error_code))); + } + return absl::OkStatus(); +} + } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/util.h b/tensorflow/lite/delegates/gpu/cl/util.h index 9435bb3a8a2..8e22c017fe7 100644 --- a/tensorflow/lite/delegates/gpu/cl/util.h +++ b/tensorflow/lite/delegates/gpu/cl/util.h @@ -49,6 +49,12 @@ void CopyLinearFLT4(const tflite::gpu::Tensor<Linear, S>& src, } } +absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, + bool read_only, void* data, cl_mem* result); + +absl::Status CreateFloatRGBAImage2D(cl_context context, int width, int height, + DataType type, void* data, cl_mem* result); + } // namespace cl } // namespace gpu } // namespace tflite