From c1a32fd496228b8dd021d719ebce27b9b4b791e5 Mon Sep 17 00:00:00 2001 From: Raman Sarokin <sorokin@google.com> Date: Tue, 18 Aug 2020 13:40:57 -0700 Subject: [PATCH] Added CPU representation for Tensor. PiperOrigin-RevId: 327297658 Change-Id: Iff651c9c21df506cf6a968d8c5000707d9bcf4cf --- .../delegates/gpu/cl/kernels/elementwise.cc | 22 +- tensorflow/lite/delegates/gpu/cl/tensor.cc | 417 +++++++++--------- tensorflow/lite/delegates/gpu/cl/tensor.h | 34 +- .../lite/delegates/gpu/cl/tensor_type.cc | 147 ++++++ .../lite/delegates/gpu/cl/tensor_type.h | 26 ++ 5 files changed, 380 insertions(+), 266 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index edd6dee7fc0..d433006ac4b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -170,17 +170,12 @@ absl::Status CreateElementwiseTwoInput( creation_context.device->info_, shape, definition.GetPrimaryStorageType(), definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; - Tensor gpu_tensor; - RETURN_IF_ERROR( - CreateTensor(*creation_context.context, shape, desc, &gpu_tensor)); - RETURN_IF_ERROR( - gpu_tensor.WriteData(creation_context.queue, constant_tensor)); + desc.UploadData(constant_tensor); *result = GPUOperation(definition); result->elementwise_ = true; - result->args_.AddObject("second_tensor", AccessType::READ, - absl::make_unique<Tensor>(std::move(gpu_tensor)), - absl::make_unique<TensorDescriptor>(desc)); + result->args_.AddObject("second_tensor", + absl::make_unique<TensorDescriptor>(std::move(desc))); const std::string s_coord = shape.c == 1 ? "0" : "S_COORD"; result->code_ = absl::StrCat( "FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n"); @@ -207,17 +202,12 @@ absl::Status CreateElementwiseTwoInput( creation_context.device->info_, shape, definition.GetPrimaryStorageType(), definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; - Tensor gpu_tensor; - RETURN_IF_ERROR( - CreateTensor(*creation_context.context, shape, desc, &gpu_tensor)); - RETURN_IF_ERROR( - gpu_tensor.WriteData(creation_context.queue, constant_tensor)); + desc.UploadData(constant_tensor); *result = GPUOperation(definition); result->elementwise_ = true; - result->args_.AddObject("second_tensor", AccessType::READ, - absl::make_unique<Tensor>(std::move(gpu_tensor)), - absl::make_unique<TensorDescriptor>(desc)); + result->args_.AddObject("second_tensor", + absl::make_unique<TensorDescriptor>(std::move(desc))); const std::string x_coord = shape.w == 1 ? "0" : "X_COORD"; const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD"; const std::string s_coord = shape.c == 1 ? "0" : "S_COORD"; diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.cc b/tensorflow/lite/delegates/gpu/cl/tensor.cc index 9fd9778a17f..72c53c5b1ac 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor.cc @@ -28,6 +28,164 @@ namespace tflite { namespace gpu { namespace cl { namespace { +absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape, + const TensorDescriptor& descriptor, + const void* data_ptr, CLMemory* result) { + const int slices = DivideRoundUp(shape.c, 4); + cl_mem_flags mem_flags = CL_MEM_READ_WRITE; + if (data_ptr) { + mem_flags |= CL_MEM_COPY_HOST_PTR; + } + switch (descriptor.storage_type) { + case TensorStorageType::BUFFER: + case TensorStorageType::IMAGE_BUFFER: { + const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices * + 4 * SizeOf(descriptor.data_type); + cl_int error_code; + cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size, + const_cast<void*>(data_ptr), &error_code); + if (!memory) { + return absl::UnknownError( + absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", + CLErrorCodeToString(error_code))); + } + *result = CLMemory(memory, true); + return absl::OkStatus(); + } + case TensorStorageType::TEXTURE_2D: { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = shape.w * shape.b * shape.d; + desc.image_height = shape.h * slices; + desc.image_depth = 0; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + desc.buffer = nullptr; + + cl_image_format format; + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = ToImageChannelType(descriptor.data_type); + + cl_int error_code; + cl_mem memory = + CreateImage2DLegacy(context.context(), mem_flags, &format, &desc, + const_cast<void*>(data_ptr), &error_code); + if (error_code != CL_SUCCESS) { + return absl::UnknownError( + absl::StrCat("Failed to create 2D texture (clCreateImage): ", + CLErrorCodeToString(error_code))); + } + + *result = CLMemory(memory, true); + return absl::OkStatus(); + } + case TensorStorageType::TEXTURE_3D: { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = shape.w * shape.b; + desc.image_height = shape.h; + desc.image_depth = slices * shape.d; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + desc.buffer = nullptr; + + cl_image_format format; + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = ToImageChannelType(descriptor.data_type); + + cl_int error_code; + cl_mem memory = + CreateImage3DLegacy(context.context(), mem_flags, &format, &desc, + const_cast<void*>(data_ptr), &error_code); + if (error_code != CL_SUCCESS) { + return absl::UnknownError( + absl::StrCat("Failed to create 3D texture (clCreateImage): ", + CLErrorCodeToString(error_code))); + } + + *result = CLMemory(memory, true); + return absl::OkStatus(); + } + case TensorStorageType::TEXTURE_ARRAY: { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + desc.image_width = shape.w * shape.b; + desc.image_height = shape.h; + desc.image_depth = 0; + desc.image_array_size = slices * shape.d; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + desc.buffer = nullptr; + + cl_image_format format; + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = ToImageChannelType(descriptor.data_type); + + cl_int error_code; + cl_mem memory = + clCreateImage(context.context(), mem_flags, &format, &desc, + const_cast<void*>(data_ptr), &error_code); + if (error_code != CL_SUCCESS) { + return absl::UnknownError( + absl::StrCat("Failed to create 2D texture array (clCreateImage): ", + CLErrorCodeToString(error_code))); + } + + *result = CLMemory(memory, true); + return absl::OkStatus(); + } + + case TensorStorageType::SINGLE_TEXTURE_2D: { + if (slices != 1) { + return absl::InvalidArgumentError(absl::StrCat( + "SINGLE_TEXTURE_2D support only channels in range [1-4], but ", + shape.c, "was provided")); + } + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = shape.w * shape.b * shape.d; + desc.image_height = shape.h; + desc.image_depth = 0; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + desc.buffer = nullptr; + + cl_image_format format; + if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) { + format.image_channel_order = ToChannelOrder(shape.c); + format.image_channel_data_type = + ToImageChannelType(descriptor.data_type); + } else { + return absl::InvalidArgumentError(absl::StrCat( + "This device doesn't support ", shape.c, "-channel textures.")); + } + + cl_int error_code; + cl_mem memory = + CreateImage2DLegacy(context.context(), mem_flags, &format, &desc, + const_cast<void*>(data_ptr), &error_code); + if (error_code != CL_SUCCESS) { + return absl::UnknownError( + absl::StrCat("Failed to create single 2D texture (clCreateImage): ", + CLErrorCodeToString(error_code))); + } + + *result = CLMemory(memory, true); + return absl::OkStatus(); + } + + default: + return absl::InternalError("Unsupported tensor storage type"); + } +} absl::Status CreateImageBufferFromBuffer(const CLContext& context, cl_mem memory, DataType data_type, @@ -59,7 +217,8 @@ absl::Status CreateTensor(const CLContext& context, const BHWDC& shape, const bool memory_owner = memory == nullptr; if (memory_owner) { CLMemory mem; - RETURN_IF_ERROR(AllocateTensorMemory(context, shape, descriptor, &mem)); + RETURN_IF_ERROR( + AllocateTensorMemory(context, shape, descriptor, nullptr, &mem)); memory = mem.Release(); } if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) { @@ -94,6 +253,14 @@ absl::Status CreateTensorShared(const CLContext& context, const BHWDC& shape, } // namespace +absl::Status TensorDescriptor::CreateGPUObject(CLContext* context, + GPUObjectPtr* result) const { + Tensor gpu_tensor; + RETURN_IF_ERROR(gpu_tensor.CreateFromDescriptor(*this, context)); + *result = absl::make_unique<Tensor>(std::move(gpu_tensor)); + return absl::OkStatus(); +} + Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC& shape, const TensorDescriptor& descriptor) : memory_(memory), @@ -279,12 +446,6 @@ absl::Status Tensor::IsValid(const BHWDC& shape) const { return absl::OkStatus(); } -int Tensor::GetChannelsAlignment() const { - return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D - ? shape_.c - : 4; -} - int Tensor::GetAlignedChannels() const { return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape_.c @@ -329,11 +490,13 @@ absl::Status Tensor::WriteDataBHWDC(absl::Span<const float> in, if (descriptor_.data_type == DataType::FLOAT32) { data_f.resize(elements_count); data_ptr = data_f.data(); - DataFromBHWDC(in, absl::MakeSpan(data_f.data(), data_f.size())); + DataFromBHWDC(in, shape_, descriptor_, + absl::MakeSpan(data_f.data(), data_f.size())); } else { data_h.resize(elements_count); data_ptr = data_h.data(); - DataFromBHWDC(in, absl::MakeSpan(data_h.data(), data_h.size())); + DataFromBHWDC(in, shape_, descriptor_, + absl::MakeSpan(data_h.data(), data_h.size())); } switch (descriptor_.storage_type) { @@ -413,9 +576,11 @@ absl::Status Tensor::ReadDataBHWDC(absl::Span<float> out, } if (descriptor_.data_type == DataType::FLOAT32) { - DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), out); + DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_, + descriptor_, out); } else { - DataToBHWDC(absl::MakeConstSpan(data_h.data(), data_h.size()), out); + DataToBHWDC(absl::MakeConstSpan(data_h.data(), data_h.size()), shape_, + descriptor_, out); } return absl::OkStatus(); @@ -432,6 +597,26 @@ absl::Status Tensor::ReadData(CLCommandQueue* queue, return ReadDataBHWDC(absl::MakeSpan(dst->data), queue); } +absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc, + CLContext* context) { + shape_ = desc.shape; + descriptor_.data_type = desc.data_type; + descriptor_.storage_type = desc.storage_type; + descriptor_.layout = desc.layout; + memory_owner_ = true; + CLMemory memory; + RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_, + desc.data.data(), &memory)); + memory_ = memory.Release(); + if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) { + RETURN_IF_ERROR(CreateImageBufferFromBuffer( + *context, memory_, desc.data_type, + shape_.b * shape_.w * shape_.h * shape_.d * DivideRoundUp(shape_.c, 4), + &image_buffer_memory_)); + } + return absl::OkStatus(); +} + absl::Status CreateTensor(const CLContext& context, const BHWC& shape, const TensorDescriptor& descriptor, Tensor* result) { const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); @@ -462,221 +647,15 @@ absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape, const TensorDescriptor& descriptor, CLMemory* result) { const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return AllocateTensorMemory(context, shape5D, descriptor, result); + return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result); } absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape, const TensorDescriptor& descriptor, CLMemory* result) { - const int slices = DivideRoundUp(shape.c, 4); - switch (descriptor.storage_type) { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: { - const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices * - 4 * SizeOf(descriptor.data_type); - cl_int error_code; - cl_mem memory = clCreateBuffer(context.context(), CL_MEM_READ_WRITE, - data_size, nullptr, &error_code); - if (!memory) { - return absl::UnknownError( - absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", - CLErrorCodeToString(error_code))); - } - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_2D: { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = shape.w * shape.b * shape.d; - desc.image_height = shape.h * slices; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE, - &format, &desc, nullptr, &error_code); - if (error_code != CL_SUCCESS) { - return absl::UnknownError( - absl::StrCat("Failed to create 2D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_3D: { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = shape.w * shape.b; - desc.image_height = shape.h; - desc.image_depth = slices * shape.d; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = CreateImage3DLegacy(context.context(), CL_MEM_READ_WRITE, - &format, &desc, nullptr, &error_code); - if (error_code != CL_SUCCESS) { - return absl::UnknownError( - absl::StrCat("Failed to create 3D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_ARRAY: { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; - desc.image_width = shape.w * shape.b; - desc.image_height = shape.h; - desc.image_depth = 0; - desc.image_array_size = slices * shape.d; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = clCreateImage(context.context(), CL_MEM_READ_WRITE, - &format, &desc, nullptr, &error_code); - if (error_code != CL_SUCCESS) { - return absl::UnknownError( - absl::StrCat("Failed to create 2D texture array (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - - case TensorStorageType::SINGLE_TEXTURE_2D: { - if (slices != 1) { - return absl::InvalidArgumentError(absl::StrCat( - "SINGLE_TEXTURE_2D support only channels in range [1-4], but ", - shape.c, "was provided")); - } - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = shape.w * shape.b * shape.d; - desc.image_height = shape.h; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) { - format.image_channel_order = ToChannelOrder(shape.c); - format.image_channel_data_type = - ToImageChannelType(descriptor.data_type); - } else { - return absl::InvalidArgumentError(absl::StrCat( - "This device doesn't support ", shape.c, "-channel textures.")); - } - - cl_int error_code; - cl_mem memory = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE, - &format, &desc, nullptr, &error_code); - if (error_code != CL_SUCCESS) { - return absl::UnknownError( - absl::StrCat("Failed to create 2D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - - default: - return absl::InternalError("Unsupported tensor storage type"); - } + return AllocateTensorMemory(context, shape, descriptor, nullptr, result); } -template <typename T> -void Tensor::DataFromBHWDC(absl::Span<const float> src, - absl::Span<T> dst) const { - const int channels_batch = GetChannelsAlignment(); - for (int b = 0; b < shape_.b; ++b) { - for (int s = 0; s < Slices(); ++s) { - for (int y = 0; y < shape_.h; ++y) { - for (int x = 0; x < shape_.w; ++x) { - for (int d = 0; d < shape_.d; ++d) { - for (int c = 0; c < channels_batch; ++c) { - float value; - if (s * 4 + c < shape_.c) { - const int cpu_index = - shape_.LinearIndex({b, y, x, d, s * 4 + c}); - value = src[cpu_index]; - } else { - value = 0.0f; - } - const int gpu_index = GetLinearIndex(b, x, y, d, s, c); - dst[gpu_index] = value; - } - } - } - } - } - } -} - -template void Tensor::DataFromBHWDC<float>(absl::Span<const float> src, - absl::Span<float> dst) const; -template void Tensor::DataFromBHWDC<half>(absl::Span<const float> src, - absl::Span<half> dst) const; - -template <typename T> -void Tensor::DataToBHWDC(absl::Span<const T> src, absl::Span<float> dst) const { - const int channels_batch = GetChannelsAlignment(); - for (int b = 0; b < shape_.b; ++b) { - for (int s = 0; s < Slices(); ++s) { - for (int y = 0; y < shape_.h; ++y) { - for (int x = 0; x < shape_.w; ++x) { - for (int d = 0; d < shape_.d; ++d) { - for (int c = 0; c < channels_batch; ++c) { - if (s * 4 + c >= shape_.c) { - continue; - } - const int cpu_index = shape_.LinearIndex({b, y, x, d, s * 4 + c}); - const int gpu_index = GetLinearIndex(b, x, y, d, s, c); - dst[cpu_index] = src[gpu_index]; - } - } - } - } - } - } -} - -template void Tensor::DataToBHWDC<float>(absl::Span<const float> src, - absl::Span<float> dst) const; -template void Tensor::DataToBHWDC<half>(absl::Span<const half> src, - absl::Span<float> dst) const; - } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.h b/tensorflow/lite/delegates/gpu/cl/tensor.h index 1e02c77fd13..c6056dbbbec 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.h +++ b/tensorflow/lite/delegates/gpu/cl/tensor.h @@ -92,6 +92,9 @@ class Tensor : public GPUObject { absl::Status ReadData(CLCommandQueue* queue, TensorFloat32* dst) const; absl::Status ReadData(CLCommandQueue* queue, Tensor5DFloat32* dst) const; + absl::Status CreateFromDescriptor(const TensorDescriptor& desc, + CLContext* context); + private: absl::Status IsValid(const BHWC& shape) const; absl::Status IsValid(const BHWDC& shape) const; @@ -104,37 +107,6 @@ class Tensor : public GPUObject { absl::Status ReadDataBHWDC(absl::Span<float> out, CLCommandQueue* queue) const; - template <typename T> - void DataFromBHWDC(absl::Span<const float> src, absl::Span<T> dst) const; - template <typename T> - void DataToBHWDC(absl::Span<const T> src, absl::Span<float> dst) const; - - // TODO(sorokin) might be bad performance - int GetLinearIndex(int b, int x, int y, int d, int s, int sub_c) const { - switch (descriptor_.storage_type) { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return ((((d * Slices() + s) * shape_.h + y) * shape_.w + x) * - shape_.b + - b) * - 4 + - sub_c; // DSHWBC4 - case TensorStorageType::TEXTURE_2D: - return ((((y * Slices() + s) * shape_.w + x) * shape_.b + b) * - shape_.d + - d) * - 4 + - sub_c; // HSWBDC4 - case TensorStorageType::SINGLE_TEXTURE_2D: - return (((y * shape_.w + x) * shape_.b + b) * shape_.d + d) * shape_.c + - sub_c; // HWBDC - case TensorStorageType::UNKNOWN: - return -1; - } - } - int3 GetFullTensorRegion() const; void Release(); diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc index e19de02d59d..7bd5de6e31e 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc @@ -73,6 +73,25 @@ std::string ToString(TensorStorageType type) { } } +TensorDescriptor::TensorDescriptor(TensorDescriptor&& desc) + : GPUObjectDescriptor(std::move(desc)), + data_type(desc.data_type), + storage_type(desc.storage_type), + layout(desc.layout), + shape(desc.shape), + data(std::move(desc.data)) {} +TensorDescriptor& TensorDescriptor::operator=(TensorDescriptor&& desc) { + if (this != &desc) { + std::swap(data_type, desc.data_type); + std::swap(storage_type, desc.storage_type); + std::swap(layout, desc.layout); + std::swap(shape, desc.shape); + data = std::move(desc.data); + GPUObjectDescriptor::operator=(std::move(desc)); + } + return *this; +} + GPUResources TensorDescriptor::GetGPUResources() const { GPUResources resources; if (HasAxis(Axis::WIDTH)) { @@ -725,6 +744,134 @@ TextureAddressMode TensorDescriptor::ModeFromState() const { } } +void TensorDescriptor::UploadData( + const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) { + shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c); + UploadData(absl::MakeConstSpan(src.data)); +} + +void TensorDescriptor::UploadData( + const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) { + shape = BHWDC(1, 1, 1, 1, src.shape.v); + UploadData(absl::MakeConstSpan(src.data)); +} + +void TensorDescriptor::UploadData(absl::Span<const float> src) { + int aligned_channels = storage_type == TensorStorageType::SINGLE_TEXTURE_2D + ? shape.c + : AlignByN(shape.c, 4); + int elements_count = shape.b * shape.w * shape.h * shape.d * aligned_channels; + data.resize(elements_count * SizeOf(data_type)); + if (data_type == DataType::FLOAT32) { + float* gpu_data = reinterpret_cast<float*>(data.data()); + DataFromBHWDC(src, shape, *this, absl::MakeSpan(gpu_data, elements_count)); + } else { + half* gpu_data = reinterpret_cast<half*>(data.data()); + DataFromBHWDC(src, shape, *this, absl::MakeSpan(gpu_data, elements_count)); + } +} + +namespace { +int GetLinearIndex(const TensorDescriptor& desc, const BHWDC& shape, int b, + int x, int y, int d, int s, int sub_c) { + const int slices = DivideRoundUp(shape.c, 4); + switch (desc.storage_type) { + case TensorStorageType::BUFFER: + case TensorStorageType::IMAGE_BUFFER: + case TensorStorageType::TEXTURE_ARRAY: + case TensorStorageType::TEXTURE_3D: + return ((((d * slices + s) * shape.h + y) * shape.w + x) * shape.b + b) * + 4 + + sub_c; // DSHWBC4 + case TensorStorageType::TEXTURE_2D: + return ((((y * slices + s) * shape.w + x) * shape.b + b) * shape.d + d) * + 4 + + sub_c; // HSWBDC4 + case TensorStorageType::SINGLE_TEXTURE_2D: + return (((y * shape.w + x) * shape.b + b) * shape.d + d) * shape.c + + sub_c; // HWBDC + case TensorStorageType::UNKNOWN: + return -1; + } +} + +int GetChannelsAlignment(const TensorDescriptor& desc, const BHWDC& shape) { + return desc.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c + : 4; +} +} // namespace + +template <typename T> +void DataFromBHWDC(absl::Span<const float> src, const BHWDC& shape, + const TensorDescriptor& desc, absl::Span<T> dst) { + const int channels_alignment = GetChannelsAlignment(desc, shape); + const int slices = DivideRoundUp(shape.c, 4); + for (int b = 0; b < shape.b; ++b) { + for (int s = 0; s < slices; ++s) { + for (int y = 0; y < shape.h; ++y) { + for (int x = 0; x < shape.w; ++x) { + for (int d = 0; d < shape.d; ++d) { + for (int c = 0; c < channels_alignment; ++c) { + float value; + if (s * 4 + c < shape.c) { + const int cpu_index = + shape.LinearIndex({b, y, x, d, s * 4 + c}); + value = src[cpu_index]; + } else { + value = 0.0f; + } + int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c); + dst[gpu_index] = value; + } + } + } + } + } + } +} + +template void DataFromBHWDC<float>(absl::Span<const float> src, + const BHWDC& shape, + const TensorDescriptor& desc, + absl::Span<float> dst); +template void DataFromBHWDC<half>(absl::Span<const float> src, + const BHWDC& shape, + const TensorDescriptor& desc, + absl::Span<half> dst); + +template <typename T> +void DataToBHWDC(absl::Span<const T> src, const BHWDC& shape, + const TensorDescriptor& desc, absl::Span<float> dst) { + const int channels_alignment = GetChannelsAlignment(desc, shape); + const int slices = DivideRoundUp(shape.c, 4); + for (int b = 0; b < shape.b; ++b) { + for (int s = 0; s < slices; ++s) { + for (int y = 0; y < shape.h; ++y) { + for (int x = 0; x < shape.w; ++x) { + for (int d = 0; d < shape.d; ++d) { + for (int c = 0; c < channels_alignment; ++c) { + if (s * 4 + c >= shape.c) { + continue; + } + int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c}); + int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c); + dst[cpu_index] = src[gpu_index]; + } + } + } + } + } + } +} + +template void DataToBHWDC<float>(absl::Span<const float> src, + const BHWDC& shape, + const TensorDescriptor& desc, + absl::Span<float> dst); +template void DataToBHWDC<half>(absl::Span<const half> src, const BHWDC& shape, + const TensorDescriptor& desc, + absl::Span<float> dst); + } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.h b/tensorflow/lite/delegates/gpu/cl/tensor_type.h index 73b15ca322d..094e3905966 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.h +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.h @@ -49,6 +49,11 @@ struct TensorDescriptor : public GPUObjectDescriptor { TensorDescriptor(DataType dt, TensorStorageType st, Layout l) : data_type(dt), storage_type(st), layout(l) {} + TensorDescriptor(const TensorDescriptor&) = default; + TensorDescriptor& operator=(const TensorDescriptor&) = default; + TensorDescriptor(TensorDescriptor&& desc); + TensorDescriptor& operator=(TensorDescriptor&& desc); + bool operator==(const TensorDescriptor& d) const { return data_type == d.data_type && storage_type == d.storage_type && layout == d.layout; @@ -63,6 +68,10 @@ struct TensorDescriptor : public GPUObjectDescriptor { GPUResources GetGPUResources() const override; + absl::Status CreateGPUObject(CLContext* context, + GPUObjectPtr* result) const override; + void Release() override { data.clear(); } + bool HasAxis(Axis axis) const; void SetTextureAddressMode(TextureAddressMode mode); @@ -70,6 +79,9 @@ struct TensorDescriptor : public GPUObjectDescriptor { const std::vector<std::string>& args, std::string* value_name, std::string* x_coord, std::string* y_coord, std::string* s_coord) const; + void UploadData(const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src); + void UploadData(const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src); + DataType data_type = DataType::UNKNOWN; TensorStorageType storage_type = TensorStorageType::UNKNOWN; // This field describes logical layout, actual(physical) GPU layout can be @@ -77,6 +89,10 @@ struct TensorDescriptor : public GPUObjectDescriptor { Layout layout = Layout::UNKNOWN; // Supported layouts is HWC, BHWC, HWDC, BHWDC + // optional + BHWDC shape; + std::vector<uint8_t> data; + private: absl::Status PerformReadSelector( const std::vector<std::string>& args, @@ -145,8 +161,18 @@ struct TensorDescriptor : public GPUObjectDescriptor { bool ParseCoordsFromArgs(const std::vector<std::string>& args, int offset, std::string* xc, std::string* yc, std::string* zc, std::string* sc, std::string* bc) const; + + void UploadData(absl::Span<const float> src); }; +template <typename T> +void DataFromBHWDC(absl::Span<const float> src, const BHWDC& shape, + const TensorDescriptor& desc, absl::Span<T> dst); + +template <typename T> +void DataToBHWDC(absl::Span<const T> src, const BHWDC& shape, + const TensorDescriptor& desc, absl::Span<float> dst); + std::string ToString(TensorStorageType type); } // namespace cl