Added CPU representation for LinearStorage.

PiperOrigin-RevId: 327265279
Change-Id: I53a90f385a618ad3ebae6c651b14401018019204
This commit is contained in:
Raman Sarokin 2020-08-18 11:06:52 -07:00 committed by TensorFlower Gardener
parent 77cf50d67b
commit 6702ae97ff
19 changed files with 258 additions and 295 deletions

View File

@ -400,11 +400,9 @@ cc_library(
srcs = ["linear_storage.cc"],
hdrs = ["linear_storage.h"],
deps = [
":buffer",
":gpu_object",
":opencl_wrapper",
":tensor_type",
":texture2d",
":util",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:status",

View File

@ -28,19 +28,10 @@ namespace {
absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only,
const void* data, CLContext* context,
Buffer* result) {
cl_mem_flags flags = gpu_read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
if (data != nullptr) {
flags |= CL_MEM_COPY_HOST_PTR;
}
cl_int error_code;
cl_mem buffer = clCreateBuffer(context->context(), flags, size_in_bytes,
const_cast<void*>(data), &error_code);
if (!buffer) {
return absl::UnknownError(
absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
CLErrorCodeToString(error_code)));
}
cl_mem buffer;
RETURN_IF_ERROR(CreateCLBuffer(context->context(), size_in_bytes,
gpu_read_only, const_cast<void*>(data),
&buffer));
*result = Buffer(buffer, size_in_bytes);
return absl::OkStatus();
@ -185,28 +176,13 @@ absl::Status Buffer::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
absl::Status Buffer::CreateFromBufferDescriptor(const BufferDescriptor& desc,
CLContext* context) {
cl_mem_flags flags = desc.memory_type == MemoryType::CONSTANT
? CL_MEM_READ_ONLY
: CL_MEM_READ_WRITE;
if (!desc.data.empty()) {
flags |= CL_MEM_COPY_HOST_PTR;
}
cl_int error_code;
bool read_only = desc.memory_type == MemoryType::CONSTANT;
uint8_t* data_ptr = desc.data.empty()
? nullptr
: const_cast<unsigned char*>(desc.data.data());
size_ = desc.size;
if (desc.data.empty()) {
buffer_ = clCreateBuffer(context->context(), flags, desc.size, nullptr,
&error_code);
} else {
buffer_ = clCreateBuffer(context->context(), flags, desc.size,
const_cast<unsigned char*>(desc.data.data()),
&error_code);
}
if (!buffer_) {
return absl::UnknownError(
absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
CLErrorCodeToString(error_code)));
}
return absl::OkStatus();
return CreateCLBuffer(context->context(), desc.size, read_only, data_ptr,
&buffer_);
}
absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext* context,

View File

@ -127,12 +127,9 @@ absl::Status Conv3D::UploadData(const tflite::gpu::Tensor<OHWDI, T>& weights,
? LinearStorageType::BUFFER
: LinearStorageType::TEXTURE_2D;
desc.element_type = definition_.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(CreateLinearStorage(desc, biases, context, &lt));
args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(biases);
args_.AddObject("biases",
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -178,16 +178,10 @@ absl::Status ConvBuffer1x1::UploadBiases(
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::BUFFER;
desc.element_type = definition_.GetDataType();
tflite::gpu::Tensor<Linear, DataType::FLOAT32> bias = biases;
int channels = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z);
bias.shape = Linear(channels);
bias.data.resize(channels, 0.0f);
LinearStorage lt;
RETURN_IF_ERROR(CreateLinearStorage(desc, bias, context, &lt));
args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4;
desc.UploadLinearData(biases, depth);
args_.AddObject("biases",
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -291,13 +291,9 @@ absl::Status CreateConvConstants(const CreationContext& creation_context,
desc.storage_type = LinearStorageType::BUFFER;
desc.element_type = definition.GetDataType();
desc.memory_type = MemoryType::CONSTANT;
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -121,12 +121,9 @@ absl::Status ConvTexture::UploadData(
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition_.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(CreateLinearStorage(desc, biases, context, &lt));
args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(biases);
args_.AddObject("biases",
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}
@ -144,12 +141,9 @@ absl::Status ConvTexture::UploadDataForWinograd4x4To6x6(
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition_.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(CreateLinearStorage(desc, bias, context, &lt));
args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(bias);
args_.AddObject("biases",
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -371,13 +371,9 @@ absl::Status CreateConvolutionTransposed(
desc.storage_type =
DeduceLinearStorageType(definition.GetPrimaryStorageType());
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -414,13 +414,9 @@ absl::Status CreateConvolutionTransposed3D(
desc.storage_type =
DeduceLinearStorageType(definition.GetPrimaryStorageType());
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -353,13 +353,9 @@ absl::Status CreateConvolutionTransposed3x3(
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -330,13 +330,9 @@ absl::Status CreateConvolutionTransposed4x4(
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -319,13 +319,9 @@ absl::Status CreateDepthwiseConvolution(
desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER
: LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}
@ -342,13 +338,9 @@ absl::Status CreateDepthwiseConvolution(
desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER
: LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -121,13 +121,9 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context,
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, attr.bias, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(attr.bias);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -53,13 +53,10 @@ absl::Status CreatePReLU(const CreationContext& creation_context,
desc.storage_type =
DeduceLinearStorageType(definition.GetPrimaryStorageType());
desc.element_type = definition.GetPrimaryDataType();
desc.UploadLinearData(*alpha);
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, *alpha, creation_context.context, &lt));
result->args_.AddObject("alpha", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
result->args_.AddObject(
"alpha", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}

View File

@ -250,12 +250,9 @@ absl::Status Winograd4x4To36::UploadBt(CLContext* context) {
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition_.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(CreateLinearStorage(desc, bt_aligned, context, &lt));
args_.AddObject("bt", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(bt_aligned);
args_.AddObject("bt",
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}
@ -456,11 +453,9 @@ absl::Status Winograd36To4x4::UploadAt(CLContext* context) {
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition_.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(CreateLinearStorage(desc, at_aligned, context, &lt));
args_.AddObject("at", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(at_aligned);
args_.AddObject("at",
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return absl::OkStatus();
}
@ -509,12 +504,9 @@ absl::Status CreateWinograd36To4x4(
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
LinearStorage lt;
RETURN_IF_ERROR(
CreateLinearStorage(desc, biases, creation_context.context, &lt));
result->args_.AddObject("biases", AccessType::READ,
absl::make_unique<LinearStorage>(std::move(lt)),
absl::make_unique<TensorLinearDescriptor>(desc));
desc.UploadLinearData(biases);
result->args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return result->UploadAt(creation_context.context);
}

View File

@ -23,6 +23,29 @@ namespace tflite {
namespace gpu {
namespace cl {
// Move constructor.
// Moves the GPUObjectDescriptor base sub-object, copies the scalar fields
// (storage_type, element_type, memory_type, size) and moves the `data` byte
// vector out of `desc`.
// NOTE(review): the derived-class fields are read from `desc` after it was
// passed to the base move constructor; presumably the base only touches
// base-class state - confirm against GPUObjectDescriptor.
TensorLinearDescriptor::TensorLinearDescriptor(TensorLinearDescriptor&& desc)
: GPUObjectDescriptor(std::move(desc)),
storage_type(desc.storage_type),
element_type(desc.element_type),
memory_type(desc.memory_type),
size(desc.size),
data(std::move(desc.data)) {}
// Move assignment.
// Self-assignment is a no-op. Otherwise the scalar fields are exchanged with
// `desc`, the `data` byte vector is moved from `desc`, and finally the
// GPUObjectDescriptor base is move-assigned.
TensorLinearDescriptor& TensorLinearDescriptor::operator=(
    TensorLinearDescriptor&& desc) {
  if (this == &desc) {
    return *this;
  }
  // Scalars are swapped rather than copied, so `desc` ends up holding the
  // previous values of this object.
  std::swap(storage_type, desc.storage_type);
  std::swap(element_type, desc.element_type);
  std::swap(memory_type, desc.memory_type);
  std::swap(size, desc.size);
  data = std::move(desc.data);
  GPUObjectDescriptor::operator=(std::move(desc));
  return *this;
}
// Drops the CPU-side payload by clearing the `data` byte vector.
void TensorLinearDescriptor::Release() {
  data.clear();
}
GPUResources TensorLinearDescriptor::GetGPUResources() const {
GPUResources resources;
resources.ints.push_back("length");
@ -81,20 +104,60 @@ absl::Status TensorLinearDescriptor::PerformReadSelector(
}
}
LinearStorage::LinearStorage(int depth, LinearStorageType storage_type)
: depth_(depth), storage_type_(storage_type) {}
// Builds a LinearStorage from this descriptor in `context` and hands
// ownership of it to `*result`.
// Returns the error from LinearStorage::CreateFromTensorLinearDescriptor if
// creation fails (in which case `*result` is not written), OkStatus otherwise.
absl::Status TensorLinearDescriptor::CreateGPUObject(
    CLContext* context, GPUObjectPtr* result) const {
  LinearStorage storage;
  RETURN_IF_ERROR(storage.CreateFromTensorLinearDescriptor(*this, context));
  *result = absl::make_unique<LinearStorage>(std::move(storage));
  return absl::OkStatus();
}
namespace {

// Writes `count` elements of type T to `dst`: element i is src.data[i] while
// i < src.shape.v, and zero for the remaining (padding) positions.
template <typename T>
void FillLinearData(const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src,
                    int count, T* dst) {
  for (int i = 0; i < count; ++i) {
    dst[i] = i < src.shape.v ? src.data[i] : 0.0f;
  }
}

}  // namespace

// Packs `src` into the descriptor's CPU-side `data` buffer.
//
// `size` is set to `aligned_size`, or to DivideRoundUp(src.shape.v, 4) when
// `aligned_size` is 0. The buffer then holds size * 4 scalars, stored as
// float when element_type is FLOAT32 and as half otherwise; positions past
// src.shape.v are zero-filled.
void TensorLinearDescriptor::UploadLinearData(
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src,
    int aligned_size) {
  size = aligned_size == 0 ? DivideRoundUp(src.shape.v, 4) : aligned_size;
  if (element_type == DataType::FLOAT32) {
    data.resize(size * sizeof(float) * 4);
    FillLinearData(src, size * 4, reinterpret_cast<float*>(data.data()));
  } else {
    data.resize(size * sizeof(half) * 4);
    FillLinearData(src, size * 4, reinterpret_cast<half*>(data.data()));
  }
}
// Releases the owned OpenCL memory object, if any, and resets the handle so
// a repeated call does nothing.
void LinearStorage::Release() {
  if (memory_ == nullptr) {
    return;
  }
  clReleaseMemObject(memory_);
  memory_ = nullptr;
}
LinearStorage::LinearStorage(LinearStorage&& storage)
: GPUObject(std::move(storage)),
texture_storage_(std::move(storage.texture_storage_)),
buffer_storage_(std::move(storage.buffer_storage_)),
memory_(storage.memory_),
depth_(storage.depth_),
storage_type_(storage.storage_type_) {}
storage_type_(storage.storage_type_) {
storage.memory_ = nullptr;
}
LinearStorage& LinearStorage::operator=(LinearStorage&& storage) {
if (this != &storage) {
texture_storage_ = std::move(storage.texture_storage_);
buffer_storage_ = std::move(storage.buffer_storage_);
Release();
std::swap(memory_, storage.memory_);
std::swap(depth_, storage.depth_);
std::swap(storage_type_, storage.storage_type_);
GPUObject::operator=(std::move(storage));
@ -115,14 +178,37 @@ absl::Status LinearStorage::GetGPUResources(
resources->ints.push_back({"length", depth_});
if (storage_type_ == LinearStorageType::BUFFER) {
resources->buffers.push_back({"buffer", buffer_storage_.GetMemoryPtr()});
resources->buffers.push_back({"buffer", memory_});
} else {
resources->images2d.push_back({"tex2d", texture_storage_.GetMemoryPtr()});
resources->images2d.push_back({"tex2d", memory_});
}
return absl::OkStatus();
}
// Allocates the OpenCL memory object described by `desc` in `context` and
// stores its handle in `memory_`.
//
// storage_type_ and depth_ are taken from desc.storage_type and desc.size.
// For BUFFER storage a buffer of depth_ four-component elements is created
// (float or half components, chosen by desc.element_type), read-only when
// desc.memory_type is CONSTANT. For any other storage type a depth_ x 1 RGBA
// 2D image is created. When desc.data is non-empty it is passed as the
// initial contents; otherwise a null host pointer is passed.
//
// Returns the status of the underlying CreateCLBuffer /
// CreateFloatRGBAImage2D call.
absl::Status LinearStorage::CreateFromTensorLinearDescriptor(
    const TensorLinearDescriptor& desc, CLContext* context) {
  storage_type_ = desc.storage_type;
  depth_ = desc.size;
  // The creation helpers take a non-const void*, hence the const_cast.
  uint8_t* data_ptr = desc.data.empty()
                          ? nullptr
                          : const_cast<unsigned char*>(desc.data.data());
  if (storage_type_ == LinearStorageType::BUFFER) {
    bool read_only = desc.memory_type == MemoryType::CONSTANT;
    // Size in bytes of one four-component element.
    const int float4_size = desc.element_type == DataType::FLOAT32
                                ? sizeof(float) * 4
                                : sizeof(half) * 4;
    return CreateCLBuffer(context->context(), depth_ * float4_size, read_only,
                          data_ptr, &memory_);
  } else {
    return CreateFloatRGBAImage2D(context->context(), depth_, 1,
                                  desc.element_type, data_ptr, &memory_);
  }
}
LinearStorageType DeduceLinearStorageType(
TensorStorageType tensor_storage_type) {
if (tensor_storage_type == TensorStorageType::BUFFER) {
@ -132,24 +218,6 @@ LinearStorageType DeduceLinearStorageType(
}
}
// Creates a LinearStorage holding `size` four-component elements of
// `data_type`, initialized from `data`, and stores it in `*result`.
// BUFFER storage is backed by a read-only buffer; every other storage type
// is backed by a `size` x 1 RGBA 2D texture.
absl::Status CreateLinearStorage(LinearStorageType storage_type,
                                 DataType data_type, int size, void* data,
                                 CLContext* context, LinearStorage* result) {
  if (storage_type != LinearStorageType::BUFFER) {
    *result = LinearStorage(size, LinearStorageType::TEXTURE_2D);
    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, size, 1, data, context,
                                        &result->texture_storage_));
    return absl::OkStatus();
  }
  // Size in bytes of one element for the requested precision.
  const int float4_size =
      data_type == DataType::FLOAT32 ? sizeof(float4) : sizeof(half4);
  *result = LinearStorage(size, LinearStorageType::BUFFER);
  RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * size, data, context,
                                       &result->buffer_storage_));
  return absl::OkStatus();
}
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -21,11 +21,9 @@ limitations under the License.
#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
@ -42,6 +40,20 @@ struct TensorLinearDescriptor : public GPUObjectDescriptor {
DataType element_type; // FLOAT32 or FLOAT16
MemoryType memory_type = MemoryType::GLOBAL; // applicable for BUFFER
// optional
int size = 0;
std::vector<uint8_t> data;
TensorLinearDescriptor() = default;
TensorLinearDescriptor(const TensorLinearDescriptor&) = default;
TensorLinearDescriptor& operator=(const TensorLinearDescriptor&) = default;
TensorLinearDescriptor(TensorLinearDescriptor&& desc);
TensorLinearDescriptor& operator=(TensorLinearDescriptor&& desc);
void UploadLinearData(
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src,
int aligned_size = 0);
absl::Status PerformSelector(const std::string& selector,
const std::vector<std::string>& args,
const std::vector<std::string>& template_args,
@ -50,6 +62,10 @@ struct TensorLinearDescriptor : public GPUObjectDescriptor {
GPUResources GetGPUResources() const override;
absl::Status PerformReadSelector(const std::vector<std::string>& args,
std::string* result) const;
absl::Status CreateGPUObject(CLContext* context,
GPUObjectPtr* result) const override;
void Release() override;
};
LinearStorageType DeduceLinearStorageType(
@ -60,8 +76,7 @@ LinearStorageType DeduceLinearStorageType(
class LinearStorage : public GPUObject {
public:
LinearStorage() {}
virtual ~LinearStorage() {}
~LinearStorage() override { Release(); }
// Move only
LinearStorage(LinearStorage&& storage);
@ -72,46 +87,17 @@ class LinearStorage : public GPUObject {
absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr,
GPUResourcesWithValue* resources) const override;
absl::Status CreateFromTensorLinearDescriptor(
const TensorLinearDescriptor& desc, CLContext* context);
private:
friend absl::Status CreateLinearStorage(LinearStorageType storage_type,
DataType data_type, int size,
void* data, CLContext* context,
LinearStorage* result);
LinearStorage(int depth, LinearStorageType storage_type);
Texture2D texture_storage_;
Buffer buffer_storage_;
void Release();
cl_mem memory_ = nullptr;
int depth_;
LinearStorageType storage_type_;
};
absl::Status CreateLinearStorage(LinearStorageType storage_type,
DataType data_type, int size, void* data,
CLContext* context, LinearStorage* result);
// Packs `tensor` into groups of four values and creates a LinearStorage from
// it. The number of groups is DivideRoundUp(tensor.shape.v, 4). Values are
// staged as float4 when descriptor.element_type is FLOAT32 and as half4
// otherwise; storage type and element type come from `descriptor`.
template <DataType T>
absl::Status CreateLinearStorage(const TensorLinearDescriptor& descriptor,
                                 const tflite::gpu::Tensor<Linear, T>& tensor,
                                 CLContext* context, LinearStorage* result) {
  const int slices = DivideRoundUp(tensor.shape.v, 4);
  if (descriptor.element_type != DataType::FLOAT32) {
    std::vector<half4> packed(slices);
    CopyLinearFLT4(tensor, absl::MakeSpan(packed));
    RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type,
                                        descriptor.element_type, slices,
                                        packed.data(), context, result));
    return absl::OkStatus();
  }
  std::vector<float4> packed(slices);
  CopyLinearFLT4(tensor, absl::MakeSpan(packed));
  RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type,
                                      descriptor.element_type, slices,
                                      packed.data(), context, result));
  return absl::OkStatus();
}
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -21,39 +21,14 @@ namespace cl {
namespace {
// Creates new 4-channel 2D texture with cl_channel_type elements
absl::Status CreateTexture2D(int width, int height, cl_channel_type type,
void* data, CLContext* context,
Texture2D* result) {
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = width;
desc.image_height = height;
desc.image_depth = 0;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = nullptr;
cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = type;
cl_mem_flags flags = CL_MEM_READ_WRITE;
if (data != nullptr) {
flags |= CL_MEM_COPY_HOST_PTR;
}
cl_int error_code;
cl_mem texture = CreateImage2DLegacy(context->context(), flags, &format,
&desc, data, &error_code);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(
absl::StrCat("Failed to create 2D texture (clCreateImage): ",
CLErrorCodeToString(error_code)));
}
*result = Texture2D(texture, width, height, type);
absl::Status CreateTexture2D(int width, int height, DataType type, void* data,
CLContext* context, Texture2D* result) {
cl_mem texture;
RETURN_IF_ERROR(CreateFloatRGBAImage2D(context->context(), width, height,
type, data, &texture));
cl_channel_type channel_type =
type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;
*result = Texture2D(texture, width, height, channel_type);
return absl::OkStatus();
}
@ -94,7 +69,7 @@ absl::Status Texture2DDescriptor::PerformSelector(
return PerformReadSelector(args, result);
} else {
return absl::NotFoundError(absl::StrCat(
"TensorLinearDescriptor don't have selector with name - ", selector));
"Texture2DDescriptor don't have selector with name - ", selector));
}
}
@ -167,79 +142,41 @@ absl::Status Texture2D::GetGPUResources(
}
absl::Status Texture2D::CreateFromTexture2DDescriptor(
const Texture2DDescriptor& tex_desc, CLContext* context) {
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = tex_desc.size.x;
desc.image_height = tex_desc.size.y;
desc.image_depth = 0;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = nullptr;
cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type =
tex_desc.element_type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;
cl_mem_flags flags = CL_MEM_READ_WRITE;
if (!tex_desc.data.empty()) {
flags |= CL_MEM_COPY_HOST_PTR;
}
cl_int error_code;
width_ = tex_desc.size.x;
height_ = tex_desc.size.y;
channel_type_ = format.image_channel_data_type;
if (tex_desc.data.empty()) {
texture_ = CreateImage2DLegacy(context->context(), flags, &format, &desc,
nullptr, &error_code);
} else {
texture_ = CreateImage2DLegacy(
context->context(), flags, &format, &desc,
const_cast<unsigned char*>(tex_desc.data.data()), &error_code);
}
if (error_code != CL_SUCCESS) {
return absl::UnknownError(
absl::StrCat("Failed to create 2D texture (clCreateImage): ",
CLErrorCodeToString(error_code)));
}
return absl::OkStatus();
const Texture2DDescriptor& desc, CLContext* context) {
width_ = desc.size.x;
height_ = desc.size.y;
channel_type_ =
desc.element_type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;
uint8_t* data_ptr = desc.data.empty()
? nullptr
: const_cast<unsigned char*>(desc.data.data());
return CreateFloatRGBAImage2D(context->context(), desc.size.x, desc.size.y,
desc.element_type, data_ptr, &texture_);
}
// Creates new 4-channel 2D texture with f32 elements
absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext* context,
Texture2D* result) {
return CreateTexture2D(width, height, CL_FLOAT, nullptr, context, result);
return CreateTexture2D(width, height, DataType::FLOAT32, nullptr, context,
result);
}
// Creates new 4-channel 2D texture with f16 elements
absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext* context,
Texture2D* result) {
return CreateTexture2D(width, height, CL_HALF_FLOAT, nullptr, context,
return CreateTexture2D(width, height, DataType::FLOAT16, nullptr, context,
result);
}
absl::Status CreateTexture2DRGBA(DataType type, int width, int height,
CLContext* context, Texture2D* result) {
if (type == DataType::FLOAT32) {
return CreateTexture2D(width, height, CL_FLOAT, nullptr, context, result);
} else {
return CreateTexture2D(width, height, CL_HALF_FLOAT, nullptr, context,
result);
}
return CreateTexture2D(width, height, type, nullptr, context, result);
}
absl::Status CreateTexture2DRGBA(DataType type, int width, int height,
void* data, CLContext* context,
Texture2D* result) {
if (type == DataType::FLOAT32) {
return CreateTexture2D(width, height, CL_FLOAT, data, context, result);
} else {
return CreateTexture2D(width, height, CL_HALF_FLOAT, data, context, result);
}
return CreateTexture2D(width, height, type, data, context, result);
}
} // namespace cl

View File

@ -168,6 +168,56 @@ int ChannelTypeToSizeInBytes(cl_channel_type type) {
bool OpenCLSupported() { return LoadOpenCL().ok(); }
// Creates an OpenCL buffer of `size_in_bytes` bytes in `context` and writes
// its handle to `*result`.
//
// `read_only` selects CL_MEM_READ_ONLY over CL_MEM_READ_WRITE. When `data` is
// non-null, CL_MEM_COPY_HOST_PTR is added and `data` is passed as the host
// pointer. Returns UnknownError carrying the OpenCL error string when
// clCreateBuffer yields a null handle, OkStatus otherwise.
absl::Status CreateCLBuffer(cl_context context, int size_in_bytes,
                            bool read_only, void* data, cl_mem* result) {
  cl_mem_flags flags = CL_MEM_READ_WRITE;
  if (read_only) {
    flags = CL_MEM_READ_ONLY;
  }
  if (data != nullptr) {
    flags |= CL_MEM_COPY_HOST_PTR;
  }
  cl_int error_code;
  cl_mem buffer =
      clCreateBuffer(context, flags, size_in_bytes, data, &error_code);
  *result = buffer;
  if (buffer == nullptr) {
    return absl::UnknownError(
        absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}
// Creates a `width` x `height` read-write RGBA 2D image in `context` and
// writes its handle to `*result`.
//
// Channels are CL_FLOAT when `type` is FLOAT32 and CL_HALF_FLOAT otherwise.
// When `data` is non-null, CL_MEM_COPY_HOST_PTR is added and `data` is passed
// as the host pointer. Returns UnknownError carrying the OpenCL error string
// when the reported error code is not CL_SUCCESS, OkStatus otherwise.
absl::Status CreateFloatRGBAImage2D(cl_context context, int width, int height,
                                    DataType type, void* data, cl_mem* result) {
  // Value-initialize so that fields not assigned below (e.g.
  // image_array_size) are zero instead of indeterminate when the descriptor
  // is handed to the driver.
  cl_image_desc desc = {};
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = width;
  desc.image_height = height;
  desc.image_depth = 0;
  desc.image_row_pitch = 0;
  desc.image_slice_pitch = 0;
  desc.num_mip_levels = 0;
  desc.num_samples = 0;
  desc.buffer = nullptr;

  cl_image_format format;
  format.image_channel_order = CL_RGBA;
  format.image_channel_data_type =
      type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;

  cl_mem_flags flags = CL_MEM_READ_WRITE;
  if (data) {
    flags |= CL_MEM_COPY_HOST_PTR;
  }

  cl_int error_code;
  *result =
      CreateImage2DLegacy(context, flags, &format, &desc, data, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create 2D texture (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -49,6 +49,12 @@ void CopyLinearFLT4(const tflite::gpu::Tensor<Linear, S>& src,
}
}
// Creates an OpenCL buffer of `size_in_bytes` bytes in `context` and stores
// the handle in `*result`. `read_only` selects CL_MEM_READ_ONLY instead of
// CL_MEM_READ_WRITE; a non-null `data` is copied in as the initial contents
// (CL_MEM_COPY_HOST_PTR). Returns an UnknownError if no buffer was created.
absl::Status CreateCLBuffer(cl_context context, int size_in_bytes,
bool read_only, void* data, cl_mem* result);
// Creates a `width` x `height` read-write RGBA 2D image in `context` and
// stores the handle in `*result`. Channels are CL_FLOAT for
// DataType::FLOAT32 and CL_HALF_FLOAT otherwise; a non-null `data` is copied
// in as the initial contents. Returns an UnknownError if creation reports an
// OpenCL error.
absl::Status CreateFloatRGBAImage2D(cl_context context, int width, int height,
DataType type, void* data, cl_mem* result);
} // namespace cl
} // namespace gpu
} // namespace tflite