Added CPU representation for LinearStorage.
PiperOrigin-RevId: 327265279 Change-Id: I53a90f385a618ad3ebae6c651b14401018019204
This commit is contained in:
parent
77cf50d67b
commit
6702ae97ff
tensorflow/lite/delegates/gpu/cl
BUILDbuffer.cc
kernels
conv_3d.hconv_buffer_1x1.hconv_constants.ccconv_texture.hconvolution_transposed.ccconvolution_transposed_3d.ccconvolution_transposed_3x3.ccconvolution_transposed_4x4.ccdepthwise_conv.ccfully_connected.ccprelu.ccwinograd.cc
linear_storage.cclinear_storage.htexture2d.ccutil.ccutil.h@ -400,11 +400,9 @@ cc_library(
|
||||
srcs = ["linear_storage.cc"],
|
||||
hdrs = ["linear_storage.h"],
|
||||
deps = [
|
||||
":buffer",
|
||||
":gpu_object",
|
||||
":opencl_wrapper",
|
||||
":tensor_type",
|
||||
":texture2d",
|
||||
":util",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
|
@ -28,19 +28,10 @@ namespace {
|
||||
absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only,
|
||||
const void* data, CLContext* context,
|
||||
Buffer* result) {
|
||||
cl_mem_flags flags = gpu_read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
|
||||
if (data != nullptr) {
|
||||
flags |= CL_MEM_COPY_HOST_PTR;
|
||||
}
|
||||
cl_int error_code;
|
||||
cl_mem buffer = clCreateBuffer(context->context(), flags, size_in_bytes,
|
||||
const_cast<void*>(data), &error_code);
|
||||
if (!buffer) {
|
||||
return absl::UnknownError(
|
||||
absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
|
||||
CLErrorCodeToString(error_code)));
|
||||
}
|
||||
|
||||
cl_mem buffer;
|
||||
RETURN_IF_ERROR(CreateCLBuffer(context->context(), size_in_bytes,
|
||||
gpu_read_only, const_cast<void*>(data),
|
||||
&buffer));
|
||||
*result = Buffer(buffer, size_in_bytes);
|
||||
|
||||
return absl::OkStatus();
|
||||
@ -185,28 +176,13 @@ absl::Status Buffer::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
|
||||
|
||||
absl::Status Buffer::CreateFromBufferDescriptor(const BufferDescriptor& desc,
|
||||
CLContext* context) {
|
||||
cl_mem_flags flags = desc.memory_type == MemoryType::CONSTANT
|
||||
? CL_MEM_READ_ONLY
|
||||
: CL_MEM_READ_WRITE;
|
||||
if (!desc.data.empty()) {
|
||||
flags |= CL_MEM_COPY_HOST_PTR;
|
||||
}
|
||||
cl_int error_code;
|
||||
bool read_only = desc.memory_type == MemoryType::CONSTANT;
|
||||
uint8_t* data_ptr = desc.data.empty()
|
||||
? nullptr
|
||||
: const_cast<unsigned char*>(desc.data.data());
|
||||
size_ = desc.size;
|
||||
if (desc.data.empty()) {
|
||||
buffer_ = clCreateBuffer(context->context(), flags, desc.size, nullptr,
|
||||
&error_code);
|
||||
} else {
|
||||
buffer_ = clCreateBuffer(context->context(), flags, desc.size,
|
||||
const_cast<unsigned char*>(desc.data.data()),
|
||||
&error_code);
|
||||
}
|
||||
if (!buffer_) {
|
||||
return absl::UnknownError(
|
||||
absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
|
||||
CLErrorCodeToString(error_code)));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
return CreateCLBuffer(context->context(), desc.size, read_only, data_ptr,
|
||||
&buffer_);
|
||||
}
|
||||
|
||||
absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext* context,
|
||||
|
@ -127,12 +127,9 @@ absl::Status Conv3D::UploadData(const tflite::gpu::Tensor<OHWDI, T>& weights,
|
||||
? LinearStorageType::BUFFER
|
||||
: LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition_.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(CreateLinearStorage(desc, biases, context, <));
|
||||
args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(biases);
|
||||
args_.AddObject("biases",
|
||||
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -178,16 +178,10 @@ absl::Status ConvBuffer1x1::UploadBiases(
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::BUFFER;
|
||||
desc.element_type = definition_.GetDataType();
|
||||
|
||||
tflite::gpu::Tensor<Linear, DataType::FLOAT32> bias = biases;
|
||||
int channels = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z);
|
||||
bias.shape = Linear(channels);
|
||||
bias.data.resize(channels, 0.0f);
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(CreateLinearStorage(desc, bias, context, <));
|
||||
args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4;
|
||||
desc.UploadLinearData(biases, depth);
|
||||
args_.AddObject("biases",
|
||||
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -291,13 +291,9 @@ absl::Status CreateConvConstants(const CreationContext& creation_context,
|
||||
desc.storage_type = LinearStorageType::BUFFER;
|
||||
desc.element_type = definition.GetDataType();
|
||||
desc.memory_type = MemoryType::CONSTANT;
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -121,12 +121,9 @@ absl::Status ConvTexture::UploadData(
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition_.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(CreateLinearStorage(desc, biases, context, <));
|
||||
args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(biases);
|
||||
args_.AddObject("biases",
|
||||
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
@ -144,12 +141,9 @@ absl::Status ConvTexture::UploadDataForWinograd4x4To6x6(
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition_.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(CreateLinearStorage(desc, bias, context, <));
|
||||
args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(bias);
|
||||
args_.AddObject("biases",
|
||||
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -371,13 +371,9 @@ absl::Status CreateConvolutionTransposed(
|
||||
desc.storage_type =
|
||||
DeduceLinearStorageType(definition.GetPrimaryStorageType());
|
||||
desc.element_type = definition.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -414,13 +414,9 @@ absl::Status CreateConvolutionTransposed3D(
|
||||
desc.storage_type =
|
||||
DeduceLinearStorageType(definition.GetPrimaryStorageType());
|
||||
desc.element_type = definition.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -353,13 +353,9 @@ absl::Status CreateConvolutionTransposed3x3(
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -330,13 +330,9 @@ absl::Status CreateConvolutionTransposed4x4(
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -319,13 +319,9 @@ absl::Status CreateDepthwiseConvolution(
|
||||
desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER
|
||||
: LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
@ -342,13 +338,9 @@ absl::Status CreateDepthwiseConvolution(
|
||||
desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER
|
||||
: LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -121,13 +121,9 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context,
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, attr.bias, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(attr.bias);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
@ -53,13 +53,10 @@ absl::Status CreatePReLU(const CreationContext& creation_context,
|
||||
desc.storage_type =
|
||||
DeduceLinearStorageType(definition.GetPrimaryStorageType());
|
||||
desc.element_type = definition.GetPrimaryDataType();
|
||||
desc.UploadLinearData(*alpha);
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, *alpha, creation_context.context, <));
|
||||
result->args_.AddObject("alpha", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
result->args_.AddObject(
|
||||
"alpha", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
@ -250,12 +250,9 @@ absl::Status Winograd4x4To36::UploadBt(CLContext* context) {
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition_.GetDataType();
|
||||
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(CreateLinearStorage(desc, bt_aligned, context, <));
|
||||
args_.AddObject("bt", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(bt_aligned);
|
||||
args_.AddObject("bt",
|
||||
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
@ -456,11 +453,9 @@ absl::Status Winograd36To4x4::UploadAt(CLContext* context) {
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition_.GetDataType();
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(CreateLinearStorage(desc, at_aligned, context, <));
|
||||
args_.AddObject("at", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(at_aligned);
|
||||
args_.AddObject("at",
|
||||
absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
@ -509,12 +504,9 @@ absl::Status CreateWinograd36To4x4(
|
||||
TensorLinearDescriptor desc;
|
||||
desc.storage_type = LinearStorageType::TEXTURE_2D;
|
||||
desc.element_type = definition.GetDataType();
|
||||
LinearStorage lt;
|
||||
RETURN_IF_ERROR(
|
||||
CreateLinearStorage(desc, biases, creation_context.context, <));
|
||||
result->args_.AddObject("biases", AccessType::READ,
|
||||
absl::make_unique<LinearStorage>(std::move(lt)),
|
||||
absl::make_unique<TensorLinearDescriptor>(desc));
|
||||
desc.UploadLinearData(biases);
|
||||
result->args_.AddObject(
|
||||
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
|
||||
return result->UploadAt(creation_context.context);
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,29 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
// Move constructor.
// Move-constructs the GPUObjectDescriptor base from `desc`, copies the scalar
// fields (storage_type, element_type, memory_type, size) and moves the `data`
// byte vector out of `desc`.
// NOTE(review): the derived fields of `desc` are read after `std::move(desc)`
// has been handed to the base constructor; this presumes the base only
// consumes its own subobject -- confirm against GPUObjectDescriptor.
TensorLinearDescriptor::TensorLinearDescriptor(TensorLinearDescriptor&& desc)
|
||||
: GPUObjectDescriptor(std::move(desc)),
|
||||
storage_type(desc.storage_type),
|
||||
element_type(desc.element_type),
|
||||
memory_type(desc.memory_type),
|
||||
size(desc.size),
|
||||
data(std::move(desc.data)) {}
|
||||
|
||||
// Move assignment.
// The scalar fields are exchanged with `desc`, the `data` byte vector is
// moved out of `desc`, and the GPUObjectDescriptor base is move-assigned last.
// Assigning an object to itself is a no-op.
TensorLinearDescriptor& TensorLinearDescriptor::operator=(
    TensorLinearDescriptor&& desc) {
  // Guard clause: nothing to do when source and destination are the same.
  if (this == &desc) {
    return *this;
  }

  // Trivially copyable fields are swapped rather than overwritten.
  std::swap(storage_type, desc.storage_type);
  std::swap(element_type, desc.element_type);
  std::swap(memory_type, desc.memory_type);
  std::swap(size, desc.size);

  // The payload is taken over from the source.
  data = std::move(desc.data);

  // Finally let the base class move its own state.
  GPUObjectDescriptor::operator=(std::move(desc));
  return *this;
}
|
||||
|
||||
// Empties the descriptor's `data` byte vector.
// NOTE(review): std::vector::clear() is not required to give back the
// vector's allocation; if the goal is to free the CPU-side copy, confirm
// whether clear() alone is sufficient here.
void TensorLinearDescriptor::Release() { data.clear(); }
|
||||
|
||||
GPUResources TensorLinearDescriptor::GetGPUResources() const {
|
||||
GPUResources resources;
|
||||
resources.ints.push_back("length");
|
||||
@ -81,20 +104,60 @@ absl::Status TensorLinearDescriptor::PerformReadSelector(
|
||||
}
|
||||
}
|
||||
|
||||
LinearStorage::LinearStorage(int depth, LinearStorageType storage_type)
|
||||
: depth_(depth), storage_type_(storage_type) {}
|
||||
// Creates a LinearStorage from this descriptor and hands ownership of it to
// `*result`.
//
// context: OpenCL context wrapper forwarded to the storage creation call.
// result:  receives the new object; it is written only when creation
//          succeeds.
// Returns the error from CreateFromTensorLinearDescriptor on failure,
// absl::OkStatus() otherwise.
absl::Status TensorLinearDescriptor::CreateGPUObject(
    CLContext* context, GPUObjectPtr* result) const {
  // Build the storage directly on the heap; if creation fails the
  // unique_ptr destroys it on the early return.
  auto storage = absl::make_unique<LinearStorage>();
  RETURN_IF_ERROR(storage->CreateFromTensorLinearDescriptor(*this, context));
  *result = std::move(storage);
  return absl::OkStatus();
}
|
||||
|
||||
void TensorLinearDescriptor::UploadLinearData(
|
||||
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src,
|
||||
int aligned_size) {
|
||||
size = aligned_size == 0 ? DivideRoundUp(src.shape.v, 4) : aligned_size;
|
||||
if (element_type == DataType::FLOAT32) {
|
||||
data.resize(size * sizeof(float) * 4);
|
||||
float* gpu_data = reinterpret_cast<float*>(data.data());
|
||||
for (int i = 0; i < size * 4; ++i) {
|
||||
if (i < src.shape.v) {
|
||||
gpu_data[i] = src.data[i];
|
||||
} else {
|
||||
gpu_data[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
data.resize(size * sizeof(half) * 4);
|
||||
half* gpu_data = reinterpret_cast<half*>(data.data());
|
||||
for (int i = 0; i < size * 4; ++i) {
|
||||
if (i < src.shape.v) {
|
||||
gpu_data[i] = src.data[i];
|
||||
} else {
|
||||
gpu_data[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LinearStorage::Release() {
|
||||
if (memory_) {
|
||||
clReleaseMemObject(memory_);
|
||||
memory_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
LinearStorage::LinearStorage(LinearStorage&& storage)
|
||||
: GPUObject(std::move(storage)),
|
||||
texture_storage_(std::move(storage.texture_storage_)),
|
||||
buffer_storage_(std::move(storage.buffer_storage_)),
|
||||
memory_(storage.memory_),
|
||||
depth_(storage.depth_),
|
||||
storage_type_(storage.storage_type_) {}
|
||||
storage_type_(storage.storage_type_) {
|
||||
storage.memory_ = nullptr;
|
||||
}
|
||||
|
||||
LinearStorage& LinearStorage::operator=(LinearStorage&& storage) {
|
||||
if (this != &storage) {
|
||||
texture_storage_ = std::move(storage.texture_storage_);
|
||||
buffer_storage_ = std::move(storage.buffer_storage_);
|
||||
Release();
|
||||
std::swap(memory_, storage.memory_);
|
||||
std::swap(depth_, storage.depth_);
|
||||
std::swap(storage_type_, storage.storage_type_);
|
||||
GPUObject::operator=(std::move(storage));
|
||||
@ -115,14 +178,37 @@ absl::Status LinearStorage::GetGPUResources(
|
||||
resources->ints.push_back({"length", depth_});
|
||||
|
||||
if (storage_type_ == LinearStorageType::BUFFER) {
|
||||
resources->buffers.push_back({"buffer", buffer_storage_.GetMemoryPtr()});
|
||||
resources->buffers.push_back({"buffer", memory_});
|
||||
} else {
|
||||
resources->images2d.push_back({"tex2d", texture_storage_.GetMemoryPtr()});
|
||||
resources->images2d.push_back({"tex2d", memory_});
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// Allocates the OpenCL memory object described by `desc` and stores its
// handle in `memory_`.
//
// desc:    supplies the storage type, element type, memory type, the number
//          of 4-component elements (`desc.size`) and optional initial bytes
//          (`desc.data`).
// context: OpenCL context wrapper used for the allocation.
//
// For BUFFER storage a buffer of `depth_ * 4 * sizeof(element)` bytes is
// created, read-only when the descriptor's memory type is CONSTANT. Otherwise
// a `depth_` x 1 RGBA float image is created. When `desc.data` is empty a null
// host pointer is passed to the creation helper.
//
// Returns the status of the underlying creation helper.
// NOTE(review): `memory_` is overwritten without releasing a previously held
// handle; this presumes the method is only called on a storage that holds
// nothing -- confirm against callers.
absl::Status LinearStorage::CreateFromTensorLinearDescriptor(
    const TensorLinearDescriptor& desc, CLContext* context) {
  storage_type_ = desc.storage_type;
  depth_ = desc.size;
  // The creation helpers take a non-const pointer, hence the const_cast.
  // This single declaration serves both branches below.
  uint8_t* data_ptr = desc.data.empty()
                          ? nullptr
                          : const_cast<unsigned char*>(desc.data.data());
  if (storage_type_ == LinearStorageType::BUFFER) {
    bool read_only = desc.memory_type == MemoryType::CONSTANT;
    // Size in bytes of one 4-component element.
    const int float4_size = desc.element_type == DataType::FLOAT32
                                ? sizeof(float) * 4
                                : sizeof(half) * 4;
    return CreateCLBuffer(context->context(), depth_ * float4_size, read_only,
                          data_ptr, &memory_);
  } else {
    return CreateFloatRGBAImage2D(context->context(), depth_, 1,
                                  desc.element_type, data_ptr, &memory_);
  }
}
|
||||
|
||||
LinearStorageType DeduceLinearStorageType(
|
||||
TensorStorageType tensor_storage_type) {
|
||||
if (tensor_storage_type == TensorStorageType::BUFFER) {
|
||||
@ -132,24 +218,6 @@ LinearStorageType DeduceLinearStorageType(
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status CreateLinearStorage(LinearStorageType storage_type,
|
||||
DataType data_type, int size, void* data,
|
||||
CLContext* context, LinearStorage* result) {
|
||||
if (storage_type == LinearStorageType::BUFFER) {
|
||||
const int float4_size =
|
||||
data_type == DataType::FLOAT32 ? sizeof(float4) : sizeof(half4);
|
||||
*result = LinearStorage(size, LinearStorageType::BUFFER);
|
||||
RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * size, data, context,
|
||||
&result->buffer_storage_));
|
||||
return absl::OkStatus();
|
||||
} else {
|
||||
*result = LinearStorage(size, LinearStorageType::TEXTURE_2D);
|
||||
RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, size, 1, data, context,
|
||||
&result->texture_storage_));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -21,11 +21,9 @@ limitations under the License.
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/types/span.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
@ -42,6 +40,20 @@ struct TensorLinearDescriptor : public GPUObjectDescriptor {
|
||||
DataType element_type; // FLOAT32 or FLOAT16
|
||||
MemoryType memory_type = MemoryType::GLOBAL; // applicable for BUFFER
|
||||
|
||||
// optional
|
||||
int size = 0;
|
||||
std::vector<uint8_t> data;
|
||||
|
||||
TensorLinearDescriptor() = default;
|
||||
TensorLinearDescriptor(const TensorLinearDescriptor&) = default;
|
||||
TensorLinearDescriptor& operator=(const TensorLinearDescriptor&) = default;
|
||||
TensorLinearDescriptor(TensorLinearDescriptor&& desc);
|
||||
TensorLinearDescriptor& operator=(TensorLinearDescriptor&& desc);
|
||||
|
||||
void UploadLinearData(
|
||||
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src,
|
||||
int aligned_size = 0);
|
||||
|
||||
absl::Status PerformSelector(const std::string& selector,
|
||||
const std::vector<std::string>& args,
|
||||
const std::vector<std::string>& template_args,
|
||||
@ -50,6 +62,10 @@ struct TensorLinearDescriptor : public GPUObjectDescriptor {
|
||||
GPUResources GetGPUResources() const override;
|
||||
absl::Status PerformReadSelector(const std::vector<std::string>& args,
|
||||
std::string* result) const;
|
||||
|
||||
absl::Status CreateGPUObject(CLContext* context,
|
||||
GPUObjectPtr* result) const override;
|
||||
void Release() override;
|
||||
};
|
||||
|
||||
LinearStorageType DeduceLinearStorageType(
|
||||
@ -60,8 +76,7 @@ LinearStorageType DeduceLinearStorageType(
|
||||
class LinearStorage : public GPUObject {
|
||||
public:
|
||||
LinearStorage() {}
|
||||
|
||||
virtual ~LinearStorage() {}
|
||||
~LinearStorage() override { Release(); }
|
||||
|
||||
// Move only
|
||||
LinearStorage(LinearStorage&& storage);
|
||||
@ -72,46 +87,17 @@ class LinearStorage : public GPUObject {
|
||||
absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr,
|
||||
GPUResourcesWithValue* resources) const override;
|
||||
|
||||
absl::Status CreateFromTensorLinearDescriptor(
|
||||
const TensorLinearDescriptor& desc, CLContext* context);
|
||||
|
||||
private:
|
||||
friend absl::Status CreateLinearStorage(LinearStorageType storage_type,
|
||||
DataType data_type, int size,
|
||||
void* data, CLContext* context,
|
||||
LinearStorage* result);
|
||||
|
||||
LinearStorage(int depth, LinearStorageType storage_type);
|
||||
|
||||
Texture2D texture_storage_;
|
||||
Buffer buffer_storage_;
|
||||
void Release();
|
||||
|
||||
cl_mem memory_ = nullptr;
|
||||
int depth_;
|
||||
LinearStorageType storage_type_;
|
||||
};
|
||||
|
||||
absl::Status CreateLinearStorage(LinearStorageType storage_type,
|
||||
DataType data_type, int size, void* data,
|
||||
CLContext* context, LinearStorage* result);
|
||||
|
||||
template <DataType T>
|
||||
absl::Status CreateLinearStorage(const TensorLinearDescriptor& descriptor,
|
||||
const tflite::gpu::Tensor<Linear, T>& tensor,
|
||||
CLContext* context, LinearStorage* result) {
|
||||
const int depth = DivideRoundUp(tensor.shape.v, 4);
|
||||
if (descriptor.element_type == DataType::FLOAT32) {
|
||||
std::vector<float4> gpu_data(depth);
|
||||
CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data));
|
||||
RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type,
|
||||
descriptor.element_type, depth,
|
||||
gpu_data.data(), context, result));
|
||||
} else {
|
||||
std::vector<half4> gpu_data(depth);
|
||||
CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data));
|
||||
RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type,
|
||||
descriptor.element_type, depth,
|
||||
gpu_data.data(), context, result));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -21,39 +21,14 @@ namespace cl {
|
||||
namespace {
|
||||
|
||||
// Creates new 4-channel 2D texture with cl_channel_type elements
|
||||
absl::Status CreateTexture2D(int width, int height, cl_channel_type type,
|
||||
void* data, CLContext* context,
|
||||
Texture2D* result) {
|
||||
cl_image_desc desc;
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = width;
|
||||
desc.image_height = height;
|
||||
desc.image_depth = 0;
|
||||
desc.image_row_pitch = 0;
|
||||
desc.image_slice_pitch = 0;
|
||||
desc.num_mip_levels = 0;
|
||||
desc.num_samples = 0;
|
||||
desc.buffer = nullptr;
|
||||
|
||||
cl_image_format format;
|
||||
format.image_channel_order = CL_RGBA;
|
||||
format.image_channel_data_type = type;
|
||||
|
||||
cl_mem_flags flags = CL_MEM_READ_WRITE;
|
||||
if (data != nullptr) {
|
||||
flags |= CL_MEM_COPY_HOST_PTR;
|
||||
}
|
||||
|
||||
cl_int error_code;
|
||||
cl_mem texture = CreateImage2DLegacy(context->context(), flags, &format,
|
||||
&desc, data, &error_code);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(
|
||||
absl::StrCat("Failed to create 2D texture (clCreateImage): ",
|
||||
CLErrorCodeToString(error_code)));
|
||||
}
|
||||
|
||||
*result = Texture2D(texture, width, height, type);
|
||||
absl::Status CreateTexture2D(int width, int height, DataType type, void* data,
|
||||
CLContext* context, Texture2D* result) {
|
||||
cl_mem texture;
|
||||
RETURN_IF_ERROR(CreateFloatRGBAImage2D(context->context(), width, height,
|
||||
type, data, &texture));
|
||||
cl_channel_type channel_type =
|
||||
type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;
|
||||
*result = Texture2D(texture, width, height, channel_type);
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
@ -94,7 +69,7 @@ absl::Status Texture2DDescriptor::PerformSelector(
|
||||
return PerformReadSelector(args, result);
|
||||
} else {
|
||||
return absl::NotFoundError(absl::StrCat(
|
||||
"TensorLinearDescriptor don't have selector with name - ", selector));
|
||||
"Texture2DDescriptor don't have selector with name - ", selector));
|
||||
}
|
||||
}
|
||||
|
||||
@ -167,79 +142,41 @@ absl::Status Texture2D::GetGPUResources(
|
||||
}
|
||||
|
||||
absl::Status Texture2D::CreateFromTexture2DDescriptor(
|
||||
const Texture2DDescriptor& tex_desc, CLContext* context) {
|
||||
cl_image_desc desc;
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = tex_desc.size.x;
|
||||
desc.image_height = tex_desc.size.y;
|
||||
desc.image_depth = 0;
|
||||
desc.image_row_pitch = 0;
|
||||
desc.image_slice_pitch = 0;
|
||||
desc.num_mip_levels = 0;
|
||||
desc.num_samples = 0;
|
||||
desc.buffer = nullptr;
|
||||
|
||||
cl_image_format format;
|
||||
format.image_channel_order = CL_RGBA;
|
||||
format.image_channel_data_type =
|
||||
tex_desc.element_type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;
|
||||
|
||||
cl_mem_flags flags = CL_MEM_READ_WRITE;
|
||||
if (!tex_desc.data.empty()) {
|
||||
flags |= CL_MEM_COPY_HOST_PTR;
|
||||
}
|
||||
|
||||
cl_int error_code;
|
||||
width_ = tex_desc.size.x;
|
||||
height_ = tex_desc.size.y;
|
||||
channel_type_ = format.image_channel_data_type;
|
||||
if (tex_desc.data.empty()) {
|
||||
texture_ = CreateImage2DLegacy(context->context(), flags, &format, &desc,
|
||||
nullptr, &error_code);
|
||||
} else {
|
||||
texture_ = CreateImage2DLegacy(
|
||||
context->context(), flags, &format, &desc,
|
||||
const_cast<unsigned char*>(tex_desc.data.data()), &error_code);
|
||||
}
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(
|
||||
absl::StrCat("Failed to create 2D texture (clCreateImage): ",
|
||||
CLErrorCodeToString(error_code)));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
const Texture2DDescriptor& desc, CLContext* context) {
|
||||
width_ = desc.size.x;
|
||||
height_ = desc.size.y;
|
||||
channel_type_ =
|
||||
desc.element_type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;
|
||||
uint8_t* data_ptr = desc.data.empty()
|
||||
? nullptr
|
||||
: const_cast<unsigned char*>(desc.data.data());
|
||||
return CreateFloatRGBAImage2D(context->context(), desc.size.x, desc.size.y,
|
||||
desc.element_type, data_ptr, &texture_);
|
||||
}
|
||||
|
||||
// Creates new 4-channel 2D texture with f32 elements
|
||||
absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext* context,
|
||||
Texture2D* result) {
|
||||
return CreateTexture2D(width, height, CL_FLOAT, nullptr, context, result);
|
||||
return CreateTexture2D(width, height, DataType::FLOAT32, nullptr, context,
|
||||
result);
|
||||
}
|
||||
|
||||
// Creates new 4-channel 2D texture with f16 elements
|
||||
absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext* context,
|
||||
Texture2D* result) {
|
||||
return CreateTexture2D(width, height, CL_HALF_FLOAT, nullptr, context,
|
||||
return CreateTexture2D(width, height, DataType::FLOAT16, nullptr, context,
|
||||
result);
|
||||
}
|
||||
|
||||
absl::Status CreateTexture2DRGBA(DataType type, int width, int height,
|
||||
CLContext* context, Texture2D* result) {
|
||||
if (type == DataType::FLOAT32) {
|
||||
return CreateTexture2D(width, height, CL_FLOAT, nullptr, context, result);
|
||||
} else {
|
||||
return CreateTexture2D(width, height, CL_HALF_FLOAT, nullptr, context,
|
||||
result);
|
||||
}
|
||||
return CreateTexture2D(width, height, type, nullptr, context, result);
|
||||
}
|
||||
|
||||
absl::Status CreateTexture2DRGBA(DataType type, int width, int height,
|
||||
void* data, CLContext* context,
|
||||
Texture2D* result) {
|
||||
if (type == DataType::FLOAT32) {
|
||||
return CreateTexture2D(width, height, CL_FLOAT, data, context, result);
|
||||
} else {
|
||||
return CreateTexture2D(width, height, CL_HALF_FLOAT, data, context, result);
|
||||
}
|
||||
return CreateTexture2D(width, height, type, data, context, result);
|
||||
}
|
||||
|
||||
} // namespace cl
|
||||
|
@ -168,6 +168,56 @@ int ChannelTypeToSizeInBytes(cl_channel_type type) {
|
||||
|
||||
bool OpenCLSupported() { return LoadOpenCL().ok(); }
|
||||
|
||||
// Creates an OpenCL buffer object.
//
// context:       OpenCL context to allocate in.
// size_in_bytes: requested buffer size.
// read_only:     selects CL_MEM_READ_ONLY instead of CL_MEM_READ_WRITE.
// data:          optional initial contents; when non-null the buffer is
//                created with CL_MEM_COPY_HOST_PTR.
// result:        receives the handle returned by clCreateBuffer (written on
//                failure as well).
//
// Returns absl::UnknownError carrying the OpenCL error string when
// clCreateBuffer returns a null handle, absl::OkStatus() otherwise.
absl::Status CreateCLBuffer(cl_context context, int size_in_bytes,
                            bool read_only, void* data, cl_mem* result) {
  cl_mem_flags flags = CL_MEM_READ_WRITE;
  if (read_only) {
    flags = CL_MEM_READ_ONLY;
  }
  if (data != nullptr) {
    flags |= CL_MEM_COPY_HOST_PTR;
  }

  cl_int error_code;
  *result = clCreateBuffer(context, flags, size_in_bytes, data, &error_code);
  if (*result != nullptr) {
    return absl::OkStatus();
  }
  return absl::UnknownError(
      absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
                   CLErrorCodeToString(error_code)));
}
|
||||
|
||||
// Creates a 2D RGBA OpenCL image with floating-point channels.
//
// context: OpenCL context to allocate in.
// width:   image width.
// height:  image height.
// type:    DataType::FLOAT32 selects CL_FLOAT channels; any other value
//          selects CL_HALF_FLOAT.
// data:    optional initial contents; when non-null the image is created with
//          CL_MEM_COPY_HOST_PTR.
// result:  receives the handle returned by CreateImage2DLegacy.
//
// Returns absl::UnknownError carrying the OpenCL error string when the
// creation call reports anything other than CL_SUCCESS, absl::OkStatus()
// otherwise.
absl::Status CreateFloatRGBAImage2D(cl_context context, int width, int height,
                                    DataType type, void* data, cl_mem* result) {
  // Value-initialize the descriptor so that every field, including
  // image_array_size (which was previously never assigned), has a defined
  // value before it is passed to the creation call.
  cl_image_desc desc = {};
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = width;
  desc.image_height = height;
  desc.image_depth = 0;
  desc.image_row_pitch = 0;
  desc.image_slice_pitch = 0;
  desc.num_mip_levels = 0;
  desc.num_samples = 0;
  desc.buffer = nullptr;

  cl_image_format format;
  format.image_channel_order = CL_RGBA;
  format.image_channel_data_type =
      type == DataType::FLOAT32 ? CL_FLOAT : CL_HALF_FLOAT;

  cl_mem_flags flags = CL_MEM_READ_WRITE;
  if (data) {
    flags |= CL_MEM_COPY_HOST_PTR;
  }

  cl_int error_code;
  *result =
      CreateImage2DLegacy(context, flags, &format, &desc, data, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create 2D texture (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -49,6 +49,12 @@ void CopyLinearFLT4(const tflite::gpu::Tensor<Linear, S>& src,
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status CreateCLBuffer(cl_context context, int size_in_bytes,
|
||||
bool read_only, void* data, cl_mem* result);
|
||||
|
||||
absl::Status CreateFloatRGBAImage2D(cl_context context, int width, int height,
|
||||
DataType type, void* data, cl_mem* result);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
Loading…
Reference in New Issue
Block a user