Added CPU representation for Tensor.
PiperOrigin-RevId: 327297658 Change-Id: Iff651c9c21df506cf6a968d8c5000707d9bcf4cf
This commit is contained in:
parent
9c16428e04
commit
c1a32fd496
@ -170,17 +170,12 @@ absl::Status CreateElementwiseTwoInput(
|
|||||||
creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
|
creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
|
||||||
definition.GetDataType(), Layout::HWC);
|
definition.GetDataType(), Layout::HWC);
|
||||||
TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
|
TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
|
||||||
Tensor gpu_tensor;
|
desc.UploadData(constant_tensor);
|
||||||
RETURN_IF_ERROR(
|
|
||||||
CreateTensor(*creation_context.context, shape, desc, &gpu_tensor));
|
|
||||||
RETURN_IF_ERROR(
|
|
||||||
gpu_tensor.WriteData(creation_context.queue, constant_tensor));
|
|
||||||
|
|
||||||
*result = GPUOperation(definition);
|
*result = GPUOperation(definition);
|
||||||
result->elementwise_ = true;
|
result->elementwise_ = true;
|
||||||
result->args_.AddObject("second_tensor", AccessType::READ,
|
result->args_.AddObject("second_tensor",
|
||||||
absl::make_unique<Tensor>(std::move(gpu_tensor)),
|
absl::make_unique<TensorDescriptor>(std::move(desc)));
|
||||||
absl::make_unique<TensorDescriptor>(desc));
|
|
||||||
const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
|
const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
|
||||||
result->code_ = absl::StrCat(
|
result->code_ = absl::StrCat(
|
||||||
"FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n");
|
"FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n");
|
||||||
@ -207,17 +202,12 @@ absl::Status CreateElementwiseTwoInput(
|
|||||||
creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
|
creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
|
||||||
definition.GetDataType(), Layout::HWC);
|
definition.GetDataType(), Layout::HWC);
|
||||||
TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
|
TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
|
||||||
Tensor gpu_tensor;
|
desc.UploadData(constant_tensor);
|
||||||
RETURN_IF_ERROR(
|
|
||||||
CreateTensor(*creation_context.context, shape, desc, &gpu_tensor));
|
|
||||||
RETURN_IF_ERROR(
|
|
||||||
gpu_tensor.WriteData(creation_context.queue, constant_tensor));
|
|
||||||
|
|
||||||
*result = GPUOperation(definition);
|
*result = GPUOperation(definition);
|
||||||
result->elementwise_ = true;
|
result->elementwise_ = true;
|
||||||
result->args_.AddObject("second_tensor", AccessType::READ,
|
result->args_.AddObject("second_tensor",
|
||||||
absl::make_unique<Tensor>(std::move(gpu_tensor)),
|
absl::make_unique<TensorDescriptor>(std::move(desc)));
|
||||||
absl::make_unique<TensorDescriptor>(desc));
|
|
||||||
const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
|
const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
|
||||||
const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
|
const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
|
||||||
const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
|
const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
|
||||||
|
@ -28,6 +28,164 @@ namespace tflite {
|
|||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
namespace cl {
|
||||||
namespace {
|
namespace {
|
||||||
|
absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
|
||||||
|
const TensorDescriptor& descriptor,
|
||||||
|
const void* data_ptr, CLMemory* result) {
|
||||||
|
const int slices = DivideRoundUp(shape.c, 4);
|
||||||
|
cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
|
||||||
|
if (data_ptr) {
|
||||||
|
mem_flags |= CL_MEM_COPY_HOST_PTR;
|
||||||
|
}
|
||||||
|
switch (descriptor.storage_type) {
|
||||||
|
case TensorStorageType::BUFFER:
|
||||||
|
case TensorStorageType::IMAGE_BUFFER: {
|
||||||
|
const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices *
|
||||||
|
4 * SizeOf(descriptor.data_type);
|
||||||
|
cl_int error_code;
|
||||||
|
cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size,
|
||||||
|
const_cast<void*>(data_ptr), &error_code);
|
||||||
|
if (!memory) {
|
||||||
|
return absl::UnknownError(
|
||||||
|
absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
|
||||||
|
CLErrorCodeToString(error_code)));
|
||||||
|
}
|
||||||
|
*result = CLMemory(memory, true);
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
case TensorStorageType::TEXTURE_2D: {
|
||||||
|
cl_image_desc desc;
|
||||||
|
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||||
|
desc.image_width = shape.w * shape.b * shape.d;
|
||||||
|
desc.image_height = shape.h * slices;
|
||||||
|
desc.image_depth = 0;
|
||||||
|
desc.image_row_pitch = 0;
|
||||||
|
desc.image_slice_pitch = 0;
|
||||||
|
desc.num_mip_levels = 0;
|
||||||
|
desc.num_samples = 0;
|
||||||
|
desc.buffer = nullptr;
|
||||||
|
|
||||||
|
cl_image_format format;
|
||||||
|
format.image_channel_order = CL_RGBA;
|
||||||
|
format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
|
||||||
|
|
||||||
|
cl_int error_code;
|
||||||
|
cl_mem memory =
|
||||||
|
CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
|
||||||
|
const_cast<void*>(data_ptr), &error_code);
|
||||||
|
if (error_code != CL_SUCCESS) {
|
||||||
|
return absl::UnknownError(
|
||||||
|
absl::StrCat("Failed to create 2D texture (clCreateImage): ",
|
||||||
|
CLErrorCodeToString(error_code)));
|
||||||
|
}
|
||||||
|
|
||||||
|
*result = CLMemory(memory, true);
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
case TensorStorageType::TEXTURE_3D: {
|
||||||
|
cl_image_desc desc;
|
||||||
|
desc.image_type = CL_MEM_OBJECT_IMAGE3D;
|
||||||
|
desc.image_width = shape.w * shape.b;
|
||||||
|
desc.image_height = shape.h;
|
||||||
|
desc.image_depth = slices * shape.d;
|
||||||
|
desc.image_row_pitch = 0;
|
||||||
|
desc.image_slice_pitch = 0;
|
||||||
|
desc.num_mip_levels = 0;
|
||||||
|
desc.num_samples = 0;
|
||||||
|
desc.buffer = nullptr;
|
||||||
|
|
||||||
|
cl_image_format format;
|
||||||
|
format.image_channel_order = CL_RGBA;
|
||||||
|
format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
|
||||||
|
|
||||||
|
cl_int error_code;
|
||||||
|
cl_mem memory =
|
||||||
|
CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
|
||||||
|
const_cast<void*>(data_ptr), &error_code);
|
||||||
|
if (error_code != CL_SUCCESS) {
|
||||||
|
return absl::UnknownError(
|
||||||
|
absl::StrCat("Failed to create 3D texture (clCreateImage): ",
|
||||||
|
CLErrorCodeToString(error_code)));
|
||||||
|
}
|
||||||
|
|
||||||
|
*result = CLMemory(memory, true);
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
case TensorStorageType::TEXTURE_ARRAY: {
|
||||||
|
cl_image_desc desc;
|
||||||
|
desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
|
||||||
|
desc.image_width = shape.w * shape.b;
|
||||||
|
desc.image_height = shape.h;
|
||||||
|
desc.image_depth = 0;
|
||||||
|
desc.image_array_size = slices * shape.d;
|
||||||
|
desc.image_row_pitch = 0;
|
||||||
|
desc.image_slice_pitch = 0;
|
||||||
|
desc.num_mip_levels = 0;
|
||||||
|
desc.num_samples = 0;
|
||||||
|
desc.buffer = nullptr;
|
||||||
|
|
||||||
|
cl_image_format format;
|
||||||
|
format.image_channel_order = CL_RGBA;
|
||||||
|
format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
|
||||||
|
|
||||||
|
cl_int error_code;
|
||||||
|
cl_mem memory =
|
||||||
|
clCreateImage(context.context(), mem_flags, &format, &desc,
|
||||||
|
const_cast<void*>(data_ptr), &error_code);
|
||||||
|
if (error_code != CL_SUCCESS) {
|
||||||
|
return absl::UnknownError(
|
||||||
|
absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
|
||||||
|
CLErrorCodeToString(error_code)));
|
||||||
|
}
|
||||||
|
|
||||||
|
*result = CLMemory(memory, true);
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
case TensorStorageType::SINGLE_TEXTURE_2D: {
|
||||||
|
if (slices != 1) {
|
||||||
|
return absl::InvalidArgumentError(absl::StrCat(
|
||||||
|
"SINGLE_TEXTURE_2D support only channels in range [1-4], but ",
|
||||||
|
shape.c, "was provided"));
|
||||||
|
}
|
||||||
|
cl_image_desc desc;
|
||||||
|
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||||
|
desc.image_width = shape.w * shape.b * shape.d;
|
||||||
|
desc.image_height = shape.h;
|
||||||
|
desc.image_depth = 0;
|
||||||
|
desc.image_row_pitch = 0;
|
||||||
|
desc.image_slice_pitch = 0;
|
||||||
|
desc.num_mip_levels = 0;
|
||||||
|
desc.num_samples = 0;
|
||||||
|
desc.buffer = nullptr;
|
||||||
|
|
||||||
|
cl_image_format format;
|
||||||
|
if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) {
|
||||||
|
format.image_channel_order = ToChannelOrder(shape.c);
|
||||||
|
format.image_channel_data_type =
|
||||||
|
ToImageChannelType(descriptor.data_type);
|
||||||
|
} else {
|
||||||
|
return absl::InvalidArgumentError(absl::StrCat(
|
||||||
|
"This device doesn't support ", shape.c, "-channel textures."));
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int error_code;
|
||||||
|
cl_mem memory =
|
||||||
|
CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
|
||||||
|
const_cast<void*>(data_ptr), &error_code);
|
||||||
|
if (error_code != CL_SUCCESS) {
|
||||||
|
return absl::UnknownError(
|
||||||
|
absl::StrCat("Failed to create single 2D texture (clCreateImage): ",
|
||||||
|
CLErrorCodeToString(error_code)));
|
||||||
|
}
|
||||||
|
|
||||||
|
*result = CLMemory(memory, true);
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
return absl::InternalError("Unsupported tensor storage type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
absl::Status CreateImageBufferFromBuffer(const CLContext& context,
|
absl::Status CreateImageBufferFromBuffer(const CLContext& context,
|
||||||
cl_mem memory, DataType data_type,
|
cl_mem memory, DataType data_type,
|
||||||
@ -59,7 +217,8 @@ absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
|
|||||||
const bool memory_owner = memory == nullptr;
|
const bool memory_owner = memory == nullptr;
|
||||||
if (memory_owner) {
|
if (memory_owner) {
|
||||||
CLMemory mem;
|
CLMemory mem;
|
||||||
RETURN_IF_ERROR(AllocateTensorMemory(context, shape, descriptor, &mem));
|
RETURN_IF_ERROR(
|
||||||
|
AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
|
||||||
memory = mem.Release();
|
memory = mem.Release();
|
||||||
}
|
}
|
||||||
if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
|
if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
|
||||||
@ -94,6 +253,14 @@ absl::Status CreateTensorShared(const CLContext& context, const BHWDC& shape,
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
// Creates a Tensor from this descriptor and stores it in `*result`.
//
// The tensor is initialised through Tensor::CreateFromDescriptor; if that
// fails, its status is returned and `*result` is left untouched.
absl::Status TensorDescriptor::CreateGPUObject(CLContext* context,
                                               GPUObjectPtr* result) const {
  auto tensor = absl::make_unique<Tensor>();
  RETURN_IF_ERROR(tensor->CreateFromDescriptor(*this, context));
  *result = std::move(tensor);
  return absl::OkStatus();
}
|
||||||
|
|
||||||
Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
|
Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
|
||||||
const TensorDescriptor& descriptor)
|
const TensorDescriptor& descriptor)
|
||||||
: memory_(memory),
|
: memory_(memory),
|
||||||
@ -279,12 +446,6 @@ absl::Status Tensor::IsValid(const BHWDC& shape) const {
|
|||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
int Tensor::GetChannelsAlignment() const {
|
|
||||||
return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
|
|
||||||
? shape_.c
|
|
||||||
: 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Tensor::GetAlignedChannels() const {
|
int Tensor::GetAlignedChannels() const {
|
||||||
return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
|
return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
|
||||||
? shape_.c
|
? shape_.c
|
||||||
@ -329,11 +490,13 @@ absl::Status Tensor::WriteDataBHWDC(absl::Span<const float> in,
|
|||||||
if (descriptor_.data_type == DataType::FLOAT32) {
|
if (descriptor_.data_type == DataType::FLOAT32) {
|
||||||
data_f.resize(elements_count);
|
data_f.resize(elements_count);
|
||||||
data_ptr = data_f.data();
|
data_ptr = data_f.data();
|
||||||
DataFromBHWDC(in, absl::MakeSpan(data_f.data(), data_f.size()));
|
DataFromBHWDC(in, shape_, descriptor_,
|
||||||
|
absl::MakeSpan(data_f.data(), data_f.size()));
|
||||||
} else {
|
} else {
|
||||||
data_h.resize(elements_count);
|
data_h.resize(elements_count);
|
||||||
data_ptr = data_h.data();
|
data_ptr = data_h.data();
|
||||||
DataFromBHWDC(in, absl::MakeSpan(data_h.data(), data_h.size()));
|
DataFromBHWDC(in, shape_, descriptor_,
|
||||||
|
absl::MakeSpan(data_h.data(), data_h.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (descriptor_.storage_type) {
|
switch (descriptor_.storage_type) {
|
||||||
@ -413,9 +576,11 @@ absl::Status Tensor::ReadDataBHWDC(absl::Span<float> out,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (descriptor_.data_type == DataType::FLOAT32) {
|
if (descriptor_.data_type == DataType::FLOAT32) {
|
||||||
DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), out);
|
DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_,
|
||||||
|
descriptor_, out);
|
||||||
} else {
|
} else {
|
||||||
DataToBHWDC(absl::MakeConstSpan(data_h.data(), data_h.size()), out);
|
DataToBHWDC(absl::MakeConstSpan(data_h.data(), data_h.size()), shape_,
|
||||||
|
descriptor_, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
@ -432,6 +597,26 @@ absl::Status Tensor::ReadData(CLCommandQueue* queue,
|
|||||||
return ReadDataBHWDC(absl::MakeSpan(dst->data), queue);
|
return ReadDataBHWDC(absl::MakeSpan(dst->data), queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialises this tensor from `desc`: copies shape, data type, storage type
// and layout, allocates the backing OpenCL memory (which this tensor then
// owns), and for IMAGE_BUFFER storage also creates the image-buffer view over
// that memory.
//
// If `desc.data` is non-empty its bytes are passed as the initial contents of
// the allocation; otherwise the memory is allocated without host data.
//
// Returns the first error from allocation or image-buffer creation, or
// OkStatus.
//
// NOTE(review): any memory this tensor already held is overwritten here
// without being released — confirm callers only use this on a fresh Tensor.
absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
                                          CLContext* context) {
  shape_ = desc.shape;
  descriptor_.data_type = desc.data_type;
  descriptor_.storage_type = desc.storage_type;
  descriptor_.layout = desc.layout;
  memory_owner_ = true;

  // data() of an empty container is not guaranteed to be null; pass an
  // explicit nullptr so no host copy is requested when there is nothing to
  // upload.
  const void* host_data = desc.data.empty() ? nullptr : desc.data.data();

  CLMemory memory;
  RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_,
                                       host_data, &memory));
  memory_ = memory.Release();

  if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        *context, memory_, desc.data_type,
        shape_.b * shape_.w * shape_.h * shape_.d * DivideRoundUp(shape_.c, 4),
        &image_buffer_memory_));
  }
  return absl::OkStatus();
}
|
||||||
|
|
||||||
absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
|
absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
|
||||||
const TensorDescriptor& descriptor, Tensor* result) {
|
const TensorDescriptor& descriptor, Tensor* result) {
|
||||||
const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
|
const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
|
||||||
@ -462,220 +647,14 @@ absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape,
|
|||||||
const TensorDescriptor& descriptor,
|
const TensorDescriptor& descriptor,
|
||||||
CLMemory* result) {
|
CLMemory* result) {
|
||||||
const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
|
const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
|
||||||
return AllocateTensorMemory(context, shape5D, descriptor, result);
|
return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
|
absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
|
||||||
const TensorDescriptor& descriptor,
|
const TensorDescriptor& descriptor,
|
||||||
CLMemory* result) {
|
CLMemory* result) {
|
||||||
const int slices = DivideRoundUp(shape.c, 4);
|
return AllocateTensorMemory(context, shape, descriptor, nullptr, result);
|
||||||
switch (descriptor.storage_type) {
|
|
||||||
case TensorStorageType::BUFFER:
|
|
||||||
case TensorStorageType::IMAGE_BUFFER: {
|
|
||||||
const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices *
|
|
||||||
4 * SizeOf(descriptor.data_type);
|
|
||||||
cl_int error_code;
|
|
||||||
cl_mem memory = clCreateBuffer(context.context(), CL_MEM_READ_WRITE,
|
|
||||||
data_size, nullptr, &error_code);
|
|
||||||
if (!memory) {
|
|
||||||
return absl::UnknownError(
|
|
||||||
absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
|
|
||||||
CLErrorCodeToString(error_code)));
|
|
||||||
}
|
}
|
||||||
*result = CLMemory(memory, true);
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
case TensorStorageType::TEXTURE_2D: {
|
|
||||||
cl_image_desc desc;
|
|
||||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
|
||||||
desc.image_width = shape.w * shape.b * shape.d;
|
|
||||||
desc.image_height = shape.h * slices;
|
|
||||||
desc.image_depth = 0;
|
|
||||||
desc.image_row_pitch = 0;
|
|
||||||
desc.image_slice_pitch = 0;
|
|
||||||
desc.num_mip_levels = 0;
|
|
||||||
desc.num_samples = 0;
|
|
||||||
desc.buffer = nullptr;
|
|
||||||
|
|
||||||
cl_image_format format;
|
|
||||||
format.image_channel_order = CL_RGBA;
|
|
||||||
format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
|
|
||||||
|
|
||||||
cl_int error_code;
|
|
||||||
cl_mem memory = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE,
|
|
||||||
&format, &desc, nullptr, &error_code);
|
|
||||||
if (error_code != CL_SUCCESS) {
|
|
||||||
return absl::UnknownError(
|
|
||||||
absl::StrCat("Failed to create 2D texture (clCreateImage): ",
|
|
||||||
CLErrorCodeToString(error_code)));
|
|
||||||
}
|
|
||||||
|
|
||||||
*result = CLMemory(memory, true);
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
case TensorStorageType::TEXTURE_3D: {
|
|
||||||
cl_image_desc desc;
|
|
||||||
desc.image_type = CL_MEM_OBJECT_IMAGE3D;
|
|
||||||
desc.image_width = shape.w * shape.b;
|
|
||||||
desc.image_height = shape.h;
|
|
||||||
desc.image_depth = slices * shape.d;
|
|
||||||
desc.image_row_pitch = 0;
|
|
||||||
desc.image_slice_pitch = 0;
|
|
||||||
desc.num_mip_levels = 0;
|
|
||||||
desc.num_samples = 0;
|
|
||||||
desc.buffer = nullptr;
|
|
||||||
|
|
||||||
cl_image_format format;
|
|
||||||
format.image_channel_order = CL_RGBA;
|
|
||||||
format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
|
|
||||||
|
|
||||||
cl_int error_code;
|
|
||||||
cl_mem memory = CreateImage3DLegacy(context.context(), CL_MEM_READ_WRITE,
|
|
||||||
&format, &desc, nullptr, &error_code);
|
|
||||||
if (error_code != CL_SUCCESS) {
|
|
||||||
return absl::UnknownError(
|
|
||||||
absl::StrCat("Failed to create 3D texture (clCreateImage): ",
|
|
||||||
CLErrorCodeToString(error_code)));
|
|
||||||
}
|
|
||||||
|
|
||||||
*result = CLMemory(memory, true);
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
case TensorStorageType::TEXTURE_ARRAY: {
|
|
||||||
cl_image_desc desc;
|
|
||||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
|
|
||||||
desc.image_width = shape.w * shape.b;
|
|
||||||
desc.image_height = shape.h;
|
|
||||||
desc.image_depth = 0;
|
|
||||||
desc.image_array_size = slices * shape.d;
|
|
||||||
desc.image_row_pitch = 0;
|
|
||||||
desc.image_slice_pitch = 0;
|
|
||||||
desc.num_mip_levels = 0;
|
|
||||||
desc.num_samples = 0;
|
|
||||||
desc.buffer = nullptr;
|
|
||||||
|
|
||||||
cl_image_format format;
|
|
||||||
format.image_channel_order = CL_RGBA;
|
|
||||||
format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
|
|
||||||
|
|
||||||
cl_int error_code;
|
|
||||||
cl_mem memory = clCreateImage(context.context(), CL_MEM_READ_WRITE,
|
|
||||||
&format, &desc, nullptr, &error_code);
|
|
||||||
if (error_code != CL_SUCCESS) {
|
|
||||||
return absl::UnknownError(
|
|
||||||
absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
|
|
||||||
CLErrorCodeToString(error_code)));
|
|
||||||
}
|
|
||||||
|
|
||||||
*result = CLMemory(memory, true);
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
|
|
||||||
case TensorStorageType::SINGLE_TEXTURE_2D: {
|
|
||||||
if (slices != 1) {
|
|
||||||
return absl::InvalidArgumentError(absl::StrCat(
|
|
||||||
"SINGLE_TEXTURE_2D support only channels in range [1-4], but ",
|
|
||||||
shape.c, "was provided"));
|
|
||||||
}
|
|
||||||
cl_image_desc desc;
|
|
||||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
|
||||||
desc.image_width = shape.w * shape.b * shape.d;
|
|
||||||
desc.image_height = shape.h;
|
|
||||||
desc.image_depth = 0;
|
|
||||||
desc.image_row_pitch = 0;
|
|
||||||
desc.image_slice_pitch = 0;
|
|
||||||
desc.num_mip_levels = 0;
|
|
||||||
desc.num_samples = 0;
|
|
||||||
desc.buffer = nullptr;
|
|
||||||
|
|
||||||
cl_image_format format;
|
|
||||||
if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) {
|
|
||||||
format.image_channel_order = ToChannelOrder(shape.c);
|
|
||||||
format.image_channel_data_type =
|
|
||||||
ToImageChannelType(descriptor.data_type);
|
|
||||||
} else {
|
|
||||||
return absl::InvalidArgumentError(absl::StrCat(
|
|
||||||
"This device doesn't support ", shape.c, "-channel textures."));
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int error_code;
|
|
||||||
cl_mem memory = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE,
|
|
||||||
&format, &desc, nullptr, &error_code);
|
|
||||||
if (error_code != CL_SUCCESS) {
|
|
||||||
return absl::UnknownError(
|
|
||||||
absl::StrCat("Failed to create 2D texture (clCreateImage): ",
|
|
||||||
CLErrorCodeToString(error_code)));
|
|
||||||
}
|
|
||||||
|
|
||||||
*result = CLMemory(memory, true);
|
|
||||||
return absl::OkStatus();
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
return absl::InternalError("Unsupported tensor storage type");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void Tensor::DataFromBHWDC(absl::Span<const float> src,
|
|
||||||
absl::Span<T> dst) const {
|
|
||||||
const int channels_batch = GetChannelsAlignment();
|
|
||||||
for (int b = 0; b < shape_.b; ++b) {
|
|
||||||
for (int s = 0; s < Slices(); ++s) {
|
|
||||||
for (int y = 0; y < shape_.h; ++y) {
|
|
||||||
for (int x = 0; x < shape_.w; ++x) {
|
|
||||||
for (int d = 0; d < shape_.d; ++d) {
|
|
||||||
for (int c = 0; c < channels_batch; ++c) {
|
|
||||||
float value;
|
|
||||||
if (s * 4 + c < shape_.c) {
|
|
||||||
const int cpu_index =
|
|
||||||
shape_.LinearIndex({b, y, x, d, s * 4 + c});
|
|
||||||
value = src[cpu_index];
|
|
||||||
} else {
|
|
||||||
value = 0.0f;
|
|
||||||
}
|
|
||||||
const int gpu_index = GetLinearIndex(b, x, y, d, s, c);
|
|
||||||
dst[gpu_index] = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template void Tensor::DataFromBHWDC<float>(absl::Span<const float> src,
|
|
||||||
absl::Span<float> dst) const;
|
|
||||||
template void Tensor::DataFromBHWDC<half>(absl::Span<const float> src,
|
|
||||||
absl::Span<half> dst) const;
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void Tensor::DataToBHWDC(absl::Span<const T> src, absl::Span<float> dst) const {
|
|
||||||
const int channels_batch = GetChannelsAlignment();
|
|
||||||
for (int b = 0; b < shape_.b; ++b) {
|
|
||||||
for (int s = 0; s < Slices(); ++s) {
|
|
||||||
for (int y = 0; y < shape_.h; ++y) {
|
|
||||||
for (int x = 0; x < shape_.w; ++x) {
|
|
||||||
for (int d = 0; d < shape_.d; ++d) {
|
|
||||||
for (int c = 0; c < channels_batch; ++c) {
|
|
||||||
if (s * 4 + c >= shape_.c) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const int cpu_index = shape_.LinearIndex({b, y, x, d, s * 4 + c});
|
|
||||||
const int gpu_index = GetLinearIndex(b, x, y, d, s, c);
|
|
||||||
dst[cpu_index] = src[gpu_index];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template void Tensor::DataToBHWDC<float>(absl::Span<const float> src,
|
|
||||||
absl::Span<float> dst) const;
|
|
||||||
template void Tensor::DataToBHWDC<half>(absl::Span<const half> src,
|
|
||||||
absl::Span<float> dst) const;
|
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -92,6 +92,9 @@ class Tensor : public GPUObject {
|
|||||||
absl::Status ReadData(CLCommandQueue* queue, TensorFloat32* dst) const;
|
absl::Status ReadData(CLCommandQueue* queue, TensorFloat32* dst) const;
|
||||||
absl::Status ReadData(CLCommandQueue* queue, Tensor5DFloat32* dst) const;
|
absl::Status ReadData(CLCommandQueue* queue, Tensor5DFloat32* dst) const;
|
||||||
|
|
||||||
|
absl::Status CreateFromDescriptor(const TensorDescriptor& desc,
|
||||||
|
CLContext* context);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
absl::Status IsValid(const BHWC& shape) const;
|
absl::Status IsValid(const BHWC& shape) const;
|
||||||
absl::Status IsValid(const BHWDC& shape) const;
|
absl::Status IsValid(const BHWDC& shape) const;
|
||||||
@ -104,37 +107,6 @@ class Tensor : public GPUObject {
|
|||||||
absl::Status ReadDataBHWDC(absl::Span<float> out,
|
absl::Status ReadDataBHWDC(absl::Span<float> out,
|
||||||
CLCommandQueue* queue) const;
|
CLCommandQueue* queue) const;
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void DataFromBHWDC(absl::Span<const float> src, absl::Span<T> dst) const;
|
|
||||||
template <typename T>
|
|
||||||
void DataToBHWDC(absl::Span<const T> src, absl::Span<float> dst) const;
|
|
||||||
|
|
||||||
// TODO(sorokin) might be bad performance
|
|
||||||
int GetLinearIndex(int b, int x, int y, int d, int s, int sub_c) const {
|
|
||||||
switch (descriptor_.storage_type) {
|
|
||||||
case TensorStorageType::BUFFER:
|
|
||||||
case TensorStorageType::IMAGE_BUFFER:
|
|
||||||
case TensorStorageType::TEXTURE_ARRAY:
|
|
||||||
case TensorStorageType::TEXTURE_3D:
|
|
||||||
return ((((d * Slices() + s) * shape_.h + y) * shape_.w + x) *
|
|
||||||
shape_.b +
|
|
||||||
b) *
|
|
||||||
4 +
|
|
||||||
sub_c; // DSHWBC4
|
|
||||||
case TensorStorageType::TEXTURE_2D:
|
|
||||||
return ((((y * Slices() + s) * shape_.w + x) * shape_.b + b) *
|
|
||||||
shape_.d +
|
|
||||||
d) *
|
|
||||||
4 +
|
|
||||||
sub_c; // HSWBDC4
|
|
||||||
case TensorStorageType::SINGLE_TEXTURE_2D:
|
|
||||||
return (((y * shape_.w + x) * shape_.b + b) * shape_.d + d) * shape_.c +
|
|
||||||
sub_c; // HWBDC
|
|
||||||
case TensorStorageType::UNKNOWN:
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int3 GetFullTensorRegion() const;
|
int3 GetFullTensorRegion() const;
|
||||||
void Release();
|
void Release();
|
||||||
|
|
||||||
|
@ -73,6 +73,25 @@ std::string ToString(TensorStorageType type) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Move constructor. The base-class part and the `data` byte storage are moved
// from `desc`; data type, storage type, layout and shape are copied.
TensorDescriptor::TensorDescriptor(TensorDescriptor&& desc)
    : GPUObjectDescriptor(std::move(desc)),
      data_type(desc.data_type),
      storage_type(desc.storage_type),
      layout(desc.layout),
      shape(desc.shape),
      data(std::move(desc.data)) {
}
|
||||||
|
// Move assignment. Self-assignment is a no-op. Otherwise data type, storage
// type, layout and shape are swapped with `desc`, `data` is moved from it,
// and finally the base-class part is move-assigned.
TensorDescriptor& TensorDescriptor::operator=(TensorDescriptor&& desc) {
  if (this == &desc) {
    return *this;
  }
  std::swap(data_type, desc.data_type);
  std::swap(storage_type, desc.storage_type);
  std::swap(layout, desc.layout);
  std::swap(shape, desc.shape);
  data = std::move(desc.data);
  GPUObjectDescriptor::operator=(std::move(desc));
  return *this;
}
|
||||||
|
|
||||||
GPUResources TensorDescriptor::GetGPUResources() const {
|
GPUResources TensorDescriptor::GetGPUResources() const {
|
||||||
GPUResources resources;
|
GPUResources resources;
|
||||||
if (HasAxis(Axis::WIDTH)) {
|
if (HasAxis(Axis::WIDTH)) {
|
||||||
@ -725,6 +744,134 @@ TextureAddressMode TensorDescriptor::ModeFromState() const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TensorDescriptor::UploadData(
|
||||||
|
const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
|
||||||
|
shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c);
|
||||||
|
UploadData(absl::MakeConstSpan(src.data));
|
||||||
|
}
|
||||||
|
|
||||||
|
void TensorDescriptor::UploadData(
|
||||||
|
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
|
||||||
|
shape = BHWDC(1, 1, 1, 1, src.shape.v);
|
||||||
|
UploadData(absl::MakeConstSpan(src.data));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts `src` (float values indexed by `shape`) into the layout selected
// by `storage_type` and stores the result in `data`.
//
// `shape` must already be set. `data` is resized to hold the element count
// times SizeOf(data_type) bytes, where the channel count is used as-is for
// SINGLE_TEXTURE_2D and rounded up to a multiple of 4 otherwise. Elements are
// written as float when data_type is FLOAT32 and as half in every other case.
void TensorDescriptor::UploadData(absl::Span<const float> src) {
  const bool single_texture =
      storage_type == TensorStorageType::SINGLE_TEXTURE_2D;
  const int padded_channels = single_texture ? shape.c : AlignByN(shape.c, 4);
  const int total_elements =
      shape.b * shape.w * shape.h * shape.d * padded_channels;
  data.resize(total_elements * SizeOf(data_type));

  if (data_type != DataType::FLOAT32) {
    half* half_out = reinterpret_cast<half*>(data.data());
    DataFromBHWDC(src, shape, *this, absl::MakeSpan(half_out, total_elements));
    return;
  }
  float* float_out = reinterpret_cast<float*>(data.data());
  DataFromBHWDC(src, shape, *this, absl::MakeSpan(float_out, total_elements));
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
int GetLinearIndex(const TensorDescriptor& desc, const BHWDC& shape, int b,
|
||||||
|
int x, int y, int d, int s, int sub_c) {
|
||||||
|
const int slices = DivideRoundUp(shape.c, 4);
|
||||||
|
switch (desc.storage_type) {
|
||||||
|
case TensorStorageType::BUFFER:
|
||||||
|
case TensorStorageType::IMAGE_BUFFER:
|
||||||
|
case TensorStorageType::TEXTURE_ARRAY:
|
||||||
|
case TensorStorageType::TEXTURE_3D:
|
||||||
|
return ((((d * slices + s) * shape.h + y) * shape.w + x) * shape.b + b) *
|
||||||
|
4 +
|
||||||
|
sub_c; // DSHWBC4
|
||||||
|
case TensorStorageType::TEXTURE_2D:
|
||||||
|
return ((((y * slices + s) * shape.w + x) * shape.b + b) * shape.d + d) *
|
||||||
|
4 +
|
||||||
|
sub_c; // HSWBDC4
|
||||||
|
case TensorStorageType::SINGLE_TEXTURE_2D:
|
||||||
|
return (((y * shape.w + x) * shape.b + b) * shape.d + d) * shape.c +
|
||||||
|
sub_c; // HWBDC
|
||||||
|
case TensorStorageType::UNKNOWN:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int GetChannelsAlignment(const TensorDescriptor& desc, const BHWDC& shape) {
|
||||||
|
return desc.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c
|
||||||
|
: 4;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void DataFromBHWDC(absl::Span<const float> src, const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc, absl::Span<T> dst) {
|
||||||
|
const int channels_alignment = GetChannelsAlignment(desc, shape);
|
||||||
|
const int slices = DivideRoundUp(shape.c, 4);
|
||||||
|
for (int b = 0; b < shape.b; ++b) {
|
||||||
|
for (int s = 0; s < slices; ++s) {
|
||||||
|
for (int y = 0; y < shape.h; ++y) {
|
||||||
|
for (int x = 0; x < shape.w; ++x) {
|
||||||
|
for (int d = 0; d < shape.d; ++d) {
|
||||||
|
for (int c = 0; c < channels_alignment; ++c) {
|
||||||
|
float value;
|
||||||
|
if (s * 4 + c < shape.c) {
|
||||||
|
const int cpu_index =
|
||||||
|
shape.LinearIndex({b, y, x, d, s * 4 + c});
|
||||||
|
value = src[cpu_index];
|
||||||
|
} else {
|
||||||
|
value = 0.0f;
|
||||||
|
}
|
||||||
|
int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c);
|
||||||
|
dst[gpu_index] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template void DataFromBHWDC<float>(absl::Span<const float> src,
|
||||||
|
const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc,
|
||||||
|
absl::Span<float> dst);
|
||||||
|
template void DataFromBHWDC<half>(absl::Span<const float> src,
|
||||||
|
const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc,
|
||||||
|
absl::Span<half> dst);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void DataToBHWDC(absl::Span<const T> src, const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc, absl::Span<float> dst) {
|
||||||
|
const int channels_alignment = GetChannelsAlignment(desc, shape);
|
||||||
|
const int slices = DivideRoundUp(shape.c, 4);
|
||||||
|
for (int b = 0; b < shape.b; ++b) {
|
||||||
|
for (int s = 0; s < slices; ++s) {
|
||||||
|
for (int y = 0; y < shape.h; ++y) {
|
||||||
|
for (int x = 0; x < shape.w; ++x) {
|
||||||
|
for (int d = 0; d < shape.d; ++d) {
|
||||||
|
for (int c = 0; c < channels_alignment; ++c) {
|
||||||
|
if (s * 4 + c >= shape.c) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c});
|
||||||
|
int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c);
|
||||||
|
dst[cpu_index] = src[gpu_index];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template void DataToBHWDC<float>(absl::Span<const float> src,
|
||||||
|
const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc,
|
||||||
|
absl::Span<float> dst);
|
||||||
|
template void DataToBHWDC<half>(absl::Span<const half> src, const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc,
|
||||||
|
absl::Span<float> dst);
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
@ -49,6 +49,11 @@ struct TensorDescriptor : public GPUObjectDescriptor {
|
|||||||
TensorDescriptor(DataType dt, TensorStorageType st, Layout l)
|
TensorDescriptor(DataType dt, TensorStorageType st, Layout l)
|
||||||
: data_type(dt), storage_type(st), layout(l) {}
|
: data_type(dt), storage_type(st), layout(l) {}
|
||||||
|
|
||||||
|
TensorDescriptor(const TensorDescriptor&) = default;
|
||||||
|
TensorDescriptor& operator=(const TensorDescriptor&) = default;
|
||||||
|
TensorDescriptor(TensorDescriptor&& desc);
|
||||||
|
TensorDescriptor& operator=(TensorDescriptor&& desc);
|
||||||
|
|
||||||
bool operator==(const TensorDescriptor& d) const {
|
bool operator==(const TensorDescriptor& d) const {
|
||||||
return data_type == d.data_type && storage_type == d.storage_type &&
|
return data_type == d.data_type && storage_type == d.storage_type &&
|
||||||
layout == d.layout;
|
layout == d.layout;
|
||||||
@ -63,6 +68,10 @@ struct TensorDescriptor : public GPUObjectDescriptor {
|
|||||||
|
|
||||||
GPUResources GetGPUResources() const override;
|
GPUResources GetGPUResources() const override;
|
||||||
|
|
||||||
|
absl::Status CreateGPUObject(CLContext* context,
|
||||||
|
GPUObjectPtr* result) const override;
|
||||||
|
void Release() override { data.clear(); }
|
||||||
|
|
||||||
bool HasAxis(Axis axis) const;
|
bool HasAxis(Axis axis) const;
|
||||||
void SetTextureAddressMode(TextureAddressMode mode);
|
void SetTextureAddressMode(TextureAddressMode mode);
|
||||||
|
|
||||||
@ -70,6 +79,9 @@ struct TensorDescriptor : public GPUObjectDescriptor {
|
|||||||
const std::vector<std::string>& args, std::string* value_name,
|
const std::vector<std::string>& args, std::string* value_name,
|
||||||
std::string* x_coord, std::string* y_coord, std::string* s_coord) const;
|
std::string* x_coord, std::string* y_coord, std::string* s_coord) const;
|
||||||
|
|
||||||
|
void UploadData(const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
|
||||||
|
void UploadData(const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);
|
||||||
|
|
||||||
DataType data_type = DataType::UNKNOWN;
|
DataType data_type = DataType::UNKNOWN;
|
||||||
TensorStorageType storage_type = TensorStorageType::UNKNOWN;
|
TensorStorageType storage_type = TensorStorageType::UNKNOWN;
|
||||||
// This field describes logical layout, actual(physical) GPU layout can be
|
// This field describes logical layout, actual(physical) GPU layout can be
|
||||||
@ -77,6 +89,10 @@ struct TensorDescriptor : public GPUObjectDescriptor {
|
|||||||
Layout layout =
|
Layout layout =
|
||||||
Layout::UNKNOWN; // Supported layouts is HWC, BHWC, HWDC, BHWDC
|
Layout::UNKNOWN; // Supported layouts is HWC, BHWC, HWDC, BHWDC
|
||||||
|
|
||||||
|
// optional
|
||||||
|
BHWDC shape;
|
||||||
|
std::vector<uint8_t> data;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
absl::Status PerformReadSelector(
|
absl::Status PerformReadSelector(
|
||||||
const std::vector<std::string>& args,
|
const std::vector<std::string>& args,
|
||||||
@ -145,8 +161,18 @@ struct TensorDescriptor : public GPUObjectDescriptor {
|
|||||||
bool ParseCoordsFromArgs(const std::vector<std::string>& args, int offset,
|
bool ParseCoordsFromArgs(const std::vector<std::string>& args, int offset,
|
||||||
std::string* xc, std::string* yc, std::string* zc,
|
std::string* xc, std::string* yc, std::string* zc,
|
||||||
std::string* sc, std::string* bc) const;
|
std::string* sc, std::string* bc) const;
|
||||||
|
|
||||||
|
void UploadData(absl::Span<const float> src);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Repacks the dense BHWDC float data in `src` into the element order that
// `desc` prescribes for `shape`, writing elements of type T to `dst`.
// Channel entries beyond shape.c are written as zero.
template <typename T>
|
||||||
|
void DataFromBHWDC(absl::Span<const float> src, const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc, absl::Span<T> dst);
|
||||||
|
|
||||||
|
// Inverse of DataFromBHWDC: reads elements of type T from `src`, laid out as
// `desc` prescribes for `shape`, and writes them to `dst` in dense BHWDC
// order as floats. Channel entries beyond shape.c are skipped.
template <typename T>
|
||||||
|
void DataToBHWDC(absl::Span<const T> src, const BHWDC& shape,
|
||||||
|
const TensorDescriptor& desc, absl::Span<float> dst);
|
||||||
|
|
||||||
std::string ToString(TensorStorageType type);
|
std::string ToString(TensorStorageType type);
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
|
Loading…
Reference in New Issue
Block a user