Added info about supported image formats to DeviceInfo.

storage_type_util cleaned from OpenCL API calls/structs.

PiperOrigin-RevId: 325347475
Change-Id: I096636e4dd837ef9754df70caf37842c605c24f5
This commit is contained in:
Raman Sarokin 2020-08-06 17:49:37 -07:00 committed by TensorFlower Gardener
parent d9ea505110
commit 235dbc2dc2
29 changed files with 177 additions and 112 deletions

View File

@ -257,6 +257,7 @@ cc_library(
srcs = ["device_info.cc"],
hdrs = ["device_info.h"],
deps = [
"//tensorflow/lite/delegates/gpu/common:data_type",
"@com_google_absl//absl/strings",
],
)
@ -468,11 +469,11 @@ cc_library(
srcs = ["storage_type_util.cc"],
hdrs = ["storage_type_util.h"],
deps = [
":cl_context",
":cl_device",
":device_info",
":tensor_type",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:util",
],
)

View File

@ -43,6 +43,44 @@ std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context,
return result;
}
bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type,
int num_channels) {
return image_format.image_channel_data_type ==
ToImageChannelType(data_type) &&
image_format.image_channel_order == ToChannelOrder(num_channels);
}
void AddSupportedImageFormats(cl_context context, DeviceInfo* info) {
auto supported_formats =
GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
for (auto format : supported_formats) {
info->supports_r_f16_tex2d =
info->supports_r_f16_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT16, 1);
info->supports_rg_f16_tex2d =
info->supports_rg_f16_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT16, 2);
info->supports_rgb_f16_tex2d =
info->supports_rgb_f16_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT16, 3);
info->supports_rgba_f16_tex2d =
info->supports_rgba_f16_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT16, 4);
info->supports_r_f32_tex2d =
info->supports_r_f32_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT32, 1);
info->supports_rg_f32_tex2d =
info->supports_rg_f32_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT32, 2);
info->supports_rgb_f32_tex2d =
info->supports_rgb_f32_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT32, 3);
info->supports_rgba_f32_tex2d =
info->supports_rgba_f32_tex2d ||
IsEqualToImageFormat(format, DataType::FLOAT32, 4);
}
}
absl::Status CreateCLContext(const CLDevice& device,
cl_context_properties* properties,
CLContext* result) {
@ -55,6 +93,7 @@ absl::Status CreateCLContext(const CLDevice& device,
absl::StrCat("Failed to create a compute context - ",
CLErrorCodeToString(error_code)));
}
AddSupportedImageFormats(context, &device.info_);
*result = CLContext(context, true);
return absl::OkStatus();

View File

@ -248,24 +248,24 @@ DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) {
}
CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
: id_(id), platform_id_(platform_id), info_(DeviceInfoFromDeviceID(id)) {}
: info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {}
CLDevice::CLDevice(const CLDevice& device)
: id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {}
: info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) {}
CLDevice& CLDevice::operator=(const CLDevice& device) {
if (this != &device) {
info_ = device.info_;
id_ = device.id_;
platform_id_ = device.platform_id_;
info_ = device.info_;
}
return *this;
}
CLDevice::CLDevice(CLDevice&& device)
: id_(device.id_),
platform_id_(device.platform_id_),
info_(std::move(device.info_)) {
: info_(std::move(device.info_)),
id_(device.id_),
platform_id_(device.platform_id_) {
device.id_ = nullptr;
device.platform_id_ = nullptr;
}
@ -274,9 +274,9 @@ CLDevice& CLDevice::operator=(CLDevice&& device) {
if (this != &device) {
id_ = nullptr;
platform_id_ = nullptr;
info_ = std::move(device.info_);
std::swap(id_, device.id_);
std::swap(platform_id_, device.platform_id_);
info_ = std::move(device.info_);
}
return *this;
}
@ -368,7 +368,7 @@ bool CLDevice::IsAMD() const { return info_.IsAMD(); }
bool CLDevice::IsIntel() const { return info_.IsIntel(); }
bool CLDevice::SupportsOneLayerTextureArray() const {
return !IsAdreno() || info_.adreno_info.support_one_layer_texture_array;
return info_.SupportsOneLayerTextureArray();
}
void CLDevice::DisableOneLayerTextureArray() {

View File

@ -46,9 +46,6 @@ class CLDevice {
cl_platform_id platform() const { return platform_id_; }
std::string GetPlatformVersion() const;
const DeviceInfo& GetInfo() const { return info_; }
const DeviceInfo* GetInfoPtr() const { return &info_; }
Vendor vendor() const { return info_.vendor; }
OpenCLVersion cl_version() const { return info_.cl_version; }
bool SupportsFP16() const;
@ -76,10 +73,13 @@ class CLDevice {
bool SupportsOneLayerTextureArray() const;
void DisableOneLayerTextureArray();
// We update device info during context creation, so as supported texture
// formats can be requested from context only.
mutable DeviceInfo info_;
private:
cl_device_id id_ = nullptr;
cl_platform_id platform_id_ = nullptr;
DeviceInfo info_;
};
absl::Status CreateDefaultGPUDevice(CLDevice* result);

View File

@ -78,13 +78,13 @@ std::string CompilerOptionToString(const CLDevice& device,
CompilerOptions option) {
switch (option) {
case CompilerOptions::ADRENO_FULL_SIMD_LINE:
if (device.GetInfo().adreno_info.gpu_version < 500) {
if (device.info_.adreno_info.gpu_version < 500) {
return "-qcom-accelerate-16-bit";
} else {
return "-qcom-accelerate-16-bit=true";
}
case CompilerOptions::ADRENO_MORE_WAVES:
if (device.GetInfo().adreno_info.gpu_version >= 500) {
if (device.info_.adreno_info.gpu_version >= 500) {
return "-qcom-accelerate-16-bit=false";
} else {
return "";

View File

@ -231,6 +231,28 @@ bool DeviceInfo::SupportsImage3D() const {
return supports_image3d_writes;
}
bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
if (channels == 1) {
return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d
: supports_r_f16_tex2d;
} else if (channels == 2) {
return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d
: supports_rg_f16_tex2d;
} else if (channels == 3) {
return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d
: supports_rgb_f16_tex2d;
} else if (channels == 4) {
return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d
: supports_rgba_f16_tex2d;
} else {
return false;
}
}
bool DeviceInfo::SupportsOneLayerTextureArray() const {
return !IsAdreno() || adreno_info.support_one_layer_texture_array;
}
bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; }
bool DeviceInfo::IsAdreno3xx() const {

View File

@ -19,6 +19,8 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
// for use only in device_info.cc, but keep here to make tests
int GetAdrenoGPUVersion(const std::string& gpu_version);
@ -131,6 +133,11 @@ struct DeviceInfo {
bool SupportsImageBuffer() const;
bool SupportsImage3D() const;
bool SupportsFloatImage2D(DataType data_type, int channels) const;
// To track bug on some Adreno. b/131099086
bool SupportsOneLayerTextureArray() const;
std::vector<std::string> extensions;
bool supports_fp16;
bool supports_image3d_writes;
@ -157,6 +164,16 @@ struct DeviceInfo {
bool supports_fp32_rtn;
bool supports_fp16_rtn;
bool supports_r_f16_tex2d = false;
bool supports_rg_f16_tex2d = false;
bool supports_rgb_f16_tex2d = false;
bool supports_rgba_f16_tex2d = false;
bool supports_r_f32_tex2d = false;
bool supports_rg_f32_tex2d = false;
bool supports_rgb_f32_tex2d = false;
bool supports_rgba_f32_tex2d = false;
AdrenoInfo adreno_info;
MaliInfo mali_info;
};

View File

@ -47,7 +47,7 @@ __kernel void main_function(__write_only image2d_array_t dst) {
absl::Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
bool* result) {
// No bug on Adreno 6xx
if (env->device().GetInfo().adreno_info.gpu_version >= 600) {
if (env->device().info_.adreno_info.gpu_version >= 600) {
*result = true;
return absl::OkStatus();
}
@ -242,7 +242,7 @@ TensorStorageType GetFastestStorageType(const CLDevice& gpu) {
} else if (gpu.IsPowerVR()) {
return TensorStorageType::TEXTURE_2D;
} else if (gpu.IsMali()) {
const MaliInfo mali_info = gpu.GetInfo().mali_info;
const MaliInfo mali_info = gpu.info_.mali_info;
if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
mali_info.IsValhall()) {
return TensorStorageType::TEXTURE_2D;

View File

@ -203,7 +203,7 @@ absl::Status InferenceContext::InitFromGraph(
TuningParameters tuning_parameters;
tuning_parameters.queue = env->profiling_queue();
tuning_parameters.info = env->device().GetInfoPtr();
tuning_parameters.info = &env->device().info_;
if (create_info.hints.Check(ModelHints::kFastTuning)) {
tuning_parameters.tuning_type = TuningType::FAST;
}
@ -244,14 +244,13 @@ void InferenceContext::ReserveGraphTensors(
if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) {
if (shape.c < 4 &&
CanCreateTensorWithShape(
*creation_context.context, *creation_context.device, shape,
creation_context.device->info_, shape,
TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D,
layout})) {
storage_type = TensorStorageType::SINGLE_TEXTURE_2D;
}
}
storage_type = SelectBestStorageType(*creation_context.context,
*creation_context.device, shape,
storage_type = SelectBestStorageType(creation_context.device->info_, shape,
storage_type, data_type, layout);
tensor_reserver_.Add(
t->id, {shape, TensorDescriptor{data_type, storage_type, layout}});

View File

@ -93,7 +93,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device,
}
bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 &&
definition.precision != CalculationsPrecision::F32;
bool is_midgard = device.IsMali() && device.GetInfo().mali_info.IsMidgard();
bool is_midgard = device.IsMali() && device.info_.mali_info.IsMidgard();
if (is_midgard) {
if (can_use_flt8) {
conv_params.element_size = 8;
@ -141,7 +141,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device,
conv_params.element_size = 4;
conv_params.block_size = int3(1, 1, 1);
if (device.IsMali() && definition.precision == CalculationsPrecision::F16 &&
device.GetInfo().compute_units_count <= 4) {
device.info_.compute_units_count <= 4) {
conv_params.block_size.x *= 2;
}
return conv_params;

View File

@ -271,7 +271,7 @@ bool IsConvConstantsSupported(const CLDevice& device,
? sizeof(float)
: sizeof(half);
const int filters_buffer_size = filters_count * float_size;
const int kConstantMaxSize = GetOptimalMaxConstantSize(device.GetInfo());
const int kConstantMaxSize = GetOptimalMaxConstantSize(device.info_);
const int flt4_registers = DivideRoundUp(w_shape.o, 4);
return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8;
}
@ -283,7 +283,7 @@ absl::Status CreateConvConstants(const CreationContext& creation_context,
if (!IsConvConstantsSupported(*creation_context.device, definition, attr)) {
return absl::InvalidArgumentError("ConvConstants doesn't supported");
}
*result = ConvConstants(definition, attr, creation_context.device->GetInfo());
*result = ConvConstants(definition, attr, creation_context.device->info_);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));

View File

@ -718,7 +718,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
if (dst_shape) {
int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
float task_size_per_cu =
static_cast<float>(task_size) / device.GetInfo().compute_units_count;
static_cast<float>(task_size) / device.info_.compute_units_count;
int block_size = conv_params.block_size.x * conv_params.block_size.y *
conv_params.block_size.z;
float threads_per_cu = task_size_per_cu / block_size;
@ -844,7 +844,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
conv_params.block_size = int3(1, 1, 1);
}
conv_params.src_depth_loop_size = 1;
MaliInfo mali_info = device.GetInfo().mali_info;
MaliInfo mali_info = device.info_.mali_info;
if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) {
conv_params.src_depth_loop_size = 2;
}
@ -987,7 +987,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const Convolution2DAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape) {
*result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
result->GenerateCode(creation_context.device->GetInfo());
result->GenerateCode(creation_context.device->info_);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
@ -996,7 +996,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context,
const FullyConnectedAttributes& attr,
ConvPowerVR* result, const BHWC* dst_shape) {
*result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
result->GenerateCode(creation_context.device->GetInfo());
result->GenerateCode(creation_context.device->info_);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
@ -1006,7 +1006,7 @@ absl::Status CreateConvPowerVRDynamicWeights(
ConvPowerVR* result, const BHWC* dst_shape) {
*result = ConvPowerVR(definition, attr, weights_shape,
*creation_context.device, dst_shape);
result->GenerateCode(creation_context.device->GetInfo());
result->GenerateCode(creation_context.device->info_);
return result->UploadBias(attr.bias, creation_context.context);
}
@ -1017,7 +1017,7 @@ absl::Status CreateConvPowerVRWino4x4To6x6(
*result = ConvPowerVR(definition);
result->conv_params_ = result->GuessBestParamsWinograd(
*creation_context.device, definition, attr, dst_shape);
result->GenerateCode(creation_context.device->GetInfo());
result->GenerateCode(creation_context.device->info_);
return result->UploadDataForWinograd4x4To6x6(
attr.weights, *creation_context.device, creation_context.context);
}

View File

@ -430,7 +430,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context,
const Convolution2DAttributes& attr,
ConvTexture* result) {
*result = ConvTexture(definition, attr);
result->GenerateCode(creation_context.device->GetInfo());
result->GenerateCode(creation_context.device->info_);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
@ -439,7 +439,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context,
const FullyConnectedAttributes& attr,
ConvTexture* result) {
*result = ConvTexture(definition);
result->GenerateCode(creation_context.device->GetInfo());
result->GenerateCode(creation_context.device->info_);
return result->UploadData(attr.weights, attr.bias, creation_context.context);
}
@ -449,7 +449,7 @@ absl::Status CreateConvTextureWino4x4To6x6(
*result = ConvTexture(definition);
result->different_weights_for_height_ = true;
result->block_size_ = {4, 1, 2};
result->GenerateCode(creation_context.device->GetInfo());
result->GenerateCode(creation_context.device->info_);
return result->UploadDataForWinograd4x4To6x6(
attr.weights, *creation_context.device, creation_context.context);
}

View File

@ -152,8 +152,8 @@ __kernel void from_tensor()" +
context_ = &environment->context();
shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h,
input_def.dimensions.w, input_def.dimensions.c);
RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {},
&shader_src));
RETURN_IF_ERROR(
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
return environment->program_cache()->GetOrCreateCLKernel(
shader_src, "from_tensor", environment->context(),
environment->device(), &kernel_);
@ -272,8 +272,8 @@ __kernel void to_tensor()" +
context_ = &environment->context();
shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h,
output_def.dimensions.w, output_def.dimensions.c);
RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {},
&shader_src));
RETURN_IF_ERROR(
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
return environment->program_cache()->GetOrCreateCLKernel(
shader_src, "to_tensor", environment->context(), environment->device(),
&kernel_);

View File

@ -360,8 +360,8 @@ absl::Status CreateConvolutionTransposed(
const CreationContext& creation_context, const OperationDef& definition,
const ConvolutionTransposedAttributes& attr,
ConvolutionTransposed* result) {
*result = ConvolutionTransposed(definition, attr,
creation_context.device->GetInfo());
*result =
ConvolutionTransposed(definition, attr, creation_context.device->info_);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));

View File

@ -175,7 +175,7 @@ absl::Status CreateConvolutionTransposedThin(
"ConvolutionTransposedThin doesn't support this attributes");
}
*result = ConvolutionTransposedThin(definition, attr,
creation_context.device->GetInfo());
creation_context.device->info_);
RETURN_IF_ERROR(
result->UploadData(attr.weights, attr.bias, creation_context.context));
return absl::OkStatus();

View File

@ -330,7 +330,7 @@ absl::Status CreateDepthwiseConv3x3(
bool local_mem_uploads =
weights_are_buffer && creation_context.device->IsPowerVR();
*result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads,
creation_context.device->GetInfo());
creation_context.device->info_);
return result->UploadWeightsAndBiases(attr.weights, attr.bias,
creation_context.context);
}

View File

@ -166,10 +166,9 @@ absl::Status CreateElementwiseTwoInput(
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
bool swap_inputs, GPUOperation* result) {
const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
TensorStorageType storage_type =
SelectBestStorageType(*creation_context.context, *creation_context.device,
shape, definition.GetPrimaryStorageType(),
definition.GetDataType(), Layout::HWC);
TensorStorageType storage_type = SelectBestStorageType(
creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
definition.GetDataType(), Layout::HWC);
TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
Tensor gpu_tensor;
RETURN_IF_ERROR(CreateTensor(*creation_context.context,
@ -205,10 +204,9 @@ absl::Status CreateElementwiseTwoInput(
bool swap_inputs, GPUOperation* result) {
const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
constant_tensor.shape.c);
TensorStorageType storage_type =
SelectBestStorageType(*creation_context.context, *creation_context.device,
shape, definition.GetPrimaryStorageType(),
definition.GetDataType(), Layout::HWC);
TensorStorageType storage_type = SelectBestStorageType(
creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
definition.GetDataType(), Layout::HWC);
TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
Tensor gpu_tensor;
RETURN_IF_ERROR(CreateTensor(*creation_context.context,

View File

@ -114,7 +114,7 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context,
const OperationDef& definition,
const FullyConnectedAttributes& attr,
FullyConnected* result) {
*result = FullyConnected(definition, creation_context.device->GetInfo());
*result = FullyConnected(definition, creation_context.device->info_);
RETURN_IF_ERROR(
result->UploadWeights(attr.weights, creation_context.context));

View File

@ -227,7 +227,7 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
RETURN_IF_ERROR(
MergeOperations(linked_operations_, &args_, &element_wise_code));
RETURN_IF_ERROR(args_.TransformToCLCode(
creation_context.device->GetInfo(),
creation_context.device->info_,
{{dst_tensors_names_[0], element_wise_code}}, &code));
code = absl::Substitute(code, args_.GetListOfArgs());
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
@ -238,13 +238,13 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
RETURN_IF_ERROR(
MergeOperations(linked_operations_, &args_, &element_wise_code));
RETURN_IF_ERROR(args_.TransformToCLCode(
creation_context.device->GetInfo(),
creation_context.device->info_,
{{dst_tensors_names_[0], element_wise_code}}, &code_));
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
code_, "main_function", compiler_options_, *creation_context.context,
*creation_context.device, &kernel_));
}
return PostCompileCheck(creation_context.device->GetInfo());
return PostCompileCheck(creation_context.device->info_);
}
int3 GPUOperation::GetGridSize() const {

View File

@ -117,7 +117,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device,
CalculationsPrecision precision,
int task_size) {
const float task_size_per_cu =
task_size / static_cast<float>(device.GetInfo().compute_units_count);
task_size / static_cast<float>(device.info_.compute_units_count);
int block_size = 1;
float threshold_1 = FLT_MAX;
float threshold_2 = FLT_MAX;
@ -125,7 +125,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device,
if (!device.IsMali()) {
return 1;
}
MaliInfo mali_info = device.GetInfo().mali_info;
MaliInfo mali_info = device.info_.mali_info;
switch (precision) {
case CalculationsPrecision::F16:
if (mali_info.IsBifrostGen1()) {

View File

@ -303,7 +303,7 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context,
const Padding2D& padding,
Winograd4x4To36* result) {
*result =
Winograd4x4To36(definition, padding, creation_context.device->GetInfo());
Winograd4x4To36(definition, padding, creation_context.device->info_);
return result->UploadBt(creation_context.context);
}
@ -502,7 +502,7 @@ absl::Status CreateWinograd36To4x4(
const CreationContext& creation_context, const OperationDef& definition,
const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases,
Winograd36To4x4* result) {
*result = Winograd36To4x4(definition, creation_context.device->GetInfo());
*result = Winograd36To4x4(definition, creation_context.device->info_);
TensorLinearDescriptor desc;
desc.storage_type = LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();

View File

@ -167,7 +167,7 @@ absl::Status SelectConvolution(const Convolution2DAttributes& attr,
const CreationContext& creation_context,
const OperationDef& op_def, ModelHints hints,
std::unique_ptr<GPUOperation>* ptr) {
const auto& device_info = creation_context.device->GetInfo();
const auto& device_info = creation_context.device->info_;
if (device_info.IsAdreno()) {
return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
hints, ptr);
@ -190,7 +190,7 @@ absl::Status SelectConvolutionForWinograd(
const Convolution2DAttributes& attr, const BHWC& dst_shape,
const CreationContext& creation_context, const OperationDef& op_def,
ModelHints hints, std::unique_ptr<GPUOperation>* ptr) {
const auto& device_info = creation_context.device->GetInfo();
const auto& device_info = creation_context.device->info_;
if (device_info.IsAdreno()) {
return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context,
op_def, hints, ptr);
@ -215,7 +215,7 @@ absl::Status SelectConvolutionWithDynamicWeights(
const BHWC& dst_shape, const CreationContext& creation_context,
const OperationDef& op_def, ModelHints hints,
std::unique_ptr<GPUOperation>* ptr, ConvWeightsDescription* weights_desc) {
const auto& device_info = creation_context.device->GetInfo();
const auto& device_info = creation_context.device->info_;
if (device_info.IsAdreno()) {
return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape,
creation_context, op_def,

View File

@ -105,7 +105,7 @@ absl::Status SelectConvolutionTransposed(
const ConvolutionTransposedAttributes& attr,
const CreationContext& creation_context, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
const auto& device_info = creation_context.device->GetInfo();
const auto& device_info = creation_context.device->info_;
if (device_info.IsAdreno()) {
return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
ptr);

View File

@ -69,7 +69,7 @@ absl::Status SelectDWConvolutionMali(
const auto storage_type = op_def.src_tensors[0].storage_type;
bool buffer_type = storage_type == TensorStorageType::BUFFER ||
storage_type == TensorStorageType::IMAGE_BUFFER;
MaliInfo mali_info = creation_context.device->GetInfo().mali_info;
MaliInfo mali_info = creation_context.device->info_.mali_info;
if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() &&
!buffer_type && op_def.precision != CalculationsPrecision::F32) {
DepthwiseConv3x3 dw_conv;
@ -90,7 +90,7 @@ absl::Status SelectDWConvolution(const DepthwiseConvolution2DAttributes& attr,
const CreationContext& creation_context,
const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
const auto& device_info = creation_context.device->GetInfo();
const auto& device_info = creation_context.device->info_;
if (device_info.IsAdreno()) {
return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr);
} else if (device_info.IsPowerVR()) {

View File

@ -104,7 +104,7 @@ absl::Status SelectFullyConnected(const FullyConnectedAttributes& attr,
const CreationContext& creation_context,
const OperationDef& op_def, int batch_size,
std::unique_ptr<GPUOperation>* ptr) {
const auto& device_info = creation_context.device->GetInfo();
const auto& device_info = creation_context.device->info_;
if (device_info.IsAdreno()) {
return SelectFullyConnectedAdreno(attr, creation_context, op_def,
batch_size, ptr);

View File

@ -75,14 +75,14 @@ absl::Status WinogradFromNode(const CreationContext& creation_context,
const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
TensorDescriptor td_0;
td_0.storage_type = SelectBestStorageType(
*creation_context.context, *creation_context.device, shape_0,
creation_context.device->info_, shape_0,
op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type,
op_def.src_tensors[0].layout);
td_0.data_type = op_def.src_tensors[0].data_type;
td_0.layout = op_def.src_tensors[0].layout;
TensorDescriptor td_1;
td_1.storage_type = SelectBestStorageType(
*creation_context.context, *creation_context.device, shape_1,
creation_context.device->info_, shape_1,
op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type,
op_def.src_tensors[0].layout);
td_1.data_type = op_def.src_tensors[0].data_type;
@ -175,7 +175,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
channels[i] = inputs[i]->tensor.shape.c;
}
return SelectConcat(attr, channels, op_def,
creation_context.device->GetInfo(), gpu_op);
creation_context.device->info_, gpu_op);
}
case OperationType::CONVOLUTION_2D: {
auto attr =
@ -248,7 +248,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
inputs[0]->tensor.shape.b, gpu_op);
}
case OperationType::LSTM: {
SelectLSTM(op_def, creation_context.device->GetInfo(), gpu_op);
SelectLSTM(op_def, creation_context.device->info_, gpu_op);
return absl::OkStatus();
}
case OperationType::MAX_UNPOOLING_2D: {
@ -259,8 +259,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
}
case OperationType::MEAN: {
auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
return SelectMean(attr, op_def, creation_context.device->GetInfo(),
gpu_op);
return SelectMean(attr, op_def, creation_context.device->info_, gpu_op);
}
case OperationType::MEAN_STDDEV_NORMALIZATION: {
MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def);

View File

@ -15,18 +15,16 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
namespace tflite {
namespace gpu {
namespace cl {
bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
const BHWDC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape,
const TensorDescriptor& descriptor) {
const int slices = DivideRoundUp(shape.c, 4);
switch (descriptor.storage_type) {
@ -35,64 +33,60 @@ bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
const int buffer_size =
shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
return buffer_size <= device.GetInfo().buffer_max_size;
return buffer_size <= device_info.buffer_max_size;
}
case TensorStorageType::IMAGE_BUFFER:
return shape.b * shape.w * shape.h * shape.d * slices <=
device.GetInfo().image_buffer_max_size;
device_info.image_buffer_max_size;
case TensorStorageType::TEXTURE_3D:
if (device.cl_version() < OpenCLVersion::CL_1_2 && slices == 1) {
if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) {
// clCreateImage3D (that used in CL 1.0/1.1) can not create image with
// depth = 1 by specification;
return false;
}
return shape.w * shape.b <= device.GetInfo().image3d_max_width &&
shape.h <= device.GetInfo().image3d_max_height &&
slices * shape.d <= device.GetInfo().image3d_max_depth;
return shape.w * shape.b <= device_info.image3d_max_width &&
shape.h <= device_info.image3d_max_height &&
slices * shape.d <= device_info.image3d_max_depth;
case TensorStorageType::TEXTURE_ARRAY:
// Bug on some Adreno. b/131099086
if (slices == 1 && !device.SupportsOneLayerTextureArray()) {
if (slices == 1 && !device_info.SupportsOneLayerTextureArray()) {
return false;
}
return shape.w * shape.b <= device.GetInfo().image2d_max_width &&
shape.h <= device.GetInfo().image2d_max_height &&
slices * shape.d <= device.GetInfo().image_array_max_layers;
return shape.w * shape.b <= device_info.image2d_max_width &&
shape.h <= device_info.image2d_max_height &&
slices * shape.d <= device_info.image_array_max_layers;
case TensorStorageType::TEXTURE_2D:
return shape.w * shape.b * shape.d <=
device.GetInfo().image2d_max_width &&
shape.h * slices <= device.GetInfo().image2d_max_height;
return shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
shape.h * slices <= device_info.image2d_max_height;
case TensorStorageType::SINGLE_TEXTURE_2D:
return shape.c <= 4 &&
context.IsFloatTexture2DSupported(shape.c, descriptor.data_type) &&
shape.w * shape.b * shape.d <=
device.GetInfo().image2d_max_width &&
shape.h <= device.GetInfo().image2d_max_height;
device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
shape.h <= device_info.image2d_max_height;
default:
return false;
}
}
bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
const BHWC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape,
const TensorDescriptor& descriptor) {
const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
return CanCreateTensorWithShape(context, device, shape5D, descriptor);
return CanCreateTensorWithShape(device_info, shape5D, descriptor);
}
TensorStorageType SelectBestStorageType(const CLContext& context,
const CLDevice& device,
TensorStorageType SelectBestStorageType(const DeviceInfo& device_info,
const BHWC& shape,
const TensorStorageType& desired,
const DataType& data_type,
const Layout& layout) {
if (CanCreateTensorWithShape(context, device, shape,
if (CanCreateTensorWithShape(device_info, shape,
TensorDescriptor{data_type, desired, layout})) {
return desired;
}
auto GetBestTypeAfterTextureArray = [&]() {
if (device.SupportsImageBuffer() &&
if (device_info.SupportsImageBuffer() &&
CanCreateTensorWithShape(
context, device, shape,
device_info, shape,
TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER,
layout})) {
return TensorStorageType::IMAGE_BUFFER;
@ -101,9 +95,9 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
}
};
auto GetBestTypeAfterTexture2D = [&]() {
if (device.SupportsTextureArray() &&
if (device_info.SupportsTextureArray() &&
CanCreateTensorWithShape(
context, device, shape,
device_info, shape,
TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY,
layout})) {
return TensorStorageType::TEXTURE_ARRAY;
@ -113,7 +107,7 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
};
auto GetBestTypeAfterTexture3D = [&]() {
if (CanCreateTensorWithShape(
context, device, shape,
device_info, shape,
TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D,
layout})) {
return TensorStorageType::TEXTURE_2D;

View File

@ -16,8 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
@ -26,16 +25,13 @@ namespace tflite {
namespace gpu {
namespace cl {
bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
const BHWDC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape,
const TensorDescriptor& descriptor);
bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
const BHWC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape,
const TensorDescriptor& descriptor);
TensorStorageType SelectBestStorageType(const CLContext& context,
const CLDevice& device,
TensorStorageType SelectBestStorageType(const DeviceInfo& device_info,
const BHWC& shape,
const TensorStorageType& desired,
const DataType& data_type,