Added info about supported image formats to DeviceInfo.
storage_type_util cleaned of OpenCL API calls/structs.

PiperOrigin-RevId: 325347475
Change-Id: I096636e4dd837ef9754df70caf37842c605c24f5
commit 235dbc2dc2
parent d9ea505110
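The first large hunk below (GetSupportedImage2DFormats / AddSupportedImageFormats) fills the new DeviceInfo booleans from the image formats the OpenCL runtime reports for a freshly created context. For orientation, here is a minimal sketch of what a helper like GetSupportedImage2DFormats typically does, assuming it wraps the standard clGetSupportedImageFormats query; the error handling and exact body are illustrative, not the TensorFlow Lite implementation:

#include <vector>
#include <CL/cl.h>

// Sketch only: query the 2D image formats a context supports for the given
// memory flags (e.g. CL_MEM_READ_WRITE), using the usual two-call pattern.
std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context,
                                                        cl_mem_flags flags) {
  cl_uint num_formats = 0;
  // First call: ask only how many formats the context supports.
  if (clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, 0,
                                 nullptr, &num_formats) != CL_SUCCESS ||
      num_formats == 0) {
    return {};
  }
  std::vector<cl_image_format> formats(num_formats);
  // Second call: fill the vector with the actual format descriptors.
  if (clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D,
                                 num_formats, formats.data(),
                                 nullptr) != CL_SUCCESS) {
    return {};
  }
  return formats;
}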
@@ -257,6 +257,7 @@ cc_library(
    srcs = ["device_info.cc"],
    hdrs = ["device_info.h"],
    deps = [
        "//tensorflow/lite/delegates/gpu/common:data_type",
        "@com_google_absl//absl/strings",
    ],
)
@@ -468,11 +469,11 @@ cc_library(
    srcs = ["storage_type_util.cc"],
    hdrs = ["storage_type_util.h"],
    deps = [
        ":cl_context",
        ":cl_device",
        ":device_info",
        ":tensor_type",
        "//tensorflow/lite/delegates/gpu/common:data_type",
        "//tensorflow/lite/delegates/gpu/common:shape",
        "//tensorflow/lite/delegates/gpu/common:util",
    ],
)
@@ -43,6 +43,44 @@ std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context,
  return result;
}

bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type,
                          int num_channels) {
  return image_format.image_channel_data_type ==
             ToImageChannelType(data_type) &&
         image_format.image_channel_order == ToChannelOrder(num_channels);
}

void AddSupportedImageFormats(cl_context context, DeviceInfo* info) {
  auto supported_formats =
      GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
  for (auto format : supported_formats) {
    info->supports_r_f16_tex2d =
        info->supports_r_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 1);
    info->supports_rg_f16_tex2d =
        info->supports_rg_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 2);
    info->supports_rgb_f16_tex2d =
        info->supports_rgb_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 3);
    info->supports_rgba_f16_tex2d =
        info->supports_rgba_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 4);
    info->supports_r_f32_tex2d =
        info->supports_r_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 1);
    info->supports_rg_f32_tex2d =
        info->supports_rg_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 2);
    info->supports_rgb_f32_tex2d =
        info->supports_rgb_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 3);
    info->supports_rgba_f32_tex2d =
        info->supports_rgba_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 4);
  }
}

absl::Status CreateCLContext(const CLDevice& device,
                             cl_context_properties* properties,
                             CLContext* result) {
@@ -55,6 +93,7 @@ absl::Status CreateCLContext(const CLDevice& device,
        absl::StrCat("Failed to create a compute context - ",
                     CLErrorCodeToString(error_code)));
  }
  AddSupportedImageFormats(context, &device.info_);

  *result = CLContext(context, true);
  return absl::OkStatus();
@@ -248,24 +248,24 @@ DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) {
}

CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
    : id_(id), platform_id_(platform_id), info_(DeviceInfoFromDeviceID(id)) {}
    : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {}

CLDevice::CLDevice(const CLDevice& device)
    : id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {}
    : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) {}

CLDevice& CLDevice::operator=(const CLDevice& device) {
  if (this != &device) {
    info_ = device.info_;
    id_ = device.id_;
    platform_id_ = device.platform_id_;
    info_ = device.info_;
  }
  return *this;
}

CLDevice::CLDevice(CLDevice&& device)
    : id_(device.id_),
      platform_id_(device.platform_id_),
      info_(std::move(device.info_)) {
    : info_(std::move(device.info_)),
      id_(device.id_),
      platform_id_(device.platform_id_) {
  device.id_ = nullptr;
  device.platform_id_ = nullptr;
}
@@ -274,9 +274,9 @@ CLDevice& CLDevice::operator=(CLDevice&& device) {
  if (this != &device) {
    id_ = nullptr;
    platform_id_ = nullptr;
    info_ = std::move(device.info_);
    std::swap(id_, device.id_);
    std::swap(platform_id_, device.platform_id_);
    info_ = std::move(device.info_);
  }
  return *this;
}
@@ -368,7 +368,7 @@ bool CLDevice::IsAMD() const { return info_.IsAMD(); }
bool CLDevice::IsIntel() const { return info_.IsIntel(); }

bool CLDevice::SupportsOneLayerTextureArray() const {
  return !IsAdreno() || info_.adreno_info.support_one_layer_texture_array;
  return info_.SupportsOneLayerTextureArray();
}

void CLDevice::DisableOneLayerTextureArray() {
@@ -46,9 +46,6 @@ class CLDevice {
  cl_platform_id platform() const { return platform_id_; }
  std::string GetPlatformVersion() const;

  const DeviceInfo& GetInfo() const { return info_; }
  const DeviceInfo* GetInfoPtr() const { return &info_; }

  Vendor vendor() const { return info_.vendor; }
  OpenCLVersion cl_version() const { return info_.cl_version; }
  bool SupportsFP16() const;
@@ -76,10 +73,13 @@ class CLDevice {
  bool SupportsOneLayerTextureArray() const;
  void DisableOneLayerTextureArray();

  // We update device info during context creation, so as supported texture
  // formats can be requested from context only.
  mutable DeviceInfo info_;

 private:
  cl_device_id id_ = nullptr;
  cl_platform_id platform_id_ = nullptr;
  DeviceInfo info_;
};

absl::Status CreateDefaultGPUDevice(CLDevice* result);
@@ -78,13 +78,13 @@ std::string CompilerOptionToString(const CLDevice& device,
                                   CompilerOptions option) {
  switch (option) {
    case CompilerOptions::ADRENO_FULL_SIMD_LINE:
      if (device.GetInfo().adreno_info.gpu_version < 500) {
      if (device.info_.adreno_info.gpu_version < 500) {
        return "-qcom-accelerate-16-bit";
      } else {
        return "-qcom-accelerate-16-bit=true";
      }
    case CompilerOptions::ADRENO_MORE_WAVES:
      if (device.GetInfo().adreno_info.gpu_version >= 500) {
      if (device.info_.adreno_info.gpu_version >= 500) {
        return "-qcom-accelerate-16-bit=false";
      } else {
        return "";
@@ -231,6 +231,28 @@ bool DeviceInfo::SupportsImage3D() const {
  return supports_image3d_writes;
}

bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
  if (channels == 1) {
    return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d
                                          : supports_r_f16_tex2d;
  } else if (channels == 2) {
    return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d
                                          : supports_rg_f16_tex2d;
  } else if (channels == 3) {
    return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d
                                          : supports_rgb_f16_tex2d;
  } else if (channels == 4) {
    return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d
                                          : supports_rgba_f16_tex2d;
  } else {
    return false;
  }
}

bool DeviceInfo::SupportsOneLayerTextureArray() const {
  return !IsAdreno() || adreno_info.support_one_layer_texture_array;
}

bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; }

bool DeviceInfo::IsAdreno3xx() const {
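For context, this is how the new query is meant to be consumed. An illustrative caller (the helper name is hypothetical, not code from this commit), mirroring the SINGLE_TEXTURE_2D check that the storage-type selection hunks perform later in this commit: that layout packs all channels of a pixel into one texel, so it is only viable when the channel count fits into a single RGBA texel and the device exposes a matching float image format.

// Hypothetical helper, shown only to illustrate the intended usage of
// DeviceInfo::SupportsFloatImage2D.
bool CanUseSingleTexture2D(const DeviceInfo& device_info, DataType data_type,
                           int channels) {
  return channels <= 4 &&
         device_info.SupportsFloatImage2D(data_type, channels);
}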
@@ -19,6 +19,8 @@ limitations under the License.
#include <string>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"

// for use only in device_info.cc, but keep here to make tests
int GetAdrenoGPUVersion(const std::string& gpu_version);

@@ -131,6 +133,11 @@ struct DeviceInfo {
  bool SupportsImageBuffer() const;
  bool SupportsImage3D() const;

  bool SupportsFloatImage2D(DataType data_type, int channels) const;

  // To track bug on some Adreno. b/131099086
  bool SupportsOneLayerTextureArray() const;

  std::vector<std::string> extensions;
  bool supports_fp16;
  bool supports_image3d_writes;
@@ -157,6 +164,16 @@ struct DeviceInfo {
  bool supports_fp32_rtn;
  bool supports_fp16_rtn;

  bool supports_r_f16_tex2d = false;
  bool supports_rg_f16_tex2d = false;
  bool supports_rgb_f16_tex2d = false;
  bool supports_rgba_f16_tex2d = false;

  bool supports_r_f32_tex2d = false;
  bool supports_rg_f32_tex2d = false;
  bool supports_rgb_f32_tex2d = false;
  bool supports_rgba_f32_tex2d = false;

  AdrenoInfo adreno_info;
  MaliInfo mali_info;
};
@@ -47,7 +47,7 @@ __kernel void main_function(__write_only image2d_array_t dst) {
absl::Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
                                                      bool* result) {
  // No bug on Adreno 6xx
  if (env->device().GetInfo().adreno_info.gpu_version >= 600) {
  if (env->device().info_.adreno_info.gpu_version >= 600) {
    *result = true;
    return absl::OkStatus();
  }
@@ -242,7 +242,7 @@ TensorStorageType GetFastestStorageType(const CLDevice& gpu) {
  } else if (gpu.IsPowerVR()) {
    return TensorStorageType::TEXTURE_2D;
  } else if (gpu.IsMali()) {
    const MaliInfo mali_info = gpu.GetInfo().mali_info;
    const MaliInfo mali_info = gpu.info_.mali_info;
    if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
        mali_info.IsValhall()) {
      return TensorStorageType::TEXTURE_2D;
@@ -203,7 +203,7 @@ absl::Status InferenceContext::InitFromGraph(

  TuningParameters tuning_parameters;
  tuning_parameters.queue = env->profiling_queue();
  tuning_parameters.info = env->device().GetInfoPtr();
  tuning_parameters.info = &env->device().info_;
  if (create_info.hints.Check(ModelHints::kFastTuning)) {
    tuning_parameters.tuning_type = TuningType::FAST;
  }
@@ -244,14 +244,13 @@ void InferenceContext::ReserveGraphTensors(
    if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) {
      if (shape.c < 4 &&
          CanCreateTensorWithShape(
              *creation_context.context, *creation_context.device, shape,
              creation_context.device->info_, shape,
              TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D,
                               layout})) {
        storage_type = TensorStorageType::SINGLE_TEXTURE_2D;
      }
    }
    storage_type = SelectBestStorageType(*creation_context.context,
                                         *creation_context.device, shape,
    storage_type = SelectBestStorageType(creation_context.device->info_, shape,
                                         storage_type, data_type, layout);
    tensor_reserver_.Add(
        t->id, {shape, TensorDescriptor{data_type, storage_type, layout}});
@@ -93,7 +93,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device,
  }
  bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 &&
                      definition.precision != CalculationsPrecision::F32;
  bool is_midgard = device.IsMali() && device.GetInfo().mali_info.IsMidgard();
  bool is_midgard = device.IsMali() && device.info_.mali_info.IsMidgard();
  if (is_midgard) {
    if (can_use_flt8) {
      conv_params.element_size = 8;
@@ -141,7 +141,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device,
  conv_params.element_size = 4;
  conv_params.block_size = int3(1, 1, 1);
  if (device.IsMali() && definition.precision == CalculationsPrecision::F16 &&
      device.GetInfo().compute_units_count <= 4) {
      device.info_.compute_units_count <= 4) {
    conv_params.block_size.x *= 2;
  }
  return conv_params;
@@ -271,7 +271,7 @@ bool IsConvConstantsSupported(const CLDevice& device,
                              ? sizeof(float)
                              : sizeof(half);
  const int filters_buffer_size = filters_count * float_size;
  const int kConstantMaxSize = GetOptimalMaxConstantSize(device.GetInfo());
  const int kConstantMaxSize = GetOptimalMaxConstantSize(device.info_);
  const int flt4_registers = DivideRoundUp(w_shape.o, 4);
  return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8;
}
@@ -283,7 +283,7 @@ absl::Status CreateConvConstants(const CreationContext& creation_context,
  if (!IsConvConstantsSupported(*creation_context.device, definition, attr)) {
    return absl::InvalidArgumentError("ConvConstants doesn't supported");
  }
  *result = ConvConstants(definition, attr, creation_context.device->GetInfo());
  *result = ConvConstants(definition, attr, creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadWeights(attr.weights, creation_context.context));
@@ -718,7 +718,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
  if (dst_shape) {
    int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
    float task_size_per_cu =
        static_cast<float>(task_size) / device.GetInfo().compute_units_count;
        static_cast<float>(task_size) / device.info_.compute_units_count;
    int block_size = conv_params.block_size.x * conv_params.block_size.y *
                     conv_params.block_size.z;
    float threads_per_cu = task_size_per_cu / block_size;
@@ -844,7 +844,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
      conv_params.block_size = int3(1, 1, 1);
    }
    conv_params.src_depth_loop_size = 1;
    MaliInfo mali_info = device.GetInfo().mali_info;
    MaliInfo mali_info = device.info_.mali_info;
    if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) {
      conv_params.src_depth_loop_size = 2;
    }
@@ -987,7 +987,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context,
                               const Convolution2DAttributes& attr,
                               ConvPowerVR* result, const BHWC* dst_shape) {
  *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -996,7 +996,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context,
                               const FullyConnectedAttributes& attr,
                               ConvPowerVR* result, const BHWC* dst_shape) {
  *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -1006,7 +1006,7 @@ absl::Status CreateConvPowerVRDynamicWeights(
    ConvPowerVR* result, const BHWC* dst_shape) {
  *result = ConvPowerVR(definition, attr, weights_shape,
                        *creation_context.device, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadBias(attr.bias, creation_context.context);
}

@@ -1017,7 +1017,7 @@ absl::Status CreateConvPowerVRWino4x4To6x6(
  *result = ConvPowerVR(definition);
  result->conv_params_ = result->GuessBestParamsWinograd(
      *creation_context.device, definition, attr, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadDataForWinograd4x4To6x6(
      attr.weights, *creation_context.device, creation_context.context);
}
@@ -430,7 +430,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context,
                               const Convolution2DAttributes& attr,
                               ConvTexture* result) {
  *result = ConvTexture(definition, attr);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -439,7 +439,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context,
                               const FullyConnectedAttributes& attr,
                               ConvTexture* result) {
  *result = ConvTexture(definition);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -449,7 +449,7 @@ absl::Status CreateConvTextureWino4x4To6x6(
  *result = ConvTexture(definition);
  result->different_weights_for_height_ = true;
  result->block_size_ = {4, 1, 2};
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadDataForWinograd4x4To6x6(
      attr.weights, *creation_context.device, creation_context.context);
}
@@ -152,8 +152,8 @@ __kernel void from_tensor()" +
  context_ = &environment->context();
  shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h,
                input_def.dimensions.w, input_def.dimensions.c);
  RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {},
                                          &shader_src));
  RETURN_IF_ERROR(
      args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
  return environment->program_cache()->GetOrCreateCLKernel(
      shader_src, "from_tensor", environment->context(),
      environment->device(), &kernel_);
@@ -272,8 +272,8 @@ __kernel void to_tensor()" +
  context_ = &environment->context();
  shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h,
                output_def.dimensions.w, output_def.dimensions.c);
  RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {},
                                          &shader_src));
  RETURN_IF_ERROR(
      args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
  return environment->program_cache()->GetOrCreateCLKernel(
      shader_src, "to_tensor", environment->context(), environment->device(),
      &kernel_);
@@ -360,8 +360,8 @@ absl::Status CreateConvolutionTransposed(
    const CreationContext& creation_context, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr,
    ConvolutionTransposed* result) {
  *result = ConvolutionTransposed(definition, attr,
                                  creation_context.device->GetInfo());
  *result =
      ConvolutionTransposed(definition, attr, creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadWeights(attr.weights, creation_context.context));

@@ -175,7 +175,7 @@ absl::Status CreateConvolutionTransposedThin(
        "ConvolutionTransposedThin doesn't support this attributes");
  }
  *result = ConvolutionTransposedThin(definition, attr,
                                      creation_context.device->GetInfo());
                                      creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadData(attr.weights, attr.bias, creation_context.context));
  return absl::OkStatus();
@@ -330,7 +330,7 @@ absl::Status CreateDepthwiseConv3x3(
  bool local_mem_uploads =
      weights_are_buffer && creation_context.device->IsPowerVR();
  *result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads,
                             creation_context.device->GetInfo());
                             creation_context.device->info_);
  return result->UploadWeightsAndBiases(attr.weights, attr.bias,
                                        creation_context.context);
}
@@ -166,10 +166,9 @@ absl::Status CreateElementwiseTwoInput(
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
    bool swap_inputs, GPUOperation* result) {
  const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
  TensorStorageType storage_type =
      SelectBestStorageType(*creation_context.context, *creation_context.device,
                            shape, definition.GetPrimaryStorageType(),
                            definition.GetDataType(), Layout::HWC);
  TensorStorageType storage_type = SelectBestStorageType(
      creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
      definition.GetDataType(), Layout::HWC);
  TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
  Tensor gpu_tensor;
  RETURN_IF_ERROR(CreateTensor(*creation_context.context,
@@ -205,10 +204,9 @@ absl::Status CreateElementwiseTwoInput(
    bool swap_inputs, GPUOperation* result) {
  const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
                          constant_tensor.shape.c);
  TensorStorageType storage_type =
      SelectBestStorageType(*creation_context.context, *creation_context.device,
                            shape, definition.GetPrimaryStorageType(),
                            definition.GetDataType(), Layout::HWC);
  TensorStorageType storage_type = SelectBestStorageType(
      creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
      definition.GetDataType(), Layout::HWC);
  TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
  Tensor gpu_tensor;
  RETURN_IF_ERROR(CreateTensor(*creation_context.context,
@@ -114,7 +114,7 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context,
                                  const OperationDef& definition,
                                  const FullyConnectedAttributes& attr,
                                  FullyConnected* result) {
  *result = FullyConnected(definition, creation_context.device->GetInfo());
  *result = FullyConnected(definition, creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadWeights(attr.weights, creation_context.context));
@@ -227,7 +227,7 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
    RETURN_IF_ERROR(
        MergeOperations(linked_operations_, &args_, &element_wise_code));
    RETURN_IF_ERROR(args_.TransformToCLCode(
        creation_context.device->GetInfo(),
        creation_context.device->info_,
        {{dst_tensors_names_[0], element_wise_code}}, &code));
    code = absl::Substitute(code, args_.GetListOfArgs());
    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
@@ -238,13 +238,13 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
    RETURN_IF_ERROR(
        MergeOperations(linked_operations_, &args_, &element_wise_code));
    RETURN_IF_ERROR(args_.TransformToCLCode(
        creation_context.device->GetInfo(),
        creation_context.device->info_,
        {{dst_tensors_names_[0], element_wise_code}}, &code_));
    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
        code_, "main_function", compiler_options_, *creation_context.context,
        *creation_context.device, &kernel_));
  }
  return PostCompileCheck(creation_context.device->GetInfo());
  return PostCompileCheck(creation_context.device->info_);
}

int3 GPUOperation::GetGridSize() const {
@@ -117,7 +117,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device,
                                   CalculationsPrecision precision,
                                   int task_size) {
  const float task_size_per_cu =
      task_size / static_cast<float>(device.GetInfo().compute_units_count);
      task_size / static_cast<float>(device.info_.compute_units_count);
  int block_size = 1;
  float threshold_1 = FLT_MAX;
  float threshold_2 = FLT_MAX;
@@ -125,7 +125,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device,
  if (!device.IsMali()) {
    return 1;
  }
  MaliInfo mali_info = device.GetInfo().mali_info;
  MaliInfo mali_info = device.info_.mali_info;
  switch (precision) {
    case CalculationsPrecision::F16:
      if (mali_info.IsBifrostGen1()) {
@@ -303,7 +303,7 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context,
                                   const Padding2D& padding,
                                   Winograd4x4To36* result) {
  *result =
      Winograd4x4To36(definition, padding, creation_context.device->GetInfo());
      Winograd4x4To36(definition, padding, creation_context.device->info_);
  return result->UploadBt(creation_context.context);
}

@@ -502,7 +502,7 @@ absl::Status CreateWinograd36To4x4(
    const CreationContext& creation_context, const OperationDef& definition,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases,
    Winograd36To4x4* result) {
  *result = Winograd36To4x4(definition, creation_context.device->GetInfo());
  *result = Winograd36To4x4(definition, creation_context.device->info_);
  TensorLinearDescriptor desc;
  desc.storage_type = LinearStorageType::TEXTURE_2D;
  desc.element_type = definition.GetDataType();
@@ -167,7 +167,7 @@ absl::Status SelectConvolution(const Convolution2DAttributes& attr,
                               const CreationContext& creation_context,
                               const OperationDef& op_def, ModelHints hints,
                               std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
                                   hints, ptr);
@@ -190,7 +190,7 @@ absl::Status SelectConvolutionForWinograd(
    const Convolution2DAttributes& attr, const BHWC& dst_shape,
    const CreationContext& creation_context, const OperationDef& op_def,
    ModelHints hints, std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context,
                                           op_def, hints, ptr);
@@ -215,7 +215,7 @@ absl::Status SelectConvolutionWithDynamicWeights(
    const BHWC& dst_shape, const CreationContext& creation_context,
    const OperationDef& op_def, ModelHints hints,
    std::unique_ptr<GPUOperation>* ptr, ConvWeightsDescription* weights_desc) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape,
                                                 creation_context, op_def,
@@ -105,7 +105,7 @@ absl::Status SelectConvolutionTransposed(
    const ConvolutionTransposedAttributes& attr,
    const CreationContext& creation_context, const OperationDef& op_def,
    std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
                                             ptr);
@@ -69,7 +69,7 @@ absl::Status SelectDWConvolutionMali(
  const auto storage_type = op_def.src_tensors[0].storage_type;
  bool buffer_type = storage_type == TensorStorageType::BUFFER ||
                     storage_type == TensorStorageType::IMAGE_BUFFER;
  MaliInfo mali_info = creation_context.device->GetInfo().mali_info;
  MaliInfo mali_info = creation_context.device->info_.mali_info;
  if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() &&
      !buffer_type && op_def.precision != CalculationsPrecision::F32) {
    DepthwiseConv3x3 dw_conv;
@@ -90,7 +90,7 @@ absl::Status SelectDWConvolution(const DepthwiseConvolution2DAttributes& attr,
                                 const CreationContext& creation_context,
                                 const OperationDef& op_def,
                                 std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr);
  } else if (device_info.IsPowerVR()) {
@@ -104,7 +104,7 @@ absl::Status SelectFullyConnected(const FullyConnectedAttributes& attr,
                                  const CreationContext& creation_context,
                                  const OperationDef& op_def, int batch_size,
                                  std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectFullyConnectedAdreno(attr, creation_context, op_def,
                                      batch_size, ptr);
@@ -75,14 +75,14 @@ absl::Status WinogradFromNode(const CreationContext& creation_context,
  const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
  TensorDescriptor td_0;
  td_0.storage_type = SelectBestStorageType(
      *creation_context.context, *creation_context.device, shape_0,
      creation_context.device->info_, shape_0,
      op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type,
      op_def.src_tensors[0].layout);
  td_0.data_type = op_def.src_tensors[0].data_type;
  td_0.layout = op_def.src_tensors[0].layout;
  TensorDescriptor td_1;
  td_1.storage_type = SelectBestStorageType(
      *creation_context.context, *creation_context.device, shape_1,
      creation_context.device->info_, shape_1,
      op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type,
      op_def.src_tensors[0].layout);
  td_1.data_type = op_def.src_tensors[0].data_type;
@@ -175,7 +175,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
        channels[i] = inputs[i]->tensor.shape.c;
      }
      return SelectConcat(attr, channels, op_def,
                          creation_context.device->GetInfo(), gpu_op);
                          creation_context.device->info_, gpu_op);
    }
    case OperationType::CONVOLUTION_2D: {
      auto attr =
@@ -248,7 +248,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
                                 inputs[0]->tensor.shape.b, gpu_op);
    }
    case OperationType::LSTM: {
      SelectLSTM(op_def, creation_context.device->GetInfo(), gpu_op);
      SelectLSTM(op_def, creation_context.device->info_, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::MAX_UNPOOLING_2D: {
@@ -259,8 +259,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
    }
    case OperationType::MEAN: {
      auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
      return SelectMean(attr, op_def, creation_context.device->GetInfo(),
                        gpu_op);
      return SelectMean(attr, op_def, creation_context.device->info_, gpu_op);
    }
    case OperationType::MEAN_STDDEV_NORMALIZATION: {
      MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def);
@@ -15,18 +15,16 @@ limitations under the License.

#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"

#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"

namespace tflite {
namespace gpu {
namespace cl {

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWDC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape,
                              const TensorDescriptor& descriptor) {
  const int slices = DivideRoundUp(shape.c, 4);
  switch (descriptor.storage_type) {
@@ -35,64 +33,60 @@ bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
          4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
      const int buffer_size =
          shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
      return buffer_size <= device.GetInfo().buffer_max_size;
      return buffer_size <= device_info.buffer_max_size;
    }
    case TensorStorageType::IMAGE_BUFFER:
      return shape.b * shape.w * shape.h * shape.d * slices <=
             device.GetInfo().image_buffer_max_size;
             device_info.image_buffer_max_size;
    case TensorStorageType::TEXTURE_3D:
      if (device.cl_version() < OpenCLVersion::CL_1_2 && slices == 1) {
      if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) {
        // clCreateImage3D (that used in CL 1.0/1.1) can not create image with
        // depth = 1 by specification;
        return false;
      }
      return shape.w * shape.b <= device.GetInfo().image3d_max_width &&
             shape.h <= device.GetInfo().image3d_max_height &&
             slices * shape.d <= device.GetInfo().image3d_max_depth;
      return shape.w * shape.b <= device_info.image3d_max_width &&
             shape.h <= device_info.image3d_max_height &&
             slices * shape.d <= device_info.image3d_max_depth;
    case TensorStorageType::TEXTURE_ARRAY:
      // Bug on some Adreno. b/131099086
      if (slices == 1 && !device.SupportsOneLayerTextureArray()) {
      if (slices == 1 && !device_info.SupportsOneLayerTextureArray()) {
        return false;
      }
      return shape.w * shape.b <= device.GetInfo().image2d_max_width &&
             shape.h <= device.GetInfo().image2d_max_height &&
             slices * shape.d <= device.GetInfo().image_array_max_layers;
      return shape.w * shape.b <= device_info.image2d_max_width &&
             shape.h <= device_info.image2d_max_height &&
             slices * shape.d <= device_info.image_array_max_layers;
    case TensorStorageType::TEXTURE_2D:
      return shape.w * shape.b * shape.d <=
                 device.GetInfo().image2d_max_width &&
             shape.h * slices <= device.GetInfo().image2d_max_height;
      return shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
             shape.h * slices <= device_info.image2d_max_height;
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return shape.c <= 4 &&
             context.IsFloatTexture2DSupported(shape.c, descriptor.data_type) &&
             shape.w * shape.b * shape.d <=
                 device.GetInfo().image2d_max_width &&
             shape.h <= device.GetInfo().image2d_max_height;
             device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
             shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
             shape.h <= device_info.image2d_max_height;
    default:
      return false;
  }
}

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape,
                              const TensorDescriptor& descriptor) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CanCreateTensorWithShape(context, device, shape5D, descriptor);
  return CanCreateTensorWithShape(device_info, shape5D, descriptor);
}

TensorStorageType SelectBestStorageType(const CLContext& context,
                                        const CLDevice& device,
TensorStorageType SelectBestStorageType(const DeviceInfo& device_info,
                                        const BHWC& shape,
                                        const TensorStorageType& desired,
                                        const DataType& data_type,
                                        const Layout& layout) {
  if (CanCreateTensorWithShape(context, device, shape,
  if (CanCreateTensorWithShape(device_info, shape,
                               TensorDescriptor{data_type, desired, layout})) {
    return desired;
  }
  auto GetBestTypeAfterTextureArray = [&]() {
    if (device.SupportsImageBuffer() &&
    if (device_info.SupportsImageBuffer() &&
        CanCreateTensorWithShape(
            context, device, shape,
            device_info, shape,
            TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER,
                             layout})) {
      return TensorStorageType::IMAGE_BUFFER;
@@ -101,9 +95,9 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
    }
  };
  auto GetBestTypeAfterTexture2D = [&]() {
    if (device.SupportsTextureArray() &&
    if (device_info.SupportsTextureArray() &&
        CanCreateTensorWithShape(
            context, device, shape,
            device_info, shape,
            TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY,
                             layout})) {
      return TensorStorageType::TEXTURE_ARRAY;
@@ -113,7 +107,7 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
  };
  auto GetBestTypeAfterTexture3D = [&]() {
    if (CanCreateTensorWithShape(
            context, device, shape,
            device_info, shape,
            TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D,
                             layout})) {
      return TensorStorageType::TEXTURE_2D;
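With the OpenCL handles removed from these signatures, storage selection in storage_type_util can run from a bare DeviceInfo. A hedged usage sketch (the wrapper name is hypothetical; the SelectBestStorageType signature matches the header hunk below):

// Ask for TEXTURE_2D and let SelectBestStorageType fall back to another
// storage type if the shape exceeds the limits recorded in DeviceInfo.
TensorStorageType ChooseStorageForTensor(const DeviceInfo& device_info,
                                         const BHWC& shape, DataType data_type,
                                         Layout layout) {
  return SelectBestStorageType(device_info, shape,
                               TensorStorageType::TEXTURE_2D, data_type,
                               layout);
}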
@@ -16,8 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_

#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
@@ -26,16 +25,13 @@ namespace tflite {
namespace gpu {
namespace cl {

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWDC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape,
                              const TensorDescriptor& descriptor);

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape,
                              const TensorDescriptor& descriptor);

TensorStorageType SelectBestStorageType(const CLContext& context,
                                        const CLDevice& device,
TensorStorageType SelectBestStorageType(const DeviceInfo& device_info,
                                        const BHWC& shape,
                                        const TensorStorageType& desired,
                                        const DataType& data_type,