From 235dbc2dc26a00ebf6d2b1f3cba37cba9d548ffc Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Thu, 6 Aug 2020 17:49:37 -0700 Subject: [PATCH] Added info about supported image formats to DeviceInfo. storage_type_util cleaned from OpenCL API calls/structs. PiperOrigin-RevId: 325347475 Change-Id: I096636e4dd837ef9754df70caf37842c605c24f5 --- tensorflow/lite/delegates/gpu/cl/BUILD | 5 +- .../lite/delegates/gpu/cl/cl_context.cc | 39 +++++++++++++ tensorflow/lite/delegates/gpu/cl/cl_device.cc | 16 ++--- tensorflow/lite/delegates/gpu/cl/cl_device.h | 8 +-- .../lite/delegates/gpu/cl/cl_program.cc | 4 +- .../lite/delegates/gpu/cl/device_info.cc | 22 +++++++ .../lite/delegates/gpu/cl/device_info.h | 17 ++++++ .../lite/delegates/gpu/cl/environment.cc | 4 +- .../delegates/gpu/cl/inference_context.cc | 7 +-- .../gpu/cl/kernels/conv_buffer_1x1.cc | 4 +- .../gpu/cl/kernels/conv_constants.cc | 4 +- .../delegates/gpu/cl/kernels/conv_powervr.cc | 12 ++-- .../delegates/gpu/cl/kernels/conv_texture.cc | 6 +- .../delegates/gpu/cl/kernels/converter.cc | 8 +-- .../gpu/cl/kernels/convolution_transposed.cc | 4 +- .../cl/kernels/convolution_transposed_thin.cc | 2 +- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 2 +- .../delegates/gpu/cl/kernels/elementwise.cc | 14 ++--- .../gpu/cl/kernels/fully_connected.cc | 2 +- .../delegates/gpu/cl/kernels/gpu_operation.cc | 6 +- .../lite/delegates/gpu/cl/kernels/util.cc | 4 +- .../lite/delegates/gpu/cl/kernels/winograd.cc | 4 +- .../gpu/cl/selectors/convolution_selector.cc | 6 +- .../convolution_transposed_selector.cc | 2 +- .../cl/selectors/dw_convolution_selector.cc | 4 +- .../cl/selectors/fully_connected_selector.cc | 2 +- .../gpu/cl/selectors/operation_selector.cc | 11 ++-- .../delegates/gpu/cl/storage_type_util.cc | 58 +++++++++---------- .../lite/delegates/gpu/cl/storage_type_util.h | 12 ++-- 29 files changed, 177 insertions(+), 112 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 66bcbc826ea..d6076e221bd 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -257,6 +257,7 @@ cc_library( srcs = ["device_info.cc"], hdrs = ["device_info.h"], deps = [ + "//tensorflow/lite/delegates/gpu/common:data_type", "@com_google_absl//absl/strings", ], ) @@ -468,11 +469,11 @@ cc_library( srcs = ["storage_type_util.cc"], hdrs = ["storage_type_util.h"], deps = [ - ":cl_context", - ":cl_device", + ":device_info", ":tensor_type", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:shape", + "//tensorflow/lite/delegates/gpu/common:util", ], ) diff --git a/tensorflow/lite/delegates/gpu/cl/cl_context.cc b/tensorflow/lite/delegates/gpu/cl/cl_context.cc index e697c78b692..9a8f404c46e 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_context.cc @@ -43,6 +43,44 @@ std::vector GetSupportedImage2DFormats(cl_context context, return result; } +bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type, + int num_channels) { + return image_format.image_channel_data_type == + ToImageChannelType(data_type) && + image_format.image_channel_order == ToChannelOrder(num_channels); +} + +void AddSupportedImageFormats(cl_context context, DeviceInfo* info) { + auto supported_formats = + GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE); + for (auto format : supported_formats) { + info->supports_r_f16_tex2d = + info->supports_r_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 1); + info->supports_rg_f16_tex2d = + info->supports_rg_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 2); + info->supports_rgb_f16_tex2d = + info->supports_rgb_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 3); + info->supports_rgba_f16_tex2d = + info->supports_rgba_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 4); + info->supports_r_f32_tex2d = + info->supports_r_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 1); + info->supports_rg_f32_tex2d = + info->supports_rg_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 2); + info->supports_rgb_f32_tex2d = + info->supports_rgb_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 3); + info->supports_rgba_f32_tex2d = + info->supports_rgba_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 4); + } +} + absl::Status CreateCLContext(const CLDevice& device, cl_context_properties* properties, CLContext* result) { @@ -55,6 +93,7 @@ absl::Status CreateCLContext(const CLDevice& device, absl::StrCat("Failed to create a compute context - ", CLErrorCodeToString(error_code))); } + AddSupportedImageFormats(context, &device.info_); *result = CLContext(context, true); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc index b93bfb25ad1..16f5ce217e9 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc @@ -248,24 +248,24 @@ DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) { } CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id) - : id_(id), platform_id_(platform_id), info_(DeviceInfoFromDeviceID(id)) {} + : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {} CLDevice::CLDevice(const CLDevice& device) - : id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {} + : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) {} CLDevice& CLDevice::operator=(const CLDevice& device) { if (this != &device) { + info_ = device.info_; id_ = device.id_; platform_id_ = device.platform_id_; - info_ = device.info_; } return *this; } CLDevice::CLDevice(CLDevice&& device) - : id_(device.id_), - platform_id_(device.platform_id_), - info_(std::move(device.info_)) { + : info_(std::move(device.info_)), + id_(device.id_), + platform_id_(device.platform_id_) { device.id_ = nullptr; device.platform_id_ = nullptr; } @@ -274,9 +274,9 @@ CLDevice& CLDevice::operator=(CLDevice&& device) { if (this != &device) { id_ = nullptr; platform_id_ = nullptr; + info_ = std::move(device.info_); std::swap(id_, device.id_); std::swap(platform_id_, device.platform_id_); - info_ = std::move(device.info_); } return *this; } @@ -368,7 +368,7 @@ bool CLDevice::IsAMD() const { return info_.IsAMD(); } bool CLDevice::IsIntel() const { return info_.IsIntel(); } bool CLDevice::SupportsOneLayerTextureArray() const { - return !IsAdreno() || info_.adreno_info.support_one_layer_texture_array; + return info_.SupportsOneLayerTextureArray(); } void CLDevice::DisableOneLayerTextureArray() { diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.h b/tensorflow/lite/delegates/gpu/cl/cl_device.h index 7e4792b0a53..e7cd274661d 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.h @@ -46,9 +46,6 @@ class CLDevice { cl_platform_id platform() const { return platform_id_; } std::string GetPlatformVersion() const; - const DeviceInfo& GetInfo() const { return info_; } - const DeviceInfo* GetInfoPtr() const { return &info_; } - Vendor vendor() const { return info_.vendor; } OpenCLVersion cl_version() const { return info_.cl_version; } bool SupportsFP16() const; @@ -76,10 +73,13 @@ class CLDevice { bool SupportsOneLayerTextureArray() const; void DisableOneLayerTextureArray(); + // We update device info during context creation, so as supported texture + // formats can be requested from context only. + mutable DeviceInfo info_; + private: cl_device_id id_ = nullptr; cl_platform_id platform_id_ = nullptr; - DeviceInfo info_; }; absl::Status CreateDefaultGPUDevice(CLDevice* result); diff --git a/tensorflow/lite/delegates/gpu/cl/cl_program.cc b/tensorflow/lite/delegates/gpu/cl/cl_program.cc index 3b821dc3a5d..fd29ebec2d7 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_program.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_program.cc @@ -78,13 +78,13 @@ std::string CompilerOptionToString(const CLDevice& device, CompilerOptions option) { switch (option) { case CompilerOptions::ADRENO_FULL_SIMD_LINE: - if (device.GetInfo().adreno_info.gpu_version < 500) { + if (device.info_.adreno_info.gpu_version < 500) { return "-qcom-accelerate-16-bit"; } else { return "-qcom-accelerate-16-bit=true"; } case CompilerOptions::ADRENO_MORE_WAVES: - if (device.GetInfo().adreno_info.gpu_version >= 500) { + if (device.info_.adreno_info.gpu_version >= 500) { return "-qcom-accelerate-16-bit=false"; } else { return ""; diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.cc b/tensorflow/lite/delegates/gpu/cl/device_info.cc index 7e0acb87ab7..d1ed69aa100 100644 --- a/tensorflow/lite/delegates/gpu/cl/device_info.cc +++ b/tensorflow/lite/delegates/gpu/cl/device_info.cc @@ -231,6 +231,28 @@ bool DeviceInfo::SupportsImage3D() const { return supports_image3d_writes; } +bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const { + if (channels == 1) { + return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d + : supports_r_f16_tex2d; + } else if (channels == 2) { + return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d + : supports_rg_f16_tex2d; + } else if (channels == 3) { + return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d + : supports_rgb_f16_tex2d; + } else if (channels == 4) { + return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d + : supports_rgba_f16_tex2d; + } else { + return false; + } +} + +bool DeviceInfo::SupportsOneLayerTextureArray() const { + return !IsAdreno() || adreno_info.support_one_layer_texture_array; +} + bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; } bool DeviceInfo::IsAdreno3xx() const { diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.h b/tensorflow/lite/delegates/gpu/cl/device_info.h index b13fe3df846..7123891ecf4 100644 --- a/tensorflow/lite/delegates/gpu/cl/device_info.h +++ b/tensorflow/lite/delegates/gpu/cl/device_info.h @@ -19,6 +19,8 @@ limitations under the License. #include #include +#include "tensorflow/lite/delegates/gpu/common/data_type.h" + // for use only in device_info.cc, but keep here to make tests int GetAdrenoGPUVersion(const std::string& gpu_version); @@ -131,6 +133,11 @@ struct DeviceInfo { bool SupportsImageBuffer() const; bool SupportsImage3D() const; + bool SupportsFloatImage2D(DataType data_type, int channels) const; + + // To track bug on some Adreno. b/131099086 + bool SupportsOneLayerTextureArray() const; + std::vector extensions; bool supports_fp16; bool supports_image3d_writes; @@ -157,6 +164,16 @@ struct DeviceInfo { bool supports_fp32_rtn; bool supports_fp16_rtn; + bool supports_r_f16_tex2d = false; + bool supports_rg_f16_tex2d = false; + bool supports_rgb_f16_tex2d = false; + bool supports_rgba_f16_tex2d = false; + + bool supports_r_f32_tex2d = false; + bool supports_rg_f32_tex2d = false; + bool supports_rgb_f32_tex2d = false; + bool supports_rgba_f32_tex2d = false; + AdrenoInfo adreno_info; MaliInfo mali_info; }; diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc index c8b0b56978c..3d5546a8ebb 100644 --- a/tensorflow/lite/delegates/gpu/cl/environment.cc +++ b/tensorflow/lite/delegates/gpu/cl/environment.cc @@ -47,7 +47,7 @@ __kernel void main_function(__write_only image2d_array_t dst) { absl::Status CheckKernelSupportOfOneLayerTextureArray(Environment* env, bool* result) { // No bug on Adreno 6xx - if (env->device().GetInfo().adreno_info.gpu_version >= 600) { + if (env->device().info_.adreno_info.gpu_version >= 600) { *result = true; return absl::OkStatus(); } @@ -242,7 +242,7 @@ TensorStorageType GetFastestStorageType(const CLDevice& gpu) { } else if (gpu.IsPowerVR()) { return TensorStorageType::TEXTURE_2D; } else if (gpu.IsMali()) { - const MaliInfo mali_info = gpu.GetInfo().mali_info; + const MaliInfo mali_info = gpu.info_.mali_info; if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() || mali_info.IsValhall()) { return TensorStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 689b511bb5e..7802024302b 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -203,7 +203,7 @@ absl::Status InferenceContext::InitFromGraph( TuningParameters tuning_parameters; tuning_parameters.queue = env->profiling_queue(); - tuning_parameters.info = env->device().GetInfoPtr(); + tuning_parameters.info = &env->device().info_; if (create_info.hints.Check(ModelHints::kFastTuning)) { tuning_parameters.tuning_type = TuningType::FAST; } @@ -244,14 +244,13 @@ void InferenceContext::ReserveGraphTensors( if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) { if (shape.c < 4 && CanCreateTensorWithShape( - *creation_context.context, *creation_context.device, shape, + creation_context.device->info_, shape, TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D, layout})) { storage_type = TensorStorageType::SINGLE_TEXTURE_2D; } } - storage_type = SelectBestStorageType(*creation_context.context, - *creation_context.device, shape, + storage_type = SelectBestStorageType(creation_context.device->info_, shape, storage_type, data_type, layout); tensor_reserver_.Add( t->id, {shape, TensorDescriptor{data_type, storage_type, layout}}); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index de6021aa5fe..3216e2ef246 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -93,7 +93,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, } bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 && definition.precision != CalculationsPrecision::F32; - bool is_midgard = device.IsMali() && device.GetInfo().mali_info.IsMidgard(); + bool is_midgard = device.IsMali() && device.info_.mali_info.IsMidgard(); if (is_midgard) { if (can_use_flt8) { conv_params.element_size = 8; @@ -141,7 +141,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, conv_params.element_size = 4; conv_params.block_size = int3(1, 1, 1); if (device.IsMali() && definition.precision == CalculationsPrecision::F16 && - device.GetInfo().compute_units_count <= 4) { + device.info_.compute_units_count <= 4) { conv_params.block_size.x *= 2; } return conv_params; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index d5a2a56c19c..1ed900a2080 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -271,7 +271,7 @@ bool IsConvConstantsSupported(const CLDevice& device, ? sizeof(float) : sizeof(half); const int filters_buffer_size = filters_count * float_size; - const int kConstantMaxSize = GetOptimalMaxConstantSize(device.GetInfo()); + const int kConstantMaxSize = GetOptimalMaxConstantSize(device.info_); const int flt4_registers = DivideRoundUp(w_shape.o, 4); return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8; } @@ -283,7 +283,7 @@ absl::Status CreateConvConstants(const CreationContext& creation_context, if (!IsConvConstantsSupported(*creation_context.device, definition, attr)) { return absl::InvalidArgumentError("ConvConstants doesn't supported"); } - *result = ConvConstants(definition, attr, creation_context.device->GetInfo()); + *result = ConvConstants(definition, attr, creation_context.device->info_); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index f69368d1083..d65595d068c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -718,7 +718,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( if (dst_shape) { int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; float task_size_per_cu = - static_cast(task_size) / device.GetInfo().compute_units_count; + static_cast(task_size) / device.info_.compute_units_count; int block_size = conv_params.block_size.x * conv_params.block_size.y * conv_params.block_size.z; float threads_per_cu = task_size_per_cu / block_size; @@ -844,7 +844,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( conv_params.block_size = int3(1, 1, 1); } conv_params.src_depth_loop_size = 1; - MaliInfo mali_info = device.GetInfo().mali_info; + MaliInfo mali_info = device.info_.mali_info; if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) { conv_params.src_depth_loop_size = 2; } @@ -987,7 +987,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context, const Convolution2DAttributes& attr, ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -996,7 +996,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context, const FullyConnectedAttributes& attr, ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -1006,7 +1006,7 @@ absl::Status CreateConvPowerVRDynamicWeights( ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, weights_shape, *creation_context.device, dst_shape); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadBias(attr.bias, creation_context.context); } @@ -1017,7 +1017,7 @@ absl::Status CreateConvPowerVRWino4x4To6x6( *result = ConvPowerVR(definition); result->conv_params_ = result->GuessBestParamsWinograd( *creation_context.device, definition, attr, dst_shape); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadDataForWinograd4x4To6x6( attr.weights, *creation_context.device, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index 88035556c86..581c8056ced 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -430,7 +430,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context, const Convolution2DAttributes& attr, ConvTexture* result) { *result = ConvTexture(definition, attr); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -439,7 +439,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context, const FullyConnectedAttributes& attr, ConvTexture* result) { *result = ConvTexture(definition); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -449,7 +449,7 @@ absl::Status CreateConvTextureWino4x4To6x6( *result = ConvTexture(definition); result->different_weights_for_height_ = true; result->block_size_ = {4, 1, 2}; - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadDataForWinograd4x4To6x6( attr.weights, *creation_context.device, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc index bd5aaed8bc3..d52efb43a08 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc @@ -152,8 +152,8 @@ __kernel void from_tensor()" + context_ = &environment->context(); shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, input_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {}, - &shader_src)); + RETURN_IF_ERROR( + args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); return environment->program_cache()->GetOrCreateCLKernel( shader_src, "from_tensor", environment->context(), environment->device(), &kernel_); @@ -272,8 +272,8 @@ __kernel void to_tensor()" + context_ = &environment->context(); shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h, output_def.dimensions.w, output_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {}, - &shader_src)); + RETURN_IF_ERROR( + args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); return environment->program_cache()->GetOrCreateCLKernel( shader_src, "to_tensor", environment->context(), environment->device(), &kernel_); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index a139b3affc9..c6eba691306 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -360,8 +360,8 @@ absl::Status CreateConvolutionTransposed( const CreationContext& creation_context, const OperationDef& definition, const ConvolutionTransposedAttributes& attr, ConvolutionTransposed* result) { - *result = ConvolutionTransposed(definition, attr, - creation_context.device->GetInfo()); + *result = + ConvolutionTransposed(definition, attr, creation_context.device->info_); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc index 2268313a867..54fd5396869 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc @@ -175,7 +175,7 @@ absl::Status CreateConvolutionTransposedThin( "ConvolutionTransposedThin doesn't support this attributes"); } *result = ConvolutionTransposedThin(definition, attr, - creation_context.device->GetInfo()); + creation_context.device->info_); RETURN_IF_ERROR( result->UploadData(attr.weights, attr.bias, creation_context.context)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index e171231fc0a..f0213cda805 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -330,7 +330,7 @@ absl::Status CreateDepthwiseConv3x3( bool local_mem_uploads = weights_are_buffer && creation_context.device->IsPowerVR(); *result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads, - creation_context.device->GetInfo()); + creation_context.device->info_); return result->UploadWeightsAndBiases(attr.weights, attr.bias, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index f735f1aa047..7d46ae4a109 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -166,10 +166,9 @@ absl::Status CreateElementwiseTwoInput( const tflite::gpu::Tensor& constant_tensor, bool swap_inputs, GPUOperation* result) { const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v); - TensorStorageType storage_type = - SelectBestStorageType(*creation_context.context, *creation_context.device, - shape, definition.GetPrimaryStorageType(), - definition.GetDataType(), Layout::HWC); + TensorStorageType storage_type = SelectBestStorageType( + creation_context.device->info_, shape, definition.GetPrimaryStorageType(), + definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; Tensor gpu_tensor; RETURN_IF_ERROR(CreateTensor(*creation_context.context, @@ -205,10 +204,9 @@ absl::Status CreateElementwiseTwoInput( bool swap_inputs, GPUOperation* result) { const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w, constant_tensor.shape.c); - TensorStorageType storage_type = - SelectBestStorageType(*creation_context.context, *creation_context.device, - shape, definition.GetPrimaryStorageType(), - definition.GetDataType(), Layout::HWC); + TensorStorageType storage_type = SelectBestStorageType( + creation_context.device->info_, shape, definition.GetPrimaryStorageType(), + definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; Tensor gpu_tensor; RETURN_IF_ERROR(CreateTensor(*creation_context.context, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index 2ab0284febe..ec18fa9f6e2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -114,7 +114,7 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context, const OperationDef& definition, const FullyConnectedAttributes& attr, FullyConnected* result) { - *result = FullyConnected(definition, creation_context.device->GetInfo()); + *result = FullyConnected(definition, creation_context.device->info_); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 7260048c6d3..97c72c1269d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -227,7 +227,7 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->GetInfo(), + creation_context.device->info_, {{dst_tensors_names_[0], element_wise_code}}, &code)); code = absl::Substitute(code, args_.GetListOfArgs()); RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( @@ -238,13 +238,13 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->GetInfo(), + creation_context.device->info_, {{dst_tensors_names_[0], element_wise_code}}, &code_)); RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device, &kernel_)); } - return PostCompileCheck(creation_context.device->GetInfo()); + return PostCompileCheck(creation_context.device->info_); } int3 GPUOperation::GetGridSize() const { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc index 3fe4ffb4acd..d907c0210b7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc @@ -117,7 +117,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device, CalculationsPrecision precision, int task_size) { const float task_size_per_cu = - task_size / static_cast(device.GetInfo().compute_units_count); + task_size / static_cast(device.info_.compute_units_count); int block_size = 1; float threshold_1 = FLT_MAX; float threshold_2 = FLT_MAX; @@ -125,7 +125,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device, if (!device.IsMali()) { return 1; } - MaliInfo mali_info = device.GetInfo().mali_info; + MaliInfo mali_info = device.info_.mali_info; switch (precision) { case CalculationsPrecision::F16: if (mali_info.IsBifrostGen1()) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index 4c3e8ddba05..698599a5bbd 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -303,7 +303,7 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, const Padding2D& padding, Winograd4x4To36* result) { *result = - Winograd4x4To36(definition, padding, creation_context.device->GetInfo()); + Winograd4x4To36(definition, padding, creation_context.device->info_); return result->UploadBt(creation_context.context); } @@ -502,7 +502,7 @@ absl::Status CreateWinograd36To4x4( const CreationContext& creation_context, const OperationDef& definition, const tflite::gpu::Tensor& biases, Winograd36To4x4* result) { - *result = Winograd36To4x4(definition, creation_context.device->GetInfo()); + *result = Winograd36To4x4(definition, creation_context.device->info_); TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc index b577757057e..4a97bdddd09 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc @@ -167,7 +167,7 @@ absl::Status SelectConvolution(const Convolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); @@ -190,7 +190,7 @@ absl::Status SelectConvolutionForWinograd( const Convolution2DAttributes& attr, const BHWC& dst_shape, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); @@ -215,7 +215,7 @@ absl::Status SelectConvolutionWithDynamicWeights( const BHWC& dst_shape, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr, ConvWeightsDescription* weights_desc) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, creation_context, op_def, diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc index 56864f2c575..c00d9392702 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc @@ -105,7 +105,7 @@ absl::Status SelectConvolutionTransposed( const ConvolutionTransposedAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectConvolutionTransposedAdreno(attr, creation_context, op_def, ptr); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc index fafd9078f6f..b89f271365f 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc @@ -69,7 +69,7 @@ absl::Status SelectDWConvolutionMali( const auto storage_type = op_def.src_tensors[0].storage_type; bool buffer_type = storage_type == TensorStorageType::BUFFER || storage_type == TensorStorageType::IMAGE_BUFFER; - MaliInfo mali_info = creation_context.device->GetInfo().mali_info; + MaliInfo mali_info = creation_context.device->info_.mali_info; if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && !buffer_type && op_def.precision != CalculationsPrecision::F32) { DepthwiseConv3x3 dw_conv; @@ -90,7 +90,7 @@ absl::Status SelectDWConvolution(const DepthwiseConvolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr); } else if (device_info.IsPowerVR()) { diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc index cb967e45b52..0df8e243da3 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc @@ -104,7 +104,7 @@ absl::Status SelectFullyConnected(const FullyConnectedAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, int batch_size, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectFullyConnectedAdreno(attr, creation_context, op_def, batch_size, ptr); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 5661c3d0a37..b257e5a85da 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -75,14 +75,14 @@ absl::Status WinogradFromNode(const CreationContext& creation_context, const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c}; TensorDescriptor td_0; td_0.storage_type = SelectBestStorageType( - *creation_context.context, *creation_context.device, shape_0, + creation_context.device->info_, shape_0, op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout); td_0.data_type = op_def.src_tensors[0].data_type; td_0.layout = op_def.src_tensors[0].layout; TensorDescriptor td_1; td_1.storage_type = SelectBestStorageType( - *creation_context.context, *creation_context.device, shape_1, + creation_context.device->info_, shape_1, op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout); td_1.data_type = op_def.src_tensors[0].data_type; @@ -175,7 +175,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, channels[i] = inputs[i]->tensor.shape.c; } return SelectConcat(attr, channels, op_def, - creation_context.device->GetInfo(), gpu_op); + creation_context.device->info_, gpu_op); } case OperationType::CONVOLUTION_2D: { auto attr = @@ -248,7 +248,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, inputs[0]->tensor.shape.b, gpu_op); } case OperationType::LSTM: { - SelectLSTM(op_def, creation_context.device->GetInfo(), gpu_op); + SelectLSTM(op_def, creation_context.device->info_, gpu_op); return absl::OkStatus(); } case OperationType::MAX_UNPOOLING_2D: { @@ -259,8 +259,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, } case OperationType::MEAN: { auto attr = absl::any_cast(node.operation.attributes); - return SelectMean(attr, op_def, creation_context.device->GetInfo(), - gpu_op); + return SelectMean(attr, op_def, creation_context.device->info_, gpu_op); } case OperationType::MEAN_STDDEV_NORMALIZATION: { MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def); diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc index 755da0c7619..ddcb65e07f9 100644 --- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc +++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc @@ -15,18 +15,16 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h" -#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" -#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/common/util.h" namespace tflite { namespace gpu { namespace cl { -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWDC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape, const TensorDescriptor& descriptor) { const int slices = DivideRoundUp(shape.c, 4); switch (descriptor.storage_type) { @@ -35,64 +33,60 @@ bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, 4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2); const int buffer_size = shape.b * shape.w * shape.h * shape.d * slices * flt4_size; - return buffer_size <= device.GetInfo().buffer_max_size; + return buffer_size <= device_info.buffer_max_size; } case TensorStorageType::IMAGE_BUFFER: return shape.b * shape.w * shape.h * shape.d * slices <= - device.GetInfo().image_buffer_max_size; + device_info.image_buffer_max_size; case TensorStorageType::TEXTURE_3D: - if (device.cl_version() < OpenCLVersion::CL_1_2 && slices == 1) { + if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) { // clCreateImage3D (that used in CL 1.0/1.1) can not create image with // depth = 1 by specification; return false; } - return shape.w * shape.b <= device.GetInfo().image3d_max_width && - shape.h <= device.GetInfo().image3d_max_height && - slices * shape.d <= device.GetInfo().image3d_max_depth; + return shape.w * shape.b <= device_info.image3d_max_width && + shape.h <= device_info.image3d_max_height && + slices * shape.d <= device_info.image3d_max_depth; case TensorStorageType::TEXTURE_ARRAY: // Bug on some Adreno. b/131099086 - if (slices == 1 && !device.SupportsOneLayerTextureArray()) { + if (slices == 1 && !device_info.SupportsOneLayerTextureArray()) { return false; } - return shape.w * shape.b <= device.GetInfo().image2d_max_width && - shape.h <= device.GetInfo().image2d_max_height && - slices * shape.d <= device.GetInfo().image_array_max_layers; + return shape.w * shape.b <= device_info.image2d_max_width && + shape.h <= device_info.image2d_max_height && + slices * shape.d <= device_info.image_array_max_layers; case TensorStorageType::TEXTURE_2D: - return shape.w * shape.b * shape.d <= - device.GetInfo().image2d_max_width && - shape.h * slices <= device.GetInfo().image2d_max_height; + return shape.w * shape.b * shape.d <= device_info.image2d_max_width && + shape.h * slices <= device_info.image2d_max_height; case TensorStorageType::SINGLE_TEXTURE_2D: return shape.c <= 4 && - context.IsFloatTexture2DSupported(shape.c, descriptor.data_type) && - shape.w * shape.b * shape.d <= - device.GetInfo().image2d_max_width && - shape.h <= device.GetInfo().image2d_max_height; + device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) && + shape.w * shape.b * shape.d <= device_info.image2d_max_width && + shape.h <= device_info.image2d_max_height; default: return false; } } -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape, const TensorDescriptor& descriptor) { const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CanCreateTensorWithShape(context, device, shape5D, descriptor); + return CanCreateTensorWithShape(device_info, shape5D, descriptor); } -TensorStorageType SelectBestStorageType(const CLContext& context, - const CLDevice& device, +TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, const BHWC& shape, const TensorStorageType& desired, const DataType& data_type, const Layout& layout) { - if (CanCreateTensorWithShape(context, device, shape, + if (CanCreateTensorWithShape(device_info, shape, TensorDescriptor{data_type, desired, layout})) { return desired; } auto GetBestTypeAfterTextureArray = [&]() { - if (device.SupportsImageBuffer() && + if (device_info.SupportsImageBuffer() && CanCreateTensorWithShape( - context, device, shape, + device_info, shape, TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER, layout})) { return TensorStorageType::IMAGE_BUFFER; @@ -101,9 +95,9 @@ TensorStorageType SelectBestStorageType(const CLContext& context, } }; auto GetBestTypeAfterTexture2D = [&]() { - if (device.SupportsTextureArray() && + if (device_info.SupportsTextureArray() && CanCreateTensorWithShape( - context, device, shape, + device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY, layout})) { return TensorStorageType::TEXTURE_ARRAY; @@ -113,7 +107,7 @@ TensorStorageType SelectBestStorageType(const CLContext& context, }; auto GetBestTypeAfterTexture3D = [&]() { if (CanCreateTensorWithShape( - context, device, shape, + device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D, layout})) { return TensorStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h index 87fc2206e81..a8a82008461 100644 --- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h +++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h @@ -16,8 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_ -#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" -#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" +#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -26,16 +25,13 @@ namespace tflite { namespace gpu { namespace cl { -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWDC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape, const TensorDescriptor& descriptor); -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape, const TensorDescriptor& descriptor); -TensorStorageType SelectBestStorageType(const CLContext& context, - const CLDevice& device, +TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, const BHWC& shape, const TensorStorageType& desired, const DataType& data_type,