Added info about supported image formats to DeviceInfo.
storage_type_util cleaned of OpenCL API calls/structs.

PiperOrigin-RevId: 325347475
Change-Id: I096636e4dd837ef9754df70caf37842c605c24f5
commit 235dbc2dc2
parent d9ea505110
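The first large hunk below (GetSupportedImage2DFormats / AddSupportedImageFormats) fills the new DeviceInfo booleans from the image formats the OpenCL runtime reports for a freshly created context. For orientation, here is a minimal sketch of what a helper like GetSupportedImage2DFormats typically does, assuming it wraps the standard clGetSupportedImageFormats query; the error handling and exact body are illustrative, not the TensorFlow Lite implementation:

#include <vector>
#include <CL/cl.h>

// Sketch only: query the 2D image formats a context supports for the given
// memory flags (e.g. CL_MEM_READ_WRITE), using the usual two-call pattern.
std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context,
                                                        cl_mem_flags flags) {
  cl_uint num_formats = 0;
  // First call: ask only how many formats the context supports.
  if (clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, 0,
                                 nullptr, &num_formats) != CL_SUCCESS ||
      num_formats == 0) {
    return {};
  }
  std::vector<cl_image_format> formats(num_formats);
  // Second call: fill the vector with the actual format descriptors.
  if (clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D,
                                 num_formats, formats.data(),
                                 nullptr) != CL_SUCCESS) {
    return {};
  }
  return formats;
}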
@@ -257,6 +257,7 @@ cc_library(
    srcs = ["device_info.cc"],
    hdrs = ["device_info.h"],
    deps = [
        "//tensorflow/lite/delegates/gpu/common:data_type",
        "@com_google_absl//absl/strings",
    ],
)
@@ -468,11 +469,11 @@ cc_library(
    srcs = ["storage_type_util.cc"],
    hdrs = ["storage_type_util.h"],
    deps = [
        ":cl_context",
        ":cl_device",
        ":device_info",
        ":tensor_type",
        "//tensorflow/lite/delegates/gpu/common:data_type",
        "//tensorflow/lite/delegates/gpu/common:shape",
        "//tensorflow/lite/delegates/gpu/common:util",
    ],
)
@@ -43,6 +43,44 @@ std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context,
  return result;
}

bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type,
                          int num_channels) {
  return image_format.image_channel_data_type ==
             ToImageChannelType(data_type) &&
         image_format.image_channel_order == ToChannelOrder(num_channels);
}

void AddSupportedImageFormats(cl_context context, DeviceInfo* info) {
  auto supported_formats =
      GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
  for (auto format : supported_formats) {
    info->supports_r_f16_tex2d =
        info->supports_r_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 1);
    info->supports_rg_f16_tex2d =
        info->supports_rg_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 2);
    info->supports_rgb_f16_tex2d =
        info->supports_rgb_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 3);
    info->supports_rgba_f16_tex2d =
        info->supports_rgba_f16_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT16, 4);
    info->supports_r_f32_tex2d =
        info->supports_r_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 1);
    info->supports_rg_f32_tex2d =
        info->supports_rg_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 2);
    info->supports_rgb_f32_tex2d =
        info->supports_rgb_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 3);
    info->supports_rgba_f32_tex2d =
        info->supports_rgba_f32_tex2d ||
        IsEqualToImageFormat(format, DataType::FLOAT32, 4);
  }
}

absl::Status CreateCLContext(const CLDevice& device,
                             cl_context_properties* properties,
                             CLContext* result) {
@@ -55,6 +93,7 @@ absl::Status CreateCLContext(const CLDevice& device,
        absl::StrCat("Failed to create a compute context - ",
                     CLErrorCodeToString(error_code)));
  }
  AddSupportedImageFormats(context, &device.info_);

  *result = CLContext(context, true);
  return absl::OkStatus();
@@ -248,24 +248,24 @@ DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) {
}

CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
    : id_(id), platform_id_(platform_id), info_(DeviceInfoFromDeviceID(id)) {}
    : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {}

CLDevice::CLDevice(const CLDevice& device)
    : id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {}
    : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) {}

CLDevice& CLDevice::operator=(const CLDevice& device) {
  if (this != &device) {
    info_ = device.info_;
    id_ = device.id_;
    platform_id_ = device.platform_id_;
    info_ = device.info_;
  }
  return *this;
}

CLDevice::CLDevice(CLDevice&& device)
    : id_(device.id_),
      platform_id_(device.platform_id_),
      info_(std::move(device.info_)) {
    : info_(std::move(device.info_)),
      id_(device.id_),
      platform_id_(device.platform_id_) {
  device.id_ = nullptr;
  device.platform_id_ = nullptr;
}
@@ -274,9 +274,9 @@ CLDevice& CLDevice::operator=(CLDevice&& device) {
  if (this != &device) {
    id_ = nullptr;
    platform_id_ = nullptr;
    info_ = std::move(device.info_);
    std::swap(id_, device.id_);
    std::swap(platform_id_, device.platform_id_);
    info_ = std::move(device.info_);
  }
  return *this;
}
@@ -368,7 +368,7 @@ bool CLDevice::IsAMD() const { return info_.IsAMD(); }
bool CLDevice::IsIntel() const { return info_.IsIntel(); }

bool CLDevice::SupportsOneLayerTextureArray() const {
  return !IsAdreno() || info_.adreno_info.support_one_layer_texture_array;
  return info_.SupportsOneLayerTextureArray();
}

void CLDevice::DisableOneLayerTextureArray() {
@@ -46,9 +46,6 @@ class CLDevice {
  cl_platform_id platform() const { return platform_id_; }
  std::string GetPlatformVersion() const;

  const DeviceInfo& GetInfo() const { return info_; }
  const DeviceInfo* GetInfoPtr() const { return &info_; }

  Vendor vendor() const { return info_.vendor; }
  OpenCLVersion cl_version() const { return info_.cl_version; }
  bool SupportsFP16() const;
@@ -76,10 +73,13 @@ class CLDevice {
  bool SupportsOneLayerTextureArray() const;
  void DisableOneLayerTextureArray();

  // We update device info during context creation, so as supported texture
  // formats can be requested from context only.
  mutable DeviceInfo info_;

 private:
  cl_device_id id_ = nullptr;
  cl_platform_id platform_id_ = nullptr;
  DeviceInfo info_;
};

absl::Status CreateDefaultGPUDevice(CLDevice* result);
@@ -78,13 +78,13 @@ std::string CompilerOptionToString(const CLDevice& device,
                                   CompilerOptions option) {
  switch (option) {
    case CompilerOptions::ADRENO_FULL_SIMD_LINE:
      if (device.GetInfo().adreno_info.gpu_version < 500) {
      if (device.info_.adreno_info.gpu_version < 500) {
        return "-qcom-accelerate-16-bit";
      } else {
        return "-qcom-accelerate-16-bit=true";
      }
    case CompilerOptions::ADRENO_MORE_WAVES:
      if (device.GetInfo().adreno_info.gpu_version >= 500) {
      if (device.info_.adreno_info.gpu_version >= 500) {
        return "-qcom-accelerate-16-bit=false";
      } else {
        return "";
@@ -231,6 +231,28 @@ bool DeviceInfo::SupportsImage3D() const {
  return supports_image3d_writes;
}

bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
  if (channels == 1) {
    return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d
                                          : supports_r_f16_tex2d;
  } else if (channels == 2) {
    return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d
                                          : supports_rg_f16_tex2d;
  } else if (channels == 3) {
    return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d
                                          : supports_rgb_f16_tex2d;
  } else if (channels == 4) {
    return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d
                                          : supports_rgba_f16_tex2d;
  } else {
    return false;
  }
}

bool DeviceInfo::SupportsOneLayerTextureArray() const {
  return !IsAdreno() || adreno_info.support_one_layer_texture_array;
}

bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; }

bool DeviceInfo::IsAdreno3xx() const {
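For context, this is how the new query is meant to be consumed. An illustrative caller (the helper name is hypothetical, not code from this commit), mirroring the SINGLE_TEXTURE_2D check that the storage-type selection hunks perform later in this commit: that layout packs all channels of a pixel into one texel, so it is only viable when the channel count fits into a single RGBA texel and the device exposes a matching float image format.

// Hypothetical helper, shown only to illustrate the intended usage of
// DeviceInfo::SupportsFloatImage2D.
bool CanUseSingleTexture2D(const DeviceInfo& device_info, DataType data_type,
                           int channels) {
  return channels <= 4 &&
         device_info.SupportsFloatImage2D(data_type, channels);
}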
@@ -19,6 +19,8 @@ limitations under the License.
#include <string>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"

// for use only in device_info.cc, but keep here to make tests
int GetAdrenoGPUVersion(const std::string& gpu_version);

@@ -131,6 +133,11 @@ struct DeviceInfo {
  bool SupportsImageBuffer() const;
  bool SupportsImage3D() const;

  bool SupportsFloatImage2D(DataType data_type, int channels) const;

  // To track bug on some Adreno. b/131099086
  bool SupportsOneLayerTextureArray() const;

  std::vector<std::string> extensions;
  bool supports_fp16;
  bool supports_image3d_writes;
@@ -157,6 +164,16 @@ struct DeviceInfo {
  bool supports_fp32_rtn;
  bool supports_fp16_rtn;

  bool supports_r_f16_tex2d = false;
  bool supports_rg_f16_tex2d = false;
  bool supports_rgb_f16_tex2d = false;
  bool supports_rgba_f16_tex2d = false;

  bool supports_r_f32_tex2d = false;
  bool supports_rg_f32_tex2d = false;
  bool supports_rgb_f32_tex2d = false;
  bool supports_rgba_f32_tex2d = false;

  AdrenoInfo adreno_info;
  MaliInfo mali_info;
};
@@ -47,7 +47,7 @@ __kernel void main_function(__write_only image2d_array_t dst) {
absl::Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
                                                      bool* result) {
  // No bug on Adreno 6xx
  if (env->device().GetInfo().adreno_info.gpu_version >= 600) {
  if (env->device().info_.adreno_info.gpu_version >= 600) {
    *result = true;
    return absl::OkStatus();
  }
@@ -242,7 +242,7 @@ TensorStorageType GetFastestStorageType(const CLDevice& gpu) {
  } else if (gpu.IsPowerVR()) {
    return TensorStorageType::TEXTURE_2D;
  } else if (gpu.IsMali()) {
    const MaliInfo mali_info = gpu.GetInfo().mali_info;
    const MaliInfo mali_info = gpu.info_.mali_info;
    if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
        mali_info.IsValhall()) {
      return TensorStorageType::TEXTURE_2D;
@@ -203,7 +203,7 @@ absl::Status InferenceContext::InitFromGraph(

  TuningParameters tuning_parameters;
  tuning_parameters.queue = env->profiling_queue();
  tuning_parameters.info = env->device().GetInfoPtr();
  tuning_parameters.info = &env->device().info_;
  if (create_info.hints.Check(ModelHints::kFastTuning)) {
    tuning_parameters.tuning_type = TuningType::FAST;
  }
@@ -244,14 +244,13 @@ void InferenceContext::ReserveGraphTensors(
    if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) {
      if (shape.c < 4 &&
          CanCreateTensorWithShape(
              *creation_context.context, *creation_context.device, shape,
              creation_context.device->info_, shape,
              TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D,
                               layout})) {
        storage_type = TensorStorageType::SINGLE_TEXTURE_2D;
      }
    }
    storage_type = SelectBestStorageType(*creation_context.context,
                                         *creation_context.device, shape,
    storage_type = SelectBestStorageType(creation_context.device->info_, shape,
                                         storage_type, data_type, layout);
    tensor_reserver_.Add(
        t->id, {shape, TensorDescriptor{data_type, storage_type, layout}});
@@ -93,7 +93,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device,
  }
  bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 &&
                      definition.precision != CalculationsPrecision::F32;
  bool is_midgard = device.IsMali() && device.GetInfo().mali_info.IsMidgard();
  bool is_midgard = device.IsMali() && device.info_.mali_info.IsMidgard();
  if (is_midgard) {
    if (can_use_flt8) {
      conv_params.element_size = 8;
@@ -141,7 +141,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device,
  conv_params.element_size = 4;
  conv_params.block_size = int3(1, 1, 1);
  if (device.IsMali() && definition.precision == CalculationsPrecision::F16 &&
      device.GetInfo().compute_units_count <= 4) {
      device.info_.compute_units_count <= 4) {
    conv_params.block_size.x *= 2;
  }
  return conv_params;
@@ -271,7 +271,7 @@ bool IsConvConstantsSupported(const CLDevice& device,
                              ? sizeof(float)
                              : sizeof(half);
  const int filters_buffer_size = filters_count * float_size;
  const int kConstantMaxSize = GetOptimalMaxConstantSize(device.GetInfo());
  const int kConstantMaxSize = GetOptimalMaxConstantSize(device.info_);
  const int flt4_registers = DivideRoundUp(w_shape.o, 4);
  return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8;
}
@@ -283,7 +283,7 @@ absl::Status CreateConvConstants(const CreationContext& creation_context,
  if (!IsConvConstantsSupported(*creation_context.device, definition, attr)) {
    return absl::InvalidArgumentError("ConvConstants doesn't supported");
  }
  *result = ConvConstants(definition, attr, creation_context.device->GetInfo());
  *result = ConvConstants(definition, attr, creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadWeights(attr.weights, creation_context.context));
@@ -718,7 +718,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
  if (dst_shape) {
    int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
    float task_size_per_cu =
        static_cast<float>(task_size) / device.GetInfo().compute_units_count;
        static_cast<float>(task_size) / device.info_.compute_units_count;
    int block_size = conv_params.block_size.x * conv_params.block_size.y *
                     conv_params.block_size.z;
    float threads_per_cu = task_size_per_cu / block_size;
@@ -844,7 +844,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
      conv_params.block_size = int3(1, 1, 1);
    }
    conv_params.src_depth_loop_size = 1;
    MaliInfo mali_info = device.GetInfo().mali_info;
    MaliInfo mali_info = device.info_.mali_info;
    if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) {
      conv_params.src_depth_loop_size = 2;
    }
@@ -987,7 +987,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context,
                               const Convolution2DAttributes& attr,
                               ConvPowerVR* result, const BHWC* dst_shape) {
  *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -996,7 +996,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context,
                               const FullyConnectedAttributes& attr,
                               ConvPowerVR* result, const BHWC* dst_shape) {
  *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -1006,7 +1006,7 @@ absl::Status CreateConvPowerVRDynamicWeights(
    ConvPowerVR* result, const BHWC* dst_shape) {
  *result = ConvPowerVR(definition, attr, weights_shape,
                        *creation_context.device, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadBias(attr.bias, creation_context.context);
}

@@ -1017,7 +1017,7 @@ absl::Status CreateConvPowerVRWino4x4To6x6(
  *result = ConvPowerVR(definition);
  result->conv_params_ = result->GuessBestParamsWinograd(
      *creation_context.device, definition, attr, dst_shape);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadDataForWinograd4x4To6x6(
      attr.weights, *creation_context.device, creation_context.context);
}
@@ -430,7 +430,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context,
                               const Convolution2DAttributes& attr,
                               ConvTexture* result) {
  *result = ConvTexture(definition, attr);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -439,7 +439,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context,
                               const FullyConnectedAttributes& attr,
                               ConvTexture* result) {
  *result = ConvTexture(definition);
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadData(attr.weights, attr.bias, creation_context.context);
}

@@ -449,7 +449,7 @@ absl::Status CreateConvTextureWino4x4To6x6(
  *result = ConvTexture(definition);
  result->different_weights_for_height_ = true;
  result->block_size_ = {4, 1, 2};
  result->GenerateCode(creation_context.device->GetInfo());
  result->GenerateCode(creation_context.device->info_);
  return result->UploadDataForWinograd4x4To6x6(
      attr.weights, *creation_context.device, creation_context.context);
}
@@ -152,8 +152,8 @@ __kernel void from_tensor()" +
  context_ = &environment->context();
  shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h,
                input_def.dimensions.w, input_def.dimensions.c);
  RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {},
                                          &shader_src));
  RETURN_IF_ERROR(
      args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
  return environment->program_cache()->GetOrCreateCLKernel(
      shader_src, "from_tensor", environment->context(),
      environment->device(), &kernel_);
@@ -272,8 +272,8 @@ __kernel void to_tensor()" +
  context_ = &environment->context();
  shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h,
                output_def.dimensions.w, output_def.dimensions.c);
  RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {},
                                          &shader_src));
  RETURN_IF_ERROR(
      args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
  return environment->program_cache()->GetOrCreateCLKernel(
      shader_src, "to_tensor", environment->context(), environment->device(),
      &kernel_);
@@ -360,8 +360,8 @@ absl::Status CreateConvolutionTransposed(
    const CreationContext& creation_context, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr,
    ConvolutionTransposed* result) {
  *result = ConvolutionTransposed(definition, attr,
                                  creation_context.device->GetInfo());
  *result =
      ConvolutionTransposed(definition, attr, creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadWeights(attr.weights, creation_context.context));

@@ -175,7 +175,7 @@ absl::Status CreateConvolutionTransposedThin(
        "ConvolutionTransposedThin doesn't support this attributes");
  }
  *result = ConvolutionTransposedThin(definition, attr,
                                      creation_context.device->GetInfo());
                                      creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadData(attr.weights, attr.bias, creation_context.context));
  return absl::OkStatus();
@@ -330,7 +330,7 @@ absl::Status CreateDepthwiseConv3x3(
  bool local_mem_uploads =
      weights_are_buffer && creation_context.device->IsPowerVR();
  *result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads,
                             creation_context.device->GetInfo());
                             creation_context.device->info_);
  return result->UploadWeightsAndBiases(attr.weights, attr.bias,
                                        creation_context.context);
}
@@ -166,10 +166,9 @@ absl::Status CreateElementwiseTwoInput(
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
    bool swap_inputs, GPUOperation* result) {
  const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
  TensorStorageType storage_type =
      SelectBestStorageType(*creation_context.context, *creation_context.device,
                            shape, definition.GetPrimaryStorageType(),
                            definition.GetDataType(), Layout::HWC);
  TensorStorageType storage_type = SelectBestStorageType(
      creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
      definition.GetDataType(), Layout::HWC);
  TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
  Tensor gpu_tensor;
  RETURN_IF_ERROR(CreateTensor(*creation_context.context,
@@ -205,10 +204,9 @@ absl::Status CreateElementwiseTwoInput(
    bool swap_inputs, GPUOperation* result) {
  const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
                          constant_tensor.shape.c);
  TensorStorageType storage_type =
      SelectBestStorageType(*creation_context.context, *creation_context.device,
                            shape, definition.GetPrimaryStorageType(),
                            definition.GetDataType(), Layout::HWC);
  TensorStorageType storage_type = SelectBestStorageType(
      creation_context.device->info_, shape, definition.GetPrimaryStorageType(),
      definition.GetDataType(), Layout::HWC);
  TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
  Tensor gpu_tensor;
  RETURN_IF_ERROR(CreateTensor(*creation_context.context,
@@ -114,7 +114,7 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context,
                                  const OperationDef& definition,
                                  const FullyConnectedAttributes& attr,
                                  FullyConnected* result) {
  *result = FullyConnected(definition, creation_context.device->GetInfo());
  *result = FullyConnected(definition, creation_context.device->info_);
  RETURN_IF_ERROR(
      result->UploadWeights(attr.weights, creation_context.context));
@@ -227,7 +227,7 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
    RETURN_IF_ERROR(
        MergeOperations(linked_operations_, &args_, &element_wise_code));
    RETURN_IF_ERROR(args_.TransformToCLCode(
        creation_context.device->GetInfo(),
        creation_context.device->info_,
        {{dst_tensors_names_[0], element_wise_code}}, &code));
    code = absl::Substitute(code, args_.GetListOfArgs());
    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
@@ -238,13 +238,13 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
    RETURN_IF_ERROR(
        MergeOperations(linked_operations_, &args_, &element_wise_code));
    RETURN_IF_ERROR(args_.TransformToCLCode(
        creation_context.device->GetInfo(),
        creation_context.device->info_,
        {{dst_tensors_names_[0], element_wise_code}}, &code_));
    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
        code_, "main_function", compiler_options_, *creation_context.context,
        *creation_context.device, &kernel_));
  }
  return PostCompileCheck(creation_context.device->GetInfo());
  return PostCompileCheck(creation_context.device->info_);
}

int3 GPUOperation::GetGridSize() const {
@@ -117,7 +117,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device,
                                   CalculationsPrecision precision,
                                   int task_size) {
  const float task_size_per_cu =
      task_size / static_cast<float>(device.GetInfo().compute_units_count);
      task_size / static_cast<float>(device.info_.compute_units_count);
  int block_size = 1;
  float threshold_1 = FLT_MAX;
  float threshold_2 = FLT_MAX;
@@ -125,7 +125,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device,
  if (!device.IsMali()) {
    return 1;
  }
  MaliInfo mali_info = device.GetInfo().mali_info;
  MaliInfo mali_info = device.info_.mali_info;
  switch (precision) {
    case CalculationsPrecision::F16:
      if (mali_info.IsBifrostGen1()) {
@@ -303,7 +303,7 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context,
                                   const Padding2D& padding,
                                   Winograd4x4To36* result) {
  *result =
      Winograd4x4To36(definition, padding, creation_context.device->GetInfo());
      Winograd4x4To36(definition, padding, creation_context.device->info_);
  return result->UploadBt(creation_context.context);
}

@@ -502,7 +502,7 @@ absl::Status CreateWinograd36To4x4(
    const CreationContext& creation_context, const OperationDef& definition,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases,
    Winograd36To4x4* result) {
  *result = Winograd36To4x4(definition, creation_context.device->GetInfo());
  *result = Winograd36To4x4(definition, creation_context.device->info_);
  TensorLinearDescriptor desc;
  desc.storage_type = LinearStorageType::TEXTURE_2D;
  desc.element_type = definition.GetDataType();
@@ -167,7 +167,7 @@ absl::Status SelectConvolution(const Convolution2DAttributes& attr,
                               const CreationContext& creation_context,
                               const OperationDef& op_def, ModelHints hints,
                               std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
                                   hints, ptr);
@@ -190,7 +190,7 @@ absl::Status SelectConvolutionForWinograd(
    const Convolution2DAttributes& attr, const BHWC& dst_shape,
    const CreationContext& creation_context, const OperationDef& op_def,
    ModelHints hints, std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context,
                                           op_def, hints, ptr);
@@ -215,7 +215,7 @@ absl::Status SelectConvolutionWithDynamicWeights(
    const BHWC& dst_shape, const CreationContext& creation_context,
    const OperationDef& op_def, ModelHints hints,
    std::unique_ptr<GPUOperation>* ptr, ConvWeightsDescription* weights_desc) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape,
                                                 creation_context, op_def,
@@ -105,7 +105,7 @@ absl::Status SelectConvolutionTransposed(
    const ConvolutionTransposedAttributes& attr,
    const CreationContext& creation_context, const OperationDef& op_def,
    std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectConvolutionTransposedAdreno(attr, creation_context, op_def,
                                             ptr);
@@ -69,7 +69,7 @@ absl::Status SelectDWConvolutionMali(
  const auto storage_type = op_def.src_tensors[0].storage_type;
  bool buffer_type = storage_type == TensorStorageType::BUFFER ||
                     storage_type == TensorStorageType::IMAGE_BUFFER;
  MaliInfo mali_info = creation_context.device->GetInfo().mali_info;
  MaliInfo mali_info = creation_context.device->info_.mali_info;
  if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() &&
      !buffer_type && op_def.precision != CalculationsPrecision::F32) {
    DepthwiseConv3x3 dw_conv;
@@ -90,7 +90,7 @@ absl::Status SelectDWConvolution(const DepthwiseConvolution2DAttributes& attr,
                                 const CreationContext& creation_context,
                                 const OperationDef& op_def,
                                 std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr);
  } else if (device_info.IsPowerVR()) {
@@ -104,7 +104,7 @@ absl::Status SelectFullyConnected(const FullyConnectedAttributes& attr,
                                  const CreationContext& creation_context,
                                  const OperationDef& op_def, int batch_size,
                                  std::unique_ptr<GPUOperation>* ptr) {
  const auto& device_info = creation_context.device->GetInfo();
  const auto& device_info = creation_context.device->info_;
  if (device_info.IsAdreno()) {
    return SelectFullyConnectedAdreno(attr, creation_context, op_def,
                                      batch_size, ptr);
@@ -75,14 +75,14 @@ absl::Status WinogradFromNode(const CreationContext& creation_context,
  const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
  TensorDescriptor td_0;
  td_0.storage_type = SelectBestStorageType(
      *creation_context.context, *creation_context.device, shape_0,
      creation_context.device->info_, shape_0,
      op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type,
      op_def.src_tensors[0].layout);
  td_0.data_type = op_def.src_tensors[0].data_type;
  td_0.layout = op_def.src_tensors[0].layout;
  TensorDescriptor td_1;
  td_1.storage_type = SelectBestStorageType(
      *creation_context.context, *creation_context.device, shape_1,
      creation_context.device->info_, shape_1,
      op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type,
      op_def.src_tensors[0].layout);
  td_1.data_type = op_def.src_tensors[0].data_type;
@@ -175,7 +175,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
        channels[i] = inputs[i]->tensor.shape.c;
      }
      return SelectConcat(attr, channels, op_def,
                          creation_context.device->GetInfo(), gpu_op);
                          creation_context.device->info_, gpu_op);
    }
    case OperationType::CONVOLUTION_2D: {
      auto attr =
@@ -248,7 +248,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
                                 inputs[0]->tensor.shape.b, gpu_op);
    }
    case OperationType::LSTM: {
      SelectLSTM(op_def, creation_context.device->GetInfo(), gpu_op);
      SelectLSTM(op_def, creation_context.device->info_, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::MAX_UNPOOLING_2D: {
@@ -259,8 +259,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
    }
    case OperationType::MEAN: {
      auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
      return SelectMean(attr, op_def, creation_context.device->GetInfo(),
                        gpu_op);
      return SelectMean(attr, op_def, creation_context.device->info_, gpu_op);
    }
    case OperationType::MEAN_STDDEV_NORMALIZATION: {
      MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def);
@@ -15,18 +15,16 @@ limitations under the License.

#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"

#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"

namespace tflite {
namespace gpu {
namespace cl {

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWDC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape,
                              const TensorDescriptor& descriptor) {
  const int slices = DivideRoundUp(shape.c, 4);
  switch (descriptor.storage_type) {
@@ -35,64 +33,60 @@ bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
          4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
      const int buffer_size =
          shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
      return buffer_size <= device.GetInfo().buffer_max_size;
      return buffer_size <= device_info.buffer_max_size;
    }
    case TensorStorageType::IMAGE_BUFFER:
      return shape.b * shape.w * shape.h * shape.d * slices <=
             device.GetInfo().image_buffer_max_size;
             device_info.image_buffer_max_size;
    case TensorStorageType::TEXTURE_3D:
      if (device.cl_version() < OpenCLVersion::CL_1_2 && slices == 1) {
      if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) {
        // clCreateImage3D (that used in CL 1.0/1.1) can not create image with
        // depth = 1 by specification;
        return false;
      }
      return shape.w * shape.b <= device.GetInfo().image3d_max_width &&
             shape.h <= device.GetInfo().image3d_max_height &&
             slices * shape.d <= device.GetInfo().image3d_max_depth;
      return shape.w * shape.b <= device_info.image3d_max_width &&
             shape.h <= device_info.image3d_max_height &&
             slices * shape.d <= device_info.image3d_max_depth;
    case TensorStorageType::TEXTURE_ARRAY:
      // Bug on some Adreno. b/131099086
      if (slices == 1 && !device.SupportsOneLayerTextureArray()) {
      if (slices == 1 && !device_info.SupportsOneLayerTextureArray()) {
        return false;
      }
      return shape.w * shape.b <= device.GetInfo().image2d_max_width &&
             shape.h <= device.GetInfo().image2d_max_height &&
             slices * shape.d <= device.GetInfo().image_array_max_layers;
      return shape.w * shape.b <= device_info.image2d_max_width &&
             shape.h <= device_info.image2d_max_height &&
             slices * shape.d <= device_info.image_array_max_layers;
    case TensorStorageType::TEXTURE_2D:
      return shape.w * shape.b * shape.d <=
                 device.GetInfo().image2d_max_width &&
             shape.h * slices <= device.GetInfo().image2d_max_height;
      return shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
             shape.h * slices <= device_info.image2d_max_height;
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return shape.c <= 4 &&
             context.IsFloatTexture2DSupported(shape.c, descriptor.data_type) &&
             shape.w * shape.b * shape.d <=
                 device.GetInfo().image2d_max_width &&
             shape.h <= device.GetInfo().image2d_max_height;
             device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
             shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
             shape.h <= device_info.image2d_max_height;
    default:
      return false;
  }
}

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape,
                              const TensorDescriptor& descriptor) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CanCreateTensorWithShape(context, device, shape5D, descriptor);
  return CanCreateTensorWithShape(device_info, shape5D, descriptor);
}

TensorStorageType SelectBestStorageType(const CLContext& context,
                                        const CLDevice& device,
TensorStorageType SelectBestStorageType(const DeviceInfo& device_info,
                                        const BHWC& shape,
                                        const TensorStorageType& desired,
                                        const DataType& data_type,
                                        const Layout& layout) {
  if (CanCreateTensorWithShape(context, device, shape,
  if (CanCreateTensorWithShape(device_info, shape,
                               TensorDescriptor{data_type, desired, layout})) {
    return desired;
  }
  auto GetBestTypeAfterTextureArray = [&]() {
    if (device.SupportsImageBuffer() &&
    if (device_info.SupportsImageBuffer() &&
        CanCreateTensorWithShape(
            context, device, shape,
            device_info, shape,
            TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER,
                             layout})) {
      return TensorStorageType::IMAGE_BUFFER;
@@ -101,9 +95,9 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
    }
  };
  auto GetBestTypeAfterTexture2D = [&]() {
    if (device.SupportsTextureArray() &&
    if (device_info.SupportsTextureArray() &&
        CanCreateTensorWithShape(
            context, device, shape,
            device_info, shape,
            TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY,
                             layout})) {
      return TensorStorageType::TEXTURE_ARRAY;
@@ -113,7 +107,7 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
  };
  auto GetBestTypeAfterTexture3D = [&]() {
    if (CanCreateTensorWithShape(
            context, device, shape,
            device_info, shape,
            TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D,
                             layout})) {
      return TensorStorageType::TEXTURE_2D;
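With the OpenCL handles removed from these signatures, storage selection in storage_type_util can run from a bare DeviceInfo. A hedged usage sketch (the wrapper name is hypothetical; the SelectBestStorageType signature matches the header hunk below):

// Ask for TEXTURE_2D and let SelectBestStorageType fall back to another
// storage type if the shape exceeds the limits recorded in DeviceInfo.
TensorStorageType ChooseStorageForTensor(const DeviceInfo& device_info,
                                         const BHWC& shape, DataType data_type,
                                         Layout layout) {
  return SelectBestStorageType(device_info, shape,
                               TensorStorageType::TEXTURE_2D, data_type,
                               layout);
}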
@@ -16,8 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_

#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
@@ -26,16 +25,13 @@ namespace tflite {
namespace gpu {
namespace cl {

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWDC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape,
                              const TensorDescriptor& descriptor);

bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
                              const BHWC& shape,
bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape,
                              const TensorDescriptor& descriptor);

TensorStorageType SelectBestStorageType(const CLContext& context,
                                        const CLDevice& device,
TensorStorageType SelectBestStorageType(const DeviceInfo& device_info,
                                        const BHWC& shape,
                                        const TensorStorageType& desired,
                                        const DataType& data_type,