ConvTexture converted to new style.

Texture2D implements GPUObject. PiperOrigin-RevId: 317752276 Change-Id: Id1c66b2f6ca7b70475cc82abc422935fc3f1a251
2020-06-22 15:53:37 -07:00 · 2020-06-22 15:53:37 -07:00 · 23d5a2e00a
parent 26421826a0
commit 23d5a2e00a
5 changed files with 218 additions and 135 deletions
--- a/tensorflow/lite/delegates/gpu/cl/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/BUILD
@ -523,6 +523,7 @@ cc_library(
    deps = [
        ":cl_command_queue",
        ":cl_context",
        ":gpu_object",
        ":opencl_wrapper",
        ":tensor_type",
        ":util",
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
@ -31,18 +31,32 @@ namespace gpu {
 namespace cl {
 namespace {
-std::string GenerateConvCode(
+std::string GenerateConvCode(const OperationDef& op_def, const int3& block_size,
-    const OperationDef& op_def, const int3& block_size, bool is1x1,
+                             bool is1x1, bool adreno4xx_optimization,
-    bool adreno4xx_optimization, bool stride_correction,
+                             bool stride_correction,
-    bool different_weights_for_height, const CLDevice& device,
+                             bool different_weights_for_height,
-    const std::vector<ElementwiseOperation*>& linked_operations) {
+                             const CLDevice& device, Arguments* args) {
-  std::string c = GetCommonDefines(op_def.precision);
+  auto src_desc = absl::make_unique<TensorDescriptor>(op_def.src_tensors[0]);
-  TensorCodeGenerator src_tensor(
+  src_desc->SetTextureAddressMode(GetFastestZeroMode(device));
-      "src_data", WHSPoint{"src_size.x", "src_size.y", "src_size.z"},
+  if (op_def.IsBatchSupported()) {
-      op_def.src_tensors[0]);
+    src_desc->SetStateVar("BatchedWidth", "true");
-  TensorCodeGenerator dst_tensor(
+  }
-      "dst_data", WHSPoint{"dst_size.x", "dst_size.y", "dst_size.z"},
+  args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));
-      op_def.dst_tensors[0]);
+  auto dst_desc = absl::make_unique<TensorDescriptor>(op_def.dst_tensors[0]);
  if (op_def.IsBatchSupported()) {
    dst_desc->SetStateVar("BatchedWidth", "true");
  }
  args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
  if (!is1x1) {
    args->AddInt("kernel_size_x");
    args->AddInt("kernel_size_y");
    args->AddInt("dilation_x");
    args->AddInt("dilation_y");
  }
  args->AddInt("stride_x");
  args->AddInt("stride_y");
  args->AddInt("padding_x");
  args->AddInt("padding_y");
  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
  const bool is_buffer = src_tensor_type == TensorStorageType::IMAGE_BUFFER ||
@ -63,6 +77,7 @@ std::string GenerateConvCode(
    zs[z] = std::to_string(z);
  }
  std::string c = GetCommonDefines(op_def.precision);
  for (int z = 0; z < block_size.z; ++z) {
    const std::string f0 = std::to_string(z * 4 + 0);
    const std::string f1 = std::to_string(z * 4 + 1);
@ -86,43 +101,29 @@ std::string GenerateConvCode(
  }
  c += "__kernel void main_function(\n";
-  c += src_tensor.GetDeclaration(AccessType::READ) + ",\n";
+  c += "$0) {\n";
  c += "    __read_only image2d_t filters0,   \n";
  c += "    __read_only image2d_t filters1,   \n";
  c += "    __read_only image2d_t filters2,   \n";
  c += "    __read_only image2d_t filters3,   \n";
  c += "    __read_only image2d_t biases";
  c += GetArgsDeclaration(linked_operations);
  c += dst_tensor.GetDeclaration(AccessType::WRITE) + ",\n";
  c += "    int4 src_size,                   \n";
  c += "    int4 dst_size,                   \n";
  if (!is1x1) {
    c += "    int2 kernel_size,              \n";
    c += "    int2 dilation,                 \n";
  }
  c += "    int2 stride,                     \n";
  c += "    int2 padding                     \n";
  c += ") {\n";
  c += "  int X = get_global_id(0) * " + std::to_string(block_size.x) + ";\n";
  c += "  int Y = get_global_id(1) * " + std::to_string(block_size.y) + ";\n";
  c += "  int Z = get_global_id(2) * " + std::to_string(block_size.z) + ";\n";
-  c += "  if (X >= dst_size.x || Y >= dst_size.y || Z >= dst_size.z) return;\n";
+  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() "
       "|| Z >= args.dst_tensor.Slices()) return;\n";
  std::vector<std::string> s_x(block_size.x);
  std::vector<std::string> s_y(block_size.y);
  for (int x = 0; x < block_size.x; ++x) {
    if (stride_correction) {
      c += "  int xc" + xs[x] + " = " +
-           GetXStrideCorrected("X + " + xs[x], "src_size.w", "stride.x",
+           GetXStrideCorrected("X + " + xs[x], "args.src_tensor.Batch()",
-                               "padding.x") +
+                               "args.stride_x", "args.padding_x") +
           ";\n";
    } else {
      c += "  int xc" + xs[x] + " = (X +" + xs[x] +
-           ") * stride.x + padding.x;\n";
+           ") * args.stride_x + args.padding_x;\n";
    }
    s_x[x] = is1x1 ? "xc" + xs[x] : "cx" + xs[x];
  }
  for (int y = 0; y < block_size.y; ++y) {
-    c += "  int yc" + ys[y] + " = (Y +" + ys[y] + ") * stride.y + padding.y;\n";
+    c += "  int yc" + ys[y] + " = (Y +" + ys[y] +
         ") * args.stride_y + args.padding_y;\n";
    s_y[y] = is1x1 ? "yc" + ys[y] : "cy" + ys[y];
  }
  for (int i = 0; i < block_size.x * block_size.y * block_size.z; ++i) {
@ -131,7 +132,7 @@ std::string GenerateConvCode(
  }
  std::string f_y = is1x1 ? "s" : "filter_offset";
  if (different_weights_for_height) {
-    f_y = "Y * src_size.z + s";
+    f_y = "Y * args.src_tensor.Slices() + s";
  }
  if (!is1x1) {
    for (int x = 0; x < block_size.x; ++x) {
@ -141,31 +142,31 @@ std::string GenerateConvCode(
      c += "  int cy" + ys[y] + ";\n";
    }
    c += "  int filter_offset = 0;\n";
-    c += "  for (int y = 0; y < kernel_size.y; ++y) {\n";
+    c += "  for (int y = 0; y < args.kernel_size_y; ++y) {\n";
    for (int y = 0; y < block_size.y; ++y) {
-      c += "  cy" + ys[y] + " = y * dilation.y + yc" + ys[y] + ";\n";
+      c += "  cy" + ys[y] + " = y * args.dilation_y + yc" + ys[y] + ";\n";
    }
    if (is_buffer) {
      for (int y = 0; y < block_size.y; ++y) {
        c += "  bool in_y" + ys[y] + " = cy" + ys[y] + " >= 0 && cy" + ys[y] +
-             " < src_size.y;\n";
+             " < args.src_tensor.Height();\n";
        if (src_tensor_type == TensorStorageType::BUFFER) {
          c += "    cy" + ys[y] + " = clamp(cy" + ys[y] +
-               ", 0, src_size.y - 1);\n";
+               ", 0, args.src_tensor.Height() - 1);\n";
        }
      }
    }
-    c += "  for (int x = 0; x < kernel_size.x; ++x) {\n";
+    c += "  for (int x = 0; x < args.kernel_size_x; ++x) {\n";
    for (int x = 0; x < block_size.x; ++x) {
-      c += "  cx" + xs[x] + " = x * dilation.x + xc" + xs[x] + ";\n";
+      c += "  cx" + xs[x] + " = x * args.dilation_x + xc" + xs[x] + ";\n";
    }
    if (is_buffer) {
      for (int x = 0; x < block_size.x; ++x) {
        c += "  bool in_x" + xs[x] + " = cx" + xs[x] + " >= 0 && cx" + xs[x] +
-             " < src_size.x;\n";
+             " < args.src_tensor.Width();\n";
        if (src_tensor_type == TensorStorageType::BUFFER) {
          c += "    cx" + xs[x] + " = clamp(cx" + xs[x] +
-               ", 0, src_size.x - 1);\n";
+               ", 0, args.src_tensor.Width() - 1);\n";
        }
      }
      for (int x = 0; x < block_size.x; ++x) {
@ -173,90 +174,95 @@ std::string GenerateConvCode(
          const std::string id = std::to_string(y * block_size.x + x);
          if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
            c += absl::Substitute(
-                "  int addr_$0 = select(-1, cy$2 * src_size.x + cx$1, (in_x$1 "
+                "  int addr_$0 = select(-1, cy$2 * args.src_tensor.Width() + "
                "cx$1, (in_x$1 "
                "&& "
                "in_y$2));\n",
                y * block_size.x + x, x, y);
            c += absl::Substitute(
-                "  int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
+                "  int dz_$0 = select(0, args.src_tensor.Width() * "
                "args.src_tensor.Height(), (in_x$1 && "
                "in_y$2));\n",
                y * block_size.x + x, x, y);
          } else {
-            c += absl::Substitute("  int addr_$0 = cy$2 * src_size.x + cx$1;\n",
+            c += absl::Substitute(
-                                  y * block_size.x + x, x, y);
+                "  int addr_$0 = cy$2 * args.src_tensor.Width() + cx$1;\n",
                y * block_size.x + x, x, y);
          }
        }
      }
      if (src_tensor_type == TensorStorageType::BUFFER) {
-        c += "  int dz = src_size.x * src_size.y;\n";
+        c += "  int dz = args.src_tensor.Width() * args.src_tensor.Height();\n";
      }
    }
  } else if (is_buffer) {
    for (int y = 0; y < block_size.y; ++y) {
      c += "  bool in_y" + ys[y] + " = yc" + ys[y] + " >= 0 && yc" + ys[y] +
-           " < src_size.y;\n";
+           " < args.src_tensor.Height();\n";
    }
    for (int x = 0; x < block_size.x; ++x) {
      c += "  bool in_x" + xs[x] + " = xc" + xs[x] + " >= 0 && xc" + xs[x] +
-           " < src_size.x;\n";
+           " < args.src_tensor.Width();\n";
    }
    for (int x = 0; x < block_size.x; ++x) {
      for (int y = 0; y < block_size.y; ++y) {
        const std::string id = std::to_string(y * block_size.x + x);
        if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
          c += absl::Substitute(
-              "  int addr_$0 = select(-1, yc$2 * src_size.x + xc$1, (in_x$1 && "
+              "  int addr_$0 = select(-1, yc$2 * args.src_tensor.Width() + "
              "xc$1, (in_x$1 && "
              "in_y$2));\n",
              y * block_size.x + x, x, y);
          c += absl::Substitute(
-              "  int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
+              "  int dz_$0 = select(0, args.src_tensor.Width() * "
              "args.src_tensor.Height(), (in_x$1 && "
              "in_y$2));\n",
              y * block_size.x + x, x, y);
        } else {
-          c += absl::Substitute("  int addr_$0 = yc$2 * src_size.x + xc$1;\n",
+          c += absl::Substitute(
-                                y * block_size.x + x, x, y);
+              "  int addr_$0 = yc$2 * args.src_tensor.Width() + xc$1;\n",
              y * block_size.x + x, x, y);
        }
      }
    }
    if (src_tensor_type == TensorStorageType::BUFFER) {
-      c += "  int dz = src_size.x * src_size.y;\n";
+      c += "  int dz = args.src_tensor.Width() * args.src_tensor.Height();\n";
    }
  }
-  c += "  for (int s = 0; s < src_size.z; ++s) {\n";
+  c += "  for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n";
  if (is_buffer) {
    if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
      for (int index = 0; index < block_size.x * block_size.y; ++index) {
        const std::string id = std::to_string(index);
        c +=
-            "    FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) + ";\n";
+            "    FLT4 src" + id + " = args.src_tensor.Read(addr_" + id + ");\n";
      }
    } else {
      for (int x = 0; x < block_size.x; ++x) {
        for (int y = 0; y < block_size.y; ++y) {
          const std::string id = std::to_string(y * block_size.x + x);
-          c += "    FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) +
+          c += "    FLT4 src" + id + " = args.src_tensor.Read(addr_" + id +
-               " * (FLT)(in_x" + xs[x] + " && in_y" + ys[y] + "); addr_" + id +
+               ") * (FLT)(in_x" + xs[x] + " && in_y" + ys[y] + "); addr_" + id +
               " += dz;\n";
        }
      }
    }
  }
  for (int z = 0; z < block_size.z; ++z) {
-    const std::string fc = "(int2)(Z + " + zs[z] + ", " + f_y + ")";
+    c += absl::Substitute(R"(    FLT4 f$2 = args.weights0.Read($0, $1);
-    c += absl::Substitute(R"(    FLT4 f$1 = READ_IMAGE(filters0, smp_none, $0);
+    FLT4 f$3 = args.weights1.Read($0, $1);
-    FLT4 f$2 = READ_IMAGE(filters1, smp_none, $0);
+    FLT4 f$4 = args.weights2.Read($0, $1);
-    FLT4 f$3 = READ_IMAGE(filters2, smp_none, $0);
+    FLT4 f$5 = args.weights3.Read($0, $1);
    FLT4 f$4 = READ_IMAGE(filters3, smp_none, $0);
 )",
-                          fc, z * 4 + 0, z * 4 + 1, z * 4 + 2, z * 4 + 3);
+                          "Z + " + zs[z], f_y, z * 4 + 0, z * 4 + 1, z * 4 + 2,
                          z * 4 + 3);
  }
  if (!is_buffer) {
    const auto mode = GetFastestZeroMode(device);
    for (int x = 0; x < block_size.x; ++x) {
      for (int y = 0; y < block_size.y; ++y) {
        const std::string id = std::to_string(y * block_size.x + x);
-        c += "    FLT4 src" + id + " = " +
+        c += "    FLT4 src" + id + " = args.src_tensor.Read(" + s_x[x] + ", " +
-             src_tensor.ReadWHS(s_x[x], s_y[y], "s", mode) + ";\n";
+             s_y[y] + ", s);\n";
      }
    }
  }
@ -278,17 +284,17 @@ std::string GenerateConvCode(
      }
    }
  }
-  c += "  }\n";  // src_size.z
+  c += "  }\n";  // args.src_tensor.Slices()
  if (!is1x1) {
-    c += "  }\n";  // kernel_size.x
+    c += "  }\n";  // kernel_size_x
-    c += "  }\n";  // kernel_size.y
+    c += "  }\n";  // kernel_size_y
  }
  // when is1x1 && adreno4xx_optimization is true, xc0 == X and yc0 == Y
  std::string dst_x = is1x1 && adreno4xx_optimization ? "xc0" : "X";
  std::string dst_y = is1x1 && adreno4xx_optimization ? "yc0" : "Y";
  for (int z = 0; z < block_size.z; ++z) {
-    c += "  if (Z < dst_size.z) {\n";
+    c += "  if (Z < args.dst_tensor.Slices()) {\n";
-    c += "    FLT4 bias_val = READ_IMAGE(biases, smp_none, (int2)(Z, 0));\n";
+    c += "    FLT4 bias_val = args.biases.Read(Z);\n";
    for (int y = 0; y < block_size.y; ++y) {
      for (int x = 0; x < block_size.x; ++x) {
        const std::string id =
@ -296,11 +302,10 @@ std::string GenerateConvCode(
        c += "    {\n";
        c += "      int xc = " + dst_x + " + " + xs[x] + ";\n";
        c += "      int yc = " + dst_y + " + " + ys[y] + ";\n";
-        c += "      if (xc < dst_size.x && yc < dst_size.y) {\n";
+        c += "      if (xc < args.dst_tensor.Width() && yc < "
             "args.dst_tensor.Height()) {\n";
        c += "        FLT4 res = TO_FLT4(r" + id + ") + bias_val;\n";
-        const LinkingContext context{"res", "xc", "yc", "Z"};
+        c += "        args.dst_tensor.Write(res, xc, yc, Z);\n";
        c += PostProcess(linked_operations, context);
        c += "        " + dst_tensor.WriteWHS("res", "xc", "yc", "Z") + "\n";
        c += "      }\n";
        c += "    }\n";
      }
@ -350,11 +355,6 @@ ConvTexture::ConvTexture(const OperationDef& definition)
 ConvTexture::ConvTexture(ConvTexture&& operation)
    : GPUOperation(std::move(operation)),
      weights_0_(std::move(operation.weights_0_)),
      weights_1_(std::move(operation.weights_1_)),
      weights_2_(std::move(operation.weights_2_)),
      weights_3_(std::move(operation.weights_3_)),
      biases_(std::move(operation.biases_)),
      kernel_size_(operation.kernel_size_),
      stride_(operation.stride_),
      padding_(operation.padding_),
@ -366,11 +366,6 @@ ConvTexture::ConvTexture(ConvTexture&& operation)
 ConvTexture& ConvTexture::operator=(ConvTexture&& operation) {
  if (this != &operation) {
    weights_0_ = std::move(operation.weights_0_);
    weights_1_ = std::move(operation.weights_1_);
    weights_2_ = std::move(operation.weights_2_);
    weights_3_ = std::move(operation.weights_3_);
    biases_ = std::move(operation.biases_);
    std::swap(kernel_size_, operation.kernel_size_);
    std::swap(stride_, operation.stride_);
    std::swap(padding_, operation.padding_);
@ -395,10 +390,16 @@ absl::Status ConvTexture::Compile(const CreationContext& creation_context) {
      definition_.precision == CalculationsPrecision::F16;
  const bool stride_correction =
      definition_.IsBatchSupported() && stride_.x != 1;
-  const std::string code =
+  std::string code =
      GenerateConvCode(definition_, block_size_, is1x1, adreno4xx_optimization,
                       stride_correction, different_weights_for_height_,
-                       *creation_context.device, linked_operations_);
+                       *creation_context.device, &args_);
  std::string element_wise_code;
  RETURN_IF_ERROR(
      MergeOperations(linked_operations_, &args_, &element_wise_code));
  RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(),
                                          {{"dst_tensor", element_wise_code}},
                                          &code));
  std::vector<CompilerOptions> options;
  if (UseFP16SIMD(*creation_context.device, definition_.precision, is1x1)) {
    options.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE);
@ -409,25 +410,20 @@ absl::Status ConvTexture::Compile(const CreationContext& creation_context) {
 }
 absl::Status ConvTexture::BindArguments() {
-  kernel_.ResetBindingCounter();
+  RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
-  RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
+  RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
  RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_0_.GetMemoryPtr()));
  RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_1_.GetMemoryPtr()));
  RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_2_.GetMemoryPtr()));
  RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_3_.GetMemoryPtr()));
  RETURN_IF_ERROR(kernel_.SetMemoryAuto(biases_.GetMemoryPtr()));
  RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
  RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
  RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWBatchedHSB()));
  RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHSB()));
  if (!(kernel_size_.x == 1 && kernel_size_.y == 1)) {
-    RETURN_IF_ERROR(kernel_.SetBytesAuto(kernel_size_));
+    RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x));
-    RETURN_IF_ERROR(kernel_.SetBytesAuto(
+    RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y));
-        int2(dilation_.x * src_[0]->Batch(), dilation_.y)));
+    RETURN_IF_ERROR(args_.SetInt("dilation_x", dilation_.x * src_[0]->Batch()));
    RETURN_IF_ERROR(args_.SetInt("dilation_y", dilation_.y));
  }
-  RETURN_IF_ERROR(kernel_.SetBytesAuto(stride_));
+  RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x));
-  RETURN_IF_ERROR(
+  RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y));
-      kernel_.SetBytesAuto(int2(padding_.x * src_[0]->Batch(), padding_.y)));
+  RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch()));
  RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y));
  RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
  RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
  return absl::OkStatus();
 }
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h
@ -92,12 +92,6 @@ class ConvTexture : public GPUOperation {
  absl::Status BindArguments();
  int3 GetGridSize() const;
  Texture2D weights_0_;
  Texture2D weights_1_;
  Texture2D weights_2_;
  Texture2D weights_3_;
  LinearStorage biases_;
  int2 kernel_size_;
  int2 stride_;
  int2 padding_;
@ -119,11 +113,16 @@ absl::Status ConvTexture::UploadData(
    const tflite::gpu::Tensor<OHWI, T>& weights,
    const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
  RETURN_IF_ERROR(UploadWeights(weights, context));
-  LinearStorageCreateInfo create_info;
+
-  create_info.storage_type = LinearStorageType::TEXTURE_2D;
+  TensorLinearDescriptor desc;
-  create_info.data_type = definition_.GetDataType();
+  desc.storage_type = LinearStorageType::TEXTURE_2D;
-  create_info.aligned_size = weights.shape.o;
+  desc.element_type = definition_.GetDataType();
-  RETURN_IF_ERROR(CreateLinearStorage(create_info, biases, context, &biases_));
+
  LinearStorage lt;
  RETURN_IF_ERROR(CreateLinearStorage(desc, biases, context, &lt));
  args_.AddObject("biases", AccessType::READ,
                  absl::make_unique<LinearStorage>(std::move(lt)),
                  absl::make_unique<TensorLinearDescriptor>(desc));
  return absl::OkStatus();
 }
@ -135,14 +134,19 @@ absl::Status ConvTexture::UploadDataForWinograd4x4To6x6(
  RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights);
  RETURN_IF_ERROR(UploadWeights(wino_weights, context));
  LinearStorageCreateInfo create_info;
  create_info.storage_type = LinearStorageType::TEXTURE_2D;
  create_info.data_type = definition_.GetDataType();
  create_info.aligned_size = 1;
  tflite::gpu::Tensor<Linear, DataType::FLOAT32> bias;
  bias.shape = Linear(1);
  bias.data = {0.0f};
-  return CreateLinearStorage(create_info, bias, context, &biases_);
+  TensorLinearDescriptor desc;
  desc.storage_type = LinearStorageType::TEXTURE_2D;
  desc.element_type = definition_.GetDataType();
  LinearStorage lt;
  RETURN_IF_ERROR(CreateLinearStorage(desc, bias, context, &lt));
  args_.AddObject("biases", AccessType::READ,
                  absl::make_unique<LinearStorage>(std::move(lt)),
                  absl::make_unique<TensorLinearDescriptor>(desc));
  return absl::OkStatus();
 }
 template <DataType T>
@ -157,11 +161,20 @@ absl::Status ConvTexture::UploadWeights(
  int texture_width = dst_depth;
  int texture_height = src_depth * kernel_x * kernel_y;
-  DataType data_type = definition_.GetDataType();
+  const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
  DataType data_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
  const int elements_count = texture_width * texture_height;
-  if (data_type == DataType::FLOAT32) {
+  Texture2DDescriptor desc;
  desc.element_type = data_type;
  Texture2D weights_0;
  Texture2D weights_1;
  Texture2D weights_2;
  Texture2D weights_3;
  if (f32_weights) {
    std::vector<float4> gpu_data_0(elements_count);
    std::vector<float4> gpu_data_1(elements_count);
    std::vector<float4> gpu_data_2(elements_count);
@ -171,15 +184,16 @@ absl::Status ConvTexture::UploadWeights(
                         absl::MakeSpan(gpu_data_3));
    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
                                        texture_height, gpu_data_0.data(),
-                                        context, &weights_0_));
+                                        context, &weights_0));
    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
                                        texture_height, gpu_data_1.data(),
-                                        context, &weights_1_));
+                                        context, &weights_1));
    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
                                        texture_height, gpu_data_2.data(),
-                                        context, &weights_2_));
+                                        context, &weights_2));
-    return CreateTexture2DRGBA(data_type, texture_width, texture_height,
+    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
-                               gpu_data_3.data(), context, &weights_3_);
+                                        texture_height, gpu_data_3.data(),
                                        context, &weights_3));
  } else {
    std::vector<half4> gpu_data_0(elements_count);
    std::vector<half4> gpu_data_1(elements_count);
@ -190,16 +204,31 @@ absl::Status ConvTexture::UploadWeights(
                         absl::MakeSpan(gpu_data_3));
    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
                                        texture_height, gpu_data_0.data(),
-                                        context, &weights_0_));
+                                        context, &weights_0));
    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
                                        texture_height, gpu_data_1.data(),
-                                        context, &weights_1_));
+                                        context, &weights_1));
    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
                                        texture_height, gpu_data_2.data(),
-                                        context, &weights_2_));
+                                        context, &weights_2));
-    return CreateTexture2DRGBA(data_type, texture_width, texture_height,
+    RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, texture_width,
-                               gpu_data_3.data(), context, &weights_3_);
+                                        texture_height, gpu_data_3.data(),
                                        context, &weights_3));
  }
  args_.AddObject("weights0", AccessType::READ,
                  absl::make_unique<Texture2D>(std::move(weights_0)),
                  absl::make_unique<Texture2DDescriptor>(desc));
  args_.AddObject("weights1", AccessType::READ,
                  absl::make_unique<Texture2D>(std::move(weights_1)),
                  absl::make_unique<Texture2DDescriptor>(desc));
  args_.AddObject("weights2", AccessType::READ,
                  absl::make_unique<Texture2D>(std::move(weights_2)),
                  absl::make_unique<Texture2DDescriptor>(desc));
  args_.AddObject("weights3", AccessType::READ,
                  absl::make_unique<Texture2D>(std::move(weights_3)),
                  absl::make_unique<Texture2DDescriptor>(desc));
  return absl::OkStatus();
 }
 template <DataType S, typename T>
--- a/tensorflow/lite/delegates/gpu/cl/texture2d.cc
+++ b/tensorflow/lite/delegates/gpu/cl/texture2d.cc
@ -59,6 +59,41 @@ absl::Status CreateTexture2D(int width, int height, cl_channel_type type,
 }
 }  // namespace
 GPUResources Texture2DDescriptor::GetGPUResources(
    AccessType access_type) const {
  GPUResources resources;
  GPUImage2DDescriptor desc;
  desc.data_type = element_type;
  desc.access_type = access_type;
  resources.images2d.push_back({"tex2d", desc});
  return resources;
 }
 absl::Status Texture2DDescriptor::PerformSelector(
    const std::string& selector, const std::vector<std::string>& args,
    const std::vector<std::string>& template_args, std::string* result) const {
  if (selector == "Read") {
    return PerformReadSelector(args, result);
  } else {
    return absl::NotFoundError(absl::StrCat(
        "TensorLinearDescriptor don't have selector with name - ", selector));
  }
 }
 absl::Status Texture2DDescriptor::PerformReadSelector(
    const std::vector<std::string>& args, std::string* result) const {
  if (args.size() != 2) {
    return absl::NotFoundError(
        absl::StrCat("Texture2DDescriptor Read require one argument, but ",
                     args.size(), " was passed"));
  }
  const std::string read =
      element_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
  *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0],
                         ", " + args[1] + "))");
  return absl::OkStatus();
 }
 Texture2D::Texture2D(cl_mem texture, int width, int height,
                     cl_channel_type type)
    : texture_(texture), width_(width), height_(height), channel_type_(type) {}
@ -95,6 +130,12 @@ void Texture2D::Release() {
  }
 }
 GPUResourcesWithValue Texture2D::GetGPUResources(AccessType access_type) const {
  GPUResourcesWithValue resources;
  resources.images2d.push_back({"tex2d", texture_});
  return resources;
 }
 // Creates new 4-channel 2D texture with f32 elements
 absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext* context,
                                    Texture2D* result) {
--- a/tensorflow/lite/delegates/gpu/cl/texture2d.h
+++ b/tensorflow/lite/delegates/gpu/cl/texture2d.h
@ -20,6 +20,7 @@ limitations under the License.
 #include "absl/types/span.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
@ -30,9 +31,22 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 struct Texture2DDescriptor : public GPUObjectDescriptor {
  DataType element_type;  // FLOAT32 or FLOAT16
  absl::Status PerformSelector(const std::string& selector,
                               const std::vector<std::string>& args,
                               const std::vector<std::string>& template_args,
                               std::string* result) const override;
  GPUResources GetGPUResources(AccessType access_type) const override;
  absl::Status PerformReadSelector(const std::vector<std::string>& args,
                                   std::string* result) const;
 };
 // Texture2D represent formatted GPU data storage.
 // Texture2D is moveable but not copyable.
-class Texture2D {
+class Texture2D : public GPUObject {
 public:
  Texture2D() {}  // just for using Texture2D as a class members
  Texture2D(cl_mem texture, int width, int height, cl_channel_type type);
@ -56,6 +70,8 @@ class Texture2D {
  template <typename T>
  absl::Status ReadData(CLCommandQueue* queue, std::vector<T>* result) const;
  GPUResourcesWithValue GetGPUResources(AccessType access_type) const override;
 private:
  void Release();