Simplified signatures of CreateKernel... methods.
PiperOrigin-RevId: 269652728
Parent:  a15068b1a6
Commit:  e72f5dd506
Changed files in tensorflow/lite/delegates/gpu/cl/kernels:
  conv_buffer.cc
  conv_buffer_1x1.cc
  conv_constants.cc
  conv_powervr.cc
  conv_powervr.h
  conv_texture.cc
  convolution_transposed.cc
  convolution_transposed_3x3_thin.cc
  convolution_transposed_thin.cc
  depth_wise_conv.cc
  depth_wise_conv_3x3.cc
  fully_connected_texture.cc
  max_unpooling.cc
  padding.cc
  reshape.cc
  reshapex4.cc
  softmax.cc
  softmax1x1.cc
  strided_slice.cc
  upsample.cc
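Every hunk below makes the same change: each per-kernel code generator used to take a source TensorDescriptor, a destination TensorDescriptor, and a CalculationsPrecision as three separate parameters, and now takes the single OperationDef that already carries all three. A minimal sketch of the pattern follows; the struct bodies are illustrative stand-ins, not the actual TFLite GPU definitions, and only the three fields the diff relies on are shown:

    #include <string>
    #include <vector>

    // Stand-in types; the real ones live under tensorflow/lite/delegates/gpu.
    enum class CalculationsPrecision { F32, F32_F16, F16 };
    struct TensorDescriptor { /* storage type, data layout, ... */ };

    // OperationDef bundles what used to travel as three loose parameters.
    struct OperationDef {
      CalculationsPrecision precision;
      std::vector<TensorDescriptor> src_tensors;
      std::vector<TensorDescriptor> dst_tensors;
    };

    // Old signature shape: pieces of one definition passed separately.
    std::string GenerateKernelCode(const TensorDescriptor& src_descriptor,
                                   const TensorDescriptor& dst_descriptor,
                                   CalculationsPrecision precision);

    // New signature shape: one argument. Generators read op_def.precision,
    // op_def.src_tensors[0], and op_def.dst_tensors[0] (or src_tensors[1]
    // for two-input ops, as in max_unpooling.cc below).
    std::string GenerateKernelCode(const OperationDef& op_def);

Call sites shrink accordingly: each Compile() now forwards definition_ instead of unpacking definition_.src_tensors[0], definition_.dst_tensors[0], and definition_.precision by hand.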
tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer.cc
@@ -29,15 +29,13 @@ namespace cl {
 namespace {
 
 std::string GenerateConvBuffer(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    int x_elements, int y_elements,
+    const OperationDef& op_def, int x_elements, int y_elements,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  std::string c = GetCommonDefines(precision);
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  std::string c = GetCommonDefines(op_def.precision);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
     case CalculationsPrecision::F16:
       c += "#define CONV(R, S) \\\n";
@@ -53,7 +51,7 @@ std::string GenerateConvBuffer(
       break;
   }
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
       c += "#define FLT16 float16\n";
       break;
@@ -206,9 +204,8 @@ ConvBuffer& ConvBuffer::operator=(ConvBuffer&& operation) {
 }
 
 Status ConvBuffer::Compile(const CreationContext& creation_context) {
-  std::string code = GenerateConvBuffer(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, x_elements_, y_elements_, linked_operations_);
+  std::string code = GenerateConvBuffer(definition_, x_elements_, y_elements_,
+                                        linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc
@@ -85,15 +85,14 @@ std::string GetShiftFromElementSize(int element_size) {
 }
 
 std::string GenerateConvBuffer1x1(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    int x_elements, int y_elements, int element_size,
+    const OperationDef& op_def, int x_elements, int y_elements,
+    int element_size,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  std::string c = GetCommonDefines(precision);
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  std::string c = GetCommonDefines(op_def.precision);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
       c += "#define FLT8 float8\n";
       c += "#define FLT16 float16\n";
@@ -154,7 +153,8 @@ std::string GenerateConvBuffer1x1(
     }
   }
   c += " FLT16 f0 = temp[0];\n";
-  c += GetComputationPart(x_elements, y_elements, element_size, precision);
+  c += GetComputationPart(x_elements, y_elements, element_size,
+                          op_def.precision);
   for (int i = 0; i < x_elements * y_elements; ++i) {
     std::string i_s = std::to_string(i);
     c += " src_addr_" + i_s + " += src_size.z;\n";
@@ -232,16 +232,12 @@ ConvBuffer1x1& ConvBuffer1x1::operator=(ConvBuffer1x1&& operation) {
 
 Status ConvBuffer1x1::Compile(const CreationContext& creation_context) {
   std::string code_flt4 = GenerateConvBuffer1x1(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, flt4_x_count_, flt4_y_count_, 1,
-      linked_operations_);
+      definition_, flt4_x_count_, flt4_y_count_, 1, linked_operations_);
   RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
       code_flt4, "main_function", *creation_context.context,
       *creation_context.device, &kernel_flt4_));
   std::string code_flt8 = GenerateConvBuffer1x1(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, flt8_x_count_, flt8_y_count_, 2,
-      linked_operations_);
+      definition_, flt8_x_count_, flt8_y_count_, 2, linked_operations_);
   RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
       code_flt8, "main_function", *creation_context.context,
       *creation_context.device, &kernel_flt8_));
tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc
@@ -28,25 +28,23 @@ namespace cl {
 namespace {
 
 std::string GenerateConvolutionConstantCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    const int2& kernel_size, const int2& dilation, int src_channels,
-    int dst_channels, const CLDevice& device,
+    const OperationDef& op_def, const int2& kernel_size, const int2& dilation,
+    int src_channels, int dst_channels, const CLDevice& device,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
 
   const int out_z = IntegralDivideRoundUp(dst_channels, 4);
   const std::string kOutZ = std::to_string(out_z);
   const int src_depth = IntegralDivideRoundUp(src_channels, 4);
 
-  const bool manual_clamp =
-      src_descriptor.storage_type == TensorStorageType::BUFFER ||
-      src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER;
+  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
+  const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
+                            src_tensor_type == TensorStorageType::IMAGE_BUFFER;
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
     case CalculationsPrecision::F16:
       c += "#define CONV4(R, SRC, F, i) \\\n";
@@ -213,9 +211,8 @@ ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) {
 
 Status ConvConstants::Compile(const CreationContext& creation_context) {
   const auto code = GenerateConvolutionConstantCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, kernel_size_, dilation_, src_channels_,
-      dst_channels_, *creation_context.device, linked_operations_);
+      definition_, kernel_size_, dilation_, src_channels_, dst_channels_,
+      *creation_context.device, linked_operations_);
   std::vector<CompilerOptions> options;
   if (definition_.precision == CalculationsPrecision::F16 &&
       creation_context.device->IsAdreno3xx()) {
tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
@@ -62,9 +62,8 @@ ConvPowerVR& ConvPowerVR::operator=(ConvPowerVR&& operation) {
 }
 
 Status ConvPowerVR::Compile(const CreationContext& creation_context) {
-  const std::string code = GenerateConvPowerVR1x1(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, conv_params_, linked_operations_);
+  const std::string code =
+      GenerateConvPowerVR1x1(definition_, conv_params_, linked_operations_);
   std::vector<CompilerOptions> options;
   if (definition_.precision == CalculationsPrecision::F16 &&
       creation_context.device->IsPowerVR()) {
@@ -117,18 +116,16 @@ Status ConvPowerVR::AddToQueue(CLCommandQueue* queue) {
 }
 
 std::string GenerateConvPowerVR1x1(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    const ConvPowerVR::ConvParams& conv_params,
+    const OperationDef& op_def, const ConvPowerVR::ConvParams& conv_params,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  std::string c = GetCommonDefines(precision);
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  std::string c = GetCommonDefines(op_def.precision);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
   const bool is1x1 = conv_params.x_kernel_is_1 && conv_params.y_kernel_is_1;
-  const bool buffer_type =
-      src_descriptor.storage_type == TensorStorageType::BUFFER ||
-      src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER;
+  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
+  const bool buffer_type = src_tensor_type == TensorStorageType::BUFFER ||
+                           src_tensor_type == TensorStorageType::IMAGE_BUFFER;
   const bool manual_clamp = buffer_type && !is1x1;
 
   c += "#define SIMD_BARRIER " +
@@ -248,7 +245,7 @@ std::string GenerateConvPowerVR1x1(
   for (int y = 0; y < block_size.y; ++y) {
     for (int x = 0; x < block_size.x; ++x) {
       const std::string id = std::to_string(y) + std::to_string(x);
-      if (precision == CalculationsPrecision::F32_F16) {
+      if (op_def.precision == CalculationsPrecision::F32_F16) {
         c += " ACCUM_FLT4 src" + id + ";\n";
       } else {
         c += " FLT4 src" + id + ";\n";
@@ -265,8 +262,8 @@ std::string GenerateConvPowerVR1x1(
               ? ""
               : " * (FLT)(mx" + std::to_string(x) +
                     " && my" + std::to_string(y) + ")";
-      if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
-        if (precision == CalculationsPrecision::F32_F16) {
+      if (src_tensor_type == TensorStorageType::BUFFER) {
+        if (op_def.precision == CalculationsPrecision::F32_F16) {
          c += " src" + id + " = convert_float4(src_data[src_a_" + id +
               "]" + multiplier + ");\n";
        } else {
@@ -274,8 +271,8 @@ std::string GenerateConvPowerVR1x1(
               multiplier + ";\n";
        }
      }
-      if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
-        if (precision == CalculationsPrecision::F32_F16) {
+      if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+        if (op_def.precision == CalculationsPrecision::F32_F16) {
          c += " src" + id + " = " +
               src_tensor.ReadAsFloat3D("src_a_" + id) + multiplier + ";\n";
        } else {
@@ -290,7 +287,7 @@ std::string GenerateConvPowerVR1x1(
           is1x1 ? "X + " + std::to_string(x) : "xck" + std::to_string(x);
       const std::string yc =
           is1x1 ? "Y + " + std::to_string(y) : "yck" + std::to_string(y);
-      if (precision == CalculationsPrecision::F32_F16) {
+      if (op_def.precision == CalculationsPrecision::F32_F16) {
        c += " src" + id + " = " +
             src_tensor.ReadAsFloat3D(xc, yc, "s") + ";\n";
       } else {
tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h
@@ -76,9 +76,7 @@ class ConvPowerVR : public GPUOperation {
                             ConvPowerVR* result);
 
   friend std::string GenerateConvPowerVR1x1(
-      const TensorDescriptor& src_descriptor,
-      const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-      const ConvParams& conv_params,
+      const OperationDef& op_def, const ConvParams& conv_params,
       const std::vector<ElementwiseOperation*>& linked_operations);
 
   friend ConvParams GuessBestParams(const CLDevice& device,
tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
@@ -30,15 +30,17 @@ namespace cl {
 namespace {
 
 std::string GenerateConvCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    bool is1x1, bool adreno4xx_optimization, const CLDevice& device,
+    const OperationDef& op_def, bool is1x1, bool adreno4xx_optimization,
+    const CLDevice& device,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  std::string c = GetCommonDefines(precision);
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  std::string c = GetCommonDefines(op_def.precision);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  switch (precision) {
+  const bool is_image_buffer =
+      op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER;
+
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
     case CalculationsPrecision::F16:
       c += "#define CONV1(R, S) \\\n";
@@ -102,14 +104,14 @@ std::string GenerateConvCode(
   c += " for (int y = 0; y < kernel_size.y; ++y) {\n";
   c += " c0.y = y * dilation.y + yc0;\n";
   c += " c1.y = y * dilation.y + yc1;\n";
-  if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+  if (is_image_buffer) {
     c += " bool in_y0 = c0.y >= 0 && c0.y < src_size.y;\n";
     c += " bool in_y1 = c1.y >= 0 && c1.y < src_size.y;\n";
   }
   c += " for (int x = 0; x < kernel_size.x; ++x) {\n";
   c += " c0.x = x * dilation.x + xc0;\n";
   c += " c1.x = x * dilation.x + xc1;\n";
-  if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+  if (is_image_buffer) {
     c += " bool in_x0 = c0.x >= 0 && c0.x < src_size.x;\n";
     c += " bool in_x1 = c1.x >= 0 && c1.x < src_size.x;\n";
     c += " int addr_0 = select(-1, c0.y * src_size.x + c0.x, (in_x0 && "
@@ -129,7 +131,7 @@ std::string GenerateConvCode(
     c += " int dz_3 = select(0, src_size.x * src_size.y, (in_x1 && "
          "in_y1));\n";
   }
-  } else if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+  } else if (is_image_buffer) {
     c += " bool in_x0 = xc0 >= 0 && xc0 < src_size.x;\n";
     c += " bool in_x1 = xc1 >= 0 && xc1 < src_size.x;\n";
     c += " bool in_y0 = yc0 >= 0 && yc0 < src_size.y;\n";
@@ -148,7 +150,7 @@ std::string GenerateConvCode(
   c += " int dz_3 = select(0, src_size.x * src_size.y, (in_x1 && in_y1));\n";
   }
   c += " for (int s = 0; s < src_size.w; ++s) {\n";
-  if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+  if (is_image_buffer) {
     c += " FLT4 src0 = " + src_tensor.Read3D("addr_0") + ";\n";
     c += " FLT4 src1 = " + src_tensor.Read3D("addr_1") + ";\n";
     c += " FLT4 src2 = " + src_tensor.Read3D("addr_2") + ";\n";
@@ -164,7 +166,7 @@ std::string GenerateConvCode(
   c += " FLT4 f5 = READ_IMAGE(filters1, smp_none, " + fc1 + ");\n";
   c += " FLT4 f6 = READ_IMAGE(filters2, smp_none, " + fc1 + ");\n";
   c += " FLT4 f7 = READ_IMAGE(filters3, smp_none, " + fc1 + ");\n";
-  if (src_descriptor.storage_type != TensorStorageType::IMAGE_BUFFER) {
+  if (!is_image_buffer) {
     const auto mode = GetFastestZeroMode(device);
     c += " FLT4 src0 = " + src_tensor.Read3D(s_x0, s_y0, "s", mode) + ";\n";
     c += " FLT4 src1 = " + src_tensor.Read3D(s_x1, s_y0, "s", mode) + ";\n";
@@ -182,7 +184,7 @@ std::string GenerateConvCode(
   if (!is1x1) {
     c += " filter_offset++;\n";
   }
-  if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+  if (is_image_buffer) {
    c += " addr_0 += dz_0;\n";
    c += " addr_1 += dz_1;\n";
    c += " addr_2 += dz_2;\n";
@@ -296,8 +298,7 @@ Status ConvTexture::Compile(const CreationContext& creation_context) {
       storage_type == TensorStorageType::TEXTURE_ARRAY &&
       definition_.precision == CalculationsPrecision::F16;
   std::string code =
-      GenerateConvCode(definition_.src_tensors[0], definition_.dst_tensors[0],
-                       definition_.precision, is1x1, adreno4xx_optimization,
+      GenerateConvCode(definition_, is1x1, adreno4xx_optimization,
                        *creation_context.device, linked_operations_);
   std::vector<CompilerOptions> options;
   if (UseFP16SIMD(*creation_context.device, definition_.precision, is1x1)) {
tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc
@@ -28,18 +28,18 @@ namespace cl {
 namespace {
 
 std::string GenerateConvolutionTransposedCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    const LinearStorage& biases, const CLDevice& device,
+    const OperationDef& op_def, const LinearStorage& biases,
+    const CLDevice& device,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
-  std::string c = GetCommonDefines(precision);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
+  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
+  std::string c = GetCommonDefines(op_def.precision);
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
     case CalculationsPrecision::F16:
-      if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+      if (src_tensor_type == TensorStorageType::BUFFER) {
         c += "#define CONV(R, S) \\\n";
         c += "R += S.x * f0.s0123; \\\n";
         c += "R += S.y * f0.s4567; \\\n";
@@ -54,7 +54,7 @@ std::string GenerateConvolutionTransposedCode(
       }
       break;
     case CalculationsPrecision::F32_F16:
-      if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+      if (src_tensor_type == TensorStorageType::BUFFER) {
         c += "#define CONV(R, S) \\\n";
         c += "R += convert_float4(S.x * f0.s0123 + S.y * f0.s4567 + S.z * "
              "f0.s89ab + S.w * f0.scdef);\n";
@@ -66,7 +66,7 @@ std::string GenerateConvolutionTransposedCode(
       break;
   }
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
       c += "#define FLT16 float16\n";
       break;
@@ -78,7 +78,7 @@ std::string GenerateConvolutionTransposedCode(
 
   c += "__kernel void main_function(\n";
   c += src_tensor.GetDeclaration(AccessType::READ) + ",\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
     c += " __global FLT16* filters, \n";
     c += " __global FLT4* biases";
   } else {
@@ -99,7 +99,7 @@ std::string GenerateConvolutionTransposedCode(
   c += " int Y = get_global_id(1);\n";
   c += " int Z = get_global_id(2);\n";
   c += " if (X >= dst_size.x || Y >= dst_size.y || Z >= dst_size.w) return;\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
     c += " int f_base = Z * src_size.w * kernel_size.x * kernel_size.y;\n";
   }
   c += " int2 offset = (int2)(X, Y) + padding - k_offset;\n";
@@ -126,7 +126,7 @@ std::string GenerateConvolutionTransposedCode(
   c += " bool out_x = s_x < 0 || s_x >= src_size.x;\n";
   c += " int kernel_index = index_y * kernel_size.x + index_x;\n";
   c += " if (inside_kernel && !(out_x || out_y)) {\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
    c += " int f_offset = f_base + kernel_index * src_size.w;\n";
  } else {
    c += " int x_c = kernel_index * src_size.w * 4;\n";
@@ -135,7 +135,7 @@ std::string GenerateConvolutionTransposedCode(
   c += " FLT4 src =" +
        src_tensor.Read3D("s_x", "s_y", "l", TextureAddressMode::DONT_CARE) +
        ";\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
    c += " FLT16 f0 = filters[f_offset]; f_offset++;\n";
  } else {
    c += " FLT4 f[4];\n";
@@ -217,9 +217,7 @@ ConvolutionTransposed& ConvolutionTransposed::operator=(
 
 Status ConvolutionTransposed::Compile(const CreationContext& creation_context) {
   const auto code = GenerateConvolutionTransposedCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, biases_, *creation_context.device,
-      linked_operations_);
+      definition_, biases_, *creation_context.device, linked_operations_);
 
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc
@@ -28,17 +28,16 @@ namespace cl {
 namespace {
 
 std::string GenerateConvolutionTransposedCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    const LinearStorage& biases, int src_depth, int dst_depth,
-    const CLDevice& device,
+    const OperationDef& op_def, const LinearStorage& biases, int src_depth,
+    int dst_depth, const CLDevice& device,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
+  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
     case CalculationsPrecision::F16:
       c += "#define CONV(R, SRC, F, i) \\\n";
@@ -78,7 +77,7 @@ std::string GenerateConvolutionTransposedCode(
   for (int s = 0; s < src_depth; ++s) {
     const std::string z = std::to_string(s);
     c += " {\n";
-    if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+    if (src_tensor_type == TensorStorageType::BUFFER) {
       c += " bool x_in = X + 1 < src_size.x;\n";
       c += " bool y_in = Y + 1 < src_size.y;\n";
       c += " FLT4 src0 = " + src_tensor.Read3D("X", "Y", z) + ";\n";
@@ -94,7 +93,7 @@ std::string GenerateConvolutionTransposedCode(
       c += " if (x_in && y_in) {\n";
       c += " src3 = " + src_tensor.Read3D("X + 1", "Y + 1", z) + ";\n";
       c += " }\n";
-    } else if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+    } else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
      c += " " + src_tensor.GetAddress("c0", "X", "Y", z) + ";\n";
      c += " " + src_tensor.GetAddress("c1", "X + 1", "Y", z) + ";\n";
      c += " " + src_tensor.GetAddress("c2", "X", "Y + 1", z) + ";\n";
@@ -197,8 +196,7 @@ ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=(
 Status ConvolutionTransposed3x3Thin::Compile(
     const CreationContext& creation_context) {
   const auto code = GenerateConvolutionTransposedCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, biases_, IntegralDivideRoundUp(src_channels_, 4),
+      definition_, biases_, IntegralDivideRoundUp(src_channels_, 4),
       IntegralDivideRoundUp(dst_channels_, 4), *creation_context.device,
       linked_operations_);
 
tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc
@@ -29,15 +29,13 @@ namespace cl {
 namespace {
 
 std::string GenerateConvolutionTransposedCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    int src_depth, int dst_channels, const int2& kernel_size,
-    const CLDevice& device,
+    const OperationDef& op_def, int src_depth, int dst_channels,
+    const int2& kernel_size, const CLDevice& device,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
   const std::string channel_x = dst_channels == 1 ? "" : ".x";
   const std::vector<std::string> postfix = {channel_x, ".y", ".z", ".w"};
   const std::vector<std::string> channel = {".x", ".y", ".z", ".w"};
@@ -47,7 +45,7 @@ std::string GenerateConvolutionTransposedCode(
 
   std::string accum_type;
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
     case CalculationsPrecision::F32_F16:
       accum_type = "float" + type_postfix;
@@ -80,7 +78,8 @@ std::string GenerateConvolutionTransposedCode(
       std::string r_s =
          " r[" + std::to_string(y) + "][" + std::to_string(x) + "]";
       const std::string to_accum =
-          precision == CalculationsPrecision::F32_F16 ? "convert_float" : "";
+          op_def.precision == CalculationsPrecision::F32_F16 ? "convert_float"
+                                                             : "";
       for (int d = 0; d < dst_channels; ++d) {
         c += r_s + postfix[d] + " = " + to_accum + "(dot(src, filters[" +
              std::to_string(index) + "]));\n";
@@ -90,7 +89,7 @@ std::string GenerateConvolutionTransposedCode(
   }
   c += " }\n";
   for (int i = 1; i < src_depth; ++i) {
-    if (precision != CalculationsPrecision::F32_F16) {
+    if (op_def.precision != CalculationsPrecision::F32_F16) {
       c += " if (X < src_size.x + " + std::to_string(i + 1) + ") {\n";
     } else {
       c += " {\n";
@@ -116,7 +115,7 @@ std::string GenerateConvolutionTransposedCode(
   c += " Y *= " + std::to_string(kernel_size.x) + ";\n";
   for (int y = 0; y < kernel_size.y; ++y) {
     for (int x = 0; x < kernel_size.x; ++x) {
-      if (precision != CalculationsPrecision::F32_F16) {
+      if (op_def.precision != CalculationsPrecision::F32_F16) {
        c += " if (X + " + std::to_string(x) + " < dst_size.x && ";
        c += "Y + " + std::to_string(y) + " < dst_size.y) {\n";
      } else {
@@ -185,10 +184,8 @@ ConvolutionTransposedThin& ConvolutionTransposedThin::operator=(
 Status ConvolutionTransposedThin::Compile(
     const CreationContext& creation_context) {
   const auto code = GenerateConvolutionTransposedCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, IntegralDivideRoundUp(src_channels_, 4),
-      dst_channels_, kernel_size_, *creation_context.device,
-      linked_operations_);
+      definition_, IntegralDivideRoundUp(src_channels_, 4), dst_channels_,
+      kernel_size_, *creation_context.device, linked_operations_);
 
   std::vector<CompilerOptions> options;
   if (definition_.precision == CalculationsPrecision::F16 &&
tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv.cc
@@ -73,23 +73,22 @@ std::string GetSrcValue(const TensorCodeGenerator& src_tensor,
 }
 
 std::string GenerateDepthWiseConvolutionCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    const LinearStorage& biases, int channel_multiplier,
+    const OperationDef& op_def, const LinearStorage& biases,
+    int channel_multiplier,
     const std::vector<ElementwiseOperation*>& linked_operations,
     const CLDevice& device) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
+  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
 
-  const bool manual_clamp =
-      src_descriptor.storage_type == TensorStorageType::BUFFER ||
-      src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER;
+  const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
+                            src_tensor_type == TensorStorageType::IMAGE_BUFFER;
 
   c += "__kernel void main_function(\n";
   c += src_tensor.GetDeclaration(AccessType::READ) + ",\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
     c += " __global FLT4* filters, \n";
   } else {
     c += " __read_only image2d_t filters, \n";
@@ -114,7 +113,7 @@ std::string GenerateDepthWiseConvolutionCode(
   c += " ACCUM_FLT4 r = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
   c += " int x_offseted = X * stride.x - padding.x;\n";
   c += " int y_offseted = Y * stride.y - padding.y;\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
    c += " int fx_c = Z * kernel_size.x * kernel_size.y;\n";
  } else {
    c += " int fx_c = 0;\n";
@@ -128,7 +127,7 @@ std::string GenerateDepthWiseConvolutionCode(
   c += " int x_c = x_offseted + kx * dilation.x;\n";
   c += " bool outside_x = x_c < 0 || x_c >= src_size.x;\n";
   c += " if (!outside_x && !outside_y) {\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
    c += " FLT4 f = filters[fx_c];\n";
  } else {
    c += " FLT4 f = READ_IMAGE(filters, smp_none, (int2)(fx_c, Z));\n";
@@ -210,8 +209,7 @@ DepthWiseConvolution& DepthWiseConvolution::operator=(
 
 Status DepthWiseConvolution::Compile(const CreationContext& creation_context) {
   const auto code = GenerateDepthWiseConvolutionCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, biases_, channel_multiplier_, linked_operations_,
+      definition_, biases_, channel_multiplier_, linked_operations_,
       *creation_context.device);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3.cc
@@ -29,19 +29,18 @@ namespace cl {
 namespace {
 
 std::string GenerateDepthWiseConvCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations,
     const CLDevice& device, bool weights_are_buffer, bool local_mem_uploads) {
-  std::string c = GetCommonDefines(precision);
-  TensorCodeGenerator src_tensor("src_data", "dst_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  std::string c = GetCommonDefines(op_def.precision);
+  TensorCodeGenerator src_tensor("src_data", "dst_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
+  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
 
   const auto mode = GetFastestZeroMode(device);
 
-  const bool manual_clamp =
-      src_descriptor.storage_type == TensorStorageType::BUFFER ||
-      src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER;
+  const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
+                            src_tensor_type == TensorStorageType::IMAGE_BUFFER;
 
   if (local_mem_uploads) {
     c += "__attribute__((reqd_work_group_size(8, 4, 1)))\n";
@@ -119,7 +118,7 @@ std::string GenerateDepthWiseConvCode(
   c += " y1 = clamp(y1, 0, dst_size.y - 1);\n";
   c += " y2 = clamp(y2, 0, dst_size.y - 1);\n";
   c += " y3 = clamp(y3, 0, dst_size.y - 1);\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (src_tensor_type == TensorStorageType::BUFFER) {
     c += " __global FLT4* src_loc = src_data + Z * dst_size.x * "
          "dst_size.y;\n";
   }
@@ -145,7 +144,7 @@ std::string GenerateDepthWiseConvCode(
     bias = "f[9]";
   }
   auto read_4x_line = [&](int y) {
-    if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+    if (src_tensor_type == TensorStorageType::BUFFER) {
      const std::string y_in = "y" + std::to_string(y) + "_in";
      c += " s0 = src_loc[" + yc[y] + " * dst_size.x + " + xc[0] +
           "] * (FLT)(x0_in && " + y_in + ");\n";
@@ -155,7 +154,7 @@ std::string GenerateDepthWiseConvCode(
          "] * (FLT)(x2_in && " + y_in + ");\n";
     c += " s3 = src_loc[" + yc[y] + " * dst_size.x + " + xc[3] +
          "] * (FLT)(x3_in && " + y_in + ");\n";
-    } else if (src_descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+    } else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
      const std::string y_in = "y" + std::to_string(y) + "_in";
      c += " s0 = " + src_tensor.Read3D(xc[0], yc[y], "Z", mode) +
           " * (FLT)(x0_in && " + y_in + ");\n";
@@ -296,8 +295,7 @@ DepthWiseConv3x3& DepthWiseConv3x3::operator=(DepthWiseConv3x3&& operation) {
 
 Status DepthWiseConv3x3::Compile(const CreationContext& creation_context) {
   std::string code = GenerateDepthWiseConvCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, linked_operations_, *creation_context.device,
+      definition_, linked_operations_, *creation_context.device,
      weights_are_buffer_, local_mem_uploads_);
   std::vector<CompilerOptions> options;
   if (definition_.precision == CalculationsPrecision::F16 &&
tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_texture.cc
@@ -32,16 +32,15 @@ namespace {
 // optimized shaders
 
 std::string GetFullyConnectedKernelCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations,
     const int3& work_group_size) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
 
-  switch (precision) {
+  switch (op_def.precision) {
     case CalculationsPrecision::F32:
       c += "#define READ_IMAGE read_imagef\n";
       break;
@@ -70,7 +69,7 @@ std::string GetFullyConnectedKernelCode(
   c += " for (int i = 0; i < src_depth_x4; ++i, c += 4, c2 += 8) {\n";
   c += " FLT4 v = " +
        src_tensor.Read3D("0", "0", "c", TextureAddressMode::DONT_CARE) + ";\n";
-  if (precision != CalculationsPrecision::F32) {
+  if (op_def.precision != CalculationsPrecision::F32) {
     c += " half8 m0 = as_half8(read_imagef(filters, smp_none, (int2)(gid, "
          "c2+0)));\n";
     c += " half8 m1 = as_half8(read_imagef(filters, smp_none, (int2)(gid, "
@@ -143,8 +142,7 @@ Status FullyConnectedTexture::Compile(const CreationContext& creation_context) {
     work_group_size_ = {wg_width, wg_height, 1};
     wg_width /= 2;
   const auto code = GetFullyConnectedKernelCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, linked_operations_, work_group_size_);
+      definition_, linked_operations_, work_group_size_);
   RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_));
tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
@@ -26,19 +26,16 @@ namespace cl {
 namespace {
 
 std::string GetMaxUnoolingKernelCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& src_ind_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    const CLDevice& device,
+    const OperationDef& op_def, const CLDevice& device,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src("src_data", "src_size", src_descriptor);
+  TensorCodeGenerator src("src_data", "src_size", op_def.src_tensors[0]);
   TensorCodeGenerator src_ind("src_data_indices", "src_size",
-                              src_ind_descriptor);
-  TensorCodeGenerator dst("dst_data", "dst_size", dst_descriptor);
+                              op_def.src_tensors[1]);
+  TensorCodeGenerator dst("dst_data", "dst_size", op_def.dst_tensors[0]);
 
   const auto address_mode = GetFastestZeroMode(device);
 
-  std::string code = GetCommonDefines(precision);
+  std::string code = GetCommonDefines(op_def.precision);
 
   code += "__kernel void main_function(\n";
   code += src.GetDeclaration(AccessType::READ) + ",\n";
@@ -59,7 +56,7 @@ std::string GetMaxUnoolingKernelCode(
   code += " int src_x = (X + padding.x) / stride.x;\n";
   code += " int src_y = (Y + padding.y) / stride.y;\n";
   code += " " + src.GetAddress("src_adr", "src_x", "src_y", "Z") + "\n";
-  if (src_descriptor.storage_type == TensorStorageType::BUFFER) {
+  if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER) {
    code += " bool outside = src_x < 0 || src_y < 0 ||";
    code += " src_x >= src_size.x || src_y >= src_size.y;\n";
    code += " FLT4 src = (FLT4)(0.0f);\n";
@@ -123,9 +120,7 @@ MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) {
 
 Status MaxUnpooling::Compile(const CreationContext& creation_context) {
   const auto code = GetMaxUnoolingKernelCode(
-      definition_.src_tensors[0], definition_.src_tensors[1],
-      definition_.dst_tensors[0], definition_.precision,
-      *creation_context.device, linked_operations_);
+      definition_, *creation_context.device, linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/padding.cc
@@ -26,13 +26,12 @@ namespace cl {
 namespace {
 
 std::string GetPaddingCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string code = GetCommonDefines(precision);
+  std::string code = GetCommonDefines(op_def.precision);
   const std::string channels[] = {".x", ".y", ".z", ".w"};
 
   code += "__kernel void main_function(\n";
@@ -109,9 +108,7 @@ void Padding::SetPrepended(const int3& prepended) {
 }
 
 Status Padding::Compile(const CreationContext& creation_context) {
-  const auto code =
-      GetPaddingCode(definition_.src_tensors[0], definition_.dst_tensors[0],
-                     definition_.precision, linked_operations_);
+  const auto code = GetPaddingCode(definition_, linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc
@@ -26,13 +26,12 @@ namespace cl {
 namespace {
 
 std::string GetReshapeCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
   c += "__kernel void main_function(\n";
   c += src_tensor.GetDeclaration(AccessType::READ);
   c += GetArgsDeclaration(linked_operations);
@@ -93,9 +92,7 @@ Reshape& Reshape::operator=(Reshape&& operation) {
 }
 
 Status Reshape::Compile(const CreationContext& creation_context) {
-  const auto code =
-      GetReshapeCode(definition_.src_tensors[0], definition_.dst_tensors[0],
-                     definition_.precision, linked_operations_);
+  const auto code = GetReshapeCode(definition_, linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc
@@ -26,13 +26,12 @@ namespace cl {
 namespace {
 
 std::string GetReshapeCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
   c += "__kernel void main_function(\n";
   c += src_tensor.GetDeclaration(AccessType::READ);
   c += GetArgsDeclaration(linked_operations);
@@ -78,9 +77,7 @@ Reshapex4& Reshapex4::operator=(Reshapex4&& operation) {
 }
 
 Status Reshapex4::Compile(const CreationContext& creation_context) {
-  const auto code =
-      GetReshapeCode(definition_.src_tensors[0], definition_.dst_tensors[0],
-                     definition_.precision, linked_operations_);
+  const auto code = GetReshapeCode(definition_, linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc
@@ -27,13 +27,12 @@ namespace cl {
 namespace {
 
 std::string GetSoftmaxKernelCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "size", op_def.dst_tensors[0]);
 
-  std::string code = GetCommonDefines(precision);
+  std::string code = GetCommonDefines(op_def.precision);
   code += "__kernel void main_function(\n";
   code += src_tensor.GetDeclaration(AccessType::READ);
   code += GetArgsDeclaration(linked_operations);
@@ -92,9 +91,7 @@ Softmax& Softmax::operator=(Softmax&& kernel) {
 }
 
 Status Softmax::Compile(const CreationContext& creation_context) {
-  const auto code = GetSoftmaxKernelCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, linked_operations_);
+  const auto code = GetSoftmaxKernelCode(definition_, linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc
@@ -26,13 +26,14 @@ namespace cl {
 namespace {
 
 std::string GetSoftmaxKernelCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "tensor_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "tensor_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "tensor_size",
+                                 op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "tensor_size",
+                                 op_def.dst_tensors[0]);
 
-  std::string code = GetCommonDefines(precision);
+  std::string code = GetCommonDefines(op_def.precision);
   code += "__kernel void main_function(\n";
   code += src_tensor.GetDeclaration(AccessType::READ);
   code += GetArgsDeclaration(linked_operations);
@@ -110,9 +111,7 @@ Softmax1x1& Softmax1x1::operator=(Softmax1x1&& kernel) {
 }
 
 Status Softmax1x1::Compile(const CreationContext& creation_context) {
-  const auto code = GetSoftmaxKernelCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, linked_operations_);
+  const auto code = GetSoftmaxKernelCode(definition_, linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc
@@ -26,14 +26,12 @@ namespace cl {
 namespace {
 
 std::string GetStridedSliceCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
-    bool alignedx4,
+    const OperationDef& op_def, bool alignedx4,
    const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
   c += "__kernel void main_function(\n";
   c += src_tensor.GetDeclaration(AccessType::READ);
   c += GetArgsDeclaration(linked_operations);
@@ -144,9 +142,8 @@ StridedSlice& StridedSlice::operator=(StridedSlice&& operation) {
 }
 
 Status StridedSlice::Compile(const CreationContext& creation_context) {
-  const auto code = GetStridedSliceCode(
-      definition_.src_tensors[0], definition_.dst_tensors[0],
-      definition_.precision, Is4Alighed(attributes_), linked_operations_);
+  const auto code = GetStridedSliceCode(definition_, Is4Alighed(attributes_),
+                                        linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);
tensorflow/lite/delegates/gpu/cl/kernels/upsample.cc
@@ -25,13 +25,12 @@ namespace cl {
 namespace {
 
 std::string GetUpsampleCode(
-    const TensorDescriptor& src_descriptor,
-    const TensorDescriptor& dst_descriptor, CalculationsPrecision precision,
+    const OperationDef& op_def,
     const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor("src_data", "src_size", src_descriptor);
-  TensorCodeGenerator dst_tensor("dst_data", "dst_size", dst_descriptor);
+  TensorCodeGenerator src_tensor("src_data", "src_size", op_def.src_tensors[0]);
+  TensorCodeGenerator dst_tensor("dst_data", "dst_size", op_def.dst_tensors[0]);
 
-  std::string c = GetCommonDefines(precision);
+  std::string c = GetCommonDefines(op_def.precision);
   c += "__kernel void main_function(\n";
   c += src_tensor.GetDeclaration(AccessType::READ);
   c += GetArgsDeclaration(linked_operations);
@@ -96,9 +95,7 @@ Upsample& Upsample::operator=(Upsample&& operation) {
 }
 
 Status Upsample::Compile(const CreationContext& creation_context) {
-  const auto code =
-      GetUpsampleCode(definition_.src_tensors[0], definition_.dst_tensors[0],
-                      definition_.precision, linked_operations_);
+  const auto code = GetUpsampleCode(definition_, linked_operations_);
   return creation_context.cache->GetOrCreateCLKernel(
       code, "main_function", *creation_context.context,
       *creation_context.device, &kernel_);