Added layout to TensorDescriptor.

PiperOrigin-RevId: 289392420
Change-Id: Ia709c0dfe8d124d28e4cd8067436960d910f20b7
2020-01-13 01:01:36 -08:00 · 2020-01-13 01:01:36 -08:00 · c5c5f0317a
commit c5c5f0317a
parent e69cdffeea
36 changed files with 326 additions and 231 deletions
--- a/tensorflow/lite/delegates/gpu/cl/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/BUILD
@ -27,6 +27,7 @@ cc_library(
        "//tensorflow/lite/delegates/gpu/cl/kernels:converter",
        "//tensorflow/lite/delegates/gpu/common:data_type",
        "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:shape",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/common:tensor",
        "@com_google_absl//absl/memory",
@ -230,6 +231,7 @@ cc_library(
        ":tensor_type",
        ":util",
        "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:shape",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/common:tensor",
    ],
@ -305,6 +307,7 @@ cc_library(
        "//tensorflow/lite/delegates/gpu/common:model",
        "//tensorflow/lite/delegates/gpu/common:model_transformer",
        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:shape",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/common:tensor",
        "//tensorflow/lite/delegates/gpu/common:types",
@ -430,6 +433,7 @@ cc_library(
    hdrs = ["tensor_type.h"],
    deps = [
        "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:shape",
    ],
 )

--- a/tensorflow/lite/delegates/gpu/cl/api.cc
+++ b/tensorflow/lite/delegates/gpu/cl/api.cc
@ -35,6 +35,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/tensor.h"

 namespace tflite {
@ -157,7 +158,8 @@ class DefaultTensorTie : public TensorTie {
        const TensorDescriptor desc{
            d.object_def.data_type,
            ToTensorStorageType(d.object_def.object_type,
-                                d.object_def.data_layout)};
+                                d.object_def.data_layout),
+            Layout::BHWC};
        RETURN_IF_ERROR(AllocateTensorMemory(env->context(), env->device(),
                                             shape, desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
--- a/tensorflow/lite/delegates/gpu/cl/environment.cc
+++ b/tensorflow/lite/delegates/gpu/cl/environment.cc
@ -20,6 +20,7 @@ limitations under the License.

 #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"

 namespace tflite {
 namespace gpu {
@ -58,7 +59,8 @@ Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
  const BHWC shape(1, 4, 4, 4);
  RETURN_IF_ERROR(CreateTensor(
      env->context(), env->device(), shape,
-      {DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY}, &tensor));
+      {DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY, Layout::HWC},
+      &tensor));
  RETURN_IF_ERROR(kernel.SetMemory(0, tensor.GetMemoryPtr()));
  RETURN_IF_ERROR(env->queue()->DispatchImplicit(kernel, {4, 4, 1}, {4, 4, 1}));
  TensorFloat32 tensor_gpu;
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@ -36,6 +36,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/transformations/add_bias.h"
 #include "tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
@ -112,16 +113,18 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
                                        const CLDevice& device,
                                        const BHWC& shape,
                                        const TensorStorageType& desired,
-                                        const DataType& data_type) {
+                                        const DataType& data_type,
+                                        const Layout& layout) {
  if (CanCreateTensorWithShape(context, device, shape,
-                               TensorDescriptor{data_type, desired})) {
+                               TensorDescriptor{data_type, desired, layout})) {
    return desired;
  }
  auto GetBestTypeAfterTextureArray = [&]() {
    if (device.SupportsImageBuffer() &&
        CanCreateTensorWithShape(
            context, device, shape,
-            TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER})) {
+            TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER,
+                             layout})) {
      return TensorStorageType::IMAGE_BUFFER;
    } else {
      return TensorStorageType::BUFFER;
@ -131,7 +134,8 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
    if (device.SupportsTextureArray() &&
        CanCreateTensorWithShape(
            context, device, shape,
-            TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY})) {
+            TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY,
+                             layout})) {
      return TensorStorageType::TEXTURE_ARRAY;
    } else {
      return GetBestTypeAfterTextureArray();
@ -140,7 +144,8 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
  auto GetBestTypeAfterTexture3D = [&]() {
    if (CanCreateTensorWithShape(
            context, device, shape,
-            TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D})) {
+            TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D,
+                             layout})) {
      return TensorStorageType::TEXTURE_2D;
    } else {
      return GetBestTypeAfterTexture2D();
@ -256,20 +261,21 @@ void InferenceContext::ReserveGraphTensors(
  for (auto& t : tensors) {
    TensorStorageType storage_type = create_info.storage_type;
    const auto shape = graph.GetValue(t->id)->tensor.shape;
+    Layout layout = shape.b == 1 ? Layout::HWC : Layout::BHWC;
    if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) {
      if (shape.c < 4 &&
          CanCreateTensorWithShape(
              *creation_context.context, *creation_context.device, shape,
-              TensorDescriptor{data_type,
-                               TensorStorageType::SINGLE_TEXTURE_2D})) {
+              TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D,
+                               layout})) {
        storage_type = TensorStorageType::SINGLE_TEXTURE_2D;
      }
    }
    storage_type = SelectBestStorageType(*creation_context.context,
                                         *creation_context.device, shape,
-                                         storage_type, data_type);
-    tensor_reserver_.Add(t->id,
-                         {shape, TensorDescriptor{data_type, storage_type}});
+                                         storage_type, data_type, layout);
+    tensor_reserver_.Add(
+        t->id, {shape, TensorDescriptor{data_type, storage_type, layout}});
    max_id = std::max(max_id, t->id);
  }
  tensor_reserver_.SetNext(max_id + 1);
--- a/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc
@ -45,9 +45,9 @@ TEST_F(OpenCLOperationTest, AddTwoEqualTensors) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Add operation = CreateAdd(op_def, channels, channels[0]);
      ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@ -73,9 +73,9 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasMoreChannelsThanSecond) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Add operation = CreateAdd(op_def, channels, channels[0]);
      ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@ -103,9 +103,9 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasLessChannelsThanSecond) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Add operation = CreateAdd(op_def, channels, 6);
      ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/apply_mask_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/apply_mask_test.cc
@ -45,9 +45,9 @@ TEST_F(OpenCLOperationTest, ApplyMaskOneChannel) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ApplyMask operation =
          CreateApplyMask(op_def, src_tensor.shape, mask_tensor.shape);
@ -75,9 +75,9 @@ TEST_F(OpenCLOperationTest, ApplyMaskEqualSizes) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ApplyMask operation =
          CreateApplyMask(op_def, src_tensor.shape, mask_tensor.shape);
@ -105,9 +105,9 @@ TEST_F(OpenCLOperationTest, ApplyMaskVector) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ApplyMask operation =
          CreateApplyMask(op_def, src_tensor.shape, mask_tensor.shape);
--- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc
@ -47,9 +47,9 @@ TEST_F(OpenCLOperationTest, ConcatWidth) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConcatXY operation = CreateConcatXY(op_def, attr, 2);
      ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@ -79,9 +79,9 @@ TEST_F(OpenCLOperationTest, ConcatHeight) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConcatXY operation = CreateConcatXY(op_def, attr, 2);
      ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@ -112,10 +112,10 @@ TEST_F(OpenCLOperationTest, ConcatChannels) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConcatZ operation = CreateConcatZ(op_def, {1, 2, 3});
      ASSERT_OK(ExecuteGPUOperation({src0, src1, src2}, creation_context_,
@ -146,9 +146,9 @@ TEST_F(OpenCLOperationTest, ConcatChannelsAlignedx4) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConcatZ operation = CreateConcatZ(op_def, {4, 4});
      ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc
@ -51,8 +51,10 @@ TEST_F(OpenCLOperationTest, ConvBuffer1x1SimpleWeights) {
    OperationDef op_def;
    op_def.precision = precision;
    auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
    TensorFloat32 dst_tensor;
    ConvBuffer1x1 operation;
    ASSERT_OK(CreateConvBuffer1x1(creation_context_, op_def, attr, &operation));
@ -84,8 +86,10 @@ TEST_F(OpenCLOperationTest, ConvBuffer1x1) {
    OperationDef op_def;
    op_def.precision = precision;
    auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
    TensorFloat32 dst_tensor;
    ConvBuffer1x1 operation;
    ASSERT_OK(CreateConvBuffer1x1(creation_context_, op_def, attr, &operation));
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_test.cc
@ -51,8 +51,10 @@ TEST_F(OpenCLOperationTest, ConvBufferSimpleWeights) {
    OperationDef op_def;
    op_def.precision = precision;
    auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
    TensorFloat32 dst_tensor;
    ConvBuffer operation;
    ASSERT_OK(CreateConvBuffer(creation_context_, op_def, attr, &operation));
@ -84,8 +86,10 @@ TEST_F(OpenCLOperationTest, ConvBuffer) {
    OperationDef op_def;
    op_def.precision = precision;
    auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
    TensorFloat32 dst_tensor;
    ConvBuffer operation;
    ASSERT_OK(CreateConvBuffer(creation_context_, op_def, attr, &operation));
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc
@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvConstantsSimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvConstants operation;
      ASSERT_OK(
@ -88,8 +88,8 @@ TEST_F(OpenCLOperationTest, ConvConstants) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvConstants operation;
      ASSERT_OK(
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc
@ -54,8 +54,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVR1x1SimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvPowerVR operation;
      ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
@ -89,8 +89,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVR1x1) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvPowerVR operation;
      ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
@ -124,8 +124,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVRSimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvPowerVR operation;
      ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
@ -159,8 +159,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVR) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvPowerVR operation;
      ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvTextureSimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvTexture operation;
      ASSERT_OK(CreateConvTexture(creation_context_, op_def, attr, &operation));
@ -87,8 +87,8 @@ TEST_F(OpenCLOperationTest, ConvTexture) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvTexture operation;
      ASSERT_OK(CreateConvTexture(creation_context_, op_def, attr, &operation));
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
@ -51,8 +51,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3ThinSimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvolutionTransposed3x3Thin operation;
      ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
@ -87,8 +87,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3Thin) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvolutionTransposed3x3Thin operation;
      ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc
@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed4x4) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvolutionTransposed4x4 operation;
      ASSERT_OK(CreateConvolutionTransposed4x4(creation_context_, op_def, attr,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc
@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedSimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvolutionTransposed operation;
      ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
@ -91,8 +91,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvolutionTransposed operation;
      ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc
@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThinSimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvolutionTransposedThin operation;
      ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
@ -91,8 +91,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThin) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ConvolutionTransposedThin operation;
      ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
@ -53,8 +53,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      DepthWiseConv3x3 operation;
      ASSERT_OK(
@ -90,8 +90,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      DepthWiseConv3x3 operation;
      ASSERT_OK(
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_test.cc
@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      DepthWiseConvolution operation;
      ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
@ -88,8 +88,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      DepthWiseConvolution operation;
      ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
@ -125,8 +125,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvMultiplier2) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      DepthWiseConvolution operation;
      ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc
@ -41,8 +41,8 @@ TEST_F(OpenCLOperationTest, Abs) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::ABS);
@ -66,8 +66,8 @@ TEST_F(OpenCLOperationTest, Cos) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::COS);
@ -92,8 +92,8 @@ TEST_F(OpenCLOperationTest, HardSwish) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::HARD_SWISH);
@ -118,8 +118,8 @@ TEST_F(OpenCLOperationTest, Log) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::LOG);
@ -143,8 +143,8 @@ TEST_F(OpenCLOperationTest, Rsqrt) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::RSQRT);
@ -170,8 +170,8 @@ TEST_F(OpenCLOperationTest, Sigmoid) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::SIGMOID);
@ -194,8 +194,8 @@ TEST_F(OpenCLOperationTest, Sin) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::SIN);
@ -220,8 +220,8 @@ TEST_F(OpenCLOperationTest, Sqrt) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::SQRT);
@ -246,8 +246,8 @@ TEST_F(OpenCLOperationTest, Square) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::SQUARE);
@ -270,8 +270,8 @@ TEST_F(OpenCLOperationTest, Tanh) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseOneInput operation =
          CreateElementwiseOneInput(op_def, OperationType::TANH);
@ -298,9 +298,9 @@ TEST_F(OpenCLOperationTest, Sub) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseTwoInput operation =
          CreateElementwiseTwoInput(op_def, OperationType::SUB);
@ -326,9 +326,9 @@ TEST_F(OpenCLOperationTest, SquaredDiff) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseTwoInput operation =
          CreateElementwiseTwoInput(op_def, OperationType::SQUARED_DIFF);
@ -354,9 +354,9 @@ TEST_F(OpenCLOperationTest, Div) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseTwoInput operation =
          CreateElementwiseTwoInput(op_def, OperationType::DIV);
@ -382,9 +382,9 @@ TEST_F(OpenCLOperationTest, Pow) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ElementwiseTwoInput operation =
          CreateElementwiseTwoInput(op_def, OperationType::POW);
--- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_texture_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_texture_test.cc
@ -48,8 +48,8 @@ TEST_F(OpenCLOperationTest, FullyConnectedTexture) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      FullyConnectedTexture operation;
      ASSERT_OK(CreateFullyConnectedTexture(creation_context_, op_def, attr,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
@ -61,10 +61,10 @@ TEST_F(OpenCLOperationTest, LSTM) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::BHWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::BHWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC});
      TensorFloat32 new_state;
      TensorFloat32 new_activ;
      LSTM operation = CreateLSTM(op_def);
--- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc
@ -51,9 +51,9 @@ TEST_F(OpenCLOperationTest, MaxUnpooling) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      MaxUnpooling operation = CreateMaxUnpooling(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation({src_tensor, src_ind_tensor},
--- a/tensorflow/lite/delegates/gpu/cl/kernels/multiply_add_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/multiply_add_test.cc
@ -49,8 +49,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddVectorMul) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      MultiplyAdd operation;
      ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@ -79,8 +79,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddVectorAdd) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      MultiplyAdd operation;
      ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@ -106,8 +106,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddScalarMul) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      MultiplyAdd operation;
      ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@ -133,8 +133,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddScalarAdd) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      MultiplyAdd operation;
      ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@ -167,8 +167,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddVectorMad) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      MultiplyAdd operation;
      ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, mul_attr, add_attr,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc
@ -46,8 +46,8 @@ TEST_F(OpenCLOperationTest, PaddingAppendWidth) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Padding operation = CreatePadding(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -74,8 +74,8 @@ TEST_F(OpenCLOperationTest, PaddingPrependWidth) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Padding operation = CreatePadding(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -102,8 +102,8 @@ TEST_F(OpenCLOperationTest, PaddingAppendHeight) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Padding operation = CreatePadding(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -130,8 +130,8 @@ TEST_F(OpenCLOperationTest, PaddingPrependHeight) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Padding operation = CreatePadding(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -158,8 +158,8 @@ TEST_F(OpenCLOperationTest, PaddingAppendChannels) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Padding operation = CreatePadding(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -186,8 +186,8 @@ TEST_F(OpenCLOperationTest, PaddingPrependChannels) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Padding operation = CreatePadding(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -214,8 +214,8 @@ TEST_F(OpenCLOperationTest, PaddingComplex) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Padding operation = CreatePadding(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc
@ -49,8 +49,8 @@ TEST_F(OpenCLOperationTest, AveragePooling) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Pooling operation = CreatePooling(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -78,8 +78,8 @@ TEST_F(OpenCLOperationTest, AveragePoolingNonEmptyPadding) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Pooling operation = CreatePooling(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -108,8 +108,8 @@ TEST_F(OpenCLOperationTest, MaxPooling) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Pooling operation = CreatePooling(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -138,9 +138,9 @@ TEST_F(OpenCLOperationTest, MaxPoolingIndices) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      TensorFloat32 dst_tensor_ind;
      Pooling operation = CreatePooling(op_def, attr);
--- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc
@ -49,8 +49,8 @@ TEST_F(OpenCLOperationTest, PReLUAlpha) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      PReLU operation;
      ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
@ -80,8 +80,8 @@ TEST_F(OpenCLOperationTest, PReLUAlphaClip) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      PReLU operation;
      ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
--- a/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc
@ -46,8 +46,8 @@ TEST_F(OpenCLOperationTest, ReLUNoClipNoAlpha) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ReLU operation = CreateReLU(creation_context_, op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -73,8 +73,8 @@ TEST_F(OpenCLOperationTest, ReLUClip) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ReLU operation = CreateReLU(creation_context_, op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -100,8 +100,8 @@ TEST_F(OpenCLOperationTest, ReLUAlpha) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ReLU operation = CreateReLU(creation_context_, op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -127,8 +127,8 @@ TEST_F(OpenCLOperationTest, ReLUAlphaClip) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      ReLU operation = CreateReLU(creation_context_, op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape_test.cc
@ -42,8 +42,8 @@ TEST_F(OpenCLOperationTest, Reshape) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Reshape operation = CreateReshape(op_def);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4_test.cc
@ -42,8 +42,8 @@ TEST_F(OpenCLOperationTest, Reshapex4) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Reshapex4 operation = CreateReshapex4(op_def);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1_test.cc
@ -45,8 +45,8 @@ TEST_F(OpenCLOperationTest, Softmax1x1) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Softmax1x1 operation = CreateSoftmax1x1(op_def);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax_test.cc
@ -45,8 +45,8 @@ TEST_F(OpenCLOperationTest, Softmax) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Softmax operation = CreateSoftmax(op_def);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice_test.cc
@ -53,8 +53,8 @@ TEST_F(OpenCLOperationTest, StridedSlice) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      StridedSlice operation = CreateStridedSlice(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose_test.cc
@ -45,8 +45,8 @@ TEST_F(OpenCLOperationTest, Transpose) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Transpose operation = CreateTranspose(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/kernels/upsample_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/upsample_test.cc
@ -47,8 +47,8 @@ TEST_F(OpenCLOperationTest, UpsampleBilinearAligned) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Upsample operation = CreateUpsample(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@ -78,8 +78,8 @@ TEST_F(OpenCLOperationTest, UpsampleBilinearNonAligned) {
      OperationDef op_def;
      op_def.precision = precision;
      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
      Upsample operation = CreateUpsample(op_def, attr);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
--- a/tensorflow/lite/delegates/gpu/cl/tensor_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/tensor_test.cc
@ -89,110 +89,173 @@ Status Tensor5DGenericTest(const BHWDC& shape,
  return OkStatus();
 }

-Status TensorTests(const TensorDescriptor& descriptor, Environment* env) {
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(1, 6, 7, 3), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(1, 1, 4, 12), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(1, 6, 1, 7), descriptor, env));
+Status TensorTests(DataType data_type, TensorStorageType storage_type,
+                   Environment* env) {
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(1, 6, 7, 3), {data_type, storage_type, Layout::HWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(1, 1, 4, 12), {data_type, storage_type, Layout::HWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(1, 6, 1, 7), {data_type, storage_type, Layout::HWC}, env));

  // Batch tests
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(2, 6, 7, 3), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(4, 1, 4, 12), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(7, 6, 1, 7), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(13, 7, 3, 3), descriptor, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(2, 6, 7, 3), {data_type, storage_type, Layout::BHWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(4, 1, 4, 12), {data_type, storage_type, Layout::BHWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(7, 6, 1, 7), {data_type, storage_type, Layout::BHWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(13, 7, 3, 3), {data_type, storage_type, Layout::BHWC}, env));

  // 5D tests with batch = 1
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(1, 6, 7, 4, 3), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(1, 1, 4, 3, 12), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(1, 6, 1, 7, 7), descriptor, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(1, 6, 7, 4, 3), {data_type, storage_type, Layout::HWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(1, 1, 4, 3, 12), {data_type, storage_type, Layout::HWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(1, 6, 1, 7, 7), {data_type, storage_type, Layout::HWDC}, env));

  // 5D tests
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(2, 6, 7, 1, 3), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(4, 1, 4, 2, 12), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(7, 6, 1, 3, 7), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(13, 7, 3, 4, 3), descriptor, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(2, 6, 7, 1, 3), {data_type, storage_type, Layout::BHWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(4, 1, 4, 2, 12), {data_type, storage_type, Layout::BHWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(7, 6, 1, 3, 7), {data_type, storage_type, Layout::BHWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(13, 7, 3, 4, 3), {data_type, storage_type, Layout::BHWDC}, env));
  return OkStatus();
 }

 TEST_F(OpenCLTest, BufferF32) {
-  ASSERT_OK(TensorTests({DataType::FLOAT32, TensorStorageType::BUFFER}, &env_));
+  ASSERT_OK(TensorTests(DataType::FLOAT32, TensorStorageType::BUFFER, &env_));
 }

 TEST_F(OpenCLTest, BufferF16) {
-  ASSERT_OK(TensorTests({DataType::FLOAT16, TensorStorageType::BUFFER}, &env_));
+  ASSERT_OK(TensorTests(DataType::FLOAT16, TensorStorageType::BUFFER, &env_));
 }

 TEST_F(OpenCLTest, Texture2DF32) {
  ASSERT_OK(
-      TensorTests({DataType::FLOAT32, TensorStorageType::TEXTURE_2D}, &env_));
+      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_2D, &env_));
 }

 TEST_F(OpenCLTest, Texture2DF16) {
  ASSERT_OK(
-      TensorTests({DataType::FLOAT16, TensorStorageType::TEXTURE_2D}, &env_));
+      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_2D, &env_));
 }

 TEST_F(OpenCLTest, Texture3DF32) {
  ASSERT_OK(
-      TensorTests({DataType::FLOAT32, TensorStorageType::TEXTURE_3D}, &env_));
+      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_3D, &env_));
 }

 TEST_F(OpenCLTest, Texture3DF16) {
  ASSERT_OK(
-      TensorTests({DataType::FLOAT16, TensorStorageType::TEXTURE_3D}, &env_));
+      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_3D, &env_));
 }

 TEST_F(OpenCLTest, TextureArrayF32) {
-  ASSERT_OK(TensorTests({DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY},
-                        &env_));
+  ASSERT_OK(
+      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY, &env_));
 }

 TEST_F(OpenCLTest, TextureArrayF16) {
-  ASSERT_OK(TensorTests({DataType::FLOAT16, TensorStorageType::TEXTURE_ARRAY},
-                        &env_));
+  ASSERT_OK(
+      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_ARRAY, &env_));
 }

 TEST_F(OpenCLTest, ImageBufferF32) {
  ASSERT_OK(
-      TensorTests({DataType::FLOAT32, TensorStorageType::IMAGE_BUFFER}, &env_));
+      TensorTests(DataType::FLOAT32, TensorStorageType::IMAGE_BUFFER, &env_));
 }

 TEST_F(OpenCLTest, ImageBufferF16) {
  ASSERT_OK(
-      TensorTests({DataType::FLOAT16, TensorStorageType::IMAGE_BUFFER}, &env_));
+      TensorTests(DataType::FLOAT16, TensorStorageType::IMAGE_BUFFER, &env_));
 }

 TEST_F(OpenCLTest, SingleTextureF32) {
  ASSERT_OK(TensorGenericTest(
      BHWC(1, 6, 14, 1),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));
  ASSERT_OK(TensorGenericTest(
      BHWC(1, 6, 14, 2),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));

  // Batch tests
  ASSERT_OK(TensorGenericTest(
      BHWC(7, 6, 14, 1),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
  ASSERT_OK(TensorGenericTest(
      BHWC(3, 6, 14, 2),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
+
+  // 5D tests with batch = 1
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 7, 1),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 4, 2),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+
+  // 5D tests
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(7, 6, 14, 5, 1),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(3, 6, 14, 3, 2),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
 }

 TEST_F(OpenCLTest, SingleTextureF16) {
  ASSERT_OK(TensorGenericTest(
      BHWC(1, 6, 3, 1),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));
  ASSERT_OK(TensorGenericTest(
      BHWC(1, 6, 3, 2),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));

  // Batch tests
  ASSERT_OK(TensorGenericTest(
      BHWC(7, 6, 3, 1),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
  ASSERT_OK(TensorGenericTest(
      BHWC(3, 6, 3, 2),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
+
+  // 5D tests with batch = 1
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 7, 1),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 4, 2),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+
+  // 5D tests
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(7, 6, 14, 5, 1),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(3, 6, 14, 3, 2),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
 }

 }  // namespace
--- a/tensorflow/lite/delegates/gpu/cl/tensor_type.h
+++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.h
@ -20,6 +20,7 @@ limitations under the License.
 #include <string>

 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"

 namespace tflite {
 namespace gpu {
@ -36,14 +37,23 @@ enum class TensorStorageType {
 };

 struct TensorDescriptor {
-  DataType data_type;
-  TensorStorageType storage_type;
+  TensorDescriptor() = default;
+  TensorDescriptor(DataType dt, TensorStorageType st, Layout l)
+      : data_type(dt), storage_type(st), layout(l) {}

  bool operator==(const TensorDescriptor& d) const {
-    return data_type == d.data_type && storage_type == d.storage_type;
+    return data_type == d.data_type && storage_type == d.storage_type &&
+           layout == d.layout;
  }

  bool operator!=(const TensorDescriptor& d) const { return !(*this == d); }
+
+  DataType data_type = DataType::UNKNOWN;
+  TensorStorageType storage_type = TensorStorageType::UNKNOWN;
+  // This field describes the logical layout; the actual (physical) GPU layout
+  // can be totally different.
+  Layout layout =
+      Layout::UNKNOWN;  // Supported layouts are HWC, BHWC, HWDC, BHWDC
 };

 std::string ToString(TensorStorageType type);