diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD
index 7dfbd52a203..00a28457767 100644
--- a/tensorflow/lite/delegates/gpu/cl/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/BUILD
@@ -27,6 +27,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/cl/kernels:converter",
         "//tensorflow/lite/delegates/gpu/common:data_type",
         "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:shape",
         "//tensorflow/lite/delegates/gpu/common:status",
         "//tensorflow/lite/delegates/gpu/common:tensor",
         "@com_google_absl//absl/memory",
@@ -230,6 +231,7 @@ cc_library(
         ":tensor_type",
         ":util",
         "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:shape",
         "//tensorflow/lite/delegates/gpu/common:status",
         "//tensorflow/lite/delegates/gpu/common:tensor",
     ],
@@ -305,6 +307,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common:model",
         "//tensorflow/lite/delegates/gpu/common:model_transformer",
         "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:shape",
         "//tensorflow/lite/delegates/gpu/common:status",
         "//tensorflow/lite/delegates/gpu/common:tensor",
         "//tensorflow/lite/delegates/gpu/common:types",
@@ -430,6 +433,7 @@ cc_library(
     hdrs = ["tensor_type.h"],
     deps = [
         "//tensorflow/lite/delegates/gpu/common:data_type",
+        "//tensorflow/lite/delegates/gpu/common:shape",
     ],
 )
 
diff --git a/tensorflow/lite/delegates/gpu/cl/api.cc b/tensorflow/lite/delegates/gpu/cl/api.cc
index bb83bf3f30e..ff7b70ae762 100644
--- a/tensorflow/lite/delegates/gpu/cl/api.cc
+++ b/tensorflow/lite/delegates/gpu/cl/api.cc
@@ -35,6 +35,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
 
 namespace tflite {
@@ -157,7 +158,8 @@ class DefaultTensorTie : public TensorTie {
         const TensorDescriptor desc{
             d.object_def.data_type,
             ToTensorStorageType(d.object_def.object_type,
-                                d.object_def.data_layout)};
+                                d.object_def.data_layout),
+            Layout::BHWC};
         RETURN_IF_ERROR(AllocateTensorMemory(env->context(), env->device(),
                                              shape, desc, &cl_memory_));
         if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc
index cc5ccaf418a..e9aaa6a827c 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.cc
+++ b/tensorflow/lite/delegates/gpu/cl/environment.cc
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
 
 namespace tflite {
 namespace gpu {
@@ -58,7 +59,8 @@ Status CheckKernelSupportOfOneLayerTextureArray(Environment* env,
   const BHWC shape(1, 4, 4, 4);
   RETURN_IF_ERROR(CreateTensor(
       env->context(), env->device(), shape,
-      {DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY}, &tensor));
+      {DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY, Layout::HWC},
+      &tensor));
   RETURN_IF_ERROR(kernel.SetMemory(0, tensor.GetMemoryPtr()));
   RETURN_IF_ERROR(env->queue()->DispatchImplicit(kernel, {4, 4, 1}, {4, 4, 1}));
   TensorFloat32 tensor_gpu;
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
index 0676b2fe5d2..47941110ca3 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@@ -36,6 +36,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/transformations/add_bias.h"
 #include "tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
@@ -112,16 +113,18 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
                                         const CLDevice& device,
                                         const BHWC& shape,
                                         const TensorStorageType& desired,
-                                        const DataType& data_type) {
+                                        const DataType& data_type,
+                                        const Layout& layout) {
   if (CanCreateTensorWithShape(context, device, shape,
-                               TensorDescriptor{data_type, desired})) {
+                               TensorDescriptor{data_type, desired, layout})) {
     return desired;
   }
   auto GetBestTypeAfterTextureArray = [&]() {
     if (device.SupportsImageBuffer() &&
         CanCreateTensorWithShape(
             context, device, shape,
-            TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER})) {
+            TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER,
+                             layout})) {
       return TensorStorageType::IMAGE_BUFFER;
     } else {
       return TensorStorageType::BUFFER;
@@ -131,7 +134,8 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
     if (device.SupportsTextureArray() &&
         CanCreateTensorWithShape(
             context, device, shape,
-            TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY})) {
+            TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY,
+                             layout})) {
       return TensorStorageType::TEXTURE_ARRAY;
     } else {
       return GetBestTypeAfterTextureArray();
@@ -140,7 +144,8 @@ TensorStorageType SelectBestStorageType(const CLContext& context,
   auto GetBestTypeAfterTexture3D = [&]() {
     if (CanCreateTensorWithShape(
             context, device, shape,
-            TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D})) {
+            TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D,
+                             layout})) {
       return TensorStorageType::TEXTURE_2D;
     } else {
       return GetBestTypeAfterTexture2D();
@@ -256,20 +261,21 @@ void InferenceContext::ReserveGraphTensors(
   for (auto& t : tensors) {
     TensorStorageType storage_type = create_info.storage_type;
     const auto shape = graph.GetValue(t->id)->tensor.shape;
+    Layout layout = shape.b == 1 ? Layout::HWC : Layout::BHWC;
     if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) {
       if (shape.c < 4 &&
           CanCreateTensorWithShape(
               *creation_context.context, *creation_context.device, shape,
-              TensorDescriptor{data_type,
-                               TensorStorageType::SINGLE_TEXTURE_2D})) {
+              TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D,
+                               layout})) {
         storage_type = TensorStorageType::SINGLE_TEXTURE_2D;
       }
     }
     storage_type = SelectBestStorageType(*creation_context.context,
                                          *creation_context.device, shape,
-                                         storage_type, data_type);
-    tensor_reserver_.Add(t->id,
-                         {shape, TensorDescriptor{data_type, storage_type}});
+                                         storage_type, data_type, layout);
+    tensor_reserver_.Add(
+        t->id, {shape, TensorDescriptor{data_type, storage_type, layout}});
     max_id = std::max(max_id, t->id);
   }
   tensor_reserver_.SetNext(max_id + 1);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc
index 616aa6f7966..1eccab87646 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc
@@ -45,9 +45,9 @@ TEST_F(OpenCLOperationTest, AddTwoEqualTensors) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Add operation = CreateAdd(op_def, channels, channels[0]);
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@@ -73,9 +73,9 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasMoreChannelsThanSecond) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Add operation = CreateAdd(op_def, channels, channels[0]);
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@@ -103,9 +103,9 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasLessChannelsThanSecond) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Add operation = CreateAdd(op_def, channels, 6);
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/apply_mask_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/apply_mask_test.cc
index 5218b83136e..27c0b389412 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/apply_mask_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/apply_mask_test.cc
@@ -45,9 +45,9 @@ TEST_F(OpenCLOperationTest, ApplyMaskOneChannel) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ApplyMask operation =
           CreateApplyMask(op_def, src_tensor.shape, mask_tensor.shape);
@@ -75,9 +75,9 @@ TEST_F(OpenCLOperationTest, ApplyMaskEqualSizes) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ApplyMask operation =
           CreateApplyMask(op_def, src_tensor.shape, mask_tensor.shape);
@@ -105,9 +105,9 @@ TEST_F(OpenCLOperationTest, ApplyMaskVector) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ApplyMask operation =
           CreateApplyMask(op_def, src_tensor.shape, mask_tensor.shape);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc
index 441fbf4f890..eee4203ed1b 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc
@@ -47,9 +47,9 @@ TEST_F(OpenCLOperationTest, ConcatWidth) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConcatXY operation = CreateConcatXY(op_def, attr, 2);
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@@ -79,9 +79,9 @@ TEST_F(OpenCLOperationTest, ConcatHeight) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConcatXY operation = CreateConcatXY(op_def, attr, 2);
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
@@ -112,10 +112,10 @@ TEST_F(OpenCLOperationTest, ConcatChannels) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConcatZ operation = CreateConcatZ(op_def, {1, 2, 3});
       ASSERT_OK(ExecuteGPUOperation({src0, src1, src2}, creation_context_,
@@ -146,9 +146,9 @@ TEST_F(OpenCLOperationTest, ConcatChannelsAlignedx4) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConcatZ operation = CreateConcatZ(op_def, {4, 4});
       ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc
index b561975cd1a..c7d1bac2b0f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1_test.cc
@@ -51,8 +51,10 @@ TEST_F(OpenCLOperationTest, ConvBuffer1x1SimpleWeights) {
     OperationDef op_def;
     op_def.precision = precision;
     auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
     TensorFloat32 dst_tensor;
     ConvBuffer1x1 operation;
     ASSERT_OK(CreateConvBuffer1x1(creation_context_, op_def, attr, &operation));
@@ -84,8 +86,10 @@ TEST_F(OpenCLOperationTest, ConvBuffer1x1) {
     OperationDef op_def;
     op_def.precision = precision;
     auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
     TensorFloat32 dst_tensor;
     ConvBuffer1x1 operation;
     ASSERT_OK(CreateConvBuffer1x1(creation_context_, op_def, attr, &operation));
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_test.cc
index 921af4d406b..2289600497e 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_test.cc
@@ -51,8 +51,10 @@ TEST_F(OpenCLOperationTest, ConvBufferSimpleWeights) {
     OperationDef op_def;
     op_def.precision = precision;
     auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
     TensorFloat32 dst_tensor;
     ConvBuffer operation;
     ASSERT_OK(CreateConvBuffer(creation_context_, op_def, attr, &operation));
@@ -84,8 +86,10 @@ TEST_F(OpenCLOperationTest, ConvBuffer) {
     OperationDef op_def;
     op_def.precision = precision;
     auto data_type = DeduceDataTypeFromPrecision(precision);
-    op_def.src_tensors.push_back({data_type, TensorStorageType::BUFFER});
-    op_def.dst_tensors.push_back({data_type, TensorStorageType::BUFFER});
+    op_def.src_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
+    op_def.dst_tensors.push_back(
+        {data_type, TensorStorageType::BUFFER, Layout::HWC});
     TensorFloat32 dst_tensor;
     ConvBuffer operation;
     ASSERT_OK(CreateConvBuffer(creation_context_, op_def, attr, &operation));
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc
index 3bb281a5554..015e862fa65 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants_test.cc
@@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvConstantsSimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvConstants operation;
       ASSERT_OK(
@@ -88,8 +88,8 @@ TEST_F(OpenCLOperationTest, ConvConstants) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvConstants operation;
       ASSERT_OK(
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc
index 90325ebbd30..b63a1dbc830 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr_test.cc
@@ -54,8 +54,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVR1x1SimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvPowerVR operation;
       ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
@@ -89,8 +89,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVR1x1) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvPowerVR operation;
       ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
@@ -124,8 +124,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVRSimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvPowerVR operation;
       ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
@@ -159,8 +159,8 @@ TEST_F(OpenCLOperationTest, ConvPowerVR) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvPowerVR operation;
       ASSERT_OK(CreateConvPowerVR(creation_context_, op_def, attr, &operation));
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
index e38d82f222d..6b78d0a4078 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
@@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvTextureSimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvTexture operation;
       ASSERT_OK(CreateConvTexture(creation_context_, op_def, attr, &operation));
@@ -87,8 +87,8 @@ TEST_F(OpenCLOperationTest, ConvTexture) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvTexture operation;
       ASSERT_OK(CreateConvTexture(creation_context_, op_def, attr, &operation));
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
index d78fe4e6bba..1d25605582a 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin_test.cc
@@ -51,8 +51,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3ThinSimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvolutionTransposed3x3Thin operation;
       ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
@@ -87,8 +87,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed3x3Thin) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvolutionTransposed3x3Thin operation;
       ASSERT_OK(CreateConvolutionTransposed3x3Thin(creation_context_, op_def,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc
index 1f7feafbedf..97ee0b5702f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4_test.cc
@@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed4x4) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvolutionTransposed4x4 operation;
       ASSERT_OK(CreateConvolutionTransposed4x4(creation_context_, op_def, attr,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc
index aa5a8c5c517..dca405c2c7f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_test.cc
@@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedSimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvolutionTransposed operation;
       ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
@@ -91,8 +91,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposed) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvolutionTransposed operation;
       ASSERT_OK(CreateConvolutionTransposed(creation_context_, op_def, attr,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc
index 4e9676cfe2a..36fdf9f2fe9 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin_test.cc
@@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThinSimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvolutionTransposedThin operation;
       ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
@@ -91,8 +91,8 @@ TEST_F(OpenCLOperationTest, ConvolutionTransposedThin) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ConvolutionTransposedThin operation;
       ASSERT_OK(CreateConvolutionTransposedThin(creation_context_, op_def, attr,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
index 5f1c864028c..eafa94f15d0 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
@@ -53,8 +53,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3SimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       DepthWiseConv3x3 operation;
       ASSERT_OK(
@@ -90,8 +90,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConv3x3) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       DepthWiseConv3x3 operation;
       ASSERT_OK(
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_test.cc
index f5564712ad5..71b546bf384 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_test.cc
@@ -52,8 +52,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvSimpleWeights) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       DepthWiseConvolution operation;
       ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
@@ -88,8 +88,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvNoMultiplier) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       DepthWiseConvolution operation;
       ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
@@ -125,8 +125,8 @@ TEST_F(OpenCLOperationTest, DepthWiseConvMultiplier2) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       DepthWiseConvolution operation;
       ASSERT_OK(CreateDepthWiseConvolution(creation_context_, op_def, attr,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc
index e1b2638d276..81b29bfab82 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc
@@ -41,8 +41,8 @@ TEST_F(OpenCLOperationTest, Abs) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::ABS);
@@ -66,8 +66,8 @@ TEST_F(OpenCLOperationTest, Cos) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::COS);
@@ -92,8 +92,8 @@ TEST_F(OpenCLOperationTest, HardSwish) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::HARD_SWISH);
@@ -118,8 +118,8 @@ TEST_F(OpenCLOperationTest, Log) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::LOG);
@@ -143,8 +143,8 @@ TEST_F(OpenCLOperationTest, Rsqrt) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::RSQRT);
@@ -170,8 +170,8 @@ TEST_F(OpenCLOperationTest, Sigmoid) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::SIGMOID);
@@ -194,8 +194,8 @@ TEST_F(OpenCLOperationTest, Sin) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::SIN);
@@ -220,8 +220,8 @@ TEST_F(OpenCLOperationTest, Sqrt) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::SQRT);
@@ -246,8 +246,8 @@ TEST_F(OpenCLOperationTest, Square) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::SQUARE);
@@ -270,8 +270,8 @@ TEST_F(OpenCLOperationTest, Tanh) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseOneInput operation =
           CreateElementwiseOneInput(op_def, OperationType::TANH);
@@ -298,9 +298,9 @@ TEST_F(OpenCLOperationTest, Sub) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseTwoInput operation =
           CreateElementwiseTwoInput(op_def, OperationType::SUB);
@@ -326,9 +326,9 @@ TEST_F(OpenCLOperationTest, SquaredDiff) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseTwoInput operation =
           CreateElementwiseTwoInput(op_def, OperationType::SQUARED_DIFF);
@@ -354,9 +354,9 @@ TEST_F(OpenCLOperationTest, Div) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseTwoInput operation =
           CreateElementwiseTwoInput(op_def, OperationType::DIV);
@@ -382,9 +382,9 @@ TEST_F(OpenCLOperationTest, Pow) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ElementwiseTwoInput operation =
           CreateElementwiseTwoInput(op_def, OperationType::POW);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_texture_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_texture_test.cc
index 98057623311..0457142d707 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_texture_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_texture_test.cc
@@ -48,8 +48,8 @@ TEST_F(OpenCLOperationTest, FullyConnectedTexture) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       FullyConnectedTexture operation;
       ASSERT_OK(CreateFullyConnectedTexture(creation_context_, op_def, attr,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
index 0220725bb12..6e1b858711a 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
@@ -61,10 +61,10 @@ TEST_F(OpenCLOperationTest, LSTM) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::BHWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::BHWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC});
       TensorFloat32 new_state;
       TensorFloat32 new_activ;
       LSTM operation = CreateLSTM(op_def);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc
index 613d5ca7299..c03cb4f89d7 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc
@@ -51,9 +51,9 @@ TEST_F(OpenCLOperationTest, MaxUnpooling) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       MaxUnpooling operation = CreateMaxUnpooling(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation({src_tensor, src_ind_tensor},
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/multiply_add_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/multiply_add_test.cc
index 920669a816b..00f1f8dc90c 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/multiply_add_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/multiply_add_test.cc
@@ -49,8 +49,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddVectorMul) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       MultiplyAdd operation;
       ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@@ -79,8 +79,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddVectorAdd) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       MultiplyAdd operation;
       ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@@ -106,8 +106,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddScalarMul) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       MultiplyAdd operation;
       ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@@ -133,8 +133,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddScalarAdd) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       MultiplyAdd operation;
       ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, attr, &operation));
@@ -167,8 +167,8 @@ TEST_F(OpenCLOperationTest, MultiplyAddVectorMad) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       MultiplyAdd operation;
       ASSERT_OK(CreateMultiplyAdd(creation_context_, op_def, mul_attr, add_attr,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc
index ace90c37bf4..0324a5f8ae3 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc
@@ -46,8 +46,8 @@ TEST_F(OpenCLOperationTest, PaddingAppendWidth) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Padding operation = CreatePadding(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -74,8 +74,8 @@ TEST_F(OpenCLOperationTest, PaddingPrependWidth) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Padding operation = CreatePadding(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -102,8 +102,8 @@ TEST_F(OpenCLOperationTest, PaddingAppendHeight) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Padding operation = CreatePadding(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -130,8 +130,8 @@ TEST_F(OpenCLOperationTest, PaddingPrependHeight) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Padding operation = CreatePadding(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -158,8 +158,8 @@ TEST_F(OpenCLOperationTest, PaddingAppendChannels) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Padding operation = CreatePadding(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -186,8 +186,8 @@ TEST_F(OpenCLOperationTest, PaddingPrependChannels) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Padding operation = CreatePadding(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -214,8 +214,8 @@ TEST_F(OpenCLOperationTest, PaddingComplex) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Padding operation = CreatePadding(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc
index 27448bce1b6..12efd56f5d2 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc
@@ -49,8 +49,8 @@ TEST_F(OpenCLOperationTest, AveragePooling) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Pooling operation = CreatePooling(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -78,8 +78,8 @@ TEST_F(OpenCLOperationTest, AveragePoolingNonEmptyPadding) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Pooling operation = CreatePooling(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -108,8 +108,8 @@ TEST_F(OpenCLOperationTest, MaxPooling) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Pooling operation = CreatePooling(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -138,9 +138,9 @@ TEST_F(OpenCLOperationTest, MaxPoolingIndices) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       TensorFloat32 dst_tensor_ind;
       Pooling operation = CreatePooling(op_def, attr);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc
index 50d5aabb47b..4b0006c7f32 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc
@@ -49,8 +49,8 @@ TEST_F(OpenCLOperationTest, PReLUAlpha) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       PReLU operation;
       ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
@@ -80,8 +80,8 @@ TEST_F(OpenCLOperationTest, PReLUAlphaClip) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       PReLU operation;
       ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation));
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc
index d9e2718bf18..cebc9886ba5 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc
@@ -46,8 +46,8 @@ TEST_F(OpenCLOperationTest, ReLUNoClipNoAlpha) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ReLU operation = CreateReLU(creation_context_, op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -73,8 +73,8 @@ TEST_F(OpenCLOperationTest, ReLUClip) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ReLU operation = CreateReLU(creation_context_, op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -100,8 +100,8 @@ TEST_F(OpenCLOperationTest, ReLUAlpha) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ReLU operation = CreateReLU(creation_context_, op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -127,8 +127,8 @@ TEST_F(OpenCLOperationTest, ReLUAlphaClip) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       ReLU operation = CreateReLU(creation_context_, op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshape_test.cc
index 62b38d8f1ef..8f08eaee4fb 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape_test.cc
@@ -42,8 +42,8 @@ TEST_F(OpenCLOperationTest, Reshape) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Reshape operation = CreateReshape(op_def);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4_test.cc
index 8813a5f5208..65b88a94218 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4_test.cc
@@ -42,8 +42,8 @@ TEST_F(OpenCLOperationTest, Reshapex4) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Reshapex4 operation = CreateReshapex4(op_def);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1_test.cc
index fc86b961857..85c36087552 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1_test.cc
@@ -45,8 +45,8 @@ TEST_F(OpenCLOperationTest, Softmax1x1) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Softmax1x1 operation = CreateSoftmax1x1(op_def);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax_test.cc
index 037115e4399..bab81432248 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax_test.cc
@@ -45,8 +45,8 @@ TEST_F(OpenCLOperationTest, Softmax) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Softmax operation = CreateSoftmax(op_def);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice_test.cc
index 61f7800272f..dd127151358 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice_test.cc
@@ -53,8 +53,8 @@ TEST_F(OpenCLOperationTest, StridedSlice) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       StridedSlice operation = CreateStridedSlice(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/transpose_test.cc
index 58cdd227a75..07e1b9d58aa 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose_test.cc
@@ -45,8 +45,8 @@ TEST_F(OpenCLOperationTest, Transpose) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Transpose operation = CreateTranspose(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/upsample_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/upsample_test.cc
index beafbb9eda7..e32065e7266 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/upsample_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/upsample_test.cc
@@ -47,8 +47,8 @@ TEST_F(OpenCLOperationTest, UpsampleBilinearAligned) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Upsample operation = CreateUpsample(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
@@ -78,8 +78,8 @@ TEST_F(OpenCLOperationTest, UpsampleBilinearNonAligned) {
       OperationDef op_def;
       op_def.precision = precision;
       auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage});
-      op_def.dst_tensors.push_back({data_type, storage});
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
       TensorFloat32 dst_tensor;
       Upsample operation = CreateUpsample(op_def, attr);
       ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_test.cc b/tensorflow/lite/delegates/gpu/cl/tensor_test.cc
index a8448e411f6..7c859c43e6e 100644
--- a/tensorflow/lite/delegates/gpu/cl/tensor_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/tensor_test.cc
@@ -89,110 +89,173 @@ Status Tensor5DGenericTest(const BHWDC& shape,
   return OkStatus();
 }
 
-Status TensorTests(const TensorDescriptor& descriptor, Environment* env) {
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(1, 6, 7, 3), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(1, 1, 4, 12), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(1, 6, 1, 7), descriptor, env));
+Status TensorTests(DataType data_type, TensorStorageType storage_type,
+                   Environment* env) {
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(1, 6, 7, 3), {data_type, storage_type, Layout::HWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(1, 1, 4, 12), {data_type, storage_type, Layout::HWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(1, 6, 1, 7), {data_type, storage_type, Layout::HWC}, env));
 
   // Batch tests
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(2, 6, 7, 3), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(4, 1, 4, 12), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(7, 6, 1, 7), descriptor, env));
-  RETURN_IF_ERROR(TensorGenericTest(BHWC(13, 7, 3, 3), descriptor, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(2, 6, 7, 3), {data_type, storage_type, Layout::BHWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(4, 1, 4, 12), {data_type, storage_type, Layout::BHWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(7, 6, 1, 7), {data_type, storage_type, Layout::BHWC}, env));
+  RETURN_IF_ERROR(TensorGenericTest(
+      BHWC(13, 7, 3, 3), {data_type, storage_type, Layout::BHWC}, env));
 
   // 5D tests with batch = 1
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(1, 6, 7, 4, 3), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(1, 1, 4, 3, 12), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(1, 6, 1, 7, 7), descriptor, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(1, 6, 7, 4, 3), {data_type, storage_type, Layout::HWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(1, 1, 4, 3, 12), {data_type, storage_type, Layout::HWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(1, 6, 1, 7, 7), {data_type, storage_type, Layout::HWDC}, env));
 
   // 5D tests
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(2, 6, 7, 1, 3), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(4, 1, 4, 2, 12), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(7, 6, 1, 3, 7), descriptor, env));
-  RETURN_IF_ERROR(Tensor5DGenericTest(BHWDC(13, 7, 3, 4, 3), descriptor, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(2, 6, 7, 1, 3), {data_type, storage_type, Layout::BHWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(4, 1, 4, 2, 12), {data_type, storage_type, Layout::BHWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(7, 6, 1, 3, 7), {data_type, storage_type, Layout::BHWDC}, env));
+  RETURN_IF_ERROR(Tensor5DGenericTest(
+      BHWDC(13, 7, 3, 4, 3), {data_type, storage_type, Layout::BHWDC}, env));
   return OkStatus();
 }
 
 TEST_F(OpenCLTest, BufferF32) {
-  ASSERT_OK(TensorTests({DataType::FLOAT32, TensorStorageType::BUFFER}, &env_));
+  ASSERT_OK(TensorTests(DataType::FLOAT32, TensorStorageType::BUFFER, &env_));
 }
 
 TEST_F(OpenCLTest, BufferF16) {
-  ASSERT_OK(TensorTests({DataType::FLOAT16, TensorStorageType::BUFFER}, &env_));
+  ASSERT_OK(TensorTests(DataType::FLOAT16, TensorStorageType::BUFFER, &env_));
 }
 
 TEST_F(OpenCLTest, Texture2DF32) {
   ASSERT_OK(
-      TensorTests({DataType::FLOAT32, TensorStorageType::TEXTURE_2D}, &env_));
+      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_2D, &env_));
 }
 
 TEST_F(OpenCLTest, Texture2DF16) {
   ASSERT_OK(
-      TensorTests({DataType::FLOAT16, TensorStorageType::TEXTURE_2D}, &env_));
+      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_2D, &env_));
 }
 
 TEST_F(OpenCLTest, Texture3DF32) {
   ASSERT_OK(
-      TensorTests({DataType::FLOAT32, TensorStorageType::TEXTURE_3D}, &env_));
+      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_3D, &env_));
 }
 
 TEST_F(OpenCLTest, Texture3DF16) {
   ASSERT_OK(
-      TensorTests({DataType::FLOAT16, TensorStorageType::TEXTURE_3D}, &env_));
+      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_3D, &env_));
 }
 
 TEST_F(OpenCLTest, TextureArrayF32) {
-  ASSERT_OK(TensorTests({DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY},
-                        &env_));
+  ASSERT_OK(
+      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY, &env_));
 }
 
 TEST_F(OpenCLTest, TextureArrayF16) {
-  ASSERT_OK(TensorTests({DataType::FLOAT16, TensorStorageType::TEXTURE_ARRAY},
-                        &env_));
+  ASSERT_OK(
+      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_ARRAY, &env_));
 }
 
 TEST_F(OpenCLTest, ImageBufferF32) {
   ASSERT_OK(
-      TensorTests({DataType::FLOAT32, TensorStorageType::IMAGE_BUFFER}, &env_));
+      TensorTests(DataType::FLOAT32, TensorStorageType::IMAGE_BUFFER, &env_));
 }
 
 TEST_F(OpenCLTest, ImageBufferF16) {
   ASSERT_OK(
-      TensorTests({DataType::FLOAT16, TensorStorageType::IMAGE_BUFFER}, &env_));
+      TensorTests(DataType::FLOAT16, TensorStorageType::IMAGE_BUFFER, &env_));
 }
 
 TEST_F(OpenCLTest, SingleTextureF32) {
   ASSERT_OK(TensorGenericTest(
       BHWC(1, 6, 14, 1),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));
   ASSERT_OK(TensorGenericTest(
       BHWC(1, 6, 14, 2),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));
 
   // Batch tests
   ASSERT_OK(TensorGenericTest(
       BHWC(7, 6, 14, 1),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
   ASSERT_OK(TensorGenericTest(
       BHWC(3, 6, 14, 2),
-      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
+
+  // 5D tests with batch = 1
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 7, 1),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 4, 2),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+
+  // 5D tests
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(7, 6, 14, 5, 1),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(3, 6, 14, 3, 2),
+      {DataType::FLOAT32, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
 }
 
 TEST_F(OpenCLTest, SingleTextureF16) {
   ASSERT_OK(TensorGenericTest(
       BHWC(1, 6, 3, 1),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));
   ASSERT_OK(TensorGenericTest(
       BHWC(1, 6, 3, 2),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWC},
+      &env_));
 
   // Batch tests
   ASSERT_OK(TensorGenericTest(
       BHWC(7, 6, 3, 1),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
   ASSERT_OK(TensorGenericTest(
       BHWC(3, 6, 3, 2),
-      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D}, &env_));
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWC},
+      &env_));
+
+  // 5D tests with batch = 1
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 7, 1),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(1, 6, 14, 4, 2),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::HWDC},
+      &env_));
+
+  // 5D tests
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(7, 6, 14, 5, 1),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
+  ASSERT_OK(Tensor5DGenericTest(
+      BHWDC(3, 6, 14, 3, 2),
+      {DataType::FLOAT16, TensorStorageType::SINGLE_TEXTURE_2D, Layout::BHWDC},
+      &env_));
 }
 
 }  // namespace
diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.h b/tensorflow/lite/delegates/gpu/cl/tensor_type.h
index f576ea88090..9d98d38900f 100644
--- a/tensorflow/lite/delegates/gpu/cl/tensor_type.h
+++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
 
 namespace tflite {
 namespace gpu {
@@ -36,14 +37,23 @@ enum class TensorStorageType {
 };
 
 struct TensorDescriptor {
-  DataType data_type;
-  TensorStorageType storage_type;
+  TensorDescriptor() = default;
+  TensorDescriptor(DataType dt, TensorStorageType st, Layout l)
+      : data_type(dt), storage_type(st), layout(l) {}
 
   bool operator==(const TensorDescriptor& d) const {
-    return data_type == d.data_type && storage_type == d.storage_type;
+    return data_type == d.data_type && storage_type == d.storage_type &&
+           layout == d.layout;
   }
 
   bool operator!=(const TensorDescriptor& d) const { return !(*this == d); }
+
+  DataType data_type = DataType::UNKNOWN;
+  TensorStorageType storage_type = TensorStorageType::UNKNOWN;
+  // This field describes the logical layout; the actual (physical) GPU layout
+  // can be totally different.
+  Layout layout =
+      Layout::UNKNOWN;  // Supported layouts are HWC, BHWC, HWDC, BHWDC.
 };
 
 std::string ToString(TensorStorageType type);