diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
index 74fb31aaa3c..f31a2ddfe71 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
@@ -169,7 +169,8 @@ Status ConvPowerVR::Tune(const TuningParameters& params) {
   if (conv_params_.weights_upload_type ==
           WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP ||
       conv_params_.weights_upload_type ==
-          WeightsUploadType::LOCAL_MEM_BY_THREADS) {
+          WeightsUploadType::LOCAL_MEM_BY_THREADS ||
+      conv_params_.fixed_work_group_size) {
     return OkStatus();
   }
   if (conv_params_.work_group_launch_order[0] == 0 &&
@@ -212,9 +213,15 @@ std::string GenerateConvPowerVR1x1(
       conv_params.weights_upload_type ==
           ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP;
 
+  const std::string weights_space =
+      conv_params.weights_upload_type ==
+              ConvPowerVR::WeightsUploadType::CONSTANT_MEM
+          ? "__constant"
+          : "__global";
+
   const int3 work_group_size = conv_params.work_group_size;
   const int3 block_size = conv_params.block_size;
-  if (need_local_mem) {  // we use fixed workgroup size when use local mem
+  if (conv_params.fixed_work_group_size) {
     c += "__attribute__((reqd_work_group_size(" +
          std::to_string(work_group_size.x) + ", " +
          std::to_string(work_group_size.y) + ", " +
@@ -222,8 +229,8 @@ std::string GenerateConvPowerVR1x1(
   }
   c += "__kernel void main_function(\n";
   c += src_tensor.GetDeclaration(AccessType::READ) + ",\n";
-  c += "    __global ACCUM_FLT4* filters_buffer,    \n";
-  c += "    __global ACCUM_FLT4* biases             \n";
+  c += "    " + weights_space + " ACCUM_FLT4* filters_buffer,    \n";
+  c += "    " + weights_space + " ACCUM_FLT4* biases             \n";
   c += GetArgsDeclaration(linked_operations);
   c += dst_tensor.GetDeclaration(AccessType::WRITE) + ",\n";
   if (!is1x1) {
@@ -301,14 +308,18 @@ std::string GenerateConvPowerVR1x1(
          "];\n";
   }
   if (conv_params.weights_upload_type ==
-      ConvPowerVR::WeightsUploadType::GLOBAL_MEM) {
-    c += "    __global ACCUM_FLT4* weights_cache;\n";
+          ConvPowerVR::WeightsUploadType::GLOBAL_MEM ||
+      conv_params.weights_upload_type ==
+          ConvPowerVR::WeightsUploadType::CONSTANT_MEM) {
+    c += "    " + weights_space + " ACCUM_FLT4* weights_cache;\n";
   }
   if (is1x1) {
-    c += "  __global ACCUM_FLT4* filters_loc = filters_buffer + Z * 4 * "
+    c += "  " + weights_space +
+         " ACCUM_FLT4* filters_loc = filters_buffer + Z * 4 * "
          "src_size.z;\n";
   } else {
-    c += "  __global ACCUM_FLT4* filters_loc = filters_buffer + Z * 4 * "
+    c += "  " + weights_space +
+         " ACCUM_FLT4* filters_loc = filters_buffer + Z * 4 * "
          "src_size.z * kernel_dilation.x * kernel_dilation.y;\n";
   }
   if (buffer_type) {
@@ -445,7 +456,7 @@ std::string GenerateConvPowerVR1x1(
         "weights_cache", "filters_loc",
         /*global_offset_name*/ "", "lid", total_work_items,
         block_size.z * 4 * conv_params.src_depth_loop_size);
-  } else {  // GLOBAL_MEM
+  } else {  // GLOBAL_MEM/CONSTANT_MEM
     c += "    weights_cache = filters_loc;\n";
   }
   read_src();
@@ -477,7 +488,7 @@ std::string GenerateConvPowerVR1x1(
     c += GenerateUploadByThreads("weights_cache", "biases", "Z", "lid",
                                  total_work_items, block_size.z);
     c += "    barrier(CLK_LOCAL_MEM_FENCE);\n";
-  } else {  // GLOBAL_MEM
+  } else {  // GLOBAL_MEM/CONSTANT_MEM
     c += "    weights_cache = biases + Z;\n";
   }
   if (need_local_mem) {
@@ -528,6 +539,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
     conv_params.block_size = int3(1, 1, 4);
     conv_params.work_group_size = int3(8, 4, 1);
     conv_params.work_group_launch_order = int3(2, 0, 1);
+    conv_params.fixed_work_group_size = true;
     conv_params.src_depth_loop_size = 1;
     conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
     if (dst_depth % 4 == 0 || dst_depth >= 8) {
@@ -547,6 +559,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
     conv_params.block_size = int3(1, 1, 4);
     conv_params.work_group_size = int3(8, 4, 1);
     conv_params.work_group_launch_order = int3(2, 0, 1);
+    conv_params.fixed_work_group_size = true;
     conv_params.src_depth_loop_size = 1;
     conv_params.weights_upload_type =
         WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP;
@@ -581,10 +594,33 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
       conv_params.block_size.x = 2;
       conv_params.work_group_size = int3(4, 8, 1);
     }
+  } else if (device.IsAMD()) {
+    conv_params.block_size = int3(2, 1, 1);
+    if (x_kernel_is_1 && y_kernel_is_1) {
+      conv_params.block_size.y = 2;
+    }
+    conv_params.work_group_size = int3(8, 4, 1);
+    conv_params.work_group_launch_order = int3(2, 0, 1);
+    conv_params.fixed_work_group_size = true;
+    conv_params.src_depth_loop_size = 1;
+    conv_params.weights_upload_type = WeightsUploadType::CONSTANT_MEM;
+    if (dst_depth % 8 == 0 || dst_depth >= 32) {
+      conv_params.block_size.z = 8;
+    } else if (dst_depth % 4 == 0 || dst_depth >= 8) {
+      conv_params.block_size.z = 4;
+    } else if (dst_depth % 2 == 0 || dst_depth >= 4) {
+      conv_params.block_size.z = 2;
+    } else {
+      conv_params.block_size.z = 1;
+    }
+    if (src_depth % 2 == 0 && src_depth >= 16) {
+      conv_params.src_depth_loop_size = 2;
+    }
   } else {
     conv_params.block_size = int3(1, 1, 4);
     conv_params.work_group_size = int3(8, 4, 1);
     conv_params.work_group_launch_order = int3(0, 1, 2);
+    conv_params.fixed_work_group_size = false;
     conv_params.src_depth_loop_size = 1;
     conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
     if (dst_depth % 4 == 0 || dst_depth >= 8) {
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h
index 3ba1b1e72fc..ed66a3b0dad 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h
@@ -54,12 +54,14 @@ class ConvPowerVR : public GPUOperation {
     LOCAL_MEM_ASYNC_SUBGROUP,  // we use it for PowerVR with workgroup size = 32
     LOCAL_MEM_BY_THREADS,
     GLOBAL_MEM,
+    CONSTANT_MEM,
   };
 
   struct ConvParams {
     int3 block_size;
     int3 work_group_size;
     int3 work_group_launch_order;
+    bool fixed_work_group_size;
     int src_depth_loop_size;
     WeightsUploadType weights_upload_type;
     bool x_kernel_is_1;
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
index 8e666ce3904..a70aa3f9901 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
@@ -106,6 +106,7 @@ Status SelectConvolution(const Convolution2DAttributes& attr,
       return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def,
                                      hints, ptr);
     case Vendor::POWERVR:
+    case Vendor::AMD:
       return SelectConvolutionPowerVR(attr, creation_context, op_def, ptr);
     case Vendor::NVIDIA:
       return SelectConvolutionNVidia(attr, creation_context, op_def, ptr);