diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
index 31f87835674..79df26c8f34 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
@@ -275,6 +275,7 @@ cc_library(
     srcs = ["convolution_transposed.cc"],
     hdrs = ["convolution_transposed.h"],
     deps = [
+        ":conv_common",
         ":gpu_operation",
         ":util",
         ":work_group_picking",
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc
index 53ffa1f577f..99df2689b95 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc
@@ -31,11 +31,10 @@ namespace cl {
 
 ConvolutionTransposed::ConvolutionTransposed(
     const OperationDef& definition, const ConvolutionTransposedAttributes& attr,
-    const GpuInfo& gpu_info)
+    const GpuInfo& gpu_info, bool weights_are_buffer)
     : GPUOperation(definition),
       stride_(attr.stride.w, attr.stride.h, 1, 1),
       block_size_(2, 2, 1, 2) {
-  const bool weights_are_buffer = gpu_info.IsMali();
   const bool is_f16 = definition.precision == CalculationsPrecision::F16;
   if (gpu_info.IsMali()) {
     if (gpu_info.mali_info.IsMidgard()) {
@@ -60,16 +59,15 @@ ConvolutionTransposed::ConvolutionTransposed(
   args_.AddInt("kernel_size_y", attr.weights.shape.h);
   code_ = GenerateConvolutionTransposedCode(definition_, gpu_info,
                                             weights_are_buffer, block_size_);
-  UploadWeights(attr.weights, weights_are_buffer);
 }
 
 ConvolutionTransposed::ConvolutionTransposed(
     const OperationDef& definition,
-    const ConvolutionTransposed3DAttributes& attr, const GpuInfo& gpu_info)
+    const ConvolutionTransposed3DAttributes& attr, const GpuInfo& gpu_info,
+    bool weights_are_buffer)
     : GPUOperation(definition),
       stride_(attr.stride.w, attr.stride.h, attr.stride.d, 1),
       block_size_(2, 2, 1, 2) {
-  const bool weights_are_buffer = gpu_info.IsMali();
   const bool is_f16 = definition.precision == CalculationsPrecision::F16;
   if (gpu_info.IsMali()) {
     if (gpu_info.mali_info.IsMidgard()) {
@@ -98,7 +96,6 @@ ConvolutionTransposed::ConvolutionTransposed(
   args_.AddInt("grid_size_y");
   code_ = GenerateConvolutionTransposedCode(definition_, gpu_info,
                                             weights_are_buffer, block_size_);
-  UploadWeights(attr.weights, weights_are_buffer);
 }
 
 ConvolutionTransposed::ConvolutionTransposed(ConvolutionTransposed&& operation)
@@ -124,6 +121,15 @@ std::string ConvolutionTransposed::GenerateConvolutionTransposedCode(
   AddSrcTensor("src_tensor", src_desc);
   AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
 
+  if (op_def.src_tensors.size() == 2) {
+    // dynamic weights
+    BufferDescriptor desc;
+    desc.element_type = op_def.src_tensors[1].data_type;
+    desc.element_size = 16;
+    desc.memory_type = MemoryType::GLOBAL;
+    AddSrcBuffer("weights", desc);
+  }
+
   const auto& src_def = op_def.src_tensors[0];
 
   std::string c = GetCommonDefines(op_def.precision);
@@ -544,7 +550,9 @@ void ConvolutionTransposed::GetPossibleKernelWorkGroups(
 ConvolutionTransposed CreateConvolutionTransposed(
     const GpuInfo& gpu_info, const OperationDef& definition,
     const ConvolutionTransposedAttributes& attr) {
-  ConvolutionTransposed result(definition, attr, gpu_info);
+  const bool weights_are_buffer = gpu_info.IsMali();
+  ConvolutionTransposed result(definition, attr, gpu_info, weights_are_buffer);
+  result.UploadWeights(attr.weights, weights_are_buffer);
 
   TensorLinearDescriptor desc;
   desc.storage_type =
@@ -559,7 +567,25 @@ ConvolutionTransposed CreateConvolutionTransposed(
 ConvolutionTransposed CreateConvolutionTransposed3D(
     const GpuInfo& gpu_info, const OperationDef& definition,
     const ConvolutionTransposed3DAttributes& attr) {
-  ConvolutionTransposed result(definition, attr, gpu_info);
+  const bool weights_are_buffer = gpu_info.IsMali();  // true only on Mali
+  ConvolutionTransposed result(definition, attr, gpu_info, weights_are_buffer);
+  result.UploadWeights(attr.weights, weights_are_buffer);  // not done by ctor
+
+  TensorLinearDescriptor desc;  // bias descriptor, registered as "biases"
+  desc.storage_type =
+      DeduceLinearStorageType(definition.GetPrimaryStorageType());
+  desc.element_type = definition.GetDataType();
+  desc.UploadLinearData(attr.bias);  // fill the descriptor from attr.bias
+  result.args_.AddObject(
+      "biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
+  return result;
+}
+
+ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
+    const GpuInfo& gpu_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr) {
+  const bool weights_are_buffer = true;
+  ConvolutionTransposed result(definition, attr, gpu_info, weights_are_buffer);
 
   TensorLinearDescriptor desc;
   desc.storage_type =
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h
index 05dee07ba21..c25c916cc56 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/conv_common.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
 #include "tensorflow/lite/delegates/gpu/cl/linear_storage.h"
@@ -53,6 +54,13 @@ class ConvolutionTransposed : public GPUOperation {
   ConvolutionTransposed(const ConvolutionTransposed&) = delete;
   ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
 
+  ConvWeightsDescription GetConvWeightsDescription() const {
+    ConvWeightsDescription desc;  // reports a weights layout and group size
+    desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4;  // always this layout
+    desc.output_group_size = block_size_.w;  // block_size_ is set in the ctors
+    return desc;
+  }
+
  private:
   friend ConvolutionTransposed CreateConvolutionTransposed(
       const GpuInfo& gpu_info, const OperationDef& definition,
@@ -60,12 +68,16 @@ class ConvolutionTransposed : public GPUOperation {
   friend ConvolutionTransposed CreateConvolutionTransposed3D(
       const GpuInfo& gpu_info, const OperationDef& definition,
       const ConvolutionTransposed3DAttributes& attr);
+  friend ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
+      const GpuInfo& gpu_info, const OperationDef& definition,
+      const ConvolutionTransposedAttributes& attr);
+
   ConvolutionTransposed(const OperationDef& definition,
                         const ConvolutionTransposedAttributes& attr,
-                        const GpuInfo& gpu_info);
+                        const GpuInfo& gpu_info, bool weights_are_buffer);
   ConvolutionTransposed(const OperationDef& definition,
                         const ConvolutionTransposed3DAttributes& attr,
-                        const GpuInfo& gpu_info);
+                        const GpuInfo& gpu_info, bool weights_are_buffer);
 
   template <DataType T>
   void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
@@ -213,6 +225,10 @@ ConvolutionTransposed CreateConvolutionTransposed3D(
     const GpuInfo& gpu_info, const OperationDef& definition,
     const ConvolutionTransposed3DAttributes& attr);
 
+ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
+    const GpuInfo& gpu_info, const OperationDef& definition,
+    const ConvolutionTransposedAttributes& attr);
+
 }  // namespace cl
 }  // namespace gpu
 }  // namespace tflite