Serialization of OpenCL InferenceContext.

PiperOrigin-RevId: 337185119 Change-Id: I3841fd093a692a4acd851792f723381fd29e53bc
2020-10-14 15:26:13 -07:00 · 2020-10-14 15:26:13 -07:00 · 465aeca042
commit 465aeca042
parent 9318f787e6
12 changed files with 1527 additions and 29 deletions
--- a/tensorflow/lite/delegates/gpu/cl/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/BUILD
@ -55,6 +55,7 @@ cc_library(
        ":cl_device",
        ":gpu_object",
        ":opencl_wrapper",
+        ":serialization_cc_fbs",
        ":tensor_type",
        ":util",
        "//tensorflow/lite/delegates/gpu/common:access_type",
@ -358,6 +359,7 @@ cc_library(
    deps = [
        ":cl_context",
        ":opencl_wrapper",
+        ":serialization_cc_fbs",
        "//tensorflow/lite/delegates/gpu/common:access_type",
        "//tensorflow/lite/delegates/gpu/common:data_type",
        "//tensorflow/lite/delegates/gpu/common:status",
@ -366,19 +368,30 @@ cc_library(

 cc_library(
    name = "inference_context",
-    srcs = ["inference_context.cc"],
-    hdrs = ["inference_context.h"],
+    srcs = [
+        "inference_context.cc",
+        "serialization.cc",
+    ],
+    hdrs = [
+        "inference_context.h",
+        "serialization.h",
+    ],
    deps = [
+        ":arguments",
        ":buffer",
        ":cl_command_queue",
+        ":cl_context",
        ":cl_device",
        ":environment",
        ":gpu_object",
+        ":linear_storage",
        ":model_hints",
        ":opencl_wrapper",
        ":precision",
+        ":serialization_cc_fbs",
        ":storage_type_util",
        ":tensor_type",
+        ":texture2d",
        "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
        "//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector",
        "//tensorflow/lite/delegates/gpu/cl/selectors:special_selector",
@ -396,6 +409,7 @@ cc_library(
        "//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/types:span",
    ],
 )

@ -467,6 +481,14 @@ cc_library(
    ],
 )

+flatbuffer_cc_library(  # Generates C++ bindings from the serialization.fbs schema.
+    name = "serialization_cc_fbs",  # Presumably the provider of serialization_generated.h, which the cl/ headers include.
+    srcs = ["serialization.fbs"],
+    flatc_args = [
+        "--scoped-enums",  # flatc option: emit schema enums as C++11 scoped enums (enum class).
+    ],
+)
+
 cc_library(
    name = "storage_type_util",
    srcs = ["storage_type_util.cc"],
--- a/tensorflow/lite/delegates/gpu/cl/arguments.h
+++ b/tensorflow/lite/delegates/gpu/cl/arguments.h
@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@ -77,6 +78,11 @@ class Arguments : public ArgumentsBinder {
  ~Arguments() override = default;

 private:
+  friend flatbuffers::Offset<data::Arguments> Encode(
+      const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
+                             Arguments* args);
+
  void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
  void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
  void AddImage2DArray(const std::string& name,
--- a/tensorflow/lite/delegates/gpu/cl/gpu_object.h
+++ b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
@ -23,6 +23,7 @@ limitations under the License.

 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@ -164,6 +165,10 @@ class GPUObjectDescriptor {
  AccessType GetAccess() const { return access_type_; }

 protected:
+  friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
+      const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
+  friend void Decode(const data::GPUObjectDescriptor* fb_obj,
+                     GPUObjectDescriptor* obj);
  mutable std::map<std::string, std::string> state_vars_;
  AccessType access_type_;
 };
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@ -153,7 +153,7 @@ CLNode& CLNode::operator=(CLNode&& node) {

 absl::Status InferenceContext::InitFromGraph(
    const CreateInferenceInfo& create_info, const GraphFloat32& graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {
  CreationContext creation_context;
  creation_context.device = env->GetDevicePtr();
  creation_context.context = &env->context();
@ -182,10 +182,6 @@ absl::Status InferenceContext::InitFromGraph(
  RETURN_IF_ERROR(Compile(creation_context));
  RETURN_IF_ERROR(UpdateParams());

-  for (auto& node : nodes_) {
-    node.operation->args_.ReleaseCPURepresentation();
-  }
-
  TuningParameters tuning_parameters;
  tuning_parameters.queue = env->profiling_queue();
  tuning_parameters.info = &env->device().info_;
@ -201,14 +197,54 @@ absl::Status InferenceContext::InitFromGraph(
    }
  }
  RETURN_IF_ERROR(Tune(tuning_parameters));
+
+  if (serialized_model) {
+    flatbuffers::FlatBufferBuilder builder;
+    auto encoded_fb = Encode(*this, &builder);
+    data::FinishInferenceContextBuffer(builder, encoded_fb);
+    serialized_model->resize(builder.GetSize());
+    std::memcpy(serialized_model->data(), builder.GetBufferPointer(),
+                builder.GetSize());
+  }
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();
+  }
+  return absl::OkStatus();
+}
+
+absl::Status InferenceContext::RestoreDeserialized(  // Rebuilds this context from a serialized InferenceContext flatbuffer rather than from a graph.
+    const std::vector<uint8_t>& serialized_model, Environment* env) {  // Returns DataLossError if verification fails, else the first error from Decode/AllocateMemory/CompileDeserialized/UpdateParams.
+  flatbuffers::Verifier verifier(serialized_model.data(),
+                                 serialized_model.size());
+  if (!data::VerifyInferenceContextBuffer(verifier)) {  // Check the buffer before reading any table from it.
+    return absl::DataLossError("Deserialization failed.");
+  }
+  auto decoded_fb = data::GetInferenceContext(serialized_model.data());
+  RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this));  // Fill this object from the decoded root table.
+
+  CreationContext creation_context;  // Device, context, queue and program cache all come from env.
+  creation_context.device = env->GetDevicePtr();
+  creation_context.context = &env->context();
+  creation_context.queue = env->queue();
+  creation_context.cache = env->program_cache();
+
+  RETURN_IF_ERROR(AllocateMemory(creation_context.context));
+  BindMemoryToOperations();
+  for (auto& node : nodes_) {
+    RETURN_IF_ERROR(node.operation->CompileDeserialized(creation_context));  // Builds each kernel from its stored code_; AssembleCode is not re-run.
+  }
+  RETURN_IF_ERROR(UpdateParams());
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();  // Same final step as InitFromGraph.
+  }
  return absl::OkStatus();
 }

 absl::Status InferenceContext::InitFromGraphWithTransforms(
    const CreateInferenceInfo& create_info, GraphFloat32* graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {  // serialized_model may be null; it is only forwarded below.
  RETURN_IF_ERROR(RunGraphTransforms(graph));
-  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env));
+  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env, serialized_model));  // InitFromGraph fills serialized_model when it is non-null.
  return absl::OkStatus();
 }

--- a/tensorflow/lite/delegates/gpu/cl/inference_context.h
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h
@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@ -65,14 +66,15 @@ class InferenceContext {
  };

  absl::Status InitFromGraph(const CreateInferenceInfo& create_info,
-                             const GraphFloat32& graph, Environment* env);
+                             const GraphFloat32& graph, Environment* env,
+                             std::vector<uint8_t>* serialized_model = nullptr);  // If non-null, receives the flatbuffer-encoded context once tuning has finished.

  // Applies OpenCL-specific transformations to the graph before the
  // initialization. These transformations are either impossible or useless in
  // other backends.
  absl::Status InitFromGraphWithTransforms(
      const CreateInferenceInfo& create_info, GraphFloat32* graph,
-      Environment* env);
+      Environment* env, std::vector<uint8_t>* serialized_model = nullptr);  // serialized_model is passed through to InitFromGraph unchanged.

  absl::Status AddToQueue(CLCommandQueue* queue);
  absl::Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result);
@ -92,9 +94,19 @@ class InferenceContext {
  const std::vector<ValueId>& GetInputIds() const { return input_ids_; }
  const std::vector<ValueId>& GetOutputIds() const { return output_ids_; }

+  absl::Status RestoreDeserialized(const std::vector<uint8_t>& serialized_model,  // Bytes as written into InitFromGraph's serialized_model output.
+                                   Environment* env);  // Fails with DataLossError when the buffer does not pass flatbuffer verification.
+
 private:
  enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };

+  friend flatbuffers::Offset<data::InferenceContext> Encode(
+      const InferenceContext& inference,
+      flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::InferenceContext* fb_inference,
+                             InferenceContext* inference);
+
  void CopyInAndOutIds(const GraphFloat32& graph);
  absl::Status ConvertOperations(const DeviceInfo& device_info,
                                 const GraphFloat32& graph, ModelHints hints);
@ -165,6 +177,32 @@ class InferenceContext {
    void SetNext(ValueId id) { next_ = id; }
    DummyTensor Get(ValueId id) { return reservations_[id]; }

+    std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const {  // Returns one (id, descriptor) pair per reservation, with the reservation's shape copied into the descriptor.
+      std::vector<std::pair<ValueId, TensorDescriptor>> result;
+      for (auto& v : reservations_) {  // reservations_ is a flat_hash_map, so the order of the result is unspecified.
+        TensorDescriptor desc = v.second.descriptor;
+        desc.shape.b = v.second.shape.b;
+        desc.shape.h = v.second.shape.h;
+        desc.shape.w = v.second.shape.w;
+        desc.shape.d = 1;  // No depth is read from the reservation's shape; it is fixed at 1.
+        desc.shape.c = v.second.shape.c;
+        result.push_back({v.first, desc});
+      }
+      return result;
+    }
+
+    void Add(const std::vector<std::pair<ValueId, TensorDescriptor>>& tensors) {  // Registers one DummyTensor per (id, descriptor) pair; the reverse direction of GetTensorDescs.
+      for (auto& v : tensors) {
+        DummyTensor dummy;
+        dummy.descriptor = v.second;
+        dummy.shape.b = v.second.shape.b;  // Only b/h/w/c are copied; the descriptor's shape.d is not carried over.
+        dummy.shape.h = v.second.shape.h;
+        dummy.shape.w = v.second.shape.w;
+        dummy.shape.c = v.second.shape.c;
+        Add(v.first, dummy);  // Delegates to the (id, DummyTensor) overload, which is not shown in this hunk.
+      }
+    }
+
   private:
    absl::flat_hash_map<ValueId, DummyTensor> reservations_;
    ValueId next_;
--- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
@ -651,6 +651,7 @@ cc_library(
        "//tensorflow/lite/delegates/gpu/cl:device_info",
        "//tensorflow/lite/delegates/gpu/cl:precision",
        "//tensorflow/lite/delegates/gpu/cl:program_cache",
+        "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
        "//tensorflow/lite/delegates/gpu/cl:tensor",
        "//tensorflow/lite/delegates/gpu/cl:tensor_type",
        "//tensorflow/lite/delegates/gpu/common:access_type",
--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc
@ -223,7 +223,8 @@ absl::Status GPUOperation::UpdateParams() {
  return absl::OkStatus();
 }

-absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
+absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info,
+                                        CLContext* context) {
  if (elementwise_) {
    auto src_desc =
        absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
@ -241,28 +242,35 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
    dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
    args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));

-    std::string code =
-        GetElementWiseCode(definition_, check_src_channels_size_);
    elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
    RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code, "main_function", *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
  } else {
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
    RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code_, "main_function", compiler_options_, *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
  }
+  return absl::OkStatus();
+}
+
+absl::Status GPUOperation::Compile(const CreationContext& creation_context) {  // Assembles code_, builds kernel_ via creation_context.cache, then runs PostCompileCheck.
+  RETURN_IF_ERROR(
+      AssembleCode(creation_context.GetDeviceInfo(), creation_context.context));  // Leaves the final kernel source in code_.
+  RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_));
  return PostCompileCheck(creation_context.device->info_, kernel_.info_);
 }

+absl::Status GPUOperation::CompileDeserialized(  // Builds kernel_ from the current code_ and compiler_options_ without calling AssembleCode.
+    const CreationContext& creation_context) {  // NOTE(review): unlike Compile, PostCompileCheck is not invoked on this path; confirm that is intended.
+  return creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_);
+}
+
 void GPUOperation::GetPossibleKernelWorkGroups(
    TuningType tuning_type, const DeviceInfo& device_info,
    const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
@ -329,7 +337,7 @@ int3 GPUOperation::GetGridSize() const {
    const int grid_z = 1;
    return int3(grid_x, grid_y, grid_z);
  }
-  return int3(0, 0, 0);
+  return grid_size_;
 }

 void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
 #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
@ -129,8 +130,12 @@ class GPUOperation {

  absl::Status Tune(const TuningParameters& params);

+  absl::Status AssembleCode(const DeviceInfo& device_info, CLContext* context);
+
  absl::Status Compile(const CreationContext& creation_context);

+  absl::Status CompileDeserialized(const CreationContext& creation_context);
+
  virtual absl::Status PostCompileCheck(const DeviceInfo& device_info,
                                        const KernelInfo& kernel_info) {
    return absl::OkStatus();
@ -164,6 +169,11 @@ class GPUOperation {
  bool check_src_channels_size_ = false;

 protected:
+  friend flatbuffers::Offset<data::GPUOperation> Encode(
+      const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::GPUOperation* fb_op, GPUOperation* op);
+
  virtual absl::Status BindArguments(ArgumentsBinder* args) {
    return absl::OkStatus();
  }
--- a/tensorflow/lite/delegates/gpu/cl/serialization.cc
+++ b/tensorflow/lite/delegates/gpu/cl/serialization.cc
--- a/tensorflow/lite/delegates/gpu/cl/serialization.fbs
+++ b/tensorflow/lite/delegates/gpu/cl/serialization.fbs
@ -0,0 +1,278 @@
+// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace tflite.gpu.cl.data;  // Generated C++ types are referenced as data::... from namespace tflite::gpu::cl.
+
+table Int4 {  // Four int32 components.
+  x:int32;
+  y:int32;
+  z:int32;
+  w:int32;
+}
+
+table Int3 {  // Three int32 components.
+  x:int32;
+  y:int32;
+  z:int32;
+}
+
+table Int2 {  // Two int32 components.
+  x:int32;
+  y:int32;
+}
+
+table IntValue {  // Named int32 entry; element type of Arguments.int_values.
+  name:string;
+  value:int32;
+  active:bool;
+  offset:uint32;
+}
+
+table FloatValue {  // Named float entry; element type of Arguments.float_values.
+  name:string;
+  value:float;
+  active:bool;
+  offset:uint32;
+}
+
+table HalfValue {  // Named half entry; element type of Arguments.half_values.
+  name:string;
+  value:float;  // Declared as a 32-bit float in the schema.
+  active:bool;
+  store_as_f32:bool;
+  offset:uint32;
+}
+
+enum AccessType : byte {
+  READ = 0,
+  WRITE = 1,
+  READ_WRITE = 2,
+}
+
+enum DataType : byte {
+  UNKNOWN = 0,
+  FLOAT32 = 1,
+  FLOAT16 = 2,
+}
+
+enum MemoryType : byte {
+  GLOBAL = 0,
+  CONSTANT = 1,
+  LOCAL = 2,
+}
+
+table StateVariable {  // One string key/value pair; element type of GPUObjectDescriptor.state_vars.
+  key:string;
+  value:string;
+}
+
+table GPUObjectDescriptor {  // Schema counterpart of cl::GPUObjectDescriptor (state_vars_, access_type_).
+  state_vars:[StateVariable];
+  access_type:AccessType;
+}
+
+table BufferDescriptor {
+  base_obj:GPUObjectDescriptor;  // Holds the GPUObjectDescriptor part; the other descriptor tables below do the same.
+  element_type:DataType;
+  element_size:int32;
+  memory_type:MemoryType;
+  attributes:[string];
+  size:int32;
+  data:[uint8];
+}
+
+table Texture2DDescriptor {
+  base_obj:GPUObjectDescriptor;
+  element_type:DataType;
+  normalized:bool;
+  normalized_type:DataType;
+  size:Int2;
+  data:[uint8];
+}
+
+enum LinearStorageType : byte {
+  BUFFER = 0,
+  TEXTURE_2D = 1,
+}
+
+table TensorLinearDescriptor {
+  base_obj:GPUObjectDescriptor;
+  storage_type:LinearStorageType;
+  element_type:DataType;
+  memory_type:MemoryType;
+  size:int32;
+  data:[uint8];
+}
+
+enum TensorStorageType : byte {
+  UNKNOWN = 0,
+  BUFFER = 1,
+  IMAGE_BUFFER = 2,
+  TEXTURE_2D = 3,
+  TEXTURE_3D = 4,
+  TEXTURE_ARRAY = 5,
+  SINGLE_TEXTURE_2D = 6,
+}
+
+enum Layout : byte {
+  UNKNOWN = 0,
+  HWC = 1,
+  BHWC = 2,
+  HWDC = 3,
+  BHWDC = 4,
+}
+
+table BHWDC {  // Five int32 extents: b, h, w, d, c.
+  b:int32;
+  h:int32;
+  w:int32;
+  d:int32;
+  c:int32;
+}
+
+table TensorDescriptor {
+  base_obj:GPUObjectDescriptor;
+  data_type:DataType;
+  storage_type:TensorStorageType;
+  layout:Layout;
+  shape:BHWDC;
+  data:[uint8];
+}
+
+table BufferDescriptorMapValue {  // The four *MapValue tables pair a string key with one descriptor; Arguments stores vectors of them.
+  key:string;
+  value:BufferDescriptor;
+}
+
+table Texture2DDescriptorMapValue {
+  key:string;
+  value:Texture2DDescriptor;
+}
+
+table TensorLinearDescriptorMapValue {
+  key:string;
+  value:TensorLinearDescriptor;
+}
+
+table TensorDescriptorMapValue {
+  key:string;
+  value:TensorDescriptor;
+}
+
+table Arguments {  // Schema counterpart of cl::Arguments, which befriends Encode/Decode for this table.
+  int_values:[IntValue];
+  shared_int4s:[int32];
+
+  float_values:[FloatValue];
+  shared_float4s:[float];
+
+  half_values:[HalfValue];
+  shared_half4s:[float];  // Element type is float, as with HalfValue.value.
+
+  buffer_refs:[BufferDescriptorMapValue];
+  texture2d_refs:[Texture2DDescriptorMapValue];
+  tensor_linear_refs:[TensorLinearDescriptorMapValue];
+  tensor_refs:[TensorDescriptorMapValue];
+
+  buffer_objects:[BufferDescriptorMapValue];
+  texture2d_objects:[Texture2DDescriptorMapValue];
+  tensor_linear_objects:[TensorLinearDescriptorMapValue];
+  tensor_objects:[TensorDescriptorMapValue];
+}
+
+enum CalculationsPrecision : byte {
+  F32 = 0,
+  F32_F16 = 1,
+  F16 = 2,
+}
+
+enum TensorToGrid : byte {
+  CUSTOM = 0,
+  WB_TO_X_HD_TO_Y_S_TO_Z = 1,
+  WB_TO_X_HD_TO_Y_Z_IS_1 = 2,
+  WB_TO_X_H_TO_Y_D_TO_Z = 3,
+  B_TO_X_Y_IS_1_Z_IS_1 = 4,
+}
+
+enum CompilerOptions : byte {
+  ADRENO_FULL_SIMD_LINE = 0,
+  ADRENO_MORE_WAVES = 1,
+  POWERVR_FP16 = 2,
+  CL_OPT_DISABLE = 3,
+  CL_2_0 = 4,
+  CL_3_0 = 5,
+}
+
+table OperationDef {
+  precision:CalculationsPrecision;
+  src_tensors:[TensorDescriptor];
+  dst_tensors:[TensorDescriptor];
+}
+
+table CompilerOption {  // Wraps one CompilerOptions value; GPUOperation.compiler_options is a vector of these.
+  option:CompilerOptions;
+}
+
+table GPUOperation {  // Schema counterpart of cl::GPUOperation, which befriends Encode/Decode for this table.
+  arguments:Arguments;
+  code:string;  // Presumably GPUOperation::code_, the source that CompileDeserialized compiles; confirm in serialization.cc.
+  work_group_size:Int3;
+  compiler_options:[CompilerOption];
+  tensor_to_grid:TensorToGrid;
+  elementwise:bool;
+  linkable:bool;
+  check_src_channels_size:bool;
+  definition:OperationDef;
+  grid_dimension:int32;
+  work_group_launch_order:Int3;
+  grid_size:Int3;
+  src_tensors_names:[string];
+  dst_tensors_names:[string];
+  work_groups_count:Int3;
+  linkable_count:int32;
+  elementwise_code:string;
+}
+
+table TensorDescWithId {  // A tensor descriptor together with an int32 id.
+  desc:TensorDescriptor;
+  id:int32;
+}
+
+table CLNode {
+  gpu_op:GPUOperation;
+  input_ids:[int32];
+  output_ids:[int32];
+  name:string;
+}
+
+table PairOfValueIds {  // Two int32 ids; element type of InferenceContext.variable_ids_and_refs.
+  first:int32;
+  second:int32;
+}
+
+table InferenceContext {  // Root table; schema counterpart of cl::InferenceContext.
+  need_flush:bool;
+  flush_periodically:bool;
+  flush_period:int32;
+  need_manual_release:bool;
+  precision:CalculationsPrecision;
+  storage_type:TensorStorageType;
+  nodes:[CLNode];
+  tensors:[TensorDescWithId];
+  input_ids:[int32];
+  variable_ids_and_refs:[PairOfValueIds];
+  output_ids:[int32];
+}
+
+root_type InferenceContext;  // Root of the buffer; the C++ side uses GetInferenceContext, VerifyInferenceContextBuffer and FinishInferenceContextBuffer.
--- a/tensorflow/lite/delegates/gpu/cl/serialization.h
+++ b/tensorflow/lite/delegates/gpu/cl/serialization.h
@ -0,0 +1,42 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
+
+#include "absl/types/span.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+
+namespace tflite {
+namespace gpu {
+namespace cl {
+
+class InferenceContext;  // Forward declaration; inference_context.h is also included by this header.
+
+flatbuffers::Offset<data::InferenceContext> Encode(  // Serializes `inference` into `builder` and returns the offset of the resulting root table.
+    const InferenceContext& inference, flatbuffers::FlatBufferBuilder* builder);  // InitFromGraph passes the returned offset to data::FinishInferenceContextBuffer.
+
+absl::Status Decode(CLContext* context,  // Fills `inference` from a decoded root table; presumably defined in serialization.cc.
+                    const data::InferenceContext* fb_inference,  // RestoreDeserialized obtains this from data::GetInferenceContext after verifying the buffer.
+                    InferenceContext* inference);
+
+}  // namespace cl
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
--- a/tensorflow/lite/delegates/gpu/cl/tensor.cc
+++ b/tensorflow/lite/delegates/gpu/cl/tensor.cc
@ -605,8 +605,11 @@ absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
  descriptor_.layout = desc.layout;
  memory_owner_ = true;
  CLMemory memory;
-  RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_,
-                                       desc.data.data(), &memory));
+  uint8_t* data_ptr = desc.data.empty()
+                          ? nullptr
+                          : const_cast<unsigned char*>(desc.data.data());
+  RETURN_IF_ERROR(
+      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
  memory_ = memory.Release();
  if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(