Serialization of OpenCL InferenceContext.
PiperOrigin-RevId: 337185119
Change-Id: I3841fd093a692a4acd851792f723381fd29e53bc
commit 465aeca042
parent 9318f787e6
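For orientation, a minimal usage sketch of the API this change introduces: InitFromGraph can now hand back a serialized copy of the compiled context, and RestoreDeserialized rebuilds a context from that blob without re-running graph conversion or shader generation. The wrapper function and its error handling below are illustrative, not part of the commit.

// Sketch only: build once from the graph, capture the serialized model, and
// later rebuild a context from the blob instead of recompiling everything.
#include <cstdint>
#include <vector>

#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"

namespace tflite {
namespace gpu {
namespace cl {

absl::Status BuildAndRestore(
    const InferenceContext::CreateInferenceInfo& create_info,
    const GraphFloat32& graph, Environment* env,
    std::vector<uint8_t>* serialized_model) {
  // First run: full conversion + compilation; the flatbuffer blob is written
  // into *serialized_model as a side effect.
  InferenceContext context;
  absl::Status status =
      context.InitFromGraph(create_info, graph, env, serialized_model);
  if (!status.ok()) return status;

  // Later run (possibly another process): restore without the graph.
  InferenceContext restored;
  return restored.RestoreDeserialized(*serialized_model, env);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite

The blob is presumably only valid for the device and driver that produced it, so a real on-disk cache would also key on that information.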
@@ -55,6 +55,7 @@ cc_library(
         ":cl_device",
         ":gpu_object",
         ":opencl_wrapper",
+        ":serialization_cc_fbs",
         ":tensor_type",
         ":util",
         "//tensorflow/lite/delegates/gpu/common:access_type",
@@ -358,6 +359,7 @@ cc_library(
     deps = [
         ":cl_context",
         ":opencl_wrapper",
+        ":serialization_cc_fbs",
         "//tensorflow/lite/delegates/gpu/common:access_type",
         "//tensorflow/lite/delegates/gpu/common:data_type",
         "//tensorflow/lite/delegates/gpu/common:status",
@@ -366,19 +368,30 @@ cc_library(
 
 cc_library(
     name = "inference_context",
-    srcs = ["inference_context.cc"],
-    hdrs = ["inference_context.h"],
+    srcs = [
+        "inference_context.cc",
+        "serialization.cc",
+    ],
+    hdrs = [
+        "inference_context.h",
+        "serialization.h",
+    ],
     deps = [
+        ":arguments",
         ":buffer",
         ":cl_command_queue",
+        ":cl_context",
         ":cl_device",
         ":environment",
         ":gpu_object",
+        ":linear_storage",
         ":model_hints",
         ":opencl_wrapper",
         ":precision",
+        ":serialization_cc_fbs",
         ":storage_type_util",
         ":tensor_type",
+        ":texture2d",
         "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
         "//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector",
         "//tensorflow/lite/delegates/gpu/cl/selectors:special_selector",
@@ -396,6 +409,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
@@ -467,6 +481,14 @@ cc_library(
     ],
 )
 
+flatbuffer_cc_library(
+    name = "serialization_cc_fbs",
+    srcs = ["serialization.fbs"],
+    flatc_args = [
+        "--scoped-enums",
+    ],
+)
+
 cc_library(
     name = "storage_type_util",
     srcs = ["storage_type_util.cc"],
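The new flatbuffer_cc_library rule is what produces the serialization_generated.h header included throughout this change, and --scoped-enums makes flatc emit C++ enum classes rather than plain C enums. A hedged sketch of what using one of the generated scoped enums looks like; the helper function is illustrative only.

// Sketch: with --scoped-enums, values from the generated header are
// fully scoped enum-class constants.
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"

namespace tflite {
namespace gpu {
namespace cl {

bool IsReadOnly(data::AccessType access) {
  // data::AccessType is an enum class generated from serialization.fbs.
  return access == data::AccessType::READ;
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite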
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -77,6 +78,11 @@ class Arguments : public ArgumentsBinder {
   ~Arguments() override = default;
 
  private:
+  friend flatbuffers::Offset<data::Arguments> Encode(
+      const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
+                             Arguments* args);
+
   void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
   void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
   void AddImage2DArray(const std::string& name,
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -164,6 +165,10 @@ class GPUObjectDescriptor {
   AccessType GetAccess() const { return access_type_; }
 
  protected:
+  friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
+      const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
+  friend void Decode(const data::GPUObjectDescriptor* fb_obj,
+                     GPUObjectDescriptor* obj);
   mutable std::map<std::string, std::string> state_vars_;
   AccessType access_type_;
 };
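The friend declarations above follow a pattern used throughout this change: free Encode/Decode functions get access to private or protected state without widening the public API. A generic sketch of the pattern with made-up Widget/data::Widget names, not taken from this commit; the Encode definition itself is omitted.

// Friend-function pattern, generic illustration only.
#include "flatbuffers/flatbuffers.h"

namespace example {
namespace data {
struct Widget;  // stands in for a flatc-generated table
}  // namespace data

class Widget;
flatbuffers::Offset<data::Widget> Encode(const Widget& w,
                                         flatbuffers::FlatBufferBuilder* b);

class Widget {
 public:
  int value() const { return value_; }

 private:
  // Serialization code may read value_ directly; everyone else goes
  // through the public interface.
  friend flatbuffers::Offset<data::Widget> Encode(
      const Widget& w, flatbuffers::FlatBufferBuilder* b);
  int value_ = 0;
};

}  // namespace example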
@@ -153,7 +153,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
 
 absl::Status InferenceContext::InitFromGraph(
     const CreateInferenceInfo& create_info, const GraphFloat32& graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {
   CreationContext creation_context;
   creation_context.device = env->GetDevicePtr();
   creation_context.context = &env->context();
@@ -182,10 +182,6 @@ absl::Status InferenceContext::InitFromGraph(
   RETURN_IF_ERROR(Compile(creation_context));
   RETURN_IF_ERROR(UpdateParams());
 
-  for (auto& node : nodes_) {
-    node.operation->args_.ReleaseCPURepresentation();
-  }
-
   TuningParameters tuning_parameters;
   tuning_parameters.queue = env->profiling_queue();
   tuning_parameters.info = &env->device().info_;
@@ -201,14 +197,54 @@ absl::Status InferenceContext::InitFromGraph(
     }
   }
   RETURN_IF_ERROR(Tune(tuning_parameters));
 
+  if (serialized_model) {
+    flatbuffers::FlatBufferBuilder builder;
+    auto encoded_fb = Encode(*this, &builder);
+    data::FinishInferenceContextBuffer(builder, encoded_fb);
+    serialized_model->resize(builder.GetSize());
+    std::memcpy(serialized_model->data(), builder.GetBufferPointer(),
+                builder.GetSize());
+  }
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();
+  }
+  return absl::OkStatus();
+}
+
+absl::Status InferenceContext::RestoreDeserialized(
+    const std::vector<uint8_t>& serialized_model, Environment* env) {
+  flatbuffers::Verifier verifier(serialized_model.data(),
+                                 serialized_model.size());
+  if (!data::VerifyInferenceContextBuffer(verifier)) {
+    return absl::DataLossError("Deserialization failed.");
+  }
+  auto decoded_fb = data::GetInferenceContext(serialized_model.data());
+  RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this));
+
+  CreationContext creation_context;
+  creation_context.device = env->GetDevicePtr();
+  creation_context.context = &env->context();
+  creation_context.queue = env->queue();
+  creation_context.cache = env->program_cache();
+
+  RETURN_IF_ERROR(AllocateMemory(creation_context.context));
+  BindMemoryToOperations();
+  for (auto& node : nodes_) {
+    RETURN_IF_ERROR(node.operation->CompileDeserialized(creation_context));
+  }
+  RETURN_IF_ERROR(UpdateParams());
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();
+  }
   return absl::OkStatus();
 }
 
 absl::Status InferenceContext::InitFromGraphWithTransforms(
     const CreateInferenceInfo& create_info, GraphFloat32* graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {
   RETURN_IF_ERROR(RunGraphTransforms(graph));
-  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env));
+  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env, serialized_model));
   return absl::OkStatus();
 }
 
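Since InitFromGraph returns the serialized model as a plain byte vector and RestoreDeserialized verifies it with a flatbuffers verifier before decoding, callers can persist the blob between runs. A small illustrative helper for that; the file handling and the absence of any cache-key or version check are assumptions, not part of the commit.

#include <cstdint>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Writes the serialized model to disk; returns false on I/O failure.
bool SaveSerializedModel(const std::string& path,
                         const std::vector<uint8_t>& blob) {
  std::ofstream out(path, std::ios::binary);
  if (!out) return false;
  out.write(reinterpret_cast<const char*>(blob.data()),
            static_cast<std::streamsize>(blob.size()));
  return out.good();
}

// Reads the blob back; an empty vector means "no usable cache entry".
std::vector<uint8_t> LoadSerializedModel(const std::string& path) {
  std::ifstream in(path, std::ios::binary);
  return std::vector<uint8_t>(std::istreambuf_iterator<char>(in),
                              std::istreambuf_iterator<char>());
}

A corrupted or truncated file is caught by VerifyInferenceContextBuffer inside RestoreDeserialized, which returns a DataLossError instead of crashing.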
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -65,14 +66,15 @@ class InferenceContext {
   };
 
   absl::Status InitFromGraph(const CreateInferenceInfo& create_info,
-                             const GraphFloat32& graph, Environment* env);
+                             const GraphFloat32& graph, Environment* env,
+                             std::vector<uint8_t>* serialized_model = nullptr);
 
   // Applies OpenCL-specific transformations to the graph before the
   // initialization. These transformations are either impossible or useless in
   // other backends.
   absl::Status InitFromGraphWithTransforms(
       const CreateInferenceInfo& create_info, GraphFloat32* graph,
-      Environment* env);
+      Environment* env, std::vector<uint8_t>* serialized_model = nullptr);
 
   absl::Status AddToQueue(CLCommandQueue* queue);
   absl::Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result);
@@ -92,9 +94,19 @@ class InferenceContext {
   const std::vector<ValueId>& GetInputIds() const { return input_ids_; }
   const std::vector<ValueId>& GetOutputIds() const { return output_ids_; }
 
+  absl::Status RestoreDeserialized(const std::vector<uint8_t>& serialized_model,
+                                   Environment* env);
+
  private:
   enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
 
+  friend flatbuffers::Offset<data::InferenceContext> Encode(
+      const InferenceContext& inference,
+      flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::InferenceContext* fb_inference,
+                             InferenceContext* inference);
+
   void CopyInAndOutIds(const GraphFloat32& graph);
   absl::Status ConvertOperations(const DeviceInfo& device_info,
                                  const GraphFloat32& graph, ModelHints hints);
@@ -165,6 +177,32 @@ class InferenceContext {
     void SetNext(ValueId id) { next_ = id; }
     DummyTensor Get(ValueId id) { return reservations_[id]; }
 
+    std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const {
+      std::vector<std::pair<ValueId, TensorDescriptor>> result;
+      for (auto& v : reservations_) {
+        TensorDescriptor desc = v.second.descriptor;
+        desc.shape.b = v.second.shape.b;
+        desc.shape.h = v.second.shape.h;
+        desc.shape.w = v.second.shape.w;
+        desc.shape.d = 1;
+        desc.shape.c = v.second.shape.c;
+        result.push_back({v.first, desc});
+      }
+      return result;
+    }
+
+    void Add(const std::vector<std::pair<ValueId, TensorDescriptor>>& tensors) {
+      for (auto& v : tensors) {
+        DummyTensor dummy;
+        dummy.descriptor = v.second;
+        dummy.shape.b = v.second.shape.b;
+        dummy.shape.h = v.second.shape.h;
+        dummy.shape.w = v.second.shape.w;
+        dummy.shape.c = v.second.shape.c;
+        Add(v.first, dummy);
+      }
+    }
+
    private:
     absl::flat_hash_map<ValueId, DummyTensor> reservations_;
     ValueId next_;
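The reserver's new GetTensorDescs()/Add() methods convert between its BHWC reservations and the BHWDC shapes stored in TensorDescriptor, pinning the depth dimension to 1 on export. A simplified illustration of just that shape mapping; the struct definitions are stand-ins, not the delegate's types.

// Shape mapping used when exporting reserved tensors for serialization.
struct SimpleBHWC { int b, h, w, c; };
struct SimpleBHWDC { int b, h, w, d, c; };

SimpleBHWDC ToBHWDC(const SimpleBHWC& shape) {
  // Same batch/height/width/channels, depth fixed to 1.
  return SimpleBHWDC{shape.b, shape.h, shape.w, /*d=*/1, shape.c};
}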
@@ -651,6 +651,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/cl:device_info",
        "//tensorflow/lite/delegates/gpu/cl:precision",
         "//tensorflow/lite/delegates/gpu/cl:program_cache",
+        "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
         "//tensorflow/lite/delegates/gpu/cl:tensor",
         "//tensorflow/lite/delegates/gpu/cl:tensor_type",
         "//tensorflow/lite/delegates/gpu/common:access_type",
@@ -223,7 +223,8 @@ absl::Status GPUOperation::UpdateParams() {
   return absl::OkStatus();
 }
 
-absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
+absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info,
+                                        CLContext* context) {
   if (elementwise_) {
     auto src_desc =
         absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
@@ -241,28 +242,35 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
     dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
     args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
 
-    std::string code =
-        GetElementWiseCode(definition_, check_src_channels_size_);
     elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
     RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code, "main_function", *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
   } else {
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
     RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code_, "main_function", compiler_options_, *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
   }
+  return absl::OkStatus();
+}
+
+absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
+  RETURN_IF_ERROR(
+      AssembleCode(creation_context.GetDeviceInfo(), creation_context.context));
+  RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_));
   return PostCompileCheck(creation_context.device->info_, kernel_.info_);
 }
 
+absl::Status GPUOperation::CompileDeserialized(
+    const CreationContext& creation_context) {
+  return creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_);
+}
+
 void GPUOperation::GetPossibleKernelWorkGroups(
     TuningType tuning_type, const DeviceInfo& device_info,
     const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
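The old monolithic Compile() is split here: AssembleCode() generates code_ and allocates GPU objects given DeviceInfo and a CLContext, Compile() runs AssembleCode() and then builds the OpenCL kernel, and CompileDeserialized() only builds the kernel because code_ and the arguments were already restored from the flatbuffer. A sketch of how the two paths are expected to be driven; the wrapper functions are illustrative, not part of the commit.

#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"

namespace tflite {
namespace gpu {
namespace cl {

absl::Status BuildFromGraphNode(GPUOperation* op,
                                const CreationContext& creation_context) {
  // Compile() == AssembleCode(device_info, context) + GetOrCreateCLKernel().
  return op->Compile(creation_context);
}

absl::Status BuildFromDeserializedNode(GPUOperation* op,
                                       const CreationContext& creation_context) {
  // code_, arguments and work-group data came from the flatbuffer; only the
  // device-specific kernel binary still has to be created.
  return op->CompileDeserialized(creation_context);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite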
@@ -329,7 +337,7 @@ int3 GPUOperation::GetGridSize() const {
     const int grid_z = 1;
     return int3(grid_x, grid_y, grid_z);
   }
-  return int3(0, 0, 0);
+  return grid_size_;
 }
 
 void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
@@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
 #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
@@ -129,8 +130,12 @@ class GPUOperation {
 
   absl::Status Tune(const TuningParameters& params);
 
+  absl::Status AssembleCode(const DeviceInfo& device_info, CLContext* context);
+
   absl::Status Compile(const CreationContext& creation_context);
 
+  absl::Status CompileDeserialized(const CreationContext& creation_context);
+
   virtual absl::Status PostCompileCheck(const DeviceInfo& device_info,
                                         const KernelInfo& kernel_info) {
     return absl::OkStatus();
@@ -164,6 +169,11 @@ class GPUOperation {
   bool check_src_channels_size_ = false;
 
  protected:
+  friend flatbuffers::Offset<data::GPUOperation> Encode(
+      const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::GPUOperation* fb_op, GPUOperation* op);
+
   virtual absl::Status BindArguments(ArgumentsBinder* args) {
     return absl::OkStatus();
   }
tensorflow/lite/delegates/gpu/cl/serialization.cc (new file, 1049 lines; file diff suppressed because it is too large)
tensorflow/lite/delegates/gpu/cl/serialization.fbs (new file, 278 lines):

// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace tflite.gpu.cl.data;

table Int4 {
  x:int32;
  y:int32;
  z:int32;
  w:int32;
}

table Int3 {
  x:int32;
  y:int32;
  z:int32;
}

table Int2 {
  x:int32;
  y:int32;
}

table IntValue {
  name:string;
  value:int32;
  active:bool;
  offset:uint32;
}

table FloatValue {
  name:string;
  value:float;
  active:bool;
  offset:uint32;
}

table HalfValue {
  name:string;
  value:float;
  active:bool;
  store_as_f32:bool;
  offset:uint32;
}

enum AccessType : byte {
  READ = 0,
  WRITE = 1,
  READ_WRITE = 2,
}

enum DataType : byte {
  UNKNOWN = 0,
  FLOAT32 = 1,
  FLOAT16 = 2,
}

enum MemoryType : byte {
  GLOBAL = 0,
  CONSTANT = 1,
  LOCAL = 2,
}

table StateVariable {
  key:string;
  value:string;
}

table GPUObjectDescriptor {
  state_vars:[StateVariable];
  access_type:AccessType;
}

table BufferDescriptor {
  base_obj:GPUObjectDescriptor;
  element_type:DataType;
  element_size:int32;
  memory_type:MemoryType;
  attributes:[string];
  size:int32;
  data:[uint8];
}

table Texture2DDescriptor {
  base_obj:GPUObjectDescriptor;
  element_type:DataType;
  normalized:bool;
  normalized_type:DataType;
  size:Int2;
  data:[uint8];
}

enum LinearStorageType : byte {
  BUFFER = 0,
  TEXTURE_2D = 1,
}

table TensorLinearDescriptor {
  base_obj:GPUObjectDescriptor;
  storage_type:LinearStorageType;
  element_type:DataType;
  memory_type:MemoryType;
  size:int32;
  data:[uint8];
}

enum TensorStorageType : byte {
  UNKNOWN = 0,
  BUFFER = 1,
  IMAGE_BUFFER = 2,
  TEXTURE_2D = 3,
  TEXTURE_3D = 4,
  TEXTURE_ARRAY = 5,
  SINGLE_TEXTURE_2D = 6,
}

enum Layout : byte {
  UNKNOWN = 0,
  HWC = 1,
  BHWC = 2,
  HWDC = 3,
  BHWDC = 4,
}

table BHWDC {
  b:int32;
  h:int32;
  w:int32;
  d:int32;
  c:int32;
}

table TensorDescriptor {
  base_obj:GPUObjectDescriptor;
  data_type:DataType;
  storage_type:TensorStorageType;
  layout:Layout;
  shape:BHWDC;
  data:[uint8];
}

table BufferDescriptorMapValue {
  key:string;
  value:BufferDescriptor;
}

table Texture2DDescriptorMapValue {
  key:string;
  value:Texture2DDescriptor;
}

table TensorLinearDescriptorMapValue {
  key:string;
  value:TensorLinearDescriptor;
}

table TensorDescriptorMapValue {
  key:string;
  value:TensorDescriptor;
}

table Arguments {
  int_values:[IntValue];
  shared_int4s:[int32];

  float_values:[FloatValue];
  shared_float4s:[float];

  half_values:[HalfValue];
  shared_half4s:[float];

  buffer_refs:[BufferDescriptorMapValue];
  texture2d_refs:[Texture2DDescriptorMapValue];
  tensor_linear_refs:[TensorLinearDescriptorMapValue];
  tensor_refs:[TensorDescriptorMapValue];

  buffer_objects:[BufferDescriptorMapValue];
  texture2d_objects:[Texture2DDescriptorMapValue];
  tensor_linear_objects:[TensorLinearDescriptorMapValue];
  tensor_objects:[TensorDescriptorMapValue];
}

enum CalculationsPrecision : byte {
  F32 = 0,
  F32_F16 = 1,
  F16 = 2,
}

enum TensorToGrid : byte {
  CUSTOM = 0,
  WB_TO_X_HD_TO_Y_S_TO_Z = 1,
  WB_TO_X_HD_TO_Y_Z_IS_1 = 2,
  WB_TO_X_H_TO_Y_D_TO_Z = 3,
  B_TO_X_Y_IS_1_Z_IS_1 = 4,
}

enum CompilerOptions : byte {
  ADRENO_FULL_SIMD_LINE = 0,
  ADRENO_MORE_WAVES = 1,
  POWERVR_FP16 = 2,
  CL_OPT_DISABLE = 3,
  CL_2_0 = 4,
  CL_3_0 = 5,
}

table OperationDef {
  precision:CalculationsPrecision;
  src_tensors:[TensorDescriptor];
  dst_tensors:[TensorDescriptor];
}

table CompilerOption {
  option:CompilerOptions;
}

table GPUOperation {
  arguments:Arguments;
  code:string;
  work_group_size:Int3;
  compiler_options:[CompilerOption];
  tensor_to_grid:TensorToGrid;
  elementwise:bool;
  linkable:bool;
  check_src_channels_size:bool;
  definition:OperationDef;
  grid_dimension:int32;
  work_group_launch_order:Int3;
  grid_size:Int3;
  src_tensors_names:[string];
  dst_tensors_names:[string];
  work_groups_count:Int3;
  linkable_count:int32;
  elementwise_code:string;
}

table TensorDescWithId {
  desc:TensorDescriptor;
  id:int32;
}

table CLNode {
  gpu_op:GPUOperation;
  input_ids:[int32];
  output_ids:[int32];
  name:string;
}

table PairOfValueIds {
  first:int32;
  second:int32;
}

table InferenceContext {
  need_flush:bool;
  flush_periodically:bool;
  flush_period:int32;
  need_manual_release:bool;
  precision:CalculationsPrecision;
  storage_type:TensorStorageType;
  nodes:[CLNode];
  tensors:[TensorDescWithId];
  input_ids:[int32];
  variable_ids_and_refs:[PairOfValueIds];
  output_ids:[int32];
}

root_type InferenceContext;
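Each table above maps to flatc-generated C++ builders and accessors in the data namespace. As a small hedged illustration, a round trip through the generated code for Int3 might look like the following; the exact generated signatures depend on the flatc version and are not shown in this commit.

#include "flatbuffers/flatbuffers.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"

namespace tflite {
namespace gpu {
namespace cl {

void Int3RoundTrip() {
  flatbuffers::FlatBufferBuilder builder;
  // CreateInt3 is the builder flatc generates for `table Int3 { x/y/z:int32; }`.
  auto offset = data::CreateInt3(builder, /*x=*/8, /*y=*/4, /*z=*/1);
  builder.Finish(offset);

  const auto* int3 =
      flatbuffers::GetRoot<data::Int3>(builder.GetBufferPointer());
  // Generated accessors mirror the field names in serialization.fbs.
  int x = int3->x();
  int y = int3->y();
  int z = int3->z();
  (void)x; (void)y; (void)z;
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite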
tensorflow/lite/delegates/gpu/cl/serialization.h (new file, 42 lines):

/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_

#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

namespace tflite {
namespace gpu {
namespace cl {

class InferenceContext;

flatbuffers::Offset<data::InferenceContext> Encode(
    const InferenceContext& inference, flatbuffers::FlatBufferBuilder* builder);

absl::Status Decode(CLContext* context,
                    const data::InferenceContext* fb_inference,
                    InferenceContext* inference);

}  // namespace cl
}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
@@ -605,8 +605,11 @@ absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
   descriptor_.layout = desc.layout;
   memory_owner_ = true;
   CLMemory memory;
-  RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_,
-                                       desc.data.data(), &memory));
+  uint8_t* data_ptr = desc.data.empty()
+                          ? nullptr
+                          : const_cast<unsigned char*>(desc.data.data());
+  RETURN_IF_ERROR(
+      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
   memory_ = memory.Release();
   if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
     RETURN_IF_ERROR(CreateImageBufferFromBuffer(