From 465aeca04268f6e19d5f845610cc7ccaf03f5b8d Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 14 Oct 2020 15:26:13 -0700 Subject: [PATCH] Serialization of OpenCL InferenceContext. PiperOrigin-RevId: 337185119 Change-Id: I3841fd093a692a4acd851792f723381fd29e53bc --- tensorflow/lite/delegates/gpu/cl/BUILD | 26 +- tensorflow/lite/delegates/gpu/cl/arguments.h | 6 + tensorflow/lite/delegates/gpu/cl/gpu_object.h | 5 + .../delegates/gpu/cl/inference_context.cc | 50 +- .../lite/delegates/gpu/cl/inference_context.h | 42 +- .../lite/delegates/gpu/cl/kernels/BUILD | 1 + .../delegates/gpu/cl/kernels/gpu_operation.cc | 40 +- .../delegates/gpu/cl/kernels/gpu_operation.h | 10 + .../lite/delegates/gpu/cl/serialization.cc | 1049 +++++++++++++++++ .../lite/delegates/gpu/cl/serialization.fbs | 278 +++++ .../lite/delegates/gpu/cl/serialization.h | 42 + tensorflow/lite/delegates/gpu/cl/tensor.cc | 7 +- 12 files changed, 1527 insertions(+), 29 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/cl/serialization.cc create mode 100644 tensorflow/lite/delegates/gpu/cl/serialization.fbs create mode 100644 tensorflow/lite/delegates/gpu/cl/serialization.h diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index c616b081829..63171348b74 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -55,6 +55,7 @@ cc_library( ":cl_device", ":gpu_object", ":opencl_wrapper", + ":serialization_cc_fbs", ":tensor_type", ":util", "//tensorflow/lite/delegates/gpu/common:access_type", @@ -358,6 +359,7 @@ cc_library( deps = [ ":cl_context", ":opencl_wrapper", + ":serialization_cc_fbs", "//tensorflow/lite/delegates/gpu/common:access_type", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:status", @@ -366,19 +368,30 @@ cc_library( cc_library( name = "inference_context", - srcs = ["inference_context.cc"], - hdrs = ["inference_context.h"], + srcs = [ + "inference_context.cc", + "serialization.cc", + ], + hdrs = [ + "inference_context.h", + "serialization.h", + ], deps = [ + ":arguments", ":buffer", ":cl_command_queue", + ":cl_context", ":cl_device", ":environment", ":gpu_object", + ":linear_storage", ":model_hints", ":opencl_wrapper", ":precision", + ":serialization_cc_fbs", ":storage_type_util", ":tensor_type", + ":texture2d", "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", "//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector", "//tensorflow/lite/delegates/gpu/cl/selectors:special_selector", @@ -396,6 +409,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/types:span", ], ) @@ -467,6 +481,14 @@ cc_library( ], ) +flatbuffer_cc_library( + name = "serialization_cc_fbs", + srcs = ["serialization.fbs"], + flatc_args = [ + "--scoped-enums", + ], +) + cc_library( name = "storage_type_util", srcs = ["storage_type_util.cc"], diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.h b/tensorflow/lite/delegates/gpu/cl/arguments.h index bac6afd55b7..a5435c4fc2f 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments.h +++ b/tensorflow/lite/delegates/gpu/cl/arguments.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" +#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" #include "tensorflow/lite/delegates/gpu/cl/util.h" #include "tensorflow/lite/delegates/gpu/common/access_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -77,6 +78,11 @@ class Arguments : public ArgumentsBinder { ~Arguments() override = default; private: + friend flatbuffers::Offset Encode( + const Arguments& args, flatbuffers::FlatBufferBuilder* builder); + friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args, + Arguments* args); + void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc); void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc); void AddImage2DArray(const std::string& name, diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_object.h b/tensorflow/lite/delegates/gpu/cl/gpu_object.h index 297a5f70858..abd77a4489b 100644 --- a/tensorflow/lite/delegates/gpu/cl/gpu_object.h +++ b/tensorflow/lite/delegates/gpu/cl/gpu_object.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" +#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" #include "tensorflow/lite/delegates/gpu/common/access_type.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -164,6 +165,10 @@ class GPUObjectDescriptor { AccessType GetAccess() const { return access_type_; } protected: + friend flatbuffers::Offset Encode( + const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder); + friend void Decode(const data::GPUObjectDescriptor* fb_obj, + GPUObjectDescriptor* obj); mutable std::map state_vars_; AccessType access_type_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index b834bbfffef..ca0c0319f54 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -153,7 +153,7 @@ CLNode& CLNode::operator=(CLNode&& node) { absl::Status InferenceContext::InitFromGraph( const CreateInferenceInfo& create_info, const GraphFloat32& graph, - Environment* env) { + Environment* env, std::vector* serialized_model) { CreationContext creation_context; creation_context.device = env->GetDevicePtr(); creation_context.context = &env->context(); @@ -182,10 +182,6 @@ absl::Status InferenceContext::InitFromGraph( RETURN_IF_ERROR(Compile(creation_context)); RETURN_IF_ERROR(UpdateParams()); - for (auto& node : nodes_) { - node.operation->args_.ReleaseCPURepresentation(); - } - TuningParameters tuning_parameters; tuning_parameters.queue = env->profiling_queue(); tuning_parameters.info = &env->device().info_; @@ -201,14 +197,54 @@ absl::Status InferenceContext::InitFromGraph( } } RETURN_IF_ERROR(Tune(tuning_parameters)); + + if (serialized_model) { + flatbuffers::FlatBufferBuilder builder; + auto encoded_fb = Encode(*this, &builder); + data::FinishInferenceContextBuffer(builder, encoded_fb); + serialized_model->resize(builder.GetSize()); + std::memcpy(serialized_model->data(), builder.GetBufferPointer(), + builder.GetSize()); + } + for (auto& node : nodes_) { + node.operation->args_.ReleaseCPURepresentation(); + } + return absl::OkStatus(); +} + +absl::Status InferenceContext::RestoreDeserialized( + const std::vector& serialized_model, Environment* env) { + flatbuffers::Verifier verifier(serialized_model.data(), + serialized_model.size()); + if (!data::VerifyInferenceContextBuffer(verifier)) { + return absl::DataLossError("Deserialization failed."); + } + auto decoded_fb = data::GetInferenceContext(serialized_model.data()); + RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this)); + + CreationContext creation_context; + creation_context.device = env->GetDevicePtr(); + creation_context.context = &env->context(); + creation_context.queue = env->queue(); + creation_context.cache = env->program_cache(); + + RETURN_IF_ERROR(AllocateMemory(creation_context.context)); + BindMemoryToOperations(); + for (auto& node : nodes_) { + RETURN_IF_ERROR(node.operation->CompileDeserialized(creation_context)); + } + RETURN_IF_ERROR(UpdateParams()); + for (auto& node : nodes_) { + node.operation->args_.ReleaseCPURepresentation(); + } return absl::OkStatus(); } absl::Status InferenceContext::InitFromGraphWithTransforms( const CreateInferenceInfo& create_info, GraphFloat32* graph, - Environment* env) { + Environment* env, std::vector* serialized_model) { RETURN_IF_ERROR(RunGraphTransforms(graph)); - RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env)); + RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env, serialized_model)); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h index da687ffa3b5..ec8055ebcde 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.h +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/model_hints.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" #include "tensorflow/lite/delegates/gpu/cl/precision.h" +#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -65,14 +66,15 @@ class InferenceContext { }; absl::Status InitFromGraph(const CreateInferenceInfo& create_info, - const GraphFloat32& graph, Environment* env); + const GraphFloat32& graph, Environment* env, + std::vector* serialized_model = nullptr); // Applies OpenCL-specific transformations to the graph before the // initialization. These transformations are either impossible or useless in // other backends. absl::Status InitFromGraphWithTransforms( const CreateInferenceInfo& create_info, GraphFloat32* graph, - Environment* env); + Environment* env, std::vector* serialized_model = nullptr); absl::Status AddToQueue(CLCommandQueue* queue); absl::Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result); @@ -92,9 +94,19 @@ class InferenceContext { const std::vector& GetInputIds() const { return input_ids_; } const std::vector& GetOutputIds() const { return output_ids_; } + absl::Status RestoreDeserialized(const std::vector& serialized_model, + Environment* env); + private: enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 }; + friend flatbuffers::Offset Encode( + const InferenceContext& inference, + flatbuffers::FlatBufferBuilder* builder); + friend absl::Status Decode(CLContext* context, + const data::InferenceContext* fb_inference, + InferenceContext* inference); + void CopyInAndOutIds(const GraphFloat32& graph); absl::Status ConvertOperations(const DeviceInfo& device_info, const GraphFloat32& graph, ModelHints hints); @@ -165,6 +177,32 @@ class InferenceContext { void SetNext(ValueId id) { next_ = id; } DummyTensor Get(ValueId id) { return reservations_[id]; } + std::vector> GetTensorDescs() const { + std::vector> result; + for (auto& v : reservations_) { + TensorDescriptor desc = v.second.descriptor; + desc.shape.b = v.second.shape.b; + desc.shape.h = v.second.shape.h; + desc.shape.w = v.second.shape.w; + desc.shape.d = 1; + desc.shape.c = v.second.shape.c; + result.push_back({v.first, desc}); + } + return result; + } + + void Add(const std::vector>& tensors) { + for (auto& v : tensors) { + DummyTensor dummy; + dummy.descriptor = v.second; + dummy.shape.b = v.second.shape.b; + dummy.shape.h = v.second.shape.h; + dummy.shape.w = v.second.shape.w; + dummy.shape.c = v.second.shape.c; + Add(v.first, dummy); + } + } + private: absl::flat_hash_map reservations_; ValueId next_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 7bce013a895..d7e7c7dd498 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -651,6 +651,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/cl:precision", "//tensorflow/lite/delegates/gpu/cl:program_cache", + "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs", "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/cl:tensor_type", "//tensorflow/lite/delegates/gpu/common:access_type", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 025de5a7c7f..b39f03af846 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -223,7 +223,8 @@ absl::Status GPUOperation::UpdateParams() { return absl::OkStatus(); } -absl::Status GPUOperation::Compile(const CreationContext& creation_context) { +absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info, + CLContext* context) { if (elementwise_) { auto src_desc = absl::make_unique(definition_.src_tensors[0]); @@ -241,28 +242,35 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor"); args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - std::string code = - GetElementWiseCode(definition_, check_src_channels_size_); elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_; - RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context)); + code_ = GetElementWiseCode(definition_, check_src_channels_size_); + RETURN_IF_ERROR(args_.AllocateObjects(context)); RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->info_, - {{dst_tensors_names_[0], elementwise_code_}}, &code)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_)); + device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_)); } else { - RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context)); + RETURN_IF_ERROR(args_.AllocateObjects(context)); RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->info_, - {{dst_tensors_names_[0], elementwise_code_}}, &code_)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code_, "main_function", compiler_options_, *creation_context.context, - *creation_context.device, &kernel_)); + device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_)); } + return absl::OkStatus(); +} + +absl::Status GPUOperation::Compile(const CreationContext& creation_context) { + RETURN_IF_ERROR( + AssembleCode(creation_context.GetDeviceInfo(), creation_context.context)); + RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( + code_, "main_function", compiler_options_, *creation_context.context, + *creation_context.device, &kernel_)); return PostCompileCheck(creation_context.device->info_, kernel_.info_); } +absl::Status GPUOperation::CompileDeserialized( + const CreationContext& creation_context) { + return creation_context.cache->GetOrCreateCLKernel( + code_, "main_function", compiler_options_, *creation_context.context, + *creation_context.device, &kernel_); +} + void GPUOperation::GetPossibleKernelWorkGroups( TuningType tuning_type, const DeviceInfo& device_info, const KernelInfo& kernel_info, std::vector* work_groups) const { @@ -329,7 +337,7 @@ int3 GPUOperation::GetGridSize() const { const int grid_z = 1; return int3(grid_x, grid_y, grid_z); } - return int3(0, 0, 0); + return grid_size_; } void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index fe41e78f93c..57d8690c54e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" #include "tensorflow/lite/delegates/gpu/cl/precision.h" #include "tensorflow/lite/delegates/gpu/cl/program_cache.h" +#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" #include "tensorflow/lite/delegates/gpu/cl/tensor.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" @@ -129,8 +130,12 @@ class GPUOperation { absl::Status Tune(const TuningParameters& params); + absl::Status AssembleCode(const DeviceInfo& device_info, CLContext* context); + absl::Status Compile(const CreationContext& creation_context); + absl::Status CompileDeserialized(const CreationContext& creation_context); + virtual absl::Status PostCompileCheck(const DeviceInfo& device_info, const KernelInfo& kernel_info) { return absl::OkStatus(); @@ -164,6 +169,11 @@ class GPUOperation { bool check_src_channels_size_ = false; protected: + friend flatbuffers::Offset Encode( + const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder); + friend absl::Status Decode(CLContext* context, + const data::GPUOperation* fb_op, GPUOperation* op); + virtual absl::Status BindArguments(ArgumentsBinder* args) { return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/serialization.cc b/tensorflow/lite/delegates/gpu/cl/serialization.cc new file mode 100644 index 00000000000..3b52fc40bdf --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/serialization.cc @@ -0,0 +1,1049 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/cl/serialization.h" + +#include + +#include "tensorflow/lite/delegates/gpu/cl/arguments.h" +#include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" +#include "tensorflow/lite/delegates/gpu/cl/linear_storage.h" +#include "tensorflow/lite/delegates/gpu/cl/precision.h" +#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" +#include "tensorflow/lite/delegates/gpu/cl/texture2d.h" +#include "tensorflow/lite/delegates/gpu/common/model.h" + +namespace tflite { +namespace gpu { +namespace cl { +namespace { +data::AccessType ToFB(AccessType type) { + switch (type) { + case AccessType::READ: + return data::AccessType::READ; + case AccessType::WRITE: + return data::AccessType::WRITE; + case AccessType::READ_WRITE: + return data::AccessType::READ_WRITE; + default: + return data::AccessType::READ_WRITE; + } +} + +data::DataType ToFB(DataType type) { + switch (type) { + case DataType::FLOAT16: + return data::DataType::FLOAT16; + case DataType::FLOAT32: + return data::DataType::FLOAT32; + default: + return data::DataType::UNKNOWN; + } +} + +data::MemoryType ToFB(MemoryType type) { + switch (type) { + case MemoryType::CONSTANT: + return data::MemoryType::CONSTANT; + case MemoryType::GLOBAL: + return data::MemoryType::GLOBAL; + case MemoryType::LOCAL: + return data::MemoryType::LOCAL; + } +} + +data::LinearStorageType ToFB(LinearStorageType type) { + switch (type) { + case LinearStorageType::BUFFER: + return data::LinearStorageType::BUFFER; + case LinearStorageType::TEXTURE_2D: + return data::LinearStorageType::TEXTURE_2D; + } +} + +data::TensorStorageType ToFB(TensorStorageType type) { + switch (type) { + case TensorStorageType::BUFFER: + return data::TensorStorageType::BUFFER; + case TensorStorageType::IMAGE_BUFFER: + return data::TensorStorageType::IMAGE_BUFFER; + case TensorStorageType::TEXTURE_2D: + return data::TensorStorageType::TEXTURE_2D; + case TensorStorageType::TEXTURE_ARRAY: + return data::TensorStorageType::TEXTURE_ARRAY; + case TensorStorageType::TEXTURE_3D: + return data::TensorStorageType::TEXTURE_3D; + case TensorStorageType::SINGLE_TEXTURE_2D: + return data::TensorStorageType::SINGLE_TEXTURE_2D; + case TensorStorageType::UNKNOWN: + return data::TensorStorageType::UNKNOWN; + } +} + +data::Layout ToFB(Layout type) { + switch (type) { + case Layout::HWC: + return data::Layout::HWC; + case Layout::BHWC: + return data::Layout::BHWC; + case Layout::HWDC: + return data::Layout::HWDC; + case Layout::BHWDC: + return data::Layout::BHWDC; + default: + return data::Layout::UNKNOWN; + } +} + +data::CalculationsPrecision ToFB(CalculationsPrecision type) { + switch (type) { + case CalculationsPrecision::F32: + return data::CalculationsPrecision::F32; + case CalculationsPrecision::F32_F16: + return data::CalculationsPrecision::F32_F16; + case CalculationsPrecision::F16: + return data::CalculationsPrecision::F16; + } +} + +data::TensorToGrid ToFB(TensorToGrid type) { + switch (type) { + case TensorToGrid::kCustom: + return data::TensorToGrid::CUSTOM; + case TensorToGrid::kWBToX_HDToY_SToZ: + return data::TensorToGrid::WB_TO_X_HD_TO_Y_S_TO_Z; + case TensorToGrid::kWBToX_HDToY_ZIs1: + return data::TensorToGrid::WB_TO_X_HD_TO_Y_Z_IS_1; + case TensorToGrid::kWBToX_HToY_DToZ: + return data::TensorToGrid::WB_TO_X_H_TO_Y_D_TO_Z; + case TensorToGrid::kBToX_YIs1_ZIs1: + return data::TensorToGrid::B_TO_X_Y_IS_1_Z_IS_1; + } +} + +data::CompilerOptions ToFB(CompilerOptions type) { + switch (type) { + case CompilerOptions::ADRENO_FULL_SIMD_LINE: + return data::CompilerOptions::ADRENO_FULL_SIMD_LINE; + case CompilerOptions::ADRENO_MORE_WAVES: + return data::CompilerOptions::ADRENO_MORE_WAVES; + case CompilerOptions::POWERVR_FP16: + return data::CompilerOptions::POWERVR_FP16; + case CompilerOptions::CL_OPT_DISABLE: + return data::CompilerOptions::CL_OPT_DISABLE; + case CompilerOptions::CL_2_0: + return data::CompilerOptions::CL_2_0; + case CompilerOptions::CL_3_0: + return data::CompilerOptions::CL_3_0; + } +} + +DataType ToEnum(data::DataType type) { + switch (type) { + case data::DataType::FLOAT16: + return DataType::FLOAT16; + case data::DataType::FLOAT32: + return DataType::FLOAT32; + default: + return DataType::UNKNOWN; + } +} + +AccessType ToEnum(data::AccessType type) { + switch (type) { + case data::AccessType::READ: + return AccessType::READ; + case data::AccessType::WRITE: + return AccessType::WRITE; + case data::AccessType::READ_WRITE: + return AccessType::READ_WRITE; + } +} + +MemoryType ToEnum(data::MemoryType type) { + switch (type) { + case data::MemoryType::CONSTANT: + return MemoryType::CONSTANT; + case data::MemoryType::GLOBAL: + return MemoryType::GLOBAL; + case data::MemoryType::LOCAL: + return MemoryType::LOCAL; + } +} + +LinearStorageType ToEnum(data::LinearStorageType type) { + switch (type) { + case data::LinearStorageType::BUFFER: + return LinearStorageType::BUFFER; + case data::LinearStorageType::TEXTURE_2D: + return LinearStorageType::TEXTURE_2D; + } +} + +TensorStorageType ToEnum(data::TensorStorageType type) { + switch (type) { + case data::TensorStorageType::BUFFER: + return TensorStorageType::BUFFER; + case data::TensorStorageType::IMAGE_BUFFER: + return TensorStorageType::IMAGE_BUFFER; + case data::TensorStorageType::TEXTURE_2D: + return TensorStorageType::TEXTURE_2D; + case data::TensorStorageType::TEXTURE_ARRAY: + return TensorStorageType::TEXTURE_ARRAY; + case data::TensorStorageType::TEXTURE_3D: + return TensorStorageType::TEXTURE_3D; + case data::TensorStorageType::SINGLE_TEXTURE_2D: + return TensorStorageType::SINGLE_TEXTURE_2D; + case data::TensorStorageType::UNKNOWN: + return TensorStorageType::UNKNOWN; + } +} + +Layout ToEnum(data::Layout type) { + switch (type) { + case data::Layout::HWC: + return Layout::HWC; + case data::Layout::BHWC: + return Layout::BHWC; + case data::Layout::HWDC: + return Layout::HWDC; + case data::Layout::BHWDC: + return Layout::BHWDC; + default: + return Layout::UNKNOWN; + } +} + +CalculationsPrecision ToEnum(data::CalculationsPrecision type) { + switch (type) { + case data::CalculationsPrecision::F32: + return CalculationsPrecision::F32; + case data::CalculationsPrecision::F32_F16: + return CalculationsPrecision::F32_F16; + case data::CalculationsPrecision::F16: + return CalculationsPrecision::F16; + } +} + +TensorToGrid ToEnum(data::TensorToGrid type) { + switch (type) { + case data::TensorToGrid::CUSTOM: + return TensorToGrid::kCustom; + case data::TensorToGrid::WB_TO_X_HD_TO_Y_S_TO_Z: + return TensorToGrid::kWBToX_HDToY_SToZ; + case data::TensorToGrid::WB_TO_X_HD_TO_Y_Z_IS_1: + return TensorToGrid::kWBToX_HDToY_ZIs1; + case data::TensorToGrid::WB_TO_X_H_TO_Y_D_TO_Z: + return TensorToGrid::kWBToX_HToY_DToZ; + case data::TensorToGrid::B_TO_X_Y_IS_1_Z_IS_1: + return TensorToGrid::kBToX_YIs1_ZIs1; + } +} + +CompilerOptions ToEnum(data::CompilerOptions type) { + switch (type) { + case data::CompilerOptions::ADRENO_FULL_SIMD_LINE: + return CompilerOptions::ADRENO_FULL_SIMD_LINE; + case data::CompilerOptions::ADRENO_MORE_WAVES: + return CompilerOptions::ADRENO_MORE_WAVES; + case data::CompilerOptions::POWERVR_FP16: + return CompilerOptions::POWERVR_FP16; + case data::CompilerOptions::CL_OPT_DISABLE: + return CompilerOptions::CL_OPT_DISABLE; + case data::CompilerOptions::CL_2_0: + return CompilerOptions::CL_2_0; + case data::CompilerOptions::CL_3_0: + return CompilerOptions::CL_3_0; + } +} + +} // namespace + +flatbuffers::Offset Encode( + const int2& v, flatbuffers::FlatBufferBuilder* builder) { + data::Int2Builder int2_builder(*builder); + int2_builder.add_x(v.x); + int2_builder.add_y(v.y); + return int2_builder.Finish(); +} + +flatbuffers::Offset Encode( + const int3& v, flatbuffers::FlatBufferBuilder* builder) { + data::Int3Builder int3_builder(*builder); + int3_builder.add_x(v.x); + int3_builder.add_y(v.y); + int3_builder.add_z(v.z); + return int3_builder.Finish(); +} + +flatbuffers::Offset Encode( + const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder) { + std::vector> state_vars_fb; + for (auto& v0 : desc.state_vars_) { + auto key_fb = builder->CreateString(v0.first); + auto value_fb = builder->CreateString(v0.second); + data::StateVariableBuilder state_builder(*builder); + state_builder.add_key(key_fb); + state_builder.add_value(value_fb); + state_vars_fb.push_back(state_builder.Finish()); + } + auto state_vars_fb_vec = builder->CreateVector(state_vars_fb); + data::GPUObjectDescriptorBuilder obj_builder(*builder); + obj_builder.add_state_vars(state_vars_fb_vec); + obj_builder.add_access_type(ToFB(desc.access_type_)); + return obj_builder.Finish(); +} + +void Decode(const data::GPUObjectDescriptor* fb_obj, GPUObjectDescriptor* obj) { + obj->access_type_ = ToEnum(fb_obj->access_type()); + for (auto state_fb : *fb_obj->state_vars()) { + std::string key(state_fb->key()->c_str(), state_fb->key()->size()); + std::string value(state_fb->value()->c_str(), state_fb->value()->size()); + obj->state_vars_[key] = value; + } +} + +flatbuffers::Offset Encode( + const BufferDescriptor& desc, flatbuffers::FlatBufferBuilder* builder) { + auto obj_fb = + Encode(*static_cast(&desc), builder); + + std::vector> attributes_fb; + for (auto& attr : desc.attributes) { + attributes_fb.push_back(builder->CreateString(attr)); + } + auto attributes_fb_vec = builder->CreateVector(attributes_fb); + auto data_fb = builder->CreateVector(desc.data); + data::BufferDescriptorBuilder buf_builder(*builder); + buf_builder.add_base_obj(obj_fb); + buf_builder.add_element_type(ToFB(desc.element_type)); + buf_builder.add_element_size(desc.element_size); + buf_builder.add_memory_type(ToFB(desc.memory_type)); + buf_builder.add_attributes(attributes_fb_vec); + buf_builder.add_size(desc.size); + buf_builder.add_data(data_fb); + return buf_builder.Finish(); +} + +void Decode(const data::BufferDescriptor* fb_desc, BufferDescriptor* desc) { + Decode(fb_desc->base_obj(), desc); + desc->element_type = ToEnum(fb_desc->element_type()); + desc->element_size = fb_desc->element_size(); + desc->memory_type = ToEnum(fb_desc->memory_type()); + for (auto attr_fb : *fb_desc->attributes()) { + std::string attr(attr_fb->c_str(), attr_fb->size()); + desc->attributes.push_back(attr); + } + desc->size = fb_desc->size(); + desc->data = + std::vector(fb_desc->data()->data(), + fb_desc->data()->data() + fb_desc->data()->size()); +} + +flatbuffers::Offset Encode( + const Texture2DDescriptor& desc, flatbuffers::FlatBufferBuilder* builder) { + auto obj_fb = + Encode(*static_cast(&desc), builder); + + auto data_fb = builder->CreateVector(desc.data); + auto size_fb = Encode(desc.size, builder); + data::Texture2DDescriptorBuilder tex_builder(*builder); + tex_builder.add_base_obj(obj_fb); + tex_builder.add_element_type(ToFB(desc.element_type)); + tex_builder.add_normalized(desc.normalized); + tex_builder.add_normalized_type(ToFB(desc.normalized_type)); + tex_builder.add_size(size_fb); + tex_builder.add_data(data_fb); + return tex_builder.Finish(); +} + +void Decode(const data::Texture2DDescriptor* fb_desc, + Texture2DDescriptor* desc) { + Decode(fb_desc->base_obj(), desc); + desc->element_type = ToEnum(fb_desc->element_type()); + desc->normalized = fb_desc->normalized(); + desc->normalized_type = ToEnum(fb_desc->normalized_type()); + desc->size.x = fb_desc->size()->x(); + desc->size.y = fb_desc->size()->y(); + desc->data = + std::vector(fb_desc->data()->data(), + fb_desc->data()->data() + fb_desc->data()->size()); +} + +flatbuffers::Offset Encode( + const TensorLinearDescriptor& desc, + flatbuffers::FlatBufferBuilder* builder) { + auto obj_fb = + Encode(*static_cast(&desc), builder); + + auto data_fb = builder->CreateVector(desc.data); + data::TensorLinearDescriptorBuilder tensor_builder(*builder); + tensor_builder.add_base_obj(obj_fb); + tensor_builder.add_element_type(ToFB(desc.element_type)); + tensor_builder.add_storage_type(ToFB(desc.storage_type)); + tensor_builder.add_memory_type(ToFB(desc.memory_type)); + tensor_builder.add_size(desc.size); + tensor_builder.add_data(data_fb); + return tensor_builder.Finish(); +} + +void Decode(const data::TensorLinearDescriptor* fb_desc, + TensorLinearDescriptor* desc) { + Decode(fb_desc->base_obj(), desc); + desc->element_type = ToEnum(fb_desc->element_type()); + desc->storage_type = ToEnum(fb_desc->storage_type()); + desc->memory_type = ToEnum(fb_desc->memory_type()); + desc->size = fb_desc->size(); + desc->data = + std::vector(fb_desc->data()->data(), + fb_desc->data()->data() + fb_desc->data()->size()); +} + +flatbuffers::Offset Encode( + const TensorDescriptor& desc, flatbuffers::FlatBufferBuilder* builder) { + auto obj_fb = + Encode(*static_cast(&desc), builder); + + data::BHWDCBuilder shape_builder(*builder); + shape_builder.add_b(desc.shape.b); + shape_builder.add_h(desc.shape.h); + shape_builder.add_w(desc.shape.w); + shape_builder.add_d(desc.shape.d); + shape_builder.add_c(desc.shape.c); + auto shape_fb = shape_builder.Finish(); + + auto data_fb = builder->CreateVector(desc.data); + data::TensorDescriptorBuilder tensor_builder(*builder); + tensor_builder.add_base_obj(obj_fb); + tensor_builder.add_data_type(ToFB(desc.data_type)); + tensor_builder.add_storage_type(ToFB(desc.storage_type)); + tensor_builder.add_layout(ToFB(desc.layout)); + tensor_builder.add_shape(shape_fb); + tensor_builder.add_data(data_fb); + return tensor_builder.Finish(); +} + +void Decode(const data::TensorDescriptor* fb_desc, TensorDescriptor* desc) { + Decode(fb_desc->base_obj(), desc); + desc->data_type = ToEnum(fb_desc->data_type()); + desc->storage_type = ToEnum(fb_desc->storage_type()); + desc->layout = ToEnum(fb_desc->layout()); + desc->shape.b = fb_desc->shape()->b(); + desc->shape.h = fb_desc->shape()->h(); + desc->shape.w = fb_desc->shape()->w(); + desc->shape.d = fb_desc->shape()->d(); + desc->shape.c = fb_desc->shape()->c(); + desc->data = + std::vector(fb_desc->data()->data(), + fb_desc->data()->data() + fb_desc->data()->size()); +} + +flatbuffers::Offset Encode( + const OperationDef& def, flatbuffers::FlatBufferBuilder* builder) { + std::vector> src_tensors_fb; + for (auto& desc : def.src_tensors) { + auto desc_fb = Encode(desc, builder); + src_tensors_fb.push_back(desc_fb); + } + + std::vector> dst_tensors_fb; + for (auto& desc : def.dst_tensors) { + auto desc_fb = Encode(desc, builder); + dst_tensors_fb.push_back(desc_fb); + } + + auto src_tensors_fb_vec = builder->CreateVector(src_tensors_fb); + auto dst_tensors_fb_vec = builder->CreateVector(dst_tensors_fb); + + data::OperationDefBuilder def_builder(*builder); + def_builder.add_precision(ToFB(def.precision)); + def_builder.add_src_tensors(src_tensors_fb_vec); + def_builder.add_dst_tensors(dst_tensors_fb_vec); + return def_builder.Finish(); +} + +void Decode(const data::OperationDef* fb_def, OperationDef* def) { + for (auto src_fb : *fb_def->src_tensors()) { + TensorDescriptor desc; + Decode(src_fb, &desc); + def->src_tensors.push_back(std::move(desc)); + } + for (auto dst_fb : *fb_def->dst_tensors()) { + TensorDescriptor desc; + Decode(dst_fb, &desc); + def->dst_tensors.push_back(std::move(desc)); + } + def->precision = ToEnum(fb_def->precision()); +} + +flatbuffers::Offset Encode( + const TensorDescriptor& desc, const ValueId& id, + flatbuffers::FlatBufferBuilder* builder) { + auto desc_fb = Encode(desc, builder); + data::TensorDescWithIdBuilder desc_builder(*builder); + desc_builder.add_desc(desc_fb); + desc_builder.add_id(id); + return desc_builder.Finish(); +} + +void Decode(const data::TensorDescWithId* fb_desc, TensorDescriptor* desc, + ValueId* id) { + Decode(fb_desc->desc(), desc); + *id = fb_desc->id(); +} + +absl::Status Decode(CLContext* context, const data::Arguments* fb_args, + Arguments* args) { + args->shared_int4s_data_ = std::vector( + fb_args->shared_int4s()->data(), + fb_args->shared_int4s()->data() + fb_args->shared_int4s()->size()); + + args->shared_float4s_data_ = std::vector( + fb_args->shared_float4s()->data(), + fb_args->shared_float4s()->data() + fb_args->shared_float4s()->size()); + + std::vector tmp = std::vector( + fb_args->shared_half4s()->data(), + fb_args->shared_half4s()->data() + fb_args->shared_half4s()->size()); + + args->shared_half4s_data_.resize(tmp.size()); + for (int i = 0; i < tmp.size(); ++i) { + args->shared_half4s_data_[i] = tmp[i]; + } + + args->int_values_.clear(); + for (auto int_values_fb : *fb_args->int_values()) { + Arguments::IntValue value; + value.value = int_values_fb->value(); + value.offset = int_values_fb->offset(); + value.active = int_values_fb->active(); + std::string name(int_values_fb->name()->c_str(), + int_values_fb->name()->size()); + args->int_values_[name] = value; + } + + args->float_values_.clear(); + for (auto float_values_fb : *fb_args->float_values()) { + Arguments::FloatValue value; + value.value = float_values_fb->value(); + value.offset = float_values_fb->offset(); + value.active = float_values_fb->active(); + std::string name(float_values_fb->name()->c_str(), + float_values_fb->name()->size()); + args->float_values_[name] = value; + } + + args->half_values_.clear(); + for (auto half_values_fb : *fb_args->half_values()) { + Arguments::HalfValue value; + value.value = half_values_fb->value(); + value.offset = half_values_fb->offset(); + value.active = half_values_fb->active(); + value.store_as_f32 = half_values_fb->store_as_f32(); + std::string name(half_values_fb->name()->c_str(), + half_values_fb->name()->size()); + args->half_values_[name] = value; + } + + for (auto buffer_pair_fb : *fb_args->buffer_objects()) { + std::string key(buffer_pair_fb->key()->c_str(), + buffer_pair_fb->key()->size()); + BufferDescriptor desc; + Decode(buffer_pair_fb->value(), &desc); + args->AddObject(key, absl::make_unique(std::move(desc))); + } + + for (auto texture_pair_fb : *fb_args->texture2d_objects()) { + std::string key(texture_pair_fb->key()->c_str(), + texture_pair_fb->key()->size()); + Texture2DDescriptor desc; + Decode(texture_pair_fb->value(), &desc); + args->AddObject(key, + absl::make_unique(std::move(desc))); + } + + for (auto tensor_pair_fb : *fb_args->tensor_linear_objects()) { + std::string key(tensor_pair_fb->key()->c_str(), + tensor_pair_fb->key()->size()); + TensorLinearDescriptor desc; + Decode(tensor_pair_fb->value(), &desc); + args->AddObject(key, + absl::make_unique(std::move(desc))); + } + + for (auto tensor_pair_fb : *fb_args->tensor_objects()) { + std::string key(tensor_pair_fb->key()->c_str(), + tensor_pair_fb->key()->size()); + TensorDescriptor desc; + Decode(tensor_pair_fb->value(), &desc); + args->AddObject(key, absl::make_unique(std::move(desc))); + } + + for (auto buffer_pair_fb : *fb_args->buffer_refs()) { + std::string key(buffer_pair_fb->key()->c_str(), + buffer_pair_fb->key()->size()); + BufferDescriptor desc; + Decode(buffer_pair_fb->value(), &desc); + auto access_type = desc.GetAccess(); + args->AddObjectRef(key, access_type, + absl::make_unique(std::move(desc))); + } + + for (auto texture_pair_fb : *fb_args->texture2d_refs()) { + std::string key(texture_pair_fb->key()->c_str(), + texture_pair_fb->key()->size()); + Texture2DDescriptor desc; + Decode(texture_pair_fb->value(), &desc); + auto access_type = desc.GetAccess(); + args->AddObjectRef(key, access_type, + absl::make_unique(std::move(desc))); + } + + for (auto tensor_pair_fb : *fb_args->tensor_linear_refs()) { + std::string key(tensor_pair_fb->key()->c_str(), + tensor_pair_fb->key()->size()); + TensorLinearDescriptor desc; + Decode(tensor_pair_fb->value(), &desc); + auto access_type = desc.GetAccess(); + args->AddObjectRef( + key, access_type, + absl::make_unique(std::move(desc))); + } + + for (auto tensor_pair_fb : *fb_args->tensor_refs()) { + std::string key(tensor_pair_fb->key()->c_str(), + tensor_pair_fb->key()->size()); + TensorDescriptor desc; + Decode(tensor_pair_fb->value(), &desc); + auto access_type = desc.GetAccess(); + args->AddObjectRef(key, access_type, + absl::make_unique(std::move(desc))); + } + + RETURN_IF_ERROR(args->AllocateObjects(context)); + RETURN_IF_ERROR(args->AddObjectArgs()); + return absl::OkStatus(); +} + +flatbuffers::Offset Encode( + const Arguments& args, flatbuffers::FlatBufferBuilder* builder) { + std::vector> int_values_fb; + for (auto& value : args.int_values_) { + auto name_fb = builder->CreateString(value.first); + data::IntValueBuilder value_builder(*builder); + value_builder.add_name(name_fb); + value_builder.add_value(value.second.value); + value_builder.add_offset(value.second.offset); + value_builder.add_active(value.second.active); + int_values_fb.push_back(value_builder.Finish()); + } + + std::vector> float_values_fb; + for (auto& value : args.float_values_) { + auto name_fb = builder->CreateString(value.first); + data::FloatValueBuilder value_builder(*builder); + value_builder.add_name(name_fb); + value_builder.add_value(value.second.value); + value_builder.add_offset(value.second.offset); + value_builder.add_active(value.second.active); + float_values_fb.push_back(value_builder.Finish()); + } + + std::vector> half_values_fb; + for (auto& value : args.half_values_) { + auto name_fb = builder->CreateString(value.first); + data::HalfValueBuilder value_builder(*builder); + value_builder.add_name(name_fb); + value_builder.add_value(value.second.value); + value_builder.add_offset(value.second.offset); + value_builder.add_active(value.second.active); + value_builder.add_store_as_f32(value.second.store_as_f32); + half_values_fb.push_back(value_builder.Finish()); + } + + std::vector> + buffer_objs_fb; + for (auto& value : args.objects_) { + const auto* buffer_desc = + dynamic_cast(value.second.descriptor.get()); + if (!buffer_desc) continue; + auto desc_fb = Encode(*buffer_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::BufferDescriptorMapValueBuilder buf_map_builder(*builder); + buf_map_builder.add_key(key_fb); + buf_map_builder.add_value(desc_fb); + buffer_objs_fb.push_back(buf_map_builder.Finish()); + } + std::vector> + texture2d_objs_fb; + for (auto& value : args.objects_) { + const auto* texture_desc = + dynamic_cast(value.second.descriptor.get()); + if (!texture_desc) continue; + auto desc_fb = Encode(*texture_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::Texture2DDescriptorMapValueBuilder tex_map_builder(*builder); + tex_map_builder.add_key(key_fb); + tex_map_builder.add_value(desc_fb); + texture2d_objs_fb.push_back(tex_map_builder.Finish()); + } + std::vector> + tensor_linear_objs_fb; + for (auto& value : args.objects_) { + const auto* tensor_desc = dynamic_cast( + value.second.descriptor.get()); + if (!tensor_desc) continue; + auto desc_fb = Encode(*tensor_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::TensorLinearDescriptorMapValueBuilder ten_map_builder(*builder); + ten_map_builder.add_key(key_fb); + ten_map_builder.add_value(desc_fb); + tensor_linear_objs_fb.push_back(ten_map_builder.Finish()); + } + std::vector> + tensor_objs_fb; + for (auto& value : args.objects_) { + const auto* tensor_desc = + dynamic_cast(value.second.descriptor.get()); + if (!tensor_desc) continue; + auto desc_fb = Encode(*tensor_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::TensorDescriptorMapValueBuilder ten_map_builder(*builder); + ten_map_builder.add_key(key_fb); + ten_map_builder.add_value(desc_fb); + tensor_objs_fb.push_back(ten_map_builder.Finish()); + } + + std::vector> + buffer_refs_fb; + for (auto& value : args.object_refs_) { + const auto* buffer_desc = + dynamic_cast(value.second.descriptor.get()); + if (!buffer_desc) continue; + auto desc_fb = Encode(*buffer_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::BufferDescriptorMapValueBuilder buf_map_builder(*builder); + buf_map_builder.add_key(key_fb); + buf_map_builder.add_value(desc_fb); + buffer_refs_fb.push_back(buf_map_builder.Finish()); + } + std::vector> + texture2d_refs_fb; + for (auto& value : args.object_refs_) { + const auto* texture_desc = + dynamic_cast(value.second.descriptor.get()); + if (!texture_desc) continue; + auto desc_fb = Encode(*texture_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::Texture2DDescriptorMapValueBuilder tex_map_builder(*builder); + tex_map_builder.add_key(key_fb); + tex_map_builder.add_value(desc_fb); + texture2d_refs_fb.push_back(tex_map_builder.Finish()); + } + std::vector> + tensor_linear_refs_fb; + for (auto& value : args.object_refs_) { + const auto* tensor_desc = dynamic_cast( + value.second.descriptor.get()); + if (!tensor_desc) continue; + auto desc_fb = Encode(*tensor_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::TensorLinearDescriptorMapValueBuilder ten_map_builder(*builder); + ten_map_builder.add_key(key_fb); + ten_map_builder.add_value(desc_fb); + tensor_linear_refs_fb.push_back(ten_map_builder.Finish()); + } + std::vector> + tensor_refs_fb; + for (auto& value : args.object_refs_) { + const auto* tensor_desc = + dynamic_cast(value.second.descriptor.get()); + if (!tensor_desc) continue; + auto desc_fb = Encode(*tensor_desc, builder); + auto key_fb = builder->CreateString(value.first); + data::TensorDescriptorMapValueBuilder ten_map_builder(*builder); + ten_map_builder.add_key(key_fb); + ten_map_builder.add_value(desc_fb); + tensor_refs_fb.push_back(ten_map_builder.Finish()); + } + + auto shared_int4s_data_fb = builder->CreateVector(args.shared_int4s_data_); + auto shared_float4s_data_fb = + builder->CreateVector(args.shared_float4s_data_); + std::vector tmp(args.shared_half4s_data_.size()); + for (int i = 0; i < tmp.size(); ++i) { + tmp[i] = args.shared_half4s_data_[i]; + } + auto shared_half4s_data_fb = builder->CreateVector(tmp); + auto int_values_fb_vec = builder->CreateVector(int_values_fb); + auto float_values_fb_vec = builder->CreateVector(float_values_fb); + auto half_values_fb_vec = builder->CreateVector(half_values_fb); + auto buffer_objs_fb_vec = builder->CreateVector(buffer_objs_fb); + auto texture2d_objs_fb_vec = builder->CreateVector(texture2d_objs_fb); + auto tensor_linear_objs_fb_vec = builder->CreateVector(tensor_linear_objs_fb); + auto tensor_objs_fb_vec = builder->CreateVector(tensor_objs_fb); + auto buffer_refs_fb_vec = builder->CreateVector(buffer_refs_fb); + auto texture2d_refs_fb_vec = builder->CreateVector(texture2d_refs_fb); + auto tensor_linear_refs_fb_vec = builder->CreateVector(tensor_linear_refs_fb); + auto tensor_refs_fb_vec = builder->CreateVector(tensor_refs_fb); + data::ArgumentsBuilder arguments_builder(*builder); + arguments_builder.add_shared_int4s(shared_int4s_data_fb); + arguments_builder.add_shared_float4s(shared_float4s_data_fb); + arguments_builder.add_shared_half4s(shared_half4s_data_fb); + arguments_builder.add_int_values(int_values_fb_vec); + arguments_builder.add_float_values(float_values_fb_vec); + arguments_builder.add_half_values(half_values_fb_vec); + arguments_builder.add_buffer_objects(buffer_objs_fb_vec); + arguments_builder.add_texture2d_objects(texture2d_objs_fb_vec); + arguments_builder.add_tensor_linear_objects(tensor_linear_objs_fb_vec); + arguments_builder.add_tensor_objects(tensor_objs_fb_vec); + arguments_builder.add_buffer_refs(buffer_refs_fb_vec); + arguments_builder.add_texture2d_refs(texture2d_refs_fb_vec); + arguments_builder.add_tensor_linear_refs(tensor_linear_refs_fb_vec); + arguments_builder.add_tensor_refs(tensor_refs_fb_vec); + return arguments_builder.Finish(); +} + +absl::Status Decode(CLContext* context, const data::GPUOperation* fb_op, + GPUOperation* op) { + RETURN_IF_ERROR(Decode(context, fb_op->arguments(), &op->args_)); + op->code_ = std::string(fb_op->code()->c_str(), fb_op->code()->size()); + op->work_group_size_.x = fb_op->work_group_size()->x(); + op->work_group_size_.y = fb_op->work_group_size()->y(); + op->work_group_size_.z = fb_op->work_group_size()->z(); + for (auto option_fb : *fb_op->compiler_options()) { + op->compiler_options_.push_back(ToEnum(option_fb->option())); + } + op->tensor_to_grid_ = ToEnum(fb_op->tensor_to_grid()); + op->elementwise_ = fb_op->elementwise(); + op->linkable_ = fb_op->linkable(); + op->check_src_channels_size_ = fb_op->check_src_channels_size(); + Decode(fb_op->definition(), &op->definition_); + op->grid_dimension_ = fb_op->grid_dimension(); + op->work_group_launch_order_.x = fb_op->work_group_launch_order()->x(); + op->work_group_launch_order_.y = fb_op->work_group_launch_order()->y(); + op->work_group_launch_order_.z = fb_op->work_group_launch_order()->z(); + op->grid_size_.x = fb_op->grid_size()->x(); + op->grid_size_.y = fb_op->grid_size()->y(); + op->grid_size_.z = fb_op->grid_size()->z(); + for (auto name_fb : *fb_op->src_tensors_names()) { + std::string name(name_fb->c_str(), name_fb->size()); + op->src_tensors_names_.push_back(std::move(name)); + } + for (auto name_fb : *fb_op->dst_tensors_names()) { + std::string name(name_fb->c_str(), name_fb->size()); + op->dst_tensors_names_.push_back(std::move(name)); + } + op->work_groups_count_.x = fb_op->work_groups_count()->x(); + op->work_groups_count_.y = fb_op->work_groups_count()->y(); + op->work_groups_count_.z = fb_op->work_groups_count()->z(); + op->linkable_count_ = fb_op->linkable_count(); + op->elementwise_code_ = std::string(fb_op->elementwise_code()->c_str(), + fb_op->elementwise_code()->size()); + return absl::OkStatus(); +} + +flatbuffers::Offset Encode( + const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder) { + auto args_fb = Encode(op.args_, builder); + auto code_fb = builder->CreateString(op.code_); + auto work_group_size_fb = Encode(op.work_group_size_, builder); + std::vector> compiler_options_fb; + for (int i = 0; i < op.compiler_options_.size(); ++i) { + data::CompilerOptionBuilder option_builder(*builder); + option_builder.add_option(ToFB(op.compiler_options_[i])); + compiler_options_fb.push_back(option_builder.Finish()); + } + auto compiler_options_fb_vec = builder->CreateVector(compiler_options_fb); + + auto def_fb = Encode(op.definition_, builder); + auto work_group_launch_order_fb = + Encode(op.work_group_launch_order_, builder); + auto grid_size_fb = Encode(op.grid_size_, builder); + auto work_groups_count_fb = Encode(op.work_groups_count_, builder); + + std::vector> src_names_fb; + for (auto& name : op.src_tensors_names_) { + src_names_fb.push_back(builder->CreateString(name)); + } + auto src_names_fb_vec = builder->CreateVector(src_names_fb); + + std::vector> dst_names_fb; + for (auto& name : op.dst_tensors_names_) { + dst_names_fb.push_back(builder->CreateString(name)); + } + auto dst_names_fb_vec = builder->CreateVector(dst_names_fb); + + auto elementwise_code_fb = builder->CreateString(op.elementwise_code_); + + data::GPUOperationBuilder op_builder(*builder); + op_builder.add_arguments(args_fb); + op_builder.add_code(code_fb); + op_builder.add_work_group_size(work_group_size_fb); + op_builder.add_compiler_options(compiler_options_fb_vec); + op_builder.add_tensor_to_grid(ToFB(op.tensor_to_grid_)); + op_builder.add_elementwise(op.elementwise_); + op_builder.add_linkable(op.linkable_); + op_builder.add_check_src_channels_size(op.check_src_channels_size_); + op_builder.add_definition(def_fb); + op_builder.add_grid_dimension(op.grid_dimension_); + op_builder.add_work_group_launch_order(work_group_launch_order_fb); + op_builder.add_grid_size(grid_size_fb); + op_builder.add_src_tensors_names(src_names_fb_vec); + op_builder.add_dst_tensors_names(dst_names_fb_vec); + op_builder.add_work_groups_count(work_groups_count_fb); + op_builder.add_linkable_count(op.linkable_count_); + op_builder.add_elementwise_code(elementwise_code_fb); + return op_builder.Finish(); +} + +flatbuffers::Offset Encode( + const CLNode& node, flatbuffers::FlatBufferBuilder* builder) { + auto op_fb = Encode(*node.operation, builder); + std::vector in_ids(node.inputs.size()); + for (int i = 0; i < in_ids.size(); ++i) { + in_ids[i] = node.inputs[i]; + } + std::vector out_ids(node.outputs.size()); + for (int i = 0; i < out_ids.size(); ++i) { + out_ids[i] = node.outputs[i]; + } + auto in_ids_fb = builder->CreateVector(in_ids); + auto out_ids_fb = builder->CreateVector(out_ids); + auto name_fb = builder->CreateString(node.name); + data::CLNodeBuilder node_builder(*builder); + node_builder.add_gpu_op(op_fb); + node_builder.add_input_ids(in_ids_fb); + node_builder.add_output_ids(out_ids_fb); + node_builder.add_name(name_fb); + return node_builder.Finish(); +} + +absl::Status Decode(CLContext* context, const data::CLNode* fb_node, + CLNode* node) { + GPUOperation op; + RETURN_IF_ERROR(Decode(context, fb_node->gpu_op(), &op)); + node->operation = absl::make_unique(std::move(op)); + for (auto in_fb : *fb_node->input_ids()) { + node->inputs.push_back(in_fb); + } + for (auto out_fb : *fb_node->output_ids()) { + node->outputs.push_back(out_fb); + } + node->name = std::string(fb_node->name()->c_str(), fb_node->name()->size()); + + return absl::OkStatus(); +} + +flatbuffers::Offset Encode( + const InferenceContext& inference, + flatbuffers::FlatBufferBuilder* builder) { + std::vector in_ids(inference.input_ids_.size()); + for (int i = 0; i < in_ids.size(); ++i) { + in_ids[i] = inference.input_ids_[i]; + } + std::vector out_ids(inference.output_ids_.size()); + for (int i = 0; i < out_ids.size(); ++i) { + out_ids[i] = inference.output_ids_[i]; + } + auto in_ids_fb = builder->CreateVector(in_ids); + auto out_ids_fb = builder->CreateVector(out_ids); + + std::vector> nodes_fb; + for (int i = 0; i < inference.nodes_.size(); ++i) { + auto node_fb = Encode(inference.nodes_[i], builder); + nodes_fb.push_back(node_fb); + } + auto nodes_fb_vec = builder->CreateVector(nodes_fb); + + std::vector> tensors_fb; + auto tensors = inference.tensor_reserver_.GetTensorDescs(); + for (auto& tensor : tensors) { + auto tensor_fb = Encode(tensor.second, tensor.first, builder); + tensors_fb.push_back(tensor_fb); + } + auto tensors_fb_vec = builder->CreateVector(tensors_fb); + + std::vector> + variable_ids_and_refs_fb; + for (auto& pair : inference.variable_ids_and_refs_) { + data::PairOfValueIdsBuilder pair_builder(*builder); + pair_builder.add_first(pair.first); + pair_builder.add_second(pair.second); + variable_ids_and_refs_fb.push_back(pair_builder.Finish()); + } + auto variable_ids_and_refs_fb_vec = + builder->CreateVector(variable_ids_and_refs_fb); + + data::InferenceContextBuilder inf_builder(*builder); + inf_builder.add_need_flush(inference.need_flush_); + inf_builder.add_flush_periodically(inference.flush_periodically_); + inf_builder.add_flush_period(inference.flush_period_); + inf_builder.add_need_manual_release(inference.need_manual_release_); + inf_builder.add_precision(ToFB(inference.precision_)); + inf_builder.add_storage_type(ToFB(inference.storage_type_)); + inf_builder.add_nodes(nodes_fb_vec); + inf_builder.add_tensors(tensors_fb_vec); + inf_builder.add_input_ids(in_ids_fb); + inf_builder.add_output_ids(out_ids_fb); + inf_builder.add_variable_ids_and_refs(variable_ids_and_refs_fb_vec); + return inf_builder.Finish(); +} + +absl::Status Decode(CLContext* context, + const data::InferenceContext* fb_inference, + InferenceContext* inference) { + inference->need_flush_ = fb_inference->need_flush(); + inference->flush_periodically_ = fb_inference->flush_periodically(); + inference->flush_period_ = fb_inference->flush_period(); + inference->need_manual_release_ = fb_inference->need_manual_release(); + inference->precision_ = ToEnum(fb_inference->precision()); + inference->storage_type_ = ToEnum(fb_inference->storage_type()); + + inference->nodes_.resize(fb_inference->nodes()->size()); + int counter = 0; + for (auto node_fb : *fb_inference->nodes()) { + RETURN_IF_ERROR(Decode(context, node_fb, &inference->nodes_[counter])); + counter++; + } + + std::vector> tensors; + for (auto tensor_fb : *fb_inference->tensors()) { + TensorDescriptor desc; + Decode(tensor_fb->desc(), &desc); + tensors.push_back({tensor_fb->id(), std::move(desc)}); + } + inference->tensor_reserver_.Add(tensors); + for (auto in_fb : *fb_inference->input_ids()) { + inference->input_ids_.push_back(in_fb); + } + for (auto out_fb : *fb_inference->output_ids()) { + inference->output_ids_.push_back(out_fb); + } + + for (auto variable_id : *fb_inference->variable_ids_and_refs()) { + inference->variable_ids_and_refs_[variable_id->first()] = + variable_id->second(); + } + return absl::OkStatus(); +} + +} // namespace cl +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/serialization.fbs b/tensorflow/lite/delegates/gpu/cl/serialization.fbs new file mode 100644 index 00000000000..0c0d2241b5a --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/serialization.fbs @@ -0,0 +1,278 @@ +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace tflite.gpu.cl.data; + +table Int4 { + x:int32; + y:int32; + z:int32; + w:int32; +} + +table Int3 { + x:int32; + y:int32; + z:int32; +} + +table Int2 { + x:int32; + y:int32; +} + +table IntValue { + name:string; + value:int32; + active:bool; + offset:uint32; +} + +table FloatValue { + name:string; + value:float; + active:bool; + offset:uint32; +} + +table HalfValue { + name:string; + value:float; + active:bool; + store_as_f32:bool; + offset:uint32; +} + +enum AccessType : byte { + READ = 0, + WRITE = 1, + READ_WRITE = 2, +} + +enum DataType : byte { + UNKNOWN = 0, + FLOAT32 = 1, + FLOAT16 = 2, +} + +enum MemoryType : byte { + GLOBAL = 0, + CONSTANT = 1, + LOCAL = 2, +} + +table StateVariable { + key:string; + value:string; +} + +table GPUObjectDescriptor { + state_vars:[StateVariable]; + access_type:AccessType; +} + +table BufferDescriptor { + base_obj:GPUObjectDescriptor; + element_type:DataType; + element_size:int32; + memory_type:MemoryType; + attributes:[string]; + size:int32; + data:[uint8]; +} + +table Texture2DDescriptor { + base_obj:GPUObjectDescriptor; + element_type:DataType; + normalized:bool; + normalized_type:DataType; + size:Int2; + data:[uint8]; +} + +enum LinearStorageType : byte { + BUFFER = 0, + TEXTURE_2D = 1, +} + +table TensorLinearDescriptor { + base_obj:GPUObjectDescriptor; + storage_type:LinearStorageType; + element_type:DataType; + memory_type:MemoryType; + size:int32; + data:[uint8]; +} + +enum TensorStorageType : byte { + UNKNOWN = 0, + BUFFER = 1, + IMAGE_BUFFER = 2, + TEXTURE_2D = 3, + TEXTURE_3D = 4, + TEXTURE_ARRAY = 5, + SINGLE_TEXTURE_2D = 6, +} + +enum Layout : byte { + UNKNOWN = 0, + HWC = 1, + BHWC = 2, + HWDC = 3, + BHWDC = 4, +} + +table BHWDC { + b:int32; + h:int32; + w:int32; + d:int32; + c:int32; +} + +table TensorDescriptor { + base_obj:GPUObjectDescriptor; + data_type:DataType; + storage_type:TensorStorageType; + layout:Layout; + shape:BHWDC; + data:[uint8]; +} + +table BufferDescriptorMapValue { + key:string; + value:BufferDescriptor; +} + +table Texture2DDescriptorMapValue { + key:string; + value:Texture2DDescriptor; +} + +table TensorLinearDescriptorMapValue { + key:string; + value:TensorLinearDescriptor; +} + +table TensorDescriptorMapValue { + key:string; + value:TensorDescriptor; +} + +table Arguments { + int_values:[IntValue]; + shared_int4s:[int32]; + + float_values:[FloatValue]; + shared_float4s:[float]; + + half_values:[HalfValue]; + shared_half4s:[float]; + + buffer_refs:[BufferDescriptorMapValue]; + texture2d_refs:[Texture2DDescriptorMapValue]; + tensor_linear_refs:[TensorLinearDescriptorMapValue]; + tensor_refs:[TensorDescriptorMapValue]; + + buffer_objects:[BufferDescriptorMapValue]; + texture2d_objects:[Texture2DDescriptorMapValue]; + tensor_linear_objects:[TensorLinearDescriptorMapValue]; + tensor_objects:[TensorDescriptorMapValue]; +} + +enum CalculationsPrecision : byte { + F32 = 0, + F32_F16 = 1, + F16 = 2, +} + +enum TensorToGrid : byte { + CUSTOM = 0, + WB_TO_X_HD_TO_Y_S_TO_Z = 1, + WB_TO_X_HD_TO_Y_Z_IS_1 = 2, + WB_TO_X_H_TO_Y_D_TO_Z = 3, + B_TO_X_Y_IS_1_Z_IS_1 = 4, +} + +enum CompilerOptions : byte { + ADRENO_FULL_SIMD_LINE = 0, + ADRENO_MORE_WAVES = 1, + POWERVR_FP16 = 2, + CL_OPT_DISABLE = 3, + CL_2_0 = 4, + CL_3_0 = 5, +} + +table OperationDef { + precision:CalculationsPrecision; + src_tensors:[TensorDescriptor]; + dst_tensors:[TensorDescriptor]; +} + +table CompilerOption { + option:CompilerOptions; +} + +table GPUOperation { + arguments:Arguments; + code:string; + work_group_size:Int3; + compiler_options:[CompilerOption]; + tensor_to_grid:TensorToGrid; + elementwise:bool; + linkable:bool; + check_src_channels_size:bool; + definition:OperationDef; + grid_dimension:int32; + work_group_launch_order:Int3; + grid_size:Int3; + src_tensors_names:[string]; + dst_tensors_names:[string]; + work_groups_count:Int3; + linkable_count:int32; + elementwise_code:string; +} + +table TensorDescWithId { + desc:TensorDescriptor; + id:int32; +} + +table CLNode { + gpu_op:GPUOperation; + input_ids:[int32]; + output_ids:[int32]; + name:string; +} + +table PairOfValueIds { + first:int32; + second:int32; +} + +table InferenceContext { + need_flush:bool; + flush_periodically:bool; + flush_period:int32; + need_manual_release:bool; + precision:CalculationsPrecision; + storage_type:TensorStorageType; + nodes:[CLNode]; + tensors:[TensorDescWithId]; + input_ids:[int32]; + variable_ids_and_refs:[PairOfValueIds]; + output_ids:[int32]; +} + +root_type InferenceContext; diff --git a/tensorflow/lite/delegates/gpu/cl/serialization.h b/tensorflow/lite/delegates/gpu/cl/serialization.h new file mode 100644 index 00000000000..1273e62a100 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/serialization.h @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_ + +#include "absl/types/span.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" +#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" + +namespace tflite { +namespace gpu { +namespace cl { + +class InferenceContext; + +flatbuffers::Offset Encode( + const InferenceContext& inference, flatbuffers::FlatBufferBuilder* builder); + +absl::Status Decode(CLContext* context, + const data::InferenceContext* fb_inference, + InferenceContext* inference); + +} // namespace cl +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_ diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.cc b/tensorflow/lite/delegates/gpu/cl/tensor.cc index 72c53c5b1ac..c35554b875b 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor.cc @@ -605,8 +605,11 @@ absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc, descriptor_.layout = desc.layout; memory_owner_ = true; CLMemory memory; - RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_, - desc.data.data(), &memory)); + uint8_t* data_ptr = desc.data.empty() + ? nullptr + : const_cast(desc.data.data()); + RETURN_IF_ERROR( + AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory)); memory_ = memory.Release(); if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) { RETURN_IF_ERROR(CreateImageBufferFromBuffer(