Serialization of OpenCL InferenceContext.

PiperOrigin-RevId: 337185119
Change-Id: I3841fd093a692a4acd851792f723381fd29e53bc
Raman Sarokin 2020-10-14 15:26:13 -07:00 committed by TensorFlower Gardener
parent 9318f787e6
commit 465aeca042
12 changed files with 1527 additions and 29 deletions


@@ -55,6 +55,7 @@ cc_library(
         ":cl_device",
         ":gpu_object",
         ":opencl_wrapper",
+        ":serialization_cc_fbs",
         ":tensor_type",
         ":util",
         "//tensorflow/lite/delegates/gpu/common:access_type",
@@ -358,6 +359,7 @@ cc_library(
     deps = [
         ":cl_context",
         ":opencl_wrapper",
+        ":serialization_cc_fbs",
         "//tensorflow/lite/delegates/gpu/common:access_type",
         "//tensorflow/lite/delegates/gpu/common:data_type",
         "//tensorflow/lite/delegates/gpu/common:status",
@@ -366,19 +368,30 @@ cc_library(
 cc_library(
     name = "inference_context",
-    srcs = ["inference_context.cc"],
-    hdrs = ["inference_context.h"],
+    srcs = [
+        "inference_context.cc",
+        "serialization.cc",
+    ],
+    hdrs = [
+        "inference_context.h",
+        "serialization.h",
+    ],
     deps = [
+        ":arguments",
         ":buffer",
         ":cl_command_queue",
+        ":cl_context",
         ":cl_device",
         ":environment",
         ":gpu_object",
+        ":linear_storage",
         ":model_hints",
         ":opencl_wrapper",
         ":precision",
+        ":serialization_cc_fbs",
         ":storage_type_util",
         ":tensor_type",
+        ":texture2d",
         "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
         "//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector",
         "//tensorflow/lite/delegates/gpu/cl/selectors:special_selector",
@@ -396,6 +409,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/types:span",
     ],
 )
@@ -467,6 +481,14 @@ cc_library(
     ],
 )
 
+flatbuffer_cc_library(
+    name = "serialization_cc_fbs",
+    srcs = ["serialization.fbs"],
+    flatc_args = [
+        "--scoped-enums",
+    ],
+)
+
 cc_library(
     name = "storage_type_util",
     srcs = ["storage_type_util.cc"],


@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -77,6 +78,11 @@ class Arguments : public ArgumentsBinder {
   ~Arguments() override = default;
 
  private:
+  friend flatbuffers::Offset<data::Arguments> Encode(
+      const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
+                             Arguments* args);
+
   void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
   void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
   void AddImage2DArray(const std::string& name,


@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -164,6 +165,10 @@ class GPUObjectDescriptor {
   AccessType GetAccess() const { return access_type_; }
 
  protected:
+  friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
+      const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
+  friend void Decode(const data::GPUObjectDescriptor* fb_obj,
+                     GPUObjectDescriptor* obj);
   mutable std::map<std::string, std::string> state_vars_;
   AccessType access_type_;
 };


@@ -153,7 +153,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
 absl::Status InferenceContext::InitFromGraph(
     const CreateInferenceInfo& create_info, const GraphFloat32& graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {
   CreationContext creation_context;
   creation_context.device = env->GetDevicePtr();
   creation_context.context = &env->context();
@@ -182,10 +186,6 @@ absl::Status InferenceContext::InitFromGraph(
   RETURN_IF_ERROR(Compile(creation_context));
   RETURN_IF_ERROR(UpdateParams());
-  for (auto& node : nodes_) {
-    node.operation->args_.ReleaseCPURepresentation();
-  }
   TuningParameters tuning_parameters;
   tuning_parameters.queue = env->profiling_queue();
   tuning_parameters.info = &env->device().info_;
@@ -201,14 +197,54 @@
     }
   }
   RETURN_IF_ERROR(Tune(tuning_parameters));
+
+  if (serialized_model) {
+    flatbuffers::FlatBufferBuilder builder;
+    auto encoded_fb = Encode(*this, &builder);
+    data::FinishInferenceContextBuffer(builder, encoded_fb);
+    serialized_model->resize(builder.GetSize());
+    std::memcpy(serialized_model->data(), builder.GetBufferPointer(),
+                builder.GetSize());
+  }
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();
+  }
+  return absl::OkStatus();
+}
+
+absl::Status InferenceContext::RestoreDeserialized(
+    const std::vector<uint8_t>& serialized_model, Environment* env) {
+  flatbuffers::Verifier verifier(serialized_model.data(),
+                                 serialized_model.size());
+  if (!data::VerifyInferenceContextBuffer(verifier)) {
+    return absl::DataLossError("Deserialization failed.");
+  }
+  auto decoded_fb = data::GetInferenceContext(serialized_model.data());
+  RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this));
+
+  CreationContext creation_context;
+  creation_context.device = env->GetDevicePtr();
+  creation_context.context = &env->context();
+  creation_context.queue = env->queue();
+  creation_context.cache = env->program_cache();
+
+  RETURN_IF_ERROR(AllocateMemory(creation_context.context));
+  BindMemoryToOperations();
+  for (auto& node : nodes_) {
+    RETURN_IF_ERROR(node.operation->CompileDeserialized(creation_context));
+  }
+  RETURN_IF_ERROR(UpdateParams());
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();
+  }
   return absl::OkStatus();
 }
 
 absl::Status InferenceContext::InitFromGraphWithTransforms(
     const CreateInferenceInfo& create_info, GraphFloat32* graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {
   RETURN_IF_ERROR(RunGraphTransforms(graph));
-  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env));
+  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env, serialized_model));
   return absl::OkStatus();
 }
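Usage note (not part of the diff): the two entry points above are all a client needs for caching a built context. Below is a hedged sketch, assuming code placed inside namespace tflite::gpu::cl with the usual includes of inference_context.h; the helper names BuildOnce and BuildFromCache are illustrative only.

// Hedged sketch, not from this commit. Serialize once at first initialization,
// then rebuild later from the stored bytes without redoing codegen from the graph.
absl::Status BuildOnce(const InferenceContext::CreateInferenceInfo& create_info,
                       GraphFloat32* graph, Environment* env,
                       std::vector<uint8_t>* serialized_model) {
  InferenceContext context;
  // A non-null vector asks InitFromGraph* to emit the FlatBuffer blob.
  return context.InitFromGraphWithTransforms(create_info, graph, env,
                                             serialized_model);
}

absl::Status BuildFromCache(const std::vector<uint8_t>& serialized_model,
                            Environment* env, InferenceContext* context) {
  // Verifies and decodes the blob, allocates memory, then compiles each node
  // via GPUOperation::CompileDeserialized().
  return context->RestoreDeserialized(serialized_model, env);
}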


@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -65,14 +66,15 @@ class InferenceContext {
   };
 
   absl::Status InitFromGraph(const CreateInferenceInfo& create_info,
-                             const GraphFloat32& graph, Environment* env);
+                             const GraphFloat32& graph, Environment* env,
+                             std::vector<uint8_t>* serialized_model = nullptr);
 
   // Applies OpenCL-specific transformations to the graph before the
   // initialization. These transformations are either impossible or useless in
   // other backends.
   absl::Status InitFromGraphWithTransforms(
       const CreateInferenceInfo& create_info, GraphFloat32* graph,
-      Environment* env);
+      Environment* env, std::vector<uint8_t>* serialized_model = nullptr);
 
   absl::Status AddToQueue(CLCommandQueue* queue);
   absl::Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result);
@@ -92,9 +94,19 @@ class InferenceContext {
   const std::vector<ValueId>& GetInputIds() const { return input_ids_; }
   const std::vector<ValueId>& GetOutputIds() const { return output_ids_; }
 
+  absl::Status RestoreDeserialized(const std::vector<uint8_t>& serialized_model,
+                                   Environment* env);
+
  private:
   enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
 
+  friend flatbuffers::Offset<data::InferenceContext> Encode(
+      const InferenceContext& inference,
+      flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::InferenceContext* fb_inference,
+                             InferenceContext* inference);
+
   void CopyInAndOutIds(const GraphFloat32& graph);
   absl::Status ConvertOperations(const DeviceInfo& device_info,
                                  const GraphFloat32& graph, ModelHints hints);
@@ -165,6 +177,32 @@ class InferenceContext {
     void SetNext(ValueId id) { next_ = id; }
     DummyTensor Get(ValueId id) { return reservations_[id]; }
 
+    std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const {
+      std::vector<std::pair<ValueId, TensorDescriptor>> result;
+      for (auto& v : reservations_) {
+        TensorDescriptor desc = v.second.descriptor;
+        desc.shape.b = v.second.shape.b;
+        desc.shape.h = v.second.shape.h;
+        desc.shape.w = v.second.shape.w;
+        desc.shape.d = 1;
+        desc.shape.c = v.second.shape.c;
+        result.push_back({v.first, desc});
+      }
+      return result;
+    }
+
+    void Add(const std::vector<std::pair<ValueId, TensorDescriptor>>& tensors) {
+      for (auto& v : tensors) {
+        DummyTensor dummy;
+        dummy.descriptor = v.second;
+        dummy.shape.b = v.second.shape.b;
+        dummy.shape.h = v.second.shape.h;
+        dummy.shape.w = v.second.shape.w;
+        dummy.shape.c = v.second.shape.c;
+        Add(v.first, dummy);
+      }
+    }
+
    private:
     absl::flat_hash_map<ValueId, DummyTensor> reservations_;
     ValueId next_;


@@ -651,6 +651,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/cl:device_info",
         "//tensorflow/lite/delegates/gpu/cl:precision",
         "//tensorflow/lite/delegates/gpu/cl:program_cache",
+        "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
         "//tensorflow/lite/delegates/gpu/cl:tensor",
         "//tensorflow/lite/delegates/gpu/cl:tensor_type",
         "//tensorflow/lite/delegates/gpu/common:access_type",


@@ -223,7 +223,8 @@ absl::Status GPUOperation::UpdateParams() {
   return absl::OkStatus();
 }
 
-absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
+absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info,
+                                        CLContext* context) {
   if (elementwise_) {
     auto src_desc =
         absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
@@ -241,28 +242,35 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
     dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
     args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
 
-    std::string code =
-        GetElementWiseCode(definition_, check_src_channels_size_);
     elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
     RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code, "main_function", *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
   } else {
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
     RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code_, "main_function", compiler_options_, *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
   }
+  return absl::OkStatus();
+}
+
+absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
+  RETURN_IF_ERROR(
+      AssembleCode(creation_context.GetDeviceInfo(), creation_context.context));
+  RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_));
   return PostCompileCheck(creation_context.device->info_, kernel_.info_);
 }
 
+absl::Status GPUOperation::CompileDeserialized(
+    const CreationContext& creation_context) {
+  return creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_);
+}
+
 void GPUOperation::GetPossibleKernelWorkGroups(
     TuningType tuning_type, const DeviceInfo& device_info,
     const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
@@ -329,7 +337,7 @@ int3 GPUOperation::GetGridSize() const {
     const int grid_z = 1;
     return int3(grid_x, grid_y, grid_z);
   }
-  return int3(0, 0, 0);
+  return grid_size_;
 }
 
 void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
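A brief note on the split above (illustration only): Compile() now assembles the kernel source via AssembleCode() and then builds the CL kernel, while CompileDeserialized() skips code generation because code_ and the arguments come from the decoded FlatBuffer. A hedged caller-side sketch; BuildKernel is a hypothetical helper, not part of this commit:

// Hedged sketch, assumed to live inside namespace tflite::gpu::cl.
absl::Status BuildKernel(GPUOperation* op, const CreationContext& ctx,
                         bool restored_from_serialized_model) {
  if (restored_from_serialized_model) {
    // code_ was restored by Decode(); only the OpenCL kernel is (re)built.
    return op->CompileDeserialized(ctx);
  }
  // Fresh path: AssembleCode() + kernel creation + PostCompileCheck().
  return op->Compile(ctx);
}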


@@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
 #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
@@ -129,8 +130,12 @@ class GPUOperation {
   absl::Status Tune(const TuningParameters& params);
 
+  absl::Status AssembleCode(const DeviceInfo& device_info, CLContext* context);
+
   absl::Status Compile(const CreationContext& creation_context);
 
+  absl::Status CompileDeserialized(const CreationContext& creation_context);
+
   virtual absl::Status PostCompileCheck(const DeviceInfo& device_info,
                                         const KernelInfo& kernel_info) {
     return absl::OkStatus();
@@ -164,6 +169,11 @@ class GPUOperation {
   bool check_src_channels_size_ = false;
 
  protected:
+  friend flatbuffers::Offset<data::GPUOperation> Encode(
+      const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::GPUOperation* fb_op, GPUOperation* op);
+
   virtual absl::Status BindArguments(ArgumentsBinder* args) {
     return absl::OkStatus();
   }

File diff suppressed because it is too large.


@@ -0,0 +1,278 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace tflite.gpu.cl.data;
table Int4 {
x:int32;
y:int32;
z:int32;
w:int32;
}
table Int3 {
x:int32;
y:int32;
z:int32;
}
table Int2 {
x:int32;
y:int32;
}
table IntValue {
name:string;
value:int32;
active:bool;
offset:uint32;
}
table FloatValue {
name:string;
value:float;
active:bool;
offset:uint32;
}
table HalfValue {
name:string;
value:float;
active:bool;
store_as_f32:bool;
offset:uint32;
}
enum AccessType : byte {
READ = 0,
WRITE = 1,
READ_WRITE = 2,
}
enum DataType : byte {
UNKNOWN = 0,
FLOAT32 = 1,
FLOAT16 = 2,
}
enum MemoryType : byte {
GLOBAL = 0,
CONSTANT = 1,
LOCAL = 2,
}
table StateVariable {
key:string;
value:string;
}
table GPUObjectDescriptor {
state_vars:[StateVariable];
access_type:AccessType;
}
table BufferDescriptor {
base_obj:GPUObjectDescriptor;
element_type:DataType;
element_size:int32;
memory_type:MemoryType;
attributes:[string];
size:int32;
data:[uint8];
}
table Texture2DDescriptor {
base_obj:GPUObjectDescriptor;
element_type:DataType;
normalized:bool;
normalized_type:DataType;
size:Int2;
data:[uint8];
}
enum LinearStorageType : byte {
BUFFER = 0,
TEXTURE_2D = 1,
}
table TensorLinearDescriptor {
base_obj:GPUObjectDescriptor;
storage_type:LinearStorageType;
element_type:DataType;
memory_type:MemoryType;
size:int32;
data:[uint8];
}
enum TensorStorageType : byte {
UNKNOWN = 0,
BUFFER = 1,
IMAGE_BUFFER = 2,
TEXTURE_2D = 3,
TEXTURE_3D = 4,
TEXTURE_ARRAY = 5,
SINGLE_TEXTURE_2D = 6,
}
enum Layout : byte {
UNKNOWN = 0,
HWC = 1,
BHWC = 2,
HWDC = 3,
BHWDC = 4,
}
table BHWDC {
b:int32;
h:int32;
w:int32;
d:int32;
c:int32;
}
table TensorDescriptor {
base_obj:GPUObjectDescriptor;
data_type:DataType;
storage_type:TensorStorageType;
layout:Layout;
shape:BHWDC;
data:[uint8];
}
table BufferDescriptorMapValue {
key:string;
value:BufferDescriptor;
}
table Texture2DDescriptorMapValue {
key:string;
value:Texture2DDescriptor;
}
table TensorLinearDescriptorMapValue {
key:string;
value:TensorLinearDescriptor;
}
table TensorDescriptorMapValue {
key:string;
value:TensorDescriptor;
}
table Arguments {
int_values:[IntValue];
shared_int4s:[int32];
float_values:[FloatValue];
shared_float4s:[float];
half_values:[HalfValue];
shared_half4s:[float];
buffer_refs:[BufferDescriptorMapValue];
texture2d_refs:[Texture2DDescriptorMapValue];
tensor_linear_refs:[TensorLinearDescriptorMapValue];
tensor_refs:[TensorDescriptorMapValue];
buffer_objects:[BufferDescriptorMapValue];
texture2d_objects:[Texture2DDescriptorMapValue];
tensor_linear_objects:[TensorLinearDescriptorMapValue];
tensor_objects:[TensorDescriptorMapValue];
}
enum CalculationsPrecision : byte {
F32 = 0,
F32_F16 = 1,
F16 = 2,
}
enum TensorToGrid : byte {
CUSTOM = 0,
WB_TO_X_HD_TO_Y_S_TO_Z = 1,
WB_TO_X_HD_TO_Y_Z_IS_1 = 2,
WB_TO_X_H_TO_Y_D_TO_Z = 3,
B_TO_X_Y_IS_1_Z_IS_1 = 4,
}
enum CompilerOptions : byte {
ADRENO_FULL_SIMD_LINE = 0,
ADRENO_MORE_WAVES = 1,
POWERVR_FP16 = 2,
CL_OPT_DISABLE = 3,
CL_2_0 = 4,
CL_3_0 = 5,
}
table OperationDef {
precision:CalculationsPrecision;
src_tensors:[TensorDescriptor];
dst_tensors:[TensorDescriptor];
}
table CompilerOption {
option:CompilerOptions;
}
table GPUOperation {
arguments:Arguments;
code:string;
work_group_size:Int3;
compiler_options:[CompilerOption];
tensor_to_grid:TensorToGrid;
elementwise:bool;
linkable:bool;
check_src_channels_size:bool;
definition:OperationDef;
grid_dimension:int32;
work_group_launch_order:Int3;
grid_size:Int3;
src_tensors_names:[string];
dst_tensors_names:[string];
work_groups_count:Int3;
linkable_count:int32;
elementwise_code:string;
}
table TensorDescWithId {
desc:TensorDescriptor;
id:int32;
}
table CLNode {
gpu_op:GPUOperation;
input_ids:[int32];
output_ids:[int32];
name:string;
}
table PairOfValueIds {
first:int32;
second:int32;
}
table InferenceContext {
need_flush:bool;
flush_periodically:bool;
flush_period:int32;
need_manual_release:bool;
precision:CalculationsPrecision;
storage_type:TensorStorageType;
nodes:[CLNode];
tensors:[TensorDescWithId];
input_ids:[int32];
variable_ids_and_refs:[PairOfValueIds];
output_ids:[int32];
}
root_type InferenceContext;
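For readers unfamiliar with the generated API: flatc emits GetInferenceContext(), VerifyInferenceContextBuffer() and FinishInferenceContextBuffer() for the root type above, which is exactly what the serialization path in inference_context.cc calls. A hedged, standalone sketch of reading a blob back; the function name is illustrative only:

// Hedged sketch, not from this commit: validate a blob and peek at it through
// the flatc-generated accessors (zero-copy reads over the buffer).
#include <cstdint>
#include <vector>

#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"

bool LooksLikeSerializedInferenceContext(const std::vector<uint8_t>& blob) {
  flatbuffers::Verifier verifier(blob.data(), blob.size());
  if (!tflite::gpu::cl::data::VerifyInferenceContextBuffer(verifier)) {
    return false;  // Not a well-formed InferenceContext buffer.
  }
  const auto* fb = tflite::gpu::cl::data::GetInferenceContext(blob.data());
  // Field accessors mirror the schema; absent fields come back as nullptr.
  return fb->nodes() != nullptr && fb->nodes()->size() > 0;
}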


@@ -0,0 +1,42 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {
namespace gpu {
namespace cl {
class InferenceContext;
flatbuffers::Offset<data::InferenceContext> Encode(
const InferenceContext& inference, flatbuffers::FlatBufferBuilder* builder);
absl::Status Decode(CLContext* context,
const data::InferenceContext* fb_inference,
InferenceContext* inference);
} // namespace cl
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_


@@ -605,8 +605,11 @@ absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
   descriptor_.layout = desc.layout;
   memory_owner_ = true;
   CLMemory memory;
-  RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_,
-                                       desc.data.data(), &memory));
+  uint8_t* data_ptr = desc.data.empty()
+                          ? nullptr
+                          : const_cast<unsigned char*>(desc.data.data());
+  RETURN_IF_ERROR(
+      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
   memory_ = memory.Release();
   if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
     RETURN_IF_ERROR(CreateImageBufferFromBuffer(