Serialization of OpenCL InferenceContext.
PiperOrigin-RevId: 337185119 Change-Id: I3841fd093a692a4acd851792f723381fd29e53bc
This commit is contained in:
parent
9318f787e6
commit
465aeca042
@ -55,6 +55,7 @@ cc_library(
|
||||
":cl_device",
|
||||
":gpu_object",
|
||||
":opencl_wrapper",
|
||||
":serialization_cc_fbs",
|
||||
":tensor_type",
|
||||
":util",
|
||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||
@ -358,6 +359,7 @@ cc_library(
|
||||
deps = [
|
||||
":cl_context",
|
||||
":opencl_wrapper",
|
||||
":serialization_cc_fbs",
|
||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
@ -366,19 +368,30 @@ cc_library(
|
||||
|
||||
cc_library(
|
||||
name = "inference_context",
|
||||
srcs = ["inference_context.cc"],
|
||||
hdrs = ["inference_context.h"],
|
||||
srcs = [
|
||||
"inference_context.cc",
|
||||
"serialization.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"inference_context.h",
|
||||
"serialization.h",
|
||||
],
|
||||
deps = [
|
||||
":arguments",
|
||||
":buffer",
|
||||
":cl_command_queue",
|
||||
":cl_context",
|
||||
":cl_device",
|
||||
":environment",
|
||||
":gpu_object",
|
||||
":linear_storage",
|
||||
":model_hints",
|
||||
":opencl_wrapper",
|
||||
":precision",
|
||||
":serialization_cc_fbs",
|
||||
":storage_type_util",
|
||||
":tensor_type",
|
||||
":texture2d",
|
||||
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
|
||||
"//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector",
|
||||
"//tensorflow/lite/delegates/gpu/cl/selectors:special_selector",
|
||||
@ -396,6 +409,7 @@ cc_library(
|
||||
"//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
"@com_google_absl//absl/types:span",
|
||||
],
|
||||
)
|
||||
|
||||
@ -467,6 +481,14 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
# Generates the C++ FlatBuffers bindings for serialization.fbs. Sources in this
# change include the result as "serialization_generated.h".
# "--scoped-enums" asks flatc to emit C++11 scoped enums (enum class).
flatbuffer_cc_library(
|
||||
name = "serialization_cc_fbs",
|
||||
srcs = ["serialization.fbs"],
|
||||
flatc_args = [
|
||||
"--scoped-enums",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "storage_type_util",
|
||||
srcs = ["storage_type_util.cc"],
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/access_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
@ -77,6 +78,11 @@ class Arguments : public ArgumentsBinder {
|
||||
~Arguments() override = default;
|
||||
|
||||
private:
|
||||
friend flatbuffers::Offset<data::Arguments> Encode(
|
||||
const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
|
||||
friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
|
||||
Arguments* args);
|
||||
|
||||
void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
|
||||
void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
|
||||
void AddImage2DArray(const std::string& name,
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/access_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
@ -164,6 +165,10 @@ class GPUObjectDescriptor {
|
||||
AccessType GetAccess() const { return access_type_; }
|
||||
|
||||
protected:
|
||||
friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
|
||||
const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
|
||||
friend void Decode(const data::GPUObjectDescriptor* fb_obj,
|
||||
GPUObjectDescriptor* obj);
|
||||
mutable std::map<std::string, std::string> state_vars_;
|
||||
AccessType access_type_;
|
||||
};
|
||||
|
@ -153,7 +153,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
|
||||
|
||||
absl::Status InferenceContext::InitFromGraph(
|
||||
const CreateInferenceInfo& create_info, const GraphFloat32& graph,
|
||||
Environment* env) {
|
||||
Environment* env, std::vector<uint8_t>* serialized_model) {
|
||||
CreationContext creation_context;
|
||||
creation_context.device = env->GetDevicePtr();
|
||||
creation_context.context = &env->context();
|
||||
@ -182,10 +182,6 @@ absl::Status InferenceContext::InitFromGraph(
|
||||
RETURN_IF_ERROR(Compile(creation_context));
|
||||
RETURN_IF_ERROR(UpdateParams());
|
||||
|
||||
for (auto& node : nodes_) {
|
||||
node.operation->args_.ReleaseCPURepresentation();
|
||||
}
|
||||
|
||||
TuningParameters tuning_parameters;
|
||||
tuning_parameters.queue = env->profiling_queue();
|
||||
tuning_parameters.info = &env->device().info_;
|
||||
@ -201,14 +197,54 @@ absl::Status InferenceContext::InitFromGraph(
|
||||
}
|
||||
}
|
||||
RETURN_IF_ERROR(Tune(tuning_parameters));
|
||||
|
||||
if (serialized_model) {
|
||||
flatbuffers::FlatBufferBuilder builder;
|
||||
auto encoded_fb = Encode(*this, &builder);
|
||||
data::FinishInferenceContextBuffer(builder, encoded_fb);
|
||||
serialized_model->resize(builder.GetSize());
|
||||
std::memcpy(serialized_model->data(), builder.GetBufferPointer(),
|
||||
builder.GetSize());
|
||||
}
|
||||
for (auto& node : nodes_) {
|
||||
node.operation->args_.ReleaseCPURepresentation();
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// Rebuilds this context from a buffer previously filled by InitFromGraph()
// through its `serialized_model` output.
//
// Steps: verify the FlatBuffer, decode it into this object, allocate tensor
// memory, bind it to the operations, compile every operation from its stored
// code, refresh parameters, and finally drop the CPU-side argument data.
//
// Returns DataLossError if the buffer fails FlatBuffers verification, or the
// first error reported by any later step.
absl::Status InferenceContext::RestoreDeserialized(
    const std::vector<uint8_t>& serialized_model, Environment* env) {
  // Never read fields out of an unverified buffer.
  flatbuffers::Verifier buffer_verifier(serialized_model.data(),
                                        serialized_model.size());
  const bool buffer_is_valid =
      data::VerifyInferenceContextBuffer(buffer_verifier);
  if (!buffer_is_valid) {
    return absl::DataLossError("Deserialization failed.");
  }

  const auto* fb_inference =
      data::GetInferenceContext(serialized_model.data());
  RETURN_IF_ERROR(Decode(&env->context(), fb_inference, this));

  CreationContext restore_context;
  restore_context.device = env->GetDevicePtr();
  restore_context.context = &env->context();
  restore_context.queue = env->queue();
  restore_context.cache = env->program_cache();

  RETURN_IF_ERROR(AllocateMemory(restore_context.context));
  BindMemoryToOperations();

  // The decoded operations already carry their code, so only the kernel
  // creation step is needed here.
  for (CLNode& cl_node : nodes_) {
    RETURN_IF_ERROR(cl_node.operation->CompileDeserialized(restore_context));
  }
  RETURN_IF_ERROR(UpdateParams());

  // Released only after UpdateParams(), matching the order in InitFromGraph().
  for (CLNode& cl_node : nodes_) {
    cl_node.operation->args_.ReleaseCPURepresentation();
  }
  return absl::OkStatus();
}
|
||||
|
||||
absl::Status InferenceContext::InitFromGraphWithTransforms(
|
||||
const CreateInferenceInfo& create_info, GraphFloat32* graph,
|
||||
Environment* env) {
|
||||
Environment* env, std::vector<uint8_t>* serialized_model) {
|
||||
RETURN_IF_ERROR(RunGraphTransforms(graph));
|
||||
RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env));
|
||||
RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env, serialized_model));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
@ -65,14 +66,15 @@ class InferenceContext {
|
||||
};
|
||||
|
||||
absl::Status InitFromGraph(const CreateInferenceInfo& create_info,
|
||||
const GraphFloat32& graph, Environment* env);
|
||||
const GraphFloat32& graph, Environment* env,
|
||||
std::vector<uint8_t>* serialized_model = nullptr);
|
||||
|
||||
// Applies OpenCL-specific transformations to the graph before the
|
||||
// initialization. These transformations are either impossible or useless in
|
||||
// other backends.
|
||||
absl::Status InitFromGraphWithTransforms(
|
||||
const CreateInferenceInfo& create_info, GraphFloat32* graph,
|
||||
Environment* env);
|
||||
Environment* env, std::vector<uint8_t>* serialized_model = nullptr);
|
||||
|
||||
absl::Status AddToQueue(CLCommandQueue* queue);
|
||||
absl::Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result);
|
||||
@ -92,9 +94,19 @@ class InferenceContext {
|
||||
const std::vector<ValueId>& GetInputIds() const { return input_ids_; }
|
||||
const std::vector<ValueId>& GetOutputIds() const { return output_ids_; }
|
||||
|
||||
// Restores this context from `serialized_model`, a buffer previously filled
// by InitFromGraph() / InitFromGraphWithTransforms() via their
// `serialized_model` argument. Returns a DataLoss error when the buffer does
// not pass FlatBuffers verification.
absl::Status RestoreDeserialized(const std::vector<uint8_t>& serialized_model,
|
||||
Environment* env);
|
||||
|
||||
private:
|
||||
enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
|
||||
|
||||
friend flatbuffers::Offset<data::InferenceContext> Encode(
|
||||
const InferenceContext& inference,
|
||||
flatbuffers::FlatBufferBuilder* builder);
|
||||
friend absl::Status Decode(CLContext* context,
|
||||
const data::InferenceContext* fb_inference,
|
||||
InferenceContext* inference);
|
||||
|
||||
void CopyInAndOutIds(const GraphFloat32& graph);
|
||||
absl::Status ConvertOperations(const DeviceInfo& device_info,
|
||||
const GraphFloat32& graph, ModelHints hints);
|
||||
@ -165,6 +177,32 @@ class InferenceContext {
|
||||
void SetNext(ValueId id) { next_ = id; }
|
||||
DummyTensor Get(ValueId id) { return reservations_[id]; }
|
||||
|
||||
std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const {
|
||||
std::vector<std::pair<ValueId, TensorDescriptor>> result;
|
||||
for (auto& v : reservations_) {
|
||||
TensorDescriptor desc = v.second.descriptor;
|
||||
desc.shape.b = v.second.shape.b;
|
||||
desc.shape.h = v.second.shape.h;
|
||||
desc.shape.w = v.second.shape.w;
|
||||
desc.shape.d = 1;
|
||||
desc.shape.c = v.second.shape.c;
|
||||
result.push_back({v.first, desc});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void Add(const std::vector<std::pair<ValueId, TensorDescriptor>>& tensors) {
|
||||
for (auto& v : tensors) {
|
||||
DummyTensor dummy;
|
||||
dummy.descriptor = v.second;
|
||||
dummy.shape.b = v.second.shape.b;
|
||||
dummy.shape.h = v.second.shape.h;
|
||||
dummy.shape.w = v.second.shape.w;
|
||||
dummy.shape.c = v.second.shape.c;
|
||||
Add(v.first, dummy);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
absl::flat_hash_map<ValueId, DummyTensor> reservations_;
|
||||
ValueId next_;
|
||||
|
@ -651,6 +651,7 @@ cc_library(
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/cl:precision",
|
||||
"//tensorflow/lite/delegates/gpu/cl:program_cache",
|
||||
"//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
|
||||
"//tensorflow/lite/delegates/gpu/cl:tensor",
|
||||
"//tensorflow/lite/delegates/gpu/cl:tensor_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||
|
@ -223,7 +223,8 @@ absl::Status GPUOperation::UpdateParams() {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
|
||||
absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info,
|
||||
CLContext* context) {
|
||||
if (elementwise_) {
|
||||
auto src_desc =
|
||||
absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
|
||||
@ -241,28 +242,35 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
|
||||
dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
|
||||
args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
|
||||
|
||||
std::string code =
|
||||
GetElementWiseCode(definition_, check_src_channels_size_);
|
||||
elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
|
||||
RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
|
||||
code_ = GetElementWiseCode(definition_, check_src_channels_size_);
|
||||
RETURN_IF_ERROR(args_.AllocateObjects(context));
|
||||
RETURN_IF_ERROR(args_.TransformToCLCode(
|
||||
creation_context.device->info_,
|
||||
{{dst_tensors_names_[0], elementwise_code_}}, &code));
|
||||
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
|
||||
code, "main_function", *creation_context.context,
|
||||
*creation_context.device, &kernel_));
|
||||
device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
|
||||
} else {
|
||||
RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
|
||||
RETURN_IF_ERROR(args_.AllocateObjects(context));
|
||||
RETURN_IF_ERROR(args_.TransformToCLCode(
|
||||
creation_context.device->info_,
|
||||
{{dst_tensors_names_[0], elementwise_code_}}, &code_));
|
||||
RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
|
||||
code_, "main_function", compiler_options_, *creation_context.context,
|
||||
*creation_context.device, &kernel_));
|
||||
device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// Full compilation path: assembles the kernel source into code_, obtains the
// kernel from the program cache, then returns the result of the
// PostCompileCheck() hook. Stops at the first failing step.
absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
  const absl::Status assemble_status = AssembleCode(
      creation_context.GetDeviceInfo(), creation_context.context);
  if (!assemble_status.ok()) {
    return assemble_status;
  }

  const absl::Status kernel_status =
      creation_context.cache->GetOrCreateCLKernel(
          code_, "main_function", compiler_options_,
          *creation_context.context, *creation_context.device, &kernel_);
  if (!kernel_status.ok()) {
    return kernel_status;
  }

  return PostCompileCheck(creation_context.device->info_, kernel_.info_);
}
|
||||
|
||||
// Compilation path for a restored operation: code_ and compiler_options_ are
// used as they are, so only the kernel is requested from the program cache.
// Neither AssembleCode() nor PostCompileCheck() is called here.
absl::Status GPUOperation::CompileDeserialized(
    const CreationContext& creation_context) {
  auto& program_cache = *creation_context.cache;
  absl::Status kernel_status = program_cache.GetOrCreateCLKernel(
      code_, "main_function", compiler_options_, *creation_context.context,
      *creation_context.device, &kernel_);
  return kernel_status;
}
|
||||
|
||||
void GPUOperation::GetPossibleKernelWorkGroups(
|
||||
TuningType tuning_type, const DeviceInfo& device_info,
|
||||
const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
|
||||
@ -329,7 +337,7 @@ int3 GPUOperation::GetGridSize() const {
|
||||
const int grid_z = 1;
|
||||
return int3(grid_x, grid_y, grid_z);
|
||||
}
|
||||
return int3(0, 0, 0);
|
||||
return grid_size_;
|
||||
}
|
||||
|
||||
void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
|
||||
|
@ -30,6 +30,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
@ -129,8 +130,12 @@ class GPUOperation {
|
||||
|
||||
absl::Status Tune(const TuningParameters& params);
|
||||
|
||||
// Prepares the kernel source without creating a CL kernel: the implementation
// in this change allocates the argument objects on `context` and runs
// args_.TransformToCLCode() for `device_info`, writing the result to code_.
absl::Status AssembleCode(const DeviceInfo& device_info, CLContext* context);
|
||||
|
||||
// Calls AssembleCode(), obtains the kernel from creation_context.cache and
// returns the result of PostCompileCheck().
absl::Status Compile(const CreationContext& creation_context);
|
||||
|
||||
// Obtains the kernel from creation_context.cache using the code_ and
// compiler_options_ already stored in this operation. Unlike Compile(), it
// calls neither AssembleCode() nor PostCompileCheck().
absl::Status CompileDeserialized(const CreationContext& creation_context);
|
||||
|
||||
virtual absl::Status PostCompileCheck(const DeviceInfo& device_info,
|
||||
const KernelInfo& kernel_info) {
|
||||
return absl::OkStatus();
|
||||
@ -164,6 +169,11 @@ class GPUOperation {
|
||||
bool check_src_channels_size_ = false;
|
||||
|
||||
protected:
|
||||
friend flatbuffers::Offset<data::GPUOperation> Encode(
|
||||
const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
|
||||
friend absl::Status Decode(CLContext* context,
|
||||
const data::GPUOperation* fb_op, GPUOperation* op);
|
||||
|
||||
virtual absl::Status BindArguments(ArgumentsBinder* args) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
1049
tensorflow/lite/delegates/gpu/cl/serialization.cc
Normal file
1049
tensorflow/lite/delegates/gpu/cl/serialization.cc
Normal file
File diff suppressed because it is too large
Load Diff
278
tensorflow/lite/delegates/gpu/cl/serialization.fbs
Normal file
278
tensorflow/lite/delegates/gpu/cl/serialization.fbs
Normal file
@ -0,0 +1,278 @@
|
||||
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
namespace tflite.gpu.cl.data;
|
||||
|
||||
// Four int32 components: x, y, z, w.
table Int4 {
|
||||
x:int32;
|
||||
y:int32;
|
||||
z:int32;
|
||||
w:int32;
|
||||
}
|
||||
|
||||
// Three int32 components: x, y, z. Used by GPUOperation for its work-group
// and grid fields.
table Int3 {
|
||||
x:int32;
|
||||
y:int32;
|
||||
z:int32;
|
||||
}
|
||||
|
||||
// Two int32 components: x, y. Used as Texture2DDescriptor.size.
table Int2 {
|
||||
x:int32;
|
||||
y:int32;
|
||||
}
|
||||
|
||||
// A named int32 value with an `active` flag and a uint32 `offset`.
// Stored in Arguments.int_values.
// NOTE(review): `offset` presumably indexes into Arguments.shared_int4s -
// confirm against serialization.cc.
table IntValue {
|
||||
name:string;
|
||||
value:int32;
|
||||
active:bool;
|
||||
offset:uint32;
|
||||
}
|
||||
|
||||
// A named float value with an `active` flag and a uint32 `offset`.
// Stored in Arguments.float_values.
// NOTE(review): `offset` presumably indexes into Arguments.shared_float4s -
// confirm against serialization.cc.
table FloatValue {
|
||||
name:string;
|
||||
value:float;
|
||||
active:bool;
|
||||
offset:uint32;
|
||||
}
|
||||
|
||||
// A named half-precision argument. The value itself is kept as a 32-bit
// float in this schema; `store_as_f32` is carried as a separate flag.
// Stored in Arguments.half_values.
// NOTE(review): `offset` presumably indexes into Arguments.shared_half4s -
// confirm against serialization.cc.
table HalfValue {
|
||||
name:string;
|
||||
value:float;
|
||||
active:bool;
|
||||
store_as_f32:bool;
|
||||
offset:uint32;
|
||||
}
|
||||
|
||||
// Access mode stored in GPUObjectDescriptor.access_type.
// NOTE(review): presumably mirrors the C++ AccessType enum - confirm the
// mapping in serialization.cc before changing any value.
enum AccessType : byte {
|
||||
READ = 0,
|
||||
WRITE = 1,
|
||||
READ_WRITE = 2,
|
||||
}
|
||||
|
||||
// Element/data type used by the descriptor tables below
// (element_type, normalized_type, data_type fields).
enum DataType : byte {
|
||||
UNKNOWN = 0,
|
||||
FLOAT32 = 1,
|
||||
FLOAT16 = 2,
|
||||
}
|
||||
|
||||
// Memory kind stored in BufferDescriptor.memory_type and
// TensorLinearDescriptor.memory_type.
enum MemoryType : byte {
|
||||
GLOBAL = 0,
|
||||
CONSTANT = 1,
|
||||
LOCAL = 2,
|
||||
}
|
||||
|
||||
// One string key/value pair; GPUObjectDescriptor.state_vars is a list of
// these.
table StateVariable {
|
||||
key:string;
|
||||
value:string;
|
||||
}
|
||||
|
||||
// Fields shared by every descriptor table; the concrete descriptors embed
// this table as their `base_obj` field.
table GPUObjectDescriptor {
|
||||
state_vars:[StateVariable];
|
||||
access_type:AccessType;
|
||||
}
|
||||
|
||||
// Serialized buffer descriptor: common base fields, element type and size,
// memory type, attribute strings, and the raw bytes in `data` together with
// an int32 `size`.
table BufferDescriptor {
|
||||
base_obj:GPUObjectDescriptor;
|
||||
element_type:DataType;
|
||||
element_size:int32;
|
||||
memory_type:MemoryType;
|
||||
attributes:[string];
|
||||
size:int32;
|
||||
data:[uint8];
|
||||
}
|
||||
|
||||
// Serialized 2D texture descriptor: common base fields, element type,
// normalization settings, a two-component size and the raw bytes in `data`.
table Texture2DDescriptor {
|
||||
base_obj:GPUObjectDescriptor;
|
||||
element_type:DataType;
|
||||
normalized:bool;
|
||||
normalized_type:DataType;
|
||||
size:Int2;
|
||||
data:[uint8];
|
||||
}
|
||||
|
||||
// Storage kind stored in TensorLinearDescriptor.storage_type.
enum LinearStorageType : byte {
|
||||
BUFFER = 0,
|
||||
TEXTURE_2D = 1,
|
||||
}
|
||||
|
||||
// Serialized linear-tensor descriptor: common base fields, storage type,
// element type, memory type, an int32 `size` and the raw bytes in `data`.
table TensorLinearDescriptor {
|
||||
base_obj:GPUObjectDescriptor;
|
||||
storage_type:LinearStorageType;
|
||||
element_type:DataType;
|
||||
memory_type:MemoryType;
|
||||
size:int32;
|
||||
data:[uint8];
|
||||
}
|
||||
|
||||
// Tensor storage kind stored in TensorDescriptor.storage_type and
// InferenceContext.storage_type.
enum TensorStorageType : byte {
|
||||
UNKNOWN = 0,
|
||||
BUFFER = 1,
|
||||
IMAGE_BUFFER = 2,
|
||||
TEXTURE_2D = 3,
|
||||
TEXTURE_3D = 4,
|
||||
TEXTURE_ARRAY = 5,
|
||||
SINGLE_TEXTURE_2D = 6,
|
||||
}
|
||||
|
||||
// Tensor layout stored in TensorDescriptor.layout.
enum Layout : byte {
|
||||
UNKNOWN = 0,
|
||||
HWC = 1,
|
||||
BHWC = 2,
|
||||
HWDC = 3,
|
||||
BHWDC = 4,
|
||||
}
|
||||
|
||||
// Five int32 shape components: b, h, w, d, c. Used as TensorDescriptor.shape.
table BHWDC {
|
||||
b:int32;
|
||||
h:int32;
|
||||
w:int32;
|
||||
d:int32;
|
||||
c:int32;
|
||||
}
|
||||
|
||||
// Serialized tensor descriptor: common base fields, data type, storage type,
// layout, a BHWDC shape and the raw bytes in `data`.
table TensorDescriptor {
|
||||
base_obj:GPUObjectDescriptor;
|
||||
data_type:DataType;
|
||||
storage_type:TensorStorageType;
|
||||
layout:Layout;
|
||||
shape:BHWDC;
|
||||
data:[uint8];
|
||||
}
|
||||
|
||||
// One (string key, BufferDescriptor) entry; Arguments.buffer_refs and
// Arguments.buffer_objects are lists of these.
table BufferDescriptorMapValue {
|
||||
key:string;
|
||||
value:BufferDescriptor;
|
||||
}
|
||||
|
||||
// One (string key, Texture2DDescriptor) entry; Arguments.texture2d_refs and
// Arguments.texture2d_objects are lists of these.
table Texture2DDescriptorMapValue {
|
||||
key:string;
|
||||
value:Texture2DDescriptor;
|
||||
}
|
||||
|
||||
// One (string key, TensorLinearDescriptor) entry; Arguments.tensor_linear_refs
// and Arguments.tensor_linear_objects are lists of these.
table TensorLinearDescriptorMapValue {
|
||||
key:string;
|
||||
value:TensorLinearDescriptor;
|
||||
}
|
||||
|
||||
// One (string key, TensorDescriptor) entry; Arguments.tensor_refs and
// Arguments.tensor_objects are lists of these.
table TensorDescriptorMapValue {
|
||||
key:string;
|
||||
value:TensorDescriptor;
|
||||
}
|
||||
|
||||
// Serialized form of an operation's arguments (GPUOperation.arguments).
// Holds three groups of named scalar values, each next to a flat `shared_*`
// array, followed by keyed descriptor lists in two families: `*_refs` and
// `*_objects`.
// NOTE(review): the difference between refs and objects is defined by the
// C++ Arguments class, which is not visible here - confirm there.
table Arguments {
|
||||
int_values:[IntValue];
|
||||
shared_int4s:[int32];
|
||||
|
||||
float_values:[FloatValue];
|
||||
shared_float4s:[float];
|
||||
|
||||
half_values:[HalfValue];
|
||||
shared_half4s:[float];
|
||||
|
||||
buffer_refs:[BufferDescriptorMapValue];
|
||||
texture2d_refs:[Texture2DDescriptorMapValue];
|
||||
tensor_linear_refs:[TensorLinearDescriptorMapValue];
|
||||
tensor_refs:[TensorDescriptorMapValue];
|
||||
|
||||
buffer_objects:[BufferDescriptorMapValue];
|
||||
texture2d_objects:[Texture2DDescriptorMapValue];
|
||||
tensor_linear_objects:[TensorLinearDescriptorMapValue];
|
||||
tensor_objects:[TensorDescriptorMapValue];
|
||||
}
|
||||
|
||||
// Precision stored in OperationDef.precision and InferenceContext.precision.
enum CalculationsPrecision : byte {
|
||||
F32 = 0,
|
||||
F32_F16 = 1,
|
||||
F16 = 2,
|
||||
}
|
||||
|
||||
// Value stored in GPUOperation.tensor_to_grid.
enum TensorToGrid : byte {
|
||||
CUSTOM = 0,
|
||||
WB_TO_X_HD_TO_Y_S_TO_Z = 1,
|
||||
WB_TO_X_HD_TO_Y_Z_IS_1 = 2,
|
||||
WB_TO_X_H_TO_Y_D_TO_Z = 3,
|
||||
B_TO_X_Y_IS_1_Z_IS_1 = 4,
|
||||
}
|
||||
|
||||
// A single compiler option; wrapped by the CompilerOption table so that
// GPUOperation.compiler_options can hold a list of them.
enum CompilerOptions : byte {
|
||||
ADRENO_FULL_SIMD_LINE = 0,
|
||||
ADRENO_MORE_WAVES = 1,
|
||||
POWERVR_FP16 = 2,
|
||||
CL_OPT_DISABLE = 3,
|
||||
CL_2_0 = 4,
|
||||
CL_3_0 = 5,
|
||||
}
|
||||
|
||||
// Operation definition (GPUOperation.definition): calculation precision plus
// the descriptors of the source and destination tensors.
table OperationDef {
|
||||
precision:CalculationsPrecision;
|
||||
src_tensors:[TensorDescriptor];
|
||||
dst_tensors:[TensorDescriptor];
|
||||
}
|
||||
|
||||
// Wraps one CompilerOptions value; GPUOperation.compiler_options is a list
// of these tables.
table CompilerOption {
|
||||
option:CompilerOptions;
|
||||
}
|
||||
|
||||
// Serialized state of one GPU operation (CLNode.gpu_op): its arguments,
// `code` and `elementwise_code` strings, compiler options, operation
// definition, tensor names, and the work-group / grid parameters.
// NOTE(review): `code` presumably holds the kernel source that
// GPUOperation::CompileDeserialized() passes to the program cache - confirm
// the field mapping in serialization.cc.
table GPUOperation {
|
||||
arguments:Arguments;
|
||||
code:string;
|
||||
work_group_size:Int3;
|
||||
compiler_options:[CompilerOption];
|
||||
tensor_to_grid:TensorToGrid;
|
||||
elementwise:bool;
|
||||
linkable:bool;
|
||||
check_src_channels_size:bool;
|
||||
definition:OperationDef;
|
||||
grid_dimension:int32;
|
||||
work_group_launch_order:Int3;
|
||||
grid_size:Int3;
|
||||
src_tensors_names:[string];
|
||||
dst_tensors_names:[string];
|
||||
work_groups_count:Int3;
|
||||
linkable_count:int32;
|
||||
elementwise_code:string;
|
||||
}
|
||||
|
||||
// A tensor descriptor paired with an int32 id; InferenceContext.tensors is a
// list of these.
table TensorDescWithId {
|
||||
desc:TensorDescriptor;
|
||||
id:int32;
|
||||
}
|
||||
|
||||
// One node of the serialized inference context (InferenceContext.nodes): the
// GPU operation, the ids of its inputs and outputs, and a name.
table CLNode {
|
||||
gpu_op:GPUOperation;
|
||||
input_ids:[int32];
|
||||
output_ids:[int32];
|
||||
name:string;
|
||||
}
|
||||
|
||||
// A pair of int32 ids; InferenceContext.variable_ids_and_refs is a list of
// these.
table PairOfValueIds {
|
||||
first:int32;
|
||||
second:int32;
|
||||
}
|
||||
|
||||
// Root table of the serialized model (declared as root_type in this file).
// Carries the flush / release settings, precision and storage type, the list
// of nodes, the tensor descriptors with their ids, and the input, variable
// and output ids.
table InferenceContext {
|
||||
need_flush:bool;
|
||||
flush_periodically:bool;
|
||||
flush_period:int32;
|
||||
need_manual_release:bool;
|
||||
precision:CalculationsPrecision;
|
||||
storage_type:TensorStorageType;
|
||||
nodes:[CLNode];
|
||||
tensors:[TensorDescWithId];
|
||||
input_ids:[int32];
|
||||
variable_ids_and_refs:[PairOfValueIds];
|
||||
output_ids:[int32];
|
||||
}
|
||||
|
||||
root_type InferenceContext;
|
42
tensorflow/lite/delegates/gpu/cl/serialization.h
Normal file
42
tensorflow/lite/delegates/gpu/cl/serialization.h
Normal file
@ -0,0 +1,42 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
|
||||
|
||||
#include "absl/types/span.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
class InferenceContext;
|
||||
|
||||
// Writes `inference` into `builder` and returns the offset of the resulting
// data::InferenceContext table. The buffer is not finished here; the caller
// does that (InferenceContext::InitFromGraph uses
// data::FinishInferenceContextBuffer).
flatbuffers::Offset<data::InferenceContext> Encode(
|
||||
const InferenceContext& inference, flatbuffers::FlatBufferBuilder* builder);
|
||||
|
||||
// Fills `inference` from the FlatBuffer table `fb_inference`.
// InferenceContext::RestoreDeserialized verifies the buffer before calling
// this and passes the environment's CL context as `context`.
// NOTE(review): `context` is presumably used to create the GPU objects held
// by the decoded arguments - confirm in serialization.cc.
absl::Status Decode(CLContext* context,
|
||||
const data::InferenceContext* fb_inference,
|
||||
InferenceContext* inference);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
|
@ -605,8 +605,11 @@ absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
|
||||
descriptor_.layout = desc.layout;
|
||||
memory_owner_ = true;
|
||||
CLMemory memory;
|
||||
RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_,
|
||||
desc.data.data(), &memory));
|
||||
uint8_t* data_ptr = desc.data.empty()
|
||||
? nullptr
|
||||
: const_cast<unsigned char*>(desc.data.data());
|
||||
RETURN_IF_ERROR(
|
||||
AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
|
||||
memory_ = memory.Release();
|
||||
if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
|
||||
RETURN_IF_ERROR(CreateImageBufferFromBuffer(
|
||||
|
Loading…
Reference in New Issue
Block a user