Serialization of OpenCL InferenceContext.
PiperOrigin-RevId: 337185119
Change-Id: I3841fd093a692a4acd851792f723381fd29e53bc
commit 465aeca042
parent 9318f787e6
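For orientation, a minimal usage sketch of the API this change introduces: InitFromGraph can now hand back a serialized copy of the compiled context, and RestoreDeserialized rebuilds a context from that blob without re-running graph conversion or shader generation. The wrapper function and its error handling below are illustrative, not part of the commit.

// Sketch only: build once from the graph, capture the serialized model, and
// later rebuild a context from the blob instead of recompiling everything.
#include <cstdint>
#include <vector>

#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"

namespace tflite {
namespace gpu {
namespace cl {

absl::Status BuildAndRestore(
    const InferenceContext::CreateInferenceInfo& create_info,
    const GraphFloat32& graph, Environment* env,
    std::vector<uint8_t>* serialized_model) {
  // First run: full conversion + compilation; the flatbuffer blob is written
  // into *serialized_model as a side effect.
  InferenceContext context;
  absl::Status status =
      context.InitFromGraph(create_info, graph, env, serialized_model);
  if (!status.ok()) return status;

  // Later run (possibly another process): restore without the graph.
  InferenceContext restored;
  return restored.RestoreDeserialized(*serialized_model, env);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite

The blob is presumably only valid for the device and driver that produced it, so a real on-disk cache would also key on that information.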
@@ -55,6 +55,7 @@ cc_library(
         ":cl_device",
         ":gpu_object",
         ":opencl_wrapper",
+        ":serialization_cc_fbs",
         ":tensor_type",
         ":util",
         "//tensorflow/lite/delegates/gpu/common:access_type",
@@ -358,6 +359,7 @@ cc_library(
     deps = [
         ":cl_context",
         ":opencl_wrapper",
+        ":serialization_cc_fbs",
         "//tensorflow/lite/delegates/gpu/common:access_type",
         "//tensorflow/lite/delegates/gpu/common:data_type",
         "//tensorflow/lite/delegates/gpu/common:status",
@@ -366,19 +368,30 @@ cc_library(
 
 cc_library(
     name = "inference_context",
-    srcs = ["inference_context.cc"],
-    hdrs = ["inference_context.h"],
+    srcs = [
+        "inference_context.cc",
+        "serialization.cc",
+    ],
+    hdrs = [
+        "inference_context.h",
+        "serialization.h",
+    ],
     deps = [
+        ":arguments",
         ":buffer",
         ":cl_command_queue",
+        ":cl_context",
         ":cl_device",
         ":environment",
         ":gpu_object",
+        ":linear_storage",
         ":model_hints",
         ":opencl_wrapper",
         ":precision",
+        ":serialization_cc_fbs",
         ":storage_type_util",
         ":tensor_type",
+        ":texture2d",
         "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
         "//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector",
         "//tensorflow/lite/delegates/gpu/cl/selectors:special_selector",
@@ -396,6 +409,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
@@ -467,6 +481,14 @@ cc_library(
     ],
 )
 
+flatbuffer_cc_library(
+    name = "serialization_cc_fbs",
+    srcs = ["serialization.fbs"],
+    flatc_args = [
+        "--scoped-enums",
+    ],
+)
+
 cc_library(
     name = "storage_type_util",
     srcs = ["storage_type_util.cc"],
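The new flatbuffer_cc_library rule is what produces the serialization_generated.h header included throughout this change, and --scoped-enums makes flatc emit C++ enum classes rather than plain C enums. A hedged sketch of what using one of the generated scoped enums looks like; the helper function is illustrative only.

// Sketch: with --scoped-enums, values from the generated header are
// fully scoped enum-class constants.
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"

namespace tflite {
namespace gpu {
namespace cl {

bool IsReadOnly(data::AccessType access) {
  // data::AccessType is an enum class generated from serialization.fbs.
  return access == data::AccessType::READ;
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite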
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/util.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -77,6 +78,11 @@ class Arguments : public ArgumentsBinder {
   ~Arguments() override = default;
 
  private:
+  friend flatbuffers::Offset<data::Arguments> Encode(
+      const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
+                             Arguments* args);
+
   void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
   void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
   void AddImage2DArray(const std::string& name,
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/common/access_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -164,6 +165,10 @@ class GPUObjectDescriptor {
   AccessType GetAccess() const { return access_type_; }
 
  protected:
+  friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
+      const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
+  friend void Decode(const data::GPUObjectDescriptor* fb_obj,
+                     GPUObjectDescriptor* obj);
   mutable std::map<std::string, std::string> state_vars_;
   AccessType access_type_;
 };
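The friend declarations above follow a pattern used throughout this change: free Encode/Decode functions get access to private or protected state without widening the public API. A generic sketch of the pattern with made-up Widget/data::Widget names, not taken from this commit; the Encode definition itself is omitted.

// Friend-function pattern, generic illustration only.
#include "flatbuffers/flatbuffers.h"

namespace example {
namespace data {
struct Widget;  // stands in for a flatc-generated table
}  // namespace data

class Widget;
flatbuffers::Offset<data::Widget> Encode(const Widget& w,
                                         flatbuffers::FlatBufferBuilder* b);

class Widget {
 public:
  int value() const { return value_; }

 private:
  // Serialization code may read value_ directly; everyone else goes
  // through the public interface.
  friend flatbuffers::Offset<data::Widget> Encode(
      const Widget& w, flatbuffers::FlatBufferBuilder* b);
  int value_ = 0;
};

}  // namespace example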
@@ -153,7 +153,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
 
 absl::Status InferenceContext::InitFromGraph(
     const CreateInferenceInfo& create_info, const GraphFloat32& graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {
   CreationContext creation_context;
   creation_context.device = env->GetDevicePtr();
   creation_context.context = &env->context();
@@ -182,10 +182,6 @@ absl::Status InferenceContext::InitFromGraph(
   RETURN_IF_ERROR(Compile(creation_context));
   RETURN_IF_ERROR(UpdateParams());
 
-  for (auto& node : nodes_) {
-    node.operation->args_.ReleaseCPURepresentation();
-  }
-
   TuningParameters tuning_parameters;
   tuning_parameters.queue = env->profiling_queue();
   tuning_parameters.info = &env->device().info_;
@@ -201,14 +197,54 @@ absl::Status InferenceContext::InitFromGraph(
     }
   }
   RETURN_IF_ERROR(Tune(tuning_parameters));
 
+  if (serialized_model) {
+    flatbuffers::FlatBufferBuilder builder;
+    auto encoded_fb = Encode(*this, &builder);
+    data::FinishInferenceContextBuffer(builder, encoded_fb);
+    serialized_model->resize(builder.GetSize());
+    std::memcpy(serialized_model->data(), builder.GetBufferPointer(),
+                builder.GetSize());
+  }
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();
+  }
+  return absl::OkStatus();
+}
+
+absl::Status InferenceContext::RestoreDeserialized(
+    const std::vector<uint8_t>& serialized_model, Environment* env) {
+  flatbuffers::Verifier verifier(serialized_model.data(),
+                                 serialized_model.size());
+  if (!data::VerifyInferenceContextBuffer(verifier)) {
+    return absl::DataLossError("Deserialization failed.");
+  }
+  auto decoded_fb = data::GetInferenceContext(serialized_model.data());
+  RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this));
+
+  CreationContext creation_context;
+  creation_context.device = env->GetDevicePtr();
+  creation_context.context = &env->context();
+  creation_context.queue = env->queue();
+  creation_context.cache = env->program_cache();
+
+  RETURN_IF_ERROR(AllocateMemory(creation_context.context));
+  BindMemoryToOperations();
+  for (auto& node : nodes_) {
+    RETURN_IF_ERROR(node.operation->CompileDeserialized(creation_context));
+  }
+  RETURN_IF_ERROR(UpdateParams());
+  for (auto& node : nodes_) {
+    node.operation->args_.ReleaseCPURepresentation();
+  }
   return absl::OkStatus();
 }
 
 absl::Status InferenceContext::InitFromGraphWithTransforms(
     const CreateInferenceInfo& create_info, GraphFloat32* graph,
-    Environment* env) {
+    Environment* env, std::vector<uint8_t>* serialized_model) {
   RETURN_IF_ERROR(RunGraphTransforms(graph));
-  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env));
+  RETURN_IF_ERROR(InitFromGraph(create_info, *graph, env, serialized_model));
   return absl::OkStatus();
 }
 
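Since InitFromGraph returns the serialized model as a plain byte vector and RestoreDeserialized verifies it with a flatbuffers verifier before decoding, callers can persist the blob between runs. A small illustrative helper for that; the file handling and the absence of any cache-key or version check are assumptions, not part of the commit.

#include <cstdint>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Writes the serialized model to disk; returns false on I/O failure.
bool SaveSerializedModel(const std::string& path,
                         const std::vector<uint8_t>& blob) {
  std::ofstream out(path, std::ios::binary);
  if (!out) return false;
  out.write(reinterpret_cast<const char*>(blob.data()),
            static_cast<std::streamsize>(blob.size()));
  return out.good();
}

// Reads the blob back; an empty vector means "no usable cache entry".
std::vector<uint8_t> LoadSerializedModel(const std::string& path) {
  std::ifstream in(path, std::ios::binary);
  return std::vector<uint8_t>(std::istreambuf_iterator<char>(in),
                              std::istreambuf_iterator<char>());
}

A corrupted or truncated file is caught by VerifyInferenceContextBuffer inside RestoreDeserialized, which returns a DataLossError instead of crashing.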
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
@@ -65,14 +66,15 @@ class InferenceContext {
   };
 
   absl::Status InitFromGraph(const CreateInferenceInfo& create_info,
-                             const GraphFloat32& graph, Environment* env);
+                             const GraphFloat32& graph, Environment* env,
+                             std::vector<uint8_t>* serialized_model = nullptr);
 
   // Applies OpenCL-specific transformations to the graph before the
   // initialization. These transformations are either impossible or useless in
   // other backends.
   absl::Status InitFromGraphWithTransforms(
       const CreateInferenceInfo& create_info, GraphFloat32* graph,
-      Environment* env);
+      Environment* env, std::vector<uint8_t>* serialized_model = nullptr);
 
   absl::Status AddToQueue(CLCommandQueue* queue);
   absl::Status Profile(ProfilingCommandQueue* queue, ProfilingInfo* result);
@@ -92,9 +94,19 @@ class InferenceContext {
   const std::vector<ValueId>& GetInputIds() const { return input_ids_; }
   const std::vector<ValueId>& GetOutputIds() const { return output_ids_; }
 
+  absl::Status RestoreDeserialized(const std::vector<uint8_t>& serialized_model,
+                                   Environment* env);
+
  private:
   enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
 
+  friend flatbuffers::Offset<data::InferenceContext> Encode(
+      const InferenceContext& inference,
+      flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::InferenceContext* fb_inference,
+                             InferenceContext* inference);
+
   void CopyInAndOutIds(const GraphFloat32& graph);
   absl::Status ConvertOperations(const DeviceInfo& device_info,
                                  const GraphFloat32& graph, ModelHints hints);
@@ -165,6 +177,32 @@ class InferenceContext {
     void SetNext(ValueId id) { next_ = id; }
     DummyTensor Get(ValueId id) { return reservations_[id]; }
 
+    std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const {
+      std::vector<std::pair<ValueId, TensorDescriptor>> result;
+      for (auto& v : reservations_) {
+        TensorDescriptor desc = v.second.descriptor;
+        desc.shape.b = v.second.shape.b;
+        desc.shape.h = v.second.shape.h;
+        desc.shape.w = v.second.shape.w;
+        desc.shape.d = 1;
+        desc.shape.c = v.second.shape.c;
+        result.push_back({v.first, desc});
+      }
+      return result;
+    }
+
+    void Add(const std::vector<std::pair<ValueId, TensorDescriptor>>& tensors) {
+      for (auto& v : tensors) {
+        DummyTensor dummy;
+        dummy.descriptor = v.second;
+        dummy.shape.b = v.second.shape.b;
+        dummy.shape.h = v.second.shape.h;
+        dummy.shape.w = v.second.shape.w;
+        dummy.shape.c = v.second.shape.c;
+        Add(v.first, dummy);
+      }
+    }
+
    private:
     absl::flat_hash_map<ValueId, DummyTensor> reservations_;
     ValueId next_;
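The reserver's new GetTensorDescs()/Add() methods convert between its BHWC reservations and the BHWDC shapes stored in TensorDescriptor, pinning the depth dimension to 1 on export. A simplified illustration of just that shape mapping; the struct definitions are stand-ins, not the delegate's types.

// Shape mapping used when exporting reserved tensors for serialization.
struct SimpleBHWC { int b, h, w, c; };
struct SimpleBHWDC { int b, h, w, d, c; };

SimpleBHWDC ToBHWDC(const SimpleBHWC& shape) {
  // Same batch/height/width/channels, depth fixed to 1.
  return SimpleBHWDC{shape.b, shape.h, shape.w, /*d=*/1, shape.c};
}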
@@ -651,6 +651,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/cl:device_info",
        "//tensorflow/lite/delegates/gpu/cl:precision",
         "//tensorflow/lite/delegates/gpu/cl:program_cache",
+        "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
         "//tensorflow/lite/delegates/gpu/cl:tensor",
         "//tensorflow/lite/delegates/gpu/cl:tensor_type",
         "//tensorflow/lite/delegates/gpu/common:access_type",
@@ -223,7 +223,8 @@ absl::Status GPUOperation::UpdateParams() {
   return absl::OkStatus();
 }
 
-absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
+absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info,
+                                        CLContext* context) {
   if (elementwise_) {
     auto src_desc =
         absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
@@ -241,28 +242,35 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
     dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
     args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
 
-    std::string code =
-        GetElementWiseCode(definition_, check_src_channels_size_);
     elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
     RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code, "main_function", *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
   } else {
-    RETURN_IF_ERROR(args_.AllocateObjects(creation_context.context));
+    RETURN_IF_ERROR(args_.AllocateObjects(context));
     RETURN_IF_ERROR(args_.TransformToCLCode(
-        creation_context.device->info_,
-        {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-    RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-        code_, "main_function", compiler_options_, *creation_context.context,
-        *creation_context.device, &kernel_));
+        device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
   }
+  return absl::OkStatus();
+}
+
+absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
+  RETURN_IF_ERROR(
+      AssembleCode(creation_context.GetDeviceInfo(), creation_context.context));
+  RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_));
   return PostCompileCheck(creation_context.device->info_, kernel_.info_);
 }
 
+absl::Status GPUOperation::CompileDeserialized(
+    const CreationContext& creation_context) {
+  return creation_context.cache->GetOrCreateCLKernel(
+      code_, "main_function", compiler_options_, *creation_context.context,
+      *creation_context.device, &kernel_);
+}
+
 void GPUOperation::GetPossibleKernelWorkGroups(
     TuningType tuning_type, const DeviceInfo& device_info,
     const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
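The old monolithic Compile() is split here: AssembleCode() generates code_ and allocates GPU objects given DeviceInfo and a CLContext, Compile() runs AssembleCode() and then builds the OpenCL kernel, and CompileDeserialized() only builds the kernel because code_ and the arguments were already restored from the flatbuffer. A sketch of how the two paths are expected to be driven; the wrapper functions are illustrative, not part of the commit.

#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"

namespace tflite {
namespace gpu {
namespace cl {

absl::Status BuildFromGraphNode(GPUOperation* op,
                                const CreationContext& creation_context) {
  // Compile() == AssembleCode(device_info, context) + GetOrCreateCLKernel().
  return op->Compile(creation_context);
}

absl::Status BuildFromDeserializedNode(GPUOperation* op,
                                       const CreationContext& creation_context) {
  // code_, arguments and work-group data came from the flatbuffer; only the
  // device-specific kernel binary still has to be created.
  return op->CompileDeserialized(creation_context);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite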
@@ -329,7 +337,7 @@ int3 GPUOperation::GetGridSize() const {
     const int grid_z = 1;
     return int3(grid_x, grid_y, grid_z);
   }
-  return int3(0, 0, 0);
+  return grid_size_;
 }
 
 void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
@@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/precision.h"
 #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
+#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
@@ -129,8 +130,12 @@ class GPUOperation {
 
   absl::Status Tune(const TuningParameters& params);
 
+  absl::Status AssembleCode(const DeviceInfo& device_info, CLContext* context);
+
   absl::Status Compile(const CreationContext& creation_context);
 
+  absl::Status CompileDeserialized(const CreationContext& creation_context);
+
   virtual absl::Status PostCompileCheck(const DeviceInfo& device_info,
                                         const KernelInfo& kernel_info) {
     return absl::OkStatus();
@@ -164,6 +169,11 @@ class GPUOperation {
   bool check_src_channels_size_ = false;
 
  protected:
+  friend flatbuffers::Offset<data::GPUOperation> Encode(
+      const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
+  friend absl::Status Decode(CLContext* context,
+                             const data::GPUOperation* fb_op, GPUOperation* op);
+
   virtual absl::Status BindArguments(ArgumentsBinder* args) {
     return absl::OkStatus();
   }
tensorflow/lite/delegates/gpu/cl/serialization.cc (new file, 1049 lines; file diff suppressed because it is too large)
tensorflow/lite/delegates/gpu/cl/serialization.fbs (new file, 278 lines):

// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace tflite.gpu.cl.data;

table Int4 {
  x:int32;
  y:int32;
  z:int32;
  w:int32;
}

table Int3 {
  x:int32;
  y:int32;
  z:int32;
}

table Int2 {
  x:int32;
  y:int32;
}

table IntValue {
  name:string;
  value:int32;
  active:bool;
  offset:uint32;
}

table FloatValue {
  name:string;
  value:float;
  active:bool;
  offset:uint32;
}

table HalfValue {
  name:string;
  value:float;
  active:bool;
  store_as_f32:bool;
  offset:uint32;
}

enum AccessType : byte {
  READ = 0,
  WRITE = 1,
  READ_WRITE = 2,
}

enum DataType : byte {
  UNKNOWN = 0,
  FLOAT32 = 1,
  FLOAT16 = 2,
}

enum MemoryType : byte {
  GLOBAL = 0,
  CONSTANT = 1,
  LOCAL = 2,
}

table StateVariable {
  key:string;
  value:string;
}

table GPUObjectDescriptor {
  state_vars:[StateVariable];
  access_type:AccessType;
}

table BufferDescriptor {
  base_obj:GPUObjectDescriptor;
  element_type:DataType;
  element_size:int32;
  memory_type:MemoryType;
  attributes:[string];
  size:int32;
  data:[uint8];
}

table Texture2DDescriptor {
  base_obj:GPUObjectDescriptor;
  element_type:DataType;
  normalized:bool;
  normalized_type:DataType;
  size:Int2;
  data:[uint8];
}

enum LinearStorageType : byte {
  BUFFER = 0,
  TEXTURE_2D = 1,
}

table TensorLinearDescriptor {
  base_obj:GPUObjectDescriptor;
  storage_type:LinearStorageType;
  element_type:DataType;
  memory_type:MemoryType;
  size:int32;
  data:[uint8];
}

enum TensorStorageType : byte {
  UNKNOWN = 0,
  BUFFER = 1,
  IMAGE_BUFFER = 2,
  TEXTURE_2D = 3,
  TEXTURE_3D = 4,
  TEXTURE_ARRAY = 5,
  SINGLE_TEXTURE_2D = 6,
}

enum Layout : byte {
  UNKNOWN = 0,
  HWC = 1,
  BHWC = 2,
  HWDC = 3,
  BHWDC = 4,
}

table BHWDC {
  b:int32;
  h:int32;
  w:int32;
  d:int32;
  c:int32;
}

table TensorDescriptor {
  base_obj:GPUObjectDescriptor;
  data_type:DataType;
  storage_type:TensorStorageType;
  layout:Layout;
  shape:BHWDC;
  data:[uint8];
}

table BufferDescriptorMapValue {
  key:string;
  value:BufferDescriptor;
}

table Texture2DDescriptorMapValue {
  key:string;
  value:Texture2DDescriptor;
}

table TensorLinearDescriptorMapValue {
  key:string;
  value:TensorLinearDescriptor;
}

table TensorDescriptorMapValue {
  key:string;
  value:TensorDescriptor;
}

table Arguments {
  int_values:[IntValue];
  shared_int4s:[int32];

  float_values:[FloatValue];
  shared_float4s:[float];

  half_values:[HalfValue];
  shared_half4s:[float];

  buffer_refs:[BufferDescriptorMapValue];
  texture2d_refs:[Texture2DDescriptorMapValue];
  tensor_linear_refs:[TensorLinearDescriptorMapValue];
  tensor_refs:[TensorDescriptorMapValue];

  buffer_objects:[BufferDescriptorMapValue];
  texture2d_objects:[Texture2DDescriptorMapValue];
  tensor_linear_objects:[TensorLinearDescriptorMapValue];
  tensor_objects:[TensorDescriptorMapValue];
}

enum CalculationsPrecision : byte {
  F32 = 0,
  F32_F16 = 1,
  F16 = 2,
}

enum TensorToGrid : byte {
  CUSTOM = 0,
  WB_TO_X_HD_TO_Y_S_TO_Z = 1,
  WB_TO_X_HD_TO_Y_Z_IS_1 = 2,
  WB_TO_X_H_TO_Y_D_TO_Z = 3,
  B_TO_X_Y_IS_1_Z_IS_1 = 4,
}

enum CompilerOptions : byte {
  ADRENO_FULL_SIMD_LINE = 0,
  ADRENO_MORE_WAVES = 1,
  POWERVR_FP16 = 2,
  CL_OPT_DISABLE = 3,
  CL_2_0 = 4,
  CL_3_0 = 5,
}

table OperationDef {
  precision:CalculationsPrecision;
  src_tensors:[TensorDescriptor];
  dst_tensors:[TensorDescriptor];
}

table CompilerOption {
  option:CompilerOptions;
}

table GPUOperation {
  arguments:Arguments;
  code:string;
  work_group_size:Int3;
  compiler_options:[CompilerOption];
  tensor_to_grid:TensorToGrid;
  elementwise:bool;
  linkable:bool;
  check_src_channels_size:bool;
  definition:OperationDef;
  grid_dimension:int32;
  work_group_launch_order:Int3;
  grid_size:Int3;
  src_tensors_names:[string];
  dst_tensors_names:[string];
  work_groups_count:Int3;
  linkable_count:int32;
  elementwise_code:string;
}

table TensorDescWithId {
  desc:TensorDescriptor;
  id:int32;
}

table CLNode {
  gpu_op:GPUOperation;
  input_ids:[int32];
  output_ids:[int32];
  name:string;
}

table PairOfValueIds {
  first:int32;
  second:int32;
}

table InferenceContext {
  need_flush:bool;
  flush_periodically:bool;
  flush_period:int32;
  need_manual_release:bool;
  precision:CalculationsPrecision;
  storage_type:TensorStorageType;
  nodes:[CLNode];
  tensors:[TensorDescWithId];
  input_ids:[int32];
  variable_ids_and_refs:[PairOfValueIds];
  output_ids:[int32];
}

root_type InferenceContext;
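Each table above maps to flatc-generated C++ builders and accessors in the data namespace. As a small hedged illustration, a round trip through the generated code for Int3 might look like the following; the exact generated signatures depend on the flatc version and are not shown in this commit.

#include "flatbuffers/flatbuffers.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"

namespace tflite {
namespace gpu {
namespace cl {

void Int3RoundTrip() {
  flatbuffers::FlatBufferBuilder builder;
  // CreateInt3 is the builder flatc generates for `table Int3 { x/y/z:int32; }`.
  auto offset = data::CreateInt3(builder, /*x=*/8, /*y=*/4, /*z=*/1);
  builder.Finish(offset);

  const auto* int3 =
      flatbuffers::GetRoot<data::Int3>(builder.GetBufferPointer());
  // Generated accessors mirror the field names in serialization.fbs.
  int x = int3->x();
  int y = int3->y();
  int z = int3->z();
  (void)x; (void)y; (void)z;
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite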
tensorflow/lite/delegates/gpu/cl/serialization.h (new file, 42 lines):

/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_

#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

namespace tflite {
namespace gpu {
namespace cl {

class InferenceContext;

flatbuffers::Offset<data::InferenceContext> Encode(
    const InferenceContext& inference, flatbuffers::FlatBufferBuilder* builder);

absl::Status Decode(CLContext* context,
                    const data::InferenceContext* fb_inference,
                    InferenceContext* inference);

}  // namespace cl
}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_SERIALIZATION_H_
@@ -605,8 +605,11 @@ absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
   descriptor_.layout = desc.layout;
   memory_owner_ = true;
   CLMemory memory;
-  RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_,
-                                       desc.data.data(), &memory));
+  uint8_t* data_ptr = desc.data.empty()
+                          ? nullptr
+                          : const_cast<unsigned char*>(desc.data.data());
+  RETURN_IF_ERROR(
+      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
   memory_ = memory.Release();
   if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
     RETURN_IF_ERROR(CreateImageBufferFromBuffer(