Added support for Const tensors in the OpenCL backend.
PiperOrigin-RevId: 342973472
Change-Id: I38932dda93dfb729cf4f14ff6310da57ad9b1e06

Commit 042f6923fe (parent 39d3d60407)
@@ -212,9 +212,7 @@ absl::Status InferenceContext::InitFromGraph(
       node.cl_operation.MoveObjectRefsFromGenericToCL();
     }
   }
-  for (auto& node : nodes_) {
-    node.cl_operation.GetGpuOperation().args_.ReleaseCPURepresentation();
-  }
+  ReleaseCPURepresentation();
   return absl::OkStatus();
 }
 
@@ -240,9 +238,7 @@ absl::Status InferenceContext::RestoreDeserialized(
     RETURN_IF_ERROR(node.cl_operation.CompileDeserialized(creation_context));
   }
   RETURN_IF_ERROR(UpdateParams());
-  for (auto& node : nodes_) {
-    node.cl_operation.GetGpuOperation().args_.ReleaseCPURepresentation();
-  }
+  ReleaseCPURepresentation();
   return absl::OkStatus();
 }
 
@@ -329,6 +325,16 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info,
     if (consumed_nodes.find(node.id) != consumed_nodes.end()) {
       continue;
     }
+    auto op_type = OperationTypeFromString(node.operation.type);
+    if (op_type == OperationType::CONST) {
+      auto attr =
+          absl::any_cast<ConstTensorAttributes>(node.operation.attributes);
+      auto outputs = graph.FindOutputs(node.id);
+      const_tensors_descs_[outputs[0]->id] =
+          tensor_reserver_.Get(outputs[0]->id).descriptor;
+      const_tensors_descs_[outputs[0]->id].UploadData(attr.tensor);
+      continue;
+    }
     std::string op_name = node.operation.type + " " + std::to_string(node.id);
     GPUOperationsSubgraph gpu_subgraph;
     if (hints.Check(ModelHints::kAllowSpecialKernels) &&
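A minimal standalone sketch of the pattern this hunk introduces, for orientation: constant nodes are peeled off before kernel selection, and their payload is recorded against the id of their single output value. The types below (ValueId, ConstAttr, GraphNode, CollectConstTensors) are simplified stand-ins invented for illustration, not the real TFLite GPU classes.

```cpp
#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Simplified stand-ins for the real graph types (illustration only).
using ValueId = uint32_t;

struct ConstAttr {
  std::vector<float> data;  // host-side weights of the constant
};

struct GraphNode {
  ValueId id;
  std::string type;      // e.g. "const", "add", "conv_2d"
  ConstAttr const_attr;  // only meaningful when type == "const"
  ValueId output_id;     // id of the node's single output value
};

// Collects the host data of every constant node keyed by its output value id,
// so later passes can skip these nodes and upload the data to GPU memory once.
std::map<ValueId, std::vector<float>> CollectConstTensors(
    const std::vector<GraphNode>& nodes) {
  std::map<ValueId, std::vector<float>> const_tensors;
  for (const auto& node : nodes) {
    if (node.type != "const") continue;  // regular ops are converted elsewhere
    const_tensors[node.output_id] = node.const_attr.data;
  }
  return const_tensors;
}

int main() {
  std::vector<GraphNode> nodes = {
      {0, "const", {{1.0f, 2.0f, 3.0f}}, 10},
      {1, "add", {}, 11},
  };
  auto const_tensors = CollectConstTensors(nodes);
  return const_tensors.count(10) == 1 ? 0 : 1;
}
```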
@@ -481,22 +487,34 @@ void InferenceContext::GetUsages(const std::function<bool(ValueId)>& functor,
 
 InferenceContext::TensorMemoryType InferenceContext::GetTensorMemoryType(
     ValueId id) {
-  if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
-    return TensorMemoryType::VARIABLE;
+  if (const_tensors_.find(id) != const_tensors_.end()) {
+    return TensorMemoryType::kConst;
+  } else if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
+    return TensorMemoryType::kVariable;
   } else if (IsBufferBased(tensor_reserver_.Get(id).descriptor.storage_type)) {
-    return TensorMemoryType::BUFFER;
+    return TensorMemoryType::kBuffer;
   } else {
-    return TensorMemoryType::STRONG_SHAPE;
+    return TensorMemoryType::kStrongShape;
   }
 }
 
 absl::Status InferenceContext::AllocateMemory(CLContext* context) {
+  RETURN_IF_ERROR(AllocateMemoryForConstTensors(context));
   RETURN_IF_ERROR(AllocateMemoryForVariableTensors(context));
   RETURN_IF_ERROR(AllocateMemoryForBuffers(context));
   RETURN_IF_ERROR(AllocateMemoryForStrongShapes(context));
   return absl::OkStatus();
 }
 
+absl::Status InferenceContext::AllocateMemoryForConstTensors(
+    CLContext* context) {
+  for (auto& description : const_tensors_descs_) {
+    RETURN_IF_ERROR(const_tensors_[description.first].CreateFromDescriptor(
+        description.second, context));
+  }
+  return absl::OkStatus();
+}
+
 absl::Status InferenceContext::AllocateMemoryForVariableTensors(
     CLContext* context) {
   std::map<ValueId, int> ref_value_to_tensor_index;
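A self-contained sketch of the dispatch order shown above, with the InferenceContext members replaced by a plain MemoryPlan struct (an assumption made purely for illustration): constants are classified first so they are never pooled with intermediate tensors, and anything left unclassified falls back to an individually allocated strong-shape tensor.

```cpp
#include <cstdint>
#include <map>
#include <set>

// Simplified stand-in types; the real code keys off InferenceContext members.
using ValueId = uint32_t;

enum class TensorMemoryType { kStrongShape, kBuffer, kVariable, kConst };

struct MemoryPlan {
  std::set<ValueId> const_ids;           // values backed by constant tensors
  std::map<ValueId, int> variable_refs;  // variable values -> reference id
  std::set<ValueId> buffer_based_ids;    // values stored in shared buffers
};

// Constants are checked before every other category; the fallthrough case is
// a "strong shape" tensor that gets its own allocation.
TensorMemoryType GetTensorMemoryType(const MemoryPlan& plan, ValueId id) {
  if (plan.const_ids.count(id)) return TensorMemoryType::kConst;
  if (plan.variable_refs.count(id)) return TensorMemoryType::kVariable;
  if (plan.buffer_based_ids.count(id)) return TensorMemoryType::kBuffer;
  return TensorMemoryType::kStrongShape;
}

int main() {
  MemoryPlan plan;
  plan.const_ids = {10};
  plan.variable_refs = {{20, 0}};
  return GetTensorMemoryType(plan, 10) == TensorMemoryType::kConst &&
                 GetTensorMemoryType(plan, 99) == TensorMemoryType::kStrongShape
             ? 0
             : 1;
}
```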
@@ -520,7 +538,7 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(CLContext* context) {
   std::map<ValueId, int2> buffer_usages;
   GetUsages(
       [this](ValueId id) {
-        return GetTensorMemoryType(id) == TensorMemoryType::BUFFER;
+        return GetTensorMemoryType(id) == TensorMemoryType::kBuffer;
       },
       &buffer_usages);
 
@@ -555,7 +573,7 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(CLContext* context) {
   for (auto& node : nodes_) {
     auto tensors = GetCLNodeTensors(node);
     for (auto& t : tensors) {
-      if (GetTensorMemoryType(t.first) != TensorMemoryType::BUFFER) continue;
+      if (GetTensorMemoryType(t.first) != TensorMemoryType::kBuffer) continue;
       const int tensor_index = graph_ids_to_shared_buffer_tensors_[t.first];
       if (created_tensors[tensor_index]) continue;
       const auto& shape = tensor_reserver_.Get(t.first).shape;
@@ -574,7 +592,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes(
   std::map<ValueId, int2> usages;
   GetUsages(
       [this](ValueId id) {
-        return GetTensorMemoryType(id) == TensorMemoryType::STRONG_SHAPE;
+        return GetTensorMemoryType(id) == TensorMemoryType::kStrongShape;
       },
       &usages);
 
@@ -594,7 +612,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes(
   for (auto& node : nodes_) {
     auto tensors = GetCLNodeTensors(node);
     for (auto& t : tensors) {
-      if (GetTensorMemoryType(t.first) != TensorMemoryType::STRONG_SHAPE) {
+      if (GetTensorMemoryType(t.first) != TensorMemoryType::kStrongShape) {
         continue;
       }
       const auto& shape = tensor_reserver_.Get(t.first).shape;
@@ -696,7 +714,9 @@ uint64_t InferenceContext::GetSizeOfMemoryAllocatedForIntermediateTensors()
 }
 
 Tensor* InferenceContext::GetTensor(ValueId id) {
-  if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
+  if (const_tensors_.find(id) != const_tensors_.end()) {
+    return &const_tensors_[id];
+  } else if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
     return &variable_tensors_[variable_ids_and_refs_[id]];
   } else if (graph_ids_to_shared_buffer_tensors_.find(id) !=
              graph_ids_to_shared_buffer_tensors_.end()) {
@@ -724,6 +744,13 @@ absl::Status InferenceContext::GetOutputTensor(ValueId id,
   return gpu_tensor.ReadData(queue, result);
 }
 
+void InferenceContext::ReleaseCPURepresentation() {
+  for (auto& node : nodes_) {
+    node.cl_operation.GetGpuOperation().args_.ReleaseCPURepresentation();
+  }
+  const_tensors_descs_.clear();
+}
+
 absl::Status RunGraphTransforms(GraphFloat32* graph) {
   auto merge_padding_transform = NewMergePaddingWithAdd();
   auto add_bias_transform = NewAddBias();
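ReleaseCPURepresentation exists so that host-side copies of weights and constant data can be dropped once everything has been uploaded to the device. A rough standalone sketch of that idea, using an invented UploadableTensor type rather than the real Tensor/Arguments classes:

```cpp
#include <vector>

// Illustrative only: a tensor that keeps a host copy until it has been
// uploaded to the device, then drops the host copy to save RAM.
struct UploadableTensor {
  std::vector<float> host_data;  // CPU-side representation
  bool uploaded = false;         // stands in for a real device allocation

  void UploadToDevice() {
    // A real implementation would copy host_data into an OpenCL buffer/image.
    uploaded = true;
  }

  void ReleaseCPURepresentation() {
    host_data.clear();
    host_data.shrink_to_fit();  // actually give the memory back
  }
};

int main() {
  UploadableTensor t;
  t.host_data.assign(1024, 0.5f);
  t.UploadToDevice();
  t.ReleaseCPURepresentation();  // safe only after the upload has completed
  return t.uploaded && t.host_data.empty() ? 0 : 1;
}
```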
@@ -102,7 +102,7 @@ class InferenceContext {
       const absl::Span<const uint8_t> serialized_model, Environment* env);
 
  private:
-  enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
+  enum class TensorMemoryType { kStrongShape, kBuffer, kVariable, kConst };
 
   friend flatbuffers::Offset<data::InferenceContext> Encode(
       const InferenceContext& inference,
@@ -119,6 +119,8 @@ class InferenceContext {
   absl::Status Merge();
   absl::Status AllocateMemory(CLContext* context);
 
+  absl::Status AllocateMemoryForConstTensors(CLContext* context);
+
   absl::Status AllocateMemoryForVariableTensors(CLContext* context);
 
   absl::Status AllocateMemoryForBuffers(CLContext* context);
@@ -137,6 +139,8 @@ class InferenceContext {
       ProfilingCommandQueue* profiling_queue);
   absl::Status UpdateParams();
 
+  void ReleaseCPURepresentation();
+
   // performance hacks
   bool need_flush_ = false;
 
@@ -213,6 +217,9 @@ class InferenceContext {
   };
   TensorReserver tensor_reserver_;
 
+  absl::flat_hash_map<ValueId, TensorDescriptor> const_tensors_descs_;
+  std::map<ValueId, Tensor> const_tensors_;
+
   std::map<ValueId, Tensor> variable_tensors_;
   std::vector<Buffer> shared_buffers_;
   std::vector<Tensor>
@@ -944,12 +944,19 @@ flatbuffers::Offset<data::InferenceContext> Encode(
 
   std::vector<flatbuffers::Offset<data::TensorDescWithId>> tensors_fb;
   auto tensors = inference.tensor_reserver_.GetTensorDescs();
-  for (auto& tensor : tensors) {
+  for (const auto& tensor : tensors) {
     auto tensor_fb = Encode(tensor.second, tensor.first, builder);
     tensors_fb.push_back(tensor_fb);
   }
   auto tensors_fb_vec = builder->CreateVector(tensors_fb);
 
+  std::vector<flatbuffers::Offset<data::TensorDescWithId>> const_tensors_fb;
+  for (const auto& tensor : inference.const_tensors_descs_) {
+    auto tensor_fb = Encode(tensor.second, tensor.first, builder);
+    const_tensors_fb.push_back(tensor_fb);
+  }
+  auto const_tensors_fb_vec = builder->CreateVector(const_tensors_fb);
+
   std::vector<flatbuffers::Offset<data::PairOfValueIds>>
       variable_ids_and_refs_fb;
   for (auto& pair : inference.variable_ids_and_refs_) {
@@ -970,6 +977,7 @@ flatbuffers::Offset<data::InferenceContext> Encode(
   inf_builder.add_storage_type(tflite::gpu::ToFB(inference.storage_type_));
   inf_builder.add_nodes(nodes_fb_vec);
   inf_builder.add_tensors(tensors_fb_vec);
+  inf_builder.add_const_tensors(const_tensors_fb_vec);
   inf_builder.add_input_ids(in_ids_fb);
   inf_builder.add_output_ids(out_ids_fb);
   inf_builder.add_variable_ids_and_refs(variable_ids_and_refs_fb_vec);
@@ -995,12 +1003,17 @@ absl::Status Decode(const data::InferenceContext* fb_inference,
   }
 
   std::vector<std::pair<ValueId, TensorDescriptor>> tensors;
-  for (auto tensor_fb : *fb_inference->tensors()) {
+  for (const auto& tensor_fb : *fb_inference->tensors()) {
     TensorDescriptor desc;
     Decode(tensor_fb->desc(), &desc);
     tensors.push_back({tensor_fb->id(), std::move(desc)});
   }
   inference->tensor_reserver_.Add(tensors);
+  for (const auto& tensor_fb : *fb_inference->const_tensors()) {
+    TensorDescriptor desc;
+    Decode(tensor_fb->desc(), &desc);
+    inference->const_tensors_descs_[tensor_fb->id()] = std::move(desc);
+  }
   for (auto in_fb : *fb_inference->input_ids()) {
     inference->input_ids_.push_back(in_fb);
   }
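The const-tensor descriptors are serialized the same way as the regular tensor descriptors: an id-keyed map flattened into a vector of (id, descriptor) entries and rebuilt on decode. A toy round trip of that shape with plain standard-library types; Desc and the helpers are illustrative stand-ins, not the FlatBuffers API.

```cpp
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

// Stand-ins for the real TensorDescriptor and ValueId types.
using ValueId = uint32_t;
struct Desc {
  int storage_type = 0;
  std::vector<float> data;  // constant tensors also carry their payload
};

// Encode: flatten the id->descriptor map into an ordered list of pairs,
// mirroring how both `tensors` and `const_tensors` are written as
// [TensorDescWithId] vectors in the serialized model.
std::vector<std::pair<ValueId, Desc>> EncodeDescs(
    const std::map<ValueId, Desc>& descs) {
  return {descs.begin(), descs.end()};
}

// Decode: rebuild the map so the const tensors can later be recreated on the
// GPU, one tensor per entry.
std::map<ValueId, Desc> DecodeDescs(
    const std::vector<std::pair<ValueId, Desc>>& entries) {
  std::map<ValueId, Desc> descs;
  for (const auto& entry : entries) descs[entry.first] = entry.second;
  return descs;
}

int main() {
  std::map<ValueId, Desc> original = {{10, {1, {1.f, 2.f}}}, {11, {2, {3.f}}}};
  auto restored = DecodeDescs(EncodeDescs(original));
  return restored.size() == 2 && restored[10].data.size() == 2 ? 0 : 1;
}
```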
@@ -95,6 +95,7 @@ table InferenceContext {
   storage_type:tflite.gpu.data.TensorStorageType;
   nodes:[CLNode];
   tensors:[TensorDescWithId];
+  const_tensors:[TensorDescWithId];
   input_ids:[int32];
   variable_ids_and_refs:[PairOfValueIds];
   output_ids:[int32];
@@ -748,11 +748,12 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
     VT_STORAGE_TYPE = 14,
     VT_NODES = 16,
     VT_TENSORS = 18,
-    VT_INPUT_IDS = 20,
-    VT_VARIABLE_IDS_AND_REFS = 22,
-    VT_OUTPUT_IDS = 24,
-    VT_INPUT_REFS = 26,
-    VT_OUTPUT_REFS = 28
+    VT_CONST_TENSORS = 20,
+    VT_INPUT_IDS = 22,
+    VT_VARIABLE_IDS_AND_REFS = 24,
+    VT_OUTPUT_IDS = 26,
+    VT_INPUT_REFS = 28,
+    VT_OUTPUT_REFS = 30
   };
   bool need_flush() const {
     return GetField<uint8_t>(VT_NEED_FLUSH, 0) != 0;
@@ -778,6 +779,13 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *tensors() const {
     return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *>(VT_TENSORS);
   }
+  const flatbuffers::Vector<
+      flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>
+      *const_tensors() const {
+    return GetPointer<const flatbuffers::Vector<
+        flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *>(
+        VT_CONST_TENSORS);
+  }
   const flatbuffers::Vector<int32_t> *input_ids() const {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUT_IDS);
   }
@@ -801,12 +809,14 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
            VerifyField<uint8_t>(verifier, VT_NEED_MANUAL_RELEASE) &&
            VerifyField<int8_t>(verifier, VT_PRECISION) &&
            VerifyField<int8_t>(verifier, VT_STORAGE_TYPE) &&
-           VerifyOffset(verifier, VT_NODES) &&
-           verifier.VerifyVector(nodes()) &&
+           VerifyOffset(verifier, VT_NODES) && verifier.VerifyVector(nodes()) &&
            verifier.VerifyVectorOfTables(nodes()) &&
            VerifyOffset(verifier, VT_TENSORS) &&
            verifier.VerifyVector(tensors()) &&
            verifier.VerifyVectorOfTables(tensors()) &&
+           VerifyOffset(verifier, VT_CONST_TENSORS) &&
+           verifier.VerifyVector(const_tensors()) &&
+           verifier.VerifyVectorOfTables(const_tensors()) &&
            VerifyOffset(verifier, VT_INPUT_IDS) &&
            verifier.VerifyVector(input_ids()) &&
            VerifyOffset(verifier, VT_VARIABLE_IDS_AND_REFS) &&
@@ -817,8 +827,7 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
            VerifyOffset(verifier, VT_INPUT_REFS) &&
            verifier.VerifyVector(input_refs()) &&
            VerifyOffset(verifier, VT_OUTPUT_REFS) &&
-           verifier.VerifyVector(output_refs()) &&
-           verifier.EndTable();
+           verifier.VerifyVector(output_refs()) && verifier.EndTable();
   }
 };
 
@@ -850,6 +859,12 @@ struct InferenceContextBuilder {
   void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>> tensors) {
     fbb_.AddOffset(InferenceContext::VT_TENSORS, tensors);
   }
+  void add_const_tensors(
+      flatbuffers::Offset<flatbuffers::Vector<
+          flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>>
+          const_tensors) {
+    fbb_.AddOffset(InferenceContext::VT_CONST_TENSORS, const_tensors);
+  }
   void add_input_ids(flatbuffers::Offset<flatbuffers::Vector<int32_t>> input_ids) {
     fbb_.AddOffset(InferenceContext::VT_INPUT_IDS, input_ids);
   }
@@ -877,17 +892,26 @@ struct InferenceContextBuilder {
 };
 
 inline flatbuffers::Offset<InferenceContext> CreateInferenceContext(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool need_flush = false,
-    bool flush_periodically = false,
-    int32_t flush_period = 0,
+    flatbuffers::FlatBufferBuilder &_fbb, bool need_flush = false,
+    bool flush_periodically = false, int32_t flush_period = 0,
     bool need_manual_release = false,
-    tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32,
-    tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>> nodes = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>> tensors = 0,
+    tflite::gpu::cl::data::CalculationsPrecision precision =
+        tflite::gpu::cl::data::CalculationsPrecision::F32,
+    tflite::gpu::data::TensorStorageType storage_type =
+        tflite::gpu::data::TensorStorageType::UNKNOWN,
+    flatbuffers::Offset<
+        flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>>
+        nodes = 0,
+    flatbuffers::Offset<flatbuffers::Vector<
+        flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>>
+        tensors = 0,
+    flatbuffers::Offset<flatbuffers::Vector<
+        flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>>
+        const_tensors = 0,
     flatbuffers::Offset<flatbuffers::Vector<int32_t>> input_ids = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>> variable_ids_and_refs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<
+        flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>>
+        variable_ids_and_refs = 0,
     flatbuffers::Offset<flatbuffers::Vector<int32_t>> output_ids = 0,
     flatbuffers::Offset<flatbuffers::Vector<int64_t>> input_refs = 0,
     flatbuffers::Offset<flatbuffers::Vector<int64_t>> output_refs = 0) {
@@ -897,6 +921,7 @@ inline flatbuffers::Offset<InferenceContext> CreateInferenceContext(
   builder_.add_output_ids(output_ids);
   builder_.add_variable_ids_and_refs(variable_ids_and_refs);
   builder_.add_input_ids(input_ids);
+  builder_.add_const_tensors(const_tensors);
   builder_.add_tensors(tensors);
   builder_.add_nodes(nodes);
   builder_.add_flush_period(flush_period);
@@ -909,42 +934,44 @@ inline flatbuffers::Offset<InferenceContext> CreateInferenceContext(
 }
 
 inline flatbuffers::Offset<InferenceContext> CreateInferenceContextDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool need_flush = false,
-    bool flush_periodically = false,
-    int32_t flush_period = 0,
+    flatbuffers::FlatBufferBuilder &_fbb, bool need_flush = false,
+    bool flush_periodically = false, int32_t flush_period = 0,
     bool need_manual_release = false,
-    tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32,
-    tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN,
-    const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>> *nodes = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *tensors = nullptr,
+    tflite::gpu::cl::data::CalculationsPrecision precision =
+        tflite::gpu::cl::data::CalculationsPrecision::F32,
+    tflite::gpu::data::TensorStorageType storage_type =
+        tflite::gpu::data::TensorStorageType::UNKNOWN,
+    const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>
+        *nodes = nullptr,
+    const std::vector<
+        flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *tensors =
+        nullptr,
+    const std::vector<flatbuffers::Offset<
+        tflite::gpu::cl::data::TensorDescWithId>> *const_tensors = nullptr,
     const std::vector<int32_t> *input_ids = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>> *variable_ids_and_refs = nullptr,
+    const std::vector<
+        flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>
+        *variable_ids_and_refs = nullptr,
     const std::vector<int32_t> *output_ids = nullptr,
     const std::vector<int64_t> *input_refs = nullptr,
     const std::vector<int64_t> *output_refs = nullptr) {
   auto nodes__ = nodes ? _fbb.CreateVector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>(*nodes) : 0;
   auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>(*tensors) : 0;
+  auto const_tensors__ =
+      const_tensors
+          ? _fbb.CreateVector<
+                flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>(
+                *const_tensors)
+          : 0;
   auto input_ids__ = input_ids ? _fbb.CreateVector<int32_t>(*input_ids) : 0;
   auto variable_ids_and_refs__ = variable_ids_and_refs ? _fbb.CreateVector<flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>(*variable_ids_and_refs) : 0;
   auto output_ids__ = output_ids ? _fbb.CreateVector<int32_t>(*output_ids) : 0;
   auto input_refs__ = input_refs ? _fbb.CreateVector<int64_t>(*input_refs) : 0;
   auto output_refs__ = output_refs ? _fbb.CreateVector<int64_t>(*output_refs) : 0;
   return tflite::gpu::cl::data::CreateInferenceContext(
-      _fbb,
-      need_flush,
-      flush_periodically,
-      flush_period,
-      need_manual_release,
-      precision,
-      storage_type,
-      nodes__,
-      tensors__,
-      input_ids__,
-      variable_ids_and_refs__,
-      output_ids__,
-      input_refs__,
-      output_refs__);
+      _fbb, need_flush, flush_periodically, flush_period, need_manual_release,
+      precision, storage_type, nodes__, tensors__, const_tensors__, input_ids__,
+      variable_ids_and_refs__, output_ids__, input_refs__, output_refs__);
 }
 
 inline const tflite::gpu::cl::data::InferenceContext *GetInferenceContext(const void *buf) {
@@ -744,6 +744,12 @@ AddressMode TensorDescriptor::AddressModeFromState() const {
   }
 }
 
+void TensorDescriptor::UploadData(
+    const tflite::gpu::Tensor<BHWC, DataType::FLOAT32>& src) {
+  shape = BHWDC(src.shape.b, src.shape.h, src.shape.w, 1, src.shape.c);
+  UploadData(absl::MakeConstSpan(src.data));
+}
+
 void TensorDescriptor::UploadData(
     const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
   shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c);
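The new overload maps a 4-D BHWC tensor onto the 5-D BHWDC shape used by TensorDescriptor by pinning the depth dimension to 1. A tiny sketch of that mapping with simplified shape structs, assumed here only for illustration:

```cpp
#include <cstdint>

// Simplified shape structs; the real ones live in tflite::gpu.
struct BHWC {
  int b, h, w, c;
};
struct BHWDC {
  int b, h, w, d, c;
};

// A 4-D BHWC tensor is stored in the 5-D BHWDC descriptor by fixing the
// depth dimension to 1, which is what the new overload above does.
BHWDC ToBHWDC(const BHWC& s) { return BHWDC{s.b, s.h, s.w, /*d=*/1, s.c}; }

int main() {
  BHWC src{2, 8, 8, 16};
  BHWDC dst = ToBHWDC(src);
  int64_t src_elems = int64_t{src.b} * src.h * src.w * src.c;
  int64_t dst_elems = int64_t{dst.b} * dst.h * dst.w * dst.d * dst.c;
  return src_elems == dst_elems ? 0 : 1;  // element count is preserved
}
```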
@@ -75,6 +75,7 @@ struct TensorDescriptor : public GPUObjectDescriptor {
       const std::vector<std::string>& args, std::string* value_name,
       std::string* x_coord, std::string* y_coord, std::string* s_coord) const;
 
+  void UploadData(const tflite::gpu::Tensor<BHWC, DataType::FLOAT32>& src);
   void UploadData(const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
   void UploadData(const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);
 