Added support for Const tensors in the OpenCL backend.

PiperOrigin-RevId: 342973472
Change-Id: I38932dda93dfb729cf4f14ff6310da57ad9b1e06
Authored by Raman Sarokin on 2020-11-17 16:45:27 -08:00; committed by TensorFlower Gardener
parent 39d3d60407
commit 042f6923fe
7 changed files with 141 additions and 59 deletions
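Taken together, the hunks below route graph nodes of type CONST around kernel generation: ConvertOperations captures each const tensor's descriptor and uploads its data, AllocateMemory materializes those descriptors as device tensors, GetTensor serves them alongside variable and shared tensors, and ReleaseCPURepresentation drops the host-side copies afterwards. A minimal stand-alone sketch of that flow, using simplified placeholder types rather than the real TFLite GPU classes:

// Minimal sketch of the const-tensor flow added by this commit. ValueId,
// TensorDescriptor, Tensor and Context below are simplified stand-ins, not
// the real TFLite GPU classes.
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using ValueId = uint32_t;

struct TensorDescriptor {
  std::vector<float> host_data;  // CPU copy kept only until the GPU upload.
};

struct Tensor {
  bool on_device = false;
};

class Context {
 public:
  // ConvertOperations: a CONST node produces no GPU operation; its data is
  // stashed in a descriptor keyed by the node's output value id.
  void RegisterConstTensor(ValueId id, TensorDescriptor desc) {
    const_tensors_descs_[id] = std::move(desc);
  }

  // AllocateMemory: every registered descriptor is materialized as a device
  // tensor before buffers and strong-shape tensors are allocated.
  void AllocateMemoryForConstTensors() {
    for (auto& d : const_tensors_descs_) {
      const_tensors_[d.first].on_device = true;  // CreateFromDescriptor(...)
    }
  }

  // GetTensor: const tensors are checked first when a value id is resolved
  // to backing memory.
  Tensor* GetTensor(ValueId id) {
    auto it = const_tensors_.find(id);
    return it != const_tensors_.end() ? &it->second : nullptr;
  }

  // ReleaseCPURepresentation: once everything lives on the GPU, the
  // host-side copies are dropped.
  void ReleaseCPURepresentation() { const_tensors_descs_.clear(); }

 private:
  std::map<ValueId, TensorDescriptor> const_tensors_descs_;
  std::map<ValueId, Tensor> const_tensors_;
};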

View File

@@ -212,9 +212,7 @@ absl::Status InferenceContext::InitFromGraph(
node.cl_operation.MoveObjectRefsFromGenericToCL();
}
}
for (auto& node : nodes_) {
node.cl_operation.GetGpuOperation().args_.ReleaseCPURepresentation();
}
ReleaseCPURepresentation();
return absl::OkStatus();
}
@@ -240,9 +238,7 @@ absl::Status InferenceContext::RestoreDeserialized(
RETURN_IF_ERROR(node.cl_operation.CompileDeserialized(creation_context));
}
RETURN_IF_ERROR(UpdateParams());
for (auto& node : nodes_) {
node.cl_operation.GetGpuOperation().args_.ReleaseCPURepresentation();
}
ReleaseCPURepresentation();
return absl::OkStatus();
}
@@ -329,6 +325,16 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info,
if (consumed_nodes.find(node.id) != consumed_nodes.end()) {
continue;
}
auto op_type = OperationTypeFromString(node.operation.type);
if (op_type == OperationType::CONST) {
auto attr =
absl::any_cast<ConstTensorAttributes>(node.operation.attributes);
auto outputs = graph.FindOutputs(node.id);
const_tensors_descs_[outputs[0]->id] =
tensor_reserver_.Get(outputs[0]->id).descriptor;
const_tensors_descs_[outputs[0]->id].UploadData(attr.tensor);
continue;
}
std::string op_name = node.operation.type + " " + std::to_string(node.id);
GPUOperationsSubgraph gpu_subgraph;
if (hints.Check(ModelHints::kAllowSpecialKernels) &&
@@ -481,22 +487,34 @@ void InferenceContext::GetUsages(const std::function<bool(ValueId)>& functor,
InferenceContext::TensorMemoryType InferenceContext::GetTensorMemoryType(
ValueId id) {
if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
return TensorMemoryType::VARIABLE;
if (const_tensors_.find(id) != const_tensors_.end()) {
return TensorMemoryType::kConst;
} else if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
return TensorMemoryType::kVariable;
} else if (IsBufferBased(tensor_reserver_.Get(id).descriptor.storage_type)) {
return TensorMemoryType::BUFFER;
return TensorMemoryType::kBuffer;
} else {
return TensorMemoryType::STRONG_SHAPE;
return TensorMemoryType::kStrongShape;
}
}
absl::Status InferenceContext::AllocateMemory(CLContext* context) {
RETURN_IF_ERROR(AllocateMemoryForConstTensors(context));
RETURN_IF_ERROR(AllocateMemoryForVariableTensors(context));
RETURN_IF_ERROR(AllocateMemoryForBuffers(context));
RETURN_IF_ERROR(AllocateMemoryForStrongShapes(context));
return absl::OkStatus();
}
absl::Status InferenceContext::AllocateMemoryForConstTensors(
CLContext* context) {
for (auto& description : const_tensors_descs_) {
RETURN_IF_ERROR(const_tensors_[description.first].CreateFromDescriptor(
description.second, context));
}
return absl::OkStatus();
}
absl::Status InferenceContext::AllocateMemoryForVariableTensors(
CLContext* context) {
std::map<ValueId, int> ref_value_to_tensor_index;
@@ -520,7 +538,7 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(CLContext* context) {
std::map<ValueId, int2> buffer_usages;
GetUsages(
[this](ValueId id) {
return GetTensorMemoryType(id) == TensorMemoryType::BUFFER;
return GetTensorMemoryType(id) == TensorMemoryType::kBuffer;
},
&buffer_usages);
@@ -555,7 +573,7 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(CLContext* context) {
for (auto& node : nodes_) {
auto tensors = GetCLNodeTensors(node);
for (auto& t : tensors) {
if (GetTensorMemoryType(t.first) != TensorMemoryType::BUFFER) continue;
if (GetTensorMemoryType(t.first) != TensorMemoryType::kBuffer) continue;
const int tensor_index = graph_ids_to_shared_buffer_tensors_[t.first];
if (created_tensors[tensor_index]) continue;
const auto& shape = tensor_reserver_.Get(t.first).shape;
@@ -574,7 +592,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes(
std::map<ValueId, int2> usages;
GetUsages(
[this](ValueId id) {
return GetTensorMemoryType(id) == TensorMemoryType::STRONG_SHAPE;
return GetTensorMemoryType(id) == TensorMemoryType::kStrongShape;
},
&usages);
@@ -594,7 +612,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes(
for (auto& node : nodes_) {
auto tensors = GetCLNodeTensors(node);
for (auto& t : tensors) {
if (GetTensorMemoryType(t.first) != TensorMemoryType::STRONG_SHAPE) {
if (GetTensorMemoryType(t.first) != TensorMemoryType::kStrongShape) {
continue;
}
const auto& shape = tensor_reserver_.Get(t.first).shape;
@@ -696,7 +714,9 @@ uint64_t InferenceContext::GetSizeOfMemoryAllocatedForIntermediateTensors()
}
Tensor* InferenceContext::GetTensor(ValueId id) {
if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
if (const_tensors_.find(id) != const_tensors_.end()) {
return &const_tensors_[id];
} else if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
return &variable_tensors_[variable_ids_and_refs_[id]];
} else if (graph_ids_to_shared_buffer_tensors_.find(id) !=
graph_ids_to_shared_buffer_tensors_.end()) {
@@ -724,6 +744,13 @@ absl::Status InferenceContext::GetOutputTensor(ValueId id,
return gpu_tensor.ReadData(queue, result);
}
void InferenceContext::ReleaseCPURepresentation() {
for (auto& node : nodes_) {
node.cl_operation.GetGpuOperation().args_.ReleaseCPURepresentation();
}
const_tensors_descs_.clear();
}
absl::Status RunGraphTransforms(GraphFloat32* graph) {
auto merge_padding_transform = NewMergePaddingWithAdd();
auto add_bias_transform = NewAddBias();

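The memory-type dispatch above is what keeps the rest of the allocation code uniform: GetTensorMemoryType asks the categories in a fixed order, and kConst wins before kVariable, kBuffer, and kStrongShape, so a const value id never reaches the shared-buffer or strong-shape allocators. A small stand-alone sketch of that dispatch order (the set arguments are simplified stand-ins for the context's member maps, not the real types):

#include <set>

enum class TensorMemoryType { kStrongShape, kBuffer, kVariable, kConst };

// Simplified stand-in for InferenceContext::GetTensorMemoryType: the first
// category that claims the id wins, so const shadows every other kind.
TensorMemoryType Classify(int id, const std::set<int>& const_ids,
                          const std::set<int>& variable_ids,
                          const std::set<int>& buffer_ids) {
  if (const_ids.count(id)) return TensorMemoryType::kConst;
  if (variable_ids.count(id)) return TensorMemoryType::kVariable;
  if (buffer_ids.count(id)) return TensorMemoryType::kBuffer;
  return TensorMemoryType::kStrongShape;
}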
View File

@@ -102,7 +102,7 @@ class InferenceContext {
const absl::Span<const uint8_t> serialized_model, Environment* env);
private:
enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
enum class TensorMemoryType { kStrongShape, kBuffer, kVariable, kConst };
friend flatbuffers::Offset<data::InferenceContext> Encode(
const InferenceContext& inference,
@@ -119,6 +119,8 @@ class InferenceContext {
absl::Status Merge();
absl::Status AllocateMemory(CLContext* context);
absl::Status AllocateMemoryForConstTensors(CLContext* context);
absl::Status AllocateMemoryForVariableTensors(CLContext* context);
absl::Status AllocateMemoryForBuffers(CLContext* context);
@@ -137,6 +139,8 @@ class InferenceContext {
ProfilingCommandQueue* profiling_queue);
absl::Status UpdateParams();
void ReleaseCPURepresentation();
// performance hacks
bool need_flush_ = false;
@@ -213,6 +217,9 @@ class InferenceContext {
};
TensorReserver tensor_reserver_;
absl::flat_hash_map<ValueId, TensorDescriptor> const_tensors_descs_;
std::map<ValueId, Tensor> const_tensors_;
std::map<ValueId, Tensor> variable_tensors_;
std::vector<Buffer> shared_buffers_;
std::vector<Tensor>

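One header-level detail worth noting: the unscoped enum with upper-case names is replaced by a scoped enum class with k-prefixed enumerators, so the new kConst value cannot silently convert to int or collide with other names in the class. A tiny illustration of the difference (names are illustrative only, not from the codebase):

enum OldKind { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
enum class NewKind { kStrongShape, kBuffer, kVariable, kConst };

int main() {
  int a = BUFFER;               // unscoped enumerators convert to int silently
  // int b = NewKind::kBuffer;  // error: scoped enumerators do not convert
  NewKind k = NewKind::kConst;  // and must be qualified with the enum's name
  return a + static_cast<int>(k);
}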
View File

@@ -944,12 +944,19 @@ flatbuffers::Offset<data::InferenceContext> Encode(
std::vector<flatbuffers::Offset<data::TensorDescWithId>> tensors_fb;
auto tensors = inference.tensor_reserver_.GetTensorDescs();
for (auto& tensor : tensors) {
for (const auto& tensor : tensors) {
auto tensor_fb = Encode(tensor.second, tensor.first, builder);
tensors_fb.push_back(tensor_fb);
}
auto tensors_fb_vec = builder->CreateVector(tensors_fb);
std::vector<flatbuffers::Offset<data::TensorDescWithId>> const_tensors_fb;
for (const auto& tensor : inference.const_tensors_descs_) {
auto tensor_fb = Encode(tensor.second, tensor.first, builder);
const_tensors_fb.push_back(tensor_fb);
}
auto const_tensors_fb_vec = builder->CreateVector(const_tensors_fb);
std::vector<flatbuffers::Offset<data::PairOfValueIds>>
variable_ids_and_refs_fb;
for (auto& pair : inference.variable_ids_and_refs_) {
@@ -970,6 +977,7 @@ flatbuffers::Offset<data::InferenceContext> Encode(
inf_builder.add_storage_type(tflite::gpu::ToFB(inference.storage_type_));
inf_builder.add_nodes(nodes_fb_vec);
inf_builder.add_tensors(tensors_fb_vec);
inf_builder.add_const_tensors(const_tensors_fb_vec);
inf_builder.add_input_ids(in_ids_fb);
inf_builder.add_output_ids(out_ids_fb);
inf_builder.add_variable_ids_and_refs(variable_ids_and_refs_fb_vec);
@@ -995,12 +1003,17 @@ absl::Status Decode(const data::InferenceContext* fb_inference,
}
std::vector<std::pair<ValueId, TensorDescriptor>> tensors;
for (auto tensor_fb : *fb_inference->tensors()) {
for (const auto& tensor_fb : *fb_inference->tensors()) {
TensorDescriptor desc;
Decode(tensor_fb->desc(), &desc);
tensors.push_back({tensor_fb->id(), std::move(desc)});
}
inference->tensor_reserver_.Add(tensors);
for (const auto& tensor_fb : *fb_inference->const_tensors()) {
TensorDescriptor desc;
Decode(tensor_fb->desc(), &desc);
inference->const_tensors_descs_[tensor_fb->id()] = std::move(desc);
}
for (auto in_fb : *fb_inference->input_ids()) {
inference->input_ids_.push_back(in_fb);
}

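Serialization mirrors the in-memory representation: Encode flattens const_tensors_descs_ into a vector of (id, descriptor) entries next to the existing tensors vector, and Decode rebuilds the map so a deserialized context can allocate from it. A minimal round-trip sketch of the same pattern with plain containers (the types here are stand-ins, not the FlatBuffers-generated ones):

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using ValueId = uint32_t;
struct Desc { int storage_type = 0; };

// Encode side: map -> flat list of (id, descriptor) pairs, order preserved.
std::vector<std::pair<ValueId, Desc>> EncodeConstTensors(
    const std::map<ValueId, Desc>& const_descs) {
  std::vector<std::pair<ValueId, Desc>> out;
  for (const auto& d : const_descs) out.emplace_back(d.first, d.second);
  return out;
}

// Decode side: flat list -> map keyed by id, as the deserialized context expects.
std::map<ValueId, Desc> DecodeConstTensors(
    const std::vector<std::pair<ValueId, Desc>>& entries) {
  std::map<ValueId, Desc> out;
  for (const auto& e : entries) out[e.first] = e.second;
  return out;
}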
View File

@@ -95,6 +95,7 @@ table InferenceContext {
storage_type:tflite.gpu.data.TensorStorageType;
nodes:[CLNode];
tensors:[TensorDescWithId];
const_tensors:[TensorDescWithId];
input_ids:[int32];
variable_ids_and_refs:[PairOfValueIds];
output_ids:[int32];

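Because const_tensors is declared between tensors and input_ids, the generated vtable slots for every later field shift, which is exactly what the regenerated header below shows (VT_INPUT_IDS moves from 20 to 22, and so on). A related FlatBuffers property worth keeping in mind when reusing this pattern: a vector field that is absent from a given buffer reads back as a null pointer rather than an empty vector, so callers that treat such a field as optional typically guard the access. A small stand-alone illustration with mock types (all names hypothetical):

#include <vector>

// Mock of a FlatBuffers table whose optional vector field may be absent,
// in which case the accessor yields nullptr instead of an empty vector.
struct DescWithId { int id = 0; };
struct MockInferenceContext {
  const std::vector<DescWithId>* const_tensors = nullptr;  // optional field
};

int CountConstTensors(const MockInferenceContext& fb) {
  if (fb.const_tensors == nullptr) return 0;  // field missing: nothing to read
  return static_cast<int>(fb.const_tensors->size());
}

int main() {
  MockInferenceContext without_field;         // field never written
  std::vector<DescWithId> two = {{1}, {2}};
  MockInferenceContext with_field{&two};      // field present
  return CountConstTensors(without_field) + CountConstTensors(with_field);  // 0 + 2
}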
View File

@@ -748,11 +748,12 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
VT_STORAGE_TYPE = 14,
VT_NODES = 16,
VT_TENSORS = 18,
VT_INPUT_IDS = 20,
VT_VARIABLE_IDS_AND_REFS = 22,
VT_OUTPUT_IDS = 24,
VT_INPUT_REFS = 26,
VT_OUTPUT_REFS = 28
VT_CONST_TENSORS = 20,
VT_INPUT_IDS = 22,
VT_VARIABLE_IDS_AND_REFS = 24,
VT_OUTPUT_IDS = 26,
VT_INPUT_REFS = 28,
VT_OUTPUT_REFS = 30
};
bool need_flush() const {
return GetField<uint8_t>(VT_NEED_FLUSH, 0) != 0;
@@ -778,6 +779,13 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
const flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *tensors() const {
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *>(VT_TENSORS);
}
const flatbuffers::Vector<
flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>
*const_tensors() const {
return GetPointer<const flatbuffers::Vector<
flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *>(
VT_CONST_TENSORS);
}
const flatbuffers::Vector<int32_t> *input_ids() const {
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUT_IDS);
}
@@ -801,12 +809,14 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
VerifyField<uint8_t>(verifier, VT_NEED_MANUAL_RELEASE) &&
VerifyField<int8_t>(verifier, VT_PRECISION) &&
VerifyField<int8_t>(verifier, VT_STORAGE_TYPE) &&
VerifyOffset(verifier, VT_NODES) &&
verifier.VerifyVector(nodes()) &&
VerifyOffset(verifier, VT_NODES) && verifier.VerifyVector(nodes()) &&
verifier.VerifyVectorOfTables(nodes()) &&
VerifyOffset(verifier, VT_TENSORS) &&
verifier.VerifyVector(tensors()) &&
verifier.VerifyVectorOfTables(tensors()) &&
VerifyOffset(verifier, VT_CONST_TENSORS) &&
verifier.VerifyVector(const_tensors()) &&
verifier.VerifyVectorOfTables(const_tensors()) &&
VerifyOffset(verifier, VT_INPUT_IDS) &&
verifier.VerifyVector(input_ids()) &&
VerifyOffset(verifier, VT_VARIABLE_IDS_AND_REFS) &&
@@ -817,8 +827,7 @@ struct InferenceContext FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
VerifyOffset(verifier, VT_INPUT_REFS) &&
verifier.VerifyVector(input_refs()) &&
VerifyOffset(verifier, VT_OUTPUT_REFS) &&
verifier.VerifyVector(output_refs()) &&
verifier.EndTable();
verifier.VerifyVector(output_refs()) && verifier.EndTable();
}
};
@@ -850,6 +859,12 @@ struct InferenceContextBuilder {
void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>> tensors) {
fbb_.AddOffset(InferenceContext::VT_TENSORS, tensors);
}
void add_const_tensors(
flatbuffers::Offset<flatbuffers::Vector<
flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>>
const_tensors) {
fbb_.AddOffset(InferenceContext::VT_CONST_TENSORS, const_tensors);
}
void add_input_ids(flatbuffers::Offset<flatbuffers::Vector<int32_t>> input_ids) {
fbb_.AddOffset(InferenceContext::VT_INPUT_IDS, input_ids);
}
@@ -877,17 +892,26 @@ struct InferenceContextBuilder {
};
inline flatbuffers::Offset<InferenceContext> CreateInferenceContext(
flatbuffers::FlatBufferBuilder &_fbb,
bool need_flush = false,
bool flush_periodically = false,
int32_t flush_period = 0,
flatbuffers::FlatBufferBuilder &_fbb, bool need_flush = false,
bool flush_periodically = false, int32_t flush_period = 0,
bool need_manual_release = false,
tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32,
tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN,
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>> nodes = 0,
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>> tensors = 0,
tflite::gpu::cl::data::CalculationsPrecision precision =
tflite::gpu::cl::data::CalculationsPrecision::F32,
tflite::gpu::data::TensorStorageType storage_type =
tflite::gpu::data::TensorStorageType::UNKNOWN,
flatbuffers::Offset<
flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>>
nodes = 0,
flatbuffers::Offset<flatbuffers::Vector<
flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>>
tensors = 0,
flatbuffers::Offset<flatbuffers::Vector<
flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>>
const_tensors = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> input_ids = 0,
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>> variable_ids_and_refs = 0,
flatbuffers::Offset<flatbuffers::Vector<
flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>>
variable_ids_and_refs = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> output_ids = 0,
flatbuffers::Offset<flatbuffers::Vector<int64_t>> input_refs = 0,
flatbuffers::Offset<flatbuffers::Vector<int64_t>> output_refs = 0) {
@@ -897,6 +921,7 @@ inline flatbuffers::Offset<InferenceContext> CreateInferenceContext(
builder_.add_output_ids(output_ids);
builder_.add_variable_ids_and_refs(variable_ids_and_refs);
builder_.add_input_ids(input_ids);
builder_.add_const_tensors(const_tensors);
builder_.add_tensors(tensors);
builder_.add_nodes(nodes);
builder_.add_flush_period(flush_period);
@@ -909,42 +934,44 @@ inline flatbuffers::Offset<InferenceContext> CreateInferenceContext(
}
inline flatbuffers::Offset<InferenceContext> CreateInferenceContextDirect(
flatbuffers::FlatBufferBuilder &_fbb,
bool need_flush = false,
bool flush_periodically = false,
int32_t flush_period = 0,
flatbuffers::FlatBufferBuilder &_fbb, bool need_flush = false,
bool flush_periodically = false, int32_t flush_period = 0,
bool need_manual_release = false,
tflite::gpu::cl::data::CalculationsPrecision precision = tflite::gpu::cl::data::CalculationsPrecision::F32,
tflite::gpu::data::TensorStorageType storage_type = tflite::gpu::data::TensorStorageType::UNKNOWN,
const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>> *nodes = nullptr,
const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *tensors = nullptr,
tflite::gpu::cl::data::CalculationsPrecision precision =
tflite::gpu::cl::data::CalculationsPrecision::F32,
tflite::gpu::data::TensorStorageType storage_type =
tflite::gpu::data::TensorStorageType::UNKNOWN,
const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>
*nodes = nullptr,
const std::vector<
flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>> *tensors =
nullptr,
const std::vector<flatbuffers::Offset<
tflite::gpu::cl::data::TensorDescWithId>> *const_tensors = nullptr,
const std::vector<int32_t> *input_ids = nullptr,
const std::vector<flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>> *variable_ids_and_refs = nullptr,
const std::vector<
flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>
*variable_ids_and_refs = nullptr,
const std::vector<int32_t> *output_ids = nullptr,
const std::vector<int64_t> *input_refs = nullptr,
const std::vector<int64_t> *output_refs = nullptr) {
auto nodes__ = nodes ? _fbb.CreateVector<flatbuffers::Offset<tflite::gpu::cl::data::CLNode>>(*nodes) : 0;
auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>(*tensors) : 0;
auto const_tensors__ =
const_tensors
? _fbb.CreateVector<
flatbuffers::Offset<tflite::gpu::cl::data::TensorDescWithId>>(
*const_tensors)
: 0;
auto input_ids__ = input_ids ? _fbb.CreateVector<int32_t>(*input_ids) : 0;
auto variable_ids_and_refs__ = variable_ids_and_refs ? _fbb.CreateVector<flatbuffers::Offset<tflite::gpu::cl::data::PairOfValueIds>>(*variable_ids_and_refs) : 0;
auto output_ids__ = output_ids ? _fbb.CreateVector<int32_t>(*output_ids) : 0;
auto input_refs__ = input_refs ? _fbb.CreateVector<int64_t>(*input_refs) : 0;
auto output_refs__ = output_refs ? _fbb.CreateVector<int64_t>(*output_refs) : 0;
return tflite::gpu::cl::data::CreateInferenceContext(
_fbb,
need_flush,
flush_periodically,
flush_period,
need_manual_release,
precision,
storage_type,
nodes__,
tensors__,
input_ids__,
variable_ids_and_refs__,
output_ids__,
input_refs__,
output_refs__);
_fbb, need_flush, flush_periodically, flush_period, need_manual_release,
precision, storage_type, nodes__, tensors__, const_tensors__, input_ids__,
variable_ids_and_refs__, output_ids__, input_refs__, output_refs__);
}
inline const tflite::gpu::cl::data::InferenceContext *GetInferenceContext(const void *buf) {

View File

@@ -744,6 +744,12 @@ AddressMode TensorDescriptor::AddressModeFromState() const {
}
}
void TensorDescriptor::UploadData(
const tflite::gpu::Tensor<BHWC, DataType::FLOAT32>& src) {
shape = BHWDC(src.shape.b, src.shape.h, src.shape.w, 1, src.shape.c);
UploadData(absl::MakeConstSpan(src.data));
}
void TensorDescriptor::UploadData(
const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c);

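Both UploadData overloads normalize the source shape into the backend's common BHWDC layout before forwarding the raw floats to the span-based uploader: a BHWC tensor keeps its batch and gets depth 1, an HWC tensor gets batch 1 and depth 1. A small stand-alone sketch of that mapping (the structs mirror the TFLite GPU shape types but are re-declared here as simplified stand-ins):

#include <cassert>

// Simplified stand-ins for the TFLite GPU shape structs.
struct BHWC { int b, h, w, c; };
struct HWC { int h, w, c; };
struct BHWDC { int b, h, w, d, c; };

BHWDC FromBHWC(const BHWC& s) { return {s.b, s.h, s.w, /*d=*/1, s.c}; }
BHWDC FromHWC(const HWC& s) { return {/*b=*/1, s.h, s.w, /*d=*/1, s.c}; }

int main() {
  BHWDC a = FromBHWC({2, 8, 8, 16});  // batch preserved, depth forced to 1
  BHWDC b = FromHWC({8, 8, 16});      // batch and depth both forced to 1
  assert(a.d == 1 && b.b == 1 && b.d == 1);
  return 0;
}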
View File

@@ -75,6 +75,7 @@ struct TensorDescriptor : public GPUObjectDescriptor {
const std::vector<std::string>& args, std::string* value_name,
std::string* x_coord, std::string* y_coord, std::string* s_coord) const;
void UploadData(const tflite::gpu::Tensor<BHWC, DataType::FLOAT32>& src);
void UploadData(const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
void UploadData(const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);