TFLite Rsqrt is converted to Div(Quantize(1), Sqrt(input)) in the Hexagon graph.

PiperOrigin-RevId: 349308872
Change-Id: Ic392af0f0e1eac4cf712775c5944ca650271ea4b
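For context, the identity this lowering relies on is rsqrt(x) = 1 / sqrt(x). Below is a minimal float-domain sketch of the decomposition (illustrative C++ only, not the delegate's builder code): in the quantized Hexagon graph, the constant 1 becomes a Quantize(1) const node and the division becomes a quantized Div over a quantized Sqrt.

#include <cmath>
#include <cstdio>

// rsqrt(x) == 1 / sqrt(x): the "1.0f" plays the role of the Quantize(1)
// const node, and the division plays the role of Div(..., Sqrt(input)).
float RsqrtViaDivSqrt(float x) { return 1.0f / std::sqrt(x); }

int main() {
  std::printf("%f\n", RsqrtViaDivSqrt(4.0f));  // prints 0.500000
  return 0;
}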
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/hexagon/builders/op_builder.h"

#include "hexagon/hexagon_nn_ops.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/hexagon/builders/op_factory.h"
#include <farmhash.h>

namespace tflite {
namespace delegates {
namespace hexagon {
namespace {

// Farmhash Fingerprint
inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) {
  // Murmur-inspired hashing.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (l ^ h) * kMul;
  a ^= (a >> 47);
  uint64_t b = (h ^ a) * kMul;
  b ^= (b >> 44);
  b *= kMul;
  b ^= (b >> 41);
  b *= kMul;
  return b;
}

inline uint64_t ComputeHash(const int shape[], const char* data,
                            const int data_len) {
  return CombineFingerprints(
      ::util::Fingerprint64(data, data_len),
      ::util::Fingerprint64(reinterpret_cast<const char*>(shape),
                            sizeof(shape[0]) * 4));
}

inline uint64_t ComputeHash(const TfLiteTensor& tensor, const int shape[],
                            int int8_to_uint8) {
  auto data_hash = ComputeHash(shape, tensor.data.raw_const, tensor.bytes);
  auto int8_to_uint8_hash = ::util::Fingerprint64(
      reinterpret_cast<char*>(&int8_to_uint8), sizeof(int8_to_uint8));
  return CombineFingerprints(data_hash, int8_to_uint8_hash);
}

int GetElementSize(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
      return sizeof(float);
    case kTfLiteBool:
      return sizeof(bool);
    case kTfLiteInt32:
      return sizeof(int32_t);
    case kTfLiteInt8:
      return sizeof(int8_t);
    case kTfLiteUInt8:
      return sizeof(uint8_t);
    default:
      return sizeof(int8_t);
  }
}
}  // namespace

OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type,
                                                     TfLiteNode* node) {
  switch (op_type) {
    case kTfLiteBuiltinAdd:
      return CreateArithmeticBuilder(this, OP_QuantizedAdd_8p8to8);
    case kTfLiteBuiltinArgMax:
      return CreateArgMinMaxOpBuilder(this, OP_ArgMax_8toInt32);
    case kTfLiteBuiltinArgMin:
      return CreateArgMinMaxOpBuilder(this, OP_ArgMin_8);
    case kTfLiteBuiltinMul:
      // The 32-bit version of Mul is more accurate, and robust to disparities
      // in input/output ranges.
      return CreateArithmeticBuilder(this, OP_QuantizedMul_8x8to32);
    case kTfLiteBuiltinSub:
      return CreateArithmeticBuilder(this, OP_QuantizedSub_8p8to8);
    case kTfLiteBuiltinMean:
      return CreateReduceBuilder(this, OP_QuantizedMean_8);
    case kTfLiteBuiltinSum:
      return CreateReduceBuilder(this, OP_QuantizedSum_8to32);
    case kTfLiteBuiltinPad:
      return CreatePadBuilder(this, OP_QuantizedPad_8);
    case kTfLiteBuiltinMirrorPad:
      return CreateMirrorPadBuilder(this, OP_MirrorPad_8);
    case kTfLiteBuiltinFullyConnected: {
      const auto& weights_tensor = context_->tensors[node->inputs->data[1]];
      if (weights_tensor.allocation_type == kTfLiteMmapRo)
        return CreateMatMulWithConstWeightsOpBuilder(
            this, OP_QuantizedMatMul_8x8to32);
      else
        return CreateMatMulOpBuilder(this, OP_Transpose_8);
    }
    case kTfLiteBuiltinAveragePool2d:
      return CreatePool2DBuilder(this, OP_QuantizedAvgPool_8);
    case kTfLiteBuiltinMaxPool2d:
      return CreatePool2DBuilder(this, OP_QuantizedMaxPool_8);
    case kTfLiteBuiltinConcatenation:
      return CreateConcatBuilder(this, OP_QuantizedConcat_8);
    case kTfLiteBuiltinConv2d:
      return CreateConv2DBuilder(this, OP_Supernode_8x8p32to8);
    case kTfLiteBuiltinTransposeConv:
      return CreateTransposeConv2DBuilder(
          this, OP_QuantizedTransposeConv2d_8x8p32to8);
    case kTfLiteBuiltinDepthwiseConv2d:
      return CreateConv2DBuilder(this, OP_DepthwiseSupernode_8x8p32to8);
    case kTfLiteBuiltinReshape:
      return CreateReshapeBuilder(this, OP_Reshape);
    case kTfLiteBuiltinSoftmax:
      return CreateSoftmaxBuilder(this, OP_QuantizedSoftmax_8);
    case kTfLiteBuiltinResizeNearestNeighbor:
      return CreateResizeNearestNeighborBuilder(this,
                                                OP_ResizeNearestNeighbor_8);
    case kTfLiteBuiltinL2Normalization:
      return CreateL2NormalizationBuilder(this, OP_L2Normalize_8);
    case kTfLiteBuiltinRelu:
      return CreateActivationBuilder(this, OP_QuantizedRelu_8);
    case kTfLiteBuiltinRelu6:
      return CreateActivationBuilder(this, OP_QuantizedReluX_8);
    case kTfLiteBuiltinTanh:
      return CreateActivationBuilder(this, OP_QuantizedTanh_8);
    case kTfLiteBuiltinLogistic:
      return CreateActivationBuilder(this, OP_QuantizedSigmoid_8);
    case kTfLiteBuiltinSplit:
      return CreateSplitBuilder(this, OP_QuantizedSplit_8);
    case kTfLiteBuiltinResizeBilinear:
      return CreateResizeBilinearOpBuilder(this, OP_QuantizedResizeBilinear_8);
    case kTfLiteBuiltinNeg:
      return CreateNegOpBuilder(this, OP_QuantizedNeg_8);
    case kTfLiteBuiltinTranspose:
      return CreateTransposeBuilder(this, OP_Transpose_8);
    case kTfLiteBuiltinSpaceToDepth:
      return CreateSpaceToDepthBuilder(this, OP_SpaceToDepth_8);
    case kTfLiteBuiltinDepthToSpace:
      return CreateSpaceToDepthBuilder(this, OP_DepthToSpace_8);
    case kTfLiteBuiltinQuantize:
      return CreateQuantizeBuilder(this, OP_Requantize_8to8);
    case kTfLiteBuiltinHardSwish:
      return CreateHardSwishBuilder(this, OP_QuantizedHardSwish_8);
    case kTfLiteBuiltinMinimum:
      return CreateMinMaxBuilder(this, OP_QuantizedMinimum_8);
    case kTfLiteBuiltinMaximum:
      return CreateMinMaxBuilder(this, OP_QuantizedMaximum_8);
    case kTfLiteBuiltinSlice:
      return CreateSliceOpBuilder(this, OP_QuantizedSlice_8);
    case kTfLiteBuiltinPack:
      return CreatePackBuilder(this, OP_QuantizedPack_8);
    case kTfLiteBuiltinStridedSlice:
      return CreateStridedSliceBuilder(this, OP_QuantizedStridedSlice_8);
    case kTfLiteBuiltinSquaredDifference:
      return CreateSquaredDifferenceOpBuilder(this, OP_QuantizedSub_8p8to8);
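    // Rsqrt is lowered as Div(Quantize(1), Sqrt(input)) in the Hexagon graph
    // (see the change description), so only the quantized Sqrt op is passed
    // to the builder here.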
    case kTfLiteBuiltinRsqrt:
      return CreateRSqrtOpBuilder(this, OP_QuantizedSqrt_8);
    default:
      context_->ReportError(context_, "Op not supported: %d", op_type);
      return nullptr;
  }
}

OpBuilder* GraphBuilder::LookupConstData(uint64_t cache_key) {
  auto lookup_result = cache_.find(cache_key);
  if (lookup_result != cache_.end()) return lookup_result->second;
  return nullptr;
}

void GraphBuilder::AddToCache(uint64_t cache_key, OpBuilder* value) {
  cache_[cache_key] = value;
}

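// Const nodes are deduplicated: the cache key fingerprints the raw bytes and
// the 4-D shape (plus the int8-to-uint8 flag in the tensor overload below),
// so identical constant data is appended to the Hexagon graph only once.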
OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data,
                                              int data_size) {
  auto cache_key = ComputeHash(shape, data, data_size);
  if (auto lookup_result = LookupConstData(cache_key)) return lookup_result;
  builders_.emplace_back(new OpBuilder(this, OP_Const));
  builders_.back()->SetConstNode();
  builders_.back()->SetNodeId(builders_.size());
  int error = hexagon_nn_->hexagon_nn_append_const_node(
      graph_id_, builders_.size(), shape[0], shape[1], shape[2], shape[3],
      reinterpret_cast<const uint8_t*>(data), data_size);
  if (error != 0) {
    TF_LITE_KERNEL_LOG(context_, "Error adding const node with shape id: %d",
                       static_cast<int>(builders_.size()));
    return nullptr;
  }
  AddToCache(cache_key, builders_.back().get());
  return builders_.back().get();
}

OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id,
                                              const TfLiteTensor& tensor,
                                              bool int8_to_uint8) {
  // Fetch shape of tensor and pad 1's so it is always 4D.
  int batch_size, height_size, width_size, depth_size;
  GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims);
  const int shape[] = {batch_size, height_size, width_size, depth_size};

  auto cache_key = ComputeHash(tensor, shape, int8_to_uint8 ? 1 : 0);
  if (auto lookup_result = LookupConstData(cache_key)) {
    // If tensor is cached but with no id, that can happen when the same
    // data is added from a constant value (not tensor). We can cache the data
    // and reuse it.
    // We assign the tensor to this cached const node before returning.
    if (!HasTensor(tensor_id))
      AddTensorWithID(tensor_id, lookup_result->GetID(), 0);
    return lookup_result;
  }
  builders_.emplace_back(new OpBuilder(this, OP_Const));
  const int node_id = builders_.size();
  builders_.back()->SetConstNode();
  builders_.back()->SetNodeId(node_id);
  int error = hexagon_nn_->hexagon_nn_append_const_node(
      graph_id_, node_id, batch_size, height_size, width_size, depth_size,
      reinterpret_cast<const uint8_t*>(tensor.data.raw), tensor.bytes);
  if (error > 0) {
    context_->ReportError(
        context_, "Failed to add const node for tensor with id: %d", tensor_id);
    return nullptr;
  }
  AddTensorWithID(tensor_id, node_id, 0);
  // We need to return the builder with result, so we can't rely
  // on builders_.back() as it can change while casting, so we hold pointer
  // and update with value from casting if needed.
  OpBuilder* result_builder = builders_.back().get();
  // Cast int8 to uint8 if requested.
  // This will add cast op to uint8 and update tensor map to point
  // to the casted tensor.
  if (int8_to_uint8 && tensor.type == kTfLiteInt8) {
    AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id,
              &result_builder);
  }
  AddToCache(cache_key, result_builder);
  return result_builder;
}

// TODO(b/154604279): Support these casting ops in Hexagon op profiling (which
// seems to key tensors on a single op, which may not be the case now).
TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type,
                                     int tensor_id,
                                     OpBuilder** cast_op_builder) {
  // Create a new OpBuilder for casting the tensor.
  OpBuilder* cast_builder = CreateCastBuilder(this, op_type);
  builders_.emplace_back(cast_builder);
  cast_builder->SetNodeId(builders_.size());
  // We cast the tensor in-place, so there is only 1 input & output which is the
  // same.
  auto* tensor_data = TfLiteIntArrayCreate(1);
  tensor_data->data[0] = tensor_id;

  TF_LITE_ENSURE_STATUS(
      cast_builder->PopulateSubGraph(tensor_data, tensor_data, context));
  TF_LITE_ENSURE_STATUS(cast_builder->RegisterOutputs(tensor_data, context));

  TfLiteIntArrayFree(tensor_data);
  if (cast_op_builder != nullptr) *cast_op_builder = cast_builder;
  return kTfLiteOk;
}

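// Graph inputs are funneled through a single Hexagon OP_INPUT node. Constant
// (kTfLiteMmapRo) tensors listed among the inputs are skipped, and int8
// inputs get an in-place cast to uint8, since the delegate's quantized ops
// operate on uint8 data.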
TfLiteStatus GraphBuilder::AddInputTensors(const TfLiteIntArray* input_tensors,
                                           TfLiteContext* context) {
  auto* input_op = AddNode();
  input_op->SetOpType(OP_INPUT);

  // We need to track num_inputs since not all input_tensors are actual input
  // data. Some are constants.
  int num_inputs = 0;
  for (int i = 0; i < input_tensors->size; ++i) {
    const int tensor_id = input_tensors->data[i];
    const auto& tensor = context->tensors[tensor_id];
    if (tensor.allocation_type == kTfLiteMmapRo) continue;
    input_op->AddOutput(tensor.dims, GetElementSize(tensor.type));
    AddTensorWithID(tensor_id, input_op->GetID(), num_inputs);
    // If tensor is of type int8, add an op to cast it to uint8.
    if (tensor.type == kTfLiteInt8) {
      TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastInt8ToUInt8,
                                      tensor_id, /*cast_op_builder=*/nullptr));
    }
    ++num_inputs;
  }

  return kTfLiteOk;
}

TfLiteStatus GraphBuilder::AddOutputTensors(
    const TfLiteIntArray* output_tensors, TfLiteContext* context) {
  std::vector<OpBuilder::TensorID> hexagon_output_ids;
  hexagon_output_ids.reserve(output_tensors->size);

  for (int i = 0; i < output_tensors->size; ++i) {
    const int tensor_id = output_tensors->data[i];
    const auto& tensor = context->tensors[tensor_id];
    // If tensor is of type int8, add an op to cast the uint8 result back to
    // int8.
    if (tensor.type == kTfLiteInt8) {
      TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastUInt8ToInt8,
                                      tensor_id, /*cast_op_builder=*/nullptr));
    }
    hexagon_output_ids.push_back(GetHexagonTensorId(tensor_id));
  }

  // Add Hexagon OUTPUT op.
  auto* output_op = AddNode();
  output_op->SetOpType(OP_OUTPUT);
  for (auto hexagon_output : hexagon_output_ids) {
    output_op->AddInput(hexagon_output);
  }

  return kTfLiteOk;
}

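// Each output slot is described by a rank-4 max-size buffer. When the graph
// is configured with dynamic batch, the batch dimension of every output is
// overridden with the configured maximum batch size.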
OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims,
                                         int element_size) {
  op_node_.outputs.push_back(hexagon_nn_output());
  op_node_.outputs.back().elementsize = element_size;
  op_node_.outputs.back().rank = 4;
  // TODO(karimnosseir): What is a good way to estimate the max size?
  int batch_size, height_size, width_size, depth_size;
  GetDims(&batch_size, &height_size, &width_size, &depth_size, dims);
  auto& max_sizes = op_node_.outputs.back().max_sizes;
  if (graph_builder_->GraphHasDynamicBatch()) {
    max_sizes[0] = graph_builder_->GetMaxBatchSize();
  } else {
    max_sizes[0] = batch_size;
  }
  max_sizes[1] = height_size;
  max_sizes[2] = width_size;
  max_sizes[3] = depth_size;
  return TensorID(GetID(), op_node_.outputs.size() - 1);
}

OpBuilder::TensorID OpBuilder::AddOutput(int elementsize, int rank,
                                         const int* max_sizes_vect) {
  op_node_.outputs.push_back(hexagon_nn_output());
  op_node_.outputs.back().elementsize = elementsize;
  op_node_.outputs.back().rank = rank;
  auto& max_sizes = op_node_.outputs.back().max_sizes;
  for (int i = 0; i < rank; ++i) {
    max_sizes[i] = max_sizes_vect[i];
  }
  if (graph_builder_->GraphHasDynamicBatch()) {
    max_sizes[0] = graph_builder_->GetMaxBatchSize();
  }
  return TensorID(GetID(), op_node_.outputs.size() - 1);
}

OpBuilder::TensorID OpBuilder::AddOutput(
    int elementsize, int rank, const std::vector<int>& max_sizes_vect) {
  return AddOutput(elementsize, rank, max_sizes_vect.data());
}

const OpNode* OpBuilder::Build() {
  for (const auto& id : input_ids_) {
    op_node_.inputs.push_back(hexagon_nn_input());
    op_node_.inputs.back().src_id = id.first;
    op_node_.inputs.back().output_idx = id.second;
  }
  return &op_node_;
}

TfLiteStatus OpBuilder::ComputeAndAddMinAndMax(TfLiteContext* context,
                                               const TfLiteTensor& tensor) {
  float tensor_min, tensor_max;
  TF_LITE_ENSURE_STATUS(
      ComputeMinAndMaxQuantValues(tensor, &tensor_min, &tensor_max));
  auto* min_const_node = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&tensor_min), sizeof(tensor_min));
  auto* max_const_node = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&tensor_max), sizeof(tensor_max));
  AddInput(TensorID(min_const_node->GetID(), 0));
  AddInput(TensorID(max_const_node->GetID(), 0));

  return kTfLiteOk;
}

// Static
constexpr int OpBuilder::kScalarShape[];

OpBuilder* GraphBuilder::AddNode(int tflite_node_index) {
  OpBuilder* op = new OpBuilder(this, OP_Nop);
  builders_.emplace_back(op);
  op->SetNodeId(builders_.size());
  op->SetTFLiteNodeId(tflite_node_index);
  return op;
}

OpBuilder* GraphBuilder::AddNodeFromTfLiteOp(int op_type, TfLiteNode* node,
                                             int tflite_node_index) {
  OpBuilder* op = CreateOpBuilderFromTfLiteOp(op_type, node);
  builders_.emplace_back(op);
  op->SetNodeId(builders_.size());
  op->SetTFLiteNodeId(tflite_node_index);
  op->SetBuiltinData(node->builtin_data);
  op->SetTfLiteNode(node);
  return op;
}

void GraphBuilder::AddBatchSeqConfig(int max_size_for_batch,
                                     TfLiteIntArray* input_batch_dimensions,
                                     TfLiteIntArray* output_batch_dimensions) {
  OpBuilder* batch_seq_node =
      CreateBatchSeqBuilder(this, OP_BatchSeqConfig, max_size_for_batch,
                            input_batch_dimensions, output_batch_dimensions);
  builders_.emplace_back(batch_seq_node);
  batch_seq_node->SetNodeId(builders_.size());
  batch_seq_node->PopulateSubGraph(nullptr, nullptr, nullptr);
  max_size_for_batch_ = max_size_for_batch;
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite