/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

#include <algorithm>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"

#ifdef __ANDROID__
#include <sys/system_properties.h>
#endif

#if defined __ANDROID__ || defined __unix__
#define TFLITE_NNAPI_ALLOW_MMAP_SHARING
#include <sys/mman.h>
#include <unistd.h>
#endif

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
#include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
#include "tensorflow/lite/delegates/utils.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
#include "tensorflow/lite/nnapi/nnapi_util.h"
#include "tensorflow/lite/util.h"

namespace tflite {
namespace {

// Returns the enum name corresponding to the given error code if the given
// value corresponds to one of the error codes in the enumeration above, or
// a message with the unknown code otherwise.
// LINT.IfChange(NnApiErrorDescription)
std::string NnApiErrorDescription(int error_code) {
  switch (error_code) {
    case ANEURALNETWORKS_NO_ERROR:
      return "ANEURALNETWORKS_NO_ERROR";
    case ANEURALNETWORKS_OUT_OF_MEMORY:
      return "ANEURALNETWORKS_OUT_OF_MEMORY";
    case ANEURALNETWORKS_INCOMPLETE:
      return "ANEURALNETWORKS_INCOMPLETE";
    case ANEURALNETWORKS_UNEXPECTED_NULL:
      return "ANEURALNETWORKS_UNEXPECTED_NULL";
    case ANEURALNETWORKS_BAD_DATA:
      return "ANEURALNETWORKS_BAD_DATA";
    case ANEURALNETWORKS_OP_FAILED:
      return "ANEURALNETWORKS_OP_FAILED";
    case ANEURALNETWORKS_BAD_STATE:
      return "ANEURALNETWORKS_BAD_STATE";
    case ANEURALNETWORKS_UNMAPPABLE:
      return "ANEURALNETWORKS_UNMAPPABLE";
    case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
      return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE";
    case ANEURALNETWORKS_UNAVAILABLE_DEVICE:
      return "ANEURALNETWORKS_UNAVAILABLE_DEVICE";
    case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT:
      return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT";
    case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT:
      return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT";
    case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT:
      return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT";
    case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT:
      return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT";
    case ANEURALNETWORKS_DEAD_OBJECT:
      return "ANEURALNETWORKS_DEAD_OBJECT";
    default:
      return "Unknown NNAPI error code: " + std::to_string(error_code);
  }
}
// LINT.ThenChange()

#define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno)  \
  do {                                                                      \
    const auto _code = (code);                                              \
    const auto _call_desc = (call_desc);                                    \
    if (_code != ANEURALNETWORKS_NO_ERROR) {                                \
      const auto error_desc = NnApiErrorDescription(_code);                 \
      TF_LITE_KERNEL_LOG(context,                                           \
                         "NN API returned error %s at line %d while %s.\n", \
                         error_desc.c_str(), __LINE__, _call_desc);         \
      *p_errno = _code;                                                     \
      return kTfLiteError;                                                  \
    }                                                                       \
  } while (0)

#define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \
                                                   p_tensor, p_errno)        \
  do {                                                                       \
    const auto _code = (code);                                               \
    const auto _call_desc = (call_desc);                                     \
    if (_code != ANEURALNETWORKS_NO_ERROR) {                                 \
      const auto error_desc = NnApiErrorDescription(_code);                  \
      TF_LITE_KERNEL_LOG(context,                                            \
                         "NN API returned error %s at line %d while %s "     \
                         "for tensor '%s'.\n",                               \
                         error_desc.c_str(), __LINE__, _call_desc,           \
                         (p_tensor)->name ? (p_tensor)->name : "no-name");   \
      *p_errno = _code;                                                      \
      return kTfLiteError;                                                   \
    }                                                                        \
  } while (0)

bool IsFloat(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrUInt8(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
      return true;
    default:
      return false;
  }
}

bool IsQuantized(TfLiteType type) {
  switch (type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
      return true;
    default:
      // kTfLiteInt16 isn't supported as quantized type yet.
      return false;
  }
}

bool IsInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrQuantized(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
    case kTfLiteInt8:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsFloatQuantizedOrInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
    case kTfLiteInt8:
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsScalarInputSupported(int builtin_code) {
  switch (builtin_code) {
    case kTfLiteBuiltinAdd:
    case kTfLiteBuiltinMul:
    case kTfLiteBuiltinSub:
    case kTfLiteBuiltinDiv:
    case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinNotEqual:
    case kTfLiteBuiltinGreater:
    case kTfLiteBuiltinGreaterEqual:
    case kTfLiteBuiltinLess:
    case kTfLiteBuiltinLessEqual:
    case kTfLiteBuiltinPow:
    case kTfLiteBuiltinMaximum:
    case kTfLiteBuiltinMinimum:
    case kTfLiteBuiltinPrelu:
    case kTfLiteBuiltinLeakyRelu:
      return true;
    default:
      return false;
  }
}

// Check if the operation requires explicit conversion from int8 to uint8
// values.
bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
                        const TfLiteNode* node) {
  const int input_id = node->inputs->data[0];
  const TfLiteType input_type = context->tensors[input_id].type;
  switch (builtin_code) {
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinDepthwiseConv2d:
    case kTfLiteBuiltinFullyConnected: {
      if (input_type == kTfLiteInt8) {
        const int weights_id = node->inputs->data[1];
        const auto& weights_tensor = context->tensors[weights_id];
        if ((weights_tensor.type == kTfLiteInt8 ||
             weights_tensor.type == kTfLiteUInt8) &&
            weights_tensor.quantization.type == kTfLiteAffineQuantization) {
          return true;
        }
      }
      return false;
    }
    case kTfLiteBuiltinSelect: {
      const auto value_type = context->tensors[node->inputs->data[1]].type;
      return value_type == kTfLiteInt8;
    }
    case kTfLiteBuiltinAdd:
    case kTfLiteBuiltinArgMax:
    case kTfLiteBuiltinArgMin:
    case kTfLiteBuiltinAveragePool2d:
    case kTfLiteBuiltinBatchToSpaceNd:
    case kTfLiteBuiltinConcatenation:
    case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinExpandDims:
    case kTfLiteBuiltinGather:
    case kTfLiteBuiltinGreater:
    case kTfLiteBuiltinGreaterEqual:
    case kTfLiteBuiltinHardSwish:
    case kTfLiteBuiltinL2Normalization:
    case kTfLiteBuiltinLess:
    case kTfLiteBuiltinLessEqual:
    case kTfLiteBuiltinLogistic:
    case kTfLiteBuiltinMaximum:
    case kTfLiteBuiltinMaxPool2d:
    case kTfLiteBuiltinMean:
    case kTfLiteBuiltinMinimum:
    case kTfLiteBuiltinMul:
    case kTfLiteBuiltinNotEqual:
    case kTfLiteBuiltinPad:
    case kTfLiteBuiltinPadv2:
    case kTfLiteBuiltinReduceMax:
    case kTfLiteBuiltinReduceMin:
    case kTfLiteBuiltinRelu:
    case kTfLiteBuiltinReluN1To1:
    case kTfLiteBuiltinRelu6:
    case kTfLiteBuiltinResizeBilinear:
    case kTfLiteBuiltinResizeNearestNeighbor:
    case kTfLiteBuiltinReshape:
    case kTfLiteBuiltinSlice:
    case kTfLiteBuiltinSoftmax:
    case kTfLiteBuiltinSpaceToBatchNd:
    case kTfLiteBuiltinSpaceToDepth:
    case kTfLiteBuiltinDepthToSpace:
    case kTfLiteBuiltinStridedSlice:
    case kTfLiteBuiltinSub:
    case kTfLiteBuiltinTanh:
    case kTfLiteBuiltinTile:
    case kTfLiteBuiltinTopkV2:
    case kTfLiteBuiltinTranspose: {
      return input_type == kTfLiteInt8;
    }
    default:
      return false;
  }
}

constexpr int kLstmFullKernelInputSize = 24;
// The 20-input version is deprecated and kept only to
// support old models. The latest version of the LSTM Full Kernel
// is the one with 24 inputs.
constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20;
constexpr int kLstmBasicKernelInputSize = 5;

inline bool isLstmBasicKernel(const TfLiteNode* node) {
  return node->inputs->size == kLstmBasicKernelInputSize;
}

inline bool isLstmFullKernel(const TfLiteNode* node) {
  return node->inputs->size == kLstmFullKernelInputSize ||
         node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize;
}

bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
                      const TfLiteNode* node) {
  switch (builtin_code) {
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinFullyConnected: {
      const int input_id = node->inputs->data[0];
      const int filter_id = node->inputs->data[1];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType filter_type = context->tensors[filter_id].type;
      return IsFloat(input_type) && IsQuantized(filter_type);
    }
    case kTfLiteBuiltinLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return isLstmFullKernel(node) && IsFloat(input_type) &&
             IsQuantized(weights_type);
    }
    case kTfLiteBuiltinUnidirectionalSequenceLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    case kTfLiteBuiltinBidirectionalSequenceLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    case kTfLiteBuiltinUnidirectionalSequenceRnn: {
      const int input_id = node->inputs->data[0];
      const int weights_id = node->inputs->data[1];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    default:
      return false;
  }
}

bool HasUnspecifiedDimension(const TfLiteTensor* tensor) {
  if (tensor->dims_signature) {
    for (int i : TfLiteIntArrayView(tensor->dims_signature)) {
      if (i == -1) return true;
    }
  }
  return false;
}

ANeuralNetworksOperandType ConvertTensorTypeToNNType(
    const TfLiteTensor* tensor, TfLiteType ann_type_equivalent) {
  int32_t nn_type = 0;
  float scale = 0.0f;
  int32_t zero_point = 0;
  switch (tensor->type) {
    case kTfLiteFloat32:
      nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
      break;
    case kTfLiteUInt8:
      nn_type = ann_type_equivalent == kTfLiteInt32
                    ? ANEURALNETWORKS_TENSOR_INT32
                    : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      if (scale == 0) {
        // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
        // with zero scale are not valid in NNAPI.
        scale = 1;
      }
      break;
    case kTfLiteInt8:
      nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      if (ann_type_equivalent == kTfLiteUInt8) {
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        zero_point += 128;
      } else if (ann_type_equivalent == kTfLiteInt32) {
        nn_type = ANEURALNETWORKS_TENSOR_INT32;
        zero_point += 128;
      }
      if (scale == 0) {
        // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
        // with zero scale are not valid in NNAPI.
        scale = 1;
      }
      break;
    case kTfLiteInt32:
      nn_type = ANEURALNETWORKS_TENSOR_INT32;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      break;
    case kTfLiteBool:
      nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
      break;
    case kTfLiteInt16:
      nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      break;
    default:
      break;
  }
  uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
  uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data);
  static uint32_t scalar_rank = 1;
  // treat scalar input as single cell tensor in NNAPI.
  if (tensor_rank == 0) {
    tensor_rank = scalar_rank;
    tensor_dims = &scalar_rank;
  }
  ANeuralNetworksOperandType nn_operand_type{
      .type = nn_type,
      .dimensionCount = tensor_rank,
      .dimensions = tensor_dims,
      .scale = scale,
      .zeroPoint = zero_point,
  };
  return nn_operand_type;
}

constexpr size_t kDefaultByteAlignmentForNNAPI = 16;

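// Computes how many padding bytes are needed to round byte_size up to a
// multiple of kDefaultByteAlignmentForNNAPI (e.g. a byte_size of 20 needs 12
// padding bytes to reach the next 16-byte boundary).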
static size_t getNumPaddingBytes(size_t byte_size) {
  size_t num_padding_bytes = 0;
  if (byte_size % kDefaultByteAlignmentForNNAPI) {
    num_padding_bytes = kDefaultByteAlignmentForNNAPI -
                        (byte_size % kDefaultByteAlignmentForNNAPI);
  }
  return num_padding_bytes;
}

// Return NNAPI device handle with the provided null-terminated device name.
// Returns kTfLiteError in case of any NNAPI error and if no device with the
// given name can be found.
TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context,
                             const char* device_name_ptr,
                             ANeuralNetworksDevice** result, int* nnapi_errno) {
  if (!device_name_ptr) return kTfLiteError;
  *result = nullptr;
  std::string device_name(device_name_ptr);
  uint32_t num_devices = 0;
  nnapi->ANeuralNetworks_getDeviceCount(&num_devices);

  for (uint32_t i = 0; i < num_devices; i++) {
    ANeuralNetworksDevice* device = nullptr;
    const char* buffer = nullptr;
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi->ANeuralNetworks_getDevice(i, &device),
        "Searching for target device", nnapi_errno);

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
        "Searching for target device", nnapi_errno);

    if (device_name == buffer) {
      *result = device;
      return kTfLiteOk;
    }
  }

  context->ReportError(context,
                       "Could not find the specified NNAPI accelerator: %s. "
                       "Must be one of: {%s}.",
                       device_name_ptr,
                       nnapi::GetStringDeviceNamesList().c_str());
  return kTfLiteError;
}

// Compute the hash of a TfLiteIntArray.
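// The per-element mixing below uses a golden-ratio-style hash-combine
// constant, in the spirit of the common boost::hash_combine pattern.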
uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) {
  constexpr auto kHashConst = 0x9e3779b97f4a7800ULL;
  uint64_t result = combine_with;
  for (auto i : TfLiteIntArrayView(int_array)) {
    result = result ^ (i + kHashConst + (result << 10) + (result >> 4));
  }
  return result;
}

bool HasZeroes(TfLiteIntArrayView array) {
  for (auto value : array) {
    if (value == 0) {
      return true;
    }
  }
  return false;
}

// Bit mask for tensor flags.
enum {
  NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
  NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
};

// Returns the SDK level to target when delegating to the given devices.
// The SDK level is the max of the ones supported by the devices or
// the current Android SDK level if no device is present.
TfLiteStatus GetTargetSdkVersion(
    TfLiteContext* context, const NnApi* nnapi,
    const std::vector<ANeuralNetworksDevice*>& device_handles,
    int* target_sdk_version, int* nnapi_errno) {
  *target_sdk_version = nnapi->android_sdk_version;
  int64_t devices_sdk_version = -1;
  for (const auto* device_handle : device_handles) {
    int64_t curr_device_sdk_version;
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context,
        nnapi->ANeuralNetworksDevice_getFeatureLevel(device_handle,
                                                     &curr_device_sdk_version),
        "Searching for target device", nnapi_errno);

    devices_sdk_version =
        std::max(curr_device_sdk_version, devices_sdk_version);
  }

  if ((devices_sdk_version > 0) &&
      // This second check is necessary since if the nnapi-reference device is
      // in the list of target devices the devices_sdk_version value will be
      // 1000.
      (devices_sdk_version < nnapi->android_sdk_version)) {
    TFLITE_LOG(TFLITE_LOG_INFO,
               "Changing Android NN SDK version %d to version "
               "supported by target devices: %lld",
               nnapi->android_sdk_version, devices_sdk_version);

    *target_sdk_version = devices_sdk_version;
  }

  return kTfLiteOk;
}

// Returns true if this delegate is configured to use a specific set of devices.
// This will happen either if:
// - accelerator_name option has been specified
// - NNAPI CPU implementation has been explicitly disabled.
// If exclude_nnapi_reference is true this method will return false if the
// accelerator_name in the delegate options is equal to "nnapi-reference".
bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
                            const NnApi* nnapi,
                            bool exclude_nnapi_reference = false) {
  const char* device_name_ptr = delegate_options.accelerator_name;
  std::string nnapi_cpu("nnapi-reference");
  bool has_selected_accelerator = device_name_ptr != nullptr;
  if (exclude_nnapi_reference && has_selected_accelerator) {
    if (nnapi_cpu == device_name_ptr) return false;
  }
  return (delegate_options.disallow_nnapi_cpu &&
          nnapi->android_sdk_version >=
              delegate::nnapi::kMinSdkVersionForNNAPI12) ||
         has_selected_accelerator;
}

// Fills the given result vector with the list of devices the given delegate
// is referring to.
// There are three possible results:
// - an empty array (not the full list of available accelerators,
//   for efficiency reasons) if no accelerator is chosen and the
//   disallow_nnapi_cpu delegate option is false.
// - A single element array with the target processor, if an accelerator name
//   is specified in the delegate options.
// - The full list of devices available on device less the nnapi reference
//   implementation if the delegate option disallow_nnapi_cpu has been
//   specified.
TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
                              const NnApi* nnapi, int* nnapi_errno,
                              std::vector<ANeuralNetworksDevice*>* result) {
  if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
    return kTfLiteError;
  }

  const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
  const char* device_name_ptr = delegate_options.accelerator_name;

  if (device_name_ptr != nullptr) {
    // User specified an accelerator to use.
    ANeuralNetworksDevice* nnapi_device = nullptr;
    TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr,
                                          &nnapi_device, nnapi_errno));
    result->push_back(nnapi_device);
  } else if (delegate_options.disallow_nnapi_cpu) {
    std::string nnapi_cpu("nnapi-reference");
    uint32_t num_devices = 0;
    nnapi->ANeuralNetworks_getDeviceCount(&num_devices);

    for (uint32_t i = 0; i < num_devices; i++) {
      ANeuralNetworksDevice* device = nullptr;
      const char* buffer = nullptr;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworks_getDevice(i, &device),
          "Getting list of available devices", nnapi_errno);
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
          "Getting list of available devices", nnapi_errno);
      if (nnapi_cpu != buffer) {
        result->push_back(device);
      }
    }
  }

  return kTfLiteOk;
}

}  // namespace

namespace delegate {
namespace nnapi {

#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) {
  if (name && size > 0) {
    nnapi_ = nnapi;
    byte_size_ = size;
    fd_ = nnapi_->ASharedMemory_create(name, size);
    data_ptr_ = reinterpret_cast<uint8_t*>(
        mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
    nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
                                               fd_, 0, &nn_memory_handle_);
  }
}
#else
NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/,
                   size_t /*size*/)
    : nnapi_(nullptr) {}
#endif

NNMemory::~NNMemory() {
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
  if (data_ptr_) {
    munmap(data_ptr_, byte_size_);
  }
  if (nn_memory_handle_) {
    nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
  }
  if (fd_ > 0) close(fd_);
#endif
}

class DequantizeMapping {
 public:
  int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
    for (const auto& element : mapping_) {
      if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
        return std::get<2>(element);
      }
    }
    return -1;
  }

  void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
    // This assumes it is not already mapped.
    mapping_.emplace_back(ann_index, type, dequantized_ann_index);
  }

 private:
  // Each tuple specifies the ANN (quantized) tensor index, the desired
  // floating-point type and the matching ANN (dequantized) tensor index. This
  // could use a map but instead std::vector is used to keep code size lower.
  std::vector<std::tuple<int, TfLiteType, int>> mapping_;
};

// Abstract builder for building an op in the NN API graph. This handles
// the disparity between TFLite and NN API operand types. NN API has singular
// operands for both tensors and parameters, and TFLite separates the two.
class NNAPIOpBuilder {
 public:
  NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
                 OperandMapping* tensor_mapping,
                 DequantizeMapping* dequantize_mapping,
                 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
                     allocation_mapping,
                 std::vector<int>* nnapi_to_tflite_op_mapping,
                 ANeuralNetworksModel* nn_model, int* nnapi_errno,
                 bool allow_dynamic_dimensions)
      : nnapi_(nnapi),
        context_(context),
        operand_mapping_(tensor_mapping),
        dequantize_mapping_(dequantize_mapping),
        allocation_memory_mapping_(allocation_mapping),
        nnapi_to_tflite_op_mapping_(nnapi_to_tflite_op_mapping),
        nn_model_(nn_model),
        nnapi_errno_(nnapi_errno),
        allow_dynamic_dimensions_(allow_dynamic_dimensions) {}

  TfLiteStatus AddScalarBoolOperand(bool value) {
    return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL);
  }

  TfLiteStatus AddScalarInt32Operand(int32_t value) {
    return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
  }

  TfLiteStatus AddScalarFloat32Operand(float value) {
    return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
  }

  TfLiteStatus AddVectorInt32Operand(const int32_t* values,
                                     uint32_t num_values) {
    return AddVectorOperand<int32_t>(values, num_values,
                                     ANEURALNETWORKS_TENSOR_INT32,
                                     /*scale=*/0.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values,
                                     float scale, int32_t zero_point) {
    return AddVectorOperand<int32_t>(
        values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point);
  }

  TfLiteStatus AddVectorInt16Operand(const int16_t* values,
                                     uint32_t num_values) {
    return AddVectorOperand<int16_t>(values, num_values,
                                     ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
                                     /*scale=*/1.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) {
    return AddVectorOperand<int8_t>(values, num_values,
                                    ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
                                    /*scale=*/1.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorFloat32Operand(const float* values,
                                       uint32_t num_values) {
    return AddVectorOperand<float>(values, num_values,
                                   ANEURALNETWORKS_TENSOR_FLOAT32);
  }

  TfLiteStatus AddPoolingParams(void* data) {
    auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
    AddScalarInt32Operand(builtin->padding);
    AddScalarInt32Operand(builtin->stride_width);
    AddScalarInt32Operand(builtin->stride_height);
    AddScalarInt32Operand(builtin->filter_width);
    AddScalarInt32Operand(builtin->filter_height);
    AddScalarInt32Operand(builtin->activation);
    return kTfLiteOk;
  }

  TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op,
                              int tensor_flags = 0) {
    return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags);
  }

  TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) {
    return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_,
                     tensor_flags);
  }

  TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
    std::vector<uint32_t> dims(dimension_count, 0);
    return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
  }

  TfLiteStatus AddStateFloat32Tensor(int tensor_index,
                                     int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddFloat32OutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ann_tensor_index_out);
  }

  TfLiteStatus AddStateInt16Tensor(int tensor_index,
                                   int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddAdditionalOutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale,
        tensor->params.zero_point, ann_tensor_index_out);
  }

  TfLiteStatus AddStateInt8AsymTensor(int tensor_index,
                                      int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddAdditionalOutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale,
        tensor->params.zero_point, ann_tensor_index_out);
  }

  // Add a constant tensor with a single element, intended for broadcast
  // capable ops.
  TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) {
    if (!is_quantized) {
      return AddVectorFloat32Operand(&value, 1);
    } else {
      // in the case that we need to add a quantized tensor, set the value to
      // 64, zero_point to be 0 and adjust scale accordingly.
      const uint8_t quant8_value = 64;
      return AddVectorOperand<uint8_t>(&quant8_value, 1,
                                       ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
                                       value / quant8_value, 0);
    }
  }

  // Calculate the scale and zero_point for 8-bit unsigned tensor, given float
  // min and max. zero_point is clamped to [0, 255].
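  // For example (illustrative values only): min = -1.f and max = 1.f give
  // scale = 2 / 255 and zero_point = 127 after the float-to-int truncation
  // below.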
  TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale,
                                           int* zero_point) {
    if (max < min) return kTfLiteError;
    *scale = (max - min) / 255.f;
    if (min > 0.f) {
      *zero_point = 0;
    } else if (max < 0.f) {
      *zero_point = 255;
    } else {
      *zero_point = (0.f - min) / (*scale);
    }
    return kTfLiteOk;
  }

  // Lower hardswish according to the following equation:
  // hard_swish[x] = x (ReLU6(x + 3)) / 6 == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6
  // = 0.5x * Relu_N1_to_1(x/3) + 0.5x
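  // The lowering below emits four NNAPI operations: a MUL of x by 1/3 with a
  // fused RELU1 (stage 1), a MUL of x by 0.5 (stage 2), a MUL of the two
  // intermediate results (stage 3) and a final ADD (stage 4).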
  TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index,
                                                  int lite_output_index,
                                                  bool need_int8_conversion,
                                                  int lite_node_index) {
    const TfLiteTensor& tensor = context_->tensors[lite_input_index];
    float input_scale = tensor.params.scale;
    int input_zero_point = tensor.params.zero_point;
    float input_min = 0.f;
    float input_max = 0.f;
    int tensor_flags = 0;
    if (need_int8_conversion) {
      tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION;
      input_zero_point += 128;
    }
    bool is_quantized = false;
    int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
    if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) {
      is_quantized = true;
      nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
      input_min = (0 - input_zero_point) * input_scale;
      input_max = (255 - input_zero_point) * input_scale;
    }

    // Stage1 : s1 = Relu1(x * 1/3)
    float s1_output_min = 0.f;
    float s1_output_max = 0.f;
    int s1_out_ann_index = 0;
    {
      float s1_output_scale = 0.f;
      int s1_output_zero_point = 0;
      if (is_quantized) {
        // clamp the output range to [-1, 1] if needed.
        s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f;
        s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f;
        CalculateQuantizationParams(s1_output_min, s1_output_max,
                                    &s1_output_scale, &s1_output_zero_point);
      }
      TF_LITE_ENSURE_OK(context_,
                        AddTensorInput(lite_input_index, false, tensor_flags));
      const float value3f = 1.f / 3.f;
      TF_LITE_ENSURE_OK(context_,
                        AddSingleValueConstantTensor(value3f, is_quantized));
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1));
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s1_output_scale, s1_output_zero_point,
              &s1_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage2 : s2 = x / 2
    float s2_output_min = input_min / 2.f;
    float s2_output_max = input_max / 2.f;
    int s2_out_ann_index = 0;
    {
      float s2_output_scale = input_scale / 2.0f;
      int s2_output_zero_point = input_zero_point;
      TF_LITE_ENSURE_OK(context_,
                        AddTensorInput(lite_input_index, false, tensor_flags));
      const float value2f = 0.5f;
      TF_LITE_ENSURE_OK(context_,
                        AddSingleValueConstantTensor(value2f, is_quantized));
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s2_output_scale, s2_output_zero_point,
              &s2_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage 3 : s3 = s1 * s2
    int s3_out_ann_index = 0;
    {
      augmented_inputs_.push_back(s1_out_ann_index);
      augmented_inputs_.push_back(s2_out_ann_index);
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      float s3_output_scale = 0.f;
      int s3_output_zero_point = 0;
      if (is_quantized) {
        // the min for stage 3 is always 0.0f.
        float s3_output_min = 0.f;
        // the max for stage 3 is max(s1_min * s2_min, s1_max * s2_max).
        float s3_output_max =
            s1_output_max * s2_output_max > s1_output_min * s2_output_min
                ? s1_output_max * s2_output_max
                : s1_output_min * s2_output_min;
        CalculateQuantizationParams(s3_output_min, s3_output_max,
                                    &s3_output_scale, &s3_output_zero_point);
      }
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s3_output_scale, s3_output_zero_point,
              &s3_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage 4: y = s3 + s2
    {
      augmented_inputs_.push_back(s2_out_ann_index);
      augmented_inputs_.push_back(s3_out_ann_index);
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      TF_LITE_ENSURE_OK(context_,
                        AddTensorOutput(lite_output_index, tensor_flags));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
    }

    return kTfLiteOk;
  }

  // Adds the operation to the model and maps the operation to the originating
  // TFLite one.
  TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type,
                                   uint32_t input_count, const uint32_t* inputs,
                                   uint32_t output_count,
                                   const uint32_t* outputs,
                                   int lite_node_index) {
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperation(
            nn_model_, type, input_count, inputs, output_count, outputs),
        "adding operation", nnapi_errno_);
    nnapi_to_tflite_op_mapping_->push_back(lite_node_index);
    return kTfLiteOk;
  }

  // Adds a Dequantize operator and replaces the input tensor index with the
  // dequantized version. If the dequantized version of the operator already
  // exists then it is not added again.
  TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index,
                             TfLiteType dequantized_type, int lite_node_index) {
    const int ann_index =
        operand_mapping_->lite_index_to_ann(lite_tensor_index);
    int dequantized_ann_index =
        dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);

    if (dequantized_ann_index == -1) {
      // The dequantized version does not exist yet, it has to be added: a new
      // Dequantize operation is added, yielding a new tensor.
      const TfLiteTensor& tensor = context_->tensors[lite_tensor_index];
      ANeuralNetworksOperandType operand_type{
          ANEURALNETWORKS_TENSOR_FLOAT32,
          static_cast<uint32_t>(tensor.dims->size),
          reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context_,
          nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
          "adding operand", nnapi_errno_);
      dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand();

      // Add Dequantize operation.
      const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
      const uint32_t dequantize_output[1] = {
          static_cast<uint32_t>(dequantized_ann_index)};
      TF_LITE_ENSURE_OK(
          context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE,
                                        /*input_count=*/1, dequantize_input,
                                        /*output_count=*/1, dequantize_output,
                                        lite_node_index));
      dequantize_mapping_->Add(ann_index, dequantized_type,
                               dequantized_ann_index);
    }

    // The input for the original operation is modified so that the operation
    // now uses the dequantized tensor as input.
    augmented_inputs_[nn_input_index] = dequantized_ann_index;

    return kTfLiteOk;
  }

  // Finish emitting the op (of type `type`) into the NN API.
  TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type,
                                    int lite_node_index) {
    // Actually add a NN API operation
    TF_LITE_ENSURE_OK(context_,
                      AddOperationToModel(
                          type, static_cast<uint32_t>(augmented_inputs_.size()),
                          augmented_inputs_.data(),
                          static_cast<uint32_t>(augmented_outputs_.size()),
                          augmented_outputs_.data(), lite_node_index));
    augmented_inputs_.clear();
    augmented_outputs_.clear();
    return kTfLiteOk;
  }

  TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index,
                                                   int nn_type) {
    const TfLiteTensor* tensor = &context_->tensors[tensor_index];
    TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1);

    ANeuralNetworksOperandType operand_type{.type = nn_type};
    RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", tensor, nnapi_errno_);
    int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
    if (ann_tensor_index != -1) {
      augmented_inputs_.push_back(ann_tensor_index);
      return kTfLiteOk;
    }
    // Allocate a new tensor index
    ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
    augmented_inputs_.push_back(ann_tensor_index);

    const TfLiteType tensor_type = tensor->type;
    TfLiteType nn_type_equivalent;
    TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type,
                                                       &nn_type_equivalent));
    if (tensor_type != nn_type_equivalent) {
      operand_mapping_->add_type_conversion(tensor_index, nn_type_equivalent);
    }
    return kTfLiteOk;
  }

  template <typename T>
  TfLiteStatus AddNewInputConstantTensor(
      int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims,
      const std::vector<T>& tensor_value,
      const TfLiteQuantizationParams& quant_params, int* tensor_index) {
    TF_LITE_ENSURE_OK(context_,
                      context_->AddTensors(context_, 1, tensor_index));

    TfLiteTensor* new_tensor = &context_->tensors[*tensor_index];
    new_tensor->type = type;
    new_tensor->allocation_type = kTfLiteDynamic;
    new_tensor->params = quant_params;

    // Not removing the new tensor in case of resizing errors since it will
    // be cleared by the context
    TF_LITE_ENSURE_OK(
        context_,
        context_->ResizeTensor(
            context_, new_tensor,
            // Resize Tensor takes ownership of the dims array passed as param
            TfLiteIntArrayCopy(dims)));

    memcpy(new_tensor->data.raw,
           reinterpret_cast<const char*>(tensor_value.data()),
           tensor_value.size() * sizeof(T));

    const uint32_t tensor_rank = static_cast<uint32_t>(dims->size);
    const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data);
    ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
                                            quant_params.scale,
                                            quant_params.zero_point};

    const int ann_tensor_index =
        operand_mapping_->add_delegate_generated_input_ann_tensors_operand();

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", nnapi_errno_);

    augmented_inputs_.push_back(ann_tensor_index);

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_setOperandValue(
            nn_model_, ann_tensor_index, new_tensor->data.raw,
            new_tensor->bytes),
        "setting new operand value", nnapi_errno_);

    return kTfLiteOk;
  }

  template <typename T>
  TfLiteStatus AddNewInputConstantTensor(
      int32_t nn_type, TfLiteType type, std::initializer_list<int> dims,
      const std::vector<T>& tensor_value,
      const TfLiteQuantizationParams& quant_params, int* tensor_index) {
    TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size());
    dim_array->size = dims.size();
    std::copy(dims.begin(), dims.end(), dim_array->data);

    const auto result = AddNewInputConstantTensor(
        nn_type, type, dim_array, tensor_value, quant_params, tensor_index);
    TfLiteIntArrayFree(dim_array);
    return result;
  }

 private:
  // Returns a TF Lite type which has the same memory representation as a
  // provided NN API type.
  TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type,
                                      TfLiteType* type) {
    switch (nn_type) {
      case ANEURALNETWORKS_INT32:
        *type = kTfLiteInt32;
        return kTfLiteOk;
      case ANEURALNETWORKS_FLOAT32:
        *type = kTfLiteFloat32;
        return kTfLiteOk;
      default:
        context->ReportError(context,
                             "NN API Delegate: Can't get an equivalent TF Lite "
                             "type for provided NN API type: %d.\n",
                             nn_type);
        return kTfLiteError;
    }
  }

  template <typename T>
  TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
    ANeuralNetworksOperandType operand_type{.type = nn_type};
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", nnapi_errno_);
    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index,
                                                     &value, sizeof(T)),
        "setting new operand value", nnapi_errno_);
    augmented_inputs_.push_back(ann_index);
    return kTfLiteOk;
  }

  template <typename T>
  TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
                                int32_t nn_type, float scale,
                                int32_t zero_point) {
    ANeuralNetworksOperandType operand_type{.type = nn_type,
                                            .dimensionCount = 1,
                                            .dimensions = &num_values,
                                            .scale = scale,
                                            .zeroPoint = zero_point};

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", nnapi_errno_);

    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_setOperandValue(
            nn_model_, ann_index, values, sizeof(T) * num_values),
"settings new operand value", nnapi_errno_);
|
|
    augmented_inputs_.push_back(ann_index);
    return kTfLiteOk;
  }

  template <typename T>
  TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
                                int32_t nn_type) {
    return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f,
                            /*zero_point=*/0);
  }

  TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
                                      const uint32_t* dimension_data,
                                      int* ann_index_out) {
    return AddAdditionalOutputTensor(
        dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32,
        /*scale=*/0.f, /*zero_point=*/0, ann_index_out);
  }

  TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count,
                                         const uint32_t* dimension_data,
                                         int32_t nn_type, float scale,
                                         int32_t zero_point,
                                         int* ann_index_out) {
    ANeuralNetworksOperandType operand_type{
        .type = nn_type,
        .dimensionCount = dimension_count,
        .dimensions = dimension_data,
        .scale = scale,
        .zeroPoint = zero_point,
    };
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", nnapi_errno_);
    const int ann_index = operand_mapping_->add_new_non_tensor_operand();
    augmented_outputs_.push_back(ann_index);
    if (ann_index_out) *ann_index_out = ann_index;
    return kTfLiteOk;
  }

  // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
  // This returns the NN API tensor index corresponding to the created tensor.
  // If another caller previously created a NN API tensor for `tensor_index`
  // then the existing one is returned.
  TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
                         std::vector<uint32_t>* indices, int tensor_flags = 0) {
    const bool scalar_as_tensor =
        tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
    const bool need_int8_conversion =
        tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
    const bool use_int8_asymm_signed =
        tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
    int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
    if (ann_tensor_index != -1) {
      indices->push_back(ann_tensor_index);
      return kTfLiteOk;
    }
    // Allocate a new tensor index
    ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);

    // Parameters needed for new type.
    int32_t nn_type = 0;
    float scale = 0.0f;
    int32_t zeroPoint = 0;
    ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params;
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    TfLiteType tensor_type = tensor->type;
    if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
      // For legacy reason, UINT8 weights in hybrid operators are actually INT8
      // values and should be interpreted as such.
      tensor_type = kTfLiteInt8;
    }
    switch (tensor_type) {
      case kTfLiteNoType:
        // Tensors added during initialization of Ops don't have a type yet and
        // should not be registered with the NNAPI.
        indices->push_back(-1);
        return kTfLiteOk;
      case kTfLiteFloat32:
        nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
        break;
      case kTfLiteUInt8:
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        if (scale == 0) {
          // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
          // NNAPI.
          scale = 1;
        }
        break;
      case kTfLiteInt8:
        // If explicit int8 conversion is needed, we still need
        // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
        if (use_int8_asymm_signed) {
          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
        } else if (need_int8_conversion) {
          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        } else {
          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
        }
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        if (tensor->quantization.type == kTfLiteAffineQuantization) {
          TfLiteAffineQuantization* quantization_params =
              static_cast<TfLiteAffineQuantization*>(
                  tensor->quantization.params);
          if (quantization_params->scale->size > 1) {
            // Set up per-channel quantization.
            ann_perchannel_params = {
                .channelDim = static_cast<uint32_t>(
                    quantization_params->quantized_dimension),
                .scaleCount =
                    static_cast<uint32_t>(quantization_params->scale->size),
                .scales = quantization_params->scale->data,
            };
            nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL;
            scale = 0.0f;
            zeroPoint = 0;
          } else if (quantization_params->scale->size == 1) {
            scale = quantization_params->scale->data[0];
            zeroPoint = quantization_params->zero_point->data[0];
          }
        }
        if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
          if (need_int8_conversion) {
            zeroPoint += 128;
            operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
          }
          if (scale == 0) {
            // QUANT8 tensors with zero scale are not valid in NNAPI.
            scale = 1;
          }
        }
        break;
      case kTfLiteInt32:
        nn_type = ANEURALNETWORKS_TENSOR_INT32;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        break;
      case kTfLiteBool:
        nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
        break;
      case kTfLiteInt16:
        nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        break;
      default:
        context_->ReportError(
            context_, "Failed to add NN API tensor: type %s is not supported.",
            TfLiteTypeGetName(tensor_type));
        return kTfLiteError;
    }
    bool has_unspecified_dimensions = HasUnspecifiedDimension(tensor);
    uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
    std::vector<uint32_t> dims_unspecified(tensor_rank, 0);
    if (has_unspecified_dimensions) {
      for (int i = 0; i < tensor->dims_signature->size; i++) {
        dims_unspecified[i] = tensor->dims_signature->data[i] == -1
                                  ? 0
                                  : tensor->dims_signature->data[i];
      }
    }
    uint32_t* tensor_dims =
        has_unspecified_dimensions && allow_dynamic_dimensions_
            ? dims_unspecified.data()
            : reinterpret_cast<uint32_t*>(tensor->dims->data);
    if (scalar_as_tensor && tensor_rank == 0) {
      // Use rank 1, shape {1} operand for TFLite scalar tensors.
      tensor_rank = 1;
      tensor_dims = &tensor_rank;
    }
    if (tensor_rank == 0) {
      // if the tensor_rank is 0, the dimension ptr must be nullptr.
      tensor_dims = nullptr;
    }

    ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
                                            scale, zeroPoint};
    RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
        "adding operand", tensor, nnapi_errno_);

    if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
      RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
          context_,
          nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
              nn_model_, ann_tensor_index, &ann_perchannel_params),
          "setting new operand per channel quantization params", tensor,
          nnapi_errno_);
    }
    if (tensor->allocation_type == kTfLiteMmapRo) {
      if (IsQuantized(tensor_type) && need_int8_conversion &&
          nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
        // We need to add a tensor and convert the weights into uint8.
        // Currently this is only needed for fully_connected. The new_tensor is
        // needed for lifetime management for the converted weights.
        int new_tensor_index = -1;
        TF_LITE_ENSURE_OK(context_,
                          context_->AddTensors(context_, 1, &new_tensor_index));
        TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
        new_tensor->type = kTfLiteUInt8;
        new_tensor->allocation_type = kTfLiteDynamic;
        new_tensor->params.scale = scale;
        new_tensor->params.zero_point = zeroPoint;
        // Not removing the new tensor in case of resizing errors since it will
        // be cleared by the context
        TF_LITE_ENSURE_OK(
            context_, context_->ResizeTensor(context_, new_tensor,
                                             // Resize Tensor takes ownership of
                                             // the dims array passed as param
                                             TfLiteIntArrayCopy(tensor->dims)));
        // Convert the int8 value into corresponding uint8 value;
        const auto num_elements = NumElements(tensor);
        for (int i = 0; i < num_elements; ++i) {
          new_tensor->data.uint8[i] = static_cast<const uint8_t>(
              static_cast<int32_t>(tensor->data.int8[i]) + 128);
        }
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
            context_,
            nnapi_->ANeuralNetworksModel_setOperandValue(
                nn_model_, ann_tensor_index, new_tensor->data.raw,
                new_tensor->bytes),
            "setting new operand value", tensor, nnapi_errno_);
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
      } else if (tensor->allocation &&
                 static_cast<const Allocation*>(tensor->allocation)->type() ==
                     Allocation::Type::kMMap) {
        const MMAPAllocation* mmap_alloc =
            static_cast<const MMAPAllocation*>(tensor->allocation);
        if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
          ANeuralNetworksMemory* ann_memory_handle = nullptr;
          nnapi_->ANeuralNetworksMemory_createFromFd(
              mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
              &ann_memory_handle);
          allocation_memory_mapping_->insert(
              std::make_pair(mmap_alloc, ann_memory_handle));
        }
        ANeuralNetworksMemory* ann_memory_handle =
            allocation_memory_mapping_->at(mmap_alloc);
        // Compute the offset to the base pointer of the MMAPAllocation.
        auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
                      reinterpret_cast<const uint8_t*>(mmap_alloc->base());
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
            context_,
            nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
                nn_model_, ann_tensor_index, ann_memory_handle, offset,
                tensor->bytes),
            "setting new operand value from memory", tensor, nnapi_errno_);
#endif
      } else {
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
            context_,
            nnapi_->ANeuralNetworksModel_setOperandValue(
                nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes),
            "setting new operand value", tensor, nnapi_errno_);
      }
    }
    indices->push_back(ann_tensor_index);
    return kTfLiteOk;
  }

  // Access to NNAPI.
  const NnApi* const nnapi_;

  // TfLiteContext for error handling.
  TfLiteContext* const context_;

  // Tracks relationship between indices.
  OperandMapping* const operand_mapping_;

  // Keeps mapping of ANN quantized tensor and float data type to equivalent
  // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
  // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
  // tensor #4 to a FLOAT32 tensor.
  DequantizeMapping* const dequantize_mapping_;

  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
      allocation_memory_mapping_;

  // Tracks for every operation in the NNAPI model the source TfLite model
  // node index.
  std::vector<int>* const nnapi_to_tflite_op_mapping_;

  // The NNAPI model.
  ANeuralNetworksModel* const nn_model_;

  // Inputs and outputs for the current op. These are augmented in the sense
  // that NN API uses operands for all arguments, not just tensors, unlike
  // TensorFlow Lite.
  std::vector<uint32_t> augmented_inputs_;
  std::vector<uint32_t> augmented_outputs_;

  // Return status code of the latest NNAPI call.
  int* nnapi_errno_;

  // Whether to allow dynamic batch size without re-compilation.
  bool allow_dynamic_dimensions_;
};  // NNAPIOpBuilder

namespace {
|
|
struct OpValidationContext {
|
|
bool is_valid;
|
|
std::vector<NNAPIValidationFailure>* validation_failures;
|
|
};
|
|
|
|
#define EXPECT_INPUT_TYPE_IN(actual_type, ...)                    \
  ExpectTypeIn(actual_type, {__VA_ARGS__},                        \
               NNAPIValidationFailureType::kUnsupportedInputType, \
               "Input type not in expected list " #__VA_ARGS__, &val_ctx)

inline void AddValidationFailure(NNAPIValidationFailureType failure_type,
                                 const char* message,
                                 OpValidationContext* val_ctx) {
  val_ctx->is_valid = false;

#ifdef NNAPI_VERBOSE_VALIDATION
  if (val_ctx->validation_failures) {
    val_ctx->validation_failures->push_back({failure_type, message});
  }
#endif
}

template <typename... Args>
inline void AddValidationFailureFmt(OpValidationContext* val_ctx,
                                    NNAPIValidationFailureType failure_type,
                                    const char* message_fmt, Args... args) {
  val_ctx->is_valid = false;
#ifdef NNAPI_VERBOSE_VALIDATION
  if (val_ctx->validation_failures) {
    size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1;
    std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]);
    snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...);

    val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()});
  }
#endif
}

inline bool Expect(bool condition, NNAPIValidationFailureType failure_type,
                   const char* message, OpValidationContext* val_ctx) {
  if (!condition) {
    AddValidationFailure(failure_type, message, val_ctx);
    return false;
  }
  return true;
}

template <typename... Args>
inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx,
                      NNAPIValidationFailureType failure_type,
                      const char* message_fmt, Args... args) {
  if (!condition) {
    AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...);
    return false;
  }
  return true;
}

inline bool ExpectTypeIn(TfLiteType actual_type,
                         std::initializer_list<TfLiteType> allowed_types,
                         NNAPIValidationFailureType failure_type,
                         const char* msg, OpValidationContext* val_ctx) {
  return Expect(std::find(allowed_types.begin(), allowed_types.end(),
                          actual_type) != allowed_types.end(),
                failure_type, msg, val_ctx);
}

inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version,
                                       OpValidationContext* val_ctx) {
  return ExpectFmt(curr_version >= min_version, val_ctx,
                   NNAPIValidationFailureType::kUnsupportedAndroidVersion,
                   "Android sdk version less than %d", min_version);
}

inline bool ExpectMaxOpVersion(int curr_version, int max_version,
                               OpValidationContext* val_ctx) {
  return ExpectFmt(curr_version <= max_version, val_ctx,
                   NNAPIValidationFailureType::kUnsupportedOperatorVersion,
                   "OP Version higher than %d", max_version);
}

inline bool ExpectOpVersion(int curr_version, int max_version,
                            OpValidationContext* val_ctx) {
  return ExpectFmt(curr_version <= max_version, val_ctx,
                   NNAPIValidationFailureType::kUnsupportedOperatorVersion,
                   "OP Version different from %d", max_version);
}

inline bool ExpectIsFloatOperator(const TfLiteContext* context,
                                  const TfLiteNode* node,
                                  OpValidationContext* val_ctx) {
  const auto input_type = context->tensors[node->inputs->data[0]].type;
  return Expect(IsFloat(input_type),
                NNAPIValidationFailureType::kUnsupportedInputType,
                "Input should be Float", val_ctx);
}

bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context,
                                  const TfLiteNode* node,
                                  OpValidationContext* val_ctx) {
  const auto input_type = context->tensors[node->inputs->data[0]].type;
  return Expect(IsFloatOrUInt8(input_type),
                NNAPIValidationFailureType::kUnsupportedInputType,
                "Input should be Float or UINT8", val_ctx);
}

bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context,
                                   const TfLiteNode* node,
                                   OpValidationContext* val_ctx) {
  const auto input_type = context->tensors[node->inputs->data[0]].type;
  return Expect(IsFloatOrQuantized(input_type),
                NNAPIValidationFailureType::kUnsupportedInputType,
                "Input should be Float or Quant8", val_ctx);
}

bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context,
                                  const TfLiteNode* node,
                                  OpValidationContext* val_ctx) {
  const auto input_type = context->tensors[node->inputs->data[0]].type;
  return Expect(IsFloatOrInt32(input_type),
                NNAPIValidationFailureType::kUnsupportedInputType,
                "Input should be Float or Int32", val_ctx);
}

bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context,
                                        const TfLiteNode* node,
                                        OpValidationContext* val_ctx) {
  const auto input_type = context->tensors[node->inputs->data[0]].type;
  return Expect(IsFloatQuantizedOrInt32(input_type),
                NNAPIValidationFailureType::kUnsupportedInputType,
                "Input should be Float, Quant8, or Int32", val_ctx);
}

// When using NN API version 1.0 or 1.1, the condition below must be true for
// quantized versions of the following ops:
// * CONV_2D
// * DEPTHWISE_CONV_2D
// * FULLY_CONNECTED (where filter actually stands for weights)
// The condition is relaxed and no longer required since version 1.2.
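// For example, with input_scale = 0.5 and filter_scale = 0.25, the op is only
// considered compliant here if output_scale > 0.125.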
bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context,
                                       const TfLiteNode* node,
                                       OpValidationContext* val_ctx) {
  const int input_id = node->inputs->data[0];
  const int filter_id = node->inputs->data[1];
  const int output_id = node->outputs->data[0];
  const float input_scale = context->tensors[input_id].params.scale;
  const float filter_scale = context->tensors[filter_id].params.scale;
  const float output_scale = context->tensors[output_id].params.scale;
  return Expect(input_scale * filter_scale < output_scale,
                NNAPIValidationFailureType::kNotRestrictedScaleCompliant,
                "When using NN API version 1.0 or 1.1, input_scale * "
                "filter_scale < output_scale must hold.",
                val_ctx);
}

}  // namespace

// Checks whether the given node can be mapped to an NNAPI operation.
// Returns true if the node is supported; otherwise returns false and, when
// verbose validation is enabled, appends the reasons to map_failures.
bool NNAPIDelegateKernel::Validate(
    const TfLiteContext* context, int builtin_code, int version,
    int android_sdk_version, const TfLiteNode* node,
    bool is_accelerator_specified,
    std::vector<NNAPIValidationFailure>* map_failures) {
  OpValidationContext val_ctx{true, map_failures};
  switch (builtin_code) {
case kTfLiteBuiltinAdd: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
|
ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
|
|
if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
|
|
Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data)
|
|
->activation == kTfLiteActNone,
|
|
NNAPIValidationFailureType::kNoActivationExpected,
|
|
"No activation function supported", &val_ctx);
|
|
}
|
|
} else {
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinArgMax:
|
|
case kTfLiteBuiltinArgMin: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
// Those operators were introduced in NNAPI 1.2.
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
      const TfLiteType input_type =
          context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
|
|
kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
|
|
|
|
const auto& axis_tensor = context->tensors[node->inputs->data[1]];
|
|
if (axis_tensor.type == kTfLiteInt64) {
|
|
Expect(
|
|
axis_tensor.allocation_type == kTfLiteMmapRo &&
|
|
*axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
|
|
*axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only supports axis as int32. If the axis type is int64 and "
|
|
"constant we can convert it to int32 if the value isn't too "
|
|
"large.",
|
|
&val_ctx);
|
|
} else {
|
|
Expect(axis_tensor.type == kTfLiteInt32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Axis should be Int32", &val_ctx);
|
|
}
|
|
if (builtin_code == kTfLiteBuiltinArgMax) {
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
|
|
Expect(builtin->output_type == kTfLiteInt32,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI only supports int32 output.", &val_ctx);
|
|
} else {
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data);
|
|
Expect(builtin->output_type == kTfLiteInt32,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI only supports int32 output.", &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinMul: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
|
ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
|
|
if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
|
|
Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data)
|
|
->activation == kTfLiteActNone,
|
|
NNAPIValidationFailureType::kNoActivationExpected,
|
|
"No activation function supported", &val_ctx);
|
|
}
|
|
} else {
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinAveragePool2d: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
|
// TODO(b/138756912): Large filter window would overflow on the
|
|
// quantized reference CPU path.
|
|
if (IsQuantized(context->tensors[node->inputs->data[0]].type)) {
|
|
Expect(is_accelerator_specified ||
|
|
(builtin->filter_width * builtin->filter_height <= 256),
|
|
NNAPIValidationFailureType::kUnsupportedOperandSize,
|
|
"Large filter window would overflow on the reference CPU path",
|
|
&val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinMaxPool2d: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinL2Pool2d: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectIsFloatOperator(context, node, &val_ctx);
|
|
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
|
Expect(builtin->activation == kTfLiteActNone,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"Before NNAPI 1.2 fused activation for l2_pool may not be "
|
|
"supported.",
|
|
&val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinConv2d: {
|
|
ExpectMaxOpVersion(version, 3, &val_ctx);
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
Expect(!IsHybridOperator(context, builtin_code, node),
|
|
NNAPIValidationFailureType::kUnsupportedHybridOperator,
|
|
"Hybrid operators not supported before NNAPI 1.2", &val_ctx);
|
|
ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
|
|
|
|
const auto& filter_tensor = context->tensors[node->inputs->data[1]];
|
|
if (filter_tensor.quantization.type == kTfLiteAffineQuantization) {
|
|
TfLiteAffineQuantization* quantization_params =
|
|
static_cast<TfLiteAffineQuantization*>(
|
|
filter_tensor.quantization.params);
|
|
Expect(quantization_params->scale->size <= 1,
|
|
NNAPIValidationFailureType::kUnsupportedQuantizationType,
|
|
"Per-channel quantized convolution not supported before NNAPI "
|
|
"1.2.",
|
|
&val_ctx);
|
|
}
|
|
}
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
|
|
input_type == kTfLiteUInt8) {
|
|
ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
|
|
}
|
|
auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
|
// TODO(b/132950584): Add support for Conv2D with omitted bias.
|
|
Expect(node->inputs->size == 3,
|
|
NNAPIValidationFailureType::kMissingRequiredOperand,
|
|
"Conv2D with omitted bias not supported", &val_ctx);
|
|
if (builtin->dilation_width_factor != 1 ||
|
|
builtin->dilation_height_factor != 1) {
|
|
Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinDepthwiseConv2d: {
|
|
ExpectMaxOpVersion(version, 3, &val_ctx);
|
|
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
|
|
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
if (input_type == kTfLiteUInt8) {
|
|
ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
|
|
}
|
|
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
|
Expect(builtin->dilation_width_factor == 1 &&
|
|
builtin->dilation_height_factor == 1,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"dilation_width_factor and dilation_height_factor expected to "
|
|
"be equal to 1",
|
|
&val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinFullyConnected: {
|
|
ExpectMaxOpVersion(version, 5, &val_ctx);
|
|
// TODO(b/132950584): Add support for FullyConnected with no bias.
|
|
Expect(node->inputs->size == 3 &&
|
|
node->inputs->data[2] != kTfLiteOptionalTensor,
|
|
NNAPIValidationFailureType::kMissingRequiredOperand,
|
|
"FullyConnected with no bias not supported", &val_ctx);
|
|
const auto output_type = context->tensors[node->outputs->data[0]].type;
|
|
Expect(output_type != kTfLiteInt16,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"Unsupported output of type kTfLiteInt16", &val_ctx);
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
Expect(!IsHybridOperator(context, builtin_code, node),
|
|
NNAPIValidationFailureType::kUnsupportedHybridOperator,
|
|
"Hybrid operators not supported before NNAPI 1.2", &val_ctx);
|
|
ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
|
|
}
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
|
|
input_type == kTfLiteUInt8) {
|
|
ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
|
|
}
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
|
|
Expect(!builtin->keep_num_dims,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"keep_num_dims == true not supported", &val_ctx);
|
|
} break;
|
|
    case kTfLiteBuiltinHardSwish: {
      // Hardswish is supported; for pre-Q devices it is decomposed into
      // basic ops. Note that for some NNAPI accelerators the optimized
      // TFLite kernels might even be faster.
      ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
    } break;
case kTfLiteBuiltinSoftmax: {
|
|
ExpectOpVersion(version, 2, &val_ctx);
|
|
const auto& input = context->tensors[node->outputs->data[0]];
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
const int input_rank = input.dims->size;
|
|
Expect(input_rank <= 4,
|
|
NNAPIValidationFailureType::kUnsupportedOperandRank,
|
|
"Input rank should be <= 4", &val_ctx);
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
Expect(
|
|
input_rank == 2 || input_rank == 4,
|
|
NNAPIValidationFailureType::kUnsupportedOperandRank,
|
|
"Before API level 29 only 2D and 4D input tensors were supported.",
|
|
&val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinReshape: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
Expect(node->inputs->size >= 2,
|
|
NNAPIValidationFailureType::kMissingRequiredOperand,
|
|
"Expected at least 2 inputs", &val_ctx);
|
|
if (node->inputs->size >= 2) {
|
|
Expect(context->tensors[node->inputs->data[1]].allocation_type ==
|
|
kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
|
|
"The shape input tensor must be constant.", &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinResizeBilinear: {
|
|
ExpectMaxOpVersion(version, 3, &val_ctx);
|
|
const auto& input = context->tensors[node->inputs->data[0]];
|
|
const auto output_dims = context->tensors[node->outputs->data[0]].dims;
|
|
Expect(input.dims->size == 4,
|
|
NNAPIValidationFailureType::kUnsupportedOperandRank,
|
|
"Input should have rank 4", &val_ctx);
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
Expect(node->inputs->size >= 2,
|
|
NNAPIValidationFailureType::kUnsupportedOperatorVariant,
|
|
"Expected at least 2 inputs", &val_ctx);
|
|
if (node->inputs->size >= 2) {
|
|
Expect(context->tensors[node->inputs->data[1]].allocation_type ==
|
|
kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
|
|
"The size input tensor must be constant.", &val_ctx);
|
|
}
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
Expect(output_dims->data[1] == output_dims->data[2],
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"Require width == height due to driver differences in NNAPI "
|
|
"< 1.2",
|
|
&val_ctx);
|
|
}
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
|
|
if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
|
|
Expect(!builtin->align_corners,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI does not support align_corners == true.", &val_ctx);
|
|
Expect(!builtin->half_pixel_centers,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI does not support half_pixel_centers == true.", &val_ctx);
|
|
}
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
Expect(input.type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI 1.0 & 1.1 only supports float input.", &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinResizeNearestNeighbor: {
|
|
ExpectMaxOpVersion(version, 3, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
Expect(node->inputs->size >= 2,
|
|
NNAPIValidationFailureType::kUnsupportedOperatorVariant,
|
|
"Expected at least 2 inputs", &val_ctx);
|
|
if (node->inputs->size >= 2) {
|
|
Expect(context->tensors[node->inputs->data[1]].allocation_type ==
|
|
kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
|
|
"The size input tensor must be constant.", &val_ctx);
|
|
}
|
|
auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
|
|
node->builtin_data);
|
|
if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
|
|
Expect(!builtin->align_corners,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI does not support align_corners == true.", &val_ctx);
|
|
Expect(!builtin->half_pixel_centers,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI does not support half_pixel_centers == true.", &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinSqueeze: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
|
|
if (android_sdk_version == kMinSdkVersionForNNAPI11) {
|
|
Expect(builtin->num_squeeze_dims != 0,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI 1.1 does not support null squeeze_dims properly.",
|
|
&val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinUnidirectionalSequenceLstm: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
|
|
Expect(!IsHybridOperator(context, builtin_code, node),
|
|
NNAPIValidationFailureType::kUnsupportedHybridOperator,
|
|
"Hybrid version of this op is not supported by NN API.", &val_ctx);
|
|
|
|
Expect(node->inputs->size == 20 || node->inputs->size == 24,
|
|
NNAPIValidationFailureType::kUnsupportedOperatorVariant,
|
|
"Supporting only operation with 20 or 24 inputs", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinL2Normalization: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
ExpectIsFloatOperator(context, node, &val_ctx);
|
|
|
|
const auto& input = context->tensors[node->inputs->data[0]];
|
|
        Expect(input.dims->size == 4,
               NNAPIValidationFailureType::kUnsupportedOperatorVariant,
               "Expected input of rank 4", &val_ctx);
}
|
|
auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
|
|
Expect(builtin->activation == kTfLiteActNone,
|
|
NNAPIValidationFailureType::kNoActivationExpected,
|
|
"Expected no activation", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinLocalResponseNormalization: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinLshProjection: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
|
|
if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
|
|
->type == kTfLiteLshProjectionSparse) {
|
|
// NNAPI does not support sparse projection correctly pre-Q
|
|
// (b/111751836).
|
|
Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI does not support sparse projection correctly pre-Q",
|
|
&val_ctx);
|
|
        Expect(node->inputs->size == 2,
               NNAPIValidationFailureType::kUnsupportedOperatorVariant,
               "NNAPI does not support weights for sparse projection.",
               &val_ctx);
}
|
|
} break;
|
|
case kTfLiteBuiltinConcatenation: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
|
|
->activation == kTfLiteActNone,
|
|
NNAPIValidationFailureType::kNoActivationExpected,
|
|
"No activation function supported", &val_ctx);
|
|
      Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
             NNAPIValidationFailureType::kUnsupportedOperandRank,
             "Input rank should be at most 4", &val_ctx);

if (context->tensors[node->inputs->data[0]].type == kTfLiteUInt8 &&
|
|
android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
auto first_param = context->tensors[node->inputs->data[0]].params;
|
|
for (int i = 1; i < node->inputs->size; i++) {
|
|
auto curr_param = context->tensors[node->inputs->data[i]].params;
|
|
          if (!Expect(curr_param.scale == first_param.scale &&
                          curr_param.zero_point == first_param.zero_point,
                      NNAPIValidationFailureType::kUnsupportedOperandValue,
                      "NNAPI 1.0 and 1.1 only support concatenating quantized "
                      "tensors with the same scale and zero point.",
                      &val_ctx)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinDequantize: {
|
|
Expect(version == 1 || version == 2,
|
|
NNAPIValidationFailureType::kUnsupportedOperatorVersion,
|
|
"Supported op versions are 1 and 2 only", &val_ctx);
|
|
|
|
const auto& input = context->tensors[node->inputs->data[0]];
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
|
|
} else {
|
|
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
|
|
|
|
if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
|
|
input.type == kTfLiteInt8) {
|
|
const auto zero_point = input.params.zero_point;
|
|
Expect(zero_point == 0,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NN API supports int8 type since version 1.2 but only for "
|
|
"symmetric quantization.",
|
|
&val_ctx);
|
|
}
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinFloor: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinRelu:
|
|
case kTfLiteBuiltinReluN1To1:
|
|
case kTfLiteBuiltinRelu6:
|
|
case kTfLiteBuiltinLogistic: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinTanh: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
const TfLiteType input_type =
|
|
context->tensors[node->inputs->data[0]].type;
|
|
      Expect(IsFloat(input_type) ||
                 (IsQuantized(input_type) &&
                  android_sdk_version >= kMinSdkVersionForNNAPI12),
             NNAPIValidationFailureType::kUnsupportedInputType,
             "NNAPI only supports float tanh; quantized tanh requires NNAPI "
             "1.2 or later.",
             &val_ctx);
} break;
|
|
case kTfLiteBuiltinSub: {
|
|
ExpectMaxOpVersion(version, 3, &val_ctx);
|
|
const TfLiteType input_type =
|
|
context->tensors[node->inputs->data[0]].type;
|
|
      Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
              IsFloat(input_type)) ||
                 (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
                  IsQuantized(input_type)) ||
                 (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
                  IsInt32(input_type)),
             NNAPIValidationFailureType::kUnsupportedInputType,
             "NNAPI supports float sub since 1.1, quantized sub since 1.2, "
             "and int32 sub since 1.3.",
             &val_ctx);
if (IsInt32(input_type)) {
|
|
Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data)
|
|
->activation == kTfLiteActNone,
|
|
NNAPIValidationFailureType::kNoActivationExpected,
|
|
"No activation function supported", &val_ctx);
|
|
}
|
|
const int input0_rank =
|
|
context->tensors[node->inputs->data[0]].dims->size;
|
|
const int input1_rank =
|
|
context->tensors[node->inputs->data[1]].dims->size;
|
|
Expect(input0_rank <= 4 && input1_rank <= 4,
|
|
NNAPIValidationFailureType::kUnsupportedOperandRank,
|
|
"Input rank must be <= 4", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinDiv: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only support float div.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinPad:
|
|
case kTfLiteBuiltinPadv2: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
|
|
const TfLiteIntArrayView input_shape(
|
|
context->tensors[node->inputs->data[0]].dims);
|
|
Expect(!HasZeroes(input_shape),
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NN API pad ops do not support input tensors with no elements",
|
|
&val_ctx);
|
|
|
|
Expect(node->inputs->size >= 2,
|
|
NNAPIValidationFailureType::kUnsupportedOperatorVariant,
|
|
"Expecting at least 2 inputs", &val_ctx);
|
|
|
|
if (node->inputs->size == 3) {
|
|
// This is going to be mapped with a PadV2
|
|
Expect(
|
|
android_sdk_version >= kMinSdkVersionForNNAPI12,
|
|
NNAPIValidationFailureType::kUnsupportedOperatorVariant,
|
|
"Specification of the padding value is supported from NNAPI 1.2.",
|
|
&val_ctx);
|
|
} else { // this is going to be mapped as Pad
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Only Float32 inputs are supported before NNAPI 1.2",
|
|
&val_ctx);
|
|
}
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinUnidirectionalSequenceRnn: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
Expect(!IsHybridOperator(context, builtin_code, node),
|
|
NNAPIValidationFailureType::kUnsupportedHybridOperator,
|
|
"Hybrid version of this op is not supported by NN API.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinSpaceToBatchNd: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinBatchToSpaceNd: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
auto crops = context->tensors[node->inputs->data[2]];
|
|
auto crops_data = crops.data.i32;
|
|
Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 &&
|
|
crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"All crops should be 0.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinStridedSlice: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinTranspose: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
// Note that the permutation input tensor value dictates the output
|
|
// dimensions.
|
|
// TODO(b/110888333): Support dynamically-sized tensors in delegates.
|
|
Expect((node->inputs->size > 1) &&
|
|
(context->tensors[node->inputs->data[1]].allocation_type ==
|
|
kTfLiteMmapRo),
|
|
NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
|
|
"Dynamically-sized tensors not supported.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinAbs:
|
|
case kTfLiteBuiltinExp:
|
|
case kTfLiteBuiltinLog:
|
|
case kTfLiteBuiltinRsqrt:
|
|
case kTfLiteBuiltinPow: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
ExpectIsFloatOperator(context, node, &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinSlice: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
const auto begin_type = context->tensors[node->inputs->data[1]].type;
|
|
const auto size_type = context->tensors[node->inputs->data[2]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
|
|
kTfLiteUInt8, kTfLiteInt8);
|
|
Expect(begin_type == kTfLiteInt32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Begin type should be Int32", &val_ctx);
|
|
Expect(size_type == kTfLiteInt32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Size type should be Int32", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinSin: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
ExpectIsFloatOperator(context, node, &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinTransposeConv: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
Expect((node->inputs->size > 1) &&
|
|
(context->tensors[node->inputs->data[0]].allocation_type ==
|
|
kTfLiteMmapRo) &&
|
|
(context->tensors[node->inputs->data[1]].allocation_type ==
|
|
kTfLiteMmapRo),
|
|
NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
|
|
"Dynamically-sized tensors not supported.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinSqrt: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
ExpectIsFloatOperator(context, node, &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinRnn: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
      Expect(node->inputs->size == 5,
             NNAPIValidationFailureType::kUnsupportedOperatorVariant,
             "Expected 5 inputs", &val_ctx);
if (node->inputs->size >= 2) {
|
|
Expect(
|
|
context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
|
|
kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only support float32 weights.", &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinSpaceToDepth: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
const TfLiteType input_type =
|
|
context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt8);
|
|
} break;
|
|
case kTfLiteBuiltinSvdf: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
      Expect(node->inputs->size == 5,
             NNAPIValidationFailureType::kUnsupportedOperatorVariant,
             "Expected 5 inputs", &val_ctx);
if (node->inputs->size >= 2) {
|
|
Expect(
|
|
context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
|
|
kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only support float32 weights.", &val_ctx);
|
|
}
|
|
Expect(android_sdk_version >= kMinSdkVersionForNNAPI11,
|
|
NNAPIValidationFailureType::kUnsupportedOperandRank,
|
|
"SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx);
|
|
Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
|
|
.type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Weights should be Float32", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinLstm: {
|
|
ExpectMaxOpVersion(version, 3, &val_ctx);
|
|
Expect(
|
|
android_sdk_version >= kMinSdkVersionForNNAPI11,
|
|
NNAPIValidationFailureType::kUnsupportedAndroidVersion,
|
|
"NNAPI 1.0 has a bug for optional tensors which would affect LSTM.",
|
|
&val_ctx);
|
|
Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 ||
|
|
!IsHybridOperator(context, builtin_code, node),
|
|
NNAPIValidationFailureType::kUnsupportedHybridOperator,
|
|
"Hybrid operators not supported before NNAPI 1.2.", &val_ctx);
|
|
|
|
const auto weight_input_index =
|
|
isLstmBasicKernel(node) ? 2 /* basic::kInputWeights */
|
|
: 4 /* full::kInputToOutputWeightsTensor */;
|
|
|
|
const TfLiteType weight_type =
|
|
context->tensors[node->inputs->data[weight_input_index]].type;
|
|
|
|
if (isLstmBasicKernel(node)) {
|
|
Expect(weight_type == kTfLiteUInt8,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Basic LSTM Kernels support only UINT8 weights", &val_ctx);
|
|
|
|
const auto input_quantization_params =
|
|
context->tensors[node->inputs->data[0]].params;
|
|
Expect(input_quantization_params.scale == 1. / 128. &&
|
|
input_quantization_params.zero_point == 128,
|
|
NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
|
|
"Invalid input quantization", &val_ctx);
|
|
|
|
const auto output_quantization_params =
|
|
context->tensors[node->outputs->data[0]].params;
|
|
Expect(output_quantization_params.scale == 1. / 128. &&
|
|
output_quantization_params.zero_point == 128,
|
|
NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
|
|
"Invalid output quantization", &val_ctx);
|
|
|
|
const auto cell_state_quantization_params =
|
|
context->tensors[node->outputs->data[1]].params;
|
|
Expect(cell_state_quantization_params.scale == 16. / 32768. ||
|
|
cell_state_quantization_params.zero_point == 0,
|
|
NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
|
|
"Invalid cell state quantization", &val_ctx);
|
|
|
|
auto is_const_tensor = [&node, &context](int tensor_idx) {
|
|
return context->tensors[node->inputs->data[tensor_idx]]
|
|
.allocation_type == kTfLiteMmapRo;
|
|
};
|
|
|
|
Expect(is_const_tensor(2 /* kInputWeights */),
|
|
NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
|
|
"Weights tensor should be constant", &val_ctx);
|
|
Expect(is_const_tensor(3 /* kInputBiases */),
|
|
NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
|
|
"Biases tensor should be constant", &val_ctx);
|
|
|
|
return val_ctx.is_valid;
|
|
} else {
|
|
if (node->inputs->size == 24) {
|
|
ExpectMinAndroidSdkVersion(android_sdk_version,
|
|
kMinSdkVersionForNNAPI12, &val_ctx);
|
|
}
|
|
|
|
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
|
Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 ||
|
|
weight_type == kTfLiteInt8,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Weight has to be Float32 or UINT8 or INT8", &val_ctx);
|
|
} else {
|
|
Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Weight has to be Float32 or UINT8", &val_ctx);
|
|
}
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinMean: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
|
|
Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
|
|
IsQuantized(context->tensors[node->inputs->data[0]].type),
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Expected Float32 or Quantized input", &val_ctx);
|
|
} else {
|
|
Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Expected Float32 input", &val_ctx);
|
|
}
|
|
Expect(context->tensors[node->outputs->data[0]].dims->size > 0,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI does not support generating a scalar as output for MEAN.",
|
|
&val_ctx);
|
|
|
|
auto input_param = context->tensors[node->inputs->data[0]].params;
|
|
auto output_param = context->tensors[node->outputs->data[0]].params;
|
|
Expect(input_param.scale == output_param.scale &&
|
|
input_param.zero_point == output_param.zero_point,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI requires that the input and output have the same "
|
|
"quantization parameters.",
|
|
&val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinEmbeddingLookup: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only support float32 values.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinHashtableLookup: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI only support float32 output.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinMaximum:
|
|
case kTfLiteBuiltinMinimum: {
|
|
ExpectMaxOpVersion(version, 3, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt8, kTfLiteInt32);
|
|
const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]];
|
|
if (operand0.dims->size == 0) {
|
|
Expect(operand0.allocation_type == kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Scalar operand should be constant", &val_ctx);
|
|
}
|
|
const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]];
|
|
if (operand1.dims->size == 0) {
|
|
Expect(operand1.allocation_type == kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Scalar operand should be constant", &val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinCast: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const TfLiteType input_type =
|
|
context->tensors[node->inputs->data[0]].type;
|
|
const TfLiteType output_type =
|
|
context->tensors[node->outputs->data[0]].type;
|
|
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
|
|
kTfLiteUInt8, kTfLiteInt8);
|
|
|
|
ExpectTypeIn(
|
|
output_type,
|
|
{kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
|
|
"kTfLiteUInt8, kTfLiteInt8.",
|
|
&val_ctx);
|
|
} else {
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
|
|
kTfLiteUInt8);
|
|
|
|
ExpectTypeIn(
|
|
output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
|
|
"kTfLiteUInt8.",
|
|
&val_ctx);
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinLeakyRelu:
|
|
case kTfLiteBuiltinPrelu: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt8);
|
|
} break;
|
|
case kTfLiteBuiltinTile: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8,
|
|
kTfLiteUInt8, kTfLiteInt32);
|
|
const auto multipliers_type =
|
|
context->tensors[node->inputs->data[1]].type;
|
|
Expect(multipliers_type == kTfLiteInt32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Multipliers should be Int32", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinLogicalOr:
|
|
case kTfLiteBuiltinLogicalAnd:
|
|
case kTfLiteBuiltinLogicalNot: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
Expect(input_type == kTfLiteBool,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Input should be bool", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinLess:
|
|
case kTfLiteBuiltinLessEqual:
|
|
case kTfLiteBuiltinGreater:
|
|
case kTfLiteBuiltinGreaterEqual:
|
|
case kTfLiteBuiltinEqual:
|
|
case kTfLiteBuiltinNotEqual: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt8, kTfLiteBool, kTfLiteInt32);
|
|
} break;
|
|
case kTfLiteBuiltinNeg: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32);
|
|
} break;
|
|
case kTfLiteBuiltinTopkV2: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto& input_type = context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
|
|
kTfLiteUInt8, kTfLiteInt8);
|
|
const auto& k_param = context->tensors[node->inputs->data[1]];
|
|
Expect(k_param.type == kTfLiteInt32 &&
|
|
k_param.allocation_type == kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"K param should be a constant of type Int32", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinSelect: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
|
|
&val_ctx);
|
|
const auto value_type = context->tensors[node->inputs->data[1]].type;
|
|
EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32,
|
|
kTfLiteUInt8, kTfLiteInt8);
|
|
TfLiteIntArray* condition_shape =
|
|
context->tensors[node->inputs->data[0]].dims;
|
|
TfLiteIntArray* input_shape =
|
|
context->tensors[node->inputs->data[1]].dims;
|
|
Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"Condition and inputs tensors should have the same shape",
|
|
&val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinGather: {
|
|
ExpectOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
const auto& positions = context->tensors[node->inputs->data[1]];
|
|
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
|
|
kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
|
|
|
|
      Expect(positions.type == kTfLiteInt32,
             NNAPIValidationFailureType::kUnsupportedInputType,
             "Positions type should be kTfLiteInt32", &val_ctx);
Expect(positions.dims->size != 0,
|
|
NNAPIValidationFailureType::kUnsupportedOperandRank,
|
|
"0-dimension args are not supported by NNAPI.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinBidirectionalSequenceLstm: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
Expect(!IsHybridOperator(context, builtin_code, node),
|
|
NNAPIValidationFailureType::kUnsupportedHybridOperator,
|
|
"Hybrid version of this op is not supported by NN API.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinExpandDims: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
|
|
kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
|
|
const auto axis = context->tensors[node->inputs->data[1]];
|
|
Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only supports constant int32 axis tensor.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinSplit: {
|
|
ExpectOpVersion(version, 3, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
// Tensor indices: split_dim: 0, value: 1
|
|
const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
|
|
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
|
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt8, kTfLiteInt32);
|
|
} else {
|
|
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt32);
|
|
}
|
|
const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
|
|
Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only supports constant int32 axis tensor.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinLogSoftmax: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
Expect(input_type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Input should be Float32.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinQuantize: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto value_type = context->tensors[node->inputs->data[0]].type;
|
|
Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"Value should be quantized or Float32.", &val_ctx);
|
|
if (IsQuantized(value_type)) {
|
|
const auto quantization_params =
|
|
context->tensors[node->inputs->data[0]].params;
|
|
Expect(quantization_params.scale > 0.f,
|
|
NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
|
|
"Quantization scale should be > 0.", &val_ctx);
|
|
}
|
|
const auto output_type = context->tensors[node->outputs->data[0]].type;
|
|
if (android_sdk_version < kMinSdkVersionForNNAPI13) {
|
|
Expect(output_type == kTfLiteUInt8,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"Output should be kTfLiteUInt8.", &val_ctx);
|
|
} else {
|
|
        ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
                     NNAPIValidationFailureType::kUnsupportedOutputType,
                     "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx);
}
|
|
const auto quantization_params =
|
|
context->tensors[node->outputs->data[0]].params;
|
|
Expect(quantization_params.scale > 0.f,
|
|
NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
|
|
"Quantization scale should be > 0.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinReduceAny: {
|
|
ExpectOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI does not support generating a scalar as output.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinReduceMin:
|
|
case kTfLiteBuiltinReduceMax: {
|
|
ExpectMaxOpVersion(version, 2, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
const auto input_tensor = context->tensors[node->inputs->data[0]];
|
|
const auto input_type = input_tensor.type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt8);
|
|
Expect(input_tensor.dims->size != 0,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI does not support generating a scalar as output.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinDepthToSpace: {
|
|
const TfLiteType input_type =
|
|
context->tensors[node->inputs->data[0]].type;
|
|
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
|
|
kTfLiteInt8);
|
|
} break;
|
|
case kTfLiteBuiltinReduceProd:
|
|
case kTfLiteBuiltinSum: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
|
|
&val_ctx);
|
|
Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
|
|
NNAPIValidationFailureType::kUnsupportedOutputType,
|
|
"NNAPI does not support generating a scalar as output", &val_ctx);
|
|
const auto input_type = context->tensors[node->inputs->data[0]].type;
|
|
Expect(input_type == kTfLiteFloat32,
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only supports floating point input.", &val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinElu: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
|
|
&val_ctx);
|
|
} break;
|
|
case kTfLiteBuiltinFill: {
|
|
ExpectOpVersion(version, 1, &val_ctx);
|
|
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
|
|
&val_ctx);
|
|
const auto& dims_tensor = context->tensors[node->inputs->data[0]];
|
|
Expect(IsConstantTensor(&dims_tensor),
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI doesn't support dynamic dimensions tensor.", &val_ctx);
|
|
EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64);
|
|
if (IsConstantTensor(&dims_tensor)) {
|
|
Expect(dims_tensor.dims->data[0] != 0,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI doesn't support generating scalars from FILL", &val_ctx);
|
|
if (dims_tensor.type == kTfLiteInt64) {
|
|
bool fit_in_int32 =
|
|
std::all_of(dims_tensor.data.i64,
|
|
dims_tensor.data.i64 + dims_tensor.dims->data[0],
|
|
[](int64_t dim) {
|
|
return std::numeric_limits<int32_t>::min() <= dim &&
|
|
dim <= std::numeric_limits<int32_t>::max();
|
|
});
|
|
Expect(fit_in_int32,
|
|
NNAPIValidationFailureType::kUnsupportedOperandValue,
|
|
"NNAPI only supports int32 dimensions tensor. If the "
|
|
"dimensions type is int64 and they are constant we can "
|
|
"convert them to int32 if the value isn't too large.",
|
|
&val_ctx);
|
|
}
|
|
}
|
|
const auto& value_tensor = context->tensors[node->inputs->data[1]];
|
|
EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32,
|
|
kTfLiteInt64);
|
|
if (value_tensor.type == kTfLiteInt64) {
|
|
Expect(
|
|
IsConstantTensor(&value_tensor) &&
|
|
*value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
|
|
*value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
|
|
NNAPIValidationFailureType::kUnsupportedInputType,
|
|
"NNAPI only supports int32 input. If the input type is int64 and "
|
|
"constant we can convert it to int32 if the value isn't too "
|
|
"large.",
|
|
&val_ctx);
|
|
}
|
|
} break;
|
|
default:
|
|
// All other operators are not mapped.
|
|
AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
|
|
"Unsupported operation type.", &val_ctx);
|
|
}
|
|
return val_ctx.is_valid;
|
|
} // NOLINT(readability/fn_size)
|
|
|
|
TfLiteStatus NNAPIDelegateKernel::Map(
|
|
TfLiteContext* context, int builtin_code, int version,
|
|
int android_sdk_version, const NNAPIOpMappingArgs& mapping_args,
|
|
ANeuralNetworksOperationType* nn_op_type) {
|
|
switch (builtin_code) {
|
|
case kTfLiteBuiltinAdd: {
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
*nn_op_type = ANEURALNETWORKS_ADD;
|
|
} break;
|
|
case kTfLiteBuiltinArgMax: {
|
|
*nn_op_type = ANEURALNETWORKS_ARGMAX;
|
|
} break;
|
|
case kTfLiteBuiltinArgMin: {
|
|
*nn_op_type = ANEURALNETWORKS_ARGMIN;
|
|
} break;
|
|
case kTfLiteBuiltinMul: {
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
*nn_op_type = ANEURALNETWORKS_MUL;
|
|
} break;
|
|
case kTfLiteBuiltinAveragePool2d: {
|
|
mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
|
|
*nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
|
|
} break;
|
|
case kTfLiteBuiltinMaxPool2d: {
|
|
mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
|
|
*nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
|
|
} break;
|
|
case kTfLiteBuiltinL2Pool2d: {
|
|
mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
|
|
*nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
|
|
} break;
|
|
case kTfLiteBuiltinConv2d: {
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->padding);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
// NNAPI supports dilated Conv2D since NNAPI 1.2.
|
|
if (builtin->dilation_width_factor != 1 ||
|
|
builtin->dilation_height_factor != 1) {
|
|
mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
|
|
mapping_args.builder->AddScalarInt32Operand(
|
|
builtin->dilation_width_factor);
|
|
mapping_args.builder->AddScalarInt32Operand(
|
|
builtin->dilation_height_factor);
|
|
}
|
|
*nn_op_type = ANEURALNETWORKS_CONV_2D;
|
|
} break;
|
|
case kTfLiteBuiltinDepthwiseConv2d: {
|
|
auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->padding);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
if (builtin->dilation_width_factor != 1 ||
|
|
builtin->dilation_height_factor != 1) {
|
|
mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format.
|
|
mapping_args.builder->AddScalarInt32Operand(
|
|
builtin->dilation_width_factor);
|
|
mapping_args.builder->AddScalarInt32Operand(
|
|
builtin->dilation_height_factor);
|
|
}
|
|
*nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
|
|
} break;
|
|
case kTfLiteBuiltinFullyConnected: {
|
|
auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
*nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
|
|
} break;
|
|
case kTfLiteBuiltinHardSwish: {
|
|
*nn_op_type = ANEURALNETWORKS_HARD_SWISH;
|
|
} break;
|
|
case kTfLiteBuiltinSoftmax: {
|
|
auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
|
|
// Optional scalar specifying the dimension the activation would be
|
|
// performed on is not added. Default to -1.
|
|
*nn_op_type = ANEURALNETWORKS_SOFTMAX;
|
|
} break;
|
|
case kTfLiteBuiltinReshape: {
|
|
*nn_op_type = ANEURALNETWORKS_RESHAPE;
|
|
} break;
|
|
case kTfLiteBuiltinResizeBilinear: {
|
|
const int output_id = mapping_args.node->outputs->data[0];
|
|
auto& output = mapping_args.context->tensors[output_id];
|
|
const int output_height = output.dims->data[1];
|
|
const int output_width = output.dims->data[2];
|
|
mapping_args.builder->AddScalarInt32Operand(output_width);
|
|
mapping_args.builder->AddScalarInt32Operand(output_height);
|
|
auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
|
|
mapping_args.node->builtin_data);
|
|
if (builtin->align_corners == true ||
|
|
builtin->half_pixel_centers == true) {
|
|
mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
|
|
}
|
|
*nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
|
|
} break;
|
|
case kTfLiteBuiltinResizeNearestNeighbor: {
|
|
const TfLiteTensor& new_shape =
|
|
mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
|
|
// NNAPI uses scalar inputs for height and width.
|
|
mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
|
|
mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
|
|
mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
|
|
auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
|
|
mapping_args.node->builtin_data);
|
|
if (builtin->align_corners == true ||
|
|
builtin->half_pixel_centers == true) {
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
|
|
}
|
|
*nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
|
|
} break;
|
|
case kTfLiteBuiltinSqueeze: {
|
|
auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
|
|
mapping_args.node->builtin_data);
|
|
// Note that we add the squeeze dimensions even if the dimensions
|
|
// were unspecified (empty), as NNAPI requires the operand.
|
|
mapping_args.builder->AddVectorInt32Operand(
|
|
builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
|
|
static_cast<uint32_t>(builtin->num_squeeze_dims));
|
|
*nn_op_type = ANEURALNETWORKS_SQUEEZE;
|
|
} break;
|
|
case kTfLiteBuiltinUnidirectionalSequenceLstm: {
|
|
auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
|
|
const bool hybrid_op = IsHybridOperator(
|
|
mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm,
|
|
mapping_args.node);
|
|
if (mapping_args.node->inputs->size == 24) {
|
|
// Add layer normalization tensors if they are provided.
|
|
for (int i = 20; i < 24; ++i) {
|
|
const int input_index = mapping_args.node->inputs->data[i];
|
|
if (input_index != kTfLiteOptionalTensor) {
|
|
mapping_args.builder->AddTensorInput(input_index, hybrid_op);
|
|
} else {
|
|
mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
|
|
}
|
|
}
|
|
} else {
|
|
for (int i = 0; i < 4; ++i) {
|
|
mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
|
|
}
|
|
}
|
|
|
|
*nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM;
|
|
} break;
|
|
case kTfLiteBuiltinL2Normalization: {
|
|
*nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
|
|
} break;
|
|
case kTfLiteBuiltinLocalResponseNormalization: {
|
|
auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->radius);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
|
|
*nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
|
|
} break;
|
|
case kTfLiteBuiltinLshProjection: {
|
|
auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
|
|
mapping_args.node->builtin_data);
|
|
int type = builtin->type;
|
|
// In Android Q+, NNAPI uses 3 to denote
|
|
// kTfLiteLshProjectionSparse.
|
|
const int kNNAPILshProjectionSparse = 3;
|
|
if (builtin->type == kTfLiteLshProjectionSparse) {
|
|
type = kNNAPILshProjectionSparse;
|
|
// Add NNAPI null weight operand.
|
|
mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
|
|
}
|
|
mapping_args.builder->AddScalarInt32Operand(type);
|
|
*nn_op_type = ANEURALNETWORKS_LSH_PROJECTION;
|
|
} break;
|
|
case kTfLiteBuiltinConcatenation: {
|
|
auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
|
|
mapping_args.node->builtin_data);
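      // A negative axis is interpreted relative to the rank of the first
      // input tensor.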
|
|
int axis = builtin->axis < 0
|
|
? mapping_args.context
|
|
->tensors[mapping_args.node->inputs->data[0]]
|
|
.dims->size +
|
|
builtin->axis
|
|
: builtin->axis;
|
|
mapping_args.builder->AddScalarInt32Operand(axis);
|
|
*nn_op_type = ANEURALNETWORKS_CONCATENATION;
|
|
} break;
|
|
case kTfLiteBuiltinDequantize: {
|
|
*nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
|
|
} break;
|
|
case kTfLiteBuiltinFloor: {
|
|
*nn_op_type = ANEURALNETWORKS_FLOOR;
|
|
} break;
|
|
case kTfLiteBuiltinRelu: {
|
|
*nn_op_type = ANEURALNETWORKS_RELU;
|
|
} break;
|
|
case kTfLiteBuiltinReluN1To1: {
|
|
*nn_op_type = ANEURALNETWORKS_RELU1;
|
|
} break;
|
|
case kTfLiteBuiltinRelu6: {
|
|
*nn_op_type = ANEURALNETWORKS_RELU6;
|
|
} break;
|
|
case kTfLiteBuiltinLogistic: {
|
|
*nn_op_type = ANEURALNETWORKS_LOGISTIC;
|
|
} break;
|
|
case kTfLiteBuiltinTanh: {
|
|
*nn_op_type = ANEURALNETWORKS_TANH;
|
|
} break;
|
|
case kTfLiteBuiltinSub: {
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
*nn_op_type = ANEURALNETWORKS_SUB;
|
|
} break;
|
|
case kTfLiteBuiltinDiv: {
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
*nn_op_type = ANEURALNETWORKS_DIV;
|
|
} break;
|
|
case kTfLiteBuiltinPad:
|
|
case kTfLiteBuiltinPadv2: {
|
|
      // We prefer to map to PAD, since it is more widely supported. We map
      // to PAD_V2 only when the padding value needs to be specified.
|
|
if (mapping_args.node->inputs->size == 2) {
|
|
*nn_op_type = ANEURALNETWORKS_PAD;
|
|
} else {
|
|
const int constant_value_id = mapping_args.node->inputs->data[2];
|
|
if (constant_value_id == kTfLiteOptionalTensor) {
|
|
*nn_op_type = ANEURALNETWORKS_PAD;
|
|
} else {
|
|
*nn_op_type = ANEURALNETWORKS_PAD_V2;
|
|
}
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinUnidirectionalSequenceRnn: {
|
|
auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->time_major);
|
|
*nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN;
|
|
} break;
|
|
case kTfLiteBuiltinSpaceToBatchNd: {
|
|
*nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
|
|
} break;
|
|
case kTfLiteBuiltinBatchToSpaceNd: {
|
|
*nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
|
|
} break;
|
|
case kTfLiteBuiltinStridedSlice: {
|
|
auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask);
|
|
*nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
|
|
} break;
|
|
case kTfLiteBuiltinTranspose: {
|
|
*nn_op_type = ANEURALNETWORKS_TRANSPOSE;
|
|
} break;
|
|
case kTfLiteBuiltinAbs: {
|
|
*nn_op_type = ANEURALNETWORKS_ABS;
|
|
} break;
|
|
case kTfLiteBuiltinExp: {
|
|
*nn_op_type = ANEURALNETWORKS_EXP;
|
|
} break;
|
|
case kTfLiteBuiltinLog: {
|
|
*nn_op_type = ANEURALNETWORKS_LOG;
|
|
} break;
|
|
case kTfLiteBuiltinRsqrt: {
|
|
*nn_op_type = ANEURALNETWORKS_RSQRT;
|
|
} break;
|
|
case kTfLiteBuiltinPow: {
|
|
*nn_op_type = ANEURALNETWORKS_POW;
|
|
} break;
|
|
case kTfLiteBuiltinSlice: {
|
|
*nn_op_type = ANEURALNETWORKS_SLICE;
|
|
} break;
|
|
case kTfLiteBuiltinSin: {
|
|
*nn_op_type = ANEURALNETWORKS_SIN;
|
|
} break;
|
|
case kTfLiteBuiltinTransposeConv: {
|
|
const bool hybrid_op = IsHybridOperator(
|
|
mapping_args.context, kTfLiteBuiltinTransposeConv, mapping_args.node);
|
|
int input_tensor_flags = 0;
|
|
const int input_tensor_id =
|
|
mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
|
|
const int weight_tensor_id =
|
|
mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
|
|
if (context->tensors[input_tensor_id].type == kTfLiteInt8) {
|
|
const auto& weights_tensor = context->tensors[weight_tensor_id];
|
|
if ((weights_tensor.type == kTfLiteInt8 ||
|
|
weights_tensor.type == kTfLiteUInt8) &&
|
|
weights_tensor.quantization.type == kTfLiteAffineQuantization) {
|
|
input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
|
|
}
|
|
}
|
|
|
|
mapping_args.builder->AddTensorInput(input_tensor_id, hybrid_op,
|
|
input_tensor_flags);
|
|
mapping_args.builder->AddTensorInput(weight_tensor_id, hybrid_op,
|
|
input_tensor_flags);
|
|
|
|
      // NNAPI requires a bias tensor, so we allocate a new tensor and fill
      // it with zeroes. It is deleted along with the other tensors in the
      // context during the subgraph destructor call.
|
|
int bias_index = -1;
|
|
mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index);
|
|
TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index];
|
|
const auto input_type =
|
|
mapping_args.context
|
|
->tensors[mapping_args.node->inputs->data[/*kDataInputTensor*/ 2]]
|
|
.type;
|
|
if (input_type == kTfLiteFloat32) {
|
|
bias_tensor->type = kTfLiteFloat32;
|
|
} else {
|
|
bias_tensor->type = kTfLiteInt32;
|
|
}
|
|
|
|
      // Create an array with the required bias shape and resize the bias
      // tensor.
|
|
TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
|
|
const TfLiteTensor& output_shape =
|
|
mapping_args.context->tensors[mapping_args.node->inputs
|
|
->data[/*kOutputShapeTensor*/ 0]];
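      // The output shape tensor is in NHWC order, so element 3 holds the
      // output depth that determines the bias length.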
|
|
const int output_depth = output_shape.data.i32[3];
|
|
bias_shape->data[0] = output_depth;
|
|
bias_tensor->allocation_type = kTfLiteDynamic;
|
|
mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor,
|
|
bias_shape);
|
|
|
|
      // Set the tensor's values to zero and add it using AddVector*, so
      // that the values are copied to NNAPI. We don't use the AddTensor
      // function because it doesn't copy values and the tensor we just
      // created is not in node->inputs.
|
|
if (input_type == kTfLiteFloat32) {
|
|
memset(bias_tensor->data.f, 0, output_depth * sizeof(float));
|
|
mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f,
|
|
output_depth);
|
|
} else {
|
|
memset(bias_tensor->data.i32, 0, output_depth * sizeof(int));
|
|
const TfLiteTensor& input_tensor =
|
|
mapping_args.context->tensors[mapping_args.node->inputs
|
|
->data[/*kDataInputTensor*/ 2]];
|
|
const TfLiteTensor& filter_tensor =
|
|
mapping_args.context->tensors[mapping_args.node->inputs
|
|
->data[/*kWeightsTensor*/ 1]];
|
|
        // NNAPI requires the bias scale to be the product of the input scale
        // and the filter scale.
|
|
bias_tensor->params.scale =
|
|
input_tensor.params.scale * filter_tensor.params.scale;
|
|
mapping_args.builder->AddVectorInt32Operand(
|
|
bias_tensor->data.i32, output_depth,
|
|
input_tensor.params.scale * filter_tensor.params.scale,
|
|
/*zero_point=*/0);
|
|
}
|
|
|
|
mapping_args.builder->AddTensorInput(
|
|
mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op);
|
|
|
|
auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->padding);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
|
|
mapping_args.builder->AddScalarInt32Operand(
|
|
/*ANEURALNETWORKS_FUSED_NONE*/ 0);
|
|
// Use NHWC layout for input and output.
|
|
mapping_args.builder->AddScalarBoolOperand(false);
|
|
*nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV;
|
|
} break;
|
|
case kTfLiteBuiltinSqrt: {
|
|
*nn_op_type = ANEURALNETWORKS_SQRT;
|
|
} break;
|
|
case kTfLiteBuiltinRnn: {
|
|
      // NNAPI needs both state_in and state_out.
|
|
int ann_index;
|
|
mapping_args.builder->AddStateFloat32Tensor(
|
|
mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
|
|
&ann_index);
|
|
mapping_args.model_state_outputs->push_back(ann_index);
|
|
mapping_args.model_state_tfl_inputs->push_back(
|
|
mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
*nn_op_type = ANEURALNETWORKS_RNN;
|
|
} break;
|
|
case kTfLiteBuiltinSpaceToDepth: {
|
|
auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
|
|
*nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
|
|
} break;
|
|
case kTfLiteBuiltinSvdf: {
|
|
      // NNAPI needs both state_in and state_out.
|
|
int ann_index;
|
|
mapping_args.builder->AddStateFloat32Tensor(
|
|
mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4],
|
|
&ann_index);
|
|
mapping_args.model_state_outputs->push_back(ann_index);
|
|
mapping_args.model_state_tfl_inputs->push_back(
|
|
mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]);
|
|
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->rank);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
*nn_op_type = ANEURALNETWORKS_SVDF;
|
|
} break;
|
|
case kTfLiteBuiltinLstm: {
|
|
if (isLstmBasicKernel(mapping_args.node)) {
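        // The basic kernel is mapped to QUANTIZED_16BIT_LSTM: the fused
        // TFLite weight and bias tensors are decomposed below into the
        // per-gate operands expected by NNAPI.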
|
|
const auto output_dims =
|
|
mapping_args.context->tensors[mapping_args.node->outputs->data[1]]
|
|
.dims;
|
|
|
|
// Inputs kInputData
|
|
mapping_args.builder->AddTensorInput(
|
|
mapping_args.node->inputs->data[0 /* kInputData */],
|
|
/* hybrid_op */ false,
|
|
/* scalar_as_tensor */ false);
|
|
|
|
        // The 8 weight tensors are obtained by decomposing the
        // kInputWeights param.
|
|
const auto weight_tensor =
|
|
mapping_args.context->tensors[mapping_args.node->inputs
|
|
->data[2 /* kInputWeights */]];
|
|
|
|
std::vector<uint8_t> recurrent_to_input;
|
|
std::vector<uint8_t> input_to_input;
|
|
std::vector<uint8_t> recurrent_to_cell;
|
|
std::vector<uint8_t> input_to_cell;
|
|
std::vector<uint8_t> recurrent_to_forget;
|
|
std::vector<uint8_t> input_to_forget;
|
|
std::vector<uint8_t> recurrent_to_output;
|
|
std::vector<uint8_t> input_to_output;
|
|
tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
|
|
weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input,
|
|
&input_to_input, &recurrent_to_cell, &input_to_cell,
|
|
&recurrent_to_forget, &input_to_forget, &recurrent_to_output,
|
|
&input_to_output);
|
|
|
|
TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
|
|
TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
|
|
tflite::delegate::nnapi::SetWeightSubmatrixDims(
|
|
weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
|
|
|
|
int new_tensor_index = -1;
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
input_weight_dims, input_to_input, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
input_weight_dims, input_to_forget, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
input_weight_dims, input_to_cell, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
input_weight_dims, input_to_output, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
recurrent_weight_dims, recurrent_to_forget, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
recurrent_weight_dims, recurrent_to_output, weight_tensor.params,
|
|
&new_tensor_index);
|
|
|
|
TfLiteIntArrayFree(input_weight_dims);
|
|
TfLiteIntArrayFree(recurrent_weight_dims);
|
|
|
|
// Biases have to be split in four.
|
|
const auto bias_size = output_dims->data[1];
|
|
const TfLiteTensor& biases_tensor =
|
|
mapping_args.context->tensors[mapping_args.node->inputs
|
|
->data[3 /* kInputBiases */]];
|
|
|
|
std::vector<int32_t> input_bias;
|
|
std::vector<int32_t> cell_bias;
|
|
std::vector<int32_t> forget_bias;
|
|
std::vector<int32_t> output_bias;
|
|
delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size,
|
|
&input_bias, &cell_bias,
|
|
&forget_bias, &output_bias);
|
|
|
|
int input_bias_tensor = -1;
|
|
mapping_args.builder->AddNewInputConstantTensor<int32_t>(
|
|
ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias,
|
|
biases_tensor.params, &input_bias_tensor);
|
|
int forget_bias_tensor = -1;
|
|
mapping_args.builder->AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
|
|
forget_bias, biases_tensor.params, &forget_bias_tensor);
|
|
int cell_gate_bias_tensor = -1;
|
|
mapping_args.builder->AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias,
|
|
biases_tensor.params, &cell_gate_bias_tensor);
|
|
int output_gate_bias_tensor = -1;
|
|
mapping_args.builder->AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
|
|
output_bias, biases_tensor.params, &output_gate_bias_tensor);
|
|
|
|
mapping_args.builder->AddTensorInput(
|
|
mapping_args.node->inputs->data[4 /* kInputPrevState */],
|
|
/* hybrid_op */ false,
|
|
/* scalar_as_tensor */ false);
|
|
|
|
// kInputPrevActivation
|
|
mapping_args.builder->AddTensorInput(
|
|
mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
|
|
/* hybrid_op */ false,
|
|
/* scalar_as_tensor */ false);
|
|
|
|
        // Configure the copy of the activation and state outputs back to
        // their associated inputs.
|
|
mapping_args.feedback_loops->push_back(std::make_tuple(
|
|
mapping_args.node->outputs->data[0 /*kOutputActivation*/],
|
|
mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
|
|
|
|
mapping_args.feedback_loops->push_back(std::make_tuple(
|
|
mapping_args.node->outputs->data[1 /*kOutputState*/],
|
|
mapping_args.node->inputs->data[4 /*kInputPrevState*/]));
|
|
|
|
        // OUTPUTS
        // Set only the first two outputs, since the remaining ones are
        // ignored by NNAPI.
|
|
mapping_args.builder->AddTensorOutput(
|
|
mapping_args.node->outputs->data[1 /* kOutputState */], 0);
|
|
|
|
mapping_args.builder->AddTensorOutput(
|
|
mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);
|
|
|
|
*nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
|
|
} else {
|
|
auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
|
|
|
|
        // The current NNAPI implementation requires the scratch_buffer as
        // an output.
|
|
mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
|
|
|
|
        // NNAPI needs both state_in and state_out for cell_state and
        // output_state.
|
|
int ann_index;
|
|
mapping_args.builder->AddStateFloat32Tensor(
|
|
mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18],
|
|
&ann_index);
|
|
mapping_args.model_state_outputs->push_back(ann_index);
|
|
mapping_args.model_state_tfl_inputs->push_back(
|
|
mapping_args.node->inputs
|
|
->data[/*kInputActivationStateTensor*/ 18]);
|
|
mapping_args.builder->AddStateFloat32Tensor(
|
|
mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
|
|
&ann_index);
|
|
mapping_args.model_state_outputs->push_back(ann_index);
|
|
mapping_args.model_state_tfl_inputs->push_back(
|
|
mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
|
|
|
|
const bool hybrid_op = IsHybridOperator(
|
|
mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
|
|
|
|
if (mapping_args.node->inputs->size == 24) {
|
|
for (int i = 20; i < 24; ++i) {
|
|
const auto input_index = mapping_args.node->inputs->data[i];
|
|
if (input_index != kTfLiteOptionalTensor) {
|
|
mapping_args.builder->AddTensorInput(input_index, hybrid_op);
|
|
} else {
|
|
mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
*nn_op_type = ANEURALNETWORKS_LSTM;
|
|
}
|
|
} break;
|
|
case kTfLiteBuiltinMean: {
|
|
auto builtin = reinterpret_cast<TfLiteReducerParams*>(
|
|
mapping_args.node->builtin_data);
|
|
int32_t keep_dims = 0;
|
|
if (builtin->keep_dims) keep_dims = 1;
|
|
mapping_args.builder->AddScalarInt32Operand(keep_dims);
|
|
*nn_op_type = ANEURALNETWORKS_MEAN;
|
|
} break;
|
|
case kTfLiteBuiltinEmbeddingLookup: {
|
|
*nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
|
|
} break;
|
|
case kTfLiteBuiltinHashtableLookup: {
|
|
*nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
|
|
} break;
|
|
case kTfLiteBuiltinMaximum: {
|
|
*nn_op_type = ANEURALNETWORKS_MAXIMUM;
|
|
} break;
|
|
case kTfLiteBuiltinMinimum: {
|
|
*nn_op_type = ANEURALNETWORKS_MINIMUM;
|
|
} break;
|
|
case kTfLiteBuiltinCast: {
|
|
*nn_op_type = ANEURALNETWORKS_CAST;
|
|
} break;
|
|
case kTfLiteBuiltinLeakyRelu: {
|
|
const auto input_type =
|
|
mapping_args.context->tensors[mapping_args.node->inputs->data[0]]
|
|
.type;
|
|
auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>(
|
|
mapping_args.node->builtin_data);
|
|
|
|
TfLiteTensor alpha_tensor;
|
|
alpha_tensor.type = input_type;
|
|
alpha_tensor.allocation_type = kTfLiteDynamic;
|
|
alpha_tensor.dims = TfLiteIntArrayCreate(1);
|
|
alpha_tensor.dims->data[0] = 1;
|
|
alpha_tensor.params.zero_point = 0;
|
|
|
|
int new_tensor_index = -1;
|
|
if (input_type == kTfLiteFloat32) {
|
|
alpha_tensor.params.scale = 0;
|
|
std::vector<float> alpha_value = {builtin->alpha};
|
|
mapping_args.builder->AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims,
|
|
alpha_value, alpha_tensor.params, &new_tensor_index);
|
|
} else {
|
|
alpha_tensor.params.scale = builtin->alpha;
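        // Encode alpha in the quantization parameters: a single quantized
        // value of 1 with scale == alpha and zero point 0 dequantizes to
        // alpha.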
|
|
std::vector<uint8_t> alpha_value = {1};
|
|
mapping_args.builder->AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
|
|
alpha_tensor.dims, alpha_value, alpha_tensor.params,
|
|
&new_tensor_index);
|
|
}
|
|
|
|
*nn_op_type = ANEURALNETWORKS_PRELU;
|
|
} break;
|
|
case kTfLiteBuiltinPrelu: {
|
|
*nn_op_type = ANEURALNETWORKS_PRELU;
|
|
} break;
|
|
case kTfLiteBuiltinTile: {
|
|
*nn_op_type = ANEURALNETWORKS_TILE;
|
|
} break;
|
|
case kTfLiteBuiltinLogicalOr: {
|
|
*nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
|
|
} break;
|
|
case kTfLiteBuiltinLogicalAnd: {
|
|
*nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
|
|
} break;
|
|
case kTfLiteBuiltinLogicalNot: {
|
|
*nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
|
|
} break;
|
|
case kTfLiteBuiltinLess: {
|
|
*nn_op_type = ANEURALNETWORKS_LESS;
|
|
} break;
|
|
case kTfLiteBuiltinLessEqual: {
|
|
*nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
|
|
} break;
|
|
case kTfLiteBuiltinGreater: {
|
|
*nn_op_type = ANEURALNETWORKS_GREATER;
|
|
} break;
|
|
case kTfLiteBuiltinGreaterEqual: {
|
|
*nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
|
|
} break;
|
|
case kTfLiteBuiltinEqual: {
|
|
*nn_op_type = ANEURALNETWORKS_EQUAL;
|
|
} break;
|
|
case kTfLiteBuiltinNotEqual: {
|
|
*nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
|
|
} break;
|
|
case kTfLiteBuiltinNeg: {
|
|
*nn_op_type = ANEURALNETWORKS_NEG;
|
|
} break;
|
|
case kTfLiteBuiltinTopkV2: {
|
|
const TfLiteTensor& k_param =
|
|
mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
|
|
mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32);
|
|
*nn_op_type = ANEURALNETWORKS_TOPK_V2;
|
|
} break;
|
|
case kTfLiteBuiltinSelect: {
|
|
*nn_op_type = ANEURALNETWORKS_SELECT;
|
|
} break;
|
|
case kTfLiteBuiltinGather: {
|
|
auto builtin = reinterpret_cast<TfLiteGatherParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->axis);
|
|
mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
|
|
/* hybrid_op */ false,
|
|
/* tensor_flags */ 0);
|
|
*nn_op_type = ANEURALNETWORKS_GATHER;
|
|
} break;
|
|
case kTfLiteBuiltinBidirectionalSequenceLstm: {
|
|
auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->activation);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
|
|
mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
|
|
// TF Lite doesn't support layer normalization in bidirectional
|
|
// sequence LSTM, so we insert optional tensors for NNAPI.
|
|
for (int i = 0; i < 8; ++i) {
|
|
mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
|
|
}
|
|
*nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM;
|
|
} break;
|
|
case kTfLiteBuiltinExpandDims: {
|
|
const TfLiteTensor& axis_param =
|
|
mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
|
|
mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32);
|
|
*nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
|
|
} break;
|
|
case kTfLiteBuiltinSplit: {
|
|
const TfLiteTensor& axis =
|
|
mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
|
|
auto builtin =
|
|
reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
|
|
*nn_op_type = ANEURALNETWORKS_SPLIT;
|
|
} break;
|
|
case kTfLiteBuiltinLogSoftmax: {
|
|
      // Scaling and axis are hardcoded to 1 and -1, respectively, in TFLite.
|
|
mapping_args.builder->AddScalarFloat32Operand(1);
|
|
mapping_args.builder->AddScalarInt32Operand(-1);
|
|
*nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
|
|
} break;
|
|
case kTfLiteBuiltinQuantize: {
|
|
auto input_index = mapping_args.node->inputs->data[0];
|
|
      // NNAPI does not support requantization; it only supports quantization
      // from float. If the input is already quantized, dequantize it by
      // adding a Dequantize node before this one.
|
|
if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
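        // For example, an int8 input is first dequantized to float32 here,
        // so the requantization becomes a float-to-quantized QUANTIZE.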
|
|
mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
|
|
mapping_args.node_index);
|
|
}
|
|
|
|
*nn_op_type = ANEURALNETWORKS_QUANTIZE;
|
|
} break;
|
|
case kTfLiteBuiltinReduceAny: {
|
|
auto builtin = reinterpret_cast<TfLiteReducerParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
|
|
*nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
|
|
} break;
|
|
case kTfLiteBuiltinReduceMin: {
|
|
auto builtin = reinterpret_cast<TfLiteReducerParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
|
|
*nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
|
|
} break;
|
|
case kTfLiteBuiltinReduceMax: {
|
|
auto builtin = reinterpret_cast<TfLiteReducerParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
|
|
*nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
|
|
} break;
|
|
case kTfLiteBuiltinDepthToSpace: {
|
|
auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
|
|
*nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE;
|
|
} break;
|
|
case kTfLiteBuiltinReduceProd: {
|
|
auto builtin = reinterpret_cast<TfLiteReducerParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
|
|
*nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
|
|
} break;
|
|
case kTfLiteBuiltinSum: {
|
|
auto builtin = reinterpret_cast<TfLiteReducerParams*>(
|
|
mapping_args.node->builtin_data);
|
|
mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
|
|
*nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
|
|
} break;
|
|
case kTfLiteBuiltinElu: {
|
|
mapping_args.builder->AddScalarFloat32Operand(1.0);
|
|
*nn_op_type = ANEURALNETWORKS_ELU;
|
|
} break;
|
|
case kTfLiteBuiltinFill: {
|
|
*nn_op_type = ANEURALNETWORKS_FILL;
|
|
} break;
|
|
default:
|
|
// All other operators are not mapped.
|
|
return kTfLiteError;
|
|
}
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
// Initialize the kernel (a NN model).
|
|
TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
|
|
const TfLiteDelegateParams* params,
|
|
int* nnapi_errno) {
|
|
for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
|
|
nodes_.push_back(node_index);
|
|
}
|
|
|
|
const auto delegate_options =
|
|
StatefulNnApiDelegate::GetOptions(params->delegate);
|
|
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
|
|
ShouldUseTargetDevices(delegate_options, nnapi_)) {
|
|
TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
|
|
nnapi_errno, &nnapi_devices_));
|
|
|
|
if (nnapi_devices_.empty()) {
|
|
context->ReportError(
|
|
context, "NNAPI delegate requested but no accelerators available.");
|
|
return kTfLiteError;
|
|
}
|
|
}
|
|
|
|
// Mark the handle backed tensors.
|
|
tensor_memory_map_ =
|
|
&StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
|
|
|
|
if (!nn_model_) {
|
|
ANeuralNetworksModel* model = nullptr;
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
|
|
nnapi_->ANeuralNetworksModel_create(&model),
|
|
"creating NNAPI model", nnapi_errno);
|
|
nn_model_.reset(model);
|
|
|
|
TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options,
|
|
params->input_tensors,
|
|
params->output_tensors, nnapi_errno));
|
|
}
|
|
|
|
  // Calculate the model compilation cache token here, since its value
  // depends on some of the TfLiteDelegateParams.
|
|
nn_compilation_cache_token_.clear();
|
|
const char* cache_dir = delegate_options.cache_dir;
|
|
const char* model_token = delegate_options.model_token;
|
|
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
|
|
model_token) {
|
|
    // Compilation caching may be enabled; try to construct the uint8 token.
|
|
// TODO(b/133342794): use a generic token generator class.
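    // The 256-bit token is assembled from four 64-bit hashes: the model
    // token, the replaced node list, the input tensors (including their
    // shapes) and the output tensors.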
|
|
uint64_t token_parts[4];
|
|
// Create bits from model_token.
|
|
// TODO(b/172237993): should not use std::hash, as that is not
|
|
// guaranteed to be stable across program invocations.
|
|
token_parts[0] = std::hash<std::string>{}(model_token);
|
|
// Create bits from params->nodes_to_replace.
|
|
token_parts[1] = GetHash(params->nodes_to_replace);
|
|
// Create bits from params->input_tensors. These include the input tensor
|
|
// sizes, as the cached compilations are size-dependent.
|
|
token_parts[2] = GetHash(params->input_tensors);
|
|
for (int i : TfLiteIntArrayView(params->input_tensors)) {
|
|
if (i != kTfLiteOptionalTensor) {
|
|
TfLiteTensor* t = &context->tensors[i];
|
|
TF_LITE_ENSURE(context, t->dims);
|
|
token_parts[2] = GetHash(t->dims, token_parts[2]);
|
|
}
|
|
}
|
|
    // Create bits from params->output_tensors.
|
|
token_parts[3] = GetHash(params->output_tensors);
|
|
    // NNAPI requires the token to be 256 bits long.
|
|
// TODO(b/172238515): get token size from header instead of
|
|
// hardcoding.
|
|
std::vector<uint8_t> nnapi_cache_token(32, 0);
|
|
// Copy the token bits.
|
|
uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
|
|
for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
|
|
nnapi_cache_token[i] = p[i];
|
|
}
|
|
|
|
nn_compilation_cache_token_ = nnapi_cache_token;
|
|
}
|
|
|
|
initialised_ = true;
|
|
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
|
|
TfLiteNode* node, int* nnapi_errno) {
|
|
if (!initialised_) {
|
|
return kTfLiteError;
|
|
}
|
|
|
|
const auto delegate_options =
|
|
StatefulNnApiDelegate::GetOptions(node->delegate);
|
|
if (nn_compilation_) {
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
ANeuralNetworksCompilation* compilation = nullptr;
|
|
if (!nnapi_devices_.empty()) {
|
|
// Compile for the selected accelerator.
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksCompilation_createForDevices(
|
|
nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
|
|
&compilation),
|
|
"creating NNAPI model for given devices", nnapi_errno);
|
|
} else {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
|
|
nnapi_->ANeuralNetworksCompilation_create(
|
|
nn_model_.get(), &compilation),
|
|
"creating NNAPI compilation", nnapi_errno);
|
|
}
|
|
|
|
auto preference = delegate_options.execution_preference;
|
|
if (preference !=
|
|
StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
|
|
const int preference_result =
|
|
nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
|
|
preference);
|
|
if (preference_result != ANEURALNETWORKS_NO_ERROR) {
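      // Free the compilation before reporting the error so the handle is
      // not leaked on the early return.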
|
|
nnapi_->ANeuralNetworksCompilation_free(compilation);
|
|
compilation = nullptr;
|
|
}
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
|
|
"setting compilation preferences",
|
|
nnapi_errno);
|
|
}
|
|
|
|
if (!nn_compilation_cache_token_.empty()) {
|
|
const char* cache_dir = delegate_options.cache_dir;
|
|
const int set_caching_result =
|
|
nnapi_->ANeuralNetworksCompilation_setCaching(
|
|
compilation, cache_dir, nn_compilation_cache_token_.data());
|
|
if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
|
|
nnapi_->ANeuralNetworksCompilation_free(compilation);
|
|
compilation = nullptr;
|
|
}
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
|
|
"configuring NNAPI caching", nnapi_errno);
|
|
}
|
|
// Set compilation timeout if applicable.
|
|
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
|
if (delegate_options.max_compilation_timeout_duration_ns > 0) {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksCompilation_setTimeout(
|
|
compilation,
|
|
delegate_options.max_compilation_timeout_duration_ns),
|
|
"setting compilation timeout", nnapi_errno);
|
|
}
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksCompilation_setPriority(
|
|
compilation, delegate_options.execution_priority),
|
|
"setting compilation priority", nnapi_errno);
|
|
}
|
|
const int finish_result =
|
|
nnapi_->ANeuralNetworksCompilation_finish(compilation);
|
|
if (finish_result != ANEURALNETWORKS_NO_ERROR) {
|
|
nnapi_->ANeuralNetworksCompilation_free(compilation);
|
|
compilation = nullptr;
|
|
}
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
|
|
"completing NNAPI compilation", nnapi_errno);
|
|
nn_compilation_.reset(compilation);
|
|
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
|
|
TfLiteContext* context, std::vector<int>* supported_nodes,
|
|
int* nnapi_errno) {
|
|
if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
|
|
return kTfLiteError;
|
|
}
|
|
|
|
const auto nnapi_model_size = nnapi_to_tflite_op_mapping_.size();
|
|
|
|
  // Determine the list of operations the target devices actually support.
|
|
std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]);
|
|
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
|
|
nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
|
|
nnapi_ops_support_flags.get()),
|
|
"Checking supported operations for devices", nnapi_errno);
|
|
|
|
// A TfLite op is supported only if all the associated NNAPI ones are.
|
|
auto tflite_ops_support_status = std::map<int, bool>();
|
|
std::for_each(nodes_.begin(), nodes_.end(),
|
|
[&tflite_ops_support_status](int tflite_node_index) {
|
|
tflite_ops_support_status[tflite_node_index] = true;
|
|
});
|
|
for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size;
|
|
nnapi_op_index++) {
|
|
const auto tflite_op_index = nnapi_to_tflite_op_mapping_[nnapi_op_index];
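    // A single TfLite node may have been expanded into several NNAPI
    // operations; only keep it marked as supported if every one of them is
    // supported.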
|
|
tflite_ops_support_status[tflite_op_index] &=
|
|
nnapi_ops_support_flags[nnapi_op_index];
|
|
}
|
|
|
|
supported_nodes->clear();
|
|
std::for_each(nodes_.begin(), nodes_.end(),
|
|
[&supported_nodes, &tflite_ops_support_status](int node_index) {
|
|
if (tflite_ops_support_status[node_index]) {
|
|
supported_nodes->push_back(node_index);
|
|
}
|
|
});
|
|
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
|
|
TfLiteNode* node, int* nnapi_errno) {
|
|
ANeuralNetworksExecution* execution = nullptr;
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
|
|
nnapi_->ANeuralNetworksExecution_create(
|
|
nn_compilation_.get(), &execution),
|
|
"creating NNAPI execution", nnapi_errno);
|
|
std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
|
|
execution_unique_ptr(execution, NNFreeExecution(nnapi_));
|
|
|
|
  // Set execution timeout if applicable.
|
|
const auto delegate_options =
|
|
StatefulNnApiDelegate::GetOptions(node->delegate);
|
|
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
|
if (delegate_options.max_execution_timeout_duration_ns > 0) {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setTimeout(
|
|
execution, delegate_options.max_execution_timeout_duration_ns),
|
|
"setting execution timeout", nnapi_errno);
|
|
}
|
|
if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setLoopTimeout(
|
|
execution,
|
|
delegate_options.max_execution_loop_timeout_duration_ns),
|
|
"setting execution loop timeout", nnapi_errno);
|
|
}
|
|
}
|
|
  // Check whether the input and output memory pools need to be resized.
|
|
if (delegate_options.allow_dynamic_dimensions) {
|
|
size_t total_input_byte_size = 0;
|
|
    // Compute the total byte size needed for the TfLite inputs that are
    // mapped to NNAPI inputs.
|
|
for (int i : TfLiteIntArrayView(node->inputs)) {
|
|
// Constant tensors are not NNAPI inputs.
|
|
if (i != kTfLiteOptionalTensor &&
|
|
context->tensors[i].allocation_type != kTfLiteMmapRo &&
|
|
          // The delegate might not have mapped this input (this can
          // happen if one tensor is split into several ones).
|
|
operand_mapping_.lite_index_to_ann(i) != -1) {
|
|
if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
|
|
continue;
|
|
}
|
|
const TfLiteType nn_type_conversion =
|
|
operand_mapping_.lite_index_to_ann_type_conversion(i);
|
|
int tensor_size = 0;
|
|
if (nn_type_conversion == kTfLiteNoType) {
|
|
tensor_size = context->tensors[i].bytes;
|
|
} else {
|
|
size_t type_size;
|
|
TF_LITE_ENSURE_OK(
|
|
context, GetSizeOfType(context, nn_type_conversion, &type_size));
|
|
tensor_size = NumElements(&context->tensors[i]) * type_size;
|
|
}
|
|
total_input_byte_size += tensor_size;
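        // Padding is added after each tensor (see getNumPaddingBytes) so
        // that the next tensor starts at an aligned offset in the shared
        // memory pool.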
|
|
total_input_byte_size += getNumPaddingBytes(tensor_size);
|
|
}
|
|
}
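    // Grow the shared input pool only when the currently allocated block is
    // too small to hold all the inputs.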
|
|
if (total_input_byte_size > nn_input_memory_->get_byte_size()) {
|
|
nn_input_memory_.reset(
|
|
new NNMemory(nnapi_, "input_pool", total_input_byte_size));
|
|
}
|
|
|
|
size_t total_output_byte_size = 0;
|
|
for (int i : TfLiteIntArrayView(node->outputs)) {
|
|
if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
|
|
continue;
|
|
}
|
|
total_output_byte_size += context->tensors[i].bytes;
|
|
total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
|
|
}
|
|
if (total_output_byte_size > nn_output_memory_->get_byte_size()) {
|
|
nn_output_memory_.reset(
|
|
new NNMemory(nnapi_, "output_pool", total_output_byte_size));
|
|
}
|
|
}
|
|
|
|
  // Set the input tensor buffers. Note: we access TfLite tensors using
  // absolute indices, but the NN API indexes inputs by relative indices.
|
|
int relative_input_index = 0;
|
|
|
|
const bool use_int8_asymm_signed =
|
|
target_sdk_version_ >= kMinSdkVersionForNNAPI13;
|
|
|
|
size_t input_offset = 0;
|
|
for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
|
|
if (absolute_input_index == kTfLiteOptionalTensor) {
|
|
continue;
|
|
}
|
|
ANeuralNetworksOperandType input_nn_operand_type;
|
|
ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr;
|
|
TfLiteTensor* tensor = &context->tensors[absolute_input_index];
|
|
TfLiteType ann_type_equivalent =
|
|
operand_mapping_.lite_index_to_ann_type_conversion(
|
|
absolute_input_index);
|
|
if (delegate_options.allow_dynamic_dimensions &&
|
|
HasUnspecifiedDimension(tensor)) {
|
|
input_nn_operand_type =
|
|
ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
|
|
input_nn_operand_type_ptr = &input_nn_operand_type;
|
|
}
|
|
if (tensor->allocation_type != kTfLiteMmapRo) {
|
|
if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
|
|
tensor->buffer_handle < tensor_memory_map_->size()) {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setInputFromMemory(
|
|
execution, relative_input_index, input_nn_operand_type_ptr,
|
|
tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
|
|
tensor->bytes),
|
|
"associating NNAPI execution input with a memory object", tensor,
|
|
nnapi_errno);
|
|
relative_input_index++;
|
|
continue;
|
|
}
|
|
int tensor_size = 0;
|
|
if (ann_type_equivalent != kTfLiteNoType) {
|
|
const auto num_elements = NumElements(tensor);
|
|
uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
|
|
if (tensor->type == kTfLiteUInt8 &&
|
|
ann_type_equivalent == kTfLiteInt32) {
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
reinterpret_cast<int32_t*>(input_ptr)[i] =
|
|
static_cast<const int32_t>(tensor->data.uint8[i]);
|
|
}
|
|
} else if (tensor->type == kTfLiteInt8 &&
|
|
ann_type_equivalent == kTfLiteUInt8) {
|
|
// Explicitly convert int8 values to uint8 values.
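          // The two encodings differ only by a zero-point shift of 128,
          // e.g. an int8 value of -128 becomes 0 and 127 becomes 255.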
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
input_ptr[i] = static_cast<const uint8_t>(
|
|
static_cast<int32_t>(tensor->data.int8[i]) + 128);
|
|
}
|
|
} else if (tensor->type == kTfLiteInt8 &&
|
|
ann_type_equivalent == kTfLiteInt32) {
|
|
if (use_int8_asymm_signed) {
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
reinterpret_cast<int32_t*>(input_ptr)[i] =
|
|
static_cast<const int32_t>(tensor->data.int8[i]);
|
|
}
|
|
} else {
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
reinterpret_cast<int32_t*>(input_ptr)[i] =
|
|
static_cast<const int32_t>(tensor->data.int8[i]) + 128;
|
|
}
|
|
}
|
|
} else {
|
|
context->ReportError(
|
|
context,
|
|
"NN API Delegate: unsupported tensor types conversion: "
|
|
"from type code %d to type code %d.\n",
|
|
tensor->type, ann_type_equivalent);
|
|
return kTfLiteError;
|
|
}
|
|
size_t type_size;
|
|
TF_LITE_ENSURE_OK(
|
|
context, GetSizeOfType(context, ann_type_equivalent, &type_size));
|
|
tensor_size = NumElements(tensor) * type_size;
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setInputFromMemory(
|
|
execution, relative_input_index, input_nn_operand_type_ptr,
|
|
nn_input_memory_->get_handle(), input_offset, tensor_size),
|
|
"associating NNAPI execution input with a memory object", tensor,
|
|
nnapi_errno);
|
|
} else {
|
|
      // Copy data to the pre-allocated shared memory.
|
|
memcpy(nn_input_memory_->get_data_ptr() + input_offset,
|
|
tensor->data.raw, tensor->bytes);
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setInputFromMemory(
|
|
execution, relative_input_index, input_nn_operand_type_ptr,
|
|
nn_input_memory_->get_handle(), input_offset, tensor->bytes),
|
|
"associating NNAPI execution input with a memory object", tensor,
|
|
nnapi_errno);
|
|
tensor_size = tensor->bytes;
|
|
}
|
|
input_offset += tensor_size;
|
|
input_offset += getNumPaddingBytes(tensor_size);
|
|
relative_input_index++;
|
|
}
|
|
}
|
|
|
|
// Set the output tensor buffers.
|
|
int relative_output_index = 0;
|
|
size_t output_offset = 0;
|
|
for (auto output_index : TfLiteIntArrayView(node->outputs)) {
|
|
    // If the NNAPI implementation doesn't have some of the outputs, they are
    // left unmapped and we should not try to read their values here.
|
|
if (operand_mapping_.lite_index_to_ann(output_index) == -1) {
|
|
continue;
|
|
}
|
|
ANeuralNetworksOperandType output_nn_operand_type;
|
|
ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr;
|
|
TfLiteTensor* tensor = &context->tensors[output_index];
|
|
if (delegate_options.allow_dynamic_dimensions &&
|
|
HasUnspecifiedDimension(tensor)) {
|
|
TfLiteType ann_type_equivalent =
|
|
operand_mapping_.lite_index_to_ann_type_conversion(output_index);
|
|
output_nn_operand_type =
|
|
ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
|
|
output_nn_operand_type_ptr = &output_nn_operand_type;
|
|
}
|
|
if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
|
|
tensor->buffer_handle < tensor_memory_map_->size()) {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
|
|
execution, relative_output_index, output_nn_operand_type_ptr,
|
|
tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
|
|
tensor->bytes),
|
|
"associating NNAPI execution output to a memory object", tensor,
|
|
nnapi_errno);
|
|
|
|
} else {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
|
|
execution, relative_output_index, output_nn_operand_type_ptr,
|
|
nn_output_memory_->get_handle(), output_offset, tensor->bytes),
|
|
"associating NNAPI execution output to a memory object", tensor,
|
|
nnapi_errno);
|
|
output_offset += tensor->bytes;
|
|
output_offset += getNumPaddingBytes(tensor->bytes);
|
|
}
|
|
relative_output_index++;
|
|
}
|
|
|
|
  // The state_out of the previous invocation needs to be mapped to the
  // state_in of the current invocation.
|
|
for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
|
|
int state_tensor_idx = model_state_tfl_inputs_[i];
|
|
TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
|
|
    // Here we use a deep copy for the state_in tensors so that we are not
    // reading from and writing into the same buffer during an invocation.
|
|
// TODO(b/110369471): using double shared buffer to minimize the copies.
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_setOutput(
|
|
execution, relative_output_index, nullptr, tensor->data.raw,
|
|
tensor->bytes),
|
|
"associating NNAPI execution output to a buffer", nnapi_errno);
|
|
relative_output_index++;
|
|
}
|
|
// Invoke ANN in blocking fashion.
|
|
if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
|
|
ANeuralNetworksEvent* event = nullptr;
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksExecution_startCompute(execution, &event),
|
|
"starting async computation", nnapi_errno);
|
|
const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
|
|
nnapi_->ANeuralNetworksEvent_free(event);
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result,
|
|
"waiting for async computation completion",
|
|
nnapi_errno);
|
|
} else {
|
|
// Use synchronous execution for NNAPI 1.2+.
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context, nnapi_->ANeuralNetworksExecution_compute(execution),
|
|
"running computation", nnapi_errno);
|
|
}
|
|
|
|
  // Copy results from shared memory to the destination tensors.
|
|
output_offset = 0;
|
|
for (auto output_index : TfLiteIntArrayView(node->outputs)) {
|
|
TfLiteTensor* tensor = &context->tensors[output_index];
|
|
if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
|
|
continue;
|
|
}
|
|
TfLiteType ann_type_equivalent =
|
|
operand_mapping_.lite_index_to_ann_type_conversion(output_index);
|
|
if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
|
|
// Explicitly convert uint8 values to int8 values.
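      // Shift each value by -128 in place (e.g. 0 -> -128, 255 -> 127)
      // before the memcpy below copies the buffer into the int8 tensor.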
|
|
uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
|
|
nn_output_memory_->get_data_ptr() + output_offset);
|
|
const auto num_elements = NumElements(tensor);
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
output_ptr[i] =
|
|
static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
|
|
}
|
|
}
|
|
memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
|
|
tensor->bytes);
|
|
output_offset += tensor->bytes;
|
|
output_offset += getNumPaddingBytes(tensor->bytes);
|
|
}
|
|
|
|
  // Copy the content of every output tensor listed in feedback_loops_ into
  // its associated input tensor.
|
|
for (auto feedback_loop : feedback_loops_) {
|
|
int output_tensor_idx;
|
|
int input_tensor_idx;
|
|
std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
|
|
TfLiteTensor& src = context->tensors[output_tensor_idx];
|
|
TfLiteTensor& dest = context->tensors[input_tensor_idx];
|
|
|
|
memcpy(dest.data.raw, src.data.raw, src.bytes);
|
|
}
|
|
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
|
|
const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
|
|
int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) {
|
|
  // Depending on the operator and the input data format, Dequantize
  // operators may need to be added. For example, when the input is
  // floating-point but the weights are quantized, the weights will first be
  // dequantized to the same format as the input before being passed to the
  // operator.
|
|
|
|
// The tensor determining whether the inputs should be floating-point.
|
|
int input_tensor_index = -1;
|
|
std::vector<int> inputs_to_potentially_dequantize;
|
|
|
|
switch (builtin_code) {
|
|
case kTfLiteBuiltinConv2d:
|
|
case kTfLiteBuiltinFullyConnected: {
|
|
input_tensor_index = 0;
|
|
// Weights and bias are inputs #1 and #2 respectively and may require
|
|
// dequantization.
|
|
inputs_to_potentially_dequantize = {1, 2};
|
|
break;
|
|
}
|
|
case kTfLiteBuiltinLstm: {
|
|
input_tensor_index = 0;
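      // Inputs 1-17 cover the gate weights, biases and projection tensors,
      // and 20-23 the layer normalization weights; the state inputs 18 and
      // 19 never need dequantization.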
|
|
inputs_to_potentially_dequantize = {1, 2, 3, 4, 5, 6, 7,
|
|
8, 9, 10, 11, 12, 13, 14,
|
|
15, 16, 17, 20, 21, 22, 23};
|
|
break;
|
|
}
|
|
default:
|
|
return;
|
|
}
|
|
|
|
int tensor_id = node->inputs->data[input_tensor_index];
|
|
if (tensor_id < 0) return;
|
|
|
|
// Nothing to do if the input is not floating-point.
|
|
if (!IsFloat(context->tensors[tensor_id].type)) return;
|
|
|
|
for (int i : inputs_to_potentially_dequantize) {
|
|
if (i < 0 || i >= node->inputs->size) continue; // Ignore invalid index.
|
|
tensor_id = node->inputs->data[i];
|
|
if (tensor_id < 0) continue; // Ignore optional input.
|
|
|
|
const TfLiteType type = context->tensors[tensor_id].type;
|
|
// Nothing to do for this tensor if it's not quantized.
|
|
if (!IsQuantized(type)) continue;
|
|
|
|
// Insert Dequantize operator if it hasn't been done already and change
|
|
// the node's input accordingly.
|
|
builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index);
|
|
}
|
|
}
|
|
|
|
TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
|
|
TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) {
|
|
DequantizeMapping dequantize_mapping;
|
|
// The operand builder allows creating a single op. It is created outside
|
|
// the for loop to avoid reallocating the vectors.
|
|
NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
|
|
&dequantize_mapping, &allocation_memory_mapping_,
|
|
&nnapi_to_tflite_op_mapping_, nn_model_.get(),
|
|
nnapi_errno, allow_dynamic_dimensions);
|
|
  // If we have target accelerators, the target SDK version might differ
  // from the current Android version.
|
|
target_sdk_version_ = nnapi_->android_sdk_version;
|
|
if (!nnapi_devices_.empty()) {
|
|
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
|
|
context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
|
|
}
|
|
// Add Tensors.
|
|
for (auto node_index : nodes_) {
|
|
// Obtain the op and registration.
|
|
TfLiteNode* node;
|
|
TfLiteRegistration* reg;
|
|
TF_LITE_ENSURE_STATUS(
|
|
context->GetNodeAndRegistration(context, node_index, &node, ®));
|
|
|
|
// Fully quantized full LSTM.
|
|
if (target_sdk_version_ >= kMinSdkVersionForNNAPI13 &&
|
|
reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
|
|
context->tensors[node->inputs->data[0]].type == kTfLiteInt8) {
|
|
const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM;
|
|
|
|
constexpr int kInputTensor = 0;
|
|
constexpr int kInputToInputWeightsTensor = 1;
|
|
constexpr int kRecurrentToInputWeightsTensor = 5;
|
|
constexpr int kInputGateBiasTensor = 12;
|
|
constexpr int kForgetGateBiasTensor = 13;
|
|
constexpr int kCellGateBiasTensor = 14;
|
|
constexpr int kOutputGateBiasTensor = 15;
|
|
constexpr int kProjectionWeightsTensor = 16;
|
|
constexpr int kProjectionBiasTensor = 17;
|
|
constexpr int kPrevOutputTensor = 18;
|
|
|
|
// Add input tensors.
|
|
for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
|
|
const auto input_index = node->inputs->data[input_pos];
|
|
if (input_index == kTfLiteOptionalTensor) {
|
|
if (input_pos == kInputToInputWeightsTensor ||
|
|
input_pos == kRecurrentToInputWeightsTensor ||
|
|
input_pos == kProjectionWeightsTensor) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0));
|
|
} else if (input_pos == kInputGateBiasTensor ||
|
|
input_pos == kForgetGateBiasTensor ||
|
|
input_pos == kCellGateBiasTensor ||
|
|
input_pos == kOutputGateBiasTensor ||
|
|
input_pos == kProjectionBiasTensor) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0));
|
|
} else { // cell-to-* and layer norm weights.
|
|
TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0));
|
|
}
|
|
} else {
|
|
// Only input and previous output use INT8_ASYM_SIGNED.
|
|
int flags =
|
|
(input_pos == kInputTensor || input_pos == kPrevOutputTensor)
|
|
? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED
|
|
: 0;
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags));
|
|
}
|
|
}
|
|
|
|
// Add clip parameters.
|
|
auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddScalarFloat32Operand(builtin->cell_clip));
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddScalarFloat32Operand(builtin->proj_clip));
|
|
|
|
// Add quantization parameters for intermediate tensors.
|
|
TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5);
|
|
for (int intermediate_pos = 0;
|
|
intermediate_pos < node->intermediates->size; ++intermediate_pos) {
|
|
const auto intermediate_index =
|
|
node->intermediates->data[intermediate_pos];
|
|
const TfLiteTensor& tensor = context->tensors[intermediate_index];
|
|
TfLiteAffineQuantization* quantization_params =
|
|
static_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
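        // All five intermediate scales are added; the last intermediate
        // additionally contributes its zero point, added just before its
        // scale.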
|
|
if (intermediate_pos == 4) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
|
|
quantization_params->zero_point->data[0]));
|
|
}
|
|
TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand(
|
|
quantization_params->scale->data[0]));
|
|
}
|
|
|
|
// Activation state output.
|
|
int ann_index;
|
|
builder.AddStateInt8AsymTensor(
|
|
node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index);
|
|
model_state_outputs_.push_back(ann_index);
|
|
model_state_tfl_inputs_.push_back(
|
|
node->inputs->data[/*kInputActivationStateTensor*/ 18]);
|
|
|
|
// Cell state output.
|
|
builder.AddStateInt16Tensor(
|
|
node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index);
|
|
model_state_outputs_.push_back(ann_index);
|
|
model_state_tfl_inputs_.push_back(
|
|
node->inputs->data[/*kInputCellStateTensor*/ 19]);
|
|
|
|
// Add output tensors.
|
|
for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
|
|
const auto output_index = node->outputs->data[output_pos];
|
|
TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(
|
|
output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
|
|
}
|
|
|
|
builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index);
|
|
continue;
|
|
}
|
|
|
|
const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
|
|
const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
|
|
const bool need_int8_conversion =
|
|
target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
|
|
NeedInt8Conversion(context, reg->builtin_code, node);
|
|
const bool use_int8_asymm_signed =
|
|
target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
|
|
|
|
int input_tensor_flags = 0;
|
|
if (scalar_as_tensor) {
|
|
input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
|
|
}
|
|
if (use_int8_asymm_signed) {
|
|
input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
|
|
}
|
|
|
|
// On SDK level less than 30, h_swish will be lowered into supported NNAPI
|
|
// operations. Since SDK level 30, h_swish is supported as a single
|
|
// operation.
|
|
if (reg->builtin_code == kTfLiteBuiltinHardSwish &&
|
|
nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) {
|
|
builder.TransformHardSwishIntoSupportedOps(
|
|
node->inputs->data[0], node->outputs->data[0], need_int8_conversion,
|
|
node_index);
|
|
continue;
|
|
}
|
|
// Map inputs to NN API tensor indices.
|
|
for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
|
|
const auto input_index = node->inputs->data[input_pos];
|
|
if (need_int8_conversion &&
|
|
(input_pos == 0 ||
|
|
reg->builtin_code == kTfLiteBuiltinFullyConnected ||
|
|
reg->builtin_code == kTfLiteBuiltinConv2d ||
|
|
reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d ||
|
|
reg->builtin_code == kTfLiteBuiltinAdd ||
|
|
reg->builtin_code == kTfLiteBuiltinMul ||
|
|
reg->builtin_code == kTfLiteBuiltinSub ||
|
|
reg->builtin_code == kTfLiteBuiltinConcatenation ||
|
|
reg->builtin_code == kTfLiteBuiltinMaximum ||
|
|
reg->builtin_code == kTfLiteBuiltinMinimum ||
|
|
reg->builtin_code == kTfLiteBuiltinLess ||
|
|
reg->builtin_code == kTfLiteBuiltinLessEqual ||
|
|
reg->builtin_code == kTfLiteBuiltinGreater ||
|
|
reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
|
|
reg->builtin_code == kTfLiteBuiltinEqual ||
|
|
reg->builtin_code == kTfLiteBuiltinNotEqual ||
|
|
reg->builtin_code == kTfLiteBuiltinSelect)) {
|
|
// Only selected inputs require int8 conversion.
|
|
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
|
|
input_index, hybrid_op,
|
|
input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
|
|
continue;
|
|
}
|
|
if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
|
|
input_pos >= 20) {
|
|
// Skip layer normalization weights. They are added in the Map
|
|
// function (after all the other inputs added there) since layer
|
|
// normalization weights are the last four inputs of the LSTM op in
|
|
// NNAPI.
|
|
continue;
|
|
}
|
|
if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
|
|
        // All inputs are configured in the Map function.
|
|
continue;
|
|
}
|
|
if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
|
|
if (input_pos >= 20) {
|
|
// Skip layer normalization weights. They are added in the Map
|
|
// function (after all the other inputs added there) since layer
|
|
// normalization weights are the last four inputs of the
|
|
// unidirectional sequence LSTM op in NNAPI.
|
|
continue;
|
|
}
|
|
if (input_index == kTfLiteOptionalTensor) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
|
|
continue;
|
|
}
|
|
}
|
|
if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
|
|
(input_index == node->inputs->data[0])) {
|
|
// Skip the axis input tensor; it will be added as a scalar operand
|
|
// by the Map() mapping.
|
|
continue;
|
|
}
|
|
if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
|
|
// Everything is added during Map since the input tensors
// have a different order.
|
|
continue;
|
|
}
|
|
|
|
// Pad and Padv2 have an optional parameter for a pad value which has
|
|
// to be converted to a scalar type in NN API.
|
|
if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
|
|
reg->builtin_code == kTfLiteBuiltinPad) &&
|
|
node->inputs->size == 3 && input_pos == 2) {
|
|
const int constant_value_id = node->inputs->data[2];
|
|
if (constant_value_id == kTfLiteOptionalTensor) {
|
|
continue;
|
|
}
|
|
const TfLiteTensor constant_value = context->tensors[constant_value_id];
|
|
|
|
switch (constant_value.type) {
|
|
case kTfLiteFloat32:
|
|
if (constant_value.allocation_type == kTfLiteMmapRo) {
|
|
builder.AddScalarFloat32Operand(*constant_value.data.f);
|
|
} else {
|
|
builder.AddSingleValueTensorAsScalarOperand(
|
|
constant_value_id, ANEURALNETWORKS_FLOAT32);
|
|
}
|
|
break;
|
|
case kTfLiteUInt8:
|
|
if (constant_value.allocation_type == kTfLiteMmapRo) {
|
|
builder.AddScalarInt32Operand(
|
|
static_cast<int32_t>(*constant_value.data.uint8));
|
|
} else {
|
|
builder.AddSingleValueTensorAsScalarOperand(
|
|
constant_value_id, ANEURALNETWORKS_INT32);
|
|
}
|
|
break;
|
|
case kTfLiteInt8:
|
|
if (constant_value.allocation_type == kTfLiteMmapRo) {
|
|
if (need_int8_conversion) {
|
|
builder.AddScalarInt32Operand(
|
|
static_cast<int32_t>(*constant_value.data.int8) + 128);
|
|
} else {
|
|
builder.AddScalarInt32Operand(*constant_value.data.int8);
|
|
}
|
|
} else {
|
|
builder.AddSingleValueTensorAsScalarOperand(
|
|
constant_value_id, ANEURALNETWORKS_INT32);
|
|
}
|
|
break;
|
|
default:
|
|
context->ReportError(context,
|
|
"Unsupported type of pad value for pad_v2\n");
|
|
return kTfLiteError;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (input_index == kTfLiteOptionalTensor &&
|
|
(reg->builtin_code == kTfLiteBuiltinLstm ||
|
|
reg->builtin_code == kTfLiteBuiltinSvdf ||
|
|
reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
|
|
// Properly handle the optional tensor for LSTM and SVDF;
// currently only float32 is supported.
|
|
TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
|
|
} else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
|
|
reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
|
|
if (input_pos == 0) {
|
|
// Only the first input tensor is added. The second one,
|
|
// specifying the output height and width, is not added and
|
|
// instead the height and width will be added individually as
|
|
// scalars by the mapping function returned by Map().
|
|
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
|
|
input_tensor_flags));
|
|
}
|
|
} else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
|
|
// The K parameter tensor is not handled here but by the functor
// returned by Map; the input tensor is instead added in the else
// clause below.
|
|
continue;
|
|
} else if (reg->builtin_code == kTfLiteBuiltinGather) {
|
|
// Everything else is added during Map since the input tensors
// have a different order.
|
|
if (input_pos == 0) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
|
|
input_tensor_flags));
|
|
}
|
|
continue;
|
|
} else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
|
|
input_pos == 1) {
|
|
// The axis param is added during Map
|
|
continue;
|
|
} else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
|
|
input_pos == 2) {
|
|
// NNAPI does not support crops.
|
|
// The Map function will check if all crops are zero.
|
|
continue;
|
|
} else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
|
|
reg->builtin_code == kTfLiteBuiltinArgMax) {
|
|
// The first input tensor is added as is. The second one, specifying
|
|
// the axis, needs to be converted to a scalar since TFLite uses a
|
|
// tensor but NNAPI uses a scalar as the axis.
|
|
if (input_pos == 0) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
|
|
input_tensor_flags));
|
|
} else {
|
|
const int axis_id = node->inputs->data[1];
|
|
const TfLiteTensor& axis_tensor = context->tensors[axis_id];
|
|
switch (axis_tensor.type) {
|
|
case kTfLiteInt32:
|
|
if (axis_tensor.allocation_type == kTfLiteMmapRo) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
|
|
static_cast<int32_t>(*axis_tensor.data.i32)));
|
|
} else {
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddSingleValueTensorAsScalarOperand(
|
|
axis_id, ANEURALNETWORKS_INT32));
|
|
}
|
|
break;
|
|
case kTfLiteInt64:
|
|
// Map() function already makes sure int64 input is constant.
|
|
TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
|
|
static_cast<int32_t>(*axis_tensor.data.i64)));
|
|
break;
|
|
default:
|
|
return kTfLiteError;
|
|
}
|
|
}
|
|
} else if (reg->builtin_code == kTfLiteBuiltinMaximum ||
|
|
reg->builtin_code == kTfLiteBuiltinMinimum) {
|
|
const TfLiteTensor& operand_tensor =
|
|
context->tensors[node->inputs->data[input_pos]];
|
|
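// A rank-0 (scalar) constant operand is materialized as a new constant
// tensor of shape {1} for NNAPI.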
if (operand_tensor.dims->size == 0) {
|
|
int tensor_index;
|
|
|
|
TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type,
|
|
kTfLiteMmapRo);
|
|
switch (operand_tensor.type) {
|
|
case kTfLiteFloat32:
|
|
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1},
|
|
std::vector<float>(1, operand_tensor.data.f[0]),
|
|
operand_tensor.params, &tensor_index));
|
|
break;
|
|
case kTfLiteUInt8:
|
|
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1},
|
|
std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
|
|
operand_tensor.params, &tensor_index));
|
|
break;
|
|
case kTfLiteInt8: {
|
|
auto params = operand_tensor.params;
|
|
if (params.scale == 0.0) {
|
|
params.scale = 1.0;
|
|
}
|
|
|
|
if (use_int8_asymm_signed) {
|
|
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
|
|
operand_tensor.type, {1},
|
|
std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
|
|
&tensor_index));
|
|
} else {
|
|
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
|
|
{1},
|
|
std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
|
|
params, &tensor_index));
|
|
}
|
|
} break;
|
|
case kTfLiteInt32:
|
|
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
|
|
std::vector<int32_t>(1, operand_tensor.data.i32[0]),
|
|
operand_tensor.params, &tensor_index));
|
|
break;
|
|
default:
|
|
return kTfLiteError;
|
|
}
|
|
} else {
|
|
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
|
|
input_tensor_flags));
|
|
}
|
|
} else if ((reg->builtin_code == kTfLiteBuiltinReduceAny ||
|
|
reg->builtin_code == kTfLiteBuiltinReduceMax ||
|
|
reg->builtin_code == kTfLiteBuiltinReduceMin ||
|
|
reg->builtin_code == kTfLiteBuiltinReduceProd ||
|
|
reg->builtin_code == kTfLiteBuiltinSum) &&
|
|
(input_pos == 1)) {
|
|
// The axis needs to be converted to a tensor if specified as a scalar.
|
|
const TfLiteTensor& axis_tensor =
|
|
context->tensors[node->inputs->data[input_pos]];
|
|
if (axis_tensor.dims->size == 0) {
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddVectorInt32Operand(axis_tensor.data.i32, 1));
|
|
} else {
|
|
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
|
|
input_tensor_flags));
|
|
}
|
|
} else if (reg->builtin_code == kTfLiteBuiltinFill) {
|
|
if (input_pos == 0) {
|
|
const int dims_id = node->inputs->data[0];
|
|
const TfLiteTensor& dims_tensor = context->tensors[dims_id];
|
|
switch (dims_tensor.type) {
|
|
case kTfLiteInt32:
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddTensorInput(input_index, hybrid_op));
|
|
break;
|
|
case kTfLiteInt64: {
|
|
// We made sure that dimensions are constant and fit into int32
// in Map(), so we can safely create a new tensor with cast
// values.
|
|
const int dims_size = dims_tensor.dims->data[0];
|
|
std::vector<int32_t> dims_int32(dims_size);
|
|
std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size,
|
|
dims_int32.begin());
|
|
int new_tensor_index = -1;
|
|
builder.AddNewInputConstantTensor(
|
|
ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims,
|
|
dims_int32, dims_tensor.params, &new_tensor_index);
|
|
} break;
|
|
default:
|
|
return kTfLiteError;
|
|
}
|
|
} else {
|
|
const int value_id = node->inputs->data[1];
|
|
const TfLiteTensor& value_tensor = context->tensors[value_id];
|
|
switch (value_tensor.type) {
|
|
case kTfLiteFloat32:
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddScalarFloat32Operand(*value_tensor.data.f));
|
|
break;
|
|
case kTfLiteInt32:
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddScalarInt32Operand(*value_tensor.data.i32));
|
|
break;
|
|
case kTfLiteInt64:
|
|
// Map() function already makes sure int64 input is constant and
|
|
// fits into int32.
|
|
TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
|
|
static_cast<int32_t>(*value_tensor.data.i64)));
|
|
break;
|
|
default:
|
|
return kTfLiteError;
|
|
}
|
|
}
|
|
} else {
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
|
|
}
|
|
}
|
|
|
|
// Get the NNAPI op type and add the op-specific operands.
// This fails if the Validate function has failed for this node.
|
|
int nn_op_type;
|
|
TF_LITE_ENSURE_STATUS(
|
|
Map(context, reg->builtin_code, reg->version, target_sdk_version_,
|
|
{context, &builder, node, node_index, &model_state_outputs_,
|
|
&model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
|
|
&nn_op_type));
|
|
|
|
// Map outputs to NN API tensor indices.
|
|
int output_tensor_flags = 0;
|
|
if (need_int8_conversion) {
|
|
output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
|
|
}
|
|
if (use_int8_asymm_signed) {
|
|
output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
|
|
}
|
|
for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
|
|
const auto output_index = node->outputs->data[output_pos];
|
|
|
|
// Outputs for the basic LSTM cell are set in the Map function, so they
// are skipped here.
|
|
if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
|
|
continue;
|
|
}
|
|
|
|
TF_LITE_ENSURE_STATUS(
|
|
builder.AddTensorOutput(output_index, output_tensor_flags));
|
|
}
|
|
|
|
// Dequantize operators may have to be added in case inputs are expected
// to be floating point.
|
|
AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
|
|
node_index, &builder, nnapi_errno);
|
|
|
|
TF_LITE_ENSURE_OK(context_,
|
|
builder.FinalizeAddOperation(nn_op_type, node_index));
|
|
}
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
TfLiteStatus NNAPIDelegateKernel::BuildGraph(
|
|
TfLiteContext* context,
|
|
const StatefulNnApiDelegate::Options& delegate_options,
|
|
const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors,
|
|
int* nnapi_errno) {
|
|
// Build the ops and tensors.
|
|
TF_LITE_ENSURE_STATUS(AddOpsAndTensors(
|
|
context, nnapi_errno, delegate_options.allow_dynamic_dimensions));
|
|
// Map input and output tensor indices to ANN indices.
|
|
std::vector<uint32_t> inputs;
|
|
inputs.reserve(input_tensors->size);
|
|
std::vector<uint32_t> outputs;
|
|
outputs.reserve(output_tensors->size);
|
|
|
|
size_t total_input_byte_size = 0;
|
|
// Convert the TensorFlow Lite input and output indices to ann_indices.
|
|
for (int i : TfLiteIntArrayView(input_tensors)) {
|
|
// Constant tensors are not NNAPI inputs.
|
|
if (i != kTfLiteOptionalTensor &&
|
|
context->tensors[i].allocation_type != kTfLiteMmapRo &&
|
|
// The delegate might not have mapped this input (this can
// happen if one tensor is split into several ones).
|
|
operand_mapping_.lite_index_to_ann(i) != -1) {
|
|
inputs.push_back(operand_mapping_.lite_index_to_ann(i));
|
|
if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
|
|
continue;
|
|
}
|
|
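// If the operand type was converted when it was added to the NNAPI model,
// size the shared input pool for the converted type rather than for the
// original TFLite tensor type.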
const TfLiteType nn_type_conversion =
|
|
operand_mapping_.lite_index_to_ann_type_conversion(i);
|
|
int tensor_size = 0;
|
|
if (nn_type_conversion == kTfLiteNoType) {
|
|
tensor_size = context->tensors[i].bytes;
|
|
} else {
|
|
size_t type_size;
|
|
TF_LITE_ENSURE_OK(
|
|
context, GetSizeOfType(context, nn_type_conversion, &type_size));
|
|
tensor_size = NumElements(&context->tensors[i]) * type_size;
|
|
}
|
|
total_input_byte_size += tensor_size;
|
|
total_input_byte_size += getNumPaddingBytes(tensor_size);
|
|
}
|
|
}
|
|
|
|
size_t total_output_byte_size = 0;
|
|
for (int i : TfLiteIntArrayView(output_tensors)) {
|
|
const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i);
|
|
// Unmapped outputs are not added
|
|
if (output_tensor_ann_index != -1) {
|
|
outputs.push_back(output_tensor_ann_index);
|
|
}
|
|
if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
|
|
continue;
|
|
}
|
|
total_output_byte_size += context->tensors[i].bytes;
|
|
total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
|
|
}
|
|
|
|
// Add state output tensors as model outputs.
|
|
for (int i : model_state_outputs_) {
|
|
outputs.push_back(i);
|
|
}
|
|
|
|
// Tell ANN to declare inputs/outputs
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
|
|
nn_model_.get(), inputs.size(), inputs.data(), outputs.size(),
|
|
outputs.data()),
|
|
"identifying model inputs and outputs", nnapi_errno);
|
|
|
|
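// Relax float32 computation to float16 when either the interpreter or the
// delegate options allow it; the relaxation API is only available from
// NNAPI 1.1 onwards.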
auto allow_fp16 =
|
|
context->allow_fp32_relax_to_fp16 | delegate_options.allow_fp16;
|
|
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context,
|
|
nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
|
|
nn_model_.get(), allow_fp16),
|
|
"set relaxed computation mode for fp32 if possible", nnapi_errno);
|
|
}
|
|
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()),
|
|
"finalizing the model", nnapi_errno);
|
|
|
|
// Create shared memory pool for inputs and outputs.
|
|
nn_input_memory_.reset(
|
|
new NNMemory(nnapi_, "input_pool", total_input_byte_size));
|
|
nn_output_memory_.reset(
|
|
new NNMemory(nnapi_, "output_pool", total_output_byte_size));
|
|
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
} // namespace nnapi
|
|
} // namespace delegate
|
|
|
|
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
|
|
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11;
|
|
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
|
|
using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
|
|
|
|
StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {}
|
|
|
|
StatefulNnApiDelegate::Data::~Data() {
|
|
std::for_each(std::begin(delegate_state_cache),
|
|
std::end(delegate_state_cache),
|
|
[](const std::pair<int, NNAPIDelegateKernel*>& entry) {
|
|
delete entry.second;
|
|
});
|
|
}
|
|
|
|
void StatefulNnApiDelegate::Data::CacheDelegateKernel(
|
|
const TfLiteDelegateParams* delegate_params,
|
|
NNAPIDelegateKernel* delegate_state) {
|
|
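// The partition is identified by the first node it replaces; the same key
// is used by MaybeGetCachedDelegateKernel below.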
const int cache_key = delegate_params->nodes_to_replace->data[0];
|
|
delegate_state_cache.emplace(cache_key, delegate_state);
|
|
}
|
|
|
|
NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel(
|
|
const TfLiteDelegateParams* delegate_params) {
|
|
const int cache_key = delegate_params->nodes_to_replace->data[0];
|
|
const auto cached_state = delegate_state_cache.find(cache_key);
|
|
if (cached_state != std::end(delegate_state_cache)) {
|
|
auto result = cached_state->second;
|
|
delegate_state_cache.erase(cached_state);
|
|
return result;
|
|
} else {
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi)
|
|
: StatefulNnApiDelegate(nnapi, Options()) {}
|
|
|
|
StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
|
|
: StatefulNnApiDelegate(NnApiImplementation(), options) {}
|
|
|
|
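// Usage sketch (illustrative only; assumes an already built
// tflite::Interpreter named `interpreter`):
//   StatefulNnApiDelegate::Options options;
//   options.allow_fp16 = true;
//   StatefulNnApiDelegate delegate(options);
//   interpreter->ModifyGraphWithDelegate(&delegate);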
StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
|
|
Options options)
|
|
: TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) {
|
|
if (options.accelerator_name) {
|
|
delegate_data_.accelerator_name = options.accelerator_name;
|
|
}
|
|
if (options.cache_dir) {
|
|
delegate_data_.cache_dir = options.cache_dir;
|
|
}
|
|
if (options.model_token) {
|
|
delegate_data_.model_token = options.model_token;
|
|
}
|
|
delegate_data_.execution_preference = options.execution_preference;
|
|
delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
|
|
delegate_data_.max_number_delegated_partitions =
|
|
options.max_number_delegated_partitions;
|
|
delegate_data_.allow_fp16 = options.allow_fp16;
|
|
delegate_data_.execution_priority = options.execution_priority;
|
|
delegate_data_.max_compilation_timeout_duration_ns =
|
|
options.max_compilation_timeout_duration_ns;
|
|
delegate_data_.max_execution_timeout_duration_ns =
|
|
options.max_execution_timeout_duration_ns;
|
|
delegate_data_.max_execution_loop_timeout_duration_ns =
|
|
options.max_execution_loop_timeout_duration_ns;
|
|
if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
|
|
delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
|
|
}
|
|
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
|
|
"Created TensorFlow Lite delegate for NNAPI.");
|
|
Prepare = DoPrepare;
|
|
CopyFromBufferHandle = DoCopyFromBufferHandle;
|
|
CopyToBufferHandle = DoCopyToBufferHandle;
|
|
FreeBufferHandle = DoFreeBufferHandle;
|
|
data_ = &delegate_data_;
|
|
if (delegate_data_.allow_dynamic_dimensions) {
|
|
flags |= kTfLiteDelegateFlagsAllowDynamicTensors;
|
|
flags |= kTfLiteDelegateFlagsRequirePropagatedShapes;
|
|
}
|
|
}
|
|
|
|
StatefulNnApiDelegate::StatefulNnApiDelegate()
|
|
: StatefulNnApiDelegate(Options()) {}
|
|
|
|
const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
|
|
TfLiteDelegate* delegate) {
|
|
auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
|
|
StatefulNnApiDelegate::Options options;
|
|
options.execution_preference = delegate_data->execution_preference;
|
|
options.accelerator_name = delegate_data->accelerator_name.empty()
|
|
? nullptr
|
|
: delegate_data->accelerator_name.c_str();
|
|
options.cache_dir = delegate_data->cache_dir.empty()
|
|
? nullptr
|
|
: delegate_data->cache_dir.c_str();
|
|
options.model_token = delegate_data->model_token.empty()
|
|
? nullptr
|
|
: delegate_data->model_token.c_str();
|
|
options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
|
|
options.max_number_delegated_partitions =
|
|
delegate_data->max_number_delegated_partitions;
|
|
options.allow_fp16 = delegate_data->allow_fp16;
|
|
options.execution_priority = delegate_data->execution_priority;
|
|
options.max_compilation_timeout_duration_ns =
|
|
delegate_data->max_compilation_timeout_duration_ns;
|
|
options.max_execution_timeout_duration_ns =
|
|
delegate_data->max_execution_timeout_duration_ns;
|
|
options.max_execution_loop_timeout_duration_ns =
|
|
delegate_data->max_execution_loop_timeout_duration_ns;
|
|
options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
|
|
return options;
|
|
}
|
|
|
|
const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
|
|
StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
|
|
auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
|
|
return delegate_data->tensor_memory_map;
|
|
}
|
|
|
|
TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
|
|
ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback,
|
|
void* callback_context) {
|
|
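// Reuse a previously freed slot in the memory map if one is available;
// otherwise append a new entry. The returned buffer handle is the index
// of the slot.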
int map_size = delegate_data_.tensor_memory_map.size();
|
|
for (int i = 0; i < map_size; i++) {
|
|
if (delegate_data_.tensor_memory_map[i].memory == nullptr) {
|
|
delegate_data_.tensor_memory_map[i] = {memory, callback,
|
|
callback_context};
|
|
return i;
|
|
}
|
|
}
|
|
delegate_data_.tensor_memory_map.push_back(
|
|
{memory, callback, callback_context});
|
|
return map_size;
|
|
}
|
|
|
|
TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle(
|
|
TfLiteContext* context, TfLiteDelegate* delegate,
|
|
TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
|
|
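// Validate the handle, then copy the contents of the registered
// ANeuralNetworksMemory region into the TFLite tensor via the
// client-provided callback.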
auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
|
|
if (buffer_handle < 0 ||
|
|
buffer_handle >= delegate_data->tensor_memory_map.size()) {
|
|
return kTfLiteError;
|
|
}
|
|
auto memory = delegate_data->tensor_memory_map[buffer_handle].memory;
|
|
auto callback = delegate_data->tensor_memory_map[buffer_handle].callback;
|
|
auto callback_context =
|
|
delegate_data->tensor_memory_map[buffer_handle].callback_context;
|
|
if (!memory || !callback) {
|
|
return kTfLiteError;
|
|
}
|
|
return callback(tensor, memory, 0, tensor->bytes, callback_context);
|
|
}
|
|
|
|
TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle(
|
|
TfLiteContext* context, TfLiteDelegate* delegate,
|
|
TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
|
|
return kTfLiteError;
|
|
}
|
|
|
|
void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
|
|
TfLiteDelegate* delegate,
|
|
TfLiteBufferHandle* handle) {
|
|
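// Clear the slot so RegisterNnapiMemory can reuse it, and reset the
// caller's handle to kTfLiteNullBufferHandle.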
auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
|
|
if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) {
|
|
delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr};
|
|
*handle = kTfLiteNullBufferHandle;
|
|
}
|
|
}
|
|
|
|
int StatefulNnApiDelegate::GetNnApiErrno() const {
|
|
return delegate_data_.nnapi_errno;
|
|
}
|
|
|
|
// static
|
|
TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
|
|
TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
|
|
const std::vector<int>& supported_nodes,
|
|
std::vector<int>* device_supported_nodes, int* num_partitions,
|
|
TfLiteDelegateParams** params_array, int* nnapi_errno) {
|
|
auto* delegate_data = static_cast<Data*>(delegate->data_);
|
|
// The first entry in a TfLiteIntArray is the element count.
|
|
|
|
auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
|
|
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
|
|
context, supported_nodes_int_array.get(), params_array, num_partitions));
|
|
// For each partition, check which nodes are actually supported by the
// target accelerators.
|
|
delegate_data->delegate_state_cache.clear();
|
|
for (int idx = 0; idx < *num_partitions; idx++) {
|
|
const auto& partition_params = (*params_array)[idx];
|
|
std::unique_ptr<NNAPIDelegateKernel> kernel_state(
|
|
new NNAPIDelegateKernel(nnapi));
|
|
TfLiteDelegateParams params_with_delegate = partition_params;
|
|
params_with_delegate.delegate = delegate;
|
|
TF_LITE_ENSURE_STATUS(
|
|
kernel_state->Init(context, ¶ms_with_delegate, nnapi_errno));
|
|
std::vector<int> supported_partition_nodes;
|
|
TF_LITE_ENSURE_STATUS(
|
|
kernel_state->GetOperationsSupportedByTargetNnApiDevices(
|
|
context, &supported_partition_nodes, nnapi_errno));
|
|
device_supported_nodes->insert(device_supported_nodes->end(),
|
|
supported_partition_nodes.begin(),
|
|
supported_partition_nodes.end());
|
|
|
|
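// Cache the initialized kernel only when the accelerator supports every
// node in the partition; the cached kernel is reused by the delegate
// kernel's init without being initialized again.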
bool model_fully_supported = (supported_partition_nodes.size() ==
|
|
partition_params.nodes_to_replace->size);
|
|
if (model_fully_supported) {
|
|
delegate_data->CacheDelegateKernel(&partition_params,
|
|
kernel_state.release());
|
|
}
|
|
}
|
|
|
|
if (device_supported_nodes->size() != supported_nodes.size()) {
|
|
// We changed the set of nodes to delegate; this will create a different
|
|
// partitioning layout.
|
|
auto device_sup_nodes_int_array =
|
|
BuildTfLiteIntArray(*device_supported_nodes);
|
|
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
|
|
context, device_sup_nodes_int_array.get(), params_array,
|
|
num_partitions));
|
|
}
|
|
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
// static
|
|
TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
|
|
int max_partitions,
|
|
std::vector<TfLiteDelegateParams> partition_params_array,
|
|
std::vector<int>* nodes_to_delegate) {
|
|
int num_partitions = partition_params_array.size();
|
|
if (max_partitions <= 0 || num_partitions <= max_partitions) {
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
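// A partition counts as delegated when the first node it replaces appears
// in nodes_to_delegate.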
int number_delegated_partitions = std::count_if(
|
|
partition_params_array.begin(), partition_params_array.end(),
|
|
[nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
|
|
return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
|
|
partition_params.nodes_to_replace->data[0]) !=
|
|
nodes_to_delegate->end();
|
|
});
|
|
|
|
if (number_delegated_partitions > max_partitions) {
|
|
std::sort(partition_params_array.begin(), partition_params_array.end(),
|
|
[](const TfLiteDelegateParams& left,
|
|
const TfLiteDelegateParams& right) -> bool {
|
|
// Reverse sort: largest partitions first.
|
|
return left.nodes_to_replace->size >
|
|
right.nodes_to_replace->size;
|
|
});
|
|
|
|
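// Rebuild nodes_to_delegate from the max_partitions largest partitions
// only.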
nodes_to_delegate->clear();
|
|
|
|
for (int i = 0; i < max_partitions; i++) {
|
|
const TfLiteDelegateParams& partition_params = partition_params_array[i];
|
|
|
|
nodes_to_delegate->insert(nodes_to_delegate->end(),
|
|
partition_params.nodes_to_replace->data,
|
|
partition_params.nodes_to_replace->data +
|
|
partition_params.nodes_to_replace->size);
|
|
}
|
|
}
|
|
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
|
|
TfLiteDelegate* delegate) {
|
|
auto* delegate_data = static_cast<Data*>(delegate->data_);
|
|
int* nnapi_errno = &(delegate_data->nnapi_errno);
|
|
const NnApi* nnapi = delegate_data->nnapi;
|
|
|
|
// Reset the error code when the delegate is initialized by TFLite so that
// a stale error does not persist when the same StatefulNnApiDelegate is
// reused after a failure.
|
|
*nnapi_errno = 0;
|
|
|
|
// Do not check nodes_ if NN API is unavailable.
|
|
if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
|
|
!nnapi->nnapi_exists) {
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
int target_sdk_version = nnapi->android_sdk_version;
|
|
const StatefulNnApiDelegate::Options delegate_options =
|
|
StatefulNnApiDelegate::GetOptions(delegate);
|
|
// For NNAPI 1.2+, check if there is any accelerator available.
|
|
// If not, don't delegate to NNAPI's CPU reference implementation unless
|
|
// it has been specified as target accelerator.
|
|
if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
|
|
if (ShouldUseTargetDevices(delegate_options, nnapi)) {
|
|
std::vector<ANeuralNetworksDevice*> devices;
|
|
TF_LITE_ENSURE_STATUS(
|
|
GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
|
|
|
|
if (devices.empty()) {
|
|
if (delegate_options.accelerator_name) {
|
|
// There was a selected device and it is not available.
|
|
return kTfLiteError;
|
|
} else {
|
|
// Only nnapi-reference is available but was disabled by the delegate
|
|
// options
|
|
return kTfLiteOk;
|
|
}
|
|
}
|
|
|
|
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
|
|
context, nnapi, devices, &target_sdk_version, nnapi_errno));
|
|
} else {
|
|
// If no accelerator is specified, only use NNAPI if an accelerator is
// available. Any available accelerator will make the device_count larger
// than 1. A more sophisticated check and allowlisting can be added later.
|
|
uint32_t device_count = 0;
|
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
|
context, nnapi->ANeuralNetworks_getDeviceCount(&device_count),
|
|
"getting number of NNAPI devices", nnapi_errno);
|
|
if (device_count <= 1) {
|
|
return kTfLiteOk;
|
|
}
|
|
}
|
|
}
|
|
|
|
std::vector<int> supported_nodes;
|
|
// We don't care about all nodes_; we only care about the ones in the
// current plan.
|
|
TfLiteIntArray* plan;
|
|
TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
|
|
|
|
// Check for every node whether it is supported.
|
|
const bool is_accelerator_specified = ShouldUseTargetDevices(
|
|
delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
|
|
for (int node_index : TfLiteIntArrayView(plan)) {
|
|
TfLiteNode* node;
|
|
TfLiteRegistration* registration;
|
|
TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
|
|
context, node_index, &node, ®istration));
|
|
if (NNAPIDelegateKernel::Validate(context, registration->builtin_code,
|
|
registration->version, target_sdk_version,
|
|
node, is_accelerator_specified)) {
|
|
supported_nodes.push_back(node_index);
|
|
}
|
|
}
|
|
|
|
// If there are no delegated nodes, short-circuit node replacement.
|
|
if (supported_nodes.empty()) {
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
// NN API Delegate Registration (the pseudo kernel that will invoke NN
// API node subsets).
|
|
static const TfLiteRegistration nnapi_delegate_kernel = {
|
|
.init = [](TfLiteContext* context, const char* buffer,
|
|
size_t length) -> void* {
|
|
const TfLiteDelegateParams* params =
|
|
reinterpret_cast<const TfLiteDelegateParams*>(buffer);
|
|
|
|
auto* delegate_data = static_cast<Data*>(params->delegate->data_);
|
|
int* nnapi_errno = &(delegate_data->nnapi_errno);
|
|
|
|
NNAPIDelegateKernel* kernel_state =
|
|
delegate_data->MaybeGetCachedDelegateKernel(params);
|
|
if (!kernel_state) {
|
|
kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi);
|
|
kernel_state->Init(context, params, nnapi_errno);
|
|
}
|
|
|
|
return kernel_state;
|
|
},
|
|
|
|
.free = [](TfLiteContext* context, void* buffer) -> void {
|
|
delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
|
|
},
|
|
|
|
.prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
|
|
NNAPIDelegateKernel* state =
|
|
reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
|
|
int* nnapi_errno =
|
|
&(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
|
|
return state->Prepare(context, node, nnapi_errno);
|
|
},
|
|
|
|
.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
|
|
NNAPIDelegateKernel* state =
|
|
reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
|
|
int* nnapi_errno =
|
|
&(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
|
|
return state->Invoke(context, node, nnapi_errno);
|
|
},
|
|
|
|
.profiling_string = nullptr,
|
|
.builtin_code = kTfLiteBuiltinDelegate,
|
|
.custom_name = "TfLiteNnapiDelegate",
|
|
.version = 1,
|
|
};
|
|
|
|
std::vector<int> nodes_to_delegate;
|
|
|
|
int num_partitions;
|
|
TfLiteDelegateParams* params_array;
|
|
if (is_accelerator_specified &&
|
|
nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
|
|
// Filter out nodes not supported by the target accelerators.
// Supported operations cannot be queried before NNAPI 1.2.
|
|
TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
|
|
context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
|
|
&num_partitions, ¶ms_array, nnapi_errno));
|
|
} else {
|
|
nodes_to_delegate = supported_nodes;
|
|
auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
|
|
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
|
|
context, supported_nodes_int_array.get(), ¶ms_array,
|
|
&num_partitions));
|
|
}
|
|
|
|
TF_LITE_ENSURE_STATUS(
|
|
LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
|
|
std::vector<TfLiteDelegateParams>(
|
|
params_array, params_array + num_partitions),
|
|
&nodes_to_delegate));
|
|
|
|
if (nodes_to_delegate.empty()) {
|
|
return kTfLiteOk;
|
|
} else {
|
|
// Request TFLite to partition the graph and create a new
// nnapi_delegate_kernel for each independent node subset.
|
|
auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
|
|
return context->ReplaceNodeSubsetsWithDelegateKernels(
|
|
context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
|
|
delegate);
|
|
}
|
|
}
|
|
|
|
// Returns a singleton NNAPI Delegate that can check for support of ops.
|
|
TfLiteDelegate* NnApiDelegate() {
|
|
static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate();
|
|
return delegate;
|
|
}
} // namespace tflite