diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD
index f8439da7087..954a943715c 100644
--- a/tensorflow/lite/delegates/nnapi/BUILD
+++ b/tensorflow/lite/delegates/nnapi/BUILD
@@ -22,7 +22,10 @@ cc_library(
             "quant_lstm_sup.cc",
         ],
     }),
-    hdrs = ["nnapi_delegate.h"],
+    hdrs = [
+        "nnapi_delegate.h",
+        "nnapi_delegate_kernel.h",
+    ],
     deps = [
         "//tensorflow/lite:allocation",
         "//tensorflow/lite:kernel_api",
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index 8431cf600d0..34c7bffe0d0 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -28,18 +28,9 @@ limitations under the License.
 #include <tuple>
 #include <vector>
 
-#include "tensorflow/lite/allocation.h"
-#include "tensorflow/lite/builtin_op_data.h"
-#include "tensorflow/lite/builtin_ops.h"
-#include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/c/c_api_internal.h"
-#include "tensorflow/lite/context_util.h"
-#include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
-#include "tensorflow/lite/kernels/kernel_util.h"
-#include "tensorflow/lite/minimal_logging.h"
-#include "tensorflow/lite/nnapi/nnapi_implementation.h"
-#include "tensorflow/lite/util.h"
-
+// This section needs to come before the #include of nnapi_delegate_kernel.h
+// because the code in that header changes according to the definition of
+// TFLITE_NNAPI_ALLOW_MMAP_SHARING
 #ifdef __ANDROID__
 #include <sys/system_properties.h>
 #endif
@@ -49,6 +40,19 @@ limitations under the License.
 #include <unistd.h>
 #endif
 
+#include "tensorflow/lite/allocation.h"
+#include "tensorflow/lite/builtin_op_data.h"
+#include "tensorflow/lite/builtin_ops.h"
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/context_util.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
+#include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/minimal_logging.h"
+#include "tensorflow/lite/nnapi/nnapi_implementation.h"
+#include "tensorflow/lite/util.h"
+
 namespace tflite {
 namespace {
 
@@ -64,8 +68,6 @@ namespace {
   }                                                               \
   } while (0)
 
-namespace {
-
 bool IsFloat(TfLiteType type) {
   switch (type) {
     case kTfLiteFloat32:
@@ -283,9 +285,6 @@ bool IsRestrictedScalesCompliant(const TfLiteContext* context,
   return input_scale * filter_scale < output_scale;
 }
 
-constexpr int32_t kMinSdkVersionForNNAPI = 27;
-constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
-constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
 constexpr size_t kDefaultByteAlignmentForNNAPI = 16;
 
 static size_t getNumPaddingBytes(size_t byte_size) {
@@ -371,18 +370,8 @@ enum {
 
 }  // namespace
 
-// RAII NN API Model Destructor for use with std::unique_ptr
-struct NNFreeModel {
-  void operator()(ANeuralNetworksModel* model) {
-    NnApiImplementation()->ANeuralNetworksModel_free(model);
-  }
-};
-// RAII NN API Compilation Destructor for use with std::unique_ptr
-struct NNFreeCompilation {
-  void operator()(ANeuralNetworksCompilation* model) {
-    NnApiImplementation()->ANeuralNetworksCompilation_free(model);
-  }
-};
+namespace delegate {
+namespace nnapi {
 
 // RAII NN API Execution Destructor for use with std::unique_ptr
 struct NNFreeExecution {
   void operator()(ANeuralNetworksExecution* execution) {
     NnApiImplementation()->ANeuralNetworksExecution_free(execution);
   }
 };
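// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): how RAII deleter
// structs like NNFreeExecution above are meant to be used with
// std::unique_ptr. `FakeHandle` and `FakeFree` are hypothetical stand-ins
// for an ANeuralNetworks* handle type and its *_free function; only the
// custom-deleter pattern itself comes from the patch.
#include <memory>

struct FakeHandle {};                       // stands in for ANeuralNetworksExecution
void FakeFree(FakeHandle* h) { delete h; }  // stands in for ..._free()

struct NNFreeFakeHandle {
  void operator()(FakeHandle* h) { FakeFree(h); }  // mirrors NNFreeExecution
};

int main() {
  // The handle is released exactly once, when the unique_ptr goes out of
  // scope, no matter which return path the surrounding code takes.
  std::unique_ptr<FakeHandle, NNFreeFakeHandle> execution(new FakeHandle());
  return 0;
}
// ---------------------------------------------------------------------------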
-// Manage NNAPI shared memory handle
-class NNMemory {
- public:
-#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
-  NNMemory(const NnApi* nnapi, const char* name, size_t size) {
-    if (name && size > 0) {
-      nnapi_ = nnapi;
-      byte_size_ = size;
-      fd_ = nnapi_->ASharedMemory_create(name, size);
-      data_ptr_ = reinterpret_cast<uint8_t*>(
-          mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
-      nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
-                                                 fd_, 0, &nn_memory_handle_);
-    }
-  }
-#else
-  NNMemory(const NnApi* /*nnapi*/, const char* /*name*/, size_t /*size*/) {}
-#endif
-
-  ~NNMemory() {
-#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
-    if (data_ptr_) {
-      munmap(data_ptr_, byte_size_);
-    }
-    if (nn_memory_handle_) {
-      nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
-    }
-    if (fd_ > 0) close(fd_);
-#endif
-  }
-
-  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
-  uint8_t* get_data_ptr() { return data_ptr_; }
-
- private:
-#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
-  const NnApi* nnapi_;
-  int fd_ = 0;
-  size_t byte_size_ = 0;
-#endif
-  uint8_t* data_ptr_ = nullptr;
-  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
-};  // namespace
-
-// Track tensor indices to NN API tensor indices mapping.
-class OperandMapping {
- public:
-  // Given a TFLite index return the ANN index. If it doesn't exist
-  // return -1.
-  int lite_index_to_ann(int index) const {
-    if (index < lite_tensor_to_ann_tensor_.size())
-      return lite_tensor_to_ann_tensor_[index];
-    else
-      return -1;
-  }
-
-  // NN API uses non tensor operands instead of structs. This creates one
-  // and returns the index. It uses a std::vector and resizes it as needed
-  // keeping -1 to unmapped values. Intermediate tensors likely will not
-  // be mapped.
-  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }
-
-  // This call is necessary for input operands generated by the delegate
-  // to map constant inputs not present in TFLite but required by NNAPI,
-  // for example when splitting one input in several ones.
-  int add_delegate_generated_input_ann_tensors_operand() {
-    return next_ann_tensor_index_++;
-  }
-
-  // Add a new mapping from `tflite_index` and return the NN API tensor index.
-  int add_new_ann_tensor_index(int tflite_index) {
-    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
-      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
-    }
-    int new_tensor_index = next_ann_tensor_index_++;
-    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
-    return new_tensor_index;
-  }
-
-  // Given a TFLite index returns a TFLite type to which a tensor must be
-  // converted during copying the data to the memory allocated for NN API.
-  // kTfLiteNoType means no conversion is needed.
-  TfLiteType lite_index_to_ann_type_conversion(int index) const {
-    if (index >= 0 && index < index_to_type_conversion_.size())
-      return index_to_type_conversion_[index];
-    else
-      return kTfLiteNoType;
-  }
-
-  // Add a new mapping from TFLite index to a type conversion.
-  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
-    if (tflite_index >= index_to_type_conversion_.size()) {
-      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
-    }
-    index_to_type_conversion_[tflite_index] = tflite_type;
-  }
-
- private:
-  // Next index of ann tensor
-  int next_ann_tensor_index_ = 0;
-
-  // Mapping from lite index. Use a std::vector for speed and code size
-  // rather than a map.
-  std::vector<int> lite_tensor_to_ann_tensor_;
-  // Mapping from lite index to a type which tensor must be converted to during
-  // the copying of the data to the memory allocated for NN API. kTfLiteNoType
-  // means no conversion is needed. Use an std::vector for speed and code size
-  // rather than a map.
-  std::vector<TfLiteType> index_to_type_conversion_;
-};
-
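// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): the index-mapping
// scheme used by OperandMapping above, reduced to its core. A vector indexed
// by TFLite tensor index stores the NNAPI operand index, growing on demand
// with -1 for unmapped slots, while a counter hands out fresh ANN indices.
// `IndexMap` is a hypothetical, minimal analogue, not the real class.
#include <cassert>
#include <vector>

class IndexMap {
 public:
  // Returns the ANN index for a TFLite index, or -1 if unmapped.
  int lite_to_ann(int lite) const {
    return lite < static_cast<int>(map_.size()) ? map_[lite] : -1;
  }
  // Grows the vector as needed, keeping -1 for unmapped values.
  int add(int lite) {
    if (lite >= static_cast<int>(map_.size())) map_.resize(lite + 1, -1);
    return map_[lite] = next_ann_++;
  }

 private:
  int next_ann_ = 0;
  std::vector<int> map_;
};

int main() {
  IndexMap m;
  assert(m.lite_to_ann(5) == -1);  // unmapped until added
  assert(m.add(5) == 0);           // first ANN operand index handed out
  assert(m.lite_to_ann(5) == 0);
  return 0;
}
// ---------------------------------------------------------------------------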
 class DequantizeMapping {
  public:
   int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
@@ -958,10 +836,39 @@ class NNAPIOpBuilder {
           nn_model_, ann_tensor_index, &ann_perchannel_params));
     }
     if (tensor->allocation_type == kTfLiteMmapRo) {
+      if (IsQuantized(tensor_type) && need_int8_conversion) {
+        // We need to add a tensor and convert the weights into uint8.
+        // Currently this is only needed for fully_connected. The new_tensor is
+        // needed for lifetime management for the converted weights.
+        int new_tensor_index = -1;
+        TF_LITE_ENSURE_OK(context_,
+                          context_->AddTensors(context_, 1, &new_tensor_index));
+        TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
+        new_tensor->type = kTfLiteUInt8;
+        new_tensor->allocation_type = kTfLiteDynamic;
+        new_tensor->params.scale = scale;
+        new_tensor->params.zero_point = zeroPoint;
+        // Not removing the new tensor in case of resizing errors since it will
+        // be cleared by the context
+        TF_LITE_ENSURE_OK(
+            context_, context_->ResizeTensor(context_, new_tensor,
+                                             // ResizeTensor takes ownership of
+                                             // the dims array passed as param
+                                             TfLiteIntArrayCopy(tensor->dims)));
+        // Convert each int8 value into the corresponding uint8 value.
+        const auto num_elements = NumElements(tensor);
+        for (int i = 0; i < num_elements; ++i) {
+          new_tensor->data.uint8[i] = static_cast<uint8_t>(
+              static_cast<int32_t>(tensor->data.int8[i]) + 128);
+        }
+        RETURN_TFLITE_ERROR_IF_NN_ERROR(
+            context_, nnapi_->ANeuralNetworksModel_setOperandValue(
+                          nn_model_, ann_tensor_index, new_tensor->data.raw,
+                          new_tensor->bytes));
 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
-    if (tensor->allocation &&
-        static_cast<const Allocation*>(tensor->allocation)->type() ==
-            Allocation::Type::kMMap) {
+      } else if (tensor->allocation &&
+                 static_cast<const Allocation*>(tensor->allocation)->type() ==
+                     Allocation::Type::kMMap) {
       const MMAPAllocation* mmap_alloc =
           static_cast<const MMAPAllocation*>(tensor->allocation);
       if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
@@ -981,15 +888,13 @@ class NNAPIOpBuilder {
           context_, nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
                         nn_model_, ann_tensor_index, ann_memory_handle, offset,
                         tensor->bytes));
-    } else {
 #endif
+      } else {
       RETURN_TFLITE_ERROR_IF_NN_ERROR(
           context_, nnapi_->ANeuralNetworksModel_setOperandValue(
                         nn_model_, ann_tensor_index, tensor->data.raw,
                         tensor->bytes));
-#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
     }
-#endif
 
     indices->push_back(ann_tensor_index);
@@ -1024,15 +929,6 @@ class NNAPIOpBuilder {
   std::vector<int> augmented_outputs_;
 };
 
-struct NNAPIOpMappingArgs {
-  TfLiteContext* context;
-  NNAPIOpBuilder* builder;
-  TfLiteNode* node;
-  std::vector<int>* model_state_outputs;
-  std::vector<int>* model_state_tfl_inputs;
-  std::vector<std::tuple<int, int>>* feedback_loops;
-};
-
 // Mapping function simply returning the operation type without adding any
 // additional parameter.
 template <ANeuralNetworksOperationType OperationType>
@@ -1041,210 +937,176 @@ ANeuralNetworksOperationType BasicMappingFn(
   return OperationType;
 }
 
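// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): the dispatch pattern
// that Map() below implements. A switch either returns nullptr (node not
// supported) or a stateless handler convertible to a plain function pointer
// that yields the NNAPI operation code; the template mirrors BasicMappingFn.
// All names here are hypothetical miniatures of the real ones.
using OpCode = int;
using MiniMappingFn = OpCode (*)();

template <OpCode kOp>
OpCode MiniBasicMappingFn() {  // mirrors BasicMappingFn<OperationType>
  return kOp;
}

MiniMappingFn MiniMap(int builtin_code, int version) {
  constexpr OpCode kAnnAdd = 0;  // stands in for ANEURALNETWORKS_ADD
  switch (builtin_code) {
    case 1:  // stands in for kTfLiteBuiltinAdd
      if (version <= 2) return MiniBasicMappingFn<kAnnAdd>;
      return nullptr;  // op version not supported
    default:
      return nullptr;  // builtin not supported at all
  }
}

int main() {
  // A null return means "do not delegate this node"; non-null means the
  // caller can later invoke the handler to emit the NNAPI operation.
  return MiniMap(1, 1) == nullptr ? 1 : 0;
}
// ---------------------------------------------------------------------------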
-// The kernel that represents the node sub set of TF Lite being run on NN API.
-class NNAPIDelegateKernel {
- public:
-  NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
-  ~NNAPIDelegateKernel() {
-    for (auto content : allocation_memory_mapping_) {
-      nnapi_->ANeuralNetworksMemory_free(content.second);
-    }
-  }
-
-  typedef ANeuralNetworksOperationType (*MappingFn)(
-      const NNAPIOpMappingArgs& mapping_args);
-
-  // Return a function that knows how to translate a node into its operands
-  // when called. You can use this function to see if a node is supported
-  // (i.e. if the returned MappingFn is null, then the node is not supported).
-  static MappingFn Map(const TfLiteContext* context, int builtin_code,
-                       int version, int android_sdk_version,
-                       const TfLiteNode* node, bool is_accelerator_specified) {
-    switch (builtin_code) {
-      case kTfLiteBuiltinAdd:
-        if (version <= 2) {
-          if (!IsFloatOrQuant8Operator(context, node)) {
+// Return a function that knows how to translate a node into its operands
+// when called. You can use this function to see if a node is supported
+// (i.e. if the returned MappingFn is null, then the node is not supported).
+NNAPIDelegateKernel::MappingFn NNAPIDelegateKernel::Map(
+    const TfLiteContext* context, int builtin_code, int version,
+    int android_sdk_version, const TfLiteNode* node,
+    bool is_accelerator_specified) {
+  switch (builtin_code) {
+    case kTfLiteBuiltinAdd:
+      if (version <= 2) {
+        if (!IsFloatOrQuant8Operator(context, node)) {
+          return nullptr;
+        }
+        return [](const NNAPIOpMappingArgs& mapping_args)
+            -> ANeuralNetworksOperationType {
+          auto builtin = reinterpret_cast<TfLiteAddParams*>(
+              mapping_args.node->builtin_data);
+          mapping_args.builder->AddScalarInt32Operand(builtin->activation);
+          return ANEURALNETWORKS_ADD;
+        };
+      }
+      break;
+    case kTfLiteBuiltinArgMax:
+    case kTfLiteBuiltinArgMin:
+      if (version <= 2) {
+        // Those operators were introduced in NNAPI 1.2.
+        if (android_sdk_version < kMinSdkVersionForNNAPI12) {
+          return nullptr;
+        }
+        // Only certain input types are supported.
+        auto input_type = context->tensors[node->inputs->data[0]].type;
+        if (input_type != kTfLiteFloat16 && input_type != kTfLiteFloat32 &&
+            input_type != kTfLiteInt32 && input_type != kTfLiteUInt8 &&
+            input_type != kTfLiteInt8) {
+          return nullptr;
+        }
+        // NNAPI only supports axis as int32. If the axis type is int64 and
+        // constant we can convert it to int32 if the value isn't too large.
+        const auto& axis_tensor = context->tensors[node->inputs->data[1]];
+        if (axis_tensor.type == kTfLiteInt64) {
+          if (axis_tensor.allocation_type != kTfLiteMmapRo ||
+              *axis_tensor.data.i64 > std::numeric_limits<int32_t>::max() ||
+              *axis_tensor.data.i64 < std::numeric_limits<int32_t>::min()) {
            return nullptr;
          }
+        } else if (axis_tensor.type != kTfLiteInt32) {
+          return nullptr;
+        }
+        if (builtin_code == kTfLiteBuiltinArgMax) {
+          // NNAPI only supports int32 output.
+          auto builtin =
+              reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
+          if (builtin->output_type != kTfLiteInt32) {
+            return nullptr;
+          }
+          return BasicMappingFn<ANEURALNETWORKS_ARGMAX>;
+        } else {
+          // NNAPI only supports int32 output.
+ auto builtin = + reinterpret_cast(node->builtin_data); + if (builtin->output_type != kTfLiteInt32) { + return nullptr; + } + return BasicMappingFn; + } + } + break; + case kTfLiteBuiltinMul: + if (version <= 2) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_MUL; + }; + } + break; + case kTfLiteBuiltinAveragePool2d: + if (version <= 2) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + auto builtin = reinterpret_cast(node->builtin_data); + // TODO(b/138756912): Large filter window would overflow on the + // reference CPU path. + if (!is_accelerator_specified && + (builtin->filter_width * builtin->filter_height > 256)) { + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + mapping_args.builder->AddPoolingParams( + mapping_args.node->builtin_data); + return ANEURALNETWORKS_AVERAGE_POOL_2D; + }; + } + break; + case kTfLiteBuiltinMaxPool2d: + if (version <= 2) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + mapping_args.builder->AddPoolingParams( + mapping_args.node->builtin_data); + return ANEURALNETWORKS_MAX_POOL_2D; + }; + } + break; + case kTfLiteBuiltinL2Pool2d: + if (version == 1) { + if (!IsFloatOperator(context, node)) { + return nullptr; + } + auto builtin = reinterpret_cast(node->builtin_data); + // Pre-Q devices may not support fused activation for l2_pool. + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + builtin->activation != kTfLiteActNone) { + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + mapping_args.builder->AddPoolingParams( + mapping_args.node->builtin_data); + return ANEURALNETWORKS_L2_POOL_2D; + }; + } + break; + case kTfLiteBuiltinConv2d: + if (version <= 3) { + if ((android_sdk_version < kMinSdkVersionForNNAPI12) && + (IsHybridOperator(context, builtin_code, node) || + !IsFloatOrUint8Operator(context, node))) { + // Hybrid operators not supported before NNAPI 1.2. + return nullptr; + } + if (android_sdk_version < kMinSdkVersionForNNAPI12) { + // Per-channel quantized convolution not supported before NNAPI 1.2. + const auto& filter_tensor = context->tensors[node->inputs->data[1]]; + if (filter_tensor.quantization.type == kTfLiteAffineQuantization) { + TfLiteAffineQuantization* quantization_params = + static_cast( + filter_tensor.quantization.params); + if (quantization_params->scale->size > 1) { + return nullptr; + } + } + } + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + input_type == kTfLiteUInt8 && + !IsRestrictedScalesCompliant(context, node)) { + return nullptr; + } + auto builtin = reinterpret_cast(node->builtin_data); + if (node->inputs->size != 3) { + // TODO(b/132950584): Add support for Conv2D with omitted bias + return nullptr; + } + // NNAPI supports dilated Conv2D since NNAPI 1.2. + if (builtin->dilation_width_factor != 1 || + builtin->dilation_height_factor != 1) { if (android_sdk_version < kMinSdkVersionForNNAPI12) { return nullptr; } - // Only certain input types are supported. 
- auto input_type = context->tensors[node->inputs->data[0]].type; - if (input_type != kTfLiteFloat16 && input_type != kTfLiteFloat32 && - input_type != kTfLiteInt32 && input_type != kTfLiteUInt8 && - input_type != kTfLiteInt8) { - return nullptr; - } - // NNAPI only supports axis as int32. If the axis type is int64 and - // constant we can convert it to int32 if the value isn't too large. - const auto& axis_tensor = context->tensors[node->inputs->data[1]]; - if (axis_tensor.type == kTfLiteInt64) { - if (axis_tensor.allocation_type != kTfLiteMmapRo || - *axis_tensor.data.i64 > std::numeric_limits::max() || - *axis_tensor.data.i64 < std::numeric_limits::min()) { - return nullptr; - } - } else if (axis_tensor.type != kTfLiteInt32) { - return nullptr; - } - if (builtin_code == kTfLiteBuiltinArgMax) { - // NNAPI only supports int32 output. - auto builtin = - reinterpret_cast(node->builtin_data); - if (builtin->output_type != kTfLiteInt32) { - return nullptr; - } - return BasicMappingFn; - } else { - // NNAPI only supports int32 output. - auto builtin = - reinterpret_cast(node->builtin_data); - if (builtin->output_type != kTfLiteInt32) { - return nullptr; - } - return BasicMappingFn; - } - } - break; - case kTfLiteBuiltinMul: - if (version <= 2) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - return ANEURALNETWORKS_MUL; - }; - } - break; - case kTfLiteBuiltinAveragePool2d: - if (version <= 2) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - auto builtin = - reinterpret_cast(node->builtin_data); - // TODO(b/138756912): Large filter window would overflow on the - // reference CPU path. - if (!is_accelerator_specified && - (builtin->filter_width * builtin->filter_height > 256)) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - mapping_args.builder->AddPoolingParams( - mapping_args.node->builtin_data); - return ANEURALNETWORKS_AVERAGE_POOL_2D; - }; - } - break; - case kTfLiteBuiltinMaxPool2d: - if (version <= 2) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - mapping_args.builder->AddPoolingParams( - mapping_args.node->builtin_data); - return ANEURALNETWORKS_MAX_POOL_2D; - }; - } - break; - case kTfLiteBuiltinL2Pool2d: - if (version == 1) { - if (!IsFloatOperator(context, node)) { - return nullptr; - } - auto builtin = - reinterpret_cast(node->builtin_data); - // Pre-Q devices may not support fused activation for l2_pool. - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - builtin->activation != kTfLiteActNone) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - mapping_args.builder->AddPoolingParams( - mapping_args.node->builtin_data); - return ANEURALNETWORKS_L2_POOL_2D; - }; - } - break; - case kTfLiteBuiltinConv2d: - if (version <= 3) { - if ((android_sdk_version < kMinSdkVersionForNNAPI12) && - (IsHybridOperator(context, builtin_code, node) || - !IsFloatOrUint8Operator(context, node))) { - // Hybrid operators not supported before NNAPI 1.2. 
- return nullptr; - } - if (android_sdk_version < kMinSdkVersionForNNAPI12) { - // Per-channel quantized convolution not supported before NNAPI 1.2. - const auto& filter_tensor = context->tensors[node->inputs->data[1]]; - if (filter_tensor.quantization.type == kTfLiteAffineQuantization) { - TfLiteAffineQuantization* quantization_params = - static_cast( - filter_tensor.quantization.params); - if (quantization_params->scale->size > 1) { - return nullptr; - } - } - } - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - input_type == kTfLiteUInt8 && - !IsRestrictedScalesCompliant(context, node)) { - return nullptr; - } - auto builtin = - reinterpret_cast(node->builtin_data); - if (node->inputs->size != 3) { - // TODO(b/132950584): Add support for Conv2D with omitted bias - return nullptr; - } - // NNAPI supports dilated Conv2D since NNAPI 1.2. - if (builtin->dilation_width_factor != 1 || - builtin->dilation_height_factor != 1) { - if (android_sdk_version < kMinSdkVersionForNNAPI12) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->padding); - mapping_args.builder->AddScalarInt32Operand( - builtin->stride_width); - mapping_args.builder->AddScalarInt32Operand( - builtin->stride_height); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - mapping_args.builder->AddScalarBoolOperand( - false); // Use NHWC format - mapping_args.builder->AddScalarInt32Operand( - builtin->dilation_width_factor); - mapping_args.builder->AddScalarInt32Operand( - builtin->dilation_height_factor); - return ANEURALNETWORKS_CONV_2D; - }; - } return [](const NNAPIOpMappingArgs& mapping_args) -> ANeuralNetworksOperationType { auto builtin = reinterpret_cast( @@ -1253,2161 +1115,2139 @@ class NNAPIDelegateKernel { mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); mapping_args.builder->AddScalarInt32Operand(builtin->activation); + mapping_args.builder->AddScalarBoolOperand( + false); // Use NHWC format + mapping_args.builder->AddScalarInt32Operand( + builtin->dilation_width_factor); + mapping_args.builder->AddScalarInt32Operand( + builtin->dilation_height_factor); return ANEURALNETWORKS_CONV_2D; }; } - break; - case kTfLiteBuiltinDepthwiseConv2d: - if (version <= 3) { - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - !IsFloatOrUint8Operator(context, node)) { - return nullptr; - } - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - input_type == kTfLiteUInt8 && - !IsRestrictedScalesCompliant(context, node)) { - return nullptr; - } - auto builtin = - reinterpret_cast(node->builtin_data); - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - (builtin->dilation_width_factor != 1 || - builtin->dilation_height_factor != 1)) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->padding); - mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); - mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); - mapping_args.builder->AddScalarInt32Operand( - builtin->depth_multiplier); - 
mapping_args.builder->AddScalarInt32Operand(builtin->activation); - if (builtin->dilation_width_factor != 1 || - builtin->dilation_height_factor != 1) { - mapping_args.builder->AddScalarBoolOperand( - false); // Use NHWC format - mapping_args.builder->AddScalarInt32Operand( - builtin->dilation_width_factor); - mapping_args.builder->AddScalarInt32Operand( - builtin->dilation_height_factor); - } - return ANEURALNETWORKS_DEPTHWISE_CONV_2D; - }; - } - break; - case kTfLiteBuiltinFullyConnected: - if (version <= 4) { - if (node->inputs->size != 3 || - node->inputs->data[2] == kOptionalTensor) { - // TODO(b/132950584): Add support for FullyConnected with no bias. - return nullptr; - } - const auto output_type = - context->tensors[node->outputs->data[0]].type; - if (output_type == kTfLiteInt16) { - return nullptr; - } - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - (IsHybridOperator(context, builtin_code, node) || - !IsFloatOrUint8Operator(context, node))) { - // Hybrid operators not supported before NNAPI 1.2. - return nullptr; - } - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - input_type == kTfLiteUInt8 && - !IsRestrictedScalesCompliant(context, node)) { - return nullptr; - } - auto builtin = - reinterpret_cast(node->builtin_data); - if (builtin->keep_num_dims) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - return ANEURALNETWORKS_FULLY_CONNECTED; - }; - } - break; - case kTfLiteBuiltinSoftmax: - if (version <= 2) { - const auto& input = context->tensors[node->outputs->data[0]]; - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - const int input_rank = input.dims->size; - if (input_rank > 4) return nullptr; - // Before API level 29 only 2D and 4D input tensors were supported. - if (android_sdk_version < kMinSdkVersionForNNAPI12) { - if (input_rank != 2 && input_rank != 4) return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarFloat32Operand(builtin->beta); - // Optional scalar specifying the dimension the activation would be - // performed on is not added. Default to -1. - return ANEURALNETWORKS_SOFTMAX; - }; - } - break; - case kTfLiteBuiltinReshape: - if (version == 1) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - // The shape input tensor must be constant. - if ((node->inputs->size < 2) || - (context->tensors[node->inputs->data[1]].allocation_type != - kTfLiteMmapRo)) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinResizeBilinear: - if (version <= 2) { - const auto& input = context->tensors[node->inputs->data[0]]; - const auto output_dims = - context->tensors[node->outputs->data[0]].dims; - if (input.dims->size != 4) return nullptr; - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - // The size input tensor must be constant. 
- if ((node->inputs->size < 2) || - (context->tensors[node->inputs->data[1]].allocation_type != - kTfLiteMmapRo)) { - return nullptr; - } - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - output_dims->data[1] != output_dims->data[2]) { - // Require width == height due to driver differences in NNAPI < 1.2 - return nullptr; - } - auto builtin = - reinterpret_cast(node->builtin_data); - if (builtin->align_corners) { - // NNAPI does not support align_corners == true. - return nullptr; - } - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - input.type != kTfLiteFloat32) { - // NNAPI 1.0 & 1.1 only supports float input. - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - const int output_id = mapping_args.node->outputs->data[0]; - auto& output = mapping_args.context->tensors[output_id]; - const int output_height = output.dims->data[1]; - const int output_width = output.dims->data[2]; - mapping_args.builder->AddScalarInt32Operand(output_width); - mapping_args.builder->AddScalarInt32Operand(output_height); - return ANEURALNETWORKS_RESIZE_BILINEAR; - }; - } - break; - case kTfLiteBuiltinResizeNearestNeighbor: { - if (version > 2 || android_sdk_version < kMinSdkVersionForNNAPI12) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->padding); + mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); + mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_CONV_2D; + }; + } + break; + case kTfLiteBuiltinDepthwiseConv2d: + if (version <= 3) { + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + !IsFloatOrUint8Operator(context, node)) { return nullptr; } + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + input_type == kTfLiteUInt8 && + !IsRestrictedScalesCompliant(context, node)) { + return nullptr; + } + auto builtin = + reinterpret_cast(node->builtin_data); + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + (builtin->dilation_width_factor != 1 || + builtin->dilation_height_factor != 1)) { + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->padding); + mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); + mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); + mapping_args.builder->AddScalarInt32Operand( + builtin->depth_multiplier); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + if (builtin->dilation_width_factor != 1 || + builtin->dilation_height_factor != 1) { + mapping_args.builder->AddScalarBoolOperand( + false); // Use NHWC format + mapping_args.builder->AddScalarInt32Operand( + builtin->dilation_width_factor); + mapping_args.builder->AddScalarInt32Operand( + builtin->dilation_height_factor); + } + return ANEURALNETWORKS_DEPTHWISE_CONV_2D; + }; + } + break; + case kTfLiteBuiltinFullyConnected: + if (version <= 4) { + if (node->inputs->size != 3 || + node->inputs->data[2] == kOptionalTensor) { + // TODO(b/132950584): Add support for FullyConnected with no bias. 
+ return nullptr; + } + const auto output_type = context->tensors[node->outputs->data[0]].type; + if (output_type == kTfLiteInt16) { + return nullptr; + } + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + (IsHybridOperator(context, builtin_code, node) || + !IsFloatOrUint8Operator(context, node))) { + // Hybrid operators not supported before NNAPI 1.2. + return nullptr; + } + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + input_type == kTfLiteUInt8 && + !IsRestrictedScalesCompliant(context, node)) { + return nullptr; + } + auto builtin = + reinterpret_cast(node->builtin_data); + if (builtin->keep_num_dims) { + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_FULLY_CONNECTED; + }; + } + break; + case kTfLiteBuiltinSoftmax: + if (version <= 2) { + const auto& input = context->tensors[node->outputs->data[0]]; if (!IsFloatOrQuant8Operator(context, node)) { return nullptr; } - auto builtin = reinterpret_cast( - node->builtin_data); + const int input_rank = input.dims->size; + if (input_rank > 4) return nullptr; + // Before API level 29 only 2D and 4D input tensors were supported. + if (android_sdk_version < kMinSdkVersionForNNAPI12) { + if (input_rank != 2 && input_rank != 4) return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarFloat32Operand(builtin->beta); + // Optional scalar specifying the dimension the activation would be + // performed on is not added. Default to -1. + return ANEURALNETWORKS_SOFTMAX; + }; + } + break; + case kTfLiteBuiltinReshape: + if (version == 1) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + // The shape input tensor must be constant. + if ((node->inputs->size < 2) || + (context->tensors[node->inputs->data[1]].allocation_type != + kTfLiteMmapRo)) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinResizeBilinear: + if (version <= 2) { + const auto& input = context->tensors[node->inputs->data[0]]; + const auto output_dims = context->tensors[node->outputs->data[0]].dims; + if (input.dims->size != 4) return nullptr; + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + // The size input tensor must be constant. + if ((node->inputs->size < 2) || + (context->tensors[node->inputs->data[1]].allocation_type != + kTfLiteMmapRo)) { + return nullptr; + } + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + output_dims->data[1] != output_dims->data[2]) { + // Require width == height due to driver differences in NNAPI < 1.2 + return nullptr; + } + auto builtin = + reinterpret_cast(node->builtin_data); if (builtin->align_corners) { // NNAPI does not support align_corners == true. return nullptr; } + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + input.type != kTfLiteFloat32) { + // NNAPI 1.0 & 1.1 only supports float input. + return nullptr; + } return [](const NNAPIOpMappingArgs& mapping_args) -> ANeuralNetworksOperationType { - const TfLiteTensor& new_shape = - mapping_args.context->tensors[mapping_args.node->inputs->data[1]]; - // NNAPI uses scalar inputs for height and width. 
- mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]); - mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]); - mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format - - return ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR; + const int output_id = mapping_args.node->outputs->data[0]; + auto& output = mapping_args.context->tensors[output_id]; + const int output_height = output.dims->data[1]; + const int output_width = output.dims->data[2]; + mapping_args.builder->AddScalarInt32Operand(output_width); + mapping_args.builder->AddScalarInt32Operand(output_height); + return ANEURALNETWORKS_RESIZE_BILINEAR; }; - } break; - case kTfLiteBuiltinSqueeze: - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) { - auto builtin = - reinterpret_cast(node->builtin_data); - if (android_sdk_version == kMinSdkVersionForNNAPI11 && - builtin->num_squeeze_dims == 0) { - // NNAPI 1.1 does not support null squeeze_dims properly. - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - // Note that we add the squeeze dimensions even if the dimensions - // were unspecified (empty), as NNAPI requires the operand. - mapping_args.builder->AddVectorInt32Operand( - builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr, - static_cast(builtin->num_squeeze_dims)); - return ANEURALNETWORKS_SQUEEZE; - }; + } + break; + case kTfLiteBuiltinResizeNearestNeighbor: { + if (version > 2 || android_sdk_version < kMinSdkVersionForNNAPI12) { + return nullptr; + } + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + auto builtin = reinterpret_cast( + node->builtin_data); + if (builtin->align_corners) { + // NNAPI does not support align_corners == true. + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + const TfLiteTensor& new_shape = + mapping_args.context->tensors[mapping_args.node->inputs->data[1]]; + // NNAPI uses scalar inputs for height and width. + mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]); + mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]); + mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format + + return ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR; + }; + } break; + case kTfLiteBuiltinSqueeze: + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) { + auto builtin = + reinterpret_cast(node->builtin_data); + if (android_sdk_version == kMinSdkVersionForNNAPI11 && + builtin->num_squeeze_dims == 0) { + // NNAPI 1.1 does not support null squeeze_dims properly. + return nullptr; } - break; - case kTfLiteBuiltinUnidirectionalSequenceLstm: - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12) { - if (IsHybridOperator(context, builtin_code, node)) { - // Hybrid version of this op is not supported by NN API. 
- return nullptr; - } - if (node->inputs->size != 20 && node->inputs->size != 24) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = - reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); - mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); - mapping_args.builder->AddScalarBoolOperand(builtin->time_major); - const bool hybrid_op = IsHybridOperator( - mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm, - mapping_args.node); - if (mapping_args.node->inputs->size == 24) { - // Add layer normalization tensors if they are provided. - for (int i = 20; i < 24; ++i) { - const int input_index = mapping_args.node->inputs->data[i]; - if (input_index != kOptionalTensor) { - mapping_args.builder->AddTensorInput(input_index, hybrid_op); - } else { - mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); - } - } - } else { - for (int i = 0; i < 4; ++i) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + // Note that we add the squeeze dimensions even if the dimensions + // were unspecified (empty), as NNAPI requires the operand. + mapping_args.builder->AddVectorInt32Operand( + builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr, + static_cast(builtin->num_squeeze_dims)); + return ANEURALNETWORKS_SQUEEZE; + }; + } + break; + case kTfLiteBuiltinUnidirectionalSequenceLstm: + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12) { + if (IsHybridOperator(context, builtin_code, node)) { + // Hybrid version of this op is not supported by NN API. + return nullptr; + } + if (node->inputs->size != 20 && node->inputs->size != 24) { + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = + reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); + mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); + mapping_args.builder->AddScalarBoolOperand(builtin->time_major); + const bool hybrid_op = IsHybridOperator( + mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm, + mapping_args.node); + if (mapping_args.node->inputs->size == 24) { + // Add layer normalization tensors if they are provided. 
+ for (int i = 20; i < 24; ++i) { + const int input_index = mapping_args.node->inputs->data[i]; + if (input_index != kOptionalTensor) { + mapping_args.builder->AddTensorInput(input_index, hybrid_op); + } else { mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); } } - - return ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM; - }; - } - break; - case kTfLiteBuiltinL2Normalization: { - if (version <= 2) { - const auto& input = context->tensors[node->inputs->data[0]]; - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - (!IsFloatOperator(context, node) || input.dims->size != 4)) { - return nullptr; - } - auto builtin = - reinterpret_cast(node->builtin_data); - if (builtin->activation == kTfLiteActNone) { - return BasicMappingFn; - } - } - break; - } - case kTfLiteBuiltinLocalResponseNormalization: - if (version == 1) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->radius); - mapping_args.builder->AddScalarFloat32Operand(builtin->bias); - mapping_args.builder->AddScalarFloat32Operand(builtin->alpha); - mapping_args.builder->AddScalarFloat32Operand(builtin->beta); - return ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION; - }; - } - break; - case kTfLiteBuiltinLshProjection: - if (version == 1) { - if (reinterpret_cast(node->builtin_data) - ->type == kTfLiteLshProjectionSparse) { - // NNAPI does not support sparse projection correctly pre-Q - // (b/111751836). - if (android_sdk_version < kMinSdkVersionForNNAPI12) { - return nullptr; - } - // NNAPI does not support weights for sparse projects. - if (node->inputs->size != 2) { - return nullptr; - } - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - int type = builtin->type; - // In Android Q+, NNAPI uses 3 to denote - // kTfLiteLshProjectionSparse. - const int kNNAPILshProjectionSparse = 3; - if (builtin->type == kTfLiteLshProjectionSparse) { - type = kNNAPILshProjectionSparse; - // Add NNAPI null weight operand. - mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); - } - mapping_args.builder->AddScalarInt32Operand(type); - return ANEURALNETWORKS_LSH_PROJECTION; - }; - } - break; - case kTfLiteBuiltinConcatenation: - if (version <= 2 && - reinterpret_cast(node->builtin_data) - ->activation == kTfLiteActNone && - context->tensors[node->inputs->data[0]].dims->size <= 4) { - if (context->tensors[node->inputs->data[0]].type == kTfLiteUInt8 && - android_sdk_version < kMinSdkVersionForNNAPI12) { - // NNAPI 1.0-1 only supported concatenating quantized tensor of - // the same scale and offset. - auto first_param = context->tensors[node->inputs->data[0]].params; - for (int i = 1; i < node->inputs->size; i++) { - auto curr_param = context->tensors[node->inputs->data[i]].params; - if (curr_param.scale != first_param.scale || - curr_param.zero_point != first_param.zero_point) { - return nullptr; - } - } - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - int axis = - builtin->axis < 0 - ? 
mapping_args.context - ->tensors[mapping_args.node->inputs->data[0]] - .dims->size + - builtin->axis - : builtin->axis; - mapping_args.builder->AddScalarInt32Operand(axis); - return ANEURALNETWORKS_CONCATENATION; - }; - } - break; - case kTfLiteBuiltinDequantize: - if (version == 1 || version == 2) { - const auto& input = context->tensors[node->inputs->data[0]]; - if (input.type == kTfLiteFloat16) { - return nullptr; - } - const auto zero_point = input.params.zero_point; - // NN API supports int8 type since version 1.2 but only for - // symmetric quantization. - if (input.type == kTfLiteInt8 && - (zero_point != 0 || - android_sdk_version < kMinSdkVersionForNNAPI12)) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinFloor: - if (version == 1) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinRelu: - if (version == 1) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinReluN1To1: - if (version == 1) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinRelu6: - if (version == 1) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinLogistic: - if (version <= 2) { - if (!IsFloatOrQuant8Operator(context, node)) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinTanh: - if (version <= 2) { - const TfLiteType input_type = - context->tensors[node->inputs->data[0]].type; - if (IsFloat(input_type) || - (IsQuantized(input_type) && - android_sdk_version >= kMinSdkVersionForNNAPI12)) { - // NNAPI only support float tanh. - return BasicMappingFn; - } - } - break; - case kTfLiteBuiltinSub: - if (version <= 2) { - const TfLiteType input_type = - context->tensors[node->inputs->data[0]].type; - if ((android_sdk_version >= kMinSdkVersionForNNAPI11 && - IsFloat(input_type)) || - (android_sdk_version >= kMinSdkVersionForNNAPI12 && - IsQuantized(input_type))) { - // NNAPI only support float sub. - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - return ANEURALNETWORKS_SUB; - }; - } - } - break; - case kTfLiteBuiltinDiv: - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 && - context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) { - // NNAPI only support float div. - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - return ANEURALNETWORKS_DIV; - }; - } - break; - case kTfLiteBuiltinPad: - case kTfLiteBuiltinPadv2: { - if (version <= 2 && IsFloatOrQuant8Operator(context, node)) { - const TfLiteIntArrayView input_shape( - context->tensors[node->inputs->data[0]].dims); - if (HasZeroes(input_shape)) { - // NN API pad ops do not support input tensors with no elements - return nullptr; - } - if (node->inputs->size == 2 && - android_sdk_version >= kMinSdkVersionForNNAPI11 && - (context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 || - android_sdk_version >= kMinSdkVersionForNNAPI12)) { - // NNAPI does not support specifying the padding value. 
- // Before 1.2, NNAPI pads physical zero for quantized tensors, so - // only delegate float pad to NNAPI. NNAPI 1.2 onwards pads with - // zero-point, so delegate quantized pad as well. - return BasicMappingFn; - } else if (node->inputs->size == 3 && - android_sdk_version >= kMinSdkVersionForNNAPI12) { - const int constant_value_id = node->inputs->data[2]; - if (constant_value_id == kOptionalTensor) { - return BasicMappingFn; - } - return BasicMappingFn; - } - } - } break; - case kTfLiteBuiltinUnidirectionalSequenceRnn: - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { - if (IsHybridOperator(context, builtin_code, node)) { - // Hybrid version of this op is not supported by NN API. - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - mapping_args.builder->AddScalarInt32Operand(builtin->time_major); - return ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN; - }; - } - break; - case kTfLiteBuiltinSpaceToBatchNd: - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI11) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinBatchToSpaceNd: - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI11) { - auto crops = context->tensors[node->inputs->data[2]]; - auto crops_data = crops.data.i32; - // Check if all crops are 0. - if (!crops_data || crops.bytes != 16 || crops_data[0] != 0 || - crops_data[1] != 0 || crops_data[2] != 0 || crops_data[3] != 0) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinStridedSlice: - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI11) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask); - mapping_args.builder->AddScalarInt32Operand(builtin->end_mask); - mapping_args.builder->AddScalarInt32Operand( - builtin->shrink_axis_mask); - return ANEURALNETWORKS_STRIDED_SLICE; - }; - } - break; - case kTfLiteBuiltinTranspose: - // Note that the permutation input tensor value dictates the output - // dimensions. - // TODO(b/110888333): Support dynamically-sized tensors in delegates. - if ((version <= 2) && - (android_sdk_version >= kMinSdkVersionForNNAPI11) && - (node->inputs->size > 1) && - (context->tensors[node->inputs->data[1]].allocation_type == - kTfLiteMmapRo)) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinAbs: - // NN API only supports float inputs to this op. - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - IsFloat(context->tensors[node->inputs->data[0]].type)) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinExp: - // NN API only supports float inputs to this op. - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - IsFloat(context->tensors[node->inputs->data[0]].type)) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinLog: - // NN API only supports float inputs to this op. - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - IsFloat(context->tensors[node->inputs->data[0]].type)) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinRsqrt: - // NN API only supports float inputs to this op. 
-        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-            IsFloatOperator(context, node)) {
-          return BasicMappingFn<ANEURALNETWORKS_RSQRT>;
-        }
-        break;
-      case kTfLiteBuiltinPow:
-        // NN API only supports float inputs to this op.
-        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-            IsFloat(context->tensors[node->inputs->data[0]].type)) {
-          return BasicMappingFn<ANEURALNETWORKS_POW>;
-        }
-        break;
-      case kTfLiteBuiltinSlice: {
-        const auto input_type = context->tensors[node->inputs->data[0]].type;
-        const auto begin_type = context->tensors[node->inputs->data[1]].type;
-        const auto size_type = context->tensors[node->inputs->data[2]].type;
-        if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-            (input_type == kTfLiteFloat32 || input_type == kTfLiteInt32 ||
-             input_type == kTfLiteUInt8 || input_type == kTfLiteInt8) &&
-            begin_type == kTfLiteInt32 && size_type == kTfLiteInt32) {
-          return BasicMappingFn<ANEURALNETWORKS_SLICE>;
-        }
-      } break;
-      case kTfLiteBuiltinSin:
-        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-            IsFloat(context->tensors[node->inputs->data[0]].type)) {
-          return BasicMappingFn<ANEURALNETWORKS_SIN>;
-        }
-        break;
-      case kTfLiteBuiltinTransposeConv:
-        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-              -> ANeuralNetworksOperationType {
-            const bool hybrid_op = IsHybridOperator(mapping_args.context,
-                                                    kTfLiteBuiltinTransposeConv,
-                                                    mapping_args.node);
-            mapping_args.builder->AddTensorInput(/*kDataInputTensor*/ 2,
-                                                 hybrid_op);
-            mapping_args.builder->AddTensorInput(/*kWeightsTensor*/ 1,
-                                                 hybrid_op);
-
-            // NNAPI requires a bias tensor, so we allocate a new tensor to fill
-            // it with zeroes. It is deleted with other tensors in the context
-            // during subgraph destructor call.
-            int bias_index = -1;
-            mapping_args.context->AddTensors(mapping_args.context, 1,
-                                             &bias_index);
-            TfLiteTensor* bias_tensor =
-                &mapping_args.context->tensors[bias_index];
-            const auto input_type =
-                mapping_args.context
-                    ->tensors[mapping_args.node->inputs
-                                  ->data[/*kDataInputTensor*/ 2]]
-                    .type;
-            if (input_type == kTfLiteFloat32) {
-              bias_tensor->type = kTfLiteFloat32;
-            } else {
-              bias_tensor->type = kTfLiteInt32;
-            }
-
-            // Create an array with a required bias shape and resize the bias
-            // tensor.
-            TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
-            const TfLiteTensor& output_shape =
-                mapping_args.context->tensors
-                    [mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0]];
-            const int output_depth = output_shape.data.i32[3];
-            bias_shape->data[0] = output_depth;
-            bias_tensor->allocation_type = kTfLiteDynamic;
-            mapping_args.context->ResizeTensor(mapping_args.context,
-                                               bias_tensor, bias_shape);
-
-            // Set tensor's values to zeroes and add it using AddVector*, so
-            // that the values are copied to NNAPI. We don't use the AddTensor
-            // function because it doesn't copy values and the tensor we just
-            // created is not in the node->inputs.
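// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): the zero-bias
// construction that the surrounding TRANSPOSE_CONV code performs, minus the
// TfLite plumbing. For a quantized op, NNAPI expects an int32 bias whose
// scale is input_scale * filter_scale and whose zero point is 0; a freshly
// allocated, zero-filled vector of output_depth elements satisfies that.
// The concrete values below are made up for the demonstration.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int output_depth = 8;                            // from output shape
  const float input_scale = 0.5f, filter_scale = 0.25f;  // hypothetical
  std::vector<int32_t> bias(output_depth, 0);            // zeroed bias values
  const float bias_scale = input_scale * filter_scale;   // required by NNAPI
  std::printf("bias elements: %zu, scale: %f\n", bias.size(), bias_scale);
  return 0;
}
// ---------------------------------------------------------------------------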
- if (input_type == kTfLiteFloat32) { - memset(bias_tensor->data.f, 0, output_depth * sizeof(float)); - mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f, - output_depth); - } else { - memset(bias_tensor->data.i32, 0, output_depth * sizeof(int)); - const TfLiteTensor& input_tensor = - mapping_args.context->tensors - [mapping_args.node->inputs->data[/*kDataInputTensor*/ 2]]; - const TfLiteTensor& filter_tensor = - mapping_args.context->tensors - [mapping_args.node->inputs->data[/*kWeightsTensor*/ 1]]; - // NNAPI requires bias scale to be a product of an input scale and - // a filter scale. - bias_tensor->params.scale = - input_tensor.params.scale * filter_tensor.params.scale; - mapping_args.builder->AddVectorInt32Operand( - bias_tensor->data.i32, output_depth, - input_tensor.params.scale * filter_tensor.params.scale, - /*zero_point=*/0); - } - - mapping_args.builder->AddTensorInput(/*kOutputShapeTensor*/ 0, - hybrid_op); - - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->padding); - mapping_args.builder->AddScalarInt32Operand(builtin->stride_width); - mapping_args.builder->AddScalarInt32Operand(builtin->stride_height); - mapping_args.builder->AddScalarInt32Operand( - /*ANEURALNETWORKS_FUSED_NONE*/ 0); - // Use NHWC layout for input and output - mapping_args.builder->AddScalarBoolOperand(false); - return ANEURALNETWORKS_TRANSPOSE_CONV; - }; - } - break; - case kTfLiteBuiltinSqrt: - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - IsFloat(context->tensors[node->inputs->data[0]].type)) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinRnn: - // NNAPI only support float32 weights. - if (version == 1 && node->inputs->size == 5 && - context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type == - kTfLiteFloat32) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - // NNAPI need both state_in and state_out. - int ann_index; - mapping_args.builder->AddStateFloat32Tensor( - mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4], - &ann_index); - mapping_args.model_state_outputs->push_back(ann_index); - mapping_args.model_state_tfl_inputs->push_back( - mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]); - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - return ANEURALNETWORKS_RNN; - }; - } - break; - case kTfLiteBuiltinSpaceToDepth: { - const TfLiteType input_type = - context->tensors[node->inputs->data[0]].type; - if (version <= 2 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8)) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->block_size); - return ANEURALNETWORKS_SPACE_TO_DEPTH; - }; - } - } break; - case kTfLiteBuiltinSvdf: - // NNAPI only support float32 weights. - // Only delegate to NNAPI 1.1, as SVDF does not support rank > 1 - // on 1.0. - if (version == 1 && node->inputs->size == 5 && - android_sdk_version >= kMinSdkVersionForNNAPI11 && - context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]] - .type == kTfLiteFloat32) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - // NNAPI need both state_in and state_out. 
- int ann_index; - mapping_args.builder->AddStateFloat32Tensor( - mapping_args.node->inputs - ->data[/*kInputActivationStateTensor*/ 4], - &ann_index); - mapping_args.model_state_outputs->push_back(ann_index); - mapping_args.model_state_tfl_inputs->push_back( - mapping_args.node->inputs - ->data[/*kInputActivationStateTensor*/ 4]); - - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->rank); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - return ANEURALNETWORKS_SVDF; - }; - } - break; - case kTfLiteBuiltinLstm: - // TODO(miaowang): add loggings to indicate why the op is rejected. - if (version <= 3) { - if (android_sdk_version < kMinSdkVersionForNNAPI11) { - // Only delegate to NNAPI 1.1+, as 1.0 has a bug for optional - // tensors which would affect LSTM. - return nullptr; - } - if (android_sdk_version < kMinSdkVersionForNNAPI12 && - IsHybridOperator(context, builtin_code, node)) { - // Hybrid operators not supported before NNAPI 1.2. - return nullptr; - } - - const auto weight_input_index = - isLstmBasicKernel(node) - ? 2 /* basic::kInputWeights */ - : 4 /* full::kInputToOutputWeightsTensor */; - - const TfLiteType weight_type = - context->tensors[node->inputs->data[weight_input_index]].type; - - if (isLstmBasicKernel(node)) { - if (weight_type != kTfLiteUInt8) { - return nullptr; - } - const auto input_quantization_params = - context->tensors[node->inputs->data[0]].params; - if (input_quantization_params.scale != 1. / 128. || - input_quantization_params.zero_point != 128) { - return nullptr; - } - - const auto output_quantization_params = - context->tensors[node->outputs->data[0]].params; - if (output_quantization_params.scale != 1. / 128. || - output_quantization_params.zero_point != 128) { - return nullptr; - } - - const auto cell_state_quantization_params = - context->tensors[node->outputs->data[1]].params; - if (cell_state_quantization_params.scale != 16. / 32768. 
|| - cell_state_quantization_params.zero_point != 0) { - return nullptr; - } - - auto is_const_tensor = [&node, &context](int tensor_idx) { - return context->tensors[node->inputs->data[tensor_idx]] - .allocation_type == kTfLiteMmapRo; - }; - - if (!is_const_tensor(2 /* kInputWeights */)) { - return nullptr; - } - - if (!is_const_tensor(3 /* kInputBiases */)) { - return nullptr; - } - - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - const auto output_dims = - mapping_args.context - ->tensors[mapping_args.node->outputs->data[1]] - .dims; - - // Inputs kInputData - mapping_args.builder->AddTensorInput( - mapping_args.node->inputs->data[0 /* kInputData */], - /* hybrid_op */ false, - /* scalar_as_tensor */ false); - - // The 8 weights tensors are set decomposing the - // kInputWeights param - const auto weight_tensor = - mapping_args.context->tensors - [mapping_args.node->inputs->data[2 /* kInputWeights */]]; - - std::vector recurrent_to_input; - std::vector input_to_input; - std::vector recurrent_to_cell; - std::vector input_to_cell; - std::vector recurrent_to_forget; - std::vector input_to_forget; - std::vector recurrent_to_output; - std::vector input_to_output; - tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor( - weight_tensor.data.uint8, weight_tensor.dims, - &recurrent_to_input, &input_to_input, &recurrent_to_cell, - &input_to_cell, &recurrent_to_forget, &input_to_forget, - &recurrent_to_output, &input_to_output); - - TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2); - TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2); - tflite::delegate::nnapi::SetWeightSubmatrixDims( - weight_tensor.dims, recurrent_weight_dims, input_weight_dims); - - int new_tensor_index = -1; - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - input_weight_dims, input_to_input, weight_tensor.params, - &new_tensor_index); - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - input_weight_dims, input_to_forget, weight_tensor.params, - &new_tensor_index); - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - input_weight_dims, input_to_cell, weight_tensor.params, - &new_tensor_index); - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - input_weight_dims, input_to_output, weight_tensor.params, - &new_tensor_index); - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - recurrent_weight_dims, recurrent_to_input, - weight_tensor.params, &new_tensor_index); - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - recurrent_weight_dims, recurrent_to_forget, - weight_tensor.params, &new_tensor_index); - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - recurrent_weight_dims, recurrent_to_cell, - weight_tensor.params, &new_tensor_index); - - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8, - recurrent_weight_dims, recurrent_to_output, - weight_tensor.params, &new_tensor_index); - - TfLiteIntArrayFree(input_weight_dims); - TfLiteIntArrayFree(recurrent_weight_dims); - - // Biases have to be split in four - const auto bias_size = output_dims->data[1]; - const TfLiteTensor& biases_tensor = - mapping_args.context->tensors - 
[mapping_args.node->inputs->data[3 /* kInputBiases */]]; - - std::vector input_bias; - std::vector cell_bias; - std::vector forget_bias; - std::vector output_bias; - delegate::nnapi::DecomposeBiasTensor( - biases_tensor.data.i32, bias_size, &input_bias, &cell_bias, - &forget_bias, &output_bias); - - int input_bias_tensor = -1; - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, - input_bias, biases_tensor.params, &input_bias_tensor); - int forget_bias_tensor = -1; - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, - forget_bias, biases_tensor.params, &forget_bias_tensor); - int cell_gate_bias_tensor = -1; - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, - cell_bias, biases_tensor.params, &cell_gate_bias_tensor); - int output_gate_bias_tensor = -1; - mapping_args.builder->AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, - output_bias, biases_tensor.params, &output_gate_bias_tensor); - - mapping_args.builder->AddTensorInput( - mapping_args.node->inputs->data[4 /* kInputPrevState */], - /* hybrid_op */ false, - /* scalar_as_tensor */ false); - - // kInputPrevActivation - mapping_args.builder->AddTensorInput( - mapping_args.node->inputs->data[1 /* kInputPrevActivation */], - /* hybrid_op */ false, - /* scalar_as_tensor */ false); - - // Configuring the copy from the activation, state outputs - // to their associated inputs - mapping_args.feedback_loops->push_back(std::make_tuple( - 0 /*kOutputActivation*/, 1 /*kInputPrevActivation*/)); - - mapping_args.feedback_loops->push_back( - std::make_tuple(1 /*kOutputState*/, 4 /*kInputPrevState*/)); - - // OUTPUTS - // Setting only the first two since the remaining ones are - // ignored by NNAPI - mapping_args.builder->AddTensorOutput( - mapping_args.node->outputs->data[1 /* kOutputState */], 0); - - mapping_args.builder->AddTensorOutput( - mapping_args.node->outputs - ->data[0 /* kOutputkOutputActivationState */], - 0); - - return ANEURALNETWORKS_QUANTIZED_16BIT_LSTM; - }; - } - if (node->inputs->size == 24 && - android_sdk_version < kMinSdkVersionForNNAPI12) { - // LSTM with layer norm introduced in API level 29 - return nullptr; - } - if (weight_type != kTfLiteFloat32 && weight_type != kTfLiteUInt8) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); - mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); - - // Current NNAPI implementation requires the scratch_buffer as - // output. - mapping_args.builder->AddAdditionalFloat32OutputTensor(2); - - // NNAPI need both state_in and state_out for cell_state and - // output_state. 
- int ann_index; - mapping_args.builder->AddStateFloat32Tensor( - mapping_args.node->inputs - ->data[/*kInputActivationStateTensor*/ 18], - &ann_index); - mapping_args.model_state_outputs->push_back(ann_index); - mapping_args.model_state_tfl_inputs->push_back( - mapping_args.node->inputs - ->data[/*kInputActivationStateTensor*/ 18]); - mapping_args.builder->AddStateFloat32Tensor( - mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19], - &ann_index); - mapping_args.model_state_outputs->push_back(ann_index); - mapping_args.model_state_tfl_inputs->push_back( - mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]); - - const bool hybrid_op = IsHybridOperator( - mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node); - - if (mapping_args.node->inputs->size == 24) { - for (int i = 20; i < 24; ++i) { - const auto input_index = mapping_args.node->inputs->data[i]; - if (input_index != kOptionalTensor) { - mapping_args.builder->AddTensorInput(input_index, hybrid_op); - } else { - mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); - } - } - } - - return ANEURALNETWORKS_LSTM; - }; - } - break; - case kTfLiteBuiltinMean: - // NNAPI does not support generating a scalar as output for MEAN. - if (version <= 2 && - ((android_sdk_version >= kMinSdkVersionForNNAPI11 && - context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) || - (android_sdk_version >= kMinSdkVersionForNNAPI12 && - IsQuantized(context->tensors[node->inputs->data[0]].type))) && - context->tensors[node->outputs->data[0]].dims->size > 0) { - auto input_param = context->tensors[node->inputs->data[0]].params; - auto output_param = context->tensors[node->outputs->data[0]].params; - // NNAPI requires that the input and output have the same - // quantization parameters. - if (input_param.scale != output_param.scale || - input_param.zero_point != output_param.zero_point) { - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - int32_t keep_dims = 0; - if (builtin->keep_dims) keep_dims = 1; - mapping_args.builder->AddScalarInt32Operand(keep_dims); - return ANEURALNETWORKS_MEAN; - }; - } - break; - case kTfLiteBuiltinEmbeddingLookup: - // NNAPI only support float32 values. - if (version == 1 && - context->tensors[node->inputs->data[1]].type == kTfLiteFloat32) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinHashtableLookup: - // NNAPI only support float32 output. 
- if (version == 1 && - context->tensors[node->outputs->data[0]].type == kTfLiteFloat32) { - return BasicMappingFn; - } - break; - case kTfLiteBuiltinMaximum: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinMinimum: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinCast: { - const TfLiteType input_type = - context->tensors[node->inputs->data[0]].type; - const TfLiteType output_type = - context->tensors[node->outputs->data[0]].type; - auto is_supported_tensor_type = [](const TfLiteType& type) { - return (type == kTfLiteFloat32 || type == kTfLiteInt32 || - type == kTfLiteUInt8); - }; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - is_supported_tensor_type(input_type) && - is_supported_tensor_type(output_type)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinPrelu: - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { - if (!IsFloatOrUint8Operator(context, node)) { - return nullptr; - } - return BasicMappingFn; - } - break; - case kTfLiteBuiltinTile: { - // NN API doesn't support int64 and boolean inputs to this op - const auto input_type = context->tensors[node->inputs->data[0]].type; - const auto multipliers_type = - context->tensors[node->inputs->data[1]].type; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteInt32) && - (multipliers_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinLogicalOr: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - input_type == kTfLiteBool) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinLogicalAnd: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - input_type == kTfLiteBool) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinLogicalNot: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - input_type == kTfLiteBool) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinLess: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteBool || - input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinLessEqual: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteBool || - input_type == kTfLiteInt32)) { 
- return BasicMappingFn; - } - } break; - case kTfLiteBuiltinGreater: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteBool || - input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinGreaterEqual: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteBool || - input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinEqual: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteBool || - input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinNotEqual: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteBool || - input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinNeg: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat32 || input_type == kTfLiteInt32)) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinTopkV2: { - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12) { - const auto& input = context->tensors[node->outputs->data[0]]; - const auto& k_param = context->tensors[node->outputs->data[1]]; - if ((input.type == kTfLiteFloat32 || input.type == kTfLiteInt32 || - input.type == kTfLiteUInt8 || input.type == kTfLiteInt8) && - (k_param.type == kTfLiteInt32 && - k_param.allocation_type == kTfLiteMmapRo)) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - const TfLiteTensor& k_param = - mapping_args.context - ->tensors[mapping_args.node->inputs->data[1]]; - mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32); - return ANEURALNETWORKS_TOPK_V2; - }; } else { - return nullptr; - } - } - } break; - case kTfLiteBuiltinSelect: { - const auto value_type = context->tensors[node->inputs->data[1]].type; - if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (value_type == kTfLiteFloat32 || value_type == kTfLiteUInt8 || - value_type == kTfLiteInt8 || value_type == kTfLiteInt32)) { - TfLiteIntArray* condition_shape = - context->tensors[node->inputs->data[0]].dims; - TfLiteIntArray* input_shape = - context->tensors[node->inputs->data[1]].dims; - // The Android Q-variant of select does not support broadcasting. 
- if (!TfLiteIntArrayEqual(condition_shape, input_shape)) { - return nullptr; - } - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinGather: { - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { - const auto& input = context->tensors[node->inputs->data[0]]; - const auto& positions = context->tensors[node->inputs->data[1]]; - - auto is_supported_input_type = [](const TfLiteTensor& t) { - return (t.type == kTfLiteFloat32 || t.type == kTfLiteFloat16 || - t.type == kTfLiteInt32 || t.type == kTfLiteUInt8); - }; - - if (!is_supported_input_type(input) || - !is_supported_input_type(positions)) { - return nullptr; - } - - // 0-dimension args are not supported by NNAPI. - if (positions.dims->size == 0) { - return nullptr; - } - - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddTensorInput( - mapping_args.node->inputs->data[0], - /* hybrid_op */ false, - /* scalar_as_tensor */ false); - - mapping_args.builder->AddScalarInt32Operand(builtin->axis); - - mapping_args.builder->AddTensorInput( - mapping_args.node->inputs->data[1], - /* hybrid_op */ false, - /* scalar_as_tensor */ false); - - return ANEURALNETWORKS_GATHER; - }; - } - } break; - case kTfLiteBuiltinBidirectionalSequenceLstm: - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { - if (IsHybridOperator(context, builtin_code, node)) { - // Hybrid version of this op is not supported by NN API. - return nullptr; - } - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - auto builtin = - reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(builtin->activation); - mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); - mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); - mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs); - mapping_args.builder->AddScalarBoolOperand(builtin->time_major); - // TF Lite doesn't support layer normalization in bidirectional - // sequence LSTM, so we insert optional tensors for NNAPI - for (int i = 0; i < 8; ++i) { + for (int i = 0; i < 4; ++i) { mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); } - return ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM; - }; + } + + return ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM; + }; + } + break; + case kTfLiteBuiltinL2Normalization: { + if (version <= 2) { + const auto& input = context->tensors[node->inputs->data[0]]; + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + (!IsFloatOperator(context, node) || input.dims->size != 4)) { + return nullptr; } - break; - case kTfLiteBuiltinExpandDims: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - const auto axis = context->tensors[node->inputs->data[1]]; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input_type == kTfLiteFloat16 || input_type == kTfLiteFloat32 || - input_type == kTfLiteInt32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8) && - // TFLite supports axis also as int64 but NNAPI only int32 - (axis.type == kTfLiteInt32 && - axis.allocation_type == kTfLiteMmapRo)) { + auto builtin = + reinterpret_cast(node->builtin_data); + if (builtin->activation == kTfLiteActNone) { + return BasicMappingFn; + } + } + break; + } + case kTfLiteBuiltinLocalResponseNormalization: + if (version == 1) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> 
ANeuralNetworksOperationType {
+        auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
+            mapping_args.node->builtin_data);
+        mapping_args.builder->AddScalarInt32Operand(builtin->radius);
+        mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
+        mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
+        mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
+        return ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
+      };
+      }
+      break;
+    case kTfLiteBuiltinLshProjection:
+      if (version == 1) {
+        if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
+                ->type == kTfLiteLshProjectionSparse) {
+          // NNAPI does not support sparse projection correctly pre-Q
+          // (b/111751836).
+          if (android_sdk_version < kMinSdkVersionForNNAPI12) {
+            return nullptr;
+          }
+          // NNAPI does not support weights for sparse projection.
+          if (node->inputs->size != 2) {
+            return nullptr;
+          }
+        }
+        return [](const NNAPIOpMappingArgs& mapping_args)
+                   -> ANeuralNetworksOperationType {
+          auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
+              mapping_args.node->builtin_data);
+          int type = builtin->type;
+          // In Android Q+, NNAPI uses 3 to denote
+          // kTfLiteLshProjectionSparse.
+          const int kNNAPILshProjectionSparse = 3;
+          if (builtin->type == kTfLiteLshProjectionSparse) {
+            type = kNNAPILshProjectionSparse;
+            // Add NNAPI null weight operand.
+            mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
+          }
+          mapping_args.builder->AddScalarInt32Operand(type);
+          return ANEURALNETWORKS_LSH_PROJECTION;
+        };
+      }
+      break;
+    case kTfLiteBuiltinConcatenation:
+      if (version <= 2 &&
+          reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
+                  ->activation == kTfLiteActNone &&
+          context->tensors[node->inputs->data[0]].dims->size <= 4) {
+        if (context->tensors[node->inputs->data[0]].type == kTfLiteUInt8 &&
+            android_sdk_version < kMinSdkVersionForNNAPI12) {
+          // NNAPI 1.0/1.1 only supported concatenating quantized tensors
+          // with the same scale and offset.
+          auto first_param = context->tensors[node->inputs->data[0]].params;
+          for (int i = 1; i < node->inputs->size; i++) {
+            auto curr_param = context->tensors[node->inputs->data[i]].params;
+            if (curr_param.scale != first_param.scale ||
+                curr_param.zero_point != first_param.zero_point) {
+              return nullptr;
+            }
+          }
+        }
+        return [](const NNAPIOpMappingArgs& mapping_args)
+                   -> ANeuralNetworksOperationType {
+          auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
+              mapping_args.node->builtin_data);
+          int axis = builtin->axis < 0
+                         ? mapping_args.context
+                                   ->tensors[mapping_args.node->inputs->data[0]]
+                                   .dims->size +
+                               builtin->axis
+                         : builtin->axis;
+          mapping_args.builder->AddScalarInt32Operand(axis);
+          return ANEURALNETWORKS_CONCATENATION;
+        };
+      }
+      break;
+    case kTfLiteBuiltinDequantize:
+      if (version == 1 || version == 2) {
+        const auto& input = context->tensors[node->inputs->data[0]];
+        if (input.type == kTfLiteFloat16) {
+          return nullptr;
+        }
+        const auto zero_point = input.params.zero_point;
+        // NN API supports int8 type since version 1.2 but only for
+        // symmetric quantization.
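// The int8 gate that follows reduces to a small standalone predicate.
// A minimal sketch for reference (illustrative names, not part of this
// patch): int8 DEQUANTIZE is only delegable when the quantization is
// symmetric (zero_point == 0) and the device runs NNAPI 1.2 (API 29+).
//
//   bool CanDelegateDequantize(TfLiteType type, int32_t zero_point,
//                              int32_t android_sdk_version) {
//     if (type == kTfLiteFloat16) return false;  // rejected above
//     if (type == kTfLiteInt8) {
//       return zero_point == 0 &&
//              android_sdk_version >= kMinSdkVersionForNNAPI12;
//     }
//     return true;  // float32/uint8 inputs pass this check
//   }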
+ if (input.type == kTfLiteInt8 && + (zero_point != 0 || + android_sdk_version < kMinSdkVersionForNNAPI12)) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinFloor: + if (version == 1) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinRelu: + if (version == 1) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinReluN1To1: + if (version == 1) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinRelu6: + if (version == 1) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinLogistic: + if (version <= 2) { + if (!IsFloatOrQuant8Operator(context, node)) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinTanh: + if (version <= 2) { + const TfLiteType input_type = + context->tensors[node->inputs->data[0]].type; + if (IsFloat(input_type) || + (IsQuantized(input_type) && + android_sdk_version >= kMinSdkVersionForNNAPI12)) { + // NNAPI only support float tanh. + return BasicMappingFn; + } + } + break; + case kTfLiteBuiltinSub: + if (version <= 2) { + const TfLiteType input_type = + context->tensors[node->inputs->data[0]].type; + if ((android_sdk_version >= kMinSdkVersionForNNAPI11 && + IsFloat(input_type)) || + (android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsQuantized(input_type))) { + // NNAPI only support float sub. return [](const NNAPIOpMappingArgs& mapping_args) -> ANeuralNetworksOperationType { - const TfLiteTensor& axis_param = + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_SUB; + }; + } + } + break; + case kTfLiteBuiltinDiv: + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 && + context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) { + // NNAPI only support float div. + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_DIV; + }; + } + break; + case kTfLiteBuiltinPad: + case kTfLiteBuiltinPadv2: { + if (version <= 2 && IsFloatOrQuant8Operator(context, node)) { + const TfLiteIntArrayView input_shape( + context->tensors[node->inputs->data[0]].dims); + if (HasZeroes(input_shape)) { + // NN API pad ops do not support input tensors with no elements + return nullptr; + } + if (node->inputs->size == 2 && + android_sdk_version >= kMinSdkVersionForNNAPI11 && + (context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 || + android_sdk_version >= kMinSdkVersionForNNAPI12)) { + // NNAPI does not support specifying the padding value. + // Before 1.2, NNAPI pads physical zero for quantized tensors, so + // only delegate float pad to NNAPI. NNAPI 1.2 onwards pads with + // zero-point, so delegate quantized pad as well. 
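// To see why the API level matters for quantized PAD: a quantized value q
// represents the real value scale * (q - zero_point). A sketch of the
// identity (illustrative only):
//
//   float Dequantize(uint8_t q, float scale, int32_t zero_point) {
//     return scale * (static_cast<float>(q) - zero_point);
//   }
//   // With scale = 0.5 and zero_point = 128, a raw zero byte decodes to
//   // Dequantize(0, 0.5f, 128) == -64.0f rather than 0.0f, which is the
//   // value NNAPI 1.0/1.1 padding would silently inject.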
+ return BasicMappingFn; + } else if (node->inputs->size == 3 && + android_sdk_version >= kMinSdkVersionForNNAPI12) { + const int constant_value_id = node->inputs->data[2]; + if (constant_value_id == kOptionalTensor) { + return BasicMappingFn; + } + return BasicMappingFn; + } + } + } break; + case kTfLiteBuiltinUnidirectionalSequenceRnn: + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { + if (IsHybridOperator(context, builtin_code, node)) { + // Hybrid version of this op is not supported by NN API. + return nullptr; + } + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + mapping_args.builder->AddScalarInt32Operand(builtin->time_major); + return ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN; + }; + } + break; + case kTfLiteBuiltinSpaceToBatchNd: + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI11) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinBatchToSpaceNd: + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI11) { + auto crops = context->tensors[node->inputs->data[2]]; + auto crops_data = crops.data.i32; + // Check if all crops are 0. + if (!crops_data || crops.bytes != 16 || crops_data[0] != 0 || + crops_data[1] != 0 || crops_data[2] != 0 || crops_data[3] != 0) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinStridedSlice: + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI11) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask); + mapping_args.builder->AddScalarInt32Operand(builtin->end_mask); + mapping_args.builder->AddScalarInt32Operand( + builtin->shrink_axis_mask); + return ANEURALNETWORKS_STRIDED_SLICE; + }; + } + break; + case kTfLiteBuiltinTranspose: + // Note that the permutation input tensor value dictates the output + // dimensions. + // TODO(b/110888333): Support dynamically-sized tensors in delegates. + if ((version <= 2) && (android_sdk_version >= kMinSdkVersionForNNAPI11) && + (node->inputs->size > 1) && + (context->tensors[node->inputs->data[1]].allocation_type == + kTfLiteMmapRo)) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinAbs: + // NN API only supports float inputs to this op. + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsFloat(context->tensors[node->inputs->data[0]].type)) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinExp: + // NN API only supports float inputs to this op. + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsFloat(context->tensors[node->inputs->data[0]].type)) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinLog: + // NN API only supports float inputs to this op. + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsFloat(context->tensors[node->inputs->data[0]].type)) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinRsqrt: + // NN API only supports float inputs to this op. + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsFloatOperator(context, node)) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinPow: + // NN API only supports float inputs to this op. 
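// `BasicMappingFn`, returned throughout this switch, is a function
// template defined earlier in this file and parameterized on the NNAPI
// operation code; a minimal sketch consistent with its call sites:
//
//   template <ANeuralNetworksOperationType OperationType>
//   ANeuralNetworksOperationType BasicMappingFn(
//       const NNAPIOpMappingArgs& mapping_args) {
//     return OperationType;
//   }
//
// It covers ops that need no operands beyond their tensor inputs; e.g.
// the POW case below returns an instantiation such as
// BasicMappingFn<ANEURALNETWORKS_POW>.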
+ if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsFloat(context->tensors[node->inputs->data[0]].type)) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinSlice: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + const auto begin_type = context->tensors[node->inputs->data[1]].type; + const auto size_type = context->tensors[node->inputs->data[2]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteInt32 || + input_type == kTfLiteUInt8 || input_type == kTfLiteInt8) && + begin_type == kTfLiteInt32 && size_type == kTfLiteInt32) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinSin: + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + IsFloat(context->tensors[node->inputs->data[0]].type)) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinTransposeConv: + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + const bool hybrid_op = + IsHybridOperator(mapping_args.context, + kTfLiteBuiltinTransposeConv, mapping_args.node); + mapping_args.builder->AddTensorInput(/*kDataInputTensor*/ 2, + hybrid_op); + mapping_args.builder->AddTensorInput(/*kWeightsTensor*/ 1, hybrid_op); + + // NNAPI requires a bias tensor, so we allocate a new tensor to fill + // it with zeroes. It is deleted with other tensors in the context + // during subgraph destructor call. + int bias_index = -1; + mapping_args.context->AddTensors(mapping_args.context, 1, + &bias_index); + TfLiteTensor* bias_tensor = + &mapping_args.context->tensors[bias_index]; + const auto input_type = + mapping_args.context + ->tensors[mapping_args.node->inputs + ->data[/*kDataInputTensor*/ 2]] + .type; + if (input_type == kTfLiteFloat32) { + bias_tensor->type = kTfLiteFloat32; + } else { + bias_tensor->type = kTfLiteInt32; + } + + // Create an array with a required bias shape and resize the bias + // tensor. + TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1); + const TfLiteTensor& output_shape = + mapping_args.context->tensors + [mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0]]; + const int output_depth = output_shape.data.i32[3]; + bias_shape->data[0] = output_depth; + bias_tensor->allocation_type = kTfLiteDynamic; + mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor, + bias_shape); + + // Set tensor's values to zeroes and add it using AddVector*, so + // that the values are copied to NNAPI. We don't use the AddTensor + // function because it doesn't copy values and the tensor we just + // created is not in the node->inputs. + if (input_type == kTfLiteFloat32) { + memset(bias_tensor->data.f, 0, output_depth * sizeof(float)); + mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f, + output_depth); + } else { + memset(bias_tensor->data.i32, 0, output_depth * sizeof(int)); + const TfLiteTensor& input_tensor = + mapping_args.context->tensors + [mapping_args.node->inputs->data[/*kDataInputTensor*/ 2]]; + const TfLiteTensor& filter_tensor = + mapping_args.context->tensors[mapping_args.node->inputs + ->data[/*kWeightsTensor*/ 1]]; + // NNAPI requires bias scale to be a product of an input scale and + // a filter scale. 
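// The bias-scale requirement sketched in arithmetic: NNAPI accumulates
// the quantized convolution in int32, where each input * filter product
// lands on the scale input_scale * filter_scale, so the int32 bias must
// share exactly that scale (with zero_point 0) to be summed in without
// rescaling:
//
//   real_bias = (input_scale * filter_scale) * bias_q
//
// e.g. with input_scale = 0.02 and filter_scale = 0.5, bias_scale must be
// 0.01; a bias_q of 300 then represents the real value 3.0.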
+            bias_tensor->params.scale =
+                input_tensor.params.scale * filter_tensor.params.scale;
+            mapping_args.builder->AddVectorInt32Operand(
+                bias_tensor->data.i32, output_depth,
+                input_tensor.params.scale * filter_tensor.params.scale,
+                /*zero_point=*/0);
+          }
+
+          mapping_args.builder->AddTensorInput(/*kOutputShapeTensor*/ 0,
+                                               hybrid_op);
+
+          auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
+              mapping_args.node->builtin_data);
+          mapping_args.builder->AddScalarInt32Operand(builtin->padding);
+          mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
+          mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
+          mapping_args.builder->AddScalarInt32Operand(
+              /*ANEURALNETWORKS_FUSED_NONE*/ 0);
+          // Use NHWC layout for input and output.
+          mapping_args.builder->AddScalarBoolOperand(false);
+          return ANEURALNETWORKS_TRANSPOSE_CONV;
+        };
+      }
+      break;
+    case kTfLiteBuiltinSqrt:
+      if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
+          IsFloat(context->tensors[node->inputs->data[0]].type)) {
+        return BasicMappingFn<ANEURALNETWORKS_SQRT>;
+      }
+      break;
+    case kTfLiteBuiltinRnn:
+      // NNAPI only supports float32 weights.
+      if (version == 1 && node->inputs->size == 5 &&
+          context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
+              kTfLiteFloat32) {
+        return [](const NNAPIOpMappingArgs& mapping_args)
+                   -> ANeuralNetworksOperationType {
+          // NNAPI needs both state_in and state_out.
+          int ann_index;
+          mapping_args.builder->AddStateFloat32Tensor(
+              mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
+              &ann_index);
+          mapping_args.model_state_outputs->push_back(ann_index);
+          mapping_args.model_state_tfl_inputs->push_back(
+              mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
+          auto builtin = reinterpret_cast<TfLiteRNNParams*>(
+              mapping_args.node->builtin_data);
+          mapping_args.builder->AddScalarInt32Operand(builtin->activation);
+          return ANEURALNETWORKS_RNN;
+        };
+      }
+      break;
+    case kTfLiteBuiltinSpaceToDepth: {
+      const TfLiteType input_type =
+          context->tensors[node->inputs->data[0]].type;
+      if (version <= 2 &&
+          (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
+           input_type == kTfLiteInt8)) {
+        return [](const NNAPIOpMappingArgs& mapping_args)
+                   -> ANeuralNetworksOperationType {
+          auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
+              mapping_args.node->builtin_data);
+          mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
+          return ANEURALNETWORKS_SPACE_TO_DEPTH;
+        };
+      }
+    } break;
+    case kTfLiteBuiltinSvdf:
+      // NNAPI only supports float32 weights.
+      // Only delegate to NNAPI 1.1+, as SVDF does not support rank > 1
+      // on 1.0.
+      if (version == 1 && node->inputs->size == 5 &&
+          android_sdk_version >= kMinSdkVersionForNNAPI11 &&
+          context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
+                  .type == kTfLiteFloat32) {
+        return [](const NNAPIOpMappingArgs& mapping_args)
+                   -> ANeuralNetworksOperationType {
+          // NNAPI needs both state_in and state_out.
+ int ann_index; + mapping_args.builder->AddStateFloat32Tensor( + mapping_args.node->inputs + ->data[/*kInputActivationStateTensor*/ 4], + &ann_index); + mapping_args.model_state_outputs->push_back(ann_index); + mapping_args.model_state_tfl_inputs->push_back( + mapping_args.node->inputs + ->data[/*kInputActivationStateTensor*/ 4]); + + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->rank); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_SVDF; + }; + } + break; + case kTfLiteBuiltinLstm: + // TODO(miaowang): add loggings to indicate why the op is rejected. + if (version <= 3) { + if (android_sdk_version < kMinSdkVersionForNNAPI11) { + // Only delegate to NNAPI 1.1+, as 1.0 has a bug for optional + // tensors which would affect LSTM. + return nullptr; + } + if (android_sdk_version < kMinSdkVersionForNNAPI12 && + IsHybridOperator(context, builtin_code, node)) { + // Hybrid operators not supported before NNAPI 1.2. + return nullptr; + } + + const auto weight_input_index = + isLstmBasicKernel(node) ? 2 /* basic::kInputWeights */ + : 4 /* full::kInputToOutputWeightsTensor */; + + const TfLiteType weight_type = + context->tensors[node->inputs->data[weight_input_index]].type; + + if (isLstmBasicKernel(node)) { + if (weight_type != kTfLiteUInt8) { + return nullptr; + } + const auto input_quantization_params = + context->tensors[node->inputs->data[0]].params; + if (input_quantization_params.scale != 1. / 128. || + input_quantization_params.zero_point != 128) { + return nullptr; + } + + const auto output_quantization_params = + context->tensors[node->outputs->data[0]].params; + if (output_quantization_params.scale != 1. / 128. || + output_quantization_params.zero_point != 128) { + return nullptr; + } + + const auto cell_state_quantization_params = + context->tensors[node->outputs->data[1]].params; + if (cell_state_quantization_params.scale != 16. / 32768. 
||
+            cell_state_quantization_params.zero_point != 0) {
+          return nullptr;
+        }
+
+        auto is_const_tensor = [&node, &context](int tensor_idx) {
+          return context->tensors[node->inputs->data[tensor_idx]]
+                     .allocation_type == kTfLiteMmapRo;
+        };
+
+        if (!is_const_tensor(2 /* kInputWeights */)) {
+          return nullptr;
+        }
+
+        if (!is_const_tensor(3 /* kInputBiases */)) {
+          return nullptr;
+        }
+
+        return [](const NNAPIOpMappingArgs& mapping_args)
+                   -> ANeuralNetworksOperationType {
+          const auto output_dims =
+              mapping_args.context
+                  ->tensors[mapping_args.node->outputs->data[1]]
+                  .dims;
+
+          // Inputs kInputData
+          mapping_args.builder->AddTensorInput(
+              mapping_args.node->inputs->data[0 /* kInputData */],
+              /* hybrid_op */ false,
+              /* scalar_as_tensor */ false);
+
+          // The 8 weight tensors are set by decomposing the
+          // kInputWeights param.
+          const auto weight_tensor =
+              mapping_args.context->tensors
+                  [mapping_args.node->inputs->data[2 /* kInputWeights */]];
+
+          std::vector<uint8_t> recurrent_to_input;
+          std::vector<uint8_t> input_to_input;
+          std::vector<uint8_t> recurrent_to_cell;
+          std::vector<uint8_t> input_to_cell;
+          std::vector<uint8_t> recurrent_to_forget;
+          std::vector<uint8_t> input_to_forget;
+          std::vector<uint8_t> recurrent_to_output;
+          std::vector<uint8_t> input_to_output;
+          tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
+              weight_tensor.data.uint8, weight_tensor.dims,
+              &recurrent_to_input, &input_to_input, &recurrent_to_cell,
+              &input_to_cell, &recurrent_to_forget, &input_to_forget,
+              &recurrent_to_output, &input_to_output);
+
+          TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
+          TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
+          tflite::delegate::nnapi::SetWeightSubmatrixDims(
+              weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
+
+          int new_tensor_index = -1;
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              input_weight_dims, input_to_input, weight_tensor.params,
+              &new_tensor_index);
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              input_weight_dims, input_to_forget, weight_tensor.params,
+              &new_tensor_index);
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              input_weight_dims, input_to_cell, weight_tensor.params,
+              &new_tensor_index);
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              input_weight_dims, input_to_output, weight_tensor.params,
+              &new_tensor_index);
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
+              &new_tensor_index);
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              recurrent_weight_dims, recurrent_to_forget,
+              weight_tensor.params, &new_tensor_index);
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
+              &new_tensor_index);
+
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
+              recurrent_weight_dims, recurrent_to_output,
+              weight_tensor.params, &new_tensor_index);
+
+          TfLiteIntArrayFree(input_weight_dims);
+          TfLiteIntArrayFree(recurrent_weight_dims);
+
+          // Biases have to be split in four.
+          const auto bias_size = output_dims->data[1];
+          const TfLiteTensor& biases_tensor =
+              mapping_args.context->tensors[mapping_args.node->inputs
+                                                ->data[3 /* kInputBiases */]];
+
+          std::vector<int32_t> input_bias;
+          std::vector<int32_t> cell_bias;
+          std::vector<int32_t> forget_bias;
+          std::vector<int32_t> output_bias;
+          delegate::nnapi::DecomposeBiasTensor(
+              biases_tensor.data.i32, bias_size, &input_bias, &cell_bias,
+              &forget_bias, &output_bias);
+
+          int input_bias_tensor = -1;
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
+              input_bias, biases_tensor.params, &input_bias_tensor);
+          int forget_bias_tensor = -1;
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
+              forget_bias, biases_tensor.params, &forget_bias_tensor);
+          int cell_gate_bias_tensor = -1;
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
+              cell_bias, biases_tensor.params, &cell_gate_bias_tensor);
+          int output_gate_bias_tensor = -1;
+          mapping_args.builder->AddNewInputConstantTensor(
+              ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
+              output_bias, biases_tensor.params, &output_gate_bias_tensor);
+
+          mapping_args.builder->AddTensorInput(
+              mapping_args.node->inputs->data[4 /* kInputPrevState */],
+              /* hybrid_op */ false,
+              /* scalar_as_tensor */ false);
+
+          // kInputPrevActivation
+          mapping_args.builder->AddTensorInput(
+              mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
+              /* hybrid_op */ false,
+              /* scalar_as_tensor */ false);
+
+          // Configure the copy from the activation and state outputs
+          // to their associated inputs.
+          mapping_args.feedback_loops->push_back(std::make_tuple(
+              0 /*kOutputActivation*/, 1 /*kInputPrevActivation*/));
+
+          mapping_args.feedback_loops->push_back(
+              std::make_tuple(1 /*kOutputState*/, 4 /*kInputPrevState*/));
+
+          // OUTPUTS
+          // Set only the first two since the remaining ones are
+          // ignored by NNAPI.
+          mapping_args.builder->AddTensorOutput(
+              mapping_args.node->outputs->data[1 /* kOutputState */], 0);
+
+          mapping_args.builder->AddTensorOutput(
+              mapping_args.node->outputs
+                  ->data[0 /* kOutputActivationState */],
+              0);
+
+          return ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
+        };
+      }
+      if (node->inputs->size == 24 &&
+          android_sdk_version < kMinSdkVersionForNNAPI12) {
+        // LSTM with layer norm was introduced in API level 29.
+        return nullptr;
+      }
+      if (weight_type != kTfLiteFloat32 && weight_type != kTfLiteUInt8) {
+        return nullptr;
+      }
+      return [](const NNAPIOpMappingArgs& mapping_args)
+                 -> ANeuralNetworksOperationType {
+        auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
+            mapping_args.node->builtin_data);
+        mapping_args.builder->AddScalarInt32Operand(builtin->activation);
+        mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
+        mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
+
+        // The current NNAPI implementation requires the scratch_buffer as
+        // an output.
+        mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
+
+        // NNAPI needs both state_in and state_out for cell_state and
+        // output_state.
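// Background for the state plumbing below: an NNAPI model is stateless,
// while TFLite's recurrent kernels update their state tensors in place.
// AddStateFloat32Tensor therefore registers each state tensor as an extra
// model output, and the (model_state_outputs, model_state_tfl_inputs)
// pairs recorded here let the kernel copy every produced state back onto
// its TFLite input tensor after each invocation. A hedged sketch of that
// copy-back loop (illustrative only):
//
//   for (size_t i = 0; i < model_state_outputs_.size(); ++i) {
//     TfLiteTensor* state = &context->tensors[model_state_tfl_inputs_[i]];
//     // ...copy the bytes produced for ANN output model_state_outputs_[i]
//     // into state->data so the next Invoke() sees the updated state...
//   }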
+        int ann_index;
+        mapping_args.builder->AddStateFloat32Tensor(
+            mapping_args.node->inputs
+                ->data[/*kInputActivationStateTensor*/ 18],
+            &ann_index);
+        mapping_args.model_state_outputs->push_back(ann_index);
+        mapping_args.model_state_tfl_inputs->push_back(
+            mapping_args.node->inputs
+                ->data[/*kInputActivationStateTensor*/ 18]);
+        mapping_args.builder->AddStateFloat32Tensor(
+            mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
+            &ann_index);
+        mapping_args.model_state_outputs->push_back(ann_index);
+        mapping_args.model_state_tfl_inputs->push_back(
+            mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
+
+        const bool hybrid_op = IsHybridOperator(
+            mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
+
+        if (mapping_args.node->inputs->size == 24) {
+          for (int i = 20; i < 24; ++i) {
+            const auto input_index = mapping_args.node->inputs->data[i];
+            if (input_index != kOptionalTensor) {
+              mapping_args.builder->AddTensorInput(input_index, hybrid_op);
+            } else {
+              mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
+            }
+          }
+        }
+
+        return ANEURALNETWORKS_LSTM;
+      };
+      }
+      break;
+    case kTfLiteBuiltinMean:
+      // NNAPI does not support generating a scalar as output for MEAN.
+      if (version <= 2 &&
+          ((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
+            context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) ||
+           (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
+            IsQuantized(context->tensors[node->inputs->data[0]].type))) &&
+          context->tensors[node->outputs->data[0]].dims->size > 0) {
+        auto input_param = context->tensors[node->inputs->data[0]].params;
+        auto output_param = context->tensors[node->outputs->data[0]].params;
+        // NNAPI requires that the input and output have the same
+        // quantization parameters.
+        if (input_param.scale != output_param.scale ||
+            input_param.zero_point != output_param.zero_point) {
+          return nullptr;
+        }
+        return [](const NNAPIOpMappingArgs& mapping_args)
+                   -> ANeuralNetworksOperationType {
+          auto builtin = reinterpret_cast<TfLiteReducerParams*>(
+              mapping_args.node->builtin_data);
+          int32_t keep_dims = 0;
+          if (builtin->keep_dims) keep_dims = 1;
+          mapping_args.builder->AddScalarInt32Operand(keep_dims);
+          return ANEURALNETWORKS_MEAN;
+        };
+      }
+      break;
+    case kTfLiteBuiltinEmbeddingLookup:
+      // NNAPI only supports float32 values.
+      if (version == 1 &&
+          context->tensors[node->inputs->data[1]].type == kTfLiteFloat32) {
+        return BasicMappingFn<ANEURALNETWORKS_EMBEDDING_LOOKUP>;
+      }
+      break;
+    case kTfLiteBuiltinHashtableLookup:
+      // NNAPI only supports float32 output.
+ if (version == 1 && + context->tensors[node->outputs->data[0]].type == kTfLiteFloat32) { + return BasicMappingFn; + } + break; + case kTfLiteBuiltinMaximum: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinMinimum: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinCast: { + const TfLiteType input_type = + context->tensors[node->inputs->data[0]].type; + const TfLiteType output_type = + context->tensors[node->outputs->data[0]].type; + auto is_supported_tensor_type = [](const TfLiteType& type) { + return (type == kTfLiteFloat32 || type == kTfLiteInt32 || + type == kTfLiteUInt8); + }; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + is_supported_tensor_type(input_type) && + is_supported_tensor_type(output_type)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinPrelu: + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { + if (!IsFloatOrUint8Operator(context, node)) { + return nullptr; + } + return BasicMappingFn; + } + break; + case kTfLiteBuiltinTile: { + // NN API doesn't support int64 and boolean inputs to this op + const auto input_type = context->tensors[node->inputs->data[0]].type; + const auto multipliers_type = + context->tensors[node->inputs->data[1]].type; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteInt32) && + (multipliers_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinLogicalOr: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + input_type == kTfLiteBool) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinLogicalAnd: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + input_type == kTfLiteBool) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinLogicalNot: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + input_type == kTfLiteBool) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinLess: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteBool || + input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinLessEqual: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteBool || + input_type == kTfLiteInt32)) { 
+ return BasicMappingFn; + } + } break; + case kTfLiteBuiltinGreater: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteBool || + input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinGreaterEqual: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteBool || + input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinEqual: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteBool || + input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinNotEqual: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteBool || + input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinNeg: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat32 || input_type == kTfLiteInt32)) { + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinTopkV2: { + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12) { + const auto& input = context->tensors[node->outputs->data[0]]; + const auto& k_param = context->tensors[node->outputs->data[1]]; + if ((input.type == kTfLiteFloat32 || input.type == kTfLiteInt32 || + input.type == kTfLiteUInt8 || input.type == kTfLiteInt8) && + (k_param.type == kTfLiteInt32 && + k_param.allocation_type == kTfLiteMmapRo)) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + const TfLiteTensor& k_param = mapping_args.context ->tensors[mapping_args.node->inputs->data[1]]; - mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32); - return ANEURALNETWORKS_EXPAND_DIMS; + mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32); + return ANEURALNETWORKS_TOPK_V2; }; - } - } break; - case kTfLiteBuiltinSplit: { - // Tensor indices: split_dim: 0, value: 1 - const TfLiteTensor& axis = context->tensors[node->inputs->data[0]]; - const TfLiteTensor& input = context->tensors[node->inputs->data[1]]; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - (input.type == kTfLiteFloat32 || input.type == kTfLiteUInt8 || - input.type == kTfLiteInt32) && - (axis.type == kTfLiteInt32 && - axis.allocation_type == kTfLiteMmapRo)) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - const TfLiteTensor& axis = - mapping_args.context - ->tensors[mapping_args.node->inputs->data[0]]; - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarInt32Operand(*axis.data.i32); - mapping_args.builder->AddScalarInt32Operand(builtin->num_splits); - return 
ANEURALNETWORKS_SPLIT; - }; - } - } break; - case kTfLiteBuiltinLogSoftmax: { - const auto input_type = context->tensors[node->inputs->data[0]].type; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - input_type == kTfLiteFloat32) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - // Scaling and axis are hardcoded to respectively 1 and -1 - // in TFLite. - mapping_args.builder->AddScalarFloat32Operand(1); - mapping_args.builder->AddScalarInt32Operand(-1); - return ANEURALNETWORKS_LOG_SOFTMAX; - }; - } - } break; - case kTfLiteBuiltinQuantize: { - const auto value_type = context->tensors[node->inputs->data[0]].type; - const auto output_type = context->tensors[node->outputs->data[0]].type; - const auto quantization_params = - context->tensors[node->outputs->data[0]].params; - if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && - value_type == kTfLiteFloat32 && output_type == kTfLiteUInt8 && - quantization_params.scale > 0.f) { - return BasicMappingFn; - } - } break; - case kTfLiteBuiltinReduceAny: { - if (version != 1 || android_sdk_version < kMinSdkVersionForNNAPI12) { + } else { return nullptr; } - // NNAPI does not support generating a scalar as output for REDUCE_ANY. - if (context->tensors[node->outputs->data[0]].dims->size == 0) { + } + } break; + case kTfLiteBuiltinSelect: { + const auto value_type = context->tensors[node->inputs->data[1]].type; + if (version <= 2 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (value_type == kTfLiteFloat32 || value_type == kTfLiteUInt8 || + value_type == kTfLiteInt8 || value_type == kTfLiteInt32)) { + TfLiteIntArray* condition_shape = + context->tensors[node->inputs->data[0]].dims; + TfLiteIntArray* input_shape = + context->tensors[node->inputs->data[1]].dims; + // The Android Q-variant of select does not support broadcasting. + if (!TfLiteIntArrayEqual(condition_shape, input_shape)) { + return nullptr; + } + return BasicMappingFn; + } + } break; + case kTfLiteBuiltinGather: { + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { + const auto& input = context->tensors[node->inputs->data[0]]; + const auto& positions = context->tensors[node->inputs->data[1]]; + + auto is_supported_input_type = [](const TfLiteTensor& t) { + return (t.type == kTfLiteFloat32 || t.type == kTfLiteFloat16 || + t.type == kTfLiteInt32 || t.type == kTfLiteUInt8); + }; + + if (!is_supported_input_type(input) || + !is_supported_input_type(positions)) { + return nullptr; + } + + // 0-dimension args are not supported by NNAPI. + if (positions.dims->size == 0) { + return nullptr; + } + + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddTensorInput( + mapping_args.node->inputs->data[0], + /* hybrid_op */ false, + /* scalar_as_tensor */ false); + + mapping_args.builder->AddScalarInt32Operand(builtin->axis); + + mapping_args.builder->AddTensorInput( + mapping_args.node->inputs->data[1], + /* hybrid_op */ false, + /* scalar_as_tensor */ false); + + return ANEURALNETWORKS_GATHER; + }; + } + } break; + case kTfLiteBuiltinBidirectionalSequenceLstm: + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12) { + if (IsHybridOperator(context, builtin_code, node)) { + // Hybrid version of this op is not supported by NN API. 
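// "Hybrid" operators keep float activations but store weights quantized
// (e.g. uint8) and dequantize them on the fly; NNAPI gained such kernels
// only in 1.2 and has none for this op. A minimal sketch of the shape of
// the check (illustrative; the real IsHybridOperator is defined earlier
// in this file):
//
//   bool LooksHybrid(const TfLiteContext* context, const TfLiteNode* node,
//                    int input_idx, int weights_idx) {
//     const TfLiteType input =
//         context->tensors[node->inputs->data[input_idx]].type;
//     const TfLiteType weights =
//         context->tensors[node->inputs->data[weights_idx]].type;
//     return IsFloat(input) && IsQuantized(weights);
//   }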
return nullptr; } return [](const NNAPIOpMappingArgs& mapping_args) -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims); - return ANEURALNETWORKS_REDUCE_ANY; + auto builtin = + reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarInt32Operand(builtin->activation); + mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip); + mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip); + mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs); + mapping_args.builder->AddScalarBoolOperand(builtin->time_major); + // TF Lite doesn't support layer normalization in bidirectional + // sequence LSTM, so we insert optional tensors for NNAPI + for (int i = 0; i < 8; ++i) { + mapping_args.builder->AddVectorFloat32Operand(nullptr, 0); + } + return ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM; }; - } break; - case kTfLiteBuiltinReduceMin: { - if (version > 2 || android_sdk_version < kMinSdkVersionForNNAPI12) { - return nullptr; - } - // NNAPI does not support generating a scalar as output for REDUCE_MIN. - if (context->tensors[node->outputs->data[0]].dims->size == 0) { - return nullptr; - } + } + break; + case kTfLiteBuiltinExpandDims: { + const auto input_type = context->tensors[node->inputs->data[0]].type; + const auto axis = context->tensors[node->inputs->data[1]]; + if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 && + (input_type == kTfLiteFloat16 || input_type == kTfLiteFloat32 || + input_type == kTfLiteInt32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8) && + // TFLite supports axis also as int64 but NNAPI only int32 + (axis.type == kTfLiteInt32 && + axis.allocation_type == kTfLiteMmapRo)) { return [](const NNAPIOpMappingArgs& mapping_args) -> ANeuralNetworksOperationType { - auto builtin = reinterpret_cast( - mapping_args.node->builtin_data); - mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims); - return ANEURALNETWORKS_REDUCE_MIN; + const TfLiteTensor& axis_param = + mapping_args.context->tensors[mapping_args.node->inputs->data[1]]; + mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32); + return ANEURALNETWORKS_EXPAND_DIMS; }; - } break; - case kTfLiteBuiltinReduceMax: { - if (version > 2 || android_sdk_version < kMinSdkVersionForNNAPI12) { - return nullptr; - } - // NNAPI does not support generating a scalar as output for REDUCE_MAX. 
-      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
-        return nullptr;
-      }
+      }
+    } break;
+    case kTfLiteBuiltinSplit: {
+      // Tensor indices: split_dim: 0, value: 1
+      const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
+      const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
+      if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
+          (input.type == kTfLiteFloat32 || input.type == kTfLiteUInt8 ||
+           input.type == kTfLiteInt32) &&
+          (axis.type == kTfLiteInt32 &&
+           axis.allocation_type == kTfLiteMmapRo)) {
         return [](const NNAPIOpMappingArgs& mapping_args)
                    -> ANeuralNetworksOperationType {
-          auto builtin = reinterpret_cast<TfLiteReducerParams*>(
+          const TfLiteTensor& axis =
+              mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
+          auto builtin = reinterpret_cast<TfLiteSplitParams*>(
               mapping_args.node->builtin_data);
-          mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
-          return ANEURALNETWORKS_REDUCE_MAX;
+          mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
+          mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
+          return ANEURALNETWORKS_SPLIT;
         };
-    } break;
-    case kTfLiteBuiltinReduceProd: {
-      if (version != 1 || android_sdk_version < kMinSdkVersionForNNAPI12) {
-        return nullptr;
-      }
-      // NNAPI only supports floating point REDUCE_PROD.
-      const auto input_type = context->tensors[node->inputs->data[0]].type;
-      if (input_type != kTfLiteFloat32) {
-        return nullptr;
-      }
-      // NNAPI does not support generating a scalar as output for REDUCE_PROD.
-      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
-        return nullptr;
-      }
+      }
+    } break;
+    case kTfLiteBuiltinLogSoftmax: {
+      const auto input_type = context->tensors[node->inputs->data[0]].type;
+      if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
+          input_type == kTfLiteFloat32) {
         return [](const NNAPIOpMappingArgs& mapping_args)
                    -> ANeuralNetworksOperationType {
-          auto builtin = reinterpret_cast<TfLiteReducerParams*>(
-              mapping_args.node->builtin_data);
-          mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
-          return ANEURALNETWORKS_REDUCE_PROD;
+          // Scaling and axis are hardcoded to 1 and -1, respectively,
+          // in TFLite.
+          mapping_args.builder->AddScalarFloat32Operand(1);
+          mapping_args.builder->AddScalarInt32Operand(-1);
+          return ANEURALNETWORKS_LOG_SOFTMAX;
         };
-    } break;
-    case kTfLiteBuiltinSum: {
-      if (version != 1 || android_sdk_version < kMinSdkVersionForNNAPI12) {
-        return nullptr;
-      }
-      // NNAPI only supports floating point REDUCE_SUM.
-      const auto input_type = context->tensors[node->inputs->data[0]].type;
-      if (input_type != kTfLiteFloat32) {
-        return nullptr;
-      }
-      // NNAPI does not support generating a scalar as output for REDUCE_SUM.
-      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
-        return nullptr;
-      }
-      return [](const NNAPIOpMappingArgs& mapping_args)
-                 -> ANeuralNetworksOperationType {
-        auto builtin = reinterpret_cast<TfLiteReducerParams*>(
-            mapping_args.node->builtin_data);
-        mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
-        return ANEURALNETWORKS_REDUCE_SUM;
-      };
-    } break;
-    default:
-      // All other operators are not mapped.
+      }
+    } break;
+    case kTfLiteBuiltinQuantize: {
+      const auto value_type = context->tensors[node->inputs->data[0]].type;
+      const auto output_type = context->tensors[node->outputs->data[0]].type;
+      const auto quantization_params =
+          context->tensors[node->outputs->data[0]].params;
+      if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI12 &&
+          value_type == kTfLiteFloat32 && output_type == kTfLiteUInt8 &&
+          quantization_params.scale > 0.f) {
+        return BasicMappingFn<ANEURALNETWORKS_QUANTIZE>;
+      }
+    } break;
+    case kTfLiteBuiltinReduceAny: {
+      if (version != 1 || android_sdk_version < kMinSdkVersionForNNAPI12) {
         return nullptr;
-      }
+      }
+      // NNAPI does not support generating a scalar as output for REDUCE_ANY.
+      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
+        return nullptr;
+      }
+      return [](const NNAPIOpMappingArgs& mapping_args)
+                 -> ANeuralNetworksOperationType {
+        auto builtin = reinterpret_cast<TfLiteReducerParams*>(
+            mapping_args.node->builtin_data);
+        mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
+        return ANEURALNETWORKS_REDUCE_ANY;
+      };
+    } break;
+    case kTfLiteBuiltinReduceMin: {
+      if (version > 2 || android_sdk_version < kMinSdkVersionForNNAPI12) {
+        return nullptr;
+      }
+      // NNAPI does not support generating a scalar as output for REDUCE_MIN.
+      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
+        return nullptr;
+      }
+      return [](const NNAPIOpMappingArgs& mapping_args)
+                 -> ANeuralNetworksOperationType {
+        auto builtin = reinterpret_cast<TfLiteReducerParams*>(
+            mapping_args.node->builtin_data);
+        mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
+        return ANEURALNETWORKS_REDUCE_MIN;
+      };
+    } break;
+    case kTfLiteBuiltinReduceMax: {
+      if (version > 2 || android_sdk_version < kMinSdkVersionForNNAPI12) {
+        return nullptr;
+      }
+      // NNAPI does not support generating a scalar as output for REDUCE_MAX.
+      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
+        return nullptr;
+      }
+      return [](const NNAPIOpMappingArgs& mapping_args)
+                 -> ANeuralNetworksOperationType {
+        auto builtin = reinterpret_cast<TfLiteReducerParams*>(
+            mapping_args.node->builtin_data);
+        mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
+        return ANEURALNETWORKS_REDUCE_MAX;
+      };
+    } break;
+    case kTfLiteBuiltinReduceProd: {
+      if (version != 1 || android_sdk_version < kMinSdkVersionForNNAPI12) {
+        return nullptr;
+      }
+      // NNAPI only supports floating point REDUCE_PROD.
+      const auto input_type = context->tensors[node->inputs->data[0]].type;
+      if (input_type != kTfLiteFloat32) {
+        return nullptr;
+      }
+      // NNAPI does not support generating a scalar as output for REDUCE_PROD.
+      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
+        return nullptr;
+      }
+      return [](const NNAPIOpMappingArgs& mapping_args)
+                 -> ANeuralNetworksOperationType {
+        auto builtin = reinterpret_cast<TfLiteReducerParams*>(
+            mapping_args.node->builtin_data);
+        mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
+        return ANEURALNETWORKS_REDUCE_PROD;
+      };
+    } break;
+    case kTfLiteBuiltinSum: {
+      if (version != 1 || android_sdk_version < kMinSdkVersionForNNAPI12) {
+        return nullptr;
+      }
+      // NNAPI only supports floating point REDUCE_SUM.
+      const auto input_type = context->tensors[node->inputs->data[0]].type;
+      if (input_type != kTfLiteFloat32) {
+        return nullptr;
+      }
+      // NNAPI does not support generating a scalar as output for REDUCE_SUM.
+      if (context->tensors[node->outputs->data[0]].dims->size == 0) {
+        return nullptr;
+      }
+      return [](const NNAPIOpMappingArgs& mapping_args)
+                 -> ANeuralNetworksOperationType {
+        auto builtin = reinterpret_cast<TfLiteReducerParams*>(
+            mapping_args.node->builtin_data);
+        mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
+        return ANEURALNETWORKS_REDUCE_SUM;
+      };
+    } break;
+    default:
+      // All other operators are not mapped.
+      return nullptr;
+  }
+  return nullptr;
+}
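
To make the contract above concrete, here is a brief editorial sketch (not part of the patch) of how a caller typically consumes the value returned by Map(): a null MappingFn means the node is unsupported, and a non-null one is invoked later with an NNAPIOpMappingArgs to emit the operation's non-tensor operands. All names are taken from the surrounding code.

// Editorial sketch: Map() doubles as a support check and an operand emitter.
bool TryMapNode(const TfLiteContext* context, const TfLiteRegistration* reg,
                TfLiteNode* node, NNAPIOpMappingArgs args,
                int android_sdk_version) {
  NNAPIDelegateKernel::MappingFn fn = NNAPIDelegateKernel::Map(
      context, reg->builtin_code, reg->version, android_sdk_version, node,
      /*is_accelerator_specified=*/false);
  if (fn == nullptr) return false;  // Unsupported: fall back to CPU kernels.
  // Emits the op's scalar operands via the builder and returns the
  // ANEURALNETWORKS_* operation code to add to the model.
  ANeuralNetworksOperationType op_type = fn(args);
  (void)op_type;
  return true;
}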
+
+// Initialize the kernel (a NN model).
+TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
+                                       const TfLiteDelegateParams* params) {
+  for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
+    nodes_.push_back(node_index);
   }
-  // Initialize the kernel (a NN model).
-  TfLiteStatus Init(TfLiteContext* context,
-                    const TfLiteDelegateParams* params) {
-    for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
-      nodes_.push_back(node_index);
+  const auto delegate_options =
+      StatefulNnApiDelegate::GetOptions(params->delegate);
+  const char* device_name_ptr = delegate_options.accelerator_name;
+  // user specified an accelerator to use.
+  if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
+      device_name_ptr != nullptr) {
+    nnapi_device_ = GetDeviceHandle(context, device_name_ptr);
+    if (nnapi_device_ == nullptr) {
+      return kTfLiteError;
     }
+  }
-    const auto delegate_options =
-        StatefulNnApiDelegate::GetOptions(params->delegate);
-    const char* device_name_ptr = delegate_options.accelerator_name;
-    // user specified an acclelerator to use.
-    if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-        device_name_ptr != nullptr) {
-      nnapi_device_ = GetDeviceHandle(context, device_name_ptr);
-      if (nnapi_device_ == nullptr) {
-        return kTfLiteError;
-      }
-    }
+  // Mark the handle backed tensors.
+  tensor_memory_map_ =
+      &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
-    // Mark the handle backed tensors.
-    tensor_memory_map_ =
-        &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
+  if (!nn_model_) {
+    ANeuralNetworksModel* model = nullptr;
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context, nnapi_->ANeuralNetworksModel_create(&model));
+    nn_model_.reset(model);
-    if (!nn_model_) {
-      ANeuralNetworksModel* model = nullptr;
+    TF_LITE_ENSURE_STATUS(
+        BuildGraph(context, params->input_tensors, params->output_tensors));
+  }
+
+  if (!nn_compilation_) {
+    ANeuralNetworksCompilation* compilation = nullptr;
+    if (nnapi_device_ != nullptr) {
+      // Compile for the selected accelerator.
       RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context, nnapi_->ANeuralNetworksModel_create(&model));
-      nn_model_.reset(model);
-
-      TF_LITE_ENSURE_STATUS(
-          BuildGraph(context, params->input_tensors, params->output_tensors));
+          context, nnapi_->ANeuralNetworksCompilation_createForDevices(
+                       nn_model_.get(), &nnapi_device_, 1, &compilation));
+    } else {
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(
+          context, nnapi_->ANeuralNetworksCompilation_create(nn_model_.get(),
+                                                             &compilation));
     }
-    if (!nn_compilation_) {
-      ANeuralNetworksCompilation* compilation = nullptr;
-      if (nnapi_device_ != nullptr) {
-        // Compile for the selected accelerator.
-        RETURN_TFLITE_ERROR_IF_NN_ERROR(
-            context, nnapi_->ANeuralNetworksCompilation_createForDevices(
-                         nn_model_.get(), &nnapi_device_, 1, &compilation));
-      } else {
-        RETURN_TFLITE_ERROR_IF_NN_ERROR(
-            context, nnapi_->ANeuralNetworksCompilation_create(nn_model_.get(),
-                                                               &compilation));
-      }
-
-      auto preference = delegate_options.execution_preference;
-      if (preference !=
-          StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
-        const int preference_result =
-            nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
-                                                             preference);
-        if (preference_result != ANEURALNETWORKS_NO_ERROR) {
-          nnapi_->ANeuralNetworksCompilation_free(compilation);
-          compilation = nullptr;
-        }
-        RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result);
-      }
-
-      const char* cache_dir = delegate_options.cache_dir;
-      const char* model_token = delegate_options.model_token;
-      if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-          cache_dir && model_token) {
-        // Compilation caching could be enabled, try construct the uint8
-        // token.
-        // TODO(133342794): use a generic token generator class.
-        uint64_t token_parts[4];
-        // bits from model_token.
-        token_parts[0] = std::hash<std::string>{}(model_token);
-        // bits from params->nodes_to_replace.
-        token_parts[1] = GetHash(params->nodes_to_replace);
-        // bits from params->input_tensors.
-        token_parts[2] = GetHash(params->input_tensors);
-        // bits from params->output_tensors.
-        token_parts[3] = GetHash(params->output_tensors);
-        // NNAPI requires the token to be 256bit long.
-        std::vector<uint8_t> nnapi_cache_token(32, 0);
-        // Copy the token bits.
-        uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
-        for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
-          nnapi_cache_token[i] = p[i];
-        }
-        const int set_caching_result =
-            nnapi_->ANeuralNetworksCompilation_setCaching(
-                compilation, cache_dir, nnapi_cache_token.data());
-        if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
-          nnapi_->ANeuralNetworksCompilation_free(compilation);
-          compilation = nullptr;
-        }
-        RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result);
-      }
-      const int finish_result =
-          nnapi_->ANeuralNetworksCompilation_finish(compilation);
-      if (finish_result != ANEURALNETWORKS_NO_ERROR) {
+    auto preference = delegate_options.execution_preference;
+    if (preference !=
+        StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
+      const int preference_result =
+          nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
+                                                           preference);
+      if (preference_result != ANEURALNETWORKS_NO_ERROR) {
         nnapi_->ANeuralNetworksCompilation_free(compilation);
         compilation = nullptr;
       }
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result);
-      nn_compilation_.reset(compilation);
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result);
     }
-    return kTfLiteOk;
+
+    const char* cache_dir = delegate_options.cache_dir;
+    const char* model_token = delegate_options.model_token;
+    if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
+        model_token) {
+      // Compilation caching could be enabled, try to construct the uint8
+      // token.
+      // TODO(133342794): use a generic token generator class.
+      uint64_t token_parts[4];
+      // bits from model_token.
+      token_parts[0] = std::hash<std::string>{}(model_token);
+      // bits from params->nodes_to_replace.
+      token_parts[1] = GetHash(params->nodes_to_replace);
+      // bits from params->input_tensors.
+      token_parts[2] = GetHash(params->input_tensors);
+      // bits from params->output_tensors.
+      token_parts[3] = GetHash(params->output_tensors);
+      // NNAPI requires the token to be 256 bits long.
+      std::vector<uint8_t> nnapi_cache_token(32, 0);
+      // Copy the token bits.
+      uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
+      for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
+        nnapi_cache_token[i] = p[i];
+      }
+      const int set_caching_result =
+          nnapi_->ANeuralNetworksCompilation_setCaching(
+              compilation, cache_dir, nnapi_cache_token.data());
+      if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
+        nnapi_->ANeuralNetworksCompilation_free(compilation);
+        compilation = nullptr;
+      }
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result);
+    }
+    const int finish_result =
+        nnapi_->ANeuralNetworksCompilation_finish(compilation);
+    if (finish_result != ANEURALNETWORKS_NO_ERROR) {
+      nnapi_->ANeuralNetworksCompilation_free(compilation);
+      compilation = nullptr;
+    }
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result);
+    nn_compilation_.reset(compilation);
   }
+  return kTfLiteOk;
+}
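
As a standalone illustration of the token layout used above, the 256-bit cache token is just four 64-bit hashes copied into 32 contiguous bytes. A minimal sketch (MakeCacheToken and its hash arguments are hypothetical; the patch itself uses GetHash over the delegate params):

#include <cstdint>
#include <cstring>
#include <functional>
#include <string>
#include <vector>

// Editorial sketch of the 4 x 64-bit = 256-bit token layout.
std::vector<uint8_t> MakeCacheToken(const std::string& model_token,
                                    uint64_t nodes_hash, uint64_t inputs_hash,
                                    uint64_t outputs_hash) {
  const uint64_t parts[4] = {std::hash<std::string>{}(model_token), nodes_hash,
                             inputs_hash, outputs_hash};
  std::vector<uint8_t> token(32, 0);
  std::memcpy(token.data(), parts, sizeof(parts));  // 32 bytes total.
  return token;
}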
-  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-    if (!nn_compilation_) {
-      // Compilation failed earlier, return error.
-      return kTfLiteError;
-    }
-    return kTfLiteOk;
+TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
+                                          TfLiteNode* node) {
+  if (!nn_compilation_) {
+    // Compilation failed earlier, return error.
+    return kTfLiteError;
   }
+  return kTfLiteOk;
+}
-  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) {
-    ANeuralNetworksExecution* execution = nullptr;
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(
-        context, nnapi_->ANeuralNetworksExecution_create(nn_compilation_.get(),
-                                                         &execution));
-    std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
-        execution_unique_ptr(execution);
+TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
+                                         TfLiteNode* node) {
+  ANeuralNetworksExecution* execution = nullptr;
+  RETURN_TFLITE_ERROR_IF_NN_ERROR(
+      context, nnapi_->ANeuralNetworksExecution_create(nn_compilation_.get(),
+                                                       &execution));
+  std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
+      execution_unique_ptr(execution);
-    // Set the input tensor buffers. Note: we access tflite tensors using
-    // absolute indices but NN api indices inputs by relative indices.
-    int relative_input_index = 0;
+  // Set the input tensor buffers. Note: we access tflite tensors using
+  // absolute indices but NN API indexes inputs by relative indices.
+  int relative_input_index = 0;
-    size_t input_offset = 0;
-    for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
-      if (absolute_input_index == kOptionalTensor) {
-        continue;
-      }
-      TfLiteTensor* tensor = &context->tensors[absolute_input_index];
-      if (tensor->allocation_type != kTfLiteMmapRo) {
-        if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
-            tensor->buffer_handle < tensor_memory_map_->size()) {
-          RETURN_TFLITE_ERROR_IF_NN_ERROR(
-              context, nnapi_->ANeuralNetworksExecution_setInputFromMemory(
-                           execution, relative_input_index, nullptr,
-                           tensor_memory_map_->at(tensor->buffer_handle).memory,
-                           0, tensor->bytes));
-          relative_input_index++;
-          continue;
-        }
-        TfLiteType ann_type_equivalent =
-            operand_mapping_.lite_index_to_ann_type_conversion(
-                absolute_input_index);
-        int tensor_size = 0;
-        if (ann_type_equivalent != kTfLiteNoType) {
-          const auto num_elements = NumElements(tensor);
-          uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
-          if (tensor->type == kTfLiteUInt8 &&
-              ann_type_equivalent == kTfLiteInt32) {
-            for (int i = 0; i < num_elements; ++i) {
-              reinterpret_cast<int32_t*>(input_ptr)[i] =
-                  static_cast<const int32_t>(tensor->data.raw_const[i]);
-            }
-          } else if (tensor->type == kTfLiteInt8 &&
-                     ann_type_equivalent == kTfLiteUInt8) {
-            // Explicitly convert int8 values to uint8 values.
-            for (int i = 0; i < num_elements; ++i) {
-              input_ptr[i] = static_cast<const uint8_t>(
-                  static_cast<int32_t>(tensor->data.int8[i]) + 128);
-            }
-          } else if (tensor->type == kTfLiteInt8 &&
-                     ann_type_equivalent == kTfLiteInt32) {
-            for (int i = 0; i < num_elements; ++i) {
-              reinterpret_cast<int32_t*>(input_ptr)[i] =
-                  static_cast<const int32_t>(tensor->data.raw_const[i]) + 128;
-            }
-          } else {
-            context->ReportError(
-                context,
-                "NN API Delegate: unsupported tensor types conversion: "
-                "from type code %d to type code %d.\n",
-                tensor->type, ann_type_equivalent);
-            return kTfLiteError;
-          }
-          size_t type_size;
-          TF_LITE_ENSURE_OK(
-              context, GetSizeOfType(context, ann_type_equivalent, &type_size));
-          tensor_size = NumElements(tensor) * type_size;
-          RETURN_TFLITE_ERROR_IF_NN_ERROR(
-              context,
-              nnapi_->ANeuralNetworksExecution_setInputFromMemory(
-                  execution, relative_input_index, nullptr,
-                  nn_input_memory_->get_handle(), input_offset, tensor_size));
-        } else {
-          // copy data to pre-allocated shared memory.
-          memcpy(nn_input_memory_->get_data_ptr() + input_offset,
-                 tensor->data.raw, tensor->bytes);
-          RETURN_TFLITE_ERROR_IF_NN_ERROR(
-              context,
-              nnapi_->ANeuralNetworksExecution_setInputFromMemory(
-                  execution, relative_input_index, nullptr,
-                  nn_input_memory_->get_handle(), input_offset, tensor->bytes));
-          tensor_size = tensor->bytes;
-        }
-        input_offset += tensor_size;
-        input_offset += getNumPaddingBytes(tensor_size);
-        relative_input_index++;
-      }
+  size_t input_offset = 0;
+  for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
+    if (absolute_input_index == kOptionalTensor) {
+      continue;
     }
-
-    // Set the output tensor buffers.
-    int relative_output_index = 0;
-    size_t output_offset = 0;
-    for (auto output_index : TfLiteIntArrayView(node->outputs)) {
-      // If the NNAPI implementation doesn't have some of the outputs
-      // they are left unmapped and we should not try to read their value here
-      if (operand_mapping_.lite_index_to_ann(output_index) == -1) {
-        continue;
-      }
-      TfLiteTensor* tensor = &context->tensors[output_index];
+    TfLiteTensor* tensor = &context->tensors[absolute_input_index];
+    if (tensor->allocation_type != kTfLiteMmapRo) {
       if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
           tensor->buffer_handle < tensor_memory_map_->size()) {
         RETURN_TFLITE_ERROR_IF_NN_ERROR(
-            context, nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
-                         execution, relative_output_index, nullptr,
+            context, nnapi_->ANeuralNetworksExecution_setInputFromMemory(
+                         execution, relative_input_index, nullptr,
                          tensor_memory_map_->at(tensor->buffer_handle).memory,
                          0, tensor->bytes));
-
-      } else {
-        RETURN_TFLITE_ERROR_IF_NN_ERROR(
-            context,
-            nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
-                execution, relative_output_index, nullptr,
-                nn_output_memory_->get_handle(), output_offset, tensor->bytes));
-        output_offset += tensor->bytes;
-        output_offset += getNumPaddingBytes(tensor->bytes);
-      }
-      relative_output_index++;
-    }
-
-    // The state_out of previous invocation need to be mapped to state_in of
-    // current invocation.
-    for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
-      int state_tensor_idx = model_state_tfl_inputs_[i];
-      TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
-      // Here we are using a deep copy for state_in tensors so that we are not
-      // reading and writing into the same buffer during a invocation.
-      // TODO(110369471): using double shared buffer to minimize the copies.
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context, nnapi_->ANeuralNetworksExecution_setOutput(
-                       execution, relative_output_index, nullptr,
-                       tensor->data.raw, tensor->bytes));
-      relative_output_index++;
-    }
-    // Invoke ANN in blocking fashion.
-    if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
-      ANeuralNetworksEvent* event = nullptr;
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksExecution_startCompute(execution, &event));
-      const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
-      nnapi_->ANeuralNetworksEvent_free(event);
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result);
-    } else {
-      // Use synchronous execution for NNAPI 1.2+.
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context, nnapi_->ANeuralNetworksExecution_compute(execution));
-    }
-
-    // copy results from shared memory to the destination.
-    output_offset = 0;
-    for (auto output_index : TfLiteIntArrayView(node->outputs)) {
-      TfLiteTensor* tensor = &context->tensors[output_index];
-      if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
+        relative_input_index++;
         continue;
       }
       TfLiteType ann_type_equivalent =
-          operand_mapping_.lite_index_to_ann_type_conversion(output_index);
-      if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
-        // Explicitly convert uint8 values to int8 values.
-        uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
-            nn_output_memory_->get_data_ptr() + output_offset);
-        for (int i = 0; i < NumElements(tensor); ++i) {
-          output_ptr[i] =
-              static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
+          operand_mapping_.lite_index_to_ann_type_conversion(
+              absolute_input_index);
+      int tensor_size = 0;
+      if (ann_type_equivalent != kTfLiteNoType) {
+        const auto num_elements = NumElements(tensor);
+        uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
+        if (tensor->type == kTfLiteUInt8 &&
+            ann_type_equivalent == kTfLiteInt32) {
+          for (int i = 0; i < num_elements; ++i) {
+            reinterpret_cast<int32_t*>(input_ptr)[i] =
+                static_cast<const int32_t>(tensor->data.raw_const[i]);
+          }
+        } else if (tensor->type == kTfLiteInt8 &&
+                   ann_type_equivalent == kTfLiteUInt8) {
+          // Explicitly convert int8 values to uint8 values.
+          for (int i = 0; i < num_elements; ++i) {
+            input_ptr[i] = static_cast<const uint8_t>(
+                static_cast<int32_t>(tensor->data.int8[i]) + 128);
+          }
+        } else if (tensor->type == kTfLiteInt8 &&
+                   ann_type_equivalent == kTfLiteInt32) {
+          for (int i = 0; i < num_elements; ++i) {
+            reinterpret_cast<int32_t*>(input_ptr)[i] =
+                static_cast<const int32_t>(tensor->data.raw_const[i]) + 128;
+          }
+        } else {
+          context->ReportError(
+              context,
+              "NN API Delegate: unsupported tensor types conversion: "
+              "from type code %d to type code %d.\n",
+              tensor->type, ann_type_equivalent);
+          return kTfLiteError;
         }
+        size_t type_size;
+        TF_LITE_ENSURE_OK(
+            context, GetSizeOfType(context, ann_type_equivalent, &type_size));
+        tensor_size = NumElements(tensor) * type_size;
+        RETURN_TFLITE_ERROR_IF_NN_ERROR(
+            context,
+            nnapi_->ANeuralNetworksExecution_setInputFromMemory(
+                execution, relative_input_index, nullptr,
+                nn_input_memory_->get_handle(), input_offset, tensor_size));
+      } else {
+        // copy data to pre-allocated shared memory.
+        memcpy(nn_input_memory_->get_data_ptr() + input_offset,
+               tensor->data.raw, tensor->bytes);
+        RETURN_TFLITE_ERROR_IF_NN_ERROR(
+            context,
+            nnapi_->ANeuralNetworksExecution_setInputFromMemory(
+                execution, relative_input_index, nullptr,
+                nn_input_memory_->get_handle(), input_offset, tensor->bytes));
+        tensor_size = tensor->bytes;
       }
-      memcpy(tensor->data.raw,
-             nn_output_memory_->get_data_ptr() + output_offset, tensor->bytes);
+      input_offset += tensor_size;
+      input_offset += getNumPaddingBytes(tensor_size);
+      relative_input_index++;
+    }
+  }
+
+  // Set the output tensor buffers.
+  int relative_output_index = 0;
+  size_t output_offset = 0;
+  for (auto output_index : TfLiteIntArrayView(node->outputs)) {
+    // If the NNAPI implementation doesn't have some of the outputs,
+    // they are left unmapped and we should not try to read their value here.
+    if (operand_mapping_.lite_index_to_ann(output_index) == -1) {
+      continue;
+    }
+    TfLiteTensor* tensor = &context->tensors[output_index];
+    if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
+        tensor->buffer_handle < tensor_memory_map_->size()) {
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(
+          context, nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
+                       execution, relative_output_index, nullptr,
+                       tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
+                       tensor->bytes));
+
+    } else {
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(
+          context,
+          nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
+              execution, relative_output_index, nullptr,
+              nn_output_memory_->get_handle(), output_offset, tensor->bytes));
       output_offset += tensor->bytes;
       output_offset += getNumPaddingBytes(tensor->bytes);
     }
-
-    // copy output of all output tensors in feedback_loops_ into the
-    // associated input
-    for (auto feedback_loop : feedback_loops_) {
-      int output_tensor_idx;
-      int input_tensor_idx;
-      std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
-      TfLiteTensor* src =
-          &context->tensors[node->outputs->data[output_tensor_idx]];
-      TfLiteTensor* dest =
-          &context->tensors[node->inputs->data[input_tensor_idx]];
-
-      memcpy(dest->data.raw, src->data.raw, src->bytes);
-    }
-
-    return kTfLiteOk;
+    relative_output_index++;
   }
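
The offset bookkeeping in the two loops above keeps every staged tensor 16-byte aligned inside the shared memory pools. A self-contained sketch of the arithmetic, assuming getNumPaddingBytes() pads up to kDefaultByteAlignmentForNNAPI (16) as defined earlier in this file:

#include <cstddef>

// Editorial sketch of the padding arithmetic used for input_offset and
// output_offset above (alignment mirrors kDefaultByteAlignmentForNNAPI).
constexpr size_t kAlignment = 16;

size_t PaddingBytes(size_t byte_size) {
  const size_t rem = byte_size % kAlignment;
  return rem == 0 ? 0 : kAlignment - rem;
}

// A 10-byte tensor is followed by 6 padding bytes, so the next tensor
// starts at offset 16; a 32-byte tensor adds no padding.
size_t AdvanceOffset(size_t offset, size_t tensor_bytes) {
  return offset + tensor_bytes + PaddingBytes(tensor_bytes);
}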
-
-    // The tensor determining whether the inputs should be floating-point.
-    int input_tensor_index = -1;
-    std::vector<int> inputs_to_potentially_dequantize;
-
-    switch (builtin_code) {
-      case kTfLiteBuiltinConv2d:
-      case kTfLiteBuiltinFullyConnected: {
-        input_tensor_index = 0;
-        // Weights and bias are inputs #1 and #2 respectively and may require
-        // dequantization.
-        inputs_to_potentially_dequantize = {1, 2};
-        break;
-      }
-      case kTfLiteBuiltinLstm: {
-        input_tensor_index = 0;
-        inputs_to_potentially_dequantize = {1,  2,  3,  4,  5,  6,  7,
-                                            8,  9,  10, 11, 12, 13, 14,
-                                            15, 16, 17, 20, 21, 22, 23};
-        break;
-      }
-      default:
-        return;
-    }
-
-    int tensor_id = node->inputs->data[input_tensor_index];
-    if (tensor_id < 0) return;
-
-    // Nothing to do if the input is not floating-point.
-    if (!IsFloat(context->tensors[tensor_id].type)) return;
-
-    for (int i : inputs_to_potentially_dequantize) {
-      if (i < 0 || i >= node->inputs->size) continue;  // Ignore invalid index.
-      tensor_id = node->inputs->data[i];
-      if (tensor_id < 0) continue;  // Ignore optional input.
-
-      const TfLiteType type = context->tensors[tensor_id].type;
-      // Nothing to do for this tensor if it's not quantized.
-      if (!IsQuantized(type)) continue;
-
-      // Insert Dequantize operator if it hasn't been done already and change
-      // the node's input accordingly.
-      builder->AddDequantize(i, node->inputs->data[i], type);
-    }
+  // The state_out of the previous invocation needs to be mapped to state_in
+  // of the current invocation.
+  for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
+    int state_tensor_idx = model_state_tfl_inputs_[i];
+    TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
+    // Here we are using a deep copy for state_in tensors so that we are not
+    // reading and writing into the same buffer during an invocation.
+    // TODO(110369471): using double shared buffer to minimize the copies.
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context, nnapi_->ANeuralNetworksExecution_setOutput(
+                     execution, relative_output_index, nullptr,
+                     tensor->data.raw, tensor->bytes));
+    relative_output_index++;
+  }
+  // Invoke ANN in blocking fashion.
+  if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
+    ANeuralNetworksEvent* event = nullptr;
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context,
+        nnapi_->ANeuralNetworksExecution_startCompute(execution, &event));
+    const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
+    nnapi_->ANeuralNetworksEvent_free(event);
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result);
+  } else {
+    // Use synchronous execution for NNAPI 1.2+.
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context, nnapi_->ANeuralNetworksExecution_compute(execution));
   }
-  TfLiteStatus AddOpsAndTensors(TfLiteContext* context) {
-    DequantizeMapping dequantize_mapping;
-    // The operand builder allows creating a single op. It is created outside
-    // the for loop to avoid reallocating the vectors.
-    NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
-                           &dequantize_mapping, &allocation_memory_mapping_,
-                           nn_model_.get());
-    // Add Tensors.
-    for (auto node_index : nodes_) {
-      // Obtain the op and registration.
-      TfLiteNode* node;
-      TfLiteRegistration* reg;
-      TF_LITE_ENSURE_STATUS(
-          context->GetNodeAndRegistration(context, node_index, &node, &reg));
-
-      const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
-      const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
-      const bool need_int8_conversion =
-          NeedInt8Conversion(context, reg->builtin_code, node);
-      int input_tensor_flags = 0;
-      if (scalar_as_tensor) {
-        input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
+  // copy results from shared memory to the destination.
+  output_offset = 0;
+  for (auto output_index : TfLiteIntArrayView(node->outputs)) {
+    TfLiteTensor* tensor = &context->tensors[output_index];
+    if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
+      continue;
+    }
+    TfLiteType ann_type_equivalent =
+        operand_mapping_.lite_index_to_ann_type_conversion(output_index);
+    if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
+      // Explicitly convert uint8 values to int8 values.
+      uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
+          nn_output_memory_->get_data_ptr() + output_offset);
+      const auto num_elements = NumElements(tensor);
+      for (int i = 0; i < num_elements; ++i) {
+        output_ptr[i] =
+            static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
       }
+    }
+    memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
+           tensor->bytes);
+    output_offset += tensor->bytes;
+    output_offset += getNumPaddingBytes(tensor->bytes);
+  }
-      // Map inputs to NN API tensor indices.
-      for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
-        const auto input_index = node->inputs->data[input_pos];
-        if (need_int8_conversion &&
-            (input_pos == 0 ||
-             reg->builtin_code == kTfLiteBuiltinFullyConnected ||
-             reg->builtin_code == kTfLiteBuiltinAdd ||
-             reg->builtin_code == kTfLiteBuiltinMul ||
-             reg->builtin_code == kTfLiteBuiltinSub ||
-             reg->builtin_code == kTfLiteBuiltinConcatenation ||
-             reg->builtin_code == kTfLiteBuiltinMaximum ||
-             reg->builtin_code == kTfLiteBuiltinMinimum ||
-             reg->builtin_code == kTfLiteBuiltinLess ||
-             reg->builtin_code == kTfLiteBuiltinLessEqual ||
-             reg->builtin_code == kTfLiteBuiltinGreater ||
-             reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
-             reg->builtin_code == kTfLiteBuiltinEqual ||
-             reg->builtin_code == kTfLiteBuiltinNotEqual ||
-             reg->builtin_code == kTfLiteBuiltinSelect)) {
-          // Only selected inputs require int8 conversion.
-          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
-              input_index, hybrid_op,
-              input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
-          continue;
-        }
-        if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
-            input_pos >= 20) {
+  // copy output of all output tensors in feedback_loops_ into the
+  // associated input
+  for (auto feedback_loop : feedback_loops_) {
+    int output_tensor_idx;
+    int input_tensor_idx;
+    std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
+    TfLiteTensor* src =
+        &context->tensors[node->outputs->data[output_tensor_idx]];
+    TfLiteTensor* dest =
+        &context->tensors[node->inputs->data[input_tensor_idx]];
+
+    memcpy(dest->data.raw, src->data.raw, src->bytes);
+  }
+
+  return kTfLiteOk;
+}
+
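The ±128 adjustments in Invoke are zero-point shifts between TFLite's signed int8 representation and the unsigned uint8 representation NNAPI expects; a tiny standalone illustration of the round trip:

#include <cstdint>

// Editorial sketch of the int8 <-> uint8 shift performed in Invoke above.
inline uint8_t Int8ToUint8(int8_t v) {
  return static_cast<uint8_t>(static_cast<int32_t>(v) + 128);
}
inline int8_t Uint8ToInt8(uint8_t v) {
  return static_cast<int8_t>(static_cast<int32_t>(v) - 128);
}
// Int8ToUint8(-128) == 0, Int8ToUint8(-5) == 123, Int8ToUint8(127) == 255,
// and Uint8ToInt8(Int8ToUint8(v)) == v for every int8 value.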
+void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
+    const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
+    NNAPIOpBuilder* builder) {
+  // Depending on the operator and the input data format, Dequantize
+  // operators may need to be added. For example, when the input is
+  // floating-point but the weights are quantized, the weights will first be
+  // dequantized to the same format as the input before being passed to the
+  // operator.
+
+  // The tensor determining whether the inputs should be floating-point.
+  int input_tensor_index = -1;
+  std::vector<int> inputs_to_potentially_dequantize;
+
+  switch (builtin_code) {
+    case kTfLiteBuiltinConv2d:
+    case kTfLiteBuiltinFullyConnected: {
+      input_tensor_index = 0;
+      // Weights and bias are inputs #1 and #2, respectively, and may require
+      // dequantization.
+      inputs_to_potentially_dequantize = {1, 2};
+      break;
+    }
+    case kTfLiteBuiltinLstm: {
+      input_tensor_index = 0;
+      inputs_to_potentially_dequantize = {1,  2,  3,  4,  5,  6,  7,
+                                          8,  9,  10, 11, 12, 13, 14,
+                                          15, 16, 17, 20, 21, 22, 23};
+      break;
+    }
+    default:
+      return;
+  }
+
+  int tensor_id = node->inputs->data[input_tensor_index];
+  if (tensor_id < 0) return;
+
+  // Nothing to do if the input is not floating-point.
+  if (!IsFloat(context->tensors[tensor_id].type)) return;
+
+  for (int i : inputs_to_potentially_dequantize) {
+    if (i < 0 || i >= node->inputs->size) continue;  // Ignore invalid index.
+    tensor_id = node->inputs->data[i];
+    if (tensor_id < 0) continue;  // Ignore optional input.
+
+    const TfLiteType type = context->tensors[tensor_id].type;
+    // Nothing to do for this tensor if it's not quantized.
+    if (!IsQuantized(type)) continue;
+
+    // Insert Dequantize operator if it hasn't been done already and change
+    // the node's input accordingly.
+    builder->AddDequantize(i, node->inputs->data[i], type);
+  }
+}
+
+TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context) {
+  DequantizeMapping dequantize_mapping;
+  // The operand builder allows creating a single op. It is created outside
+  // the for loop to avoid reallocating the vectors.
+  NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
+                         &dequantize_mapping, &allocation_memory_mapping_,
+                         nn_model_.get());
+  // Add Tensors.
+  for (auto node_index : nodes_) {
+    // Obtain the op and registration.
+    TfLiteNode* node;
+    TfLiteRegistration* reg;
+    TF_LITE_ENSURE_STATUS(
+        context->GetNodeAndRegistration(context, node_index, &node, &reg));
+
+    const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
+    const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
+    const bool need_int8_conversion =
+        NeedInt8Conversion(context, reg->builtin_code, node);
+    int input_tensor_flags = 0;
+    if (scalar_as_tensor) {
+      input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
+    }
+
+    // Map inputs to NN API tensor indices.
+    for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
+      const auto input_index = node->inputs->data[input_pos];
+      if (need_int8_conversion &&
+          (input_pos == 0 ||
+           reg->builtin_code == kTfLiteBuiltinFullyConnected ||
+           reg->builtin_code == kTfLiteBuiltinAdd ||
+           reg->builtin_code == kTfLiteBuiltinMul ||
+           reg->builtin_code == kTfLiteBuiltinSub ||
+           reg->builtin_code == kTfLiteBuiltinConcatenation ||
+           reg->builtin_code == kTfLiteBuiltinMaximum ||
+           reg->builtin_code == kTfLiteBuiltinMinimum ||
+           reg->builtin_code == kTfLiteBuiltinLess ||
+           reg->builtin_code == kTfLiteBuiltinLessEqual ||
+           reg->builtin_code == kTfLiteBuiltinGreater ||
+           reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
+           reg->builtin_code == kTfLiteBuiltinEqual ||
+           reg->builtin_code == kTfLiteBuiltinNotEqual ||
+           reg->builtin_code == kTfLiteBuiltinSelect)) {
+        // Only selected inputs require int8 conversion.
+        TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
+            input_index, hybrid_op,
+            input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
+        continue;
+      }
+      if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
+          input_pos >= 20) {
+        // Skip layer normalization weights. They are added in the Map
+        // function (after all the other inputs added there) since layer
+        // normalization weights are the last four inputs of the LSTM op in
+        // NNAPI.
+        continue;
+      }
+      if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
+        // All inputs are configured in the Map function.
+        continue;
+      }
+      if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
+        if (input_pos >= 20) {
           // Skip layer normalization weights. They are added in the Map
           // function (after all the other inputs added there) since layer
-          // normalization weights are the last four inputs of the LSTM op in
-          // NNAPI.
+          // normalization weights are the last four inputs of the
+          // unidirectional sequence LSTM op in NNAPI.
           continue;
         }
-        if (reg->builtin_code == kTfLiteBuiltinLstm &&
-            isLstmBasicKernel(node)) {
-          // Configuring all inputs in the Map function
-          continue;
-        }
-        if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
-          if (input_pos >= 20) {
-            // Skip layer normalization weights. They are added in the Map
-            // function (after all the other inputs added there) since layer
-            // normalization weights are the last four inputs of the
-            // unidirectional sequence LSTM op in NNAPI.
-            continue;
-          }
-          if (input_index == kOptionalTensor) {
-            TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
-            continue;
-          }
-        }
-        if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
-            (input_index == node->inputs->data[0])) {
-          // Skip the axis input tensor; it will be added as a scalar operand
-          // by the Map() mapping.
-          continue;
-        }
-        if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
-          // Everything is added during Map since input tensors
-          // have different order.
+        if (input_index == kOptionalTensor) {
+          TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
           continue;
         }
+      }
+      if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
+          (input_index == node->inputs->data[0])) {
+        // Skip the axis input tensor; it will be added as a scalar operand
+        // by the Map() mapping.
+        continue;
+      }
+      if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
+        // Everything is added during Map since the input tensors
+        // are in a different order.
+        continue;
+      }
-        // Pad and Padv2 have an optional parameter for a pad value which has
-        // to be converted to a scalar type in NN API.
+      // Pad and Padv2 have an optional parameter for a pad value which has
+      // to be converted to a scalar type in NN API.
+      if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
+           reg->builtin_code == kTfLiteBuiltinPad) &&
+          node->inputs->size == 3 && input_pos == 2) {
+        const int constant_value_id = node->inputs->data[2];
+        if (constant_value_id == kOptionalTensor) {
+          continue;
+        }
+        const TfLiteTensor constant_value = context->tensors[constant_value_id];
-        switch (constant_value.type) {
-          case kTfLiteFloat32:
-            if (constant_value.allocation_type == kTfLiteMmapRo) {
-              builder.AddScalarFloat32Operand(*constant_value.data.f);
+        switch (constant_value.type) {
+          case kTfLiteFloat32:
+            if (constant_value.allocation_type == kTfLiteMmapRo) {
+              builder.AddScalarFloat32Operand(*constant_value.data.f);
+            } else {
+              builder.AddSingleValueTensorAsScalarOperand(
+                  constant_value_id, ANEURALNETWORKS_FLOAT32);
+            }
+            break;
+          case kTfLiteUInt8:
+            if (constant_value.allocation_type == kTfLiteMmapRo) {
+              builder.AddScalarInt32Operand(
+                  static_cast<int32_t>(*constant_value.data.uint8));
+            } else {
+              builder.AddSingleValueTensorAsScalarOperand(
+                  constant_value_id, ANEURALNETWORKS_INT32);
+            }
+            break;
+          case kTfLiteInt8:
+            if (constant_value.allocation_type == kTfLiteMmapRo) {
+              builder.AddScalarInt32Operand(
+                  static_cast<int32_t>(*constant_value.data.int8) + 128);
+            } else {
+              builder.AddSingleValueTensorAsScalarOperand(
+                  constant_value_id, ANEURALNETWORKS_INT32);
+            }
+            break;
+          default:
+            context->ReportError(context,
+                                 "Unsupported type of pad value for pad_v2\n");
+            return kTfLiteError;
+        }
+        continue;
+      }
+
+      if (input_index == kOptionalTensor &&
+          (reg->builtin_code == kTfLiteBuiltinLstm ||
+           reg->builtin_code == kTfLiteBuiltinSvdf ||
+           reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
+        // Properly handle the optional tensor for LSTM and SVDF;
+        // currently only float32 is supported.
+        TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
+      } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
+                 reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
+        if (input_pos == 0) {
+          // Only the first input tensor is added. The second one,
+          // specifying the output height and width, is not added and
+          // instead the height and width will be added individually as
+          // scalars by the mapping function returned by Map().
+          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
+        }
+      } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
+        // The K parameter tensor is not handled here but by the functor
+        // returned by Map; the input tensor is instead added in
+        // the else clause below.
+        continue;
+      } else if (reg->builtin_code == kTfLiteBuiltinGather) {
+        // Everything is added during Map since the input tensors
+        // are in a different order.
+        continue;
+      } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
+                 input_pos == 1) {
+        // The axis param is added during Map.
+        continue;
+      } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
+                 input_pos == 2) {
+        // NNAPI does not support crops.
+        // The Map function will check if all crops are zero.
+        continue;
+      } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
+                 reg->builtin_code == kTfLiteBuiltinArgMax) {
+        // The first input tensor is added as is. The second one, specifying
+        // the axis, needs to be converted to a scalar since TFLite uses a
+        // tensor but NNAPI uses a scalar as the axis.
+        if (input_pos == 0) {
+          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
+        } else {
+          const int axis_id = node->inputs->data[1];
+          const TfLiteTensor& axis_tensor = context->tensors[axis_id];
+          switch (axis_tensor.type) {
+            case kTfLiteInt32:
+              if (axis_tensor.allocation_type == kTfLiteMmapRo) {
+                TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
+                    static_cast<int32_t>(*axis_tensor.data.i32)));
               } else {
-                builder.AddSingleValueTensorAsScalarOperand(
-                    constant_value_id, ANEURALNETWORKS_FLOAT32);
+                TF_LITE_ENSURE_STATUS(
+                    builder.AddSingleValueTensorAsScalarOperand(
+                        axis_id, ANEURALNETWORKS_INT32));
               }
               break;
-            case kTfLiteUInt8:
-              if (constant_value.allocation_type == kTfLiteMmapRo) {
-                builder.AddScalarInt32Operand(
-                    static_cast<int32_t>(*constant_value.data.uint8));
-              } else {
-                builder.AddSingleValueTensorAsScalarOperand(
-                    constant_value_id, ANEURALNETWORKS_INT32);
-              }
-              break;
-            case kTfLiteInt8:
-              if (constant_value.allocation_type == kTfLiteMmapRo) {
-                builder.AddScalarInt32Operand(
-                    static_cast<int32_t>(*constant_value.data.int8) + 128);
-              } else {
-                builder.AddSingleValueTensorAsScalarOperand(
-                    constant_value_id, ANEURALNETWORKS_INT32);
-              }
+            case kTfLiteInt64:
+              // Map() function already makes sure int64 input is constant.
+              TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
+                  static_cast<int32_t>(*axis_tensor.data.i64)));
               break;
             default:
-              context->ReportError(
-                  context, "Unsupported type of pad value for pad_v2\n");
               return kTfLiteError;
           }
-          continue;
         }
-
-        if (input_index == kOptionalTensor &&
-            (reg->builtin_code == kTfLiteBuiltinLstm ||
-             reg->builtin_code == kTfLiteBuiltinSvdf ||
-             reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
-          // properly handle the optional tensor for LSTM and SVDF.
-          // currently only support float32.
-          TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
-        } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
-                   reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
-          if (input_pos == 0) {
-            // Only the first input tensor is added. The second one,
-            // specifying the output height and width, is not added and
-            // instead the height and width will be added individually as
-            // scalars by the mapping function returned by Map().
-            TF_LITE_ENSURE_STATUS(
-                builder.AddTensorInput(input_index, hybrid_op));
-          }
-        } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
-          // The K parameter tensor is not handled here but by the functor
-          // returned by Map, the input tensor is instead added in
-          // the else clause below
-          continue;
-        } else if (reg->builtin_code == kTfLiteBuiltinGather) {
-          // Everything is added during Map since input tensors
-          // have different order.
-          continue;
-        } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
-                   input_pos == 1) {
-          // The axis param is added during Map
-          continue;
-        } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
-                   input_pos == 2) {
-          // NNAPI does not support crops.
-          // The Map fucntion will check if all crops are zero.
-          continue;
-        } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
-                   reg->builtin_code == kTfLiteBuiltinArgMax) {
-          // The first input tensor is added as is. The second one, specifying
-          // the axis, needs to be converted to a scalar since TFLite uses a
-          // tensor but NNAPI uses a scalar as the axis.
-          if (input_pos == 0) {
-            TF_LITE_ENSURE_STATUS(
-                builder.AddTensorInput(input_index, hybrid_op));
-          } else {
-            const int axis_id = node->inputs->data[1];
-            const TfLiteTensor& axis_tensor = context->tensors[axis_id];
-            switch (axis_tensor.type) {
-              case kTfLiteInt32:
-                if (axis_tensor.allocation_type == kTfLiteMmapRo) {
-                  TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
-                      static_cast<int32_t>(*axis_tensor.data.i32)));
-                } else {
-                  TF_LITE_ENSURE_STATUS(
-                      builder.AddSingleValueTensorAsScalarOperand(
-                          axis_id, ANEURALNETWORKS_INT32));
-                }
-                break;
-              case kTfLiteInt64:
-                // Map() function already makes sure int64 input is constant.
-                TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
-                    static_cast<int32_t>(*axis_tensor.data.i64)));
-                break;
-              default:
-                return kTfLiteError;
-            }
-          }
-        } else {
-          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
-                                                       input_tensor_flags));
-        }
-      }
-      // Get op type and operands
-      int nn_op_type = Map(
-          context, reg->builtin_code, reg->version, nnapi_->android_sdk_version,
-          node, /*is_accelerator_specified=*/nnapi_device_ !=
-          nullptr)({context, &builder, node, &model_state_outputs_,
-                    &model_state_tfl_inputs_, &feedback_loops_});
-      // Map outputs to NN API tensor indices.
-      int output_tensor_flags = 0;
-      if (need_int8_conversion) {
-        output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
-      }
-      for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
-        const auto output_index = node->outputs->data[output_pos];
-
-        // Outputs for basic LSTM cell are set in the Map function since
-        if (reg->builtin_code == kTfLiteBuiltinLstm &&
-            isLstmBasicKernel(node)) {
-          continue;
-        }
-
+      } else {
         TF_LITE_ENSURE_STATUS(
-            builder.AddTensorOutput(output_index, output_tensor_flags));
+            builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
+      }
+    }
+    // Get op type and operands
+    int nn_op_type = Map(context, reg->builtin_code, reg->version,
+                         nnapi_->android_sdk_version, node,
+                         /*is_accelerator_specified=*/nnapi_device_ != nullptr)(
+        {context, &builder, node, &model_state_outputs_,
+         &model_state_tfl_inputs_, &feedback_loops_});
+    // Map outputs to NN API tensor indices.
+    int output_tensor_flags = 0;
+    if (need_int8_conversion) {
+      output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
+    }
+    for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
+      const auto output_index = node->outputs->data[output_pos];
+
+      // Outputs for the basic LSTM cell are set in the Map function.
+      if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
+        continue;
       }
-      // Dequantize operators may have to be added in case inputs are to be
-      // floating-point.
-      AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
-                                        &builder);
-
-      builder.FinalizeAddOperation(nn_op_type);
+      TF_LITE_ENSURE_STATUS(
+          builder.AddTensorOutput(output_index, output_tensor_flags));
     }
-    return kTfLiteOk;
+
+    // Dequantize operators may have to be added in case inputs are to be
+    // floating-point.
+    AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
+                                      &builder);
+
+    builder.FinalizeAddOperation(nn_op_type);
   }
+  return kTfLiteOk;
+}
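
The input_tensor_flags/output_tensor_flags values threaded through AddOpsAndTensors are plain bitmasks over the NN_TENSOR_FLAG_* enumerators declared earlier in this file; a short editorial sketch of how they compose (the enumerator values shown are illustrative, not quoted from the source):

// Editorial sketch; the real enumerators live near the top of
// nnapi_delegate.cc (values here are illustrative).
enum {
  NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
  NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
};

int ComposeFlags(bool scalar_as_tensor, bool need_int8_conversion) {
  int flags = 0;
  if (scalar_as_tensor) flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
  if (need_int8_conversion) flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
  return flags;
}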
-  TfLiteStatus BuildGraph(TfLiteContext* context,
-                          const TfLiteIntArray* input_tensors,
-                          const TfLiteIntArray* output_tensors) {
-    // Build the ops and tensors.
-    TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context));
-    // Map input and output tensor indices to ANN
-    std::vector<uint32_t> inputs;
-    inputs.reserve(input_tensors->size);
-    std::vector<uint32_t> outputs;
-    outputs.reserve(output_tensors->size);
+TfLiteStatus NNAPIDelegateKernel::BuildGraph(
+    TfLiteContext* context, const TfLiteIntArray* input_tensors,
+    const TfLiteIntArray* output_tensors) {
+  // Build the ops and tensors.
+  TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context));
+  // Map input and output tensor indices to ANN
+  std::vector<uint32_t> inputs;
+  inputs.reserve(input_tensors->size);
+  std::vector<uint32_t> outputs;
+  outputs.reserve(output_tensors->size);
-    size_t total_input_byte_size = 0;
-    // Make the TensorFlow Lite inputs and outputs to ann_indices.
-    for (int i : TfLiteIntArrayView(input_tensors)) {
-      // Constant tensors are not NNAPI inputs.
-      if (i != kOptionalTensor &&
-          context->tensors[i].allocation_type != kTfLiteMmapRo &&
-          // The delegate might not have mapped this input (this can
-          // happen if one tensor is split in several ones)
-          operand_mapping_.lite_index_to_ann(i) != -1) {
-        inputs.push_back(operand_mapping_.lite_index_to_ann(i));
-        if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
-          continue;
-        }
-        const TfLiteType nn_type_conversion =
-            operand_mapping_.lite_index_to_ann_type_conversion(i);
-        int tensor_size = 0;
-        if (nn_type_conversion == kTfLiteNoType) {
-          tensor_size = context->tensors[i].bytes;
-        } else {
-          size_t type_size;
-          TF_LITE_ENSURE_OK(
-              context, GetSizeOfType(context, nn_type_conversion, &type_size));
-          tensor_size = NumElements(&context->tensors[i]) * type_size;
-        }
-        total_input_byte_size += tensor_size;
-        total_input_byte_size += getNumPaddingBytes(tensor_size);
-      }
-    }
-
-    size_t total_output_byte_size = 0;
-    for (int i : TfLiteIntArrayView(output_tensors)) {
-      const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i);
-      // Unmapped outputs are not added
-      if (output_tensor_ann_index != -1) {
-        outputs.push_back(output_tensor_ann_index);
-      }
+  size_t total_input_byte_size = 0;
+  // Map the TensorFlow Lite inputs and outputs to ann_indices.
+  for (int i : TfLiteIntArrayView(input_tensors)) {
+    // Constant tensors are not NNAPI inputs.
+    if (i != kOptionalTensor &&
+        context->tensors[i].allocation_type != kTfLiteMmapRo &&
+        // The delegate might not have mapped this input (this can
+        // happen if one tensor is split into several)
+        operand_mapping_.lite_index_to_ann(i) != -1) {
+      inputs.push_back(operand_mapping_.lite_index_to_ann(i));
       if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
         continue;
       }
-      total_output_byte_size += context->tensors[i].bytes;
-      total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
+      const TfLiteType nn_type_conversion =
+          operand_mapping_.lite_index_to_ann_type_conversion(i);
+      int tensor_size = 0;
+      if (nn_type_conversion == kTfLiteNoType) {
+        tensor_size = context->tensors[i].bytes;
+      } else {
+        size_t type_size;
+        TF_LITE_ENSURE_OK(
+            context, GetSizeOfType(context, nn_type_conversion, &type_size));
+        tensor_size = NumElements(&context->tensors[i]) * type_size;
+      }
+      total_input_byte_size += tensor_size;
+      total_input_byte_size += getNumPaddingBytes(tensor_size);
     }
-
-    // Add state output tensors as model outputs.
-    for (int i : model_state_outputs_) {
-      outputs.push_back(i);
-    }
-
-    // Tell ANN to declare inputs/outputs
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(
-        context, nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
-                     nn_model_.get(), inputs.size(), inputs.data(),
-                     outputs.size(), outputs.data()));
-
-    // Set relaxed computation mode for fp32 if possible.
-    if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
-              nn_model_.get(), context->allow_fp32_relax_to_fp16));
-    }
-
-    // Finalize the model
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(
-        context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()));
-
-    // Create shared memory pool for inputs and outputs.
-    nn_input_memory_.reset(
-        new NNMemory(nnapi_, "input_pool", total_input_byte_size));
-    nn_output_memory_.reset(
-        new NNMemory(nnapi_, "output_pool", total_output_byte_size));
-
-    return kTfLiteOk;
   }
-};
-}  // namespace
+  size_t total_output_byte_size = 0;
+  for (int i : TfLiteIntArrayView(output_tensors)) {
+    const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i);
+    // Unmapped outputs are not added
+    if (output_tensor_ann_index != -1) {
+      outputs.push_back(output_tensor_ann_index);
+    }
+    if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
+      continue;
+    }
+    total_output_byte_size += context->tensors[i].bytes;
+    total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
+  }
+
+  // Add state output tensors as model outputs.
+  for (int i : model_state_outputs_) {
+    outputs.push_back(i);
+  }
+
+  // Tell ANN to declare inputs/outputs
+  RETURN_TFLITE_ERROR_IF_NN_ERROR(
+      context, nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
+                   nn_model_.get(), inputs.size(), inputs.data(),
+                   outputs.size(), outputs.data()));
+
+  // Set relaxed computation mode for fp32 if possible.
+  if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context, nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+                     nn_model_.get(), context->allow_fp32_relax_to_fp16));
+  }
+
+  // Finalize the model
+  RETURN_TFLITE_ERROR_IF_NN_ERROR(
+      context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()));
+
+  // Create shared memory pool for inputs and outputs.
+  nn_input_memory_.reset(
+      new NNMemory(nnapi_, "input_pool", total_input_byte_size));
+  nn_output_memory_.reset(
+      new NNMemory(nnapi_, "output_pool", total_output_byte_size));
+
+  return kTfLiteOk;
+}
+
+}  // namespace nnapi
+}  // namespace delegate
+
+using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
 
 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
     : TfLiteDelegate(TfLiteDelegateCreate()),
@@ -3507,6 +3347,9 @@ void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
   }
 }
 
+using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
+using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
+
 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                                               TfLiteDelegate* delegate) {
   // Do not check nodes_ if NN API is unavailable.
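
For context, this is roughly how client code drives the refactored delegate (an editorial sketch, not part of the patch; the interpreter setup is abbreviated, and the accelerator name, cache directory, and model token values are hypothetical):

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"

// Editorial sketch: the delegate must outlive the interpreter it is
// attached to.
void ApplyNnApiDelegate(tflite::Interpreter* interpreter) {
  tflite::StatefulNnApiDelegate::Options options;
  options.accelerator_name = "example-accelerator";  // hypothetical
  options.cache_dir = "/data/local/tmp";             // hypothetical
  options.model_token = "my_model_v1";               // hypothetical
  static tflite::StatefulNnApiDelegate delegate(options);
  interpreter->ModifyGraphWithDelegate(&delegate);
}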
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
new file mode 100644
index 00000000000..3a65c3d5620
--- /dev/null
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
@@ -0,0 +1,243 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
+#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
+
+#include <map>
+#include <vector>
+
+#include "tensorflow/lite/allocation.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
+#include "tensorflow/lite/nnapi/nnapi_implementation.h"
+
+namespace tflite {
+namespace delegate {
+namespace nnapi {
+
+constexpr int32_t kMinSdkVersionForNNAPI = 27;
+constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
+constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
+
+// Track tensor indices to NN API tensor indices mapping.
+class OperandMapping {
+ public:
+  // Given a TFLite index, return the ANN index. If it doesn't exist,
+  // return -1.
+  int lite_index_to_ann(int index) const {
+    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
+      return lite_tensor_to_ann_tensor_[index];
+    else
+      return -1;
+  }
+
+  // NN API uses non-tensor operands instead of structs. This creates one
+  // and returns the index. It uses a std::vector and resizes it as needed,
+  // keeping -1 for unmapped values. Intermediate tensors likely will not
+  // be mapped.
+  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }
+
+  // This call is necessary for input operands generated by the delegate
+  // to map constant inputs not present in TFLite but required by NNAPI,
+  // for example when splitting one input in several ones.
+  int add_delegate_generated_input_ann_tensors_operand() {
+    return next_ann_tensor_index_++;
+  }
+
+  // Add a new mapping from `tflite_index` and return the NN API tensor index.
+  int add_new_ann_tensor_index(int tflite_index) {
+    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
+      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
+    }
+    const int new_tensor_index = next_ann_tensor_index_++;
+    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
+    return new_tensor_index;
+  }
+
+  // Given a TFLite index, returns the TFLite type to which a tensor must be
+  // converted while copying the data to the memory allocated for NN API.
+  // kTfLiteNoType means no conversion is needed.
+  TfLiteType lite_index_to_ann_type_conversion(int index) const {
+    if (index >= 0 && index < index_to_type_conversion_.size())
+      return index_to_type_conversion_[index];
+    else
+      return kTfLiteNoType;
+  }
+
+  // Add a new mapping from TFLite index to a type conversion.
+  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
+    if (tflite_index >= index_to_type_conversion_.size()) {
+      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
+    }
+    index_to_type_conversion_[tflite_index] = tflite_type;
+  }
+
+ private:
+  // Next index of ann tensor
+  int next_ann_tensor_index_ = 0;
+
+  // Mapping from lite index. Use a std::vector for speed and code size
+  // rather than a map.
+  std::vector<int> lite_tensor_to_ann_tensor_;
+  // Mapping from lite index to the type to which the tensor must be converted
+  // while copying the data to the memory allocated for NN API. kTfLiteNoType
+  // means no conversion is needed. Use a std::vector for speed and code size
+  // rather than a map.
+  std::vector<TfLiteType> index_to_type_conversion_;
+};
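
A short editorial sketch of the OperandMapping contract declared above: ANN indices are handed out densely in creation order, unmapped TFLite indices report -1, and type conversions default to kTfLiteNoType.

// Editorial sketch of the OperandMapping contract (not part of the header).
void OperandMappingExample() {
  tflite::delegate::nnapi::OperandMapping m;
  const int ann_a = m.add_new_ann_tensor_index(/*tflite_index=*/5);  // == 0
  const int ann_b = m.add_new_ann_tensor_index(/*tflite_index=*/2);  // == 1
  // m.lite_index_to_ann(5) == ann_a, while m.lite_index_to_ann(3) == -1.
  m.add_type_conversion(/*tflite_index=*/5, kTfLiteUInt8);
  // m.lite_index_to_ann_type_conversion(5) == kTfLiteUInt8; any other
  // index still reports kTfLiteNoType.
  (void)ann_a;
  (void)ann_b;
}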
+// Track tensor indices to NN API tensor indices mapping.
+class OperandMapping {
+ public:
+  // Given a TFLite index, returns the ANN index. If it doesn't exist,
+  // returns -1.
+  int lite_index_to_ann(int index) const {
+    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
+      return lite_tensor_to_ann_tensor_[index];
+    else
+      return -1;
+  }
+
+  // NN API uses non-tensor operands instead of structs. This creates one
+  // and returns the index. It uses a std::vector and resizes it as needed,
+  // keeping -1 for unmapped values. Intermediate tensors likely will not
+  // be mapped.
+  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }
+
+  // This call is necessary for input operands generated by the delegate
+  // to map constant inputs not present in TFLite but required by NNAPI,
+  // for example when splitting one input into several ones.
+  int add_delegate_generated_input_ann_tensors_operand() {
+    return next_ann_tensor_index_++;
+  }
+
+  // Add a new mapping from `tflite_index` and return the NN API tensor index.
+  int add_new_ann_tensor_index(int tflite_index) {
+    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
+      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
+    }
+    const int new_tensor_index = next_ann_tensor_index_++;
+    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
+    return new_tensor_index;
+  }
+
+  // Given a TFLite index, returns the TFLite type to which the tensor must be
+  // converted while copying its data to the memory allocated for NN API.
+  // kTfLiteNoType means no conversion is needed.
+  TfLiteType lite_index_to_ann_type_conversion(int index) const {
+    if (index >= 0 && index < index_to_type_conversion_.size())
+      return index_to_type_conversion_[index];
+    else
+      return kTfLiteNoType;
+  }
+
+  // Add a new mapping from TFLite index to a type conversion.
+  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
+    if (tflite_index >= index_to_type_conversion_.size()) {
+      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
+    }
+    index_to_type_conversion_[tflite_index] = tflite_type;
+  }
+
+ private:
+  // Next index of ANN tensor.
+  int next_ann_tensor_index_ = 0;
+
+  // Mapping from lite index. Use a std::vector for speed and code size
+  // rather than a map.
+  std::vector<int> lite_tensor_to_ann_tensor_;
+  // Mapping from lite index to the type the tensor must be converted to
+  // during the copying of the data to the memory allocated for NN API.
+  // kTfLiteNoType means no conversion is needed. Use a std::vector for speed
+  // and code size rather than a map.
+  std::vector<TfLiteType> index_to_type_conversion_;
+};
+
+class NNAPIOpBuilder;
+
+// The arguments passed to the op mapping functions that translate a TFLite
+// node into its NN API counterpart.
+struct NNAPIOpMappingArgs {
+  TfLiteContext* context;
+  NNAPIOpBuilder* builder;
+  TfLiteNode* node;
+  std::vector<int>* model_state_outputs;
+  std::vector<int>* model_state_tfl_inputs;
+  std::vector<std::tuple<int, int>>* feedback_loops;
+};
+
+// RAII NN API Model Destructor for use with std::unique_ptr
+struct NNFreeModel {
+  void operator()(ANeuralNetworksModel* model) {
+    NnApiImplementation()->ANeuralNetworksModel_free(model);
+  }
+};
+// RAII NN API Compilation Destructor for use with std::unique_ptr
+struct NNFreeCompilation {
+  void operator()(ANeuralNetworksCompilation* compilation) {
+    NnApiImplementation()->ANeuralNetworksCompilation_free(compilation);
+  }
+};
+
+// Manage NNAPI shared memory handle
+class NNMemory {
+ public:
+#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
+  NNMemory(const NnApi* nnapi, const char* name, size_t size) {
+    if (name && size > 0) {
+      nnapi_ = nnapi;
+      byte_size_ = size;
+      fd_ = nnapi_->ASharedMemory_create(name, size);
+      data_ptr_ = reinterpret_cast<uint8_t*>(
+          mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
+      nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
+                                                 fd_, 0, &nn_memory_handle_);
+    }
+  }
+#else
+  NNMemory(const NnApi* /*nnapi*/, const char* /*name*/, size_t /*size*/) {}
+#endif
+
+  ~NNMemory() {
+#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
+    if (data_ptr_) {
+      munmap(data_ptr_, byte_size_);
+    }
+    if (nn_memory_handle_) {
+      nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
+    }
+    if (fd_ > 0) close(fd_);
+#endif
+  }
+
+  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
+  uint8_t* get_data_ptr() { return data_ptr_; }
+
+ private:
+#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
+  const NnApi* nnapi_;
+  int fd_ = 0;
+  size_t byte_size_ = 0;
+#endif
+  uint8_t* data_ptr_ = nullptr;
+  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
+};
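NNFreeModel and NNFreeCompilation exist so the kernel below can hold its NNAPI objects in std::unique_ptr and have them released through the correct C API call on every exit path. A minimal usage sketch, assuming a loaded NnApi implementation (purely illustrative):

    std::unique_ptr<ANeuralNetworksModel, NNFreeModel> model;
    ANeuralNetworksModel* raw_model = nullptr;
    if (NnApiImplementation()->ANeuralNetworksModel_create(&raw_model) ==
        ANEURALNETWORKS_NO_ERROR) {
      // From here on, ANeuralNetworksModel_free runs automatically when
      // `model` is reset or goes out of scope.
      model.reset(raw_model);
    }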
+// The kernel that represents the node subset of TF Lite being run on NN API.
+class NNAPIDelegateKernel {
+ public:
+  NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
+  ~NNAPIDelegateKernel() {
+    for (auto content : allocation_memory_mapping_) {
+      nnapi_->ANeuralNetworksMemory_free(content.second);
+    }
+  }
+
+  typedef ANeuralNetworksOperationType (*MappingFn)(
+      const NNAPIOpMappingArgs& mapping_args);
+
+  // Return a function that knows how to translate a node into its operands
+  // when called. You can use this function to see if a node is supported
+  // (i.e. if the returned MappingFn is null, then the node is not supported).
+  static MappingFn Map(const TfLiteContext* context, int builtin_code,
+                       int version, int android_sdk_version,
+                       const TfLiteNode* node, bool is_accelerator_specified);
+
+  // Initialize the kernel (an NN model).
+  TfLiteStatus Init(TfLiteContext* context,
+                    const TfLiteDelegateParams* params);
+
+  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
+
+  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
+
+ private:
+  // Access to NNApi.
+  const NnApi* nnapi_;
+  // ANN device handle.
+  ANeuralNetworksDevice* nnapi_device_ = nullptr;
+  // ANN API state.
+  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
+  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
+      nn_compilation_;
+  // Node indices that this delegate is responsible for. Indices here
+  // index into the nodes array in the TfLiteContext.
+  std::vector<int> nodes_;
+  // Track indices we use.
+  OperandMapping operand_mapping_;
+  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
+      allocation_memory_mapping_;
+  // Track memory registrations made through the delegate.
+  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
+      tensor_memory_map_;
+  std::vector<int> model_state_outputs_;
+  std::vector<int> model_state_tfl_inputs_;
+  // This is the equivalent of the pair model_state_outputs_,
+  // model_state_tfl_inputs_ for all tensors where we have to keep the output
+  // data available for TFLite model users.
+  std::vector<std::tuple<int, int>> feedback_loops_;
+
+  std::unique_ptr<NNMemory> nn_input_memory_;
+  std::unique_ptr<NNMemory> nn_output_memory_;
+
+  void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context,
+                                         int builtin_code,
+                                         const TfLiteNode* node,
+                                         NNAPIOpBuilder* builder);
+
+  TfLiteStatus AddOpsAndTensors(TfLiteContext* context);
+
+  TfLiteStatus BuildGraph(TfLiteContext* context,
+                          const TfLiteIntArray* input_tensors,
+                          const TfLiteIntArray* output_tensors);
+};
+
+}  // namespace nnapi
+}  // namespace delegate
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
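For orientation on how Init/Prepare/Invoke are driven: the delegate hands TFLite a TfLiteRegistration whose callbacks construct a kernel instance for each delegated partition and forward to it through node->user_data. A condensed sketch of that wiring under those assumptions (the registration helper and its lambdas are illustrative, not lifted from the patch; the real DoPrepare also propagates the status returned by Init):

    namespace {

    TfLiteRegistration GetNnapiKernelRegistration() {
      TfLiteRegistration reg{};
      reg.init = [](TfLiteContext* context, const char* buffer,
                    size_t length) -> void* {
        // `buffer` carries the TfLiteDelegateParams for this partition.
        const auto* params =
            reinterpret_cast<const TfLiteDelegateParams*>(buffer);
        auto* kernel = new NNAPIDelegateKernel;
        kernel->Init(context, params);
        return kernel;
      };
      reg.free = [](TfLiteContext* /*context*/, void* buffer) {
        delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
      };
      reg.prepare = [](TfLiteContext* context,
                       TfLiteNode* node) -> TfLiteStatus {
        return reinterpret_cast<NNAPIDelegateKernel*>(node->user_data)
            ->Prepare(context, node);
      };
      reg.invoke = [](TfLiteContext* context,
                      TfLiteNode* node) -> TfLiteStatus {
        return reinterpret_cast<NNAPIDelegateKernel*>(node->user_data)
            ->Invoke(context, node);
      };
      return reg;
    }

    }  // namespace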