diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD
index 94c48f80313..3953c73f263 100644
--- a/tensorflow/lite/delegates/nnapi/BUILD
+++ b/tensorflow/lite/delegates/nnapi/BUILD
@@ -34,6 +34,7 @@ cc_library(
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/kernels:kernel_util",
         "//tensorflow/lite/nnapi:nnapi_implementation",
+        "//tensorflow/lite/nnapi:nnapi_lib",
         "//tensorflow/lite/nnapi:nnapi_util",
     ],
 )
@@ -105,6 +106,7 @@ cc_library(
         ":nnapi_delegate",
         "//tensorflow/lite/nnapi:nnapi_handler",
         "//tensorflow/lite/nnapi:nnapi_implementation",
+        "//tensorflow/lite/nnapi:nnapi_lib",
         "@com_google_absl//absl/memory",
         "@com_google_googletest//:gtest",
     ],
@@ -122,6 +124,7 @@ cc_test(
     ],
     deps = [
         ":nnapi_delegate",
+        ":nnapi_delegate_mock_test",
         "//tensorflow/lite:framework",
         "//tensorflow/lite:minimal_logging",
         "//tensorflow/lite/c:common",
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index 08763dd55c3..830e374b125 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -28,9 +28,6 @@ limitations under the License.
 #include
 #include
 
-// This section needs to be before the import of nnapi_delegate_kernel
-// because the code changes according to the definition of
-// TFLITE_NNAPI_ALLOW_MMAP_SHARING
 #ifdef __ANDROID__
 #include
 #endif
@@ -299,12 +296,14 @@ static size_t getNumPaddingBytes(size_t byte_size) {
   return num_padding_bytes;
 }
 
-// Return NNAPI device handle with the provided null-terminated device name. If
-// no matching device could be found, nullptr will be returned.
-ANeuralNetworksDevice* GetDeviceHandle(TfLiteContext* context,
-                                       const char* device_name_ptr) {
-  if (!device_name_ptr) return nullptr;
-  ANeuralNetworksDevice* device_handle = nullptr;
+// Return NNAPI device handle with the provided null-terminated device name.
+// Returns kTfLiteError in case of any NNAPI error or if no device with the
+// given name can be found.
+TfLiteStatus GetDeviceHandle(TfLiteContext* context,
+                             const char* device_name_ptr,
+                             ANeuralNetworksDevice** result, int* nnapi_errno) {
+  if (!device_name_ptr) return kTfLiteError;
+  *result = nullptr;
   std::string device_name(device_name_ptr);
   uint32_t num_devices = 0;
   NnApiImplementation()->ANeuralNetworks_getDeviceCount(&num_devices);
@@ -312,21 +311,27 @@ ANeuralNetworksDevice* GetDeviceHandle(TfLiteContext* context,
   for (uint32_t i = 0; i < num_devices; i++) {
     ANeuralNetworksDevice* device = nullptr;
     const char* buffer = nullptr;
-    NnApiImplementation()->ANeuralNetworks_getDevice(i, &device);
-    NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer);
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context, NnApiImplementation()->ANeuralNetworks_getDevice(i, &device),
+        "Searching for target device", nnapi_errno);
+
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context,
+        NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer),
+        "Searching for target device", nnapi_errno);
+
     if (device_name == buffer) {
-      device_handle = device;
-      break;
+      *result = device;
+      return kTfLiteOk;
     }
   }
-  if (!device_handle) {
-    context->ReportError(context,
-                         "Could not find the specified NNAPI accelerator: %s. "
-                         "Must be one of: {%s}.",
-                         device_name_ptr,
-                         nnapi::GetStringDeviceNamesList().c_str());
-  }
-  return device_handle;
+
+  context->ReportError(context,
+                       "Could not find the specified NNAPI accelerator: %s. "
" + "Must be one of: {%s}.", + device_name_ptr, + nnapi::GetStringDeviceNamesList().c_str()); + return kTfLiteError; } // Compute the hash of a TfLiteIntArray. @@ -354,6 +359,112 @@ enum { NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1, }; +// Returns the SDK level to target when delegating to the given devices. +// The SDK level is the max of the ones supported by the devices or +// the current Android SDK level if no device is present. +TfLiteStatus GetTargetSdkVersion( + TfLiteContext* context, const NnApi* nnapi, + const std::vector& device_handles, + int* target_sdk_version, int* nnapi_errno) { + *target_sdk_version = nnapi->android_sdk_version; + int64_t devices_sdk_version = -1; + for (const auto* device_handle : device_handles) { + int64_t curr_device_sdk_version; + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context, + nnapi->ANeuralNetworksDevice_getFeatureLevel(device_handle, + &curr_device_sdk_version), + "Searching for target device", nnapi_errno); + + devices_sdk_version = + std::max(curr_device_sdk_version, devices_sdk_version); + } + + if ((devices_sdk_version > 0) && + // This second check is necessary since if the nnapi-reference device is + // in the list of target devices the devices_sdk_version value will be + // 1000. + (devices_sdk_version < nnapi->android_sdk_version)) { + TFLITE_LOG(TFLITE_LOG_INFO, + "Changing Android NN SDK version %d to version " + "supported by target devices: %d", + nnapi->android_sdk_version, devices_sdk_version); + + *target_sdk_version = devices_sdk_version; + } + + return kTfLiteOk; +} + +// Returns true if this delegate is configured to use a specific set of devices. +// This will happen either if: +// - accelerator_name option has been specified +// - NNAPI CPU implementation has been explicitly disabled. +// If exclude_nnapi_reference is true this method will return false if the +// accelerator_name in the delegate options is equal to "nnapi-reference" +bool ShouldUseTargetDevices(TfLiteDelegate* delegate, + bool exclude_nnapi_reference = false) { + const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate); + const char* device_name_ptr = delegate_options.accelerator_name; + std::string nnapi_cpu("nnapi-reference"); + bool has_selected_accelerator = device_name_ptr != nullptr; + if (exclude_nnapi_reference && has_selected_accelerator) { + has_selected_accelerator = nnapi_cpu != device_name_ptr; + } + return (delegate_options.disallow_nnapi_cpu) || has_selected_accelerator; +} + +// Fills the given result vector with the list of devices the given delegate +// is referring to. +// There are three possible results: +// - an empty array (not the full list of available accelerators, +// for efficiency reasons) if no accelerator is chosen and the +// disallow_nnapi_cpu delegate option is false. +// - A single element array with the target processor, if an accelerator name +// is specified in the delegate options. +// - The full list of devices available on device less the nnapi reference +// implementation if the delegate option disallow_nnapi_cpu has been +// specified. +TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate, + const NnApi* nnapi, int* nnapi_errno, + std::vector* result) { + if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) { + return kTfLiteError; + } + + const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate); + const char* device_name_ptr = delegate_options.accelerator_name; + + if (device_name_ptr != nullptr) { + // User specified an accelerator to use. 
+
+// Fills the given result vector with the list of devices the given delegate
+// is referring to.
+// There are three possible results:
+// - an empty array (not the full list of available accelerators, for
+//   efficiency reasons) if no accelerator is chosen and the
+//   disallow_nnapi_cpu delegate option is false.
+// - a single-element array with the target accelerator, if an accelerator
+//   name is specified in the delegate options.
+// - the full list of devices available on the device, excluding the NNAPI
+//   reference implementation, if the delegate option disallow_nnapi_cpu has
+//   been specified.
+TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
+                              const NnApi* nnapi, int* nnapi_errno,
+                              std::vector<ANeuralNetworksDevice*>* result) {
+  if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
+    return kTfLiteError;
+  }
+
+  const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
+  const char* device_name_ptr = delegate_options.accelerator_name;
+
+  if (device_name_ptr != nullptr) {
+    // User specified an accelerator to use.
+    ANeuralNetworksDevice* nnapi_device = nullptr;
+    TF_LITE_ENSURE_STATUS(
+        GetDeviceHandle(context, device_name_ptr, &nnapi_device, nnapi_errno));
+    result->push_back(nnapi_device);
+  } else if (delegate_options.disallow_nnapi_cpu) {
+    std::string nnapi_cpu("nnapi-reference");
+    uint32_t num_devices = 0;
+    NnApiImplementation()->ANeuralNetworks_getDeviceCount(&num_devices);
+
+    for (uint32_t i = 0; i < num_devices; i++) {
+      ANeuralNetworksDevice* device = nullptr;
+      const char* buffer = nullptr;
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(
+          context, NnApiImplementation()->ANeuralNetworks_getDevice(i, &device),
+          "Getting list of available devices", nnapi_errno);
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(
+          context,
+          NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer),
+          "Getting list of available devices", nnapi_errno);
+      if (nnapi_cpu != buffer) {
+        result->push_back(device);
+      }
+    }
+  }
+
+  return kTfLiteOk;
+}
+
 }  // namespace
 
 namespace delegate {
 
@@ -2899,35 +3010,15 @@ TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
   const auto delegate_options =
       StatefulNnApiDelegate::GetOptions(params->delegate);
-  const char* device_name_ptr = delegate_options.accelerator_name;
-  if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12) {
-    if (device_name_ptr != nullptr) {
-      // User specified an accelerator to use.
-      ANeuralNetworksDevice* nnapi_device =
-          GetDeviceHandle(context, device_name_ptr);
-      if (nnapi_device == nullptr) {
-        return kTfLiteError;
-      }
-      nnapi_devices_.push_back(nnapi_device);
-    } else if (delegate_options.disallow_nnapi_cpu) {
-      std::string nnapi_cpu("nnapi-reference");
-      uint32_t num_devices = 0;
-      NnApiImplementation()->ANeuralNetworks_getDeviceCount(&num_devices);
+  if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
+      ShouldUseTargetDevices(params->delegate)) {
+    TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
+                                           nnapi_errno, &nnapi_devices_));
-      for (uint32_t i = 0; i < num_devices; i++) {
-        ANeuralNetworksDevice* device = nullptr;
-        const char* buffer = nullptr;
-        NnApiImplementation()->ANeuralNetworks_getDevice(i, &device);
-        NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer);
-        if (nnapi_cpu != buffer) {
-          nnapi_devices_.push_back(device);
-        }
-      }
-      if (nnapi_devices_.empty()) {
-        context->ReportError(
-            context, "NNAPI delegate requested but no accelerators available.");
-        return kTfLiteError;
-      }
+    if (nnapi_devices_.empty()) {
+      context->ReportError(
+          context, "NNAPI delegate requested but no accelerators available.");
+      return kTfLiteError;
     }
   }
 
@@ -3504,11 +3595,20 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
           builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
     }
   }
+
+  // If we have target accelerators, the target SDK version might be
+  // different from the current Android version.
+  int target_sdk_version = nnapi_->android_sdk_version;
+  if (!nnapi_devices_.empty()) {
+    TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
+        context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno));
+  }
+
   // Get op type and operands
-  // Fails if the Map function failed
+  // Fails if the Validate function failed
   int nn_op_type;
   TF_LITE_ENSURE_STATUS(Map(context, reg->builtin_code, reg->version,
-                            nnapi_->android_sdk_version,
+                            target_sdk_version,
                             {context, &builder, node, &model_state_outputs_,
                              &model_state_tfl_inputs_, &feedback_loops_},
                             &nn_op_type));
@@ -3755,20 +3855,30 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
       !nnapi->nnapi_exists) {
     return kTfLiteOk;
   }
-  bool is_accelerator_specified = false;
+
+  int target_sdk_version = nnapi->android_sdk_version;
   // For NNAPI 1.2+, check if there is any accelerator available.
-  // If not, don't delegate to NNAPI's CPU reference implementation.
+  // If not, don't delegate to NNAPI's CPU reference implementation unless
+  // it has been specified as the target accelerator.
   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
-    // Check if user specified an acclelerator to use.
-    const char* device_name_ptr = GetOptions(delegate).accelerator_name;
-    if (device_name_ptr) {
-      if (!GetDeviceHandle(context, device_name_ptr)) {
-        return kTfLiteError;
-      } else {
-        // also check if the selected device is not CPU reference impl.
-        const string kNnapiReferenceImplName = "nnapi-reference";
-        is_accelerator_specified = kNnapiReferenceImplName != device_name_ptr;
+    if (ShouldUseTargetDevices(delegate)) {
+      std::vector<ANeuralNetworksDevice*> devices;
+      TF_LITE_ENSURE_STATUS(
+          GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
+
+      if (devices.empty()) {
+        if (StatefulNnApiDelegate::GetOptions(delegate).accelerator_name) {
+          // There was a selected device and it is not available.
+          return kTfLiteError;
+        } else {
+          // Only nnapi-reference is available but was disabled by the
+          // delegate options.
+          return kTfLiteOk;
+        }
       }
+
+      TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
+          context, nnapi, devices, &target_sdk_version, nnapi_errno));
     } else {
       // If no accelerator is specified, only use NNAPI if an accelerator is
      // available. Any available accelerator will make the device_count larger
@@ -3791,16 +3901,17 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
   TfLiteIntArray* plan;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
 
-  int android_sdk_version = NnApiImplementation()->android_sdk_version;
   // Check for every node if it is supported
   for (int node_index : TfLiteIntArrayView(plan)) {
     TfLiteNode* node;
     TfLiteRegistration* registration;
     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));
-    if (NNAPIDelegateKernel::Validate(
-            context, registration->builtin_code, registration->version,
-            android_sdk_version, node, is_accelerator_specified)) {
+    const bool is_accelerator_specified =
+        ShouldUseTargetDevices(delegate, /*exclude_nnapi_reference=*/true);
+    if (NNAPIDelegateKernel::Validate(context, registration->builtin_code,
+                                      registration->version, target_sdk_version,
+                                      node, is_accelerator_specified)) {
       supported_nodes.push_back(node_index);
     }
   }
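A minimal sketch of how the delegate options map onto the new helpers (illustrative only, not part of the patch; it assumes just the StatefulNnApiDelegate API used in this diff, and "my-accelerator" is a hypothetical device name):

    // Pin the delegate to one named device: ShouldUseTargetDevices() is true
    // and GetTargetDevices() yields exactly that device handle; DoPrepare()
    // returns kTfLiteError if the device is missing.
    StatefulNnApiDelegate::Options pinned;
    pinned.accelerator_name = "my-accelerator";
    StatefulNnApiDelegate pinned_delegate(pinned);

    // No name, but NNAPI CPU disallowed: GetTargetDevices() yields every
    // device except "nnapi-reference"; if that leaves nothing, DoPrepare()
    // returns kTfLiteOk without delegating anything.
    StatefulNnApiDelegate::Options cpu_off;
    cpu_off.disallow_nnapi_cpu = true;
    StatefulNnApiDelegate filtered_delegate(cpu_off);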
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
index 146bf1eaa47..1d9ef8f1cea 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
@@ -180,6 +180,52 @@ TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) {
   EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
 }
 
+TEST_F(NnApiDeviceSelectionTest,
+       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuEnabled) {
+  // Only nnapi-reference is available on device
+  nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
+    *numDevices = 1;
+    return 0;
+  };
+  nnapi_->ANeuralNetworksDevice_getName =
+      [](const ANeuralNetworksDevice* device, const char** name) -> int {
+    if (device == reinterpret_cast<const ANeuralNetworksDevice*>(1)) {
+      *name = "nnapi-reference";
+    }
+    return 0;
+  };
+
+  tflite::StatefulNnApiDelegate::Options options;
+  options.disallow_nnapi_cpu = false;
+  InitWithOptions(options);
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
+}
+
+TEST_F(NnApiDeviceSelectionTest,
+       DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuDisabled) {
+  // Only nnapi-reference is available on device
+  nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
+    *numDevices = 1;
+    return 0;
+  };
+  nnapi_->ANeuralNetworksDevice_getName =
+      [](const ANeuralNetworksDevice* device, const char** name) -> int {
+    if (device == reinterpret_cast<const ANeuralNetworksDevice*>(1)) {
+      *name = "nnapi-reference";
+    }
+    return 0;
+  };
+
+  tflite::StatefulNnApiDelegate::Options options;
+  options.disallow_nnapi_cpu = true;
+  InitWithOptions(options);
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
+}
+
 }  // namespace
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
index db263a195f4..ec38d1ee008 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
@@ -288,6 +288,8 @@ class NNAPIDelegateKernel {
   const NnApi* nnapi_;
   // ANN device handle.
   std::vector<ANeuralNetworksDevice*> nnapi_devices_;
+  // Name of the nnapi device, empty if nnapi_devices_ is empty.
+  std::string device_name_;
   // ANN API state.
   std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
   std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h
index 4a48409de1e..6a1720971b2 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h
@@ -28,6 +28,7 @@ limitations under the License.
 #include
 
 #include "absl/memory/memory.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
+#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
 #include "tensorflow/lite/nnapi/nnapi_handler.h"
 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
@@ -52,21 +53,22 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler {
       return open("/dev/zero", O_RDWR);
     };
 
-    GetDeviceCountReturns<0>();
-    ModelCreateReturns<0>();
-    AddOperandReturns<0>();
-    SetOperandValueReturns<0>();
-    AddOperationReturns<0>();
-    IdentifyInputAndOutputsReturns<0>();
-    RelaxComputationFloatReturns<0>();
-    ModelFinishReturns<0>();
-    MemoryCreateFromFdReturns<0>();
-    CompilationCreateReturns<0>();
-    CompilationFinishReturns<0>();
-    ExecutionCreateReturns<0>();
-    ExecutionSetInputFromMemoryReturns<0>();
-    ExecutionSetOutputFromMemoryReturns<0>();
-    ExecutionComputeReturns<0>();
+    ModelCreateReturns<0>();
+    AddOperandReturns<0>();
+    SetOperandValueReturns<0>();
+    AddOperationReturns<0>();
+    IdentifyInputAndOutputsReturns<0>();
+    RelaxComputationFloatReturns<0>();
+    ModelFinishReturns<0>();
+    MemoryCreateFromFdReturns<0>();
+    CompilationCreateReturns<0>();
+    CompilationCreateForDevicesReturns<0>();
+    CompilationFinishReturns<0>();
+    ExecutionCreateReturns<0>();
+    ExecutionSetInputFromMemoryReturns<0>();
+    ExecutionSetOutputFromMemoryReturns<0>();
+    ExecutionComputeReturns<0>();
+    SetNnapiSupportedDevice("test-device", android_sdk_version);
   }
 
   ~NnApiMock() { Reset(); }
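The mock now exposes a "test-device" accelerator next to the nnapi-reference CPU, so feature-level-dependent code paths can be exercised off-device. A sketch of the intended use in a fixture (illustrative only; NnApiDelegateMockTest and nnapi_mock_ come from nnapi_delegate_mock_test.h above):

    struct MyNnApiTest : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};

    TEST_F(MyNnApiTest, RespectsMockedFeatureLevel) {
      // Run as if on Android 10, with an accelerator that only implements
      // NNAPI 1.1 (feature level 28).
      nnapi_mock_->SetAndroidSdkVersion(29);
      nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/28);
      // ... build a model targeting "test-device" and assert which ops fell
      // back to the TFLite CPU kernels.
    }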
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
index 780e50c84dc..058ecf45c1a 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include
 
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/test_util.h"
 #include "tensorflow/lite/minimal_logging.h"
@@ -1895,7 +1896,7 @@ class BaseActivationsOpModel : public SingleOpModelWithNNAPI {
  public:
   // Most activations don't take any options, so this constructor works for
   // them.
-  BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
+  BaseActivationsOpModel(BuiltinOperator type, const TensorData& input) {
     input_ = AddInput(input);
     if (input.type == TensorType_UINT8) {
       output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
@@ -3031,19 +3032,19 @@ class LSTMOpModel : public SingleOpModelWithNNAPI {
     PopulateTensor(projection_bias_, f);
   }
 
-  void SetInputLayerNormCoefficients(std::vector<float> f) {
+  void SetInputLayerNormCoefficients(const std::vector<float>& f) {
     PopulateTensor(input_layer_norm_coefficients_, f);
   }
 
-  void SetForgetLayerNormCoefficients(std::vector<float> f) {
+  void SetForgetLayerNormCoefficients(const std::vector<float>& f) {
     PopulateTensor(forget_layer_norm_coefficients_, f);
   }
 
-  void SetCellLayerNormCoefficients(std::vector<float> f) {
+  void SetCellLayerNormCoefficients(const std::vector<float>& f) {
     PopulateTensor(cell_layer_norm_coefficients_, f);
   }
 
-  void SetOutputLayerNormCoefficients(std::vector<float> f) {
+  void SetOutputLayerNormCoefficients(const std::vector<float>& f) {
     PopulateTensor(output_layer_norm_coefficients_, f);
   }
 
@@ -5122,6 +5123,129 @@ TEST(QuantizedPadV2OpTest, Int8AdvancedDynamicValuedTest) {
   AdvancedDynamicValuedTest();
 }
 
+struct UnsupportedOperationOnDeviceTest
+    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {};
+
+class AcceleratedModel {
+ public:
+  StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
+
+ protected:
+  // Builds a delegate with the given target accelerator name.
+  explicit AcceleratedModel(const std::string& accelerator_name) {
+    StatefulNnApiDelegate::Options options;
+    options.accelerator_name = accelerator_name.c_str();
+    stateful_delegate_.reset(new StatefulNnApiDelegate(options));
+  }
+
+  // Builds a delegate with no target accelerator name; the NNAPI CPU fallback
+  // implementation can be disabled with the disallow_nnapi_cpu flag.
+  explicit AcceleratedModel(bool disallow_nnapi_cpu) {
+    StatefulNnApiDelegate::Options options;
+    options.disallow_nnapi_cpu = disallow_nnapi_cpu;
+    stateful_delegate_.reset(new StatefulNnApiDelegate(options));
+  }
+
+ private:
+  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
+};
+
+class ArgMaxOpModel : public SingleOpModel, public AcceleratedModel {
+ public:
+  ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
+                int axis_value, TensorType output_type,
+                const char* device_name)
+      : SingleOpModel(), AcceleratedModel(device_name) {
+    Init(input_shape, input_type, axis_value, output_type);
+  }
+
+  ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
+                int axis_value, TensorType output_type,
+                bool disallow_nnapi_cpu)
+      : SingleOpModel(), AcceleratedModel(disallow_nnapi_cpu) {
+    Init(input_shape, input_type, axis_value, output_type);
+  }
+
+  int input() const { return input_; }
+
+ protected:
+  int input_;
+  int axis_;
+  int output_;
+
+  void Init(std::initializer_list<int> input_shape, TensorType input_type,
+            int axis_value, TensorType output_type) {
+    auto* delegate = GetDelegate();
+    this->SetApplyDelegate([delegate](Interpreter* interpreter) {
+      interpreter->ModifyGraphWithDelegate(delegate);
+    });
+    input_ = AddInput(input_type);
+    axis_ = AddConstInput(TensorType_INT32, {axis_value}, {1});
+    output_ = AddOutput(output_type);
+
+    SetBuiltinOp(BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions,
+                 CreateArgMaxOptions(builder_, output_type).Union());
+    BuildInterpreter({input_shape, {1}});
+  }
+};
+
+TEST_F(UnsupportedOperationOnDeviceTest,
+       ShouldUseDeviceFeatureLevelWhenSpecifyingTargetDevice) {
+  nnapi_mock_->SetAndroidSdkVersion(29);
+  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/28);
+
+  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
+                  TensorType_INT32, "test-device");
+  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
+  m.Invoke();
+
+  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
+      << "Expected ArgMax not to be delegated since it is not supported "
+         "before NNAPI 1.2 and the device declares to support only NNAPI 1.1.";
+
+  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/29);
+
+  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
+                   TensorType_INT32, "test-device");
+  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
+  m1.Invoke();
+
+  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
+      << "Expected ArgMax op to be delegated since it is supported in NNAPI "
+         "1.2.";
+}
+
+TEST_F(UnsupportedOperationOnDeviceTest,
+       ShouldUseDeviceFeatureLevelWhenDisablingCPU) {
+  nnapi_mock_->SetAndroidSdkVersion(29);
+  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/28);
+
+  ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
+                  TensorType_INT32, /*disallow_nnapi_cpu=*/true);
+  m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
+  m.Invoke();
+
+  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
+      << "Expected ArgMax not to be delegated since it is not supported "
+         "before NNAPI 1.2 and the device declares to support only NNAPI 1.1.";
+
+  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
+                   TensorType_INT32, /*disallow_nnapi_cpu=*/false);
+  m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
+  m1.Invoke();
+
+  EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
+      << "Expected ArgMax op to be delegated since we enabled the NNAPI CPU "
+         "implementation.";
+
+  nnapi_mock_->SetNnapiSupportedDevice("test-device", /*feature_level=*/29);
+
+  ArgMaxOpModel m2({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
+                   TensorType_INT32, /*disallow_nnapi_cpu=*/true);
+  m2.PopulateTensor<float>(m2.input(), {0.1, 0.9, 0.7, 0.3});
+  m2.Invoke();
+
+  EXPECT_EQ(m2.CountOpsExecutedByCpuKernel(), 0)
+      << "Expected ArgMax op to be delegated since it is supported in NNAPI "
+         "1.2.";
+}
+
 }  // namespace
 }  // namespace tflite
diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc
index 67cd514e1e8..5e326c32219 100644
--- a/tensorflow/lite/kernels/test_util.cc
+++ b/tensorflow/lite/kernels/test_util.cc
@@ -295,6 +295,22 @@ int CountPartitionsDelegatedTo(Interpreter* interpreter,
   return result;
 }
 
+// Returns the number of nodes that will be executed on the CPU.
+int CountPartitionsExecutedByCpuKernel(const Interpreter* interpreter) {
+  int result = 0;
+  for (int node_idx : interpreter->execution_plan()) {
+    TfLiteNode node;
+    TfLiteRegistration reg;
+    std::tie(node, reg) = *(interpreter->node_and_registration(node_idx));
+
+    // Nodes not claimed by any delegate run on the built-in CPU kernels.
+    if (node.delegate == nullptr) {
+      ++result;
+    }
+  }
+
+  return result;
+}
+
 }  // namespace
 
 void SingleOpModel::ExpectOpAcceleratedWithNnapi(const std::string& test_id) {
@@ -322,6 +338,10 @@ void SingleOpModel::ValidateAcceleration() {
   }
 }
 
+int SingleOpModel::CountOpsExecutedByCpuKernel() {
+  return CountPartitionsExecutedByCpuKernel(interpreter_.get());
+}
+
 SingleOpModel::~SingleOpModel() { ValidateAcceleration(); }
 
 }  // namespace tflite
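CountPartitionsExecutedByCpuKernel counts execution-plan nodes whose TfLiteNode::delegate is null, i.e. nodes that no delegate claimed during partitioning; this is what the new CountOpsExecutedByCpuKernel assertions rely on. A short usage sketch (illustrative only; ArgMaxOpModel is defined in nnapi_delegate_test.cc earlier in this diff):

    ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
                    TensorType_INT32, "test-device");
    m.Invoke();
    // 0 if the single ARG_MAX node was delegated to NNAPI, 1 if it stayed on
    // the built-in CPU kernel.
    int cpu_nodes = m.CountOpsExecutedByCpuKernel();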
diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h
index 0885e129d4a..7ee67914663 100644
--- a/tensorflow/lite/kernels/test_util.h
+++ b/tensorflow/lite/kernels/test_util.h
@@ -377,6 +377,7 @@ class SingleOpModel {
   // Enables NNAPI delegate application during interpreter creation.
   static void SetForceUseNnapi(bool use_nnapi);
   static bool GetForceUseNnapi();
+  int CountOpsExecutedByCpuKernel();
 
  protected:
   int32_t GetTensorSize(int index) const;
diff --git a/tensorflow/lite/nnapi/nnapi_handler.cc b/tensorflow/lite/nnapi/nnapi_handler.cc
index 354ad66463c..c26b18d4ee7 100644
--- a/tensorflow/lite/nnapi/nnapi_handler.cc
+++ b/tensorflow/lite/nnapi/nnapi_handler.cc
@@ -21,6 +21,16 @@ limitations under the License.
 namespace tflite {
 namespace nnapi {
 
+// static
+const char NnApiHandler::kNnapiReferenceDeviceName[] = "nnapi-reference";
+// static
+const int NnApiHandler::kNnapiReferenceDevice = 1;
+// static
+const int NnApiHandler::kNnapiDevice = 2;
+
+char* NnApiHandler::nnapi_device_name_ = nullptr;
+int NnApiHandler::nnapi_device_feature_level_;
+
 const NnApi* NnApiPassthroughInstance() {
   static const NnApi orig_nnapi_copy = *NnApiImplementation();
   return &orig_nnapi_copy;
@@ -40,5 +50,73 @@ void NnApiHandler::Reset() {
   *nnapi_ = *NnApiPassthroughInstance();
 }
 
+void NnApiHandler::SetAndroidSdkVersion(int version) {
+  nnapi_->android_sdk_version = version;
+}
+
+void NnApiHandler::SetDeviceName(const std::string& name) {
+  delete[] nnapi_device_name_;
+  nnapi_device_name_ = new char[name.size() + 1];
+  std::strcpy(nnapi_device_name_, name.c_str());  // NOLINT
+}
+
+void NnApiHandler::GetDeviceNameReturnsName(const std::string& name) {
+  NnApiHandler::SetDeviceName(name);
+  GetDeviceNameReturns<0>();
+}
+
+void NnApiHandler::SetNnapiSupportedDevice(const std::string& name,
+                                           int feature_level) {
+  NnApiHandler::SetDeviceName(name);
+  nnapi_device_feature_level_ = feature_level;
+
+  GetDeviceCountReturnsCount<2>();
+  nnapi_->ANeuralNetworks_getDevice =
+      [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
+    if (devIndex > 1) {
+      return ANEURALNETWORKS_BAD_DATA;
+    }
+
+    if (devIndex == 1) {
+      *device =
+          reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice);
+    } else {
+      *device = reinterpret_cast<ANeuralNetworksDevice*>(
+          NnApiHandler::kNnapiReferenceDevice);
+    }
+    return ANEURALNETWORKS_NO_ERROR;
+  };
+  nnapi_->ANeuralNetworksDevice_getName =
+      [](const ANeuralNetworksDevice* device, const char** name) -> int {
+    if (device ==
+        reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice)) {
+      *name = NnApiHandler::nnapi_device_name_;
+      return ANEURALNETWORKS_NO_ERROR;
+    }
+    if (device == reinterpret_cast<ANeuralNetworksDevice*>(
+                      NnApiHandler::kNnapiReferenceDevice)) {
+      *name = NnApiHandler::kNnapiReferenceDeviceName;
+      return ANEURALNETWORKS_NO_ERROR;
+    }
+
+    return ANEURALNETWORKS_BAD_DATA;
+  };
+  nnapi_->ANeuralNetworksDevice_getFeatureLevel =
+      [](const ANeuralNetworksDevice* device, int64_t* featureLevel) -> int {
+    if (device ==
+        reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice)) {
+      *featureLevel = NnApiHandler::nnapi_device_feature_level_;
+      return ANEURALNETWORKS_NO_ERROR;
+    }
+    if (device == reinterpret_cast<ANeuralNetworksDevice*>(
+                      NnApiHandler::kNnapiReferenceDevice)) {
+      *featureLevel = 1000;
+      return ANEURALNETWORKS_NO_ERROR;
+    }
+
+    return ANEURALNETWORKS_BAD_DATA;
+  };
+}
+
 }  // namespace nnapi
 }  // namespace tflite
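After SetNnapiSupportedDevice, the stubbed NNAPI reports exactly two devices: index 0 is the reference CPU (fake handle 1, name "nnapi-reference", feature level 1000) and index 1 is the configurable accelerator (fake handle 2, with the name and feature level taken from the call). A short walk-through of the stubs (illustrative only; nnapi_ as inside NnApiHandler):

    ANeuralNetworksDevice* device = nullptr;
    nnapi_->ANeuralNetworks_getDevice(1, &device);  // the accelerator handle
    const char* name = nullptr;
    nnapi_->ANeuralNetworksDevice_getName(device, &name);  // configured name
    int64_t level = 0;
    nnapi_->ANeuralNetworksDevice_getFeatureLevel(device, &level);
    // level now equals the feature_level passed to SetNnapiSupportedDevice.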
diff --git a/tensorflow/lite/nnapi/nnapi_handler.h b/tensorflow/lite/nnapi/nnapi_handler.h
index 70406ba2c6e..0bcdda26a46 100644
--- a/tensorflow/lite/nnapi/nnapi_handler.h
+++ b/tensorflow/lite/nnapi/nnapi_handler.h
@@ -46,15 +46,49 @@ class NnApiHandler {
   template <int Value>
   void GetDeviceCountReturns() {
     nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
-      *numDevices = 2;
+      *numDevices = 1;
       return Value;
     };
   }
 
+  template <int DeviceCount>
+  void GetDeviceCountReturnsCount() {
+    nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
+      *numDevices = DeviceCount;
+      return ANEURALNETWORKS_NO_ERROR;
+    };
+  }
+
   void StubGetDeviceCountWith(int(stub)(uint32_t*)) {
     nnapi_->ANeuralNetworks_getDeviceCount = stub;
   }
 
+  template <int Value>
+  void GetDeviceReturns() {
+    nnapi_->ANeuralNetworks_getDevice =
+        [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
+      *device =
+          reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice);
+      return Value;
+    };
+  }
+
+  template <int Value>
+  void GetDeviceNameReturns() {
+    nnapi_->ANeuralNetworksDevice_getName =
+        [](const ANeuralNetworksDevice* device, const char** name) -> int {
+      *name = NnApiHandler::nnapi_device_name_;
+      return Value;
+    };
+  }
+
+  void GetDeviceNameReturnsName(const std::string& name);
+
+  // Configure all the functions related to device browsing to support
+  // a device with the given name and the CPU fallback nnapi-reference.
+  // The extra device will report support for the specified feature level.
+  void SetNnapiSupportedDevice(const std::string& name, int feature_level = 29);
+
   template <int Value>
   void ModelCreateReturns() {
     nnapi_->ANeuralNetworksModel_create = [](ANeuralNetworksModel** model) {
@@ -126,6 +160,17 @@ class NnApiHandler {
     };
   }
 
+  template <int Value>
+  void CompilationCreateForDevicesReturns() {
+    nnapi_->ANeuralNetworksCompilation_createForDevices =
+        [](ANeuralNetworksModel* model,
+           const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+           ANeuralNetworksCompilation** compilation) {
+          *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
+          return Value;
+        };
+  }
+
   template <int Value>
   void CompilationFinishReturns() {
     nnapi_->ANeuralNetworksCompilation_finish =
@@ -165,10 +210,22 @@ class NnApiHandler {
         [](ANeuralNetworksExecution* execution) { return Value; };
   }
 
+  void SetAndroidSdkVersion(int version);
+
  protected:
   explicit NnApiHandler(NnApi* nnapi) : nnapi_(nnapi) { DCHECK(nnapi); }
 
   NnApi* nnapi_;
+
+  static const char kNnapiReferenceDeviceName[];
+  static const int kNnapiReferenceDevice;
+  static const int kNnapiDevice;
+
+  static void SetDeviceName(const std::string& name);
+
+ private:
+  static char* nnapi_device_name_;
+  static int nnapi_device_feature_level_;
 };
 
 // Returns a pointer to an unaltered instance of NNAPI. Is intended