diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 5cb02e2ae54..6f6b450575f 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -138,6 +138,28 @@ static size_t getNumPaddingBytes(size_t byte_size) { } return num_padding_bytes; } + +// Return NNAPI device handle with the provided null-terminated device name. If +// no matching device could be found, nullptr will be returned. +ANeuralNetworksDevice* GetDeviceHandle(const char* device_name_ptr) { + if (!device_name_ptr) return nullptr; + ANeuralNetworksDevice* device_handle = nullptr; + std::string device_name(device_name_ptr); + uint32_t numDevices = 0; + NnApiImplementation()->ANeuralNetworks_getDeviceCount(&numDevices); + + for (uint32_t i = 0; i < numDevices; i++) { + ANeuralNetworksDevice* device = nullptr; + const char* buffer = nullptr; + NnApiImplementation()->ANeuralNetworks_getDevice(i, &device); + NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer); + if (device_name == buffer) { + device_handle = device; + break; + } + } + return device_handle; +} } // namespace // RAII NN API Model Destructor for use with std::unique_ptr @@ -1161,6 +1183,20 @@ class NNAPIDelegateKernel { nodes_.push_back(node_index); } + const char* device_name_ptr = + StatefulNnApiDelegate::GetOptions(params->delegate).accelerator_name; + // user specified an accelerator to use. 
+ if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && + device_name_ptr != nullptr) { + nnapi_device_ = GetDeviceHandle(device_name_ptr); + if (nnapi_device_ == nullptr) { + context->ReportError(context, + "Could not find the specified accelerator: %s.", + device_name_ptr); + return kTfLiteError; + } + } + if (!nn_model_) { ANeuralNetworksModel* model = nullptr; RETURN_TFLITE_ERROR_IF_NN_ERROR( @@ -1173,9 +1209,16 @@ class NNAPIDelegateKernel { if (!nn_compilation_) { ANeuralNetworksCompilation* compilation = nullptr; - RETURN_TFLITE_ERROR_IF_NN_ERROR( - context, nnapi_->ANeuralNetworksCompilation_create(nn_model_.get(), - &compilation)); + if (nnapi_device_ != nullptr) { + // Compile for the selected accelerator. + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context, nnapi_->ANeuralNetworksCompilation_createForDevices( + nn_model_.get(), &nnapi_device_, 1, &compilation)); + } else { + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context, nnapi_->ANeuralNetworksCompilation_create(nn_model_.get(), + &compilation)); + } auto preference = StatefulNnApiDelegate::GetOptions(params->delegate) .execution_preference; @@ -1298,6 +1341,8 @@ class NNAPIDelegateKernel { private: // Access to NNApi. const NnApi* nnapi_; + // ANN device handle. + ANeuralNetworksDevice* nnapi_device_ = nullptr; // ANN API state. 
std::unique_ptr nn_model_; std::unique_ptr @@ -1506,7 +1551,11 @@ class NNAPIDelegateKernel { StatefulNnApiDelegate::StatefulNnApiDelegate(Options options) : TfLiteDelegate(TfLiteDelegateCreate()), - delegate_data_(Data{.options = options}) { + delegate_data_( + Data{.execution_preference = options.execution_preference}) { + if (options.accelerator_name) { + delegate_data_.accelerator_name = options.accelerator_name; + } Prepare = DoPrepare; data_ = &delegate_data_; } @@ -1514,10 +1563,15 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(Options options) StatefulNnApiDelegate::StatefulNnApiDelegate() : StatefulNnApiDelegate(Options()) {} -const StatefulNnApiDelegate::Options& StatefulNnApiDelegate::GetOptions( +const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions( TfLiteDelegate* delegate) { auto delegate_data = reinterpret_cast(delegate->data_); - return delegate_data->options; + StatefulNnApiDelegate::Options options; + options.execution_preference = delegate_data->execution_preference; + options.accelerator_name = delegate_data->accelerator_name.empty() + ? nullptr + : delegate_data->accelerator_name.c_str(); + return options; } TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context, @@ -1539,6 +1593,15 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context, if (device_count <= 1) { return kTfLiteOk; } + // Check if user specified an accelerator to use. + const char* device_name_ptr = GetOptions(delegate).accelerator_name; + if (device_name_ptr && !GetDeviceHandle(device_name_ptr)) { + // If the selected accelerator cannot be found, NNAPI will not be used. + context->ReportError(context, + "Could not find the specified accelerator: %s.", + device_name_ptr); + return kTfLiteOk; + } } // Allocate one element in vector already since TensorFlow Lite uses // the first value as the number of nodes. 
The actual value will be set diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h index 9981e384ae8..782744efb10 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h @@ -39,6 +39,14 @@ class StatefulNnApiDelegate : public TfLiteDelegate { // Preferred Power/perf trade-off. ExecutionPreference execution_preference = kUndefined; + + // Selected NNAPI accelerator with nul-terminated name. + // Default to nullptr, which implies the NNAPI default behavior: NNAPI + // runtime is allowed to use all available accelerators. If the selected + // accelerator cannot be found, NNAPI will not be used. + // It is the caller's responsibility to ensure the string is valid for the + // duration of the Options object lifetime. + const char* accelerator_name = nullptr; }; // Uses default options. @@ -50,13 +58,15 @@ class StatefulNnApiDelegate : public TfLiteDelegate { ~StatefulNnApiDelegate() = default; // Returns the delegate options. - static const Options& GetOptions(TfLiteDelegate* delegate); + static const Options GetOptions(TfLiteDelegate* delegate); private: // Encapsulates all delegate data. struct Data { - // Delegate options to use. - Options options; + // Preferred Power/perf trade-off. + Options::ExecutionPreference execution_preference; + // Selected NNAPI accelerator name. + std::string accelerator_name; }; // Implements TfLiteDelegate::Prepare. 
Please refer to TFLiteDelegate diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index e8c71e0a0d9..cf20d10e485 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -217,6 +217,23 @@ TEST(NNAPIDelegate, StatefulDelegate) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3})); } +// Sanity check for the state-ful NNAPI delegate with accelerator_name +// specified. +TEST(NNAPIDelegate, StatefulDelegateWithAcceleratorName) { + StatefulNnApiDelegate::Options options; + options.execution_preference = + StatefulNnApiDelegate::Options::ExecutionPreference::kLowPower; + options.accelerator_name = "nnapi-reference"; + + FloatAddOpModel m(options, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3})); +} + class FloatMulOpModel : public SingleOpModelWithNNAPI { public: FloatMulOpModel(const TensorData& input1, const TensorData& input2,