From cfb6d217c9963de69a31d543a373b9a39854108c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 May 2020 14:41:08 -0700 Subject: [PATCH] Implement NNAPI QoS APIs in NNAPI delegate. PiperOrigin-RevId: 311804298 Change-Id: Ia018050ca90fbc2cc12f363b5bc52727734e4abf --- .../lite/delegates/nnapi/nnapi_delegate.cc | 37 +++++ .../lite/delegates/nnapi/nnapi_delegate.h | 36 +++++ .../delegates/nnapi/nnapi_delegate_test.cc | 17 ++ tensorflow/lite/nnapi/NeuralNetworksTypes.h | 24 +++ tensorflow/lite/nnapi/nnapi_implementation.cc | 11 ++ tensorflow/lite/nnapi/nnapi_implementation.h | 148 ++++++++++++++++++ 6 files changed, 273 insertions(+) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index e6faea62bf6..39ab19aed2d 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -3256,6 +3256,22 @@ TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context, RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result, "configuring NNAPI caching", nnapi_errno); } + // Set compilation timeout if applicable. 
+ if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) { + if (delegate_options.max_compilation_timeout_duration_ns > 0) { + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context, + nnapi_->ANeuralNetworksCompilation_setTimeout( + compilation, + delegate_options.max_compilation_timeout_duration_ns), + "setting compilation timeout", nnapi_errno); + } + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context, + nnapi_->ANeuralNetworksCompilation_setPriority( + compilation, delegate_options.execution_priority), + "setting compilation priority", nnapi_errno); + } const int finish_result = nnapi_->ANeuralNetworksCompilation_finish(compilation); if (finish_result != ANEURALNETWORKS_NO_ERROR) { @@ -3322,6 +3338,27 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution> execution_unique_ptr(execution, NNFreeExecution(nnapi_)); + // Set execution timeouts if applicable. + const auto delegate_options = + StatefulNnApiDelegate::GetOptions(node->delegate); + if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) { + if (delegate_options.max_execution_timeout_duration_ns > 0) { + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context, + nnapi_->ANeuralNetworksExecution_setTimeout( + execution, delegate_options.max_execution_timeout_duration_ns), + "setting execution timeout", nnapi_errno); + } + if (delegate_options.max_execution_loop_timeout_duration_ns > 0) { + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context, + nnapi_->ANeuralNetworksExecution_setLoopTimeout( + execution, + delegate_options.max_execution_loop_timeout_duration_ns), + "setting execution loop timeout", nnapi_errno); + } + } + // Set the input tensor buffers. Note: we access tflite tensors using // absolute indices but NN api indices inputs by relative indices. 
int relative_input_index = 0; diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h index b94c6d66978..68c55e1aef4 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h @@ -22,6 +22,7 @@ limitations under the License. #include "absl/types/optional.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h" #include "tensorflow/lite/nnapi/nnapi_implementation.h" typedef struct ANeuralNetworksMemory ANeuralNetworksMemory; @@ -92,6 +93,30 @@ class StatefulNnApiDelegate : public TfLiteDelegate { // allow fp32 compuation to be run in fp16. bool allow_fp16 = false; + + // Specifies the relative priority for executions of the model. + // Available values are {ANEURALNETWORKS_PRIORITY_LOW, + // ANEURALNETWORKS_PRIORITY_MEDIUM, ANEURALNETWORKS_PRIORITY_HIGH, + // ANEURALNETWORKS_PRIORITY_DEFAULT}. + int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT; + + // Specifies the maximum expected duration in nanoseconds for compiling the + // model. If the device is not able to complete the compilation within the + // specified duration, the compilation may be aborted. If set to 0, the + // timeout duration is considered infinite. + uint64_t max_compilation_timeout_duration_ns = 0; + + // Specifies the maximum expected duration in nanoseconds for executing the + // model. If the device is not able to complete the execution within the + // specified duration, the execution may be aborted. If set to 0, the + // timeout duration is considered infinite. + uint64_t max_execution_timeout_duration_ns = 0; + + // Specifies the maximum expected duration in nanoseconds for WHILE loops in + // the execution. If a WHILE loop condition model does not output false + // within the specified duration, the execution will be aborted. If set to + // 0, the default timeout for loops will be used. 
+ uint64_t max_execution_loop_timeout_duration_ns = 0; }; // Uses default options. @@ -189,6 +214,17 @@ class StatefulNnApiDelegate : public TfLiteDelegate { int max_number_delegated_partitions; // allow fp32 computation to be run in fp16. bool allow_fp16; + // Specifies the relative priority for executions of the model. + int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT; + // Specifies the maximum expected duration in nanoseconds for compiling the + // model. + uint64_t max_compilation_timeout_duration_ns = 0; + // Specifies the maximum expected duration in nanoseconds for executing the + // model. + uint64_t max_execution_timeout_duration_ns = 0; + // Specifies the maximum expected duration in nanoseconds for WHILE loops in + // the execution. + uint64_t max_execution_loop_timeout_duration_ns = 0; ~Data(); diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index ea9111c4567..acfa0c77d30 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -304,6 +304,23 @@ TEST(NNAPIDelegate, StatefulDelegateWithCompilationCaching) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3})); } +// Sanity check for the state-ful NNAPI delegate with QoS hints. 
+TEST(NNAPIDelegate, StatefulDelegateWithQoS) { + StatefulNnApiDelegate::Options options; + options.execution_priority = ANEURALNETWORKS_PRIORITY_HIGH; + options.max_compilation_timeout_duration_ns = UINT64_MAX; + options.max_execution_timeout_duration_ns = UINT64_MAX; + options.max_execution_loop_timeout_duration_ns = UINT64_MAX; + + FloatAddOpModel m(options, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3})); +} + // Sanity check for the state-ful NNAPI delegate using TfLiteBufferHandle. TEST(NNAPIDelegate, StatefulDelegateWithBufferHandles) { // Skip the test if Android specific functions could not be found. diff --git a/tensorflow/lite/nnapi/NeuralNetworksTypes.h b/tensorflow/lite/nnapi/NeuralNetworksTypes.h index a3dfd373405..6739838e4d1 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksTypes.h +++ b/tensorflow/lite/nnapi/NeuralNetworksTypes.h @@ -215,6 +215,18 @@ enum { ANEURALNETWORKS_DEVICE_ACCELERATOR = 4, }; +/** + * Relative execution priority. + * + * Available since API level 30. + */ +enum { + ANEURALNETWORKS_PRIORITY_LOW = 90, + ANEURALNETWORKS_PRIORITY_MEDIUM = 100, + ANEURALNETWORKS_PRIORITY_HIGH = 110, + ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM, +}; + /** * ANeuralNetworksMemory is an opaque type that represents memory. 
* @@ -528,9 +540,21 @@ typedef int (*ANeuralNetworksCompilation_setCaching_fn)( ANeuralNetworksCompilation* compilation, const char* cacheDir, const uint8_t* token); +typedef int (*ANeuralNetworksCompilation_setTimeout_fn)( + ANeuralNetworksCompilation* compilation, uint64_t duration); + +typedef int (*ANeuralNetworksCompilation_setPriority_fn)( + ANeuralNetworksCompilation* compilation, int priority); + typedef int (*ANeuralNetworksExecution_compute_fn)( ANeuralNetworksExecution* execution); +typedef int (*ANeuralNetworksExecution_setTimeout_fn)( + ANeuralNetworksExecution* execution, uint64_t duration); + +typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)( + ANeuralNetworksExecution* execution, uint64_t duration); + typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)( ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank); diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc index accdfb6c7da..ad5869fec04 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.cc +++ b/tensorflow/lite/nnapi/nnapi_implementation.cc @@ -215,6 +215,17 @@ const NnApi LoadNnApi() { ANeuralNetworksModel_getExtensionOperationType); LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksModel_setOperandExtensionData); + + // API 30 (NNAPI 1.3) methods. 
+ LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksCompilation_setTimeout); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksCompilation_setPriority); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksExecution_setTimeout); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksExecution_setLoopTimeout); + return nnapi; } diff --git a/tensorflow/lite/nnapi/nnapi_implementation.h b/tensorflow/lite/nnapi/nnapi_implementation.h index a27f5ba661a..abee0fbdef3 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.h +++ b/tensorflow/lite/nnapi/nnapi_implementation.h @@ -789,6 +789,76 @@ struct NnApi { ANeuralNetworksCompilation* compilation, const char* cacheDir, const uint8_t* token); + /** + * Set the maximum expected duration for compiling the model. + * + * If the device is not able to complete the compilation within the specified + * duration, the compilation may be aborted. The timeout duration begins at + * the call to {@link ANeuralNetworksCompilation_finish}. + * + * This timeout duration acts as a hint to drivers, and can be used to both + * free up compute resources within the driver and return control back to the + * application quicker than is possible without the hint. It enables drivers + * that are able to estimate how long a compilation will take to abort the + * compilation before it has even started if the driver believes the + * compilation cannot be completed within the timeout duration. Similarly, it + * enables drivers to abort an ongoing compilation if it is taking too long. + * However, this call does not guarantee that the compilation will complete or + * abort within the timeout duration. + * + * By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called), + * the timeout duration for compiling the model is considered infinite. 
+ * + * The {@link ANeuralNetworksCompilation} must have been created with + * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1, + * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the + * device has a feature level reported by + * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then + * the timeout duration hint will be ignored. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @param compilation The compilation to be modified. + * @param duration The maximum amount of time in nanoseconds that is expected + * to be spent finishing a compilation. If this duration is exceeded, the + * compilation may be aborted. If set to 0, the timeout duration is + * considered infinite. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 30. + */ + int (*ANeuralNetworksCompilation_setTimeout)( + ANeuralNetworksCompilation* compilation, uint64_t duration); + + /** + * Set the execution priority. + * + * Execution priorities are relative to other executions created by the same + * application (specifically same uid) for the same device. Specifically, + * priorities of executions from one application will not affect executions + * from another application. Similarly, priorities of executions on one device + * will not affect executions on another device. + * + * Higher priority executions may use more compute resources than lower + * priority executions, and may preempt or starve lower priority executions. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * Available since API level 30. + * + * @param compilation The compilation to be modified. + * @param priority The relative priority of the execution compared to other + * executions created by the application. Must be one of + * ANEURALNETWORKS_PRIORITY_*. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. 
+ */ + int (*ANeuralNetworksCompilation_setPriority)( + ANeuralNetworksCompilation* compilation, int priority); + /** * Schedule synchronous evaluation of the execution. * @@ -813,6 +883,84 @@ struct NnApi { */ int (*ANeuralNetworksExecution_compute)(ANeuralNetworksExecution* execution); + /** + * Set the maximum expected duration of the specified execution. + * + * If the device is not able to complete the execution within the specified + * duration, the execution may be aborted. The timeout duration begins at a + * call to one of: + * - {@link ANeuralNetworksExecution_burstCompute} + * - {@link ANeuralNetworksExecution_compute} + * - {@link ANeuralNetworksExecution_startCompute} + * - {@link ANeuralNetworksExecution_startComputeWithDependencies} + * + * This timeout duration acts as a hint to drivers, and can be used to both + * free up compute resources within the driver and return control back to the + * application quicker than is possible without the hint. It enables drivers + * that are able to estimate how long an execution will take to abort the + * execution before it has even started if the driver believes the execution + * cannot be completed within the timeout duration. Similarly, it enables + * drivers to abort an ongoing execution if it is taking too long. However, + * this call does not guarantee that the execution will complete or abort + * within the timeout duration. + * + * By default (i.e., unless ANeuralNetworksExecution_setTimeout is called), + * the timeout duration for execution is considered infinite. + * + * The {@link ANeuralNetworksExecution} must have been created from an + * {@link ANeuralNetworksCompilation} which in turn was created from + * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1, + * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. 
If the + * device has a feature level reported by + * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then + * the timeout duration hint will be ignored. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded + * usage. + * + * @param execution The execution to be modified. + * @param duration The maximum amount of time in nanoseconds that is expected + * to be spent executing a model. If this duration is exceeded, the execution + * may be aborted. If set to 0, the timeout duration is considered + * infinite. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 30. + */ + int (*ANeuralNetworksExecution_setTimeout)( + ANeuralNetworksExecution* execution, uint64_t duration); + + /** + * Set the maximum duration of WHILE loops in the specified execution. + * + * This is a fuzzy per-loop timeout intended to prevent infinite loops. + * + * If a WHILE loop condition model does not output false within the specified + * duration, the execution will be aborted. + * + * See {@link ANeuralNetworks_getDefaultLoopTimeout} and + * {@link ANeuralNetworks_getMaximumLoopTimeout} for the default + * and maximum timeout values. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded + * usage. + * + * @param execution The execution to be modified. + * @param duration The maximum amount of time in nanoseconds that can be spent + * executing a WHILE loop. If the specified duration value exceeds the + * value produced by {@link ANeuralNetworks_getMaximumLoopTimeout}, it will be + * overridden by that value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * ANEURALNETWORKS_BAD_STATE if execution has started. + * ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL. + * + * Available since API level 30. 
+ */ + int (*ANeuralNetworksExecution_setLoopTimeout)( + ANeuralNetworksExecution* execution, uint64_t duration); + /** * Get the dimensional information of the specified output operand of the * model of the