Implement NNAPI QoS APIs in NNAPI delegate.

PiperOrigin-RevId: 311804298
Change-Id: Ia018050ca90fbc2cc12f363b5bc52727734e4abf
Author: A. Unique TensorFlower
Date: 2020-05-15 14:41:08 -07:00
Committed by: TensorFlower Gardener
Parent: c77c31d45d
Commit: cfb6d217c9
6 changed files with 273 additions and 0 deletions


@@ -3256,6 +3256,22 @@ TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
"configuring NNAPI caching", nnapi_errno);
}
// Set compilation timeout and priority if applicable.
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
if (delegate_options.max_compilation_timeout_duration_ns > 0) {
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context,
nnapi_->ANeuralNetworksCompilation_setTimeout(
compilation,
delegate_options.max_compilation_timeout_duration_ns),
"setting compilation timeout", nnapi_errno);
}
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context,
nnapi_->ANeuralNetworksCompilation_setPriority(
compilation, delegate_options.execution_priority),
"setting compilation priority", nnapi_errno);
}
const int finish_result =
nnapi_->ANeuralNetworksCompilation_finish(compilation);
if (finish_result != ANEURALNETWORKS_NO_ERROR) {
@@ -3322,6 +3338,27 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
execution_unique_ptr(execution, NNFreeExecution(nnapi_));
// Set execution timeout and loop timeout if applicable.
const auto delegate_options =
StatefulNnApiDelegate::GetOptions(node->delegate);
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
if (delegate_options.max_execution_timeout_duration_ns > 0) {
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context,
nnapi_->ANeuralNetworksExecution_setTimeout(
execution, delegate_options.max_execution_timeout_duration_ns),
"setting execution timeout", nnapi_errno);
}
if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context,
nnapi_->ANeuralNetworksExecution_setLoopTimeout(
execution,
delegate_options.max_execution_loop_timeout_duration_ns),
"setting execution loop timeout", nnapi_errno);
}
}
// Set the input tensor buffers. Note: we access tflite tensors using
// absolute indices but the NN API indexes inputs by relative indices.
int relative_input_index = 0;


@@ -22,6 +22,7 @@ limitations under the License.
#include "absl/types/optional.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
@@ -92,6 +93,30 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
// allow fp32 computation to be run in fp16.
bool allow_fp16 = false;
// Specifies the relative priority for executions of the model.
// Available values are {ANEURALNETWORKS_PRIORITY_LOW,
// ANEURALNETWORKS_PRIORITY_MEDIUM, ANEURALNETWORKS_PRIORITY_HIGH,
// ANEURALNETWORKS_PRIORITY_DEFAULT}.
int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
// Specifies the maximum expected duration in nanoseconds for compiling the
// model. If the device is not able to complete the compilation within the
// specified duration, the compilation may be aborted. If set to 0, the
// timeout duration is considered infinite.
uint64_t max_compilation_timeout_duration_ns = 0;
// Specifies the maximum expected duration in nanoseconds for executing the
// model. If the device is not able to complete the execution within the
// specified duration, the execution may be aborted. If set to 0, the
// timeout duration is considered infinite.
uint64_t max_execution_timeout_duration_ns = 0;
// Specifies the maximum expected duration in nanoseconds for WHILE loops in
// the execution. If a WHILE loop condition model does not output false
// within the specified duration, the execution will be aborted. If set to
// 0, the default timeout for loops will be used.
uint64_t max_execution_loop_timeout_duration_ns = 0;
};
// Uses default options.
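For context, a minimal usage sketch of these options (not part of this change; the helper name is hypothetical, and the priority and timeout values are illustrative rather than defaults):

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"

// Sketch: hand QoS hints to the NNAPI delegate through its Options struct.
// Leaving a timeout at 0 means "infinite" (or the NNAPI default for the
// WHILE-loop field).
TfLiteStatus ApplyNnApiDelegateWithQos(tflite::Interpreter* interpreter) {
  tflite::StatefulNnApiDelegate::Options options;
  options.execution_priority = ANEURALNETWORKS_PRIORITY_HIGH;
  options.max_compilation_timeout_duration_ns = 500ULL * 1000 * 1000;  // 500 ms
  options.max_execution_timeout_duration_ns = 100ULL * 1000 * 1000;    // 100 ms
  options.max_execution_loop_timeout_duration_ns = 0;  // keep the NNAPI default
  // The delegate must outlive the interpreter; static storage is one simple
  // way to guarantee that in a sketch.
  static tflite::StatefulNnApiDelegate delegate(options);
  return interpreter->ModifyGraphWithDelegate(&delegate);
}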
@@ -189,6 +214,17 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
int max_number_delegated_partitions;
// allow fp32 computation to be run in fp16.
bool allow_fp16;
// Specifies the relative priority for executions of the model.
int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
// Specifies the maximum expected duration in nanoseconds for compiling the
// model.
uint64_t max_compilation_timeout_duration_ns = 0;
// Specifies the maximum expected duration in nanoseconds for executing the
// model.
uint64_t max_execution_timeout_duration_ns = 0;
// Specifies the maximum expected duration in nanoseconds for WHILE loops in
// the execution.
uint64_t max_execution_loop_timeout_duration_ns = 0;
~Data();


@@ -304,6 +304,23 @@ TEST(NNAPIDelegate, StatefulDelegateWithCompilationCaching) {
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
}
// Sanity check for the stateful NNAPI delegate with QoS hints.
TEST(NNAPIDelegate, StatefulDelegateWithQoS) {
StatefulNnApiDelegate::Options options;
options.execution_priority = ANEURALNETWORKS_PRIORITY_HIGH;
options.max_compilation_timeout_duration_ns = UINT64_MAX;
options.max_execution_timeout_duration_ns = UINT64_MAX;
options.max_execution_loop_timeout_duration_ns = UINT64_MAX;
FloatAddOpModel m(options, {TensorType_FLOAT32, {1, 2, 2, 1}},
{TensorType_FLOAT32, {1, 2, 2, 1}},
{TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
m.Invoke();
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
}
// Sanity check for the stateful NNAPI delegate using TfLiteBufferHandle.
TEST(NNAPIDelegate, StatefulDelegateWithBufferHandles) {
// Skip the test if Android specific functions could not be found.


@@ -215,6 +215,18 @@ enum {
ANEURALNETWORKS_DEVICE_ACCELERATOR = 4,
};
/**
* Relative execution priority.
*
* Available since API level 30.
*/
enum {
ANEURALNETWORKS_PRIORITY_LOW = 90,
ANEURALNETWORKS_PRIORITY_MEDIUM = 100,
ANEURALNETWORKS_PRIORITY_HIGH = 110,
ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM,
};
/**
* ANeuralNetworksMemory is an opaque type that represents memory.
*
@@ -528,9 +540,21 @@ typedef int (*ANeuralNetworksCompilation_setCaching_fn)(
ANeuralNetworksCompilation* compilation, const char* cacheDir,
const uint8_t* token);
typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(
ANeuralNetworksCompilation* compilation, uint64_t duration);
typedef int (*ANeuralNetworksCompilation_setPriority_fn)(
ANeuralNetworksCompilation* compilation, int priority);
typedef int (*ANeuralNetworksExecution_compute_fn)(
ANeuralNetworksExecution* execution);
typedef int (*ANeuralNetworksExecution_setTimeout_fn)(
ANeuralNetworksExecution* execution, uint64_t duration);
typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(
ANeuralNetworksExecution* execution, uint64_t duration);
typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(
ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank);


@@ -215,6 +215,17 @@ const NnApi LoadNnApi() {
ANeuralNetworksModel_getExtensionOperationType);
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
ANeuralNetworksModel_setOperandExtensionData);
// API 30 (NNAPI 1.3) methods.
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
ANeuralNetworksCompilation_setTimeout);
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
ANeuralNetworksCompilation_setPriority);
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
ANeuralNetworksExecution_setTimeout);
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
ANeuralNetworksExecution_setLoopTimeout);
return nnapi;
}
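Because these entry points are optional loads, the corresponding NnApi fields stay null when libneuralnetworks.so predates API 30. A hedged sketch of the availability check a direct user of the NnApi table might perform (the helper name is hypothetical):

#include "tensorflow/lite/nnapi/nnapi_implementation.h"

// Sketch: the API 30 entry points may be nullptr on older devices, so code
// that calls them through the NnApi table should feature-test them first.
bool NnApiQosFunctionsAvailable() {
  const NnApi* nnapi = NnApiImplementation();
  return nnapi->nnapi_exists &&
         nnapi->ANeuralNetworksCompilation_setTimeout != nullptr &&
         nnapi->ANeuralNetworksCompilation_setPriority != nullptr &&
         nnapi->ANeuralNetworksExecution_setTimeout != nullptr &&
         nnapi->ANeuralNetworksExecution_setLoopTimeout != nullptr;
}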


@@ -789,6 +789,76 @@ struct NnApi {
ANeuralNetworksCompilation* compilation, const char* cacheDir,
const uint8_t* token);
/**
* Set the maximum expected duration for compiling the model.
*
* If the device is not able to complete the compilation within the specified
* duration, the compilation may be aborted. The timeout duration begins at
* the call to {@link ANeuralNetworksCompilation_finish}.
*
* This timeout duration acts as a hint to drivers, and can be used to both
* free up compute resources within the driver and return control back to the
* application quicker than is possible without the hint. It enables drivers
* that are able to estimate how long a compilation will take to abort the
* compilation before it has even started if the driver believes the
* compilation cannot be completed within the timeout duration. Similarly, it
* enables drivers to abort an ongoing compilation if it is taking too long.
* However, this call does not guarantee that the compilation will complete or
* abort within the timeout duration.
*
* By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called),
* the timeout duration for compiling the model is considered infinite.
*
* The {@link ANeuralNetworksCompilation} must have been created with
* {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
* otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
* device has a feature level reported by
* {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
* the timeout duration hint will be ignored.
*
* See {@link ANeuralNetworksCompilation} for information on multithreaded
* usage.
*
* @param compilation The compilation to be modified.
* @param duration The maximum amount of time in nanoseconds that is expected
* to be spent finishing a compilation. If this duration is exceeded, the
* compilation may be aborted. If set to 0, the timeout duration is
* considered infinite.
*
* @return ANEURALNETWORKS_NO_ERROR if successful.
*
* Available since API level 30.
*/
int (*ANeuralNetworksCompilation_setTimeout)(
ANeuralNetworksCompilation* compilation, uint64_t duration);
/**
* Set the execution priority.
*
* Execution priorities are relative to other executions created by the same
* application (specifically same uid) for the same device. Specifically,
* priorities of executions from one application will not affect executions
* from another application. Similarly, priorities of executions on one device
* will not affect executions on another device.
*
* Higher priority executions may use more compute resources than lower
* priority executions, and may preempt or starve lower priority executions.
*
* See {@link ANeuralNetworksCompilation} for information on multithreaded
* usage.
*
* Available since API level 30.
*
* @param compilation The compilation to be modified.
* @param priority The relative priority of the execution compared to other
* executions created by the application. Must be one of
* ANEURALNETWORKS_PRIORITY_*.
*
* @return ANEURALNETWORKS_NO_ERROR if successful.
*/
int (*ANeuralNetworksCompilation_setPriority)(
ANeuralNetworksCompilation* compilation, int priority);
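A usage sketch that mirrors the delegate's Prepare path above: both compilation hints are applied before the compilation is finished. The helper name and the two-second budget are illustrative, and `compilation` is assumed to have been created with ANeuralNetworksCompilation_createForDevices for a single device:

#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

// Sketch: set the compilation timeout and priority hints, then finish the
// compilation. Both setters must be called before
// ANeuralNetworksCompilation_finish.
int FinishCompilationWithQos(ANeuralNetworksCompilation* compilation) {
  const NnApi* nnapi = NnApiImplementation();
  if (nnapi->android_sdk_version >= 30) {
    // Illustrative budget: 2 seconds, expressed in nanoseconds.
    nnapi->ANeuralNetworksCompilation_setTimeout(compilation,
                                                 2ULL * 1000 * 1000 * 1000);
    nnapi->ANeuralNetworksCompilation_setPriority(compilation,
                                                  ANEURALNETWORKS_PRIORITY_LOW);
  }
  return nnapi->ANeuralNetworksCompilation_finish(compilation);
}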
/**
* Schedule synchronous evaluation of the execution.
*
@@ -813,6 +883,84 @@
*/
int (*ANeuralNetworksExecution_compute)(ANeuralNetworksExecution* execution);
/**
* Set the maximum expected duration of the specified execution.
*
* If the device is not able to complete the execution within the specified
* duration, the execution may be aborted. The timeout duration begins at a
* call to one of:
* - {@link ANeuralNetworksExecution_burstCompute}
* - {@link ANeuralNetworksExecution_compute}
* - {@link ANeuralNetworksExecution_startCompute}
* - {@link ANeuralNetworksExecution_startComputeWithDependencies}
*
* This timeout duration acts as a hint to drivers, and can be used to both
* free up compute resources within the driver and return control back to the
* application quicker than is possible without the hint. It enables drivers
* that are able to estimate how long an execution will take to abort the
* execution before it has even started if the driver believes the execution
* cannot be completed within the timeout duration. Similarly, it enables
* drivers to abort an ongoing execution if it is taking too long. However,
* this call does not guarantee that the execution will complete or abort
* within the timeout duration.
*
* By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
* the timeout duration for execution is considered infinite.
*
* The {@link ANeuralNetworksExecution} must have been created from an
* {@link ANeuralNetworksCompilation} which in turn was created from
* {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
* otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
* device has a feature level reported by
* {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
* the timeout duration hint will be ignored.
*
* See {@link ANeuralNetworksExecution} for information on multithreaded
* usage.
*
* @param execution The execution to be modified.
* @param duration The maximum amount of time in nanoseconds that is expected
* to be spent executing a model. If this duration is exceeded, the execution
* may be aborted. If set to 0, the timeout duration is considered
* infinite.
*
* @return ANEURALNETWORKS_NO_ERROR if successful.
*
* Available since API level 30.
*/
int (*ANeuralNetworksExecution_setTimeout)(
ANeuralNetworksExecution* execution, uint64_t duration);
/**
* Set the maximum duration of WHILE loops in the specified execution.
*
* This is a fuzzy per-loop timeout intended to prevent infinite loops.
*
* If a WHILE loop condition model does not output false within the specified
* duration, the execution will be aborted.
*
* See {@link ANeuralNetworks_getDefaultLoopTimeout} and
* {@link ANeuralNetworks_getMaximumLoopTimeout} for the default
* and maximum timeout values.
*
* See {@link ANeuralNetworksExecution} for information on multithreaded
* usage.
*
* @param execution The execution to be modified.
* @param duration The maximum amount of time in nanoseconds that can be spent
* executing a WHILE loop. If the specified duration value exceeds the
* value produced by {@link ANeuralNetworks_getMaximumLoopTimeout}, it will be
* overridden by that value.
*
* @return ANEURALNETWORKS_NO_ERROR if successful.
* ANEURALNETWORKS_BAD_STATE if execution has started.
* ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
*
* Available since API level 30.
*/
int (*ANeuralNetworksExecution_setLoopTimeout)(
ANeuralNetworksExecution* execution, uint64_t duration);
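And a matching sketch for the per-run hints, mirroring the delegate's Invoke path above (the helper name and durations are illustrative; `execution` is assumed to come from a single-device compilation):

#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

// Sketch: set the per-run and WHILE-loop timeout hints before computing.
int ComputeWithQos(ANeuralNetworksExecution* execution) {
  const NnApi* nnapi = NnApiImplementation();
  if (nnapi->android_sdk_version >= 30) {
    // Hint: abort the run if it takes longer than ~100 ms.
    nnapi->ANeuralNetworksExecution_setTimeout(execution, 100ULL * 1000 * 1000);
    // Fuzzy per-loop budget of ~10 ms for any WHILE loops in the model.
    nnapi->ANeuralNetworksExecution_setLoopTimeout(execution,
                                                   10ULL * 1000 * 1000);
  }
  return nnapi->ANeuralNetworksExecution_compute(execution);
}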
/**
* Get the dimensional information of the specified output operand of the
* model of the