Implement NNAPI QoS APIs in NNAPI delegate.
PiperOrigin-RevId: 311804298 Change-Id: Ia018050ca90fbc2cc12f363b5bc52727734e4abf
This commit is contained in:
parent
c77c31d45d
commit
cfb6d217c9
@ -3256,6 +3256,22 @@ TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
|
||||
RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
|
||||
"configuring NNAPI caching", nnapi_errno);
|
||||
}
|
||||
// Set compilation timeout and priority if applicable.
|
||||
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
||||
if (delegate_options.max_compilation_timeout_duration_ns > 0) {
|
||||
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
||||
context,
|
||||
nnapi_->ANeuralNetworksCompilation_setTimeout(
|
||||
compilation,
|
||||
delegate_options.max_compilation_timeout_duration_ns),
|
||||
"setting compilation timeout", nnapi_errno);
|
||||
}
|
||||
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
||||
context,
|
||||
nnapi_->ANeuralNetworksCompilation_setPriority(
|
||||
compilation, delegate_options.execution_priority),
|
||||
"setting compilation priority", nnapi_errno);
|
||||
}
|
||||
const int finish_result =
|
||||
nnapi_->ANeuralNetworksCompilation_finish(compilation);
|
||||
if (finish_result != ANEURALNETWORKS_NO_ERROR) {
|
||||
@ -3322,6 +3338,27 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
|
||||
std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
|
||||
execution_unique_ptr(execution, NNFreeExecution(nnapi_));
|
||||
|
||||
// Set execution timeout and loop timeout if applicable.
|
||||
const auto delegate_options =
|
||||
StatefulNnApiDelegate::GetOptions(node->delegate);
|
||||
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
|
||||
if (delegate_options.max_execution_timeout_duration_ns > 0) {
|
||||
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
||||
context,
|
||||
nnapi_->ANeuralNetworksExecution_setTimeout(
|
||||
execution, delegate_options.max_execution_timeout_duration_ns),
|
||||
"setting execution timeout", nnapi_errno);
|
||||
}
|
||||
if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
|
||||
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
||||
context,
|
||||
nnapi_->ANeuralNetworksExecution_setLoopTimeout(
|
||||
execution,
|
||||
delegate_options.max_execution_loop_timeout_duration_ns),
|
||||
"setting execution loop timeout", nnapi_errno);
|
||||
}
|
||||
}
|
||||
|
||||
// Set the input tensor buffers. Note: we access tflite tensors using
|
||||
// absolute indices but NN api indices inputs by relative indices.
|
||||
int relative_input_index = 0;
|
||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
|
||||
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
|
||||
|
||||
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
|
||||
@ -92,6 +93,30 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
|
||||
|
||||
// allow fp32 computation to be run in fp16.
|
||||
bool allow_fp16 = false;
|
||||
|
||||
// Specifies the relative priority for executions of the model.
|
||||
// Available values are {ANEURALNETWORKS_PRIORITY_LOW,
|
||||
// ANEURALNETWORKS_PRIORITY_MEDIUM, ANEURALNETWORKS_PRIORITY_HIGH,
|
||||
// ANEURALNETWORKS_PRIORITY_DEFAULT}.
|
||||
int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
|
||||
|
||||
// Specifies the maximum expected duration in nanoseconds for compiling the
|
||||
// model. If the device is not able to complete the compilation within the
|
||||
// specified duration, the compilation may be aborted. If set to 0, the
|
||||
// timeout duration is considered infinite.
|
||||
uint64_t max_compilation_timeout_duration_ns = 0;
|
||||
|
||||
// Specifies the maximum expected duration in nanoseconds for executing the
|
||||
// model. If the device is not able to complete the execution within the
|
||||
// specified duration, the execution may be aborted. If set to 0, the
|
||||
// timeout duration is considered infinite.
|
||||
uint64_t max_execution_timeout_duration_ns = 0;
|
||||
|
||||
// Specifies the maximum expected duration in nanoseconds for WHILE loops in
|
||||
// the execution. If a WHILE loop condition model does not output false
|
||||
// within the specified duration, the execution will be aborted. If set to
|
||||
// 0, the default timeout for loops will be used.
|
||||
uint64_t max_execution_loop_timeout_duration_ns = 0;
|
||||
};
|
||||
|
||||
// Uses default options.
|
||||
@ -189,6 +214,17 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
|
||||
int max_number_delegated_partitions;
|
||||
// allow fp32 computation to be run in fp16.
|
||||
bool allow_fp16;
|
||||
// Specifies the relative priority for executions of the model.
|
||||
int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
|
||||
// Specifies the maximum expected duration in nanoseconds for compiling the
|
||||
// model.
|
||||
uint64_t max_compilation_timeout_duration_ns = 0;
|
||||
// Specifies the maximum expected duration in nanoseconds for executing the
|
||||
// model.
|
||||
uint64_t max_execution_timeout_duration_ns = 0;
|
||||
// Specifies the maximum expected duration in nanoseconds for WHILE loops in
|
||||
// the execution.
|
||||
uint64_t max_execution_loop_timeout_duration_ns = 0;
|
||||
|
||||
~Data();
|
||||
|
||||
|
@ -304,6 +304,23 @@ TEST(NNAPIDelegate, StatefulDelegateWithCompilationCaching) {
|
||||
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
|
||||
}
|
||||
|
||||
// Sanity check for the state-ful NNAPI delegate with QoS hints.
|
||||
TEST(NNAPIDelegate, StatefulDelegateWithQoS) {
|
||||
// Populate every QoS-related option: a non-default priority plus all three
// timeout hints.
StatefulNnApiDelegate::Options options;
|
||||
options.execution_priority = ANEURALNETWORKS_PRIORITY_HIGH;
|
||||
// UINT64_MAX is the largest value the uint64_t timeout fields can hold.
// NOTE(review): presumably chosen so that the timeouts never fire during the
// test -- confirm against the NNAPI runtime behavior.
options.max_compilation_timeout_duration_ns = UINT64_MAX;
|
||||
options.max_execution_timeout_duration_ns = UINT64_MAX;
|
||||
options.max_execution_loop_timeout_duration_ns = UINT64_MAX;
|
||||
|
||||
// Build a float ADD model with the options above: two {1, 2, 2, 1} float
// inputs and no fused activation.
FloatAddOpModel m(options, {TensorType_FLOAT32, {1, 2, 2, 1}},
|
||||
{TensorType_FLOAT32, {1, 2, 2, 1}},
|
||||
{TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
|
||||
m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
|
||||
m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
|
||||
// The return value of Invoke() is not inspected; only the output tensor is
// checked below.
m.Invoke();
|
||||
// Expected values are the element-wise sums of the two inputs.
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
|
||||
}
|
||||
|
||||
// Sanity check for the state-ful NNAPI delegate using TfLiteBufferHandle.
|
||||
TEST(NNAPIDelegate, StatefulDelegateWithBufferHandles) {
|
||||
// Skip the test if Android specific functions could not be found.
|
||||
|
@ -215,6 +215,18 @@ enum {
|
||||
ANEURALNETWORKS_DEVICE_ACCELERATOR = 4,
|
||||
};
|
||||
|
||||
/**
|
||||
* Relative execution priority.
|
||||
*
|
||||
* Available since API level 30.
|
||||
*/
|
||||
enum {
|
||||
// Lowest of the three explicit levels (smallest numeric value).
ANEURALNETWORKS_PRIORITY_LOW = 90,
|
||||
// Middle level; also the value used for ANEURALNETWORKS_PRIORITY_DEFAULT.
ANEURALNETWORKS_PRIORITY_MEDIUM = 100,
|
||||
// Highest of the three explicit levels (largest numeric value).
ANEURALNETWORKS_PRIORITY_HIGH = 110,
|
||||
// Alias for ANEURALNETWORKS_PRIORITY_MEDIUM, not a distinct level.
ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM,
|
||||
};
|
||||
|
||||
/**
|
||||
* ANeuralNetworksMemory is an opaque type that represents memory.
|
||||
*
|
||||
@ -528,9 +540,21 @@ typedef int (*ANeuralNetworksCompilation_setCaching_fn)(
|
||||
ANeuralNetworksCompilation* compilation, const char* cacheDir,
|
||||
const uint8_t* token);
|
||||
|
||||
// Function-pointer type for ANeuralNetworksCompilation_setTimeout: takes the
// compilation to modify and a duration (documented in nanoseconds on the
// identically named NnApi member); returns an int status code.
typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(
|
||||
ANeuralNetworksCompilation* compilation, uint64_t duration);
|
||||
|
||||
// Function-pointer type for ANeuralNetworksCompilation_setPriority: takes the
// compilation to modify and a priority, which the NnApi member documentation
// says must be one of ANEURALNETWORKS_PRIORITY_*; returns an int status code.
typedef int (*ANeuralNetworksCompilation_setPriority_fn)(
|
||||
ANeuralNetworksCompilation* compilation, int priority);
|
||||
|
||||
// Function-pointer type for ANeuralNetworksExecution_compute: takes the
// execution to evaluate; returns an int status code.
typedef int (*ANeuralNetworksExecution_compute_fn)(
|
||||
ANeuralNetworksExecution* execution);
|
||||
|
||||
// Function-pointer type for ANeuralNetworksExecution_setTimeout: takes the
// execution to modify and a duration (documented in nanoseconds on the
// identically named NnApi member); returns an int status code.
typedef int (*ANeuralNetworksExecution_setTimeout_fn)(
|
||||
ANeuralNetworksExecution* execution, uint64_t duration);
|
||||
|
||||
// Function-pointer type for ANeuralNetworksExecution_setLoopTimeout: takes
// the execution to modify and a per-WHILE-loop duration (documented in
// nanoseconds on the identically named NnApi member); returns an int status
// code.
typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(
|
||||
ANeuralNetworksExecution* execution, uint64_t duration);
|
||||
|
||||
typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(
|
||||
ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank);
|
||||
|
||||
|
@ -215,6 +215,17 @@ const NnApi LoadNnApi() {
|
||||
ANeuralNetworksModel_getExtensionOperationType);
|
||||
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
|
||||
ANeuralNetworksModel_setOperandExtensionData);
|
||||
|
||||
// API 30 (NNAPI 1.3) methods.
|
||||
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
|
||||
ANeuralNetworksCompilation_setTimeout);
|
||||
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
|
||||
ANeuralNetworksCompilation_setPriority);
|
||||
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
|
||||
ANeuralNetworksExecution_setTimeout);
|
||||
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
|
||||
ANeuralNetworksExecution_setLoopTimeout);
|
||||
|
||||
return nnapi;
|
||||
}
|
||||
|
||||
|
@ -789,6 +789,76 @@ struct NnApi {
|
||||
ANeuralNetworksCompilation* compilation, const char* cacheDir,
|
||||
const uint8_t* token);
|
||||
|
||||
/**
|
||||
* Set the maximum expected duration for compiling the model.
|
||||
*
|
||||
* If the device is not able to complete the compilation within the specified
|
||||
* duration, the compilation may be aborted. The timeout duration begins at
|
||||
* the call to {@link ANeuralNetworksCompilation_finish}.
|
||||
*
|
||||
* This timeout duration acts as a hint to drivers, and can be used to both
|
||||
* free up compute resources within the driver and return control back to the
|
||||
* application quicker than is possible without the hint. It enables drivers
|
||||
* that are able to estimate how long a compilation will take to abort the
|
||||
* compilation before it has even started if the driver believes the
|
||||
* compilation cannot be completed within the timeout duration. Similarly, it
|
||||
* enables drivers to abort an ongoing compilation if it is taking too long.
|
||||
* However, this call does not guarantee that the compilation will complete or
|
||||
* abort within the timeout duration.
|
||||
*
|
||||
* By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called),
|
||||
* the timeout duration for compiling the model is considered infinite.
|
||||
*
|
||||
* The {@link ANeuralNetworksCompilation} must have been created with
|
||||
* {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
|
||||
* otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
|
||||
* device has a feature level reported by
|
||||
* {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
|
||||
* the timeout duration hint will be ignored.
|
||||
*
|
||||
* See {@link ANeuralNetworksCompilation} for information on multithreaded
|
||||
* usage.
|
||||
*
|
||||
* @param compilation The compilation to be modified.
|
||||
* @param duration The maximum amount of time in nanoseconds that is expected
|
||||
* to be spent finishing a compilation. If this duration is exceeded, the
|
||||
* compilation may be aborted. If set to 0, the timeout duration is
|
||||
* considered infinite.
|
||||
*
|
||||
* @return ANEURALNETWORKS_NO_ERROR if successful.
|
||||
*
|
||||
* Available since API level 30.
|
||||
*/
|
||||
int (*ANeuralNetworksCompilation_setTimeout)(
|
||||
ANeuralNetworksCompilation* compilation, uint64_t duration);
|
||||
|
||||
/**
|
||||
* Set the execution priority.
|
||||
*
|
||||
* Execution priorities are relative to other executions created by the same
|
||||
* application (specifically same uid) for the same device. Specifically,
|
||||
* priorities of executions from one application will not affect executions
|
||||
* from another application. Similarly, priorities of executions on one device
|
||||
* will not affect executions on another device.
|
||||
*
|
||||
* Higher priority executions may use more compute resources than lower
|
||||
* priority executions, and may preempt or starve lower priority executions.
|
||||
*
|
||||
* See {@link ANeuralNetworksCompilation} for information on multithreaded
|
||||
* usage.
|
||||
*
|
||||
* Available since API level 30.
|
||||
*
|
||||
* @param compilation The compilation to be modified.
|
||||
* @param priority The relative priority of the execution compared to other
|
||||
* executions created by the application. Must be one of
|
||||
* ANEURALNETWORKS_PRIORITY_*.
|
||||
*
|
||||
* @return ANEURALNETWORKS_NO_ERROR if successful.
|
||||
*/
|
||||
int (*ANeuralNetworksCompilation_setPriority)(
|
||||
ANeuralNetworksCompilation* compilation, int priority);
|
||||
|
||||
/**
|
||||
* Schedule synchronous evaluation of the execution.
|
||||
*
|
||||
@ -813,6 +883,84 @@ struct NnApi {
|
||||
*/
|
||||
int (*ANeuralNetworksExecution_compute)(ANeuralNetworksExecution* execution);
|
||||
|
||||
/**
|
||||
* Set the maximum expected duration of the specified execution.
|
||||
*
|
||||
* If the device is not able to complete the execution within the specified
|
||||
* duration, the execution may be aborted. The timeout duration begins at a
|
||||
* call to one of:
|
||||
* - {@link ANeuralNetworksExecution_burstCompute}
|
||||
* - {@link ANeuralNetworksExecution_compute}
|
||||
* - {@link ANeuralNetworksExecution_startCompute}
|
||||
* - {@link ANeuralNetworksExecution_startComputeWithDependencies}
|
||||
*
|
||||
* This timeout duration acts as a hint to drivers, and can be used to both
|
||||
* free up compute resources within the driver and return control back to the
|
||||
* application quicker than is possible without the hint. It enables drivers
|
||||
* that are able to estimate how long an execution will take to abort the
|
||||
* execution before it has even started if the driver believes the execution
|
||||
* cannot be completed within the timeout duration. Similarly, it enables
|
||||
* drivers to abort an ongoing execution if it is taking too long. However,
|
||||
* this call does not guarantee that the execution will complete or abort
|
||||
* within the timeout duration.
|
||||
*
|
||||
* By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
|
||||
* the timeout duration for execution is considered infinite.
|
||||
*
|
||||
* The {@link ANeuralNetworksExecution} must have been created from an
|
||||
* {@link ANeuralNetworksCompilation} which in turn was created from
|
||||
* {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
|
||||
* otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
|
||||
* device has a feature level reported by
|
||||
* {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
|
||||
* the timeout duration hint will be ignored.
|
||||
*
|
||||
* See {@link ANeuralNetworksExecution} for information on multithreaded
|
||||
* usage.
|
||||
*
|
||||
* @param execution The execution to be modified.
|
||||
* @param duration The maximum amount of time in nanoseconds that is expected
|
||||
* to be spent executing a model. If this duration is exceeded, the execution
|
||||
* may be aborted. If set to 0, the timeout duration is considered
|
||||
* infinite.
|
||||
*
|
||||
* @return ANEURALNETWORKS_NO_ERROR if successful.
|
||||
*
|
||||
* Available since API level 30.
|
||||
*/
|
||||
int (*ANeuralNetworksExecution_setTimeout)(
|
||||
ANeuralNetworksExecution* execution, uint64_t duration);
|
||||
|
||||
/**
|
||||
* Set the maximum duration of WHILE loops in the specified execution.
|
||||
*
|
||||
* This is a fuzzy per-loop timeout intended to prevent infinite loops.
|
||||
*
|
||||
* If a WHILE loop condition model does not output false within the specified
|
||||
* duration, the execution will be aborted.
|
||||
*
|
||||
* See {@link ANeuralNetworks_getDefaultLoopTimeout} and
|
||||
* {@link ANeuralNetworks_getMaximumLoopTimeout} for the default
|
||||
* and maximum timeout values.
|
||||
*
|
||||
* See {@link ANeuralNetworksExecution} for information on multithreaded
|
||||
* usage.
|
||||
*
|
||||
* @param execution The execution to be modified.
|
||||
* @param duration The maximum amount of time in nanoseconds that can be spent
|
||||
* executing a WHILE loop. If the specified duration value exceeds the
|
||||
* value produced by {@link ANeuralNetworks_getMaximumLoopTimeout}, it will be
|
||||
* overridden by that value.
|
||||
*
|
||||
* @return ANEURALNETWORKS_NO_ERROR if successful.
|
||||
* ANEURALNETWORKS_BAD_STATE if execution has started.
|
||||
* ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
|
||||
*
|
||||
* Available since API level 30.
|
||||
*/
|
||||
int (*ANeuralNetworksExecution_setLoopTimeout)(
|
||||
ANeuralNetworksExecution* execution, uint64_t duration);
|
||||
|
||||
/**
|
||||
* Get the dimensional information of the specified output operand of the
|
||||
* model of the
|
||||
|
Loading…
Reference in New Issue
Block a user