diff --git a/tensorflow/lite/nnapi/NeuralNetworksShim.h b/tensorflow/lite/nnapi/NeuralNetworksShim.h index 3a4e15006e8..1800c70685c 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/lite/nnapi/NeuralNetworksShim.h @@ -255,6 +255,32 @@ inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, EXECUTE_FUNCTION_RETURN(model, index, buffer, length); } +/** + * Sets an operand's per channel quantization parameters. + * + * Sets parameters required by a tensor of type + * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}. + * This function must be called for every tensor of type + * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} before + * calling {@link ANeuralNetworksModel_finish}. + * + * Available since API level 29. + * + * @param model The model to be modified. + * @param index The index of the model operand we're setting. + * @param channelQuant The per channel quantization parameters for the operand. + * No memory in this struct needs to outlive the call to + * this function. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_setOperandSymmPerChannelQuantParams( + ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) { + LOAD_FUNCTION(ANeuralNetworksModel_setOperandSymmPerChannelQuantParams); + EXECUTE_FUNCTION_RETURN(model, index, channelQuant); +} + /** * Sets an operand to a value stored in a memory object. * @@ -686,6 +712,445 @@ inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) { EXECUTE_FUNCTION(event); } +/** + * Get the number of available devices. + * + * @param numDevices Used to return the number of devices. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 29. + */ +inline int ANeuralNetworks_getDeviceCount(uint32_t* numDevices) { + LOAD_FUNCTION(ANeuralNetworks_getDeviceCount); + EXECUTE_FUNCTION_RETURN(numDevices); +} + +/** + * Get the representation of the specified device. + * + * @param devIndex The index of the specified device. Must be less than the + * number of available devices. + * @param device The representation of the specified device. + * The same representation will always be returned for the + * specified device. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 29. + */ + +inline int ANeuralNetworks_getDevice(uint32_t devIndex, + ANeuralNetworksDevice** device) { + LOAD_FUNCTION(ANeuralNetworks_getDevice); + EXECUTE_FUNCTION_RETURN(devIndex, device); +} + +/** + * Get the name of the specified device. + * + * @param device The representation of the specified device. + * @param name The returned name of the specified device. The name will be in + * UTF-8 and will be null-terminated. It will be recognizable as a + * known device name rather than a cryptic string. For devices + * with API level 29 and above, the format of the name is + * {VENDOR}-{DEVICE}, e.g. “google-ipu”. For devices with feature + * level 28 or lower, the name will always be “unknown-device”. + * The name will remain valid for the duration of the application. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 29. + */ +inline int ANeuralNetworksDevice_getName(const ANeuralNetworksDevice* device, + const char** name) { + LOAD_FUNCTION(ANeuralNetworksDevice_getName); + EXECUTE_FUNCTION_RETURN(device, name); +} + +/** + * Get the version of the driver implementation of the specified device. 
+ *
+ * It’s the responsibility of the driver implementor to ensure that this version
+ * string uniquely distinguishes this implementation from all previous
+ * implementations.
+ *
+ * This version string must not be confused with the feature level which is
+ * solely defined by {@link ANeuralNetworksDevice_getFeatureLevel}. There is no
+ * implicit ordering of the versions. For example, it is not possible to filter
+ * all drivers older than a certain version.
+ *
+ * Application developers may use this version string to avoid or prefer
+ * specific driver implementations. For example, an application may want to do
+ * so because:
+ *     - A specific version of the driver does not provide the required
+ *       performance, perhaps because of a performance regression.
+ *     - A specific version of the driver has a bug or returns results that
+ *       don’t match the minimum precision requirement for the application.
+ *
+ * @param device The representation of the specified device.
+ * @param version The returned version string of the driver for the specified
+ *                device. The string will be in UTF-8 and will be
+ *                null-terminated. For devices with feature level 28 or lower,
+ *                "UNKNOWN" will be returned. The version string will remain
+ *                valid for the duration of the application.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksDevice_getVersion(const ANeuralNetworksDevice* device,
+                                            const char** version) {
+  LOAD_FUNCTION(ANeuralNetworksDevice_getVersion);
+  EXECUTE_FUNCTION_RETURN(device, version);
+}
+
+/**
+ * Get the supported NNAPI version of the specified device.
+ *
+ * Each device has a supported feature level, which is the most advanced feature
+ * this driver implements. For example, if the driver implements the features
+ * introduced in Android P, but does not implement the features introduced after
+ * Android P, the value would be 28. Developers could decide whether or not the
+ * specified device should be used for a Model that has certain feature
+ * requirements.
+ *
+ * @param device The representation of the specified device.
+ * @param featureLevel The API level of the most advanced feature this driver
+ *                     implements.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksDevice_getFeatureLevel(
+    const ANeuralNetworksDevice* device, int64_t* featureLevel) {
+  LOAD_FUNCTION(ANeuralNetworksDevice_getFeatureLevel);
+  EXECUTE_FUNCTION_RETURN(device, featureLevel);
+}
+
+/**
+ * Get the supported operations for a specified set of devices. If multiple
+ * devices are selected, the supported operation list is a union of supported
+ * operations of all selected devices.
+ *
+ * @param model The model to be queried.
+ * @param devices The set of devices. Must not contain duplicates.
+ * @param numDevices The number of devices in the set.
+ * @param supportedOps The boolean array to be filled. True means supported. The
+ *                     size of the boolean array must be at least as large as
+ *                     the number of operations in the model. The order of
+ *                     elements in the supportedOps array matches the order in
+ *                     which the corresponding operations were added to the
+ *                     model.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksModel_getSupportedOperationsForDevices(
+    const ANeuralNetworksModel* model,
+    const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+    bool* supportedOps) {
+  LOAD_FUNCTION(ANeuralNetworksModel_getSupportedOperationsForDevices);
+  EXECUTE_FUNCTION_RETURN(model, devices, numDevices, supportedOps);
+}
+
+/**
+ * Create a {@link ANeuralNetworksCompilation} to compile the given model for a
+ * specified set of devices. If more than one device is specified, the
+ * compilation will distribute the workload automatically across the devices.
+ * The model must be fully supported by the specified set of devices. This means
+ * that ANeuralNetworksModel_getSupportedOperationsForDevices() must have
+ * returned true for every operation for that model/devices pair.
+ *
+ * @param model The {@link ANeuralNetworksModel} to be compiled.
+ * @param devices The set of devices. Must not contain duplicates.
+ * @param numDevices The number of devices in the set.
+ * @param compilation The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ *         if the model is invalid.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksCompilation_createForDevices(
+    ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices,
+    uint32_t numDevices, ANeuralNetworksCompilation** compilation) {
+  LOAD_FUNCTION(ANeuralNetworksCompilation_createForDevices);
+  EXECUTE_FUNCTION_RETURN(model, devices, numDevices, compilation);
+}
+
+/**
+ * Sets the compilation caching signature and the cache directory.
+ *
+ * Provides optional caching information to the runtime for faster repeated
+ * compilation.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param cacheDir The cache directory to store and retrieve caching data. It is
+ *                 recommended to use the code_cache provided by the Android
+ *                 runtime. If not using the code_cache, the user should choose
+ *                 a directory local to the application, and is responsible for
+ *                 managing and cleaning the cache entries.
+ * @param token The token provided by the user to specify a model, must be of
+ *              length ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN. The user should
+ *              ensure that the token is unique to a model within the
+ *              application. The NNAPI runtime will not detect token
+ *              collisions. If there is a collision, the compilation outcome may
+ *              be incorrect without any error being reported.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+inline int ANeuralNetworksCompilation_setCaching(
+    ANeuralNetworksCompilation* compilation, const char* cacheDir,
+    const uint8_t* token) {
+  LOAD_FUNCTION(ANeuralNetworksCompilation_setCaching);
+  EXECUTE_FUNCTION_RETURN(compilation, cacheDir, token);
+}
+
+/**
+ * Schedule synchronous evaluation of the execution.
+ *
+ * Schedules synchronous evaluation of the execution. Returns once the
+ * execution has completed and the outputs are ready to be consumed.
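+ *
+ * A minimal call sketch (hypothetical caller code; `execution` is assumed to
+ * already have all inputs and outputs bound, and error handling is elided):
+ *
+ *   int status = ANeuralNetworksExecution_compute(execution);
+ *   if (status == ANEURALNETWORKS_NO_ERROR) {
+ *     // The outputs can be consumed here.
+ *   }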
+ * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * See {@link ANeuralNetworksExecution_startCompute} for asynchronous execution. + * Synchronous execution incurs lower overhead than asynchronous execution. + * + * Available since API level 29. + * + * @param execution The execution to be scheduled and executed. + * + * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally. + * ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory + * cannot be properly mapped. + */ +inline int ANeuralNetworksExecution_compute( + ANeuralNetworksExecution* execution) { + LOAD_FUNCTION(ANeuralNetworksExecution_compute); + EXECUTE_FUNCTION_RETURN(execution); +} + +/** + * Get the dimensional information of the specified output operand of the model + * of the + * {@link ANeuralNetworksExecution}. + * + * On asynchronous execution initiated by {@link + * ANeuralNetworksExecution_startCompute}, + * {@link ANeuralNetworksEvent_wait} must be called prior to this function to + * recuperate the resources used by the execution. + * + * @param execution The execution to be queried. + * @param index The index of the output argument we are querying. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param rank The rank of the output operand. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, + * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an + * insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index + * is invalid. + * + * Available since API level 29. + */ +inline int ANeuralNetworksExecution_getOutputOperandRank( + ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank) { + LOAD_FUNCTION(ANeuralNetworksExecution_getOutputOperandRank); + EXECUTE_FUNCTION_RETURN(execution, index, rank); +} + +/** + * Get the dimensional information of the specified output operand of the model + * of the + * {@link ANeuralNetworksExecution}. The target output operand cannot be a + * scalar. + * + * On asynchronous execution initiated by + * {@link ANeuralNetworksExecution_startCompute}, + * {@link ANeuralNetworksEvent_wait} must be called prior to this function to + * recuperate the resources used by the execution. + * + * @param execution The execution to be queried. + * @param index The index of the output argument we are querying. It is an index + * into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with + * {@link ANeuralNetworksModel_addOperand}. + * @param dimensions The dimension array to be filled. The size of the array + * must be exactly as large as the rank of the output operand + * to be queried in the model. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, + * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is provided an + * insufficient buffer at execution time, ANEURALNETWORKS_BAD_DATA if the index + * is invalid or if the target is a scalar. + * + * Available since API level 29. + */ +inline int ANeuralNetworksExecution_getOutputOperandDimensions( + ANeuralNetworksExecution* execution, int32_t index, uint32_t* dimensions) { + LOAD_FUNCTION(ANeuralNetworksExecution_getOutputOperandDimensions); + EXECUTE_FUNCTION_RETURN(execution, index, dimensions); +} + +/** + * Create a {@link ANeuralNetworksBurst} to apply the given compilation. 
+ * This only creates the burst object. Computation is only performed once + * {@link ANeuralNetworksExecution_burstCompute} is invoked with a valid + * {@link ANeuralNetworksExecution} and {@link ANeuralNetworksBurst}. + * + *
+ * The provided compilation must outlive the burst object.
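+ *
+ * For example (hypothetical caller code; error checks omitted), teardown has
+ * to happen in this order:
+ *
+ *   ANeuralNetworksBurst_free(burst);
+ *   ANeuralNetworksCompilation_free(compilation);  // only after the burst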
+ * + * Available since API level 29. + * + * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated. + * @param burst The newly created object or NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA + * if the compilation is invalid. + */ +inline int ANeuralNetworksBurst_create(ANeuralNetworksCompilation* compilation, + ANeuralNetworksBurst** burst) { + LOAD_FUNCTION(ANeuralNetworksBurst_create); + EXECUTE_FUNCTION_RETURN(compilation, burst); +} + +/** + * Destroys the burst object. + * + * Available since API level 29. + * + * @param burst The burst object to be destroyed. Passing NULL is acceptable and + * results in no operation. + */ +inline void ANeuralNetworksBurst_free(ANeuralNetworksBurst* burst) { + LOAD_FUNCTION(ANeuralNetworksBurst_free); + EXECUTE_FUNCTION(burst); +} + +/** + * Schedule synchronous evaluation of the execution on a burst object. + * + *
+ * Schedules synchronous evaluation of the execution. Returns once the
+ * execution has completed and the outputs are ready to be consumed.
+ * + *
+ * There must be at most one {@link ANeuralNetworksExecution} processing at
+ * any given time for any given burst object. Any
+ * {@link ANeuralNetworksExecution} launched before the previous has finished
+ * will result in ANEURALNETWORKS_BAD_STATE.
+ * + * Available since API level 29. + * + * @param burst The burst object to execute on. + * @param execution The execution to be scheduled and executed. The execution + * must be created from the same {@link + * ANeuralNetworksCompilation} as the burst object. + * + * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally. + */ +inline int ANeuralNetworksExecution_burstCompute( + ANeuralNetworksExecution* execution, ANeuralNetworksBurst* burst) { + LOAD_FUNCTION(ANeuralNetworksExecution_burstCompute); + EXECUTE_FUNCTION_RETURN(execution, burst); +} + +/** + * Creates a shared memory object from an AHardwareBuffer handle. + * + * If the shared memory is backed by an AHardwareBuffer of + * AHARDWAREBUFFER_FORMAT_BLOB format, it can be used the same way as shared + * memory created from a file handle. See + * {@link ANeuralNetworksMemory} for a description on how to use this shared + * memory. + * + * If the shared memory is backed by an AHardwareBuffer of a format other than + * AHARDWAREBUFFER_FORMAT_BLOB, it can only be used for Model inputs and + * outputs. When calling {@link ANeuralNetworksExecution_setInputFromMemory} or + * {@link ANeuralNetworksExecution_setOutputFromMemory} with the shared memory, + * both offset and length must be set to zero and the entire memory region will + * be associated with the specified input or output operand. There is no + * guarantee that an arbitrary AHardwareBuffer_Format and + * AHardwareBuffer_UsageFlags combination can be used by arbitrary devices. The + * execution will fail if selected set of devices cannot consume the buffer. + * + * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with shared + * memory backed by an AHardwareBuffer of a format other than + * AHARDWAREBUFFER_FORMAT_BLOB is disallowed. + * + * TODO(miaowang): add documentation about intended usage with introspection + * API. + * + * Available since API level 29. + * + * @param ahwb The AHardwareBuffer handle. + * @param memory The memory object to be created. + * Set to NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if the request completed normally. + * + * @see AHardwareBuffer + */ +inline int ANeuralNetworksMemory_createFromAHardwareBuffer( + const AHardwareBuffer* ahwb, ANeuralNetworksMemory** memory) { + LOAD_FUNCTION(ANeuralNetworksMemory_createFromAHardwareBuffer); + EXECUTE_FUNCTION_RETURN(ahwb, memory); +} + +/** + * Specifies whether duration of the {@link ANeuralNetworksExecution} is to be + * measured. By default, duration is not measured. + * + * The {@link ANeuralNetworksExecution} must have been created with + * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * Available since API level 29. + * + * @param execution The execution to be modified. + * @param measure 'true' if duration is to be measured, 'false' if not. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksExecution_setMeasureTiming( + ANeuralNetworksExecution* execution, bool measure) { + LOAD_FUNCTION(ANeuralNetworksExecution_setMeasureTiming); + EXECUTE_FUNCTION_RETURN(execution, measure); +} + +/** + * Get the time spent in the specified {@link ANeuralNetworksExecution}, in + * nanoseconds. The execution must have completed. + * + * @param execution The execution to be queried. + * @param durationCode The measurement to be queried, specified by {@link + * DurationCode}. 
+ * @param duration The returned duration. If no measurement was requested by + * {@link ANeuralNetworksExecution_setMeasureTiming}, or for + * some other reason the duration is not available, UINT64_MAX will be returned. + * A particular device need not support any given measurement. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksExecution_getDuration( + const ANeuralNetworksExecution* execution, int32_t durationCode, + uint64_t* duration) { + LOAD_FUNCTION(ANeuralNetworksExecution_getDuration); + EXECUTE_FUNCTION_RETURN(execution, durationCode, duration); +} + /**/ #endif // TENSORFLOW_LITE_NNAPI_NEURALNETWORKSSHIM_H_ diff --git a/tensorflow/lite/nnapi/NeuralNetworksTypes.h b/tensorflow/lite/nnapi/NeuralNetworksTypes.h index 9291391491a..573500dbb35 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksTypes.h +++ b/tensorflow/lite/nnapi/NeuralNetworksTypes.h @@ -18,6 +18,8 @@ limitations under the License. #include #include +typedef struct AHardwareBuffer AHardwareBuffer; + // NN api types based on NNAPI header file // https://developer.android.com/ndk/reference/group/neural-networks @@ -241,6 +243,53 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation; */ typedef struct ANeuralNetworksExecution ANeuralNetworksExecution; +/** + * Parameters for ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL operand. + */ +typedef struct ANeuralNetworksSymmPerChannelQuantParams { + /* The index of the channel dimension. */ + uint32_t channelDim; + /** The size of the scale array. Should be equal to dimension[channelDim] of + * the Operand. */ + uint32_t scaleCount; + /** The array of scaling values for each channel. Each value must be greater + * than zero. */ + const float* scales; +} ANeuralNetworksSymmPerChannelQuantParams; + +/** + * ANeuralNetworksBurst is an opaque type that can be used to reduce the latency + * of a rapid sequence of executions. It will likely cause overhead if only used + * for a single execution. + * + * ANeuralNetworksBurst serves as a context object for any number of inferences + * using {@link ANeuralNetworksExecution} objects. An ANeuralNetworksBurst + * object and the {@link ANeuralNetworksExecution} objects used with it must all + * have been created from the same {@link ANeuralNetworksCompilation} object. + * + * This object is also used as a hint to drivers, providing insight to the + * lifetime of a rapid sequence of executions. For example, a driver may choose + * to increase the clock frequency of its accelerator for the lifetime of a + * burst object. + * + *
+ * To use:
+ * - Create a new burst object by calling {@link ANeuralNetworksBurst_create}.
+ * - For each inference, create an {@link ANeuralNetworksExecution}, set its
+ *   inputs and outputs, and run it with
+ *   {@link ANeuralNetworksExecution_burstCompute}, reusing the same burst
+ *   object for the whole sequence.
+ * - Destroy the burst object with {@link ANeuralNetworksBurst_free}.
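+ *
+ * A sketch of that sequence (hypothetical caller code; `compilation` and the
+ * executions are assumed to be fully set up, and error checks are omitted):
+ *
+ *   ANeuralNetworksBurst* burst = nullptr;
+ *   ANeuralNetworksBurst_create(compilation, &burst);
+ *   for (int i = 0; i < kNumInferences; ++i) {  // one execution at a time
+ *     ANeuralNetworksExecution_burstCompute(executions[i], burst);
+ *   }
+ *   ANeuralNetworksBurst_free(burst);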
+ * + * Available since API level 29. + */ +typedef struct ANeuralNetworksBurst ANeuralNetworksBurst; + /** * ANeuralNetworksOperandType describes the type of an operand. * This structure is used to describe both scalars and tensors. @@ -268,6 +317,16 @@ typedef struct ANeuralNetworksEvent ANeuralNetworksEvent; typedef int32_t ANeuralNetworksOperationType; +/** + * ANeuralNetworksDevice is an opaque type that represents a device. + * + * This type is used to query basic properties and supported operations of the + * corresponding device, and control which device(s) a model is to be run on. + * + * Available since API level 29. + */ +typedef struct ANeuralNetworksDevice ANeuralNetworksDevice; + // nn api function types typedef int (*ANeuralNetworksMemory_createFromFd_fn)( @@ -301,6 +360,10 @@ typedef int (*ANeuralNetworksModel_setOperandValue_fn)( ANeuralNetworksModel* model, int32_t index, const void* buffer, size_t length); +typedef int (*ANeuralNetworksModel_setOperandSymmPerChannelQuantParams_fn)( + ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksSymmPerChannelQuantParams* channelQuant); + typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)( ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksMemory* memory, size_t offset, size_t length); @@ -351,4 +414,70 @@ typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent* event); typedef int (*ASharedMemory_create_fn)(const char* name, size_t size); +typedef int (*ANeuralNetworks_getDeviceCount_fn)(uint32_t* numDevices); + +typedef int (*ANeuralNetworks_getDevice_fn)(uint32_t devIndex, + ANeuralNetworksDevice** device); + +typedef int (*ANeuralNetworksDevice_getName_fn)( + const ANeuralNetworksDevice* device, const char** name); + +typedef int (*ANeuralNetworksDevice_getType_fn)( + const ANeuralNetworksDevice* device, int32_t* type); + +typedef int (*ANeuralNetworksDevice_getVersion_fn)( + const ANeuralNetworksDevice* device, const char** version); + +typedef int (*ANeuralNetworksDevice_getFeatureLevel_fn)( + const ANeuralNetworksDevice* device, int64_t* featureLevel); + +typedef int (*ANeuralNetworksModel_getSupportedOperationsForDevices_fn)( + const ANeuralNetworksModel* model, + const ANeuralNetworksDevice* const* devices, uint32_t numDevices, + bool* supportedOps); + +typedef int (*ANeuralNetworksCompilation_createForDevices_fn)( + ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, + uint32_t numDevices, ANeuralNetworksCompilation** compilation); + +typedef int (*ANeuralNetworksCompilation_setCaching_fn)( + ANeuralNetworksCompilation* compilation, const char* cacheDir, + const uint8_t* token); + +typedef int (*ANeuralNetworksExecution_compute_fn)( + ANeuralNetworksExecution* execution); + +typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)( + ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank); + +typedef int (*ANeuralNetworksExecution_getOutputOperandDimensions_fn)( + ANeuralNetworksExecution* execution, int32_t index, uint32_t* dimensions); + +typedef int (*ANeuralNetworksBurst_create_fn)( + ANeuralNetworksCompilation* compilation, ANeuralNetworksBurst** burst); + +typedef void (*ANeuralNetworksBurst_free_fn)(ANeuralNetworksBurst* burst); + +typedef int (*ANeuralNetworksExecution_burstCompute_fn)( + ANeuralNetworksExecution* execution, ANeuralNetworksBurst* burst); + +typedef int (*ANeuralNetworksMemory_createFromAHardwareBuffer_fn)( + const AHardwareBuffer* ahwb, ANeuralNetworksMemory** memory); + +typedef int 
(*ANeuralNetworksExecution_setMeasureTiming_fn)( + ANeuralNetworksExecution* execution, bool measure); + +typedef enum { + // Execution time on hardware (not driver, which runs on host processor). + ANEURALNETWORKS_DURATION_ON_HARDWARE = 0, + // Execution time in driver (including time on hardware). Excludes overhead + // such as that of the runtime itself and the IPC needed for the runtime to + // communicate with the driver. + ANEURALNETWORKS_DURATION_IN_DRIVER = 1, +} DurationCode; + +typedef int (*ANeuralNetworksExecution_getDuration_fn)( + const ANeuralNetworksExecution* execution, int32_t durationCode, + uint64_t* duration); + #endif // TENSORFLOW_LITE_NNAPI_NEURALNETWORKSTYPES_H_ diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc index e8b9aed4226..36301f1a83a 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.cc +++ b/tensorflow/lite/nnapi/nnapi_implementation.cc @@ -51,12 +51,12 @@ int32_t GetAndroidSdkVersion() { } #endif // __ANDROID__ -void* LoadFunction(void* handle, const char* name) { +void* LoadFunction(void* handle, const char* name, bool optional) { if (handle == nullptr) { return nullptr; } void* fn = dlsym(handle, name); - if (fn == nullptr) { + if (fn == nullptr && !optional) { NNAPI_LOG("nnapi error: unable to open function %s", name); } return fn; @@ -78,8 +78,13 @@ int ASharedMemory_create(const char* name, size_t size) { } #endif // __ANDROID__ -#define LOAD_FUNCTION(handle, name) \ - nnapi.name = reinterpret_cast(LoadFunction(handle, #name)); +#define LOAD_FUNCTION(handle, name) \ + nnapi.name = reinterpret_cast( \ + LoadFunction(handle, #name, /*optional*/ false)); + +#define LOAD_FUNCTION_OPTIONAL(handle, name) \ + nnapi.name = reinterpret_cast( \ + LoadFunction(handle, #name, /*optional*/ true)); const NnApi LoadNnApi() { NnApi nnapi = {}; @@ -117,6 +122,9 @@ const NnApi LoadNnApi() { LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_finish); LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_addOperand); LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_setOperandValue); + LOAD_FUNCTION_OPTIONAL( + libneuralnetworks, + ANeuralNetworksModel_setOperandSymmPerChannelQuantParams); LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_setOperandValueFromMemory); LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_addOperation); @@ -143,7 +151,33 @@ const NnApi LoadNnApi() { #else nnapi.ASharedMemory_create = ASharedMemory_create; #endif // __ANDROID__ - + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworks_getDeviceCount); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworks_getDevice); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksDevice_getName); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksDevice_getVersion); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksDevice_getFeatureLevel); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksModel_getSupportedOperationsForDevices); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksCompilation_createForDevices); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksCompilation_setCaching); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksExecution_compute); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksExecution_getOutputOperandRank); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksExecution_getOutputOperandDimensions); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksBurst_create); + 
LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksBurst_free); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksExecution_burstCompute); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksMemory_createFromAHardwareBuffer); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksExecution_setMeasureTiming); + LOAD_FUNCTION_OPTIONAL(libneuralnetworks, + ANeuralNetworksExecution_getDuration); return nnapi; } diff --git a/tensorflow/lite/nnapi/nnapi_implementation.h b/tensorflow/lite/nnapi/nnapi_implementation.h index 82d7cc75c12..66a36dbbc3c 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.h +++ b/tensorflow/lite/nnapi/nnapi_implementation.h @@ -173,6 +173,29 @@ struct NnApi { int32_t index, const void* buffer, size_t length); + /** + * Sets an operand's per channel quantization parameters. + * + * Sets parameters required by a tensor of type + * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}. + * This function must be called for every tensor of type + * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} before + * calling {@link ANeuralNetworksModel_finish}. + * + * Available since API level 29. + * + * @param model The model to be modified. + * @param index The index of the model operand we're setting. + * @param channelQuant The per channel quantization parameters for the + * operand. No memory in this struct needs to outlive the + * call to this function. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ + int (*ANeuralNetworksModel_setOperandSymmPerChannelQuantParams)( + ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksSymmPerChannelQuantParams* channelQuant); + /** * Sets an operand to a value stored in a memory object. * @@ -569,6 +592,399 @@ struct NnApi { // which was added in 8.1. int (*ASharedMemory_create)(const char* name, size_t size); + /** + * Get the number of available devices. + * + * @param numDevices Used to return the number of devices. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 29. + */ + int (*ANeuralNetworks_getDeviceCount)(uint32_t* numDevices); + + /** + * Get the representation of the specified device. + * + * @param devIndex The index of the specified device. Must be less than the + * number of available devices. + * @param device The representation of the specified device. + * The same representation will always be returned for the + * specified device. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 29. + */ + + int (*ANeuralNetworks_getDevice)(uint32_t devIndex, + ANeuralNetworksDevice** device); + + /** + * Get the name of the specified device. + * + * @param device The representation of the specified device. + * @param name The returned name of the specified device. The name will be + * in UTF-8 and will be null-terminated. It will be recognizable + * as a known device name rather than a cryptic string. For + * devices with API level 29 and above, the format of the name is + * {VENDOR}-{DEVICE}, e.g. “google-ipu”. For devices with feature + * level 28 or lower, the name will always be “unknown-device”. + * The name will remain valid for the duration of the application. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + * + * Available since API level 29. + */ + int (*ANeuralNetworksDevice_getName)(const ANeuralNetworksDevice* device, + const char** name); + + /** + * Get the version of the driver implementation of the specified device. 
+ *
+ * It’s the responsibility of the driver implementor to ensure that this
+ * version string uniquely distinguishes this implementation from all previous
+ * implementations.
+ *
+ * This version string must not be confused with the feature level which is
+ * solely defined by {@link ANeuralNetworksDevice_getFeatureLevel}. There is
+ * no implicit ordering of the versions. For example, it is not possible to
+ * filter all drivers older than a certain version.
+ *
+ * Application developers may use this version string to avoid or prefer
+ * specific driver implementations. For example, an application may want to do
+ * so because:
+ *     - A specific version of the driver does not provide the required
+ *       performance, perhaps because of a performance regression.
+ *     - A specific version of the driver has a bug or returns results that
+ *       don’t match the minimum precision requirement for the application.
+ *
+ * @param device The representation of the specified device.
+ * @param version The returned version string of the driver for the specified
+ *                device. The string will be in UTF-8 and will be
+ *                null-terminated. For devices with feature level 28 or lower,
+ *                "UNKNOWN" will be returned. The version string will remain
+ *                valid for the duration of the application.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+ int (*ANeuralNetworksDevice_getVersion)(const ANeuralNetworksDevice* device,
+                                         const char** version);
+
+ /**
+ * Get the supported NNAPI version of the specified device.
+ *
+ * Each device has a supported feature level, which is the most advanced
+ * feature this driver implements. For example, if the driver implements the
+ * features introduced in Android P, but does not implement the features
+ * introduced after Android P, the value would be 28. Developers could decide
+ * whether or not the specified device should be used for a Model that has
+ * certain feature requirements.
+ *
+ * @param device The representation of the specified device.
+ * @param featureLevel The API level of the most advanced feature this driver
+ *                     implements.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+ int (*ANeuralNetworksDevice_getFeatureLevel)(
+     const ANeuralNetworksDevice* device, int64_t* featureLevel);
+
+ /**
+ * Get the supported operations for a specified set of devices. If multiple
+ * devices are selected, the supported operation list is a union of supported
+ * operations of all selected devices.
+ *
+ * @param model The model to be queried.
+ * @param devices The set of devices. Must not contain duplicates.
+ * @param numDevices The number of devices in the set.
+ * @param supportedOps The boolean array to be filled. True means supported.
+ *                     The size of the boolean array must be at least as large
+ *                     as the number of operations in the model. The order of
+ *                     elements in the supportedOps array matches the order in
+ *                     which the corresponding operations were added to the
+ *                     model.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+ int (*ANeuralNetworksModel_getSupportedOperationsForDevices)(
+     const ANeuralNetworksModel* model,
+     const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+     bool* supportedOps);
+
+ /**
+ * Create a {@link ANeuralNetworksCompilation} to compile the given model for
+ * a specified set of devices. If more than one device is specified, the
+ * compilation will distribute the workload automatically across the devices.
+ * The model must be fully supported by the specified set of devices. This
+ * means that ANeuralNetworksModel_getSupportedOperationsForDevices() must
+ * have returned true for every operation for that model/devices pair.
+ *
+ * @param model The {@link ANeuralNetworksModel} to be compiled.
+ * @param devices The set of devices. Must not contain duplicates.
+ * @param numDevices The number of devices in the set.
+ * @param compilation The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ *         if the model is invalid.
+ *
+ * Available since API level 29.
+ */
+ int (*ANeuralNetworksCompilation_createForDevices)(
+     ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices,
+     uint32_t numDevices, ANeuralNetworksCompilation** compilation);
+
+ /**
+ * Sets the compilation caching signature and the cache directory.
+ *
+ * Provides optional caching information to the runtime for faster repeated
+ * compilation.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param cacheDir The cache directory to store and retrieve caching data. It
+ *                 is recommended to use the code_cache provided by the
+ *                 Android runtime. If not using the code_cache, the user
+ *                 should choose a directory local to the application, and is
+ *                 responsible for managing and cleaning the cache entries.
+ * @param token The token provided by the user to specify a model, must be of
+ *              length ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN. The user
+ *              should ensure that the token is unique to a model within the
+ *              application. The NNAPI runtime will not detect token
+ *              collisions. If there is a collision, the compilation outcome
+ *              may be incorrect without any error being reported.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 29.
+ */
+ int (*ANeuralNetworksCompilation_setCaching)(
+     ANeuralNetworksCompilation* compilation, const char* cacheDir,
+     const uint8_t* token);
+
+ /**
+ * Schedule synchronous evaluation of the execution.
+ *
+ * Schedules synchronous evaluation of the execution. Returns once the
+ * execution has completed and the outputs are ready to be consumed.
+ * + * See {@link ANeuralNetworksExecution} for information on multithreaded + * usage. + * + * See {@link ANeuralNetworksExecution_startCompute} for asynchronous + * execution. Synchronous execution incurs lower overhead than asynchronous + * execution. + * + * Available since API level 29. + * + * @param execution The execution to be scheduled and executed. + * + * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally. + * ANEURALNETWORKS_UNMAPPABLE if the execution input or output memory + * cannot be properly mapped. + */ + int (*ANeuralNetworksExecution_compute)(ANeuralNetworksExecution* execution); + + /** + * Get the dimensional information of the specified output operand of the + * model of the + * {@link ANeuralNetworksExecution}. + * + * On asynchronous execution initiated by {@link + * ANeuralNetworksExecution_startCompute}, + * {@link ANeuralNetworksEvent_wait} must be called prior to this function to + * recuperate the resources used by the execution. + * + * @param execution The execution to be queried. + * @param index The index of the output argument we are querying. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is + * not the index associated with + * {@link ANeuralNetworksModel_addOperand}. + * @param rank The rank of the output operand. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, + * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is + * provided an insufficient buffer at execution time, + * ANEURALNETWORKS_BAD_DATA if the index is invalid. + * + * Available since API level 29. + */ + int (*ANeuralNetworksExecution_getOutputOperandRank)( + ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank); + + /** + * Get the dimensional information of the specified output operand of the + * model of the + * {@link ANeuralNetworksExecution}. The target output operand cannot be a + * scalar. + * + * On asynchronous execution initiated by {@link + * ANeuralNetworksExecution_startCompute}, + * {@link ANeuralNetworksEvent_wait} must be called prior to this function to + * recuperate the resources used by the execution. + * + * @param execution The execution to be queried. + * @param index The index of the output argument we are querying. It is an + * index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is + * not the index associated with + * {@link ANeuralNetworksModel_addOperand}. + * @param dimensions The dimension array to be filled. The size of the array + * must be exactly as large as the rank of the output + * operand to be queried in the model. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, + * ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE if the target output is + * provided an insufficient buffer at execution time, + * ANEURALNETWORKS_BAD_DATA if the index is invalid or if the target + * is a scalar. + * + * Available since API level 29. + */ + int (*ANeuralNetworksExecution_getOutputOperandDimensions)( + ANeuralNetworksExecution* execution, int32_t index, uint32_t* dimensions); + + /** + * Create a {@link ANeuralNetworksBurst} to apply the given compilation. + * This only creates the burst object. Computation is only performed once + * {@link ANeuralNetworksExecution_burstCompute} is invoked with a valid + * {@link ANeuralNetworksExecution} and {@link ANeuralNetworksBurst}. + * + *
+ * The provided compilation must outlive the burst object.
+ * + * Available since API level 29. + * + * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated. + * @param burst The newly created object or NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA + * if the compilation is invalid. + */ + int (*ANeuralNetworksBurst_create)(ANeuralNetworksCompilation* compilation, + ANeuralNetworksBurst** burst); + + /** + * Destroys the burst object. + * + * Available since API level 29. + * + * @param burst The burst object to be destroyed. Passing NULL is acceptable + * and results in no operation. + */ + void (*ANeuralNetworksBurst_free)(ANeuralNetworksBurst* burst); + + /** + * Schedule synchronous evaluation of the execution on a burst object. + * + *
+ * Schedules synchronous evaluation of the execution. Returns once the
+ * execution has completed and the outputs are ready to be consumed.
+ * + *
+ * There must be at most one {@link ANeuralNetworksExecution} processing at
+ * any given time for any given burst object. Any
+ * {@link ANeuralNetworksExecution} launched before the previous has finished
+ * will result in ANEURALNETWORKS_BAD_STATE.
+ * + * Available since API level 29. + * + * @param burst The burst object to execute on. + * @param execution The execution to be scheduled and executed. The execution + * must be created from the same {@link + * ANeuralNetworksCompilation} as the burst object. + * + * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally. + */ + int (*ANeuralNetworksExecution_burstCompute)( + ANeuralNetworksExecution* execution, ANeuralNetworksBurst* burst); + + /** + * Creates a shared memory object from an AHardwareBuffer handle. + * + * If the shared memory is backed by an AHardwareBuffer of + * AHARDWAREBUFFER_FORMAT_BLOB format, it can be used the same way as + * shared memory created from a file handle. See + * {@link ANeuralNetworksMemory} for a description on how to use this + * shared memory. + * + * If the shared memory is backed by an AHardwareBuffer of a format other + * than AHARDWAREBUFFER_FORMAT_BLOB, it can only be used for Model inputs + * and outputs. When calling + * {@link ANeuralNetworksExecution_setInputFromMemory} or + * {@link ANeuralNetworksExecution_setOutputFromMemory} with the shared + * memory, both offset and length must be set to zero and the entire + * memory region will be associated with the specified input or output + * operand. There is no guarantee that an arbitrary AHardwareBuffer_Format + * and AHardwareBuffer_UsageFlags combination can be used by arbitrary + * devices. The execution will fail if selected set of devices cannot + * consume the buffer. + * + * Calling {@link ANeuralNetworksModel_setOperandValueFromMemory} with + * shared memory backed by an AHardwareBuffer of a format other than + * AHARDWAREBUFFER_FORMAT_BLOB is disallowed. + * + * TODO(miaowang): add documentation about intended usage with + * introspection API. + * + * Available since API level 29. + * + * @param ahwb The AHardwareBuffer handle. + * @param memory The memory object to be created. + * Set to NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if the request completed normally. + * + * @see AHardwareBuffer + */ + int (*ANeuralNetworksMemory_createFromAHardwareBuffer)( + const AHardwareBuffer* ahwb, ANeuralNetworksMemory** memory); + + /** + * Specifies whether duration of the {@link ANeuralNetworksExecution} is to be + * measured. By default, duration is not measured. + * + * The {@link ANeuralNetworksExecution} must have been created with + * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded + * usage. + * + * Available since API level 29. + * + * @param execution The execution to be modified. + * @param measure 'true' if duration is to be measured, 'false' if not. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ + int (*ANeuralNetworksExecution_setMeasureTiming)( + ANeuralNetworksExecution* execution, bool measure); + + /** + * Get the time spent in the specified {@link ANeuralNetworksExecution}, in + * nanoseconds. The execution must have completed. + * + * @param execution The execution to be queried. + * @param durationCode The measurement to be queried, specified by {@link + * DurationCode}. + * @param duration The returned duration. If no measurement was requested by + * {@link ANeuralNetworksExecution_setMeasureTiming}, or for + * some other reason the duration is not available, UINT64_MAX will be + * returned. A particular device need not support any given measurement. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. 
+ */ + int (*ANeuralNetworksExecution_getDuration)( + const ANeuralNetworksExecution* execution, int32_t durationCode, + uint64_t* duration); + /**/ }; diff --git a/tensorflow/lite/nnapi/nnapi_implementation_test.cc b/tensorflow/lite/nnapi/nnapi_implementation_test.cc index 51fc404ff8f..9f30b95ec37 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation_test.cc +++ b/tensorflow/lite/nnapi/nnapi_implementation_test.cc @@ -84,6 +84,7 @@ TEST(NnapiLibTest, NnApiImplementation) { EXPECT_NE(nnapi->ANeuralNetworksEvent_wait, nullptr); EXPECT_NE(nnapi->ANeuralNetworksEvent_free, nullptr); EXPECT_NE(nnapi->ASharedMemory_create, nullptr); + // TODO(b/123423795): Test Q-specific APIs after release. } #else EXPECT_FALSE(nnapi->nnapi_exists); @@ -95,6 +96,8 @@ TEST(NnapiLibTest, NnApiImplementation) { EXPECT_EQ(nnapi->ANeuralNetworksModel_finish, nullptr); EXPECT_EQ(nnapi->ANeuralNetworksModel_addOperand, nullptr); EXPECT_EQ(nnapi->ANeuralNetworksModel_setOperandValue, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams, + nullptr); EXPECT_EQ(nnapi->ANeuralNetworksModel_setOperandValueFromMemory, nullptr); EXPECT_EQ(nnapi->ANeuralNetworksModel_addOperation, nullptr); EXPECT_EQ(nnapi->ANeuralNetworksModel_identifyInputsAndOutputs, nullptr); @@ -114,6 +117,25 @@ TEST(NnapiLibTest, NnApiImplementation) { EXPECT_EQ(nnapi->ANeuralNetworksEvent_wait, nullptr); EXPECT_EQ(nnapi->ANeuralNetworksEvent_free, nullptr); EXPECT_NE(nnapi->ASharedMemory_create, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworks_getDeviceCount, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworks_getDevice, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksDevice_getName, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksDevice_getVersion, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksDevice_getFeatureLevel, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksModel_getSupportedOperationsForDevices, + nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksCompilation_createForDevices, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksCompilation_setCaching, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksExecution_compute, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksExecution_getOutputOperandRank, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksExecution_getOutputOperandDimensions, + nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksBurst_create, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksBurst_free, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksExecution_burstCompute, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksMemory_createFromAHardwareBuffer, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksExecution_setMeasureTiming, nullptr); + EXPECT_EQ(nnapi->ANeuralNetworksExecution_getDuration, nullptr); #endif }
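To make the optional-loading pattern concrete, here is a hypothetical caller-side sketch (not part of this patch) that ties several of the newly loaded entry points together through the NnApi struct. The function name is invented for illustration; it assumes `model` is already built, finished, and fully supported by the first device, and it omits all error checking:

#include <cstdint>

#include "tensorflow/lite/nnapi/nnapi_implementation.h"

// Runs one synchronous execution of `model` on the first NNAPI device and
// returns the driver-side duration in nanoseconds, or UINT64_MAX when timing
// is unavailable (e.g. on pre-Q devices, where the optional symbols are null).
uint64_t TimedComputeOnFirstDevice(ANeuralNetworksModel* model) {
  const NnApi* nnapi = NnApiImplementation();
  // Q-only entry points are loaded with LOAD_FUNCTION_OPTIONAL, so they may
  // be nullptr and must be checked before use.
  if (!nnapi->nnapi_exists || nnapi->android_sdk_version < 29 ||
      nnapi->ANeuralNetworks_getDeviceCount == nullptr) {
    return UINT64_MAX;
  }

  uint32_t device_count = 0;
  nnapi->ANeuralNetworks_getDeviceCount(&device_count);
  if (device_count == 0) return UINT64_MAX;

  // Pick the first device; real code would compare names and feature levels.
  ANeuralNetworksDevice* device = nullptr;
  nnapi->ANeuralNetworks_getDevice(0, &device);

  // Timing measurement requires a compilation created for exactly one device.
  ANeuralNetworksCompilation* compilation = nullptr;
  nnapi->ANeuralNetworksCompilation_createForDevices(model, &device,
                                                     /*numDevices=*/1,
                                                     &compilation);
  nnapi->ANeuralNetworksCompilation_finish(compilation);

  ANeuralNetworksExecution* execution = nullptr;
  nnapi->ANeuralNetworksExecution_create(compilation, &execution);
  // ... bind inputs and outputs on `execution` here ...
  nnapi->ANeuralNetworksExecution_setMeasureTiming(execution, true);

  uint64_t duration = UINT64_MAX;
  if (nnapi->ANeuralNetworksExecution_compute(execution) ==
      ANEURALNETWORKS_NO_ERROR) {
    nnapi->ANeuralNetworksExecution_getDuration(
        execution, ANEURALNETWORKS_DURATION_IN_DRIVER, &duration);
  }

  nnapi->ANeuralNetworksExecution_free(execution);
  nnapi->ANeuralNetworksCompilation_free(compilation);
  return duration;
}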