Use CMSIS-NN optimized kernels for Arduino library

This change should provide significant performance improvements when running on the Arduino Nano 33 BLE Sense and other Cortex-M-based Arduino boards.

PiperOrigin-RevId: 316195847
Change-Id: Ie5197a2f2be417fca4efa5d1b7f0534ad0c47820
This commit is contained in:
Pete Warden 2020-06-12 16:10:18 -07:00 committed by TensorFlower Gardener
parent eb7d642789
commit 5c5804f21e
2 changed files with 11 additions and 2 deletions

View File

@ -25,14 +25,14 @@ cd "${ROOT_DIR}"
source tensorflow/lite/micro/tools/ci_build/helper_functions.sh
readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean
readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean clean_downloads
TARGET=arduino
# TODO(b/143715361): parallel builds do not work with generated files right now.
readable_run make -f tensorflow/lite/micro/tools/make/Makefile \
TARGET=${TARGET} \
TAGS="portable_optimized" \
TAGS="cmsis-nn" \
generate_arduino_zip
readable_run tensorflow/lite/micro/tools/ci_build/install_arduino_cli.sh

View File

@ -114,6 +114,15 @@ patch_cmsis() {
-iname '*.*' -exec \
sed -i -E $'s@#include "arm_nn_tables.h"@#include "cmsis/CMSIS/NN/Include/arm_nn_tables.h"@g' {} \;
# Until the fix for https://github.com/ARMmbed/mbed-os/issues/12568 is
# rolled into the Mbed version used by the Arduino IDE, we have to replace
# the __SXTB16_RORn intrinsic with a patched equivalent.
sed -i -E 's@__SXTB16_RORn@__patched_SXTB16_RORn@g' \
tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
sed -i -E $'33 a \\\n\\\n// Work around for https://github.com/ARMmbed/mbed-os/issues/12568\\\n__STATIC_FORCEINLINE uint32_t __patched_SXTB16_RORn(uint32_t op1, uint32_t rotate) {\\\n uint32_t result;\\\n __ASM ("sxtb16 %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (rotate) );\\\n return result;\\\n}' \
tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
echo "Finished patching CMSIS"
}