From 5c5804f21e3c4279b412bf2578245b35049b4349 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Fri, 12 Jun 2020 16:10:18 -0700 Subject: [PATCH] Use CMSIS-NN optimized kernels for Arduino library This change should provide significant performance improvements when running on the Arduino Nano 33 BLE Sense and other Cortex-M-based Arduino boards. PiperOrigin-RevId: 316195847 Change-Id: Ie5197a2f2be417fca4efa5d1b7f0534ad0c47820 --- tensorflow/lite/micro/tools/ci_build/test_arduino.sh | 4 ++-- tensorflow/lite/micro/tools/make/download_and_extract.sh | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/micro/tools/ci_build/test_arduino.sh b/tensorflow/lite/micro/tools/ci_build/test_arduino.sh index ecb821bde63..e333e9e6cd9 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_arduino.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_arduino.sh @@ -25,14 +25,14 @@ cd "${ROOT_DIR}" source tensorflow/lite/micro/tools/ci_build/helper_functions.sh -readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean +readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean clean_downloads TARGET=arduino # TODO(b/143715361): parallel builds do not work with generated files right now. 
readable_run make -f tensorflow/lite/micro/tools/make/Makefile \ TARGET=${TARGET} \ - TAGS="portable_optimized" \ + TAGS="cmsis-nn" \ generate_arduino_zip readable_run tensorflow/lite/micro/tools/ci_build/install_arduino_cli.sh diff --git a/tensorflow/lite/micro/tools/make/download_and_extract.sh b/tensorflow/lite/micro/tools/make/download_and_extract.sh index a51a8908b90..fa5e57dd91a 100755 --- a/tensorflow/lite/micro/tools/make/download_and_extract.sh +++ b/tensorflow/lite/micro/tools/make/download_and_extract.sh @@ -114,6 +114,15 @@ patch_cmsis() { -iname '*.*' -exec \ sed -i -E $'s@#include "arm_nn_tables.h"@#include "cmsis/CMSIS/NN/Include/arm_nn_tables.h"@g' {} \; + # Until the fix for https://github.com/ARMmbed/mbed-os/issues/12568 is + # rolled into Mbed version used on the Arduino IDE, we have to replace + # one intrinsic with a patched equivalent. + sed -i -E 's@__SXTB16_RORn@__patched_SXTB16_RORn@g' \ + tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c + + sed -i -E $'33 a \\\n\\\n// Work around for https://github.com/ARMmbed/mbed-os/issues/12568\\\n__STATIC_FORCEINLINE uint32_t __patched_SXTB16_RORn(uint32_t op1, uint32_t rotate) {\\\n uint32_t result;\\\n __ASM ("sxtb16 %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (rotate) );\\\n return result;\\\n}' \ + tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c + echo "Finished patching CMSIS" }