From fd50feed714e9a765636937bf502ff162ff453a0 Mon Sep 17 00:00:00 2001
From: Daniel Situnayake
Date: Sun, 12 Apr 2020 10:06:29 -0700
Subject: [PATCH 1/3] Enable TensorFlow Lite for Microcontrollers to build
 with CMSIS-NN

---
 tensorflow/lite/micro/kernels/cmsis-nn/README.md     | 11 ++++++++++-
 tensorflow/lite/micro/kernels/cmsis-nn/conv.cc       |  3 ---
 .../lite/micro/tools/make/third_party_downloads.inc  |  4 ++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/README.md b/tensorflow/lite/micro/kernels/cmsis-nn/README.md
index 4107ba466db..6224b3b3796 100644
--- a/tensorflow/lite/micro/kernels/cmsis-nn/README.md
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/README.md
@@ -48,7 +48,16 @@ cp tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/DSP/Include/\
 arm_math.h mbed-os/cmsis/TARGET_CORTEX_M/arm_math.h
 ```
 
-This issue will be resolved soon. Now type
+There's also a dependency on an old cmsis_gcc.h, which you can fix with the following:
+
+```
+cp tensorflow/lite/micro/tools/make/downloads/cmsis/CMSIS/Core/Include/\
+cmsis_gcc.h mbed-os/cmsis/TARGET_CORTEX_M/cmsis_gcc.h
+```
+
+This issue will be resolved soon.
+
+Now type:
 
 ```
 mbed compile -m DISCO_F746NG -t GCC_ARM
diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc
index 273fdaea65b..8b5a7c028e5 100644
--- a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc
@@ -268,9 +268,6 @@ TfLiteStatus EvalQuantizedPerChannel(
         (output_width % 4 == 0) && batches == 1) {
       const int32_t buf_size = arm_convolve_1_x_n_s8_get_buffer_size(
           input_depth, filter_width, filter_height);
-      if (get_cmsis_scratch_buffer(context, &buf, buf_size) != kTfLiteOk) {
-        return kTfLiteError;
-      }
       if (arm_convolve_1_x_n_s8(
               GetTensorData<int8_t>(input), input_width, input_depth, batches,
              GetTensorData<int8_t>(filter), output_depth, filter_width,
diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc
index 30a27c0a758..a3089e42d44 100644
--- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc
+++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc
@@ -20,8 +20,8 @@ LEON_BCC2_MD5 := "cdf78082be4882da2a92c9baa82fe765"
 TSIM_URL := "https://www.gaisler.com/anonftp/tsim/tsim-eval-2.0.63.tar.gz"
 TSIM_MD5 := "afa0095d3ed989a949e1467f94e41d2f"
 
-CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/b2937134bd2047bd569c4408391ae20d7677d35c.zip"
-CMSIS_MD5 := "04cb3a2cb4834284767a01e8f1c6f834"
+CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/8a4db53f69da06e97565fe2f2e8926d193a5759d.zip"
+CMSIS_MD5 := "e9864fb71b65adc4f7d92a9dea6e1aab"
 
 AM_SDK_URL := "http://s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.2.0.zip"
 AM_SDK_MD5 := "7605fa2d4d97e6bb7a1190c92b66b597"
From 7ce67938d0f84d5e724ab6cde7adaa78e1756a10 Mon Sep 17 00:00:00 2001
From: Daniel Situnayake
Date: Sun, 12 Apr 2020 19:50:29 -0700
Subject: [PATCH 2/3] Integrate CMSIS-NN optimized function for max pooling

---
 .../lite/micro/kernels/cmsis-nn/pooling.cc | 79 ++++++++++++++++++-
 1 file changed, 75 insertions(+), 4 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc
index bf7370ee79a..61f703cf91e 100644
--- a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc
@@ -145,7 +145,7 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node,
                     ARM_MATH_SUCCESS);
 #else
 #pragma message( \
-    "CMSIS-NN optimization for depthwise_conv not available for this target. Using reference kernel.")
+    "CMSIS-NN optimization for avg_pool not available for this target. Using reference kernel.")
 
   PoolParams op_params;
   op_params.stride_height = params->stride_height;
@@ -166,7 +166,7 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node,
 
 void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
                   TfLitePoolParams* params, OpData* data,
-                  const TfLiteTensor* input, TfLiteTensor* output) {
+                  TfLiteTensor* input, TfLiteTensor* output) {
   float activation_min, activation_max;
   CalculateActivationRange(params->activation, &activation_min,
                            &activation_max);
@@ -187,7 +187,7 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
 
 void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
                            TfLitePoolParams* params, OpData* data,
-                           const TfLiteTensor* input, TfLiteTensor* output) {
+                           TfLiteTensor* input, TfLiteTensor* output) {
   int32_t activation_min, activation_max;
   (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                           &activation_min, &activation_max);
@@ -206,6 +206,73 @@ void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
       GetTensorData<uint8_t>(output));
 }
 
+TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node,
+                         const TfLitePoolParams* params, const OpData* data,
+                         TfLiteTensor* input, TfLiteTensor* output) {
+  int32_t activation_min, activation_max;
+  (void)CalculateActivationRangeQuantized(context, params->activation, output,
+                                          &activation_min, &activation_max);
+
+  TFLITE_DCHECK_LE(activation_min, activation_max);
+
+#if defined(__ARM_FEATURE_DSP)
+  RuntimeShape input_shape = GetTensorShape(input);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+
+  RuntimeShape output_shape = GetTensorShape(output);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int stride_height = params->stride_height;
+  const int stride_width = params->stride_width;
+
+  const int filter_height = params->filter_height;
+  const int filter_width = params->filter_width;
+  const int padding_height = data->padding.height;
+  const int padding_width = data->padding.width;
+
+  int16_t* scratch_buffer = nullptr;
+
+  auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
+
+  if (*buffer_idx > -1) {
+    void* raw = context->GetScratchBuffer(context, *buffer_idx);
+    scratch_buffer = reinterpret_cast<int16_t*>(raw);
+  }
+
+  TF_LITE_ENSURE_EQ(
+      context,
+      arm_max_pool_s8_opt(input_height, input_width, output_height,
+                          output_width, stride_height, stride_width,
+                          filter_height, filter_width, padding_height,
+                          padding_width, activation_min, activation_max,
+                          depth, GetTensorData<int8_t>(input), scratch_buffer,
+                          GetTensorData<int8_t>(output)),
+      ARM_MATH_SUCCESS);
+#else
+#pragma message( \
+    "CMSIS-NN optimization for max_pool not available for this target. Using reference kernel.")
+
+  PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.quantized_activation_min = activation_min;
+  op_params.quantized_activation_max = activation_max;
+  reference_integer_ops::MaxPool(
+      op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
+      GetTensorShape(output), GetTensorData<int8_t>(output));
+
+#endif
+  return kTfLiteOk;
+}
+
 }  // namespace
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@@ -277,7 +344,8 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
   OpData data;
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* input = &context->tensors[flatbuffers::EndianScalar(
+      node->inputs->data[kInputTensor])];
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
   TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
@@ -289,6 +357,9 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
     case kTfLiteUInt8:
       MaxEvalQuantizedUInt8(context, node, params, &data, input, output);
       break;
+    case kTfLiteInt8:
+      MaxEvalInt8(context, node, params, &data, input, output);
+      break;
     default:
       TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
                          TfLiteTypeGetName(input->type));
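A note on the scratch buffer in `MaxEvalInt8` above: the Eval step only reads a buffer index out of `node->user_data` and resolves it with `context->GetScratchBuffer`; the index itself must be requested while the interpreter plans memory, via TFLM's `RequestScratchBufferInArena`. The sketch below shows that preparation-side pattern; the function name `PrepareInt8Pool` and the `buf_size` value are illustrative assumptions, not part of this patch or of CMSIS-NN's actual buffer-size query.

```
// Sketch only: the preparation-side counterpart to MaxEvalInt8's buffer
// lookup. PrepareInt8Pool and the buf_size calculation are assumptions for
// illustration; a real kernel must size the buffer as CMSIS-NN requires.
TfLiteStatus PrepareInt8Pool(TfLiteContext* context, TfLiteNode* node) {
  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  *buffer_idx = -1;  // Sentinel: MaxEvalInt8 skips the lookup when negative.
#if defined(__ARM_FEATURE_DSP)
  const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  // Assumed size: one int16 element per output channel.
  const size_t buf_size = GetTensorShape(output).Dims(3) * sizeof(int16_t);
  if (context->RequestScratchBufferInArena != nullptr) {
    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
        context, buf_size, buffer_idx));
  }
#endif
  return kTfLiteOk;
}
```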
Using reference kernel.") + + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = activation_min; + op_params.quantized_activation_max = activation_max; + reference_integer_ops::MaxPool( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + +#endif + return kTfLiteOk; +} + } // namespace void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -277,7 +344,8 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); OpData data; - const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* input = &context->tensors[flatbuffers::EndianScalar( + node->inputs->data[kInputTensor])]; TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data)); @@ -289,6 +357,9 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteUInt8: MaxEvalQuantizedUInt8(context, node, params, &data, input, output); break; + case kTfLiteInt8: + MaxEvalInt8(context, node, params, &data, input, output); + break; default: TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", TfLiteTypeGetName(input->type)); From a9aa8cb2d7caf22d61e0c1b9c12b6e002c3d7bb9 Mon Sep 17 00:00:00 2001 From: Daniel Situnayake Date: Mon, 27 Apr 2020 12:21:39 -0700 Subject: [PATCH 3/3] Ensure `ParseOpData` always has a return value --- tensorflow/lite/core/api/flatbuffer_conversions.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 998b7d5fbf1..6c861151283 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -913,6 +913,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_SEGMENT_SUM: return kTfLiteOk; } + return kTfLiteError; } // NOLINT[readability/fn_size] } // namespace tflite