TFLu: replace old cmsis scratch buffer

Change-Id: Ie695e999113c5d26eb74a6ea91d0542226a03d9f
This commit is contained in:
Måns Nilsson 2020-03-10 11:30:12 +01:00
parent c3dbc73edc
commit bd46152e59
8 changed files with 154 additions and 98 deletions

View File

@ -24,7 +24,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h"
namespace tflite {
namespace ops {
@ -111,12 +110,59 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
}
// Allocates per-node state: a single persistent int that Prepare fills with
// the arena scratch-buffer index (-1 when no scratch buffer is needed).
// Returns nullptr if the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
      kTfLiteOk) {
    return nullptr;
  }
  return raw;
}
void Free(TfLiteContext* context, void* buffer) {}
// Computes conv op data once and, when the DSP extension is available,
// requests a CMSIS-NN scratch buffer from the arena. The resulting arena
// index is stored in the int pointed to by node->user_data (allocated in
// Init); -1 means no scratch buffer is needed.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  OpData data;
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const RuntimeShape input_shape = GetTensorShape(input);
  const int input_depth = input_shape.Dims(3);
  const int input_width = input->dims->data[2];
  const int input_height = input->dims->data[1];
  const int filter_width = filter->dims->data[2];
  const int filter_height = filter->dims->data[1];
  const int output_width = output->dims->data[2];
  const int output_height = output->dims->data[1];
  const int output_depth = output->dims->data[3];

  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Default to "no scratch buffer"; overwritten on a successful request.
  *buffer_idx = -1;

  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, &data));

  // Size the buffer for the kernel Eval will actually pick. This condition
  // must stay in sync with the fast-path check in EvalQuantizedPerChannel
  // (including output_depth % 2 == 0, which the original Prepare omitted).
  int32_t buf_size;
  if (data.padding.width == 0 && data.padding.height == 0 &&
      (input_depth % 4 == 0) && (output_depth % 2 == 0) &&
      params->stride_width == 1 && params->stride_height == 1 &&
      filter_width == 1 && filter_height == 1) {
    buf_size = arm_convolve_1x1_s8_fast_get_buffer_size(input_depth);
  } else {
    buf_size = arm_convolve_s8_get_buffer_size(input_depth, filter_width,
                                               filter_height);
  }

  if (buf_size > 0) {
    TF_LITE_ENSURE_STATUS(
        context->RequestScratchBufferInArena(context, buf_size, buffer_idx));
  }
#endif
  return kTfLiteOk;
}
@ -200,15 +246,16 @@ TfLiteStatus EvalQuantizedPerChannel(
const int output_width = output_shape.Dims(2);
int16_t* buf = nullptr;
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
if (*buffer_idx > -1) {
void *raw = context->GetScratchBuffer(context, *buffer_idx);
buf = reinterpret_cast<int16_t*>(raw);
}
if (op_params.padding_values.width == 0 &&
op_params.padding_values.height == 0 && (input_depth % 4 == 0) &&
(output_depth % 2 == 0) && op_params.stride_width == 1 &&
op_params.stride_height == 1 && filter_width == 1 && filter_height == 1) {
const int32_t buf_size =
arm_convolve_1x1_s8_fast_get_buffer_size(input_depth);
if (get_cmsis_scratch_buffer(context, &buf, buf_size) != kTfLiteOk) {
return kTfLiteError;
}
if (arm_convolve_1x1_s8_fast(
GetTensorData<int8_t>(input), input_width, input_height,
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,
@ -222,11 +269,6 @@ TfLiteStatus EvalQuantizedPerChannel(
return kTfLiteError;
}
} else {
const int32_t buf_size = arm_convolve_s8_get_buffer_size(
input_depth, filter_width, filter_height);
if (get_cmsis_scratch_buffer(context, &buf, buf_size) != kTfLiteOk) {
return kTfLiteError;
}
if (arm_convolve_s8(
GetTensorData<int8_t>(input), input_width, input_height,
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,

View File

@ -25,7 +25,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h"
namespace tflite {
namespace ops {
@ -99,12 +98,40 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
} // namespace
// Allocates per-node state: a single persistent int that Prepare fills with
// the arena scratch-buffer index (-1 when no scratch buffer is needed).
// Returns nullptr if the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
      kTfLiteOk) {
    return nullptr;
  }
  return raw;
}
void Free(TfLiteContext* context, void* buffer) {}
// When the DSP extension is available, requests the scratch buffer needed by
// the optimized CMSIS-NN depthwise kernel and records its arena index in the
// int pointed to by node->user_data (allocated in Init). -1 means no scratch
// buffer is needed.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const int filter_width = SizeOfDimension(filter, 2);
  const int filter_height = SizeOfDimension(filter, 1);
  const RuntimeShape input_shape = GetTensorShape(input);
  const int input_depth = input_shape.Dims(3);

  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Default to "no scratch buffer"; overwritten on a successful request.
  *buffer_idx = -1;

  // Only the depth_multiplier == 1 path uses the optimized CMSIS-NN kernel
  // with a scratch requirement (see EvalQuantizedPerChannel).
  if (params->depth_multiplier == 1) {
    const int32_t buf_size = arm_depthwise_conv_s8_opt_get_buffer_size(
        input_depth, filter_width, filter_height);
    if (buf_size > 0) {
      TF_LITE_ENSURE_STATUS(
          context->RequestScratchBufferInArena(context, buf_size, buffer_idx));
    }
  }
#endif
  return kTfLiteOk;
}
@ -174,10 +201,12 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
if (op_params.depth_multiplier == 1) {
int16_t* buf = nullptr;
const int32_t buf_size = arm_depthwise_conv_s8_opt_get_buffer_size(
input_depth, filter_width, filter_height);
TF_LITE_ENSURE_OK(context,
get_cmsis_scratch_buffer(context, &buf, buf_size));
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
if (*buffer_idx > -1) {
void *raw = context->GetScratchBuffer(context, *buffer_idx);
buf = reinterpret_cast<int16_t*>(raw);
}
TF_LITE_ENSURE_EQ(
context,
arm_depthwise_conv_s8_opt(

View File

@ -23,7 +23,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h"
namespace tflite {
namespace ops {
@ -73,14 +72,33 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
} // namespace
// Allocates per-node state: a single persistent int that Prepare fills with
// the arena scratch-buffer index (-1 when no scratch buffer is needed).
// Returns nullptr if the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
      kTfLiteOk) {
    return nullptr;
  }
  return raw;
}
void Free(TfLiteContext* context, void* buffer) {}
// When the DSP extension is available, requests the scratch buffer needed by
// arm_fully_connected_s8 and records its arena index in the int pointed to by
// node->user_data (allocated in Init). -1 means no scratch buffer is needed.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
  const RuntimeShape filter_shape = GetTensorShape(filter);
  const int filter_dim_count = filter_shape.DimensionsCount();
  // accum_depth is the innermost filter dimension, as in EvalQuantizedInt8.
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  const int32_t buf_size = arm_fully_connected_s8_get_buffer_size(accum_depth);

  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Default to "no scratch buffer"; overwritten on a successful request.
  *buffer_idx = -1;

  if (buf_size > 0) {
    TF_LITE_ENSURE_STATUS(
        context->RequestScratchBufferInArena(context, buf_size, buffer_idx));
  }
#endif
  return kTfLiteOk;
}
@ -97,9 +115,14 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
#if defined(__ARM_FEATURE_DSP)
const int32_t buf_size = arm_fully_connected_s8_get_buffer_size(accum_depth);
int16_t* buf = nullptr;
TF_LITE_ENSURE_OK(context, get_cmsis_scratch_buffer(context, &buf, buf_size));
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
if (*buffer_idx > -1) {
void *raw = context->GetScratchBuffer(context, *buffer_idx);
buf = reinterpret_cast<int16_t*>(raw);
}
TF_LITE_ENSURE_EQ(
context,
arm_fully_connected_s8(

View File

@ -16,7 +16,6 @@ limitations under the License.
// These are headers from the ARM CMSIS-NN library.
#include "arm_nnfunctions.h" // NOLINT
#include "scratch_buffer.h" // NOLINT
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
@ -128,10 +127,13 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node,
const int padding_width = data->padding.width;
int16_t* scratch_buffer = nullptr;
int32_t buffer_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
TF_LITE_ENSURE_OK(
context, get_cmsis_scratch_buffer(context, &scratch_buffer, buffer_size));
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
if (*buffer_idx > -1) {
void *raw = context->GetScratchBuffer(context, *buffer_idx);
scratch_buffer = reinterpret_cast<int16_t*>(raw);
}
TF_LITE_ENSURE_EQ(
context,
@ -207,12 +209,39 @@ void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
} // namespace
// Allocates per-node state: a single persistent int that Prepare fills with
// the arena scratch-buffer index (-1 when no scratch buffer is needed).
// Returns nullptr if the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
      kTfLiteOk) {
    return nullptr;
  }
  return raw;
}
void Free(TfLiteContext* context, void* buffer) {}
// When the DSP extension is available, requests the scratch buffer sized for
// arm_avgpool_s8 and records its arena index in the int pointed to by
// node->user_data (allocated in Init). -1 means no scratch buffer is needed.
// NOTE(review): the buffer is requested regardless of pool type / input
// dtype, matching the original behavior; only AverageEvalInt8 reads it.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const RuntimeShape input_shape = GetTensorShape(input);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  const RuntimeShape output_shape = GetTensorShape(output);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int output_width = output_shape.Dims(2);
  const int32_t buffer_size =
      arm_avgpool_s8_get_buffer_size(output_width, depth);

  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Default to "no scratch buffer"; overwritten on a successful request.
  *buffer_idx = -1;

  if (buffer_size > 0) {
    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
        context, buffer_size, buffer_idx));
  }
#endif
  return kTfLiteOk;
}

View File

@ -1,36 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "scratch_buffer.h"
// todo: remove this function once context->AllocateTemporaryTensor() is
// implemented.
// This buffer is used by CMSIS-NN optimized operator implementations.
// SCRATCH_BUFFER_BYTES bytes is chosen empirically. It needs to be large
// enough to hold the biggest buffer needed by all CMSIS-NN operators in the
// network.
// note: buffer must be 32-bit aligned for SIMD
#define SCRATCH_BUFFER_BYTES 13000
// Hands out a pointer to one fixed, statically allocated scratch area shared
// by all CMSIS-NN kernel invocations. Fails via TF_LITE_ENSURE when the
// requested size exceeds the fixed SCRATCH_BUFFER_BYTES capacity.
// note: the buffer must stay 32-bit aligned for SIMD.
TfLiteStatus get_cmsis_scratch_buffer(TfLiteContext* context, int16_t** buf,
                                      int32_t buf_size_bytes) {
  static int16_t shared_area[SCRATCH_BUFFER_BYTES / 2]
      __attribute__((aligned(4))) = {};
  TF_LITE_ENSURE(context, buf_size_bytes <= SCRATCH_BUFFER_BYTES);
  *buf = shared_area;
  return kTfLiteOk;
}

View File

@ -1,26 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CMSIS_NN_SCRATCH_BUFFER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CMSIS_NN_SCRATCH_BUFFER_H_
#include "tensorflow/lite/c/common.h"
// todo: remove this function once context->AllocateTemporaryTensor() is
// implemented.
TfLiteStatus get_cmsis_scratch_buffer(TfLiteContext* context, int16_t** buf,
int32_t buf_size);
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CMSIS_NN_SCRATCH_BUFFER_H_

View File

@ -21,13 +21,6 @@ ifneq ($(filter cmsis-nn,$(ALL_TAGS)),)
THIRD_PARTY_CC_HDRS += \
$(call recursive_find,$(CMSIS_PATH)/CMSIS/Core/Include,*.h)
# todo: remove the two lines below once context->AllocateTemporaryTensor()
# is implemented.
MICROLITE_CC_HDRS += \
tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h
MICROLITE_CC_SRCS += \
tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.cc
INCLUDES += -I$(CMSIS_PATH)/CMSIS/Core/Include \
-I$(CMSIS_PATH)/CMSIS/NN/Include \
-I$(CMSIS_PATH)/CMSIS/DSP/Include

View File

@ -76,6 +76,8 @@ ifeq ($(TARGET), stm32f4)
tensorflow/lite/micro/kernels/dequantize_test.cc \
tensorflow/lite/micro/kernels/unpack_test.cc \
tensorflow/lite/micro/kernels/split_test.cc \
tensorflow/lite/micro/kernels/conv_test.cc \
tensorflow/lite/micro/kernels/depthwise_conv_test.cc \
tensorflow/lite/micro/simple_tensor_allocator_test.cc
MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS))
EXCLUDED_EXAMPLE_TESTS := \