TFLu: replace old cmsis scratch buffer
Change-Id: Ie695e999113c5d26eb74a6ea91d0542226a03d9f
This commit is contained in:
parent
c3dbc73edc
commit
bd46152e59
@ -24,7 +24,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -111,12 +110,59 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
|
||||
// Allocates the per-node state for this kernel: one persistent `int` that
// holds the index of the node's scratch buffer.
//
// The slot is pre-set to -1, the value Prepare() writes and the eval code
// tests for to mean "no scratch buffer", so it never holds an indeterminate
// value (e.g. in builds where the body of Prepare() is compiled out because
// __ARM_FEATURE_DSP is not defined).
//
// `buffer` and `length` are unused.
// Returns the slot (Prepare() reads it back through node->user_data), or
// nullptr when the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
          kTfLiteOk ||
      raw == nullptr) {
    return nullptr;
  }
  *static_cast<int*>(raw) = -1;
  return raw;
}
|
||||
|
||||
// Intentionally a no-op: this kernel performs no per-node cleanup here.
void Free(TfLiteContext* context, void* buffer) {
  (void)context;
  (void)buffer;
}
|
||||
|
||||
// Reserves the CMSIS-NN scratch buffer this convolution node needs at eval
// time and stores its index in the `int` slot behind node->user_data
// (-1 when no buffer is required).
//
// The kernel-selection test below must stay in sync with the one in
// EvalQuantizedPerChannel(): the 1x1 fast kernel and the generic kernel are
// sized by different *_get_buffer_size() helpers, so selecting a different
// kernel here than at eval time would leave eval with a buffer sized for the
// wrong kernel.
//
// Without __ARM_FEATURE_DSP nothing is requested.
// Returns kTfLiteOk on success; an error status when the per-node slot is
// missing, CalculateOpData() fails, or the scratch-buffer request fails.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Init() returns nullptr when its allocation fails; fail cleanly instead of
  // dereferencing a null slot.
  TF_LITE_ENSURE(context, buffer_idx != nullptr);

  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const RuntimeShape input_shape = GetTensorShape(input);

  const int input_depth = input_shape.Dims(3);
  const int input_width = input->dims->data[2];
  const int input_height = input->dims->data[1];
  const int filter_width = filter->dims->data[2];
  const int filter_height = filter->dims->data[1];
  const int output_width = output->dims->data[2];
  const int output_height = output->dims->data[1];
  // NOTE(review): assumes the output tensor is 4-D with channels in the last
  // dimension, like the input — confirm this matches how
  // EvalQuantizedPerChannel() derives output_depth.
  const int output_depth = output->dims->data[3];

  OpData data;
  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, &data));

  // Same predicate as the eval path, including the output_depth term.
  const bool use_1x1_fast =
      data.padding.width == 0 && data.padding.height == 0 &&
      (input_depth % 4 == 0) && (output_depth % 2 == 0) &&
      params->stride_width == 1 && params->stride_height == 1 &&
      filter_width == 1 && filter_height == 1;

  const int32_t buf_size =
      use_1x1_fast ? arm_convolve_1x1_s8_fast_get_buffer_size(input_depth)
                   : arm_convolve_s8_get_buffer_size(input_depth, filter_width,
                                                     filter_height);

  if (buf_size > 0) {
    TF_LITE_ENSURE_STATUS(
        context->RequestScratchBufferInArena(context, buf_size, buffer_idx));
  } else {
    *buffer_idx = -1;
  }
#endif
  return kTfLiteOk;
}
|
||||
|
||||
@ -200,15 +246,16 @@ TfLiteStatus EvalQuantizedPerChannel(
|
||||
const int output_width = output_shape.Dims(2);
|
||||
int16_t* buf = nullptr;
|
||||
|
||||
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
|
||||
if (*buffer_idx > -1) {
|
||||
void *raw = context->GetScratchBuffer(context, *buffer_idx);
|
||||
buf = reinterpret_cast<int16_t*>(raw);
|
||||
}
|
||||
|
||||
if (op_params.padding_values.width == 0 &&
|
||||
op_params.padding_values.height == 0 && (input_depth % 4 == 0) &&
|
||||
(output_depth % 2 == 0) && op_params.stride_width == 1 &&
|
||||
op_params.stride_height == 1 && filter_width == 1 && filter_height == 1) {
|
||||
const int32_t buf_size =
|
||||
arm_convolve_1x1_s8_fast_get_buffer_size(input_depth);
|
||||
if (get_cmsis_scratch_buffer(context, &buf, buf_size) != kTfLiteOk) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
if (arm_convolve_1x1_s8_fast(
|
||||
GetTensorData<int8_t>(input), input_width, input_height,
|
||||
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,
|
||||
@ -222,11 +269,6 @@ TfLiteStatus EvalQuantizedPerChannel(
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
const int32_t buf_size = arm_convolve_s8_get_buffer_size(
|
||||
input_depth, filter_width, filter_height);
|
||||
if (get_cmsis_scratch_buffer(context, &buf, buf_size) != kTfLiteOk) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
if (arm_convolve_s8(
|
||||
GetTensorData<int8_t>(input), input_width, input_height,
|
||||
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,
|
||||
|
@ -25,7 +25,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -99,12 +98,40 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
} // namespace
|
||||
|
||||
// Allocates the per-node state for this kernel: one persistent `int` that
// holds the index of the node's scratch buffer.
//
// The slot is pre-set to -1, the value Prepare() writes and the eval code
// tests for to mean "no scratch buffer", so it never holds an indeterminate
// value (e.g. in builds where the body of Prepare() is compiled out because
// __ARM_FEATURE_DSP is not defined).
//
// `buffer` and `length` are unused.
// Returns the slot (Prepare() reads it back through node->user_data), or
// nullptr when the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
          kTfLiteOk ||
      raw == nullptr) {
    return nullptr;
  }
  *static_cast<int*>(raw) = -1;
  return raw;
}
|
||||
|
||||
// Intentionally a no-op: this kernel performs no per-node cleanup here.
void Free(TfLiteContext* context, void* buffer) {
  (void)context;
  (void)buffer;
}
|
||||
|
||||
// Reserves the CMSIS-NN scratch buffer for this depthwise-convolution node
// and stores its index in the `int` slot behind node->user_data.
//
// A buffer is only requested when depth_multiplier == 1 (the case in which
// the eval path calls arm_depthwise_conv_s8_opt) and the library reports a
// non-zero size; otherwise the slot stays at -1, meaning "no scratch buffer".
//
// Without __ARM_FEATURE_DSP nothing is requested.
// Returns kTfLiteOk on success; an error status when the per-node slot is
// missing or the scratch-buffer request fails.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Init() returns nullptr when its allocation fails; fail cleanly instead of
  // dereferencing a null slot.
  TF_LITE_ENSURE(context, buffer_idx != nullptr);
  *buffer_idx = -1;

  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);

  if (params->depth_multiplier == 1) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);

    const int filter_width = SizeOfDimension(filter, 2);
    const int filter_height = SizeOfDimension(filter, 1);

    const RuntimeShape input_shape = GetTensorShape(input);
    const int input_depth = input_shape.Dims(3);

    const int32_t buf_size = arm_depthwise_conv_s8_opt_get_buffer_size(
        input_depth, filter_width, filter_height);

    if (buf_size > 0) {
      TF_LITE_ENSURE_STATUS(
          context->RequestScratchBufferInArena(context, buf_size, buffer_idx));
    }
  }
#endif
  return kTfLiteOk;
}
|
||||
|
||||
@ -174,10 +201,12 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
if (op_params.depth_multiplier == 1) {
|
||||
int16_t* buf = nullptr;
|
||||
const int32_t buf_size = arm_depthwise_conv_s8_opt_get_buffer_size(
|
||||
input_depth, filter_width, filter_height);
|
||||
TF_LITE_ENSURE_OK(context,
|
||||
get_cmsis_scratch_buffer(context, &buf, buf_size));
|
||||
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
|
||||
if (*buffer_idx > -1) {
|
||||
void *raw = context->GetScratchBuffer(context, *buffer_idx);
|
||||
buf = reinterpret_cast<int16_t*>(raw);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context,
|
||||
arm_depthwise_conv_s8_opt(
|
||||
|
@ -23,7 +23,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -73,14 +72,33 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
} // namespace
|
||||
|
||||
// Allocates the per-node state for this kernel: one persistent `int` that
// holds the index of the node's scratch buffer.
//
// The slot is pre-set to -1, the value Prepare() writes and the eval code
// tests for to mean "no scratch buffer", so it never holds an indeterminate
// value (e.g. in builds where the body of Prepare() is compiled out because
// __ARM_FEATURE_DSP is not defined).
//
// `buffer` and `length` are unused.
// Returns the slot (Prepare() reads it back through node->user_data), or
// nullptr when the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
          kTfLiteOk ||
      raw == nullptr) {
    return nullptr;
  }
  *static_cast<int*>(raw) = -1;
  return raw;
}
|
||||
|
||||
// Intentionally a no-op: this kernel performs no per-node cleanup here.
void Free(TfLiteContext* context, void* buffer) {
  (void)context;
  (void)buffer;
}
|
||||
|
||||
// Reserves the CMSIS-NN scratch buffer for this fully-connected node and
// stores its index in the `int` slot behind node->user_data (-1 when the
// library reports that no buffer is needed).
//
// The size is derived from the last dimension of the weights tensor, the same
// accum_depth the eval path computes.
//
// Without __ARM_FEATURE_DSP nothing is requested.
// Returns kTfLiteOk on success; an error status when the per-node slot is
// missing or the scratch-buffer request fails.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Init() returns nullptr when its allocation fails; fail cleanly instead of
  // dereferencing a null slot.
  TF_LITE_ENSURE(context, buffer_idx != nullptr);

  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);

  const RuntimeShape filter_shape = GetTensorShape(filter);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);

  const int32_t buf_size = arm_fully_connected_s8_get_buffer_size(accum_depth);

  if (buf_size > 0) {
    TF_LITE_ENSURE_STATUS(
        context->RequestScratchBufferInArena(context, buf_size, buffer_idx));
  } else {
    *buffer_idx = -1;
  }
#endif
  return kTfLiteOk;
}
|
||||
|
||||
@ -97,9 +115,14 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
|
||||
#if defined(__ARM_FEATURE_DSP)
|
||||
const int32_t buf_size = arm_fully_connected_s8_get_buffer_size(accum_depth);
|
||||
int16_t* buf = nullptr;
|
||||
TF_LITE_ENSURE_OK(context, get_cmsis_scratch_buffer(context, &buf, buf_size));
|
||||
|
||||
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
|
||||
if (*buffer_idx > -1) {
|
||||
void *raw = context->GetScratchBuffer(context, *buffer_idx);
|
||||
buf = reinterpret_cast<int16_t*>(raw);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context,
|
||||
arm_fully_connected_s8(
|
||||
|
@ -16,7 +16,6 @@ limitations under the License.
|
||||
|
||||
// These are headers from the ARM CMSIS-NN library.
|
||||
#include "arm_nnfunctions.h" // NOLINT
|
||||
#include "scratch_buffer.h" // NOLINT
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
@ -128,10 +127,13 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node,
|
||||
const int padding_width = data->padding.width;
|
||||
|
||||
int16_t* scratch_buffer = nullptr;
|
||||
int32_t buffer_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
|
||||
|
||||
TF_LITE_ENSURE_OK(
|
||||
context, get_cmsis_scratch_buffer(context, &scratch_buffer, buffer_size));
|
||||
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
|
||||
|
||||
if (*buffer_idx > -1) {
|
||||
void *raw = context->GetScratchBuffer(context, *buffer_idx);
|
||||
scratch_buffer = reinterpret_cast<int16_t*>(raw);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context,
|
||||
@ -207,12 +209,39 @@ void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
} // namespace
|
||||
|
||||
// Allocates the per-node state for this kernel: one persistent `int` that
// holds the index of the node's scratch buffer.
//
// The slot is pre-set to -1, the value Prepare() writes and the eval code
// tests for to mean "no scratch buffer", so it never holds an indeterminate
// value (e.g. in builds where the body of Prepare() is compiled out because
// __ARM_FEATURE_DSP is not defined).
//
// `buffer` and `length` are unused.
// Returns the slot (Prepare() reads it back through node->user_data), or
// nullptr when the persistent allocation fails.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  void* raw = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(int), &raw) !=
          kTfLiteOk ||
      raw == nullptr) {
    return nullptr;
  }
  *static_cast<int*>(raw) = -1;
  return raw;
}
|
||||
|
||||
// Intentionally a no-op: this kernel performs no per-node cleanup here.
void Free(TfLiteContext* context, void* buffer) {
  (void)context;
  (void)buffer;
}
|
||||
|
||||
// Reserves the scratch buffer sized by arm_avgpool_s8_get_buffer_size() for
// this pooling node and stores its index in the `int` slot behind
// node->user_data (-1 when the library reports that no buffer is needed).
//
// Input and output are expected to be 4-D; the channel count is taken from
// dimension 3 of both shapes and the output width from dimension 2.
//
// Without __ARM_FEATURE_DSP nothing is requested.
// Returns kTfLiteOk on success; an error status when the per-node slot is
// missing or the scratch-buffer request fails.
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
  // Init() returns nullptr when its allocation fails; fail cleanly instead of
  // dereferencing a null slot.
  TF_LITE_ENSURE(context, buffer_idx != nullptr);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const RuntimeShape input_shape = GetTensorShape(input);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);

  const RuntimeShape output_shape = GetTensorShape(output);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int output_width = output_shape.Dims(2);

  const int32_t buffer_size =
      arm_avgpool_s8_get_buffer_size(output_width, depth);

  if (buffer_size > 0) {
    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
        context, buffer_size, buffer_idx));
  } else {
    *buffer_idx = -1;
  }
#endif
  return kTfLiteOk;
}
|
||||
|
||||
|
@ -1,36 +0,0 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "scratch_buffer.h"
|
||||
|
||||
// todo: remove this function once context->AllocateTemporaryTensor() is
|
||||
// implemented.
|
||||
|
||||
// This buffer is used by CMSIS-NN optimized operator implementations.
|
||||
// SCRATCH_BUFFER_BYTES bytes is chosen empirically. It needs to be large
|
||||
// enough to hold the biggest buffer needed by all CMSIS-NN operators in the
|
||||
// network.
|
||||
// note: buffer must be 32-bit aligned for SIMD
|
||||
#define SCRATCH_BUFFER_BYTES 13000
|
||||
|
||||
// Hands out the one statically allocated scratch area shared by every caller.
//
// `buf` receives a pointer to the start of the area. `buf_size_bytes` is the
// number of bytes the caller needs; requests larger than SCRATCH_BUFFER_BYTES
// are rejected through TF_LITE_ENSURE. Returns kTfLiteOk when the request
// fits.
TfLiteStatus get_cmsis_scratch_buffer(TfLiteContext* context, int16_t** buf,
                                      int32_t buf_size_bytes) {
  // Function-local static storage with 4-byte alignment, zero-initialised.
  // Every call returns this same array.
  __attribute__((aligned(4))) static int16_t
      shared_scratch_area[SCRATCH_BUFFER_BYTES / 2] = {0};

  TF_LITE_ENSURE(context, buf_size_bytes <= SCRATCH_BUFFER_BYTES);

  *buf = shared_scratch_area;
  return kTfLiteOk;
}
|
@ -1,26 +0,0 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CMSIS_NN_SCRATCH_BUFFER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_CMSIS_NN_SCRATCH_BUFFER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
// todo: remove this function once context->AllocateTemporaryTensor() is
|
||||
// implemented.
|
||||
TfLiteStatus get_cmsis_scratch_buffer(TfLiteContext* context, int16_t** buf,
|
||||
int32_t buf_size);
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CMSIS_NN_SCRATCH_BUFFER_H_
|
@ -21,13 +21,6 @@ ifneq ($(filter cmsis-nn,$(ALL_TAGS)),)
|
||||
THIRD_PARTY_CC_HDRS += \
|
||||
$(call recursive_find,$(CMSIS_PATH)/CMSIS/Core/Include,*.h)
|
||||
|
||||
# todo: remove the two lines below once context->AllocateTemporaryTensor()
|
||||
# is implemented.
|
||||
MICROLITE_CC_HDRS += \
|
||||
tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.h
|
||||
MICROLITE_CC_SRCS += \
|
||||
tensorflow/lite/micro/kernels/cmsis-nn/scratch_buffer.cc
|
||||
|
||||
INCLUDES += -I$(CMSIS_PATH)/CMSIS/Core/Include \
|
||||
-I$(CMSIS_PATH)/CMSIS/NN/Include \
|
||||
-I$(CMSIS_PATH)/CMSIS/DSP/Include
|
||||
|
@ -76,6 +76,8 @@ ifeq ($(TARGET), stm32f4)
|
||||
tensorflow/lite/micro/kernels/dequantize_test.cc \
|
||||
tensorflow/lite/micro/kernels/unpack_test.cc \
|
||||
tensorflow/lite/micro/kernels/split_test.cc \
|
||||
tensorflow/lite/micro/kernels/conv_test.cc \
|
||||
tensorflow/lite/micro/kernels/depthwise_conv_test.cc \
|
||||
tensorflow/lite/micro/simple_tensor_allocator_test.cc
|
||||
MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS))
|
||||
EXCLUDED_EXAMPLE_TESTS := \
|
||||
|
Loading…
Reference in New Issue
Block a user