Merge pull request #39723 from gmiodice:new_glue_conv
PiperOrigin-RevId: 313274514 Change-Id: Ic373074f02cee87fd53d8484a21b169e08d8fbee
This commit is contained in:
commit
676a68963e
@ -15,6 +15,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/conv.h"
|
||||
|
||||
#include "arm_nn_types.h"
|
||||
#include "arm_nnfunctions.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
@ -116,7 +117,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(__ARM_FEATURE_DSP)
|
||||
OpData data;
|
||||
int32_t buf_size;
|
||||
int32_t buf_size = 0;
|
||||
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
@ -127,32 +128,49 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
RuntimeShape input_shape = GetTensorShape(input);
|
||||
RuntimeShape output_shape = GetTensorShape(output);
|
||||
|
||||
const int input_depth = input_shape.Dims(3);
|
||||
const int input_width = input->dims->data[2];
|
||||
const int input_height = input->dims->data[1];
|
||||
const int filter_width = filter->dims->data[2];
|
||||
const int filter_height = filter->dims->data[1];
|
||||
const int output_width = output->dims->data[2];
|
||||
const int output_height = output->dims->data[1];
|
||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
// Initialize cmsis-nn input dimensions
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
input_dims.h = input->dims->data[1];
|
||||
input_dims.w = input->dims->data[2];
|
||||
input_dims.c = input_shape.Dims(3);
|
||||
|
||||
// Initialize cmsis-nn filter dimensions
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = output_shape.Dims(3);
|
||||
filter_dims.h = filter->dims->data[1];
|
||||
filter_dims.w = filter->dims->data[2];
|
||||
filter_dims.c = input_dims.c;
|
||||
|
||||
// Initialize cmsis-nn output dimensions
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = input_dims.n;
|
||||
output_dims.h = output->dims->data[1];
|
||||
output_dims.w = output->dims->data[2];
|
||||
output_dims.c = output_shape.Dims(3);
|
||||
|
||||
int* buffer_idx = reinterpret_cast<int*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(
|
||||
context, node, params, input_width, input_height, filter_width,
|
||||
filter_height, output_width, output_height, input->type, &data));
|
||||
context, node, params, input_dims.w, input_dims.h, filter_dims.w,
|
||||
filter_dims.h, output_dims.w, output_dims.h, input->type, &data));
|
||||
|
||||
if (data.padding.width == 0 && data.padding.height == 0 &&
|
||||
(input_depth % 4 == 0) && params->stride_width == 1 &&
|
||||
params->stride_height == 1 && filter_width == 1 && filter_height == 1) {
|
||||
buf_size = arm_convolve_1x1_s8_fast_get_buffer_size(input_depth);
|
||||
} else if (output_height == 1 && input_height == 1 && filter_height == 1 &&
|
||||
(output_width % 4 == 0) && batches == 1) {
|
||||
buf_size = arm_convolve_1_x_n_s8_get_buffer_size(input_depth, filter_width,
|
||||
filter_height);
|
||||
} else {
|
||||
buf_size = arm_convolve_s8_get_buffer_size(input_depth, filter_width,
|
||||
filter_height);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
// Initialize cmsis-nn convolution parameters
|
||||
cmsis_nn_conv_params conv_params;
|
||||
conv_params.input_offset = -input->params.zero_point;
|
||||
conv_params.output_offset = output->params.zero_point;
|
||||
conv_params.stride.h = params->stride_height;
|
||||
conv_params.stride.w = params->stride_width;
|
||||
conv_params.dilation.h = params->dilation_height_factor;
|
||||
conv_params.dilation.w = params->dilation_width_factor;
|
||||
conv_params.padding.h = data.padding.height;
|
||||
conv_params.padding.w = data.padding.width;
|
||||
conv_params.activation.min = data.output_activation_min;
|
||||
conv_params.activation.max = data.output_activation_max;
|
||||
|
||||
buf_size = arm_convolve_wrapper_s8_get_buffer_size(
|
||||
&conv_params, &input_dims, &filter_dims, &output_dims);
|
||||
}
|
||||
|
||||
node->user_data = buffer_idx;
|
||||
@ -204,6 +222,102 @@ TfLiteStatus EvalQuantizedPerChannel(
|
||||
TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params,
|
||||
OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output, TfLiteTensor* im2col) {
|
||||
// Initialize cmsis-nn convolution parameters
|
||||
cmsis_nn_conv_params conv_params;
|
||||
conv_params.input_offset = -input->params.zero_point;
|
||||
conv_params.output_offset = output->params.zero_point;
|
||||
conv_params.stride.h = params->stride_height;
|
||||
conv_params.stride.w = params->stride_width;
|
||||
conv_params.dilation.h = params->dilation_height_factor;
|
||||
conv_params.dilation.w = params->dilation_width_factor;
|
||||
conv_params.padding.h = data->padding.height;
|
||||
conv_params.padding.w = data->padding.width;
|
||||
conv_params.activation.min = data->output_activation_min;
|
||||
conv_params.activation.max = data->output_activation_max;
|
||||
|
||||
// Initialize cmsis-nn per channel quantization parameters
|
||||
cmsis_nn_per_channel_quant_params quant_params;
|
||||
quant_params.multiplier = data->per_channel_output_multiplier;
|
||||
quant_params.shift = data->per_channel_output_shift;
|
||||
|
||||
#if defined(__ARM_FEATURE_DSP)
|
||||
RuntimeShape filter_shape = GetTensorShape(filter);
|
||||
RuntimeShape input_shape = GetTensorShape(input);
|
||||
RuntimeShape output_shape = GetTensorShape(output);
|
||||
RuntimeShape bias_shape = GetTensorShape(bias);
|
||||
|
||||
// Sanity check.
|
||||
TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
||||
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
|
||||
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
|
||||
if (GetTensorData<int8_t>(bias)) {
|
||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
||||
}
|
||||
|
||||
// Initialize cmsis-nn dimensions
|
||||
// Input
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = batch_size;
|
||||
input_dims.h = input_shape.Dims(1);
|
||||
input_dims.w = input_shape.Dims(2);
|
||||
input_dims.c = input_depth;
|
||||
|
||||
// Filter
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = output_depth;
|
||||
filter_dims.h = filter_shape.Dims(1);
|
||||
filter_dims.w = filter_shape.Dims(2);
|
||||
filter_dims.c = input_depth;
|
||||
|
||||
// Bias
|
||||
cmsis_nn_dims bias_dims;
|
||||
bias_dims.n = 1;
|
||||
bias_dims.h = 1;
|
||||
bias_dims.w = 1;
|
||||
bias_dims.c = output_depth;
|
||||
|
||||
// Output
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = batch_size;
|
||||
output_dims.h = output_shape.Dims(1);
|
||||
output_dims.w = output_shape.Dims(2);
|
||||
output_dims.c = output_depth;
|
||||
|
||||
// Initialize cmsis-nn context
|
||||
cmsis_nn_context ctx;
|
||||
ctx.buf = nullptr;
|
||||
ctx.size = 0;
|
||||
|
||||
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
|
||||
if (*buffer_idx > -1) {
|
||||
ctx.buf = context->GetScratchBuffer(context, *buffer_idx);
|
||||
// Note: ctx.size is currently not used in cmsis-nn.
|
||||
// The buffer should be allocated in the Prepare function through
|
||||
// arm_convolve_wrapper_s8_get_buffer_size
|
||||
}
|
||||
|
||||
// arm_convolve_wrapper_s8 dispatches to the optimized kernel that matches
|
||||
// the parameters passed.
|
||||
arm_status status = arm_convolve_wrapper_s8(
|
||||
&ctx, &conv_params, &quant_params, &input_dims,
|
||||
GetTensorData<int8_t>(input), &filter_dims, GetTensorData<int8_t>(filter),
|
||||
&bias_dims, GetTensorData<int32>(bias), &output_dims,
|
||||
GetTensorData<int8_t>(output));
|
||||
|
||||
if (status == ARM_MATH_SUCCESS) {
|
||||
return kTfLiteOk;
|
||||
} else {
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
#else
|
||||
#pragma message( \
|
||||
"CMSIS-NN optimization for conv not available for this target. Using reference kernel.")
|
||||
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
@ -216,91 +330,6 @@ TfLiteStatus EvalQuantizedPerChannel(
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
|
||||
#if defined(__ARM_FEATURE_DSP)
|
||||
RuntimeShape filter_shape = GetTensorShape(filter);
|
||||
RuntimeShape input_shape = GetTensorShape(input);
|
||||
RuntimeShape output_shape = GetTensorShape(output);
|
||||
RuntimeShape bias_shape = GetTensorShape(bias);
|
||||
|
||||
// Set min and max value of the output.
|
||||
const int32 output_activation_min = std::numeric_limits<int8_t>::min();
|
||||
const int32 output_activation_max = std::numeric_limits<int8_t>::max();
|
||||
|
||||
// Sanity check.
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
|
||||
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
|
||||
if (GetTensorData<int8_t>(bias)) {
|
||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
||||
}
|
||||
|
||||
const int input_height = input_shape.Dims(1);
|
||||
const int input_width = input_shape.Dims(2);
|
||||
const int filter_height = filter_shape.Dims(1);
|
||||
const int filter_width = filter_shape.Dims(2);
|
||||
const int output_height = output_shape.Dims(1);
|
||||
const int output_width = output_shape.Dims(2);
|
||||
int16_t* buf = nullptr;
|
||||
|
||||
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
|
||||
if (*buffer_idx > -1) {
|
||||
void* raw = context->GetScratchBuffer(context, *buffer_idx);
|
||||
buf = reinterpret_cast<int16_t*>(raw);
|
||||
}
|
||||
|
||||
if (op_params.padding_values.width == 0 &&
|
||||
op_params.padding_values.height == 0 && (input_depth % 4 == 0) &&
|
||||
op_params.stride_width == 1 && op_params.stride_height == 1 &&
|
||||
filter_width == 1 && filter_height == 1) {
|
||||
if (arm_convolve_1x1_s8_fast(
|
||||
GetTensorData<int8_t>(input), input_width, input_height,
|
||||
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,
|
||||
op_params.padding_values.width, op_params.padding_values.height,
|
||||
op_params.stride_width, op_params.stride_height,
|
||||
GetTensorData<int32>(bias), GetTensorData<int8_t>(output),
|
||||
data->per_channel_output_shift, data->per_channel_output_multiplier,
|
||||
op_params.output_offset, op_params.input_offset,
|
||||
output_activation_min, output_activation_max, output_width,
|
||||
output_height, buf) != ARM_MATH_SUCCESS) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
} else if (output_height == 1 && input_height == 1 && filter_height == 1 &&
|
||||
(output_width % 4 == 0) && batches == 1) {
|
||||
if (arm_convolve_1_x_n_s8(
|
||||
GetTensorData<int8_t>(input), input_width, input_depth, batches,
|
||||
GetTensorData<int8_t>(filter), output_depth, filter_width,
|
||||
op_params.padding_values.width, op_params.stride_width,
|
||||
GetTensorData<int32_t>(bias), GetTensorData<int8_t>(output),
|
||||
data->per_channel_output_shift, data->per_channel_output_multiplier,
|
||||
op_params.output_offset, op_params.input_offset,
|
||||
output_activation_min, output_activation_max, output_width,
|
||||
buf) != ARM_MATH_SUCCESS) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
if (arm_convolve_s8(
|
||||
GetTensorData<int8_t>(input), input_width, input_height,
|
||||
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,
|
||||
filter_width, filter_height, op_params.padding_values.width,
|
||||
op_params.padding_values.height, op_params.stride_width,
|
||||
op_params.stride_height, GetTensorData<int32>(bias),
|
||||
GetTensorData<int8_t>(output), data->per_channel_output_shift,
|
||||
data->per_channel_output_multiplier, op_params.output_offset,
|
||||
op_params.input_offset, output_activation_min,
|
||||
output_activation_max, output_width, output_height,
|
||||
buf) != ARM_MATH_SUCCESS) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#pragma message( \
|
||||
"CMSIS-NN optimization for conv not available for this target. Using reference kernel.")
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, GetTensorShape(input),
|
||||
|
@ -28,8 +28,8 @@ LEON_BCC2_MD5 := "cdf78082be4882da2a92c9baa82fe765"
|
||||
TSIM_URL := "https://www.gaisler.com/anonftp/tsim/tsim-eval-2.0.63.tar.gz"
|
||||
TSIM_MD5 := "afa0095d3ed989a949e1467f94e41d2f"
|
||||
|
||||
CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/8a4db53f69da06e97565fe2f2e8926d193a5759d.zip"
|
||||
CMSIS_MD5 := "e9864fb71b65adc4f7d92a9dea6e1aab"
|
||||
CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/1150e71e07c79b538efd842aba5b210a31827ae5.zip"
|
||||
CMSIS_MD5 := "e05f4222ef58825193910b41a0871dcb"
|
||||
|
||||
AM_SDK_URL := "http://s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.2.0.zip"
|
||||
AM_SDK_MD5 := "7605fa2d4d97e6bb7a1190c92b66b597"
|
||||
|
Loading…
x
Reference in New Issue
Block a user