Merge pull request #39723 from gmiodice:new_glue_conv

PiperOrigin-RevId: 313274514
Change-Id: Ic373074f02cee87fd53d8484a21b169e08d8fbee
This commit is contained in:
TensorFlower Gardener 2020-05-26 15:07:11 -07:00
commit 676a68963e
2 changed files with 138 additions and 109 deletions

View File

@@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "arm_nn_types.h"
#include "arm_nnfunctions.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -116,7 +117,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP)
OpData data;
int32_t buf_size;
int32_t buf_size = 0;
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
@@ -127,32 +128,49 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
RuntimeShape input_shape = GetTensorShape(input);
RuntimeShape output_shape = GetTensorShape(output);
const int input_depth = input_shape.Dims(3);
const int input_width = input->dims->data[2];
const int input_height = input->dims->data[1];
const int filter_width = filter->dims->data[2];
const int filter_height = filter->dims->data[1];
const int output_width = output->dims->data[2];
const int output_height = output->dims->data[1];
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
// Initialize cmsis-nn input dimensions
cmsis_nn_dims input_dims;
input_dims.n = MatchingDim(input_shape, 0, output_shape, 0);
input_dims.h = input->dims->data[1];
input_dims.w = input->dims->data[2];
input_dims.c = input_shape.Dims(3);
// Initialize cmsis-nn filter dimensions
cmsis_nn_dims filter_dims;
filter_dims.n = output_shape.Dims(3);
filter_dims.h = filter->dims->data[1];
filter_dims.w = filter->dims->data[2];
filter_dims.c = input_dims.c;
// Initialize cmsis-nn output dimensions
cmsis_nn_dims output_dims;
output_dims.n = input_dims.n;
output_dims.h = output->dims->data[1];
output_dims.w = output->dims->data[2];
output_dims.c = output_shape.Dims(3);
int* buffer_idx = reinterpret_cast<int*>(node->user_data);
TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, &data));
context, node, params, input_dims.w, input_dims.h, filter_dims.w,
filter_dims.h, output_dims.w, output_dims.h, input->type, &data));
if (data.padding.width == 0 && data.padding.height == 0 &&
(input_depth % 4 == 0) && params->stride_width == 1 &&
params->stride_height == 1 && filter_width == 1 && filter_height == 1) {
buf_size = arm_convolve_1x1_s8_fast_get_buffer_size(input_depth);
} else if (output_height == 1 && input_height == 1 && filter_height == 1 &&
(output_width % 4 == 0) && batches == 1) {
buf_size = arm_convolve_1_x_n_s8_get_buffer_size(input_depth, filter_width,
filter_height);
} else {
buf_size = arm_convolve_s8_get_buffer_size(input_depth, filter_width,
filter_height);
if (input->type == kTfLiteInt8) {
// Initialize cmsis-nn convolution parameters
cmsis_nn_conv_params conv_params;
conv_params.input_offset = -input->params.zero_point;
conv_params.output_offset = output->params.zero_point;
conv_params.stride.h = params->stride_height;
conv_params.stride.w = params->stride_width;
conv_params.dilation.h = params->dilation_height_factor;
conv_params.dilation.w = params->dilation_width_factor;
conv_params.padding.h = data.padding.height;
conv_params.padding.w = data.padding.width;
conv_params.activation.min = data.output_activation_min;
conv_params.activation.max = data.output_activation_max;
buf_size = arm_convolve_wrapper_s8_get_buffer_size(
&conv_params, &input_dims, &filter_dims, &output_dims);
}
node->user_data = buffer_idx;
@@ -204,6 +222,102 @@ TfLiteStatus EvalQuantizedPerChannel(
TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params,
OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output, TfLiteTensor* im2col) {
// Initialize cmsis-nn convolution parameters
cmsis_nn_conv_params conv_params;
conv_params.input_offset = -input->params.zero_point;
conv_params.output_offset = output->params.zero_point;
conv_params.stride.h = params->stride_height;
conv_params.stride.w = params->stride_width;
conv_params.dilation.h = params->dilation_height_factor;
conv_params.dilation.w = params->dilation_width_factor;
conv_params.padding.h = data->padding.height;
conv_params.padding.w = data->padding.width;
conv_params.activation.min = data->output_activation_min;
conv_params.activation.max = data->output_activation_max;
// Initialize cmsis-nn per channel quantization parameters
cmsis_nn_per_channel_quant_params quant_params;
quant_params.multiplier = data->per_channel_output_multiplier;
quant_params.shift = data->per_channel_output_shift;
#if defined(__ARM_FEATURE_DSP)
RuntimeShape filter_shape = GetTensorShape(filter);
RuntimeShape input_shape = GetTensorShape(input);
RuntimeShape output_shape = GetTensorShape(output);
RuntimeShape bias_shape = GetTensorShape(bias);
// Sanity check.
TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
// Initialize cmsis-nn dimensions
// Input
cmsis_nn_dims input_dims;
input_dims.n = batch_size;
input_dims.h = input_shape.Dims(1);
input_dims.w = input_shape.Dims(2);
input_dims.c = input_depth;
// Filter
cmsis_nn_dims filter_dims;
filter_dims.n = output_depth;
filter_dims.h = filter_shape.Dims(1);
filter_dims.w = filter_shape.Dims(2);
filter_dims.c = input_depth;
// Bias
cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_depth;
// Output
cmsis_nn_dims output_dims;
output_dims.n = batch_size;
output_dims.h = output_shape.Dims(1);
output_dims.w = output_shape.Dims(2);
output_dims.c = output_depth;
// Initialize cmsis-nn context
cmsis_nn_context ctx;
ctx.buf = nullptr;
ctx.size = 0;
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
if (*buffer_idx > -1) {
ctx.buf = context->GetScratchBuffer(context, *buffer_idx);
// Note: ctx.size is currently not used in cmsis-nn.
// The buffer should be allocated in the Prepare function through
// arm_convolve_wrapper_s8_get_buffer_size
}
// arm_convolve_wrapper_s8 dispatches the optimized kernel accordingly with
// the parameters passed
arm_status status = arm_convolve_wrapper_s8(
&ctx, &conv_params, &quant_params, &input_dims,
GetTensorData<int8_t>(input), &filter_dims, GetTensorData<int8_t>(filter),
&bias_dims, GetTensorData<int32>(bias), &output_dims,
GetTensorData<int8_t>(output));
if (status == ARM_MATH_SUCCESS) {
return kTfLiteOk;
} else {
return kTfLiteError;
}
#else
#pragma message( \
"CMSIS-NN optimization for conv not available for this target. Using reference kernel.")
ConvParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.output_offset = output->params.zero_point;
@@ -216,91 +330,6 @@ TfLiteStatus EvalQuantizedPerChannel(
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
#if defined(__ARM_FEATURE_DSP)
RuntimeShape filter_shape = GetTensorShape(filter);
RuntimeShape input_shape = GetTensorShape(input);
RuntimeShape output_shape = GetTensorShape(output);
RuntimeShape bias_shape = GetTensorShape(bias);
// Set min and max value of the output.
const int32 output_activation_min = std::numeric_limits<int8_t>::min();
const int32 output_activation_max = std::numeric_limits<int8_t>::max();
// Sanity check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
int16_t* buf = nullptr;
auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
if (*buffer_idx > -1) {
void* raw = context->GetScratchBuffer(context, *buffer_idx);
buf = reinterpret_cast<int16_t*>(raw);
}
if (op_params.padding_values.width == 0 &&
op_params.padding_values.height == 0 && (input_depth % 4 == 0) &&
op_params.stride_width == 1 && op_params.stride_height == 1 &&
filter_width == 1 && filter_height == 1) {
if (arm_convolve_1x1_s8_fast(
GetTensorData<int8_t>(input), input_width, input_height,
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,
op_params.padding_values.width, op_params.padding_values.height,
op_params.stride_width, op_params.stride_height,
GetTensorData<int32>(bias), GetTensorData<int8_t>(output),
data->per_channel_output_shift, data->per_channel_output_multiplier,
op_params.output_offset, op_params.input_offset,
output_activation_min, output_activation_max, output_width,
output_height, buf) != ARM_MATH_SUCCESS) {
return kTfLiteError;
}
} else if (output_height == 1 && input_height == 1 && filter_height == 1 &&
(output_width % 4 == 0) && batches == 1) {
if (arm_convolve_1_x_n_s8(
GetTensorData<int8_t>(input), input_width, input_depth, batches,
GetTensorData<int8_t>(filter), output_depth, filter_width,
op_params.padding_values.width, op_params.stride_width,
GetTensorData<int32_t>(bias), GetTensorData<int8_t>(output),
data->per_channel_output_shift, data->per_channel_output_multiplier,
op_params.output_offset, op_params.input_offset,
output_activation_min, output_activation_max, output_width,
buf) != ARM_MATH_SUCCESS) {
return kTfLiteError;
}
} else {
if (arm_convolve_s8(
GetTensorData<int8_t>(input), input_width, input_height,
input_depth, batches, GetTensorData<int8_t>(filter), output_depth,
filter_width, filter_height, op_params.padding_values.width,
op_params.padding_values.height, op_params.stride_width,
op_params.stride_height, GetTensorData<int32>(bias),
GetTensorData<int8_t>(output), data->per_channel_output_shift,
data->per_channel_output_multiplier, op_params.output_offset,
op_params.input_offset, output_activation_min,
output_activation_max, output_width, output_height,
buf) != ARM_MATH_SUCCESS) {
return kTfLiteError;
}
}
#else
#pragma message( \
"CMSIS-NN optimization for conv not available for this target. Using reference kernel.")
reference_integer_ops::ConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),

View File

@@ -28,8 +28,8 @@ LEON_BCC2_MD5 := "cdf78082be4882da2a92c9baa82fe765"
TSIM_URL := "https://www.gaisler.com/anonftp/tsim/tsim-eval-2.0.63.tar.gz"
TSIM_MD5 := "afa0095d3ed989a949e1467f94e41d2f"
CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/8a4db53f69da06e97565fe2f2e8926d193a5759d.zip"
CMSIS_MD5 := "e9864fb71b65adc4f7d92a9dea6e1aab"
CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/1150e71e07c79b538efd842aba5b210a31827ae5.zip"
CMSIS_MD5 := "e05f4222ef58825193910b41a0871dcb"
AM_SDK_URL := "http://s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.2.0.zip"
AM_SDK_MD5 := "7605fa2d4d97e6bb7a1190c92b66b597"