diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc index c98e7a2c329..6db88839073 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/memory_helpers.h" namespace tflite { @@ -96,18 +97,20 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, } void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, - const OpData* data, const TfLiteTensor* input1, - const TfLiteTensor* input2, TfLiteTensor* output) { + const OpData* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); tflite::ArithmeticParams op_params; SetActivationParams(output_activation_min, output_activation_max, &op_params); -#define TF_LITE_ADD(opname) \ - reference_ops::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_ADD(opname) \ + reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) if (data->requires_broadcast) { TF_LITE_ADD(BroadcastAdd4DSlow); } else { @@ -118,9 +121,9 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, const OpData* data, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output) { + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; @@ -136,27 +139,32 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, SetActivationParams(data->output_activation_min, data->output_activation_max, &op_params); bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); -#define TF_LITE_ADD(type, opname, dtype) \ - type::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); +#define TF_LITE_ADD(type, opname, dtype) \ + type::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)); if (output->type == kTfLiteInt8) { if (need_broadcast) { TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t); } else { arm_elementwise_add_s8( - GetTensorData(input1), GetTensorData(input2), + 
tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), op_params.input1_offset, op_params.input1_multiplier, op_params.input1_shift, op_params.input2_offset, op_params.input2_multiplier, op_params.input2_shift, - op_params.left_shift, GetTensorData(output), + op_params.left_shift, tflite::micro::GetTensorData(output), op_params.output_offset, op_params.output_multiplier, op_params.output_shift, op_params.quantized_activation_min, op_params.quantized_activation_max, - MatchingElementsSize(GetTensorShape(input1), GetTensorShape(input2), - GetTensorShape(output))); + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); } } else { if (need_broadcast) { @@ -196,9 +204,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc index 834f107dad0..cf1ce8cb5cb 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -43,6 +44,12 @@ constexpr int kConvQuantizedDimension = 0; struct OpData { TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; @@ -57,6 +64,9 @@ struct OpData { // uint8_t these would be 0 and 255. int32_t output_activation_min; int32_t output_activation_max; + + // Index to buffer for optimizations if applicable. 
+ int buffer_idx; }; inline PaddingType RuntimePaddingType(TfLitePadding padding) { @@ -110,16 +120,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(int)); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { #if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE) - OpData data; int32_t buf_size = 0; + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); auto* params = reinterpret_cast(node->builtin_data); - + auto* data = reinterpret_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); const TfLiteTensor* output = GetOutput(context, node, kOutputTensor); @@ -148,11 +159,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_dims.w = output->dims->data[2]; output_dims.c = output_shape.Dims(3); - int* buffer_idx = reinterpret_cast(node->user_data); - TF_LITE_ENSURE_STATUS(CalculateOpData( context, node, params, input_dims.w, input_dims.h, filter_dims.w, - filter_dims.h, output_dims.w, output_dims.h, input->type, &data)); + filter_dims.h, output_dims.w, output_dims.h, input->type, data)); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; if (input->type == kTfLiteInt8) { // Initialize cmsis-nn convolution parameters @@ -163,40 +176,41 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { conv_params.stride.w = params->stride_width; conv_params.dilation.h = params->dilation_height_factor; conv_params.dilation.w = params->dilation_width_factor; - conv_params.padding.h = data.padding.height; - conv_params.padding.w = data.padding.width; - conv_params.activation.min = data.output_activation_min; - conv_params.activation.max = data.output_activation_max; + conv_params.padding.h = data->padding.height; + conv_params.padding.w = data->padding.width; + conv_params.activation.min = data->output_activation_min; + conv_params.activation.max = data->output_activation_max; buf_size = arm_convolve_wrapper_s8_get_buffer_size( &conv_params, &input_dims, &filter_dims, &output_dims); } - node->user_data = buffer_idx; if (buf_size > 0) { - TF_LITE_ENSURE_STATUS( - context->RequestScratchBufferInArena(context, buf_size, buffer_idx)); + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, buf_size, &data->buffer_idx)); } else { - *buffer_idx = -1; + data->buffer_idx = -1; } #endif return kTfLiteOk; } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, - TfLiteTensor* im2col, TfLiteTensor* hwcn_weights, - TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + TfLiteConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* im2col, + TfLiteEvalTensor* hwcn_weights, + TfLiteEvalTensor* output) { + const int32_t 
input_offset = -data.input_zero_point; + const int32_t filter_offset = -data.filter_zero_point; + const int32_t output_offset = data.output_zero_point; ConvParams op_params; op_params.padding_type = RuntimePaddingType(params->padding); - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; @@ -204,46 +218,52 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.input_offset = input_offset; op_params.weights_offset = filter_offset; op_params.output_offset = output_offset; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = -data->output_shift; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; - reference_ops::Conv(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output), GetTensorShape(im2col), - GetTensorData(im2col), nullptr); + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = -data.output_shift; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; + reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(im2col), + tflite::micro::GetTensorData(im2col), nullptr); return kTfLiteOk; } TfLiteStatus EvalQuantizedPerChannel( TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, - OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output, TfLiteTensor* im2col) { + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output, TfLiteEvalTensor* im2col) { // Initialize cmsis-nn convolution parameters cmsis_nn_conv_params conv_params; - conv_params.input_offset = -input->params.zero_point; - conv_params.output_offset = output->params.zero_point; + conv_params.input_offset = -data.input_zero_point; + conv_params.output_offset = data.output_zero_point; conv_params.stride.h = params->stride_height; conv_params.stride.w = params->stride_width; conv_params.dilation.h = params->dilation_height_factor; conv_params.dilation.w = params->dilation_width_factor; - conv_params.padding.h = data->padding.height; - conv_params.padding.w = data->padding.width; - conv_params.activation.min = data->output_activation_min; - conv_params.activation.max = data->output_activation_max; + conv_params.padding.h = data.padding.height; + conv_params.padding.w = data.padding.width; + conv_params.activation.min = data.output_activation_min; + conv_params.activation.max = data.output_activation_max; // Initialize cmsis-nn per channel quantization parameters cmsis_nn_per_channel_quant_params quant_params; - quant_params.multiplier = 
data->per_channel_output_multiplier; - quant_params.shift = data->per_channel_output_shift; + quant_params.multiplier = + const_cast(data.per_channel_output_multiplier); + quant_params.shift = const_cast(data.per_channel_output_shift); #if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE) - RuntimeShape filter_shape = GetTensorShape(filter); - RuntimeShape input_shape = GetTensorShape(input); - RuntimeShape output_shape = GetTensorShape(output); - RuntimeShape bias_shape = GetTensorShape(bias); + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); // Consistency check. TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max); @@ -253,7 +273,7 @@ TfLiteStatus EvalQuantizedPerChannel( const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (GetTensorData(bias)) { + if (tflite::micro::GetTensorData(bias)) { TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); } @@ -291,9 +311,8 @@ TfLiteStatus EvalQuantizedPerChannel( ctx.buf = nullptr; ctx.size = 0; - auto* buffer_idx = reinterpret_cast(node->user_data); - if (*buffer_idx > -1) { - ctx.buf = context->GetScratchBuffer(context, *buffer_idx); + if (data.buffer_idx > -1) { + ctx.buf = context->GetScratchBuffer(context, data.buffer_idx); // Note: ctx.size is currently not used in cmsis-nn. // The buffer should be allocated in the Prepare function through // arm_convolve_wrapper_s8_get_buffer_size @@ -303,9 +322,10 @@ TfLiteStatus EvalQuantizedPerChannel( // the parameters passed arm_status status = arm_convolve_wrapper_s8( &ctx, &conv_params, &quant_params, &input_dims, - GetTensorData(input), &filter_dims, GetTensorData(filter), - &bias_dims, GetTensorData(bias), &output_dims, - GetTensorData(output)); + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)); if (status == ARM_MATH_SUCCESS) { return kTfLiteOk; @@ -318,42 +338,47 @@ TfLiteStatus EvalQuantizedPerChannel( "CMSIS-NN optimization for conv not available for this target. 
Using reference kernel.") ConvParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.output_offset = output->params.zero_point; + op_params.input_offset = -data.input_zero_point; + op_params.output_offset = data.output_zero_point; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; op_params.dilation_height_factor = params->dilation_height_factor; op_params.dilation_width_factor = params->dilation_width_factor; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; reference_integer_ops::ConvPerChannel( - op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), - GetTensorData<int8_t>(input), GetTensorShape(filter), - GetTensorData<int8_t>(filter), GetTensorShape(bias), - GetTensorData<int32_t>(bias), GetTensorShape(output), - GetTensorData<int8_t>(output)); + op_params, data.per_channel_output_multiplier, + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData<int32_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); #endif return kTfLiteOk; } TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* im2col, - TfLiteTensor* hwcn_weights, TfLiteTensor* output) { + TfLiteConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col, + TfLiteEvalTensor* hwcn_weights, + TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - + // TODO(b/154032858): Investigate removing extra copies.
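The conv hunks above follow the pattern used throughout this patch: quantization metadata such as zero points is read from the full TfLiteTensor structs in Prepare() and cached in the persistent OpData, because Eval() now only receives TfLiteEvalTensor, which carries shape and data pointers but no quantization parameters. Below is a trimmed-down sketch of that per-node lifecycle, using the struct fields and tensor-index constants from the conv kernel; it is assumed to live inside the kernel's existing tflite namespaces, and the error checks and actual compute dispatch are omitted, so this is an illustration rather than the patched file itself.

  #include "tensorflow/lite/c/common.h"
  #include "tensorflow/lite/kernels/kernel_util.h"
  #include "tensorflow/lite/micro/kernels/kernel_util.h"

  constexpr int kInputTensor = 0;
  constexpr int kFilterTensor = 1;
  constexpr int kOutputTensor = 0;

  struct OpData {
    // Cached tensor zero point values for quantized operations.
    int32_t input_zero_point;
    int32_t filter_zero_point;
    int32_t output_zero_point;
    // Index to a scratch buffer, or -1 if none was requested.
    int buffer_idx;
  };

  void* Init(TfLiteContext* context, const char* buffer, size_t length) {
    // One OpData per node, allocated once in the persistent arena.
    return context->AllocatePersistentBuffer(context, sizeof(OpData));
  }

  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
    auto* data = static_cast<OpData*>(node->user_data);
    // The full TfLiteTensor, including quantization params, is only
    // guaranteed to be available here, not in Eval().
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    data->input_zero_point = input->params.zero_point;
    data->filter_zero_point = filter->params.zero_point;
    data->output_zero_point = output->params.zero_point;
    data->buffer_idx = -1;
    return kTfLiteOk;
  }

  TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    // Eval() only sees TfLiteEvalTensor; quantization data comes from the
    // cached OpData rather than from input->params.zero_point.
    const OpData& data = *static_cast<const OpData*>(node->user_data);
    const TfLiteEvalTensor* input =
        tflite::micro::GetEvalInput(context, node, kInputTensor);
    TfLiteEvalTensor* output =
        tflite::micro::GetEvalOutput(context, node, kOutputTensor);
    // CMSIS-NN and the reference kernels take offsets derived from the cached
    // zero points (e.g. -data.input_zero_point) together with
    // tflite::micro::GetTensorShape() / GetTensorData<int8_t>().
    (void)input;   // Unused in this trimmed sketch.
    (void)output;  // Unused in this trimmed sketch.
    return kTfLiteOk;
  }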
ConvParams op_params; op_params.padding_type = RuntimePaddingType(params->padding); - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; @@ -361,66 +386,47 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - reference_ops::Conv(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output), GetTensorShape(im2col), - GetTensorData(im2col)); + reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(im2col), + tflite::micro::GetTensorData(im2col)); return kTfLiteOk; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); - const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - int input_width = input->dims->data[2]; - int input_height = input->dims->data[1]; - int filter_width = filter->dims->data[2]; - int filter_height = filter->dims->data[1]; - int output_width = output->dims->data[2]; - int output_height = output->dims->data[1]; - - OpData data; - - // All per-channel quantized tensors need valid zero point and scale arrays. - if (input->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, filter->quantization.type, - kTfLiteAffineQuantization); - - const auto* affine_quantization = - reinterpret_cast( - filter->quantization.params); - TF_LITE_ENSURE(context, affine_quantization); - TF_LITE_ENSURE(context, affine_quantization->scale); - TF_LITE_ENSURE(context, affine_quantization->zero_point); - TF_LITE_ENSURE(context, - affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kConvQuantizedDimension]); - TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, - affine_quantization->zero_point->size); - } - - TF_LITE_ENSURE_STATUS(CalculateOpData( - context, node, params, input_width, input_height, filter_width, - filter_height, output_width, output_height, input->type, &data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: - return EvalFloat(context, node, params, &data, input, filter, bias, - nullptr, nullptr, output); + EvalFloat(context, node, params, data, input, filter, bias, nullptr, + nullptr, output); break; case kTfLiteInt8: - return EvalQuantizedPerChannel(context, node, params, &data, input, - filter, bias, output, nullptr); + return EvalQuantizedPerChannel(context, node, params, data, input, filter, + bias, output, nullptr); break; case kTfLiteUInt8: - return EvalQuantized(context, node, params, &data, input, filter, bias, + return EvalQuantized(context, node, params, data, input, filter, bias, nullptr, nullptr, output); break; default: diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc index 457b3f854de..42ac15a0837 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -44,6 +45,12 @@ constexpr int kDepthwiseConvQuantizedDimension = 3; struct OpData { TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; @@ -115,6 +122,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); const TfLiteType data_type = input->type; int width = SizeOfDimension(input, 2); @@ -150,8 +158,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { filter_width, filter_height, data_type, data)); + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + if (input->type == kTfLiteInt8) { - const TfLiteTensor* output = GetOutput(context, node, kOutputTensor); RuntimeShape input_shape = GetTensorShape(input); RuntimeShape output_shape = GetTensorShape(output); RuntimeShape filter_shape = GetTensorShape(filter); @@ -200,8 +211,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { void EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, const OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -220,25 +231,30 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_max = output_activation_max; tflite::reference_ops::DepthwiseConv( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), - 
GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, OpData* data, - const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { cmsis_nn_dw_conv_params dw_conv_params; dw_conv_params.dilation.h = params->dilation_height_factor; dw_conv_params.dilation.w = params->dilation_width_factor; // Call to reference implementation can be removed when dilation is supported // in the optimized implementations. if (1 == dw_conv_params.dilation.h && 1 == dw_conv_params.dilation.w) { - dw_conv_params.input_offset = -input->params.zero_point; - dw_conv_params.output_offset = output->params.zero_point; + dw_conv_params.input_offset = -data->input_zero_point; + dw_conv_params.output_offset = data->output_zero_point; dw_conv_params.stride.h = params->stride_height; dw_conv_params.stride.w = params->stride_width; dw_conv_params.padding.h = data->padding.height; @@ -252,10 +268,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, quant_params.multiplier = data->per_channel_output_multiplier; quant_params.shift = data->per_channel_output_shift; - RuntimeShape filter_shape = GetTensorShape(filter); - RuntimeShape input_shape = GetTensorShape(input); - RuntimeShape output_shape = GetTensorShape(output); - RuntimeShape bias_shape = GetTensorShape(bias); + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); TFLITE_DCHECK_LE(dw_conv_params.activation.min, dw_conv_params.activation.max); @@ -263,7 +279,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - if (GetTensorData(bias)) { + if (tflite::micro::GetTensorData(bias)) { TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); } @@ -300,13 +316,14 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, ctx.buf = context->GetScratchBuffer(context, data->buffer_idx); } - TFLITE_DCHECK_EQ(arm_depthwise_conv_wrapper_s8( - &ctx, &dw_conv_params, &quant_params, &input_dims, - GetTensorData(input), &filter_dims, - GetTensorData(filter), &bias_dims, - GetTensorData(bias), &output_dims, - GetTensorData(output)), - ARM_MATH_SUCCESS); + TFLITE_DCHECK_EQ( + arm_depthwise_conv_wrapper_s8( + &ctx, &dw_conv_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), + ARM_MATH_SUCCESS); } else { DepthwiseParams op_params; op_params.padding_type = PaddingType::kSame; @@ -317,30 +334,34 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, op_params.dilation_width_factor = 
params->dilation_width_factor; op_params.dilation_height_factor = params->dilation_height_factor; op_params.depth_multiplier = params->depth_multiplier; - op_params.input_offset = -input->params.zero_point; + op_params.input_offset = -data->input_zero_point; op_params.weights_offset = 0; - op_params.output_offset = output->params.zero_point; + op_params.output_offset = data->output_zero_point; // TODO(b/130439627): Use calculated value for clamping. op_params.quantized_activation_min = std::numeric_limits::min(); op_params.quantized_activation_max = std::numeric_limits::max(); reference_integer_ops::DepthwiseConvPerChannel( op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + data->per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, const OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const int32_t input_offset = -data->input_zero_point; + const int32_t filter_offset = -data->filter_zero_point; + const int32_t output_offset = data->output_zero_point; tflite::DepthwiseParams op_params; // Padding type is ignored, but still set. 
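Both the conv and depthwise hunks route CMSIS-NN's temporary working memory through the micro arena rather than the stack: Prepare() asks the CMSIS-NN wrapper how much scratch memory it needs and registers a request, and Eval() resolves the returned index back to a pointer and hands it over in a cmsis_nn_context. A condensed sketch of that flow, pieced together from the conv kernel above; the cmsis_nn_conv_params and cmsis_nn_dims structs are assumed to have been filled in earlier in Prepare().

  // In Prepare(), after conv_params / input_dims / filter_dims / output_dims
  // have been populated from the node's tensors:
  int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(
      &conv_params, &input_dims, &filter_dims, &output_dims);
  if (buf_size > 0) {
    // Reserve the scratch memory in the arena; the index is kept in OpData.
    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
        context, buf_size, &data->buffer_idx));
  } else {
    data->buffer_idx = -1;  // This configuration needs no scratch memory.
  }

  // In Eval(), the index is turned back into a pointer for CMSIS-NN:
  cmsis_nn_context ctx;
  ctx.buf = nullptr;
  ctx.size = 0;  // Size is currently unused by CMSIS-NN.
  if (data.buffer_idx > -1) {
    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
  }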
@@ -363,34 +384,39 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, if (1 == op_params.dilation_width_factor && 1 == op_params.dilation_height_factor) { - RuntimeShape filter_shape = GetTensorShape(filter); + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); const int filter_height = filter_shape.Dims(1); const int filter_width = filter_shape.Dims(2); - RuntimeShape input_shape = GetTensorShape(input); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); const int input_height = input_shape.Dims(1); const int input_width = input_shape.Dims(2); const int input_depth = input_shape.Dims(3); - RuntimeShape output_shape = GetTensorShape(output); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); const int output_height = output_shape.Dims(1); const int output_width = output_shape.Dims(2); arm_depthwise_conv_u8_basic_ver1( - GetTensorData(input), input_width, input_height, input_depth, - GetTensorData(filter), filter_width, filter_height, - op_params.depth_multiplier, op_params.padding_values.width, - op_params.padding_values.height, op_params.stride_width, - op_params.stride_height, op_params.dilation_width_factor, - op_params.dilation_height_factor, GetTensorData(bias), - op_params.input_offset, op_params.weights_offset, - op_params.output_offset, GetTensorData(output), output_width, + tflite::micro::GetTensorData(input), input_width, input_height, + input_depth, tflite::micro::GetTensorData(filter), + filter_width, filter_height, op_params.depth_multiplier, + op_params.padding_values.width, op_params.padding_values.height, + op_params.stride_width, op_params.stride_height, + op_params.dilation_width_factor, op_params.dilation_height_factor, + tflite::micro::GetTensorData(bias), op_params.input_offset, + op_params.weights_offset, op_params.output_offset, + tflite::micro::GetTensorData(output), output_width, output_height, op_params.quantized_activation_min, op_params.quantized_activation_max, op_params.output_shift, op_params.output_multiplier); } else { tflite::reference_ops::DepthwiseConv( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } @@ -402,11 +428,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { reinterpret_cast(node->builtin_data); OpData& data = *(static_cast(node->user_data)); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); - const TfLiteTensor* bias = - (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? 
tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; // TODO(aselle): Consider whether float conv and quantized conv should be // separate ops to avoid dispatch overhead here. diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc index 074f4a9f251..8af92e6d245 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -43,6 +44,11 @@ struct OpData { int input_quantized_index; // Index to buffer for optimizations if applicable. int buffer_idx; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; }; constexpr int kInputTensor = 0; @@ -69,6 +75,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, activation, output, &data->output_activation_min, &data->output_activation_max)); + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; } return status; } @@ -125,25 +134,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, - const OpData& data, const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { // The 'if' condition can be removed when null handling of bias is added to // arm_fully_connected_s8 - if (nullptr != GetTensorData(bias)) { - RuntimeShape output_shape = GetTensorShape(output); + if (nullptr != tflite::micro::GetTensorData(bias)) { + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); const int batches = output_shape.Dims(0); const int output_depth = output_shape.Dims(1); - const RuntimeShape filter_shape = GetTensorShape(filter); + const RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); const int filter_dim_count = filter_shape.DimensionsCount(); const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - const RuntimeShape input_shape = GetTensorShape(input); + const RuntimeShape input_shape = tflite::micro::GetTensorShape(input); cmsis_nn_fc_params fc_params; - fc_params.input_offset = -input->params.zero_point; - fc_params.filter_offset = -filter->params.zero_point; - fc_params.output_offset = output->params.zero_point; + fc_params.input_offset = -data.input_zero_point; + fc_params.output_offset = data.output_zero_point; fc_params.activation.min = data.output_activation_min; fc_params.activation.max = data.output_activation_max; @@ -186,17 +196,18 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE_EQ( context, - arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, - GetTensorData(input), &filter_dims, - GetTensorData(filter), &bias_dims, - GetTensorData(bias), &output_dims, - 
GetTensorData(output)), + arm_fully_connected_s8( + &ctx, &fc_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), ARM_MATH_SUCCESS); } else { tflite::FullyConnectedParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.weights_offset = -filter->params.zero_point; - op_params.output_offset = output->params.zero_point; + op_params.input_offset = -data.input_zero_point; + op_params.weights_offset = -data.filter_zero_point; + op_params.output_offset = data.output_zero_point; op_params.output_multiplier = data.output_multiplier; // TODO(b/138810107): Figure out whether output shift should be inverted op_params.output_shift = -data.output_shift; @@ -204,21 +215,26 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, op_params.quantized_activation_max = data.output_activation_max; reference_integer_ops::FullyConnected( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } return kTfLiteOk; } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - const OpData& data, const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, - TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const int32_t input_offset = -data.input_zero_point; + const int32_t filter_offset = -data.filter_zero_point; + const int32_t output_offset = data.output_zero_point; tflite::FullyConnectedParams op_params; op_params.input_offset = input_offset; @@ -230,12 +246,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.quantized_activation_min = data.output_activation_min; op_params.quantized_activation_max = data.output_activation_max; -#define TF_LITE_FULLY_CONNECTED(output_data_type) \ - reference_ops::FullyConnected( \ - op_params, GetTensorShape(input), GetTensorData(input), \ - GetTensorShape(filter), GetTensorData(filter), \ - GetTensorShape(bias), GetTensorData(bias), \ - GetTensorShape(output), GetTensorData(output)) +#define TF_LITE_FULLY_CONNECTED(output_data_type) \ + reference_ops::FullyConnected( \ + op_params, tflite::micro::GetTensorShape(input), \ + tflite::micro::GetTensorData(input), \ + tflite::micro::GetTensorShape(filter), \ + tflite::micro::GetTensorData(filter), \ + tflite::micro::GetTensorShape(bias), \ + tflite::micro::GetTensorData(bias), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) switch (output->type) { case kTfLiteUInt8: TF_LITE_FULLY_CONNECTED(uint8_t); @@ -254,8 +274,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteStatus EvalFloat(TfLiteContext* context, 
TfLiteNode* node, TfLiteFusedActivation activation, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(activation, &output_activation_min, &output_activation_max); @@ -263,10 +284,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; tflite::reference_ops::FullyConnected( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } @@ -275,10 +300,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const auto* params = static_cast(node->builtin_data); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); - const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc b/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc index 6f9113a02f6..00d884eb415 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/memory_helpers.h" namespace tflite { @@ -38,6 +39,11 @@ struct OpData { int32_t output_multiplier; int output_shift; + + // Cached tensor zero point values for quantized operations. 
+ int32_t input1_zero_point; + int32_t input2_zero_point; + int32_t output_zero_point; }; TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, @@ -65,6 +71,11 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); @@ -74,44 +85,59 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return AllocateOutputDimensionsFromInput(context, input1, input2, output); } + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + data->input1_zero_point = input1->params.zero_point; + data->input2_zero_point = input2->params.zero_point; + data->output_zero_point = output->params.zero_point; + CalculateOpData(context, node, params, data); + return kTfLiteOk; } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, OpData* data, - const TfLiteTensor* input1, const TfLiteTensor* input2, - TfLiteTensor* output) { + TfLiteMulParams* params, const OpData& data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) { tflite::ArithmeticParams op_params; - SetActivationParams(data->output_activation_min, - data->output_activation_max, &op_params); - op_params.input1_offset = -input1->params.zero_point; - op_params.input2_offset = -input2->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; + SetActivationParams(data.output_activation_min, data.output_activation_max, + &op_params); + op_params.input1_offset = -data.input1_zero_point; + op_params.input2_offset = -data.input2_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = data.output_shift; bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); -#define TF_LITE_MUL(type, opname, dtype) \ - type::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); +#define TF_LITE_MUL(type, opname, dtype) \ + type::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)); if (output->type == kTfLiteInt8) { if (need_broadcast) { TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t); } else { arm_elementwise_mul_s8( - GetTensorData(input1), GetTensorData(input2), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), op_params.input1_offset, op_params.input2_offset, - GetTensorData(output), 
op_params.output_offset, - op_params.output_multiplier, op_params.output_shift, - op_params.quantized_activation_min, + tflite::micro::GetTensorData(output), + op_params.output_offset, op_params.output_multiplier, + op_params.output_shift, op_params.quantized_activation_min, op_params.quantized_activation_max, - MatchingElementsSize(GetTensorShape(input1), GetTensorShape(input2), - GetTensorShape(output))); + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); } } else if (output->type == kTfLiteUInt8) { if (need_broadcast) { @@ -125,9 +151,8 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, } void EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, OpData* data, - const TfLiteTensor* input1, const TfLiteTensor* input2, - TfLiteTensor* output) { + TfLiteMulParams* params, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -135,12 +160,15 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, SetActivationParams(output_activation_min, output_activation_max, &op_params); bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); -#define TF_LITE_MUL(opname) \ - reference_ops::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); +#define TF_LITE_MUL(opname) \ + reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)); if (need_broadcast) { TF_LITE_MUL(BroadcastMul4DSlow); @@ -152,21 +180,24 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - OpData data; - const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); - const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - CalculateOpData(context, node, params, &data); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); switch (input1->type) { case kTfLiteUInt8: case kTfLiteInt8: - EvalQuantized(context, node, params, &data, input1, input2, output); + EvalQuantized(context, node, params, data, input1, input2, output); break; case kTfLiteFloat32: - EvalFloat(context, node, params, &data, input1, input2, output); + EvalFloat(context, node, params, input1, input2, output); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -179,8 +210,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace mul TfLiteRegistration Register_MUL() { - return 
{nullptr /* Init */, nullptr /* Free */, nullptr /* Prepare */, - mul::Eval}; + return {mul::Init, nullptr /* Free */, mul::Prepare, mul::Eval}; } } // namespace micro diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc index d0babb4b98d..4229b2c244c 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -72,7 +73,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData& data, - const TfLiteTensor* input, TfLiteTensor* output) { + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); @@ -86,14 +87,16 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, op_params.padding_values.width = data.padding.width; op_params.float_activation_min = activation_min; op_params.float_activation_max = activation_max; - reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData& data, - const TfLiteTensor* input, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); PoolParams op_params; @@ -107,14 +110,15 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, op_params.quantized_activation_max = data.activation_max; if (input->type == kTfLiteUInt8) { - reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - RuntimeShape input_shape = GetTensorShape(input); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - RuntimeShape output_shape = GetTensorShape(output); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); const int depth = MatchingDim(input_shape, 3, output_shape, 3); @@ -154,15 +158,16 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, TFLITE_DCHECK_EQ( arm_avgpool_s8(&ctx, &pool_params, &input_dims, - GetTensorData(input), &filter_dims, &output_dims, - GetTensorData(output)), + tflite::micro::GetTensorData(input), + &filter_dims, &output_dims, + tflite::micro::GetTensorData(output)), ARM_MATH_SUCCESS); } } void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpData& data, - TfLiteTensor* input, TfLiteTensor* output) { 
+ const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); @@ -175,14 +180,16 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.padding_values.width = data.padding.width; op_params.float_activation_min = activation_min; op_params.float_activation_max = activation_max; - reference_ops::MaxPool(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpData& data, - TfLiteTensor* input, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { tflite::PoolParams op_params; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; @@ -192,16 +199,18 @@ void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node, op_params.padding_values.width = data.padding.width; op_params.quantized_activation_min = data.activation_min; op_params.quantized_activation_max = data.activation_max; - reference_ops::MaxPool(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData& data, - TfLiteTensor* input, TfLiteTensor* output) { - RuntimeShape input_shape = GetTensorShape(input); - RuntimeShape output_shape = GetTensorShape(output); + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); const int depth = MatchingDim(input_shape, 3, output_shape, 3); cmsis_nn_dims input_dims; @@ -237,10 +246,12 @@ TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node, ctx.buf = context->GetScratchBuffer(context, data.buffer_idx); } - TFLITE_DCHECK_EQ(arm_max_pool_s8(&ctx, &pool_params, &input_dims, - GetTensorData(input), &filter_dims, - &output_dims, GetTensorData(output)), - ARM_MATH_SUCCESS); + TFLITE_DCHECK_EQ( + arm_max_pool_s8(&ctx, &pool_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + &output_dims, + tflite::micro::GetTensorData(output)), + ARM_MATH_SUCCESS); return kTfLiteOk; } @@ -307,8 +318,10 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { const OpData& data = *(static_cast(node->user_data)); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); // Inputs and outputs share the same type, guaranteed by the converter. 
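For the int8 pooling paths, the patch builds the cmsis_nn_dims arguments from the RuntimeShape objects returned by the tflite::micro helpers before calling arm_avgpool_s8 or arm_max_pool_s8. Roughly, for an NHWC tensor it looks like the following sketch; the batch count of 1 and the exact field assignments are an assumption based on the shapes used above, not a verbatim quote of the patch.

  const RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
  const RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  // The channel count must match between input and output for pooling.
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);

  cmsis_nn_dims input_dims;
  input_dims.n = 1;                    // Batch.
  input_dims.h = input_shape.Dims(1);  // Height.
  input_dims.w = input_shape.Dims(2);  // Width.
  input_dims.c = depth;                // Channels.

  cmsis_nn_dims output_dims;
  output_dims.n = 1;
  output_dims.h = output_shape.Dims(1);
  output_dims.w = output_shape.Dims(2);
  output_dims.c = depth;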
@@ -307,8 +318,10 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
 
   // Inputs and outputs share the same type, guaranteed by the converter.
   switch (input->type) {
@@ -332,9 +345,10 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
-  TfLiteTensor* input = &context->tensors[flatbuffers::EndianScalar(
-      node->inputs->data[kInputTensor])];
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
 
   switch (input->type) {
     case kTfLiteFloat32:
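AverageEval and MaxEval now fetch tensors through tflite::micro::GetEvalInput / GetEvalOutput rather than GetInput/GetOutput or indexing context->tensors directly. A rough sketch of what these wrappers are assumed to do, modeled on tensorflow/lite/micro/kernels/kernel_util.h (the GetEvalTensor callback on TfLiteContext is assumed to be present in this version of the micro runtime):

// Sketch only: assumed index-based lookup through the TFLM context.
namespace tflite {
namespace micro {

inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
                                            const TfLiteNode* node, int index) {
  // Resolve the node's input slot to a flat tensor index, then ask the
  // interpreter-owned context for the corresponding TfLiteEvalTensor.
  return context->GetEvalTensor(context, node->inputs->data[index]);
}

inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
                                       const TfLiteNode* node, int index) {
  return context->GetEvalTensor(context, node->outputs->data[index]);
}

}  // namespace micro
}  // namespace tflite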
diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
index 790af35f217..194bba4f26a 100644
--- a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
 
 namespace tflite {
 namespace ops {
@@ -47,8 +48,6 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
       TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
     }
   }
-  TF_LITE_ENSURE(context, (output->params.scale == 1.f / 256) ||
-                              (output->params.scale == 1.f / 255));
 
   static const int kScaledDiffIntegerBits = 5;
@@ -71,37 +70,53 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
 
 }  // namespace
 
+void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
+}
+
 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
   const TfLiteTensor* input = GetInput(context, node, 0);
   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
 
-  return kTfLiteOk;
+  TfLiteTensor* output = GetOutput(context, node, 0);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
+  return CalculateSoftmaxParams(context, input, output, params, data);
 }
 
 // Takes a tensor and performs softmax along the last dimension.
-void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
+void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
                   const SoftmaxParams& op_data) {
-  tflite::reference_ops::Softmax(
-      op_data, GetTensorShape(input), GetTensorData<float>(input),
-      GetTensorShape(output), GetTensorData<float>(output));
+  tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
+                                 tflite::micro::GetTensorData<float>(input),
+                                 tflite::micro::GetTensorShape(output),
+                                 tflite::micro::GetTensorData<float>(output));
 }
 
-void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
                       const SoftmaxParams& op_data) {
-  const auto input_shape = GetTensorShape(input);
-  const auto output_shape = GetTensorShape(output);
+  const auto input_shape = tflite::micro::GetTensorShape(input);
+  const auto output_shape = tflite::micro::GetTensorShape(output);
   if (input->type == kTfLiteUInt8) {
-    tflite::reference_ops::Softmax(op_data, input_shape,
-                                   GetTensorData<uint8_t>(input), output_shape,
-                                   GetTensorData<uint8_t>(output));
+    tflite::reference_ops::Softmax(
+        op_data, tflite::micro::GetTensorShape(input),
+        tflite::micro::GetTensorData<uint8_t>(input),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<uint8_t>(output));
   } else {
     if (output->type == kTfLiteInt16) {
       tflite::reference_ops::Softmax(
-          op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
-          GetTensorShape(output), GetTensorData<int16_t>(output));
+          op_data, tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int16_t>(output));
     } else {
       const int trailing_dim = input_shape.DimensionsCount() - 1;
       const int outer_size =
@@ -109,31 +124,30 @@ void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
       const int depth =
           MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
 
-      arm_softmax_s8(GetTensorData<int8_t>(input), outer_size, depth,
-                     op_data.input_multiplier, op_data.input_left_shift,
-                     op_data.diff_min, GetTensorData<int8_t>(output));
+      arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), outer_size,
+                     depth, op_data.input_multiplier, op_data.input_left_shift,
+                     op_data.diff_min,
+                     tflite::micro::GetTensorData<int8_t>(output));
     }
   }
 }
 
 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
-  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
 
-  const TfLiteTensor* input = GetInput(context, node, 0);
-  TfLiteTensor* output = GetOutput(context, node, 0);
-
-  SoftmaxParams op_data;
-  TF_LITE_ENSURE_STATUS(
-      CalculateSoftmaxParams(context, input, output, params, &op_data));
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const SoftmaxParams& data =
+      *(static_cast<const SoftmaxParams*>(node->user_data));
 
   switch (input->type) {
     case kTfLiteFloat32: {
-      SoftmaxFloat(input, output, op_data);
+      SoftmaxFloat(input, output, data);
       return kTfLiteOk;
    }
    case kTfLiteInt8:
    case kTfLiteUInt8: {
-      SoftmaxQuantized(input, output, op_data);
+      SoftmaxQuantized(input, output, data);
      return kTfLiteOk;
    }
    default:
@@ -142,10 +156,11 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteError;
   }
 }
+
 }  // namespace activations
 
 TfLiteRegistration Register_SOFTMAX() {
-  return {/*init=*/nullptr,
+  return {/*init=*/activations::SoftmaxInit,
           /*free=*/nullptr,
           /*prepare=*/activations::SoftmaxPrepare,
           /*invoke=*/activations::SoftmaxEval,
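With SoftmaxInit registered, the SoftmaxParams block lives in the persistent arena and CalculateSoftmaxParams runs once in Prepare instead of on every invoke. A minimal sketch of the lifecycle the registration above implies; the driver function below is illustrative only (a real application goes through MicroInterpreter, and `context` and `node` are assumed to be set up by it):

// Illustrative only: the init -> prepare -> invoke order the framework follows
// for the registration returned by Register_SOFTMAX().
TfLiteStatus RunSoftmaxOnce(TfLiteContext& context, TfLiteNode& node) {
  TfLiteRegistration registration = tflite::ops::micro::Register_SOFTMAX();

  // init: allocate a SoftmaxParams block in the persistent arena.
  node.user_data = registration.init(&context, /*buffer=*/nullptr, /*length=*/0);

  // prepare: validate shapes and fill the persistent SoftmaxParams once.
  TF_LITE_ENSURE_STATUS(registration.prepare(&context, &node));

  // invoke: can run repeatedly, reusing the cached params from node->user_data
  // and reading tensor data through the TfLiteEvalTensor accessors.
  return registration.invoke(&context, &node);
}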