This CL renames `kTfLiteActRelu1` to `kTfLiteActReluN1To1` because the activation clips at a minimum of -1, not 0. The rename also completes the existing naming convention: TFLite already uses `kTfLiteBuiltinReluN1To1` and `ActivationFunctionType_RELU_N1_TO_1` for this op.

PiperOrigin-RevId: 317589358
Change-Id: I2424104da45234346749b3921d563e9161e809cc
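For reference, the float-domain behavior behind the rename, as a minimal sketch (the helper names below are illustrative, not part of the TFLite API): `kTfLiteActRelu` clips only from below at 0, while `kTfLiteActReluN1To1` clips to the range [-1, 1], which is why the old name `kTfLiteActRelu1` was misleading.

#include <algorithm>

// Illustrative float references for the two fused activations.
inline float ApplyRelu(float x) { return std::max(0.0f, x); }  // range [0, +inf)
inline float ApplyReluN1To1(float x) {                         // range [-1, 1]
  return std::min(1.0f, std::max(-1.0f, x));
}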
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/kernel_util.h"
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <memory>
|
|
|
|
#include "tensorflow/lite/c/builtin_op_data.h"
|
|
#include "tensorflow/lite/c/common.h"
|
|
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
|
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
|
|
|
namespace tflite {
|
|
|
|
// Per-axis
TfLiteStatus PopulateConvolutionQuantizationParams(
    TfLiteContext* context, const TfLiteTensor* input,
    const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
    const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
    int32_t* output_activation_min, int32_t* output_activation_max,
    int32_t* per_channel_multiplier, int* per_channel_shift) {
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  return PopulateConvolutionQuantizationParams(
      context, input, filter, bias, output, activation, multiplier, shift,
      output_activation_min, output_activation_max, per_channel_multiplier,
      per_channel_shift, affine_quantization->scale->size);
}

// Per-axis & per-tensor
|
|
TfLiteStatus PopulateConvolutionQuantizationParams(
|
|
TfLiteContext* context, const TfLiteTensor* input,
|
|
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
|
|
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
|
|
int32_t* output_activation_min, int32_t* output_activation_max,
|
|
int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
|
|
TF_LITE_ENSURE_EQ(context, input->quantization.type,
|
|
kTfLiteAffineQuantization);
|
|
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
|
kTfLiteAffineQuantization);
|
|
// TODO(jianlijianli): Enable bias type check and bias scale == input scale
|
|
// * filter scale for each channel in affine quantization once bias
|
|
// quantization is properly populated.
|
|
// TF_LITE_ENSURE_EQ(context, bias->quantization.type,
|
|
// kTfLiteAffineQuantization);
|
|
|
|
// Check data type.
|
|
const auto* affine_quantization =
|
|
reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
|
|
TF_LITE_ENSURE(context, affine_quantization);
|
|
TF_LITE_ENSURE(context, affine_quantization->scale);
|
|
const bool is_per_channel = affine_quantization->scale->size > 1;
|
|
if (is_per_channel) {
|
|
// Currently only Int8/Int16 is supported for per channel quantization.
|
|
TF_LITE_ENSURE(context,
|
|
input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
|
|
TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
|
|
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels);
|
|
TF_LITE_ENSURE_EQ(
|
|
context, num_channels,
|
|
filter->dims->data[affine_quantization->quantized_dimension]);
|
|
}
|
|
|
|
// Populate multiplier and shift using affine quantization.
|
|
const float input_scale = input->params.scale;
|
|
const float output_scale = output->params.scale;
|
|
const float* filter_scales = affine_quantization->scale->data;
|
|
for (int i = 0; i < num_channels; ++i) {
|
|
// If per-tensor quantization parameter is specified, broadcast it along the
|
|
// quantization dimension (channels_out).
|
|
const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
|
|
const double filter_scale = static_cast<double>(scale);
|
|
const double effective_output_scale = static_cast<double>(input_scale) *
|
|
filter_scale /
|
|
static_cast<double>(output_scale);
|
|
int32_t significand;
|
|
int channel_shift;
|
|
QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
|
|
per_channel_multiplier[i] = significand;
|
|
per_channel_shift[i] = channel_shift;
|
|
}
|
|
|
|
// Populate scalar quantization parameters.
|
|
// This check on legacy quantization parameters is kept only for backward
|
|
// compatibility.
|
|
if (input->type == kTfLiteUInt8) {
|
|
// Check bias scale == input scale * filter scale.
|
|
double real_multiplier = 0.0;
|
|
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
|
|
context, input, filter, bias, output, &real_multiplier));
|
|
int exponent;
|
|
|
|
// Populate quantization parameters with multiplier and shift.
|
|
QuantizeMultiplier(real_multiplier, multiplier, &exponent);
|
|
*shift = -exponent;
|
|
}
|
|
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
|
|
input->type == kTfLiteInt16) {
|
|
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
|
context, activation, output, output_activation_min,
|
|
output_activation_max));
|
|
}
|
|
return kTfLiteOk;
|
|
}
|
|
|
|
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                              const TfLiteTensor* input,
                                              const TfLiteTensor* filter,
                                              const TfLiteTensor* bias,
                                              TfLiteTensor* output,
                                              double* multiplier) {
  const double input_product_scale = static_cast<double>(input->params.scale) *
                                     static_cast<double>(filter->params.scale);
  // TODO(ahentz): The following conditions must be guaranteed by the training
  // pipeline.
  if (bias) {
    const double bias_scale = static_cast<double>(bias->params.scale);
    // Here we're making sure the input_product_scale & bias_scale are about
    // the same. Since we have:
    // (output - output_zp) * output_scale =
    // input_product_scale * input_product + bias * bias_scale ---- (0)
    //
    // (0) equals:
    // (input_product + bias) * input_product_scale ----- (1)
    //           +
    // bias * (bias_scale - input_product_scale)   ------ (2)
    //
    // For the real kernel computation, we're doing (1), so we really need to
    // make sure (2) has minimum impact on the output, so:
    // bias * (bias_scale - input_product_scale) / output_scale should be
    // a small number for an integer.
    // Since normally bias should be within a small range.
    // We should expect (bias_scale - input_product_scale) / output_scale to
    // be a small number like 0.02.
    const double scale_diff = std::abs(input_product_scale - bias_scale);
    const double output_scale = static_cast<double>(output->params.scale);

    TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02);
  }
  return GetQuantizedConvolutionMultipler(context, input, filter, output,
                                          multiplier);
}

TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                              const TfLiteTensor* input,
                                              const TfLiteTensor* filter,
                                              TfLiteTensor* output,
                                              double* multiplier) {
  const double input_product_scale =
      static_cast<double>(input->params.scale * filter->params.scale);
  TF_LITE_ENSURE(context, input_product_scale >= 0);
  *multiplier = input_product_scale / static_cast<double>(output->params.scale);

  return kTfLiteOk;
}

namespace {
void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
                                           int32_t qmin, int32_t qmax,
                                           TfLiteTensor* output,
                                           int32_t* act_min, int32_t* act_max) {
  const auto scale = output->params.scale;
  const auto zero_point = output->params.zero_point;

  auto quantize = [scale, zero_point](float f) {
    return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
  };

  if (activation == kTfLiteActRelu) {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = qmax;
  } else if (activation == kTfLiteActRelu6) {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(6.0));
  } else if (activation == kTfLiteActReluN1To1) {
    *act_min = std::max(qmin, quantize(-1.0));
    *act_max = std::min(qmax, quantize(1.0));
  } else {
    *act_min = qmin;
    *act_max = qmax;
  }
}
}  // namespace

TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
                                               TfLiteFusedActivation activation,
                                               TfLiteTensor* output,
                                               int32_t* act_min,
                                               int32_t* act_max) {
  int32_t qmin = 0;
  int32_t qmax = 0;
  if (output->type == kTfLiteUInt8) {
    qmin = std::numeric_limits<uint8_t>::min();
    qmax = std::numeric_limits<uint8_t>::max();
  } else if (output->type == kTfLiteInt8) {
    qmin = std::numeric_limits<int8_t>::min();
    qmax = std::numeric_limits<int8_t>::max();
  } else if (output->type == kTfLiteInt16) {
    qmin = std::numeric_limits<int16_t>::min();
    qmax = std::numeric_limits<int16_t>::max();
  } else {
    TF_LITE_ENSURE(context, false);
  }

  CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
                                        act_max);
  return kTfLiteOk;
}

bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
  return TfLiteIntArrayEqual(input1->dims, input2->dims);
}

// TODO(petewarden): Having macros around this is ugly, look at other
// strategies before replicating this approach elsewhere.
#ifndef TF_LITE_STATIC_MEMORY
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                        const TfLiteTensor* input1,
                                        const TfLiteTensor* input2,
                                        TfLiteIntArray** output_shape) {
  int dims1 = NumDimensions(input1);
  int dims2 = NumDimensions(input2);
  int out_dims = std::max(dims1, dims2);
  if (NumElements(input1) == 0) {
    *output_shape = TfLiteIntArrayCopy(input1->dims);
    return kTfLiteOk;
  }
  std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
      TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
  for (int i = 0; i < out_dims; ++i) {
    int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
    int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
    TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1);
    shape->data[out_dims - i - 1] = std::max(d1, d2);
  }
  *output_shape = shape.release();
  return kTfLiteOk;
}

TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                        const TfLiteTensor* input1,
                                        const TfLiteTensor* input2,
                                        const TfLiteTensor* input3,
                                        TfLiteIntArray** output_shape) {
  int dims1 = NumDimensions(input1);
  int dims2 = NumDimensions(input2);
  int dims3 = NumDimensions(input3);
  int out_dims = std::max(std::max(dims1, dims2), dims3);
  std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
      TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
  for (int i = 0; i < out_dims; ++i) {
    int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
    int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
    int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
    int max_value = std::max(std::max(d1, d2), d3);
    TF_LITE_ENSURE(context, d1 == 1 || d1 == max_value);
    TF_LITE_ENSURE(context, d2 == 1 || d2 == max_value);
    TF_LITE_ENSURE(context, d3 == 1 || d3 == max_value);
    shape->data[out_dims - i - 1] = max_value;
  }
  *output_shape = shape.release();
  return kTfLiteOk;
}
#endif  // TF_LITE_STATIC_MEMORY

}  // namespace tflite