Allow kernels to take different scales for prelu
PiperOrigin-RevId: 310672881
Change-Id: Ibb3044112cf3136892e1b509d18e2585a67384db
parent 4909933889
commit 5e5b33c233
Changed paths:
  tensorflow/
    compiler/mlir/lite/ir
    lite/
      kernels
      micro/kernels
      testing/op_tests
      tools/optimize
@@ -2269,8 +2269,7 @@ def TFL_PReluOp : TFL_Op<"prelu", [
           TFL_OperandIsUnrankedPred<1>,
           CPred<"$_op.getOperand(0).getType().cast<ShapedType>().getRank() == "
                 "$_op.getOperand(1).getType().cast<ShapedType>().getRank() "
-                "+ 1">]>>,
-    SameOperandsAndResultsScale]> {
+                "+ 1">]>>]> {
   let summary = "Parameterized Relu operator";

   let description = [{
@@ -84,8 +84,10 @@ struct LeakyReluOpData : public OpData {
 };

 struct PreluOpData : public OpData {
-  int32_t output_multiplier = 0;
-  int output_shift = 0;
+  int32_t output_multiplier_1 = 0;
+  int32_t output_shift_1 = 0;
+  int32_t output_multiplier_2 = 0;
+  int32_t output_shift_2 = 0;
 };

 struct HardSwishData {
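Background on the new fields (not part of the diff): each (output_multiplier_N, output_shift_N) pair is the fixed-point encoding of one real-valued scale ratio, as produced by QuantizeMultiplier. A minimal sketch of that encoding, using a hypothetical helper name rather than the actual TFLite routine:

#include <cmath>
#include <cstdint>

// Sketch only: encode a positive real multiplier r as r ~= m * 2^(shift - 31),
// where m is a 31-bit significand. This mirrors the role QuantizeMultiplier
// plays for the pairs stored in PreluOpData; it is not the TFLite code.
void QuantizeMultiplierSketch(double real_multiplier, int32_t* m, int* shift) {
  if (real_multiplier == 0.0) {
    *m = 0;
    *shift = 0;
    return;
  }
  // frexp: real_multiplier = fraction * 2^exp, with fraction in [0.5, 1).
  const double fraction = std::frexp(real_multiplier, shift);
  int64_t q = static_cast<int64_t>(std::round(fraction * (1LL << 31)));
  if (q == (1LL << 31)) {  // rounding pushed the significand up to 1.0
    q /= 2;
    ++*shift;
  }
  *m = static_cast<int32_t>(q);
}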
@@ -664,7 +666,6 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {

   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
       output->type == kTfLiteInt16) {
-    // This scale check is actually needed for quantized path:
     // prelu(x) = x if x >= 0 else x * alpha.
     // So if we translate that for quantized computation:
     //
@@ -676,19 +677,19 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
     //   ouput_q = (input_q - input_zp) * input_scale / output_scale + output_q
     // else:
     //   output_q = (input_q - input_zp) * (alpha_q - alpha_zp) * input_scale
-    //       * alpha_scale / output_scale +output_q
+    //       * alpha_scale / output_scale + output_q
     //
-    // So we have two float values which we need to translate into multiplier
-    // shift languages.
-    // For simplicity & efficiency, if we make sure input_scale
-    // & output_scale are the same, we only need to translate the latter one
-    // into multiplier & shift format.
-    TF_LITE_ENSURE(context,
-                   std::abs(input->params.scale - output->params.scale) < 1e-4);
-    double real_multiplier =
-        input->params.scale * alpha->params.scale / output->params.scale;
-    QuantizeMultiplierSmallerThanOneExp(
-        real_multiplier, &data->output_multiplier, &data->output_shift);
+    // So for input_q - input_zp >= 0:
+    // output real multiplier 1 is input_scale / output_scale;
+    // for input_q - input_zp < 0:
+    // output real multiplier 2 is input_scale * alpha_scale/ output_scale.
+    double real_multiplier_1 = input->params.scale / output->params.scale;
+    double real_multiplier_2 =
+        input->params.scale * alpha->params.scale / output->params.scale;
+    QuantizeMultiplier(real_multiplier_1, &data->output_multiplier_1,
+                       &data->output_shift_1);
+    QuantizeMultiplier(real_multiplier_2, &data->output_multiplier_2,
+                       &data->output_shift_2);
   }

   // PRelu (parameteric Relu) shares the same alpha value on "shared axis".
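To make the rewritten PreluPrepare logic concrete, here is a small worked example with illustrative scale values (not taken from the commit): the positive branch only rescales the input, while the negative branch also folds in alpha's scale.

// Illustrative numbers only.
const double input_scale = 0.05;   // scale of the quantized input
const double alpha_scale = 0.01;   // scale of the quantized alpha tensor
const double output_scale = 0.1;   // scale of the quantized output

// x >= 0: output_q - output_zp = (input_q - input_zp) * m1
const double real_multiplier_1 = input_scale / output_scale;                // 0.5

// x < 0: output_q - output_zp = (input_q - input_zp) * (alpha_q - alpha_zp) * m2
const double real_multiplier_2 = input_scale * alpha_scale / output_scale;  // 0.005

// Each ratio is then handed to QuantizeMultiplier separately, so the input
// and output tensors are free to use different scales; the old TF_LITE_ENSURE
// on matching scales is no longer needed.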
@@ -1171,8 +1172,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
       op_params.input_offset = -input->params.zero_point;
       op_params.alpha_offset = -alpha->params.zero_point;
       op_params.output_offset = output->params.zero_point;
-      op_params.output_multiplier = data->output_multiplier;
-      op_params.output_shift = data->output_shift;
+      op_params.output_multiplier_1 = data->output_multiplier_1;
+      op_params.output_shift_1 = data->output_shift_1;
+      op_params.output_multiplier_2 = data->output_multiplier_2;
+      op_params.output_shift_2 = data->output_shift_2;
       reference_ops::BroadcastPrelu4DSlow(
           op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
           GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
@@ -1184,8 +1187,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
       op_params.input_offset = -input->params.zero_point;
       op_params.alpha_offset = -alpha->params.zero_point;
       op_params.output_offset = output->params.zero_point;
-      op_params.output_multiplier = data->output_multiplier;
-      op_params.output_shift = data->output_shift;
+      op_params.output_multiplier_1 = data->output_multiplier_1;
+      op_params.output_shift_1 = data->output_shift_1;
+      op_params.output_multiplier_2 = data->output_multiplier_2;
+      op_params.output_shift_2 = data->output_shift_2;
       reference_ops::BroadcastPrelu4DSlow(
           op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
           GetTensorShape(alpha), GetTensorData<int8_t>(alpha),
@@ -48,14 +48,16 @@ inline void BroadcastPrelu4DSlow(
              params.input_offset + input_data[input_index];
          int32 output_value;
          if (input_value >= 0) {
-            output_value = input_value;
+            output_value = MultiplyByQuantizedMultiplier(
+                input_value, params.output_multiplier_1, params.output_shift_1);
          } else {
            auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
            const int32 alpha_value =
                params.alpha_offset + alpha_data[alpha_index];

            output_value = MultiplyByQuantizedMultiplier(
-                input_value * alpha_value, params.output_multiplier,
-                params.output_shift);
+                input_value * alpha_value, params.output_multiplier_2,
+                params.output_shift_2);
          }
          output_value += params.output_offset;
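Putting the pieces together, the reference kernel now applies one multiplier per branch. A condensed per-element sketch in plain floating point (a simplification for illustration; the real BroadcastPrelu4DSlow works on the fixed-point multiplier/shift pairs via MultiplyByQuantizedMultiplier and clamps the result afterwards):

#include <cmath>
#include <cstdint>

// Simplified per-element view of quantized PReLU with two multipliers.
// Not the reference_ops implementation; saturation/clamping is omitted.
int32_t QuantizedPreluElement(int32_t input_q, int32_t alpha_q,
                              int32_t input_zp, int32_t alpha_zp,
                              int32_t output_zp,
                              double real_multiplier_1,    // input_scale / output_scale
                              double real_multiplier_2) {  // input_scale * alpha_scale / output_scale
  const int32_t input_value = input_q - input_zp;
  double rescaled;
  if (input_value >= 0) {
    rescaled = input_value * real_multiplier_1;
  } else {
    const int32_t alpha_value = alpha_q - alpha_zp;
    rescaled = static_cast<double>(input_value) * alpha_value * real_multiplier_2;
  }
  return static_cast<int32_t>(std::lround(rescaled)) + output_zp;
}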
@@ -972,8 +972,10 @@ struct PreluParams {
   int32 input_offset;
   int32 alpha_offset;
   int32 output_offset;
-  int32 output_multiplier;
-  int output_shift;
+  int32 output_multiplier_1;
+  int32 output_shift_1;
+  int32 output_multiplier_2;
+  int32 output_shift_2;
 };

 struct PoolParams {
@@ -64,14 +64,20 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   const TfLiteTensor* alpha = GetInput(context, node, 1);
   TfLiteTensor* output = GetOutput(context, node, 0);
-  int32_t output_multiplier = 0;
-  int output_shift = 0;
+  int32_t output_multiplier_1 = 0;
+  int output_shift_1 = 0;
+  int32_t output_multiplier_2 = 0;
+  int output_shift_2 = 0;
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
-    double real_multiplier = static_cast<double>(input->params.scale) *
-                             static_cast<double>(alpha->params.scale) /
-                             static_cast<double>(output->params.scale);
-    QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
-                                        &output_shift);
+    double real_multiplier_1 = static_cast<double>(input->params.scale) *
+                               static_cast<double>(output->params.scale);
+    double real_multiplier_2 = static_cast<double>(input->params.scale) *
+                               static_cast<double>(alpha->params.scale) /
+                               static_cast<double>(output->params.scale);
+    QuantizeMultiplier(real_multiplier_1, &output_multiplier_1,
+                       &output_shift_1);
+    QuantizeMultiplier(real_multiplier_2, &output_multiplier_2,
+                       &output_shift_2);
   }
   switch (input->type) {
     case kTfLiteFloat32: {
@@ -86,8 +92,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
      op_params.input_offset = -input->params.zero_point;
      op_params.alpha_offset = -alpha->params.zero_point;
      op_params.output_offset = output->params.zero_point;
-      op_params.output_multiplier = output_multiplier;
-      op_params.output_shift = output_shift;
+      op_params.output_multiplier_1 = output_multiplier_1;
+      op_params.output_shift_1 = output_shift_1;
+      op_params.output_multiplier_2 = output_multiplier_2;
+      op_params.output_shift_2 = output_shift_2;
      reference_ops::BroadcastPrelu4DSlow(
          op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
          GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
@@ -154,14 +154,14 @@ TF_LITE_MICRO_TESTS_BEGIN
 TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) {
   const int output_dims_count = 12;
   float output_data[output_dims_count];
-  tflite::testing::TestPreluFloat({4, 1, 2, 2, 3},  // input shape
+  tflite::testing::TestPreluFloat({1, 2, 2, 3},  // input shape
                                  {
                                      0.0f, 0.0f, 0.0f,     // Row 1, Column 1
                                      1.0f, 1.0f, 1.0f,     // Row 1, Column 2
                                      -1.0f, -1.0f, -1.0f,  // Row 2, Column 1
                                      -2.0f, -2.0f, -2.0f,  // Row 1, Column 2
                                  },
-                                  {3, 1, 1, 3},  // alpha shape
+                                  {1, 1, 1, 3},  // alpha shape
                                  {0.0f, 1.0f, 2.0f},  // alpha values
                                  {
                                      0.0f, 0.0f, 0.0f,  // Row 1, Column 1
@@ -169,7 +169,7 @@ TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) {
                                      0.0f, -1.0f, -2.0f,  // Row 2, Column 1
                                      0.0f, -2.0f, -4.0f,  // Row 1, Column 2
                                  },
-                                  {4, 1, 2, 2, 3},  // output shape
+                                  {1, 2, 2, 3},  // output shape
                                  output_data);
 }

@@ -182,13 +182,13 @@ TF_LITE_MICRO_TEST(QuantizedPreluActivationsOpTest) {
   const int output_dims_count = 12;
   uint8_t output_data[output_dims_count];
   tflite::testing::TestPreluQuantized(
-      {4, 1, 2, 2, 3},  // input shape
+      {1, 2, 2, 3},  // input shape
      {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax),
       F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax),
       F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax),
       F2Q(-0.25f, kMin, kMax), F2Q(-0.25f, kMin, kMax),
       F2Q(-0.25f, kMin, kMax)},
-      kMin, kMax, {3, 1, 1, 3},  // alpha shape
+      kMin, kMax, {1, 1, 1, 3},  // alpha shape
      {F2Q(0.0f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(-0.5f, kMin, kMax)},
      kMin, kMax,
      {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax),
@@ -196,7 +196,7 @@ TF_LITE_MICRO_TEST(QuantizedPreluActivationsOpTest) {
      F2Q(0.0f, kMin, kMax), F2Q(-0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax),
      F2Q(0.0f, kMin, kMax), F2Q(-0.125f, kMin, kMax),
      F2Q(0.125f, kMin, kMax)},
-      {4, 1, 2, 2, 3},  // output shape
+      {1, 2, 2, 3},  // output shape
      kMin, kMax, output_data);
 }

@@ -86,7 +86,7 @@ def make_prelu_tests(options):
     alpha_shape.append(1 if dim in shared_axes else input_shape[dim])

     alpha_values = create_tensor_data(
-        np.float32, alpha_shape, min_value=-1, max_value=1)
+        np.float32, alpha_shape, min_value=-5, max_value=5)

     # There should be only 1 trainable variable tensor.
     variables = tf.compat.v1.all_variables()
@@ -818,7 +818,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
     case BuiltinOperator_PRELU:
       property.inputs = {{0, {}}, {1, {}}};
       property.outputs = {{0, {}}};
-      property.restrict_same_input_output_scale = true;
+      property.restrict_same_input_output_scale = false;
      property.version = 1;
      break;
    case BuiltinOperator_LEAKY_RELU:
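The flag flip above is what exposes the new kernel behavior to the post-training quantization tooling: with restrict_same_input_output_scale set to false, PRELU's output is no longer forced to reuse the input's quantization parameters. A rough illustration of the kind of constraint the flag expresses (hypothetical types, not the actual tools/optimize code):

// Hypothetical types for illustration only.
struct TensorQuant {
  float scale;
  int zero_point;
};

void ApplySameScaleConstraint(bool restrict_same_input_output_scale,
                              const TensorQuant& input, TensorQuant* output) {
  if (restrict_same_input_output_scale) {
    // Old PRELU behavior: output inherits the input's quantization params.
    *output = input;
  }
  // New behavior (flag == false): output keeps its own calibrated
  // scale and zero point, matching the two-multiplier kernels above.
}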