From 5e5b33c233a3067199f42af6dd5476155735dc01 Mon Sep 17 00:00:00 2001
From: Renjie Liu
Date: Fri, 8 May 2020 19:50:57 -0700
Subject: [PATCH] Allow kernels to take different scales for prelu

PiperOrigin-RevId: 310672881
Change-Id: Ibb3044112cf3136892e1b509d18e2585a67384db
---
 tensorflow/compiler/mlir/lite/ir/tfl_ops.td   |  3 +-
 tensorflow/lite/kernels/activations.cc        | 41 +++++++++++--------
 .../lite/kernels/internal/reference/prelu.h   |  8 ++--
 tensorflow/lite/kernels/internal/types.h      |  6 ++-
 tensorflow/lite/micro/kernels/prelu.cc        | 26 ++++++++----
 tensorflow/lite/micro/kernels/prelu_test.cc   | 12 +++---
 tensorflow/lite/testing/op_tests/prelu.py     |  2 +-
 .../lite/tools/optimize/operator_property.cc  |  2 +-
 8 files changed, 58 insertions(+), 42 deletions(-)

diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td
index 966c56a1464..13b8ae83e34 100644
--- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td
+++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td
@@ -2269,8 +2269,7 @@ def TFL_PReluOp : TFL_Op<"prelu", [
       TFL_OperandIsUnrankedPred<1>,
       CPred<"$_op.getOperand(0).getType().cast<ShapedType>().getRank() == "
             "$_op.getOperand(1).getType().cast<ShapedType>().getRank() "
-            "+ 1">]>>,
-    SameOperandsAndResultsScale]> {
+            "+ 1">]>>]> {
   let summary = "Parameterized Relu operator";
 
   let description = [{
diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc
index 3343985e4f2..84420b8eb9f 100644
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@@ -84,8 +84,10 @@ struct LeakyReluOpData : public OpData {
 };
 
 struct PreluOpData : public OpData {
-  int32_t output_multiplier = 0;
-  int output_shift = 0;
+  int32_t output_multiplier_1 = 0;
+  int32_t output_shift_1 = 0;
+  int32_t output_multiplier_2 = 0;
+  int32_t output_shift_2 = 0;
 };
 
 struct HardSwishData {
@@ -664,7 +666,6 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
 
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
       output->type == kTfLiteInt16) {
-    // This scale check is actually needed for quantized path:
     // prelu(x) = x if x >= 0 else x * alpha.
     // So if we translate that for quantized computation:
    //
@@ -676,19 +677,19 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
    //   output_q = (input_q - input_zp) * input_scale / output_scale + output_zp
     // else:
     //   output_q = (input_q - input_zp) * (alpha_q - alpha_zp) * input_scale
-    //       * alpha_scale / output_scale +output_q
+    //       * alpha_scale / output_scale + output_zp
     //
-    // So we have two float values which we need to translate into multiplier
-    // shift languages.
-    // For simplicity & efficiency, if we make sure input_scale
-    // & output_scale are the same, we only need to translate the latter one
-    // into multiplier & shift format.
-    TF_LITE_ENSURE(context,
-                   std::abs(input->params.scale - output->params.scale) < 1e-4);
-    double real_multiplier =
+    // So for input_q - input_zp >= 0:
+    //   output real multiplier 1 is input_scale / output_scale;
+    // for input_q - input_zp < 0:
+    //   output real multiplier 2 is input_scale * alpha_scale / output_scale.
+    double real_multiplier_1 = input->params.scale / output->params.scale;
+    double real_multiplier_2 =
         input->params.scale * alpha->params.scale / output->params.scale;
-    QuantizeMultiplierSmallerThanOneExp(
-        real_multiplier, &data->output_multiplier, &data->output_shift);
+    QuantizeMultiplier(real_multiplier_1, &data->output_multiplier_1,
+                       &data->output_shift_1);
+    QuantizeMultiplier(real_multiplier_2, &data->output_multiplier_2,
+                       &data->output_shift_2);
   }
 
   // PRelu (parametric Relu) shares the same alpha value on "shared axis".
@@ -1171,8 +1172,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
       op_params.input_offset = -input->params.zero_point;
       op_params.alpha_offset = -alpha->params.zero_point;
       op_params.output_offset = output->params.zero_point;
-      op_params.output_multiplier = data->output_multiplier;
-      op_params.output_shift = data->output_shift;
+      op_params.output_multiplier_1 = data->output_multiplier_1;
+      op_params.output_shift_1 = data->output_shift_1;
+      op_params.output_multiplier_2 = data->output_multiplier_2;
+      op_params.output_shift_2 = data->output_shift_2;
       reference_ops::BroadcastPrelu4DSlow(
           op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
           GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
@@ -1184,8 +1187,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
       op_params.input_offset = -input->params.zero_point;
       op_params.alpha_offset = -alpha->params.zero_point;
       op_params.output_offset = output->params.zero_point;
-      op_params.output_multiplier = data->output_multiplier;
-      op_params.output_shift = data->output_shift;
+      op_params.output_multiplier_1 = data->output_multiplier_1;
+      op_params.output_shift_1 = data->output_shift_1;
+      op_params.output_multiplier_2 = data->output_multiplier_2;
+      op_params.output_shift_2 = data->output_shift_2;
       reference_ops::BroadcastPrelu4DSlow(
           op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
           GetTensorShape(alpha), GetTensorData<int8_t>(alpha),
diff --git a/tensorflow/lite/kernels/internal/reference/prelu.h b/tensorflow/lite/kernels/internal/reference/prelu.h
index d3d7d78a4a4..50d9ad24dd9 100644
--- a/tensorflow/lite/kernels/internal/reference/prelu.h
+++ b/tensorflow/lite/kernels/internal/reference/prelu.h
@@ -48,14 +48,16 @@ inline void BroadcastPrelu4DSlow(
               params.input_offset + input_data[input_index];
           int32 output_value;
           if (input_value >= 0) {
-            output_value = input_value;
+            output_value = MultiplyByQuantizedMultiplier(
+                input_value, params.output_multiplier_1, params.output_shift_1);
           } else {
             auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
             const int32 alpha_value =
                 params.alpha_offset + alpha_data[alpha_index];
+
             output_value = MultiplyByQuantizedMultiplier(
-                input_value * alpha_value, params.output_multiplier,
-                params.output_shift);
+                input_value * alpha_value, params.output_multiplier_2,
+                params.output_shift_2);
           }
           output_value += params.output_offset;
diff --git a/tensorflow/lite/kernels/internal/types.h b/tensorflow/lite/kernels/internal/types.h
index cbdedd88901..52d74d1eca4 100644
--- a/tensorflow/lite/kernels/internal/types.h
+++ b/tensorflow/lite/kernels/internal/types.h
@@ -972,8 +972,10 @@ struct PreluParams {
   int32 input_offset;
   int32 alpha_offset;
   int32 output_offset;
-  int32 output_multiplier;
-  int output_shift;
+  int32 output_multiplier_1;
+  int32 output_shift_1;
+  int32 output_multiplier_2;
+  int32 output_shift_2;
 };
 
 struct PoolParams {
diff --git a/tensorflow/lite/micro/kernels/prelu.cc b/tensorflow/lite/micro/kernels/prelu.cc
index a20d2c88225..2c575269cca 100644
--- a/tensorflow/lite/micro/kernels/prelu.cc
+++ b/tensorflow/lite/micro/kernels/prelu.cc
@@ -64,14 +64,20 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   const TfLiteTensor* alpha = GetInput(context, node, 1);
   TfLiteTensor* output = GetOutput(context, node, 0);
-  int32_t output_multiplier = 0;
-  int output_shift = 0;
+  int32_t output_multiplier_1 = 0;
+  int output_shift_1 = 0;
+  int32_t output_multiplier_2 = 0;
+  int output_shift_2 = 0;
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
-    double real_multiplier = static_cast<double>(input->params.scale) *
-                             static_cast<double>(alpha->params.scale) /
-                             static_cast<double>(output->params.scale);
-    QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
-                                        &output_shift);
+    double real_multiplier_1 = static_cast<double>(input->params.scale) /
+                               static_cast<double>(output->params.scale);
+    double real_multiplier_2 = static_cast<double>(input->params.scale) *
+                               static_cast<double>(alpha->params.scale) /
+                               static_cast<double>(output->params.scale);
+    QuantizeMultiplier(real_multiplier_1, &output_multiplier_1,
+                       &output_shift_1);
+    QuantizeMultiplier(real_multiplier_2, &output_multiplier_2,
+                       &output_shift_2);
   }
   switch (input->type) {
     case kTfLiteFloat32: {
@@ -86,8 +92,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
       op_params.input_offset = -input->params.zero_point;
       op_params.alpha_offset = -alpha->params.zero_point;
       op_params.output_offset = output->params.zero_point;
-      op_params.output_multiplier = output_multiplier;
-      op_params.output_shift = output_shift;
+      op_params.output_multiplier_1 = output_multiplier_1;
+      op_params.output_shift_1 = output_shift_1;
+      op_params.output_multiplier_2 = output_multiplier_2;
+      op_params.output_shift_2 = output_shift_2;
       reference_ops::BroadcastPrelu4DSlow(
           op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
           GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
diff --git a/tensorflow/lite/micro/kernels/prelu_test.cc b/tensorflow/lite/micro/kernels/prelu_test.cc
index 4b35dac5849..d6c851a2726 100644
--- a/tensorflow/lite/micro/kernels/prelu_test.cc
+++ b/tensorflow/lite/micro/kernels/prelu_test.cc
@@ -154,14 +154,14 @@ TF_LITE_MICRO_TESTS_BEGIN
 TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) {
   const int output_dims_count = 12;
   float output_data[output_dims_count];
-  tflite::testing::TestPreluFloat({4, 1, 2, 2, 3},  // input shape
+  tflite::testing::TestPreluFloat({1, 2, 2, 3},  // input shape
                                   {
                                       0.0f, 0.0f, 0.0f,     // Row 1, Column 1
                                       1.0f, 1.0f, 1.0f,     // Row 1, Column 2
                                       -1.0f, -1.0f, -1.0f,  // Row 2, Column 1
                                       -2.0f, -2.0f, -2.0f,  // Row 2, Column 2
                                   },
-                                  {3, 1, 1, 3},        // alpha shape
+                                  {1, 1, 1, 3},        // alpha shape
                                   {0.0f, 1.0f, 2.0f},  // alpha values
                                   {
                                       0.0f, 0.0f, 0.0f,    // Row 1, Column 1
@@ -169,7 +169,7 @@ TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) {
                                       0.0f, -1.0f, -2.0f,  // Row 2, Column 1
                                       0.0f, -2.0f, -4.0f,  // Row 2, Column 2
                                   },
-                                  {4, 1, 2, 2, 3},  // output shape
+                                  {1, 2, 2, 3},  // output shape
                                   output_data);
 }
 
@@ -182,13 +182,13 @@ TF_LITE_MICRO_TEST(QuantizedPreluActivationsOpTest) {
   const int output_dims_count = 12;
   uint8_t output_data[output_dims_count];
   tflite::testing::TestPreluQuantized(
-      {4, 1, 2, 2, 3},  // input shape
+      {1, 2, 2, 3},  // input shape
       {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax),
        F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax),
        F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax),
       F2Q(-0.25f, kMin, kMax), F2Q(-0.25f, kMin, kMax), F2Q(-0.25f, kMin, kMax)},
-      kMin, kMax, {3, 1, 1, 3},  // alpha shape
+      kMin, kMax, {1, 1, 1, 3},  // alpha shape
       {F2Q(0.0f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(-0.5f, kMin, kMax)},
       kMin, kMax,
       {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax),
        F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax),
        F2Q(0.0f, kMin, kMax), F2Q(-0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax),
       F2Q(0.0f, kMin, kMax), F2Q(-0.125f, kMin, kMax), F2Q(0.125f, kMin, kMax)},
-      {4, 1, 2, 2, 3},  // output shape
+      {1, 2, 2, 3},  // output shape
       kMin, kMax, output_data);
 }
 
diff --git a/tensorflow/lite/testing/op_tests/prelu.py b/tensorflow/lite/testing/op_tests/prelu.py
index 480736a76fe..bc5875739ed 100644
--- a/tensorflow/lite/testing/op_tests/prelu.py
+++ b/tensorflow/lite/testing/op_tests/prelu.py
@@ -86,7 +86,7 @@ def make_prelu_tests(options):
         alpha_shape.append(1 if dim in shared_axes else input_shape[dim])
 
       alpha_values = create_tensor_data(
-          np.float32, alpha_shape, min_value=-1, max_value=1)
+          np.float32, alpha_shape, min_value=-5, max_value=5)
 
       # There should be only 1 trainable variable tensor.
       variables = tf.compat.v1.all_variables()
diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc
index 94093ef8698..e3dbd0b839f 100644
--- a/tensorflow/lite/tools/optimize/operator_property.cc
+++ b/tensorflow/lite/tools/optimize/operator_property.cc
@@ -818,7 +818,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
     case BuiltinOperator_PRELU:
      property.inputs = {{0, {}}, {1, {}}};
       property.outputs = {{0, {}}};
-      property.restrict_same_input_output_scale = true;
+      property.restrict_same_input_output_scale = false;
       property.version = 1;
       break;
     case BuiltinOperator_LEAKY_RELU:
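
The arithmetic this patch sets up is easier to see outside the kernel plumbing. Below is a minimal standalone sketch, not TFLite code: the function name and the scale values are hypothetical, and it uses plain double arithmetic where the real kernels use the QuantizeMultiplier / MultiplyByQuantizedMultiplier fixed-point machinery. It mirrors the two-multiplier evaluation that PreluPrepare and BroadcastPrelu4DSlow now implement: the identity branch requantizes by input_scale / output_scale, and the alpha branch by input_scale * alpha_scale / output_scale.

```cpp
// Sketch only: mirrors the quantized PReLU math from this patch using doubles
// in place of the fixed-point multiplier/shift pipeline. Names and scales are
// made up for illustration.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t QuantizedPreluSketch(int8_t input_q, int8_t alpha_q, float input_scale,
                            int32_t input_zp, float alpha_scale,
                            int32_t alpha_zp, float output_scale,
                            int32_t output_zp) {
  const int32_t x = static_cast<int32_t>(input_q) - input_zp;
  const int32_t a = static_cast<int32_t>(alpha_q) - alpha_zp;
  // Multiplier 1 (identity branch): no longer forced to ~1.0, since this
  // change lifts the same-scale restriction on input and output.
  const double real_multiplier_1 =
      static_cast<double>(input_scale) / output_scale;
  // Multiplier 2 (alpha branch).
  const double real_multiplier_2 =
      static_cast<double>(input_scale) * alpha_scale / output_scale;
  const double acc =
      (x >= 0) ? x * real_multiplier_1 : (x * a) * real_multiplier_2;
  const int32_t y = static_cast<int32_t>(std::lround(acc)) + output_zp;
  return static_cast<int8_t>(
      std::min<int32_t>(127, std::max<int32_t>(-128, y)));
}

int main() {
  // input_scale != output_scale: the old kernel would have failed the
  // TF_LITE_ENSURE(|input_scale - output_scale| < 1e-4) check removed above.
  const float input_scale = 0.05f, alpha_scale = 0.02f, output_scale = 0.1f;
  printf("x=+40 -> %d\n", QuantizedPreluSketch(40, 50, input_scale, 0,
                                               alpha_scale, 0, output_scale, 0));
  printf("x=-40 -> %d\n", QuantizedPreluSketch(-40, 50, input_scale, 0,
                                               alpha_scale, 0, output_scale, 0));
  return 0;
}
```

With these example scales, real_multiplier_1 = 0.5 and real_multiplier_2 = 0.01, so the positive input 40 requantizes to 20 and the negative input -40 (with alpha_q = 50) to -20.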
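The switch from QuantizeMultiplierSmallerThanOneExp to QuantizeMultiplier also matters: real_multiplier_1 = input_scale / output_scale can now exceed 1 whenever the input scale is larger than the output scale, and the "smaller than one" variant is only defined for multipliers in (0, 1]. The sketch below shows, under simplified assumptions and without the overflow edge cases handled by the actual implementation in tensorflow/lite/kernels/internal/quantization_util.cc, how a real multiplier decomposes into a 32-bit fixed-point multiplier plus a power-of-two shift; multipliers >= 1 simply get a positive shift.

```cpp
// Rough sketch of decomposing a real multiplier m into q * 2^(shift - 31),
// where q is a Q0.31 fixed-point value. Simplified relative to the real
// QuantizeMultiplier; edge-case handling is omitted.
#include <cmath>
#include <cstdint>
#include <cstdio>

void QuantizeMultiplierSketch(double real_multiplier,
                              int32_t* quantized_multiplier, int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  // frexp: real_multiplier = significand * 2^shift, significand in [0.5, 1).
  const double significand = std::frexp(real_multiplier, shift);
  int64_t q = static_cast<int64_t>(std::llround(significand * (1ll << 31)));
  if (q == (1ll << 31)) {  // Rounding pushed the significand to 1.0.
    q /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q);
}

int main() {
  int32_t q;
  int shift;
  // input_scale / output_scale > 1 is now legal for PReLU's identity branch.
  QuantizeMultiplierSketch(1.5, &q, &shift);
  printf("m=1.5  -> q=%d, shift=%d\n", q, shift);  // positive shift
  QuantizeMultiplierSketch(0.01, &q, &shift);
  printf("m=0.01 -> q=%d, shift=%d\n", q, shift);  // negative shift
  return 0;
}
```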