Allow kernels to take different scales for prelu

PiperOrigin-RevId: 310672881
Change-Id: Ibb3044112cf3136892e1b509d18e2585a67384db
This commit is contained in:
Renjie Liu 2020-05-08 19:50:57 -07:00 committed by TensorFlower Gardener
parent 4909933889
commit 5e5b33c233
8 changed files with 58 additions and 42 deletions
tensorflow
compiler/mlir/lite/ir
lite
kernels
micro/kernels
testing/op_tests
tools/optimize

View File

@ -2269,8 +2269,7 @@ def TFL_PReluOp : TFL_Op<"prelu", [
TFL_OperandIsUnrankedPred<1>,
CPred<"$_op.getOperand(0).getType().cast<ShapedType>().getRank() == "
"$_op.getOperand(1).getType().cast<ShapedType>().getRank() "
"+ 1">]>>,
SameOperandsAndResultsScale]> {
"+ 1">]>>]> {
let summary = "Parameterized Relu operator";
let description = [{

View File

@ -84,8 +84,10 @@ struct LeakyReluOpData : public OpData {
};
struct PreluOpData : public OpData {
int32_t output_multiplier = 0;
int output_shift = 0;
int32_t output_multiplier_1 = 0;
int32_t output_shift_1 = 0;
int32_t output_multiplier_2 = 0;
int32_t output_shift_2 = 0;
};
struct HardSwishData {
@ -664,7 +666,6 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16) {
// This scale check is actually needed for quantized path:
// prelu(x) = x if x >= 0 else x * alpha.
// So if we translate that for quantized computation:
//
@ -676,19 +677,19 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
// output_q = (input_q - input_zp) * input_scale / output_scale + output_zp
// else:
// output_q = (input_q - input_zp) * (alpha_q - alpha_zp) * input_scale
// * alpha_scale / output_scale +output_q
// * alpha_scale / output_scale + output_q
//
// So we have two float values which we need to translate into multiplier
// shift languages.
// For simplicity & efficiency, if we make sure input_scale
// & output_scale are the same, we only need to translate the latter one
// into multiplier & shift format.
TF_LITE_ENSURE(context,
std::abs(input->params.scale - output->params.scale) < 1e-4);
double real_multiplier =
// So for input_q - input_zp >= 0:
// output real multiplier 1 is input_scale / output_scale;
// for input_q - input_zp < 0:
// output real multiplier 2 is input_scale * alpha_scale / output_scale.
double real_multiplier_1 = input->params.scale / output->params.scale;
double real_multiplier_2 =
input->params.scale * alpha->params.scale / output->params.scale;
QuantizeMultiplierSmallerThanOneExp(
real_multiplier, &data->output_multiplier, &data->output_shift);
QuantizeMultiplier(real_multiplier_1, &data->output_multiplier_1,
&data->output_shift_1);
QuantizeMultiplier(real_multiplier_2, &data->output_multiplier_2,
&data->output_shift_2);
}
// PRelu (parametric Relu) shares the same alpha value on "shared axis".
@ -1171,8 +1172,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
op_params.input_offset = -input->params.zero_point;
op_params.alpha_offset = -alpha->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
op_params.output_multiplier_1 = data->output_multiplier_1;
op_params.output_shift_1 = data->output_shift_1;
op_params.output_multiplier_2 = data->output_multiplier_2;
op_params.output_shift_2 = data->output_shift_2;
reference_ops::BroadcastPrelu4DSlow(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
@ -1184,8 +1187,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
op_params.input_offset = -input->params.zero_point;
op_params.alpha_offset = -alpha->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
op_params.output_multiplier_1 = data->output_multiplier_1;
op_params.output_shift_1 = data->output_shift_1;
op_params.output_multiplier_2 = data->output_multiplier_2;
op_params.output_shift_2 = data->output_shift_2;
reference_ops::BroadcastPrelu4DSlow(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(alpha), GetTensorData<int8_t>(alpha),

View File

@ -48,14 +48,16 @@ inline void BroadcastPrelu4DSlow(
params.input_offset + input_data[input_index];
int32 output_value;
if (input_value >= 0) {
output_value = input_value;
output_value = MultiplyByQuantizedMultiplier(
input_value, params.output_multiplier_1, params.output_shift_1);
} else {
auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
const int32 alpha_value =
params.alpha_offset + alpha_data[alpha_index];
output_value = MultiplyByQuantizedMultiplier(
input_value * alpha_value, params.output_multiplier,
params.output_shift);
input_value * alpha_value, params.output_multiplier_2,
params.output_shift_2);
}
output_value += params.output_offset;

View File

@ -972,8 +972,10 @@ struct PreluParams {
int32 input_offset;
int32 alpha_offset;
int32 output_offset;
int32 output_multiplier;
int output_shift;
int32 output_multiplier_1;
int32 output_shift_1;
int32 output_multiplier_2;
int32 output_shift_2;
};
struct PoolParams {

View File

@ -64,14 +64,20 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* alpha = GetInput(context, node, 1);
TfLiteTensor* output = GetOutput(context, node, 0);
int32_t output_multiplier = 0;
int output_shift = 0;
int32_t output_multiplier_1 = 0;
int output_shift_1 = 0;
int32_t output_multiplier_2 = 0;
int output_shift_2 = 0;
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
double real_multiplier = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
&output_shift);
double real_multiplier_1 = static_cast<double>(input->params.scale) /
                           static_cast<double>(output->params.scale);
double real_multiplier_2 = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier_1, &output_multiplier_1,
&output_shift_1);
QuantizeMultiplier(real_multiplier_2, &output_multiplier_2,
&output_shift_2);
}
switch (input->type) {
case kTfLiteFloat32: {
@ -86,8 +92,10 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
op_params.input_offset = -input->params.zero_point;
op_params.alpha_offset = -alpha->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.output_multiplier_1 = output_multiplier_1;
op_params.output_shift_1 = output_shift_1;
op_params.output_multiplier_2 = output_multiplier_2;
op_params.output_shift_2 = output_shift_2;
reference_ops::BroadcastPrelu4DSlow(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),

View File

@ -154,14 +154,14 @@ TF_LITE_MICRO_TESTS_BEGIN
TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) {
const int output_dims_count = 12;
float output_data[output_dims_count];
tflite::testing::TestPreluFloat({4, 1, 2, 2, 3}, // input shape
tflite::testing::TestPreluFloat({1, 2, 2, 3}, // input shape
{
0.0f, 0.0f, 0.0f, // Row 1, Column 1
1.0f, 1.0f, 1.0f, // Row 1, Column 2
-1.0f, -1.0f, -1.0f, // Row 2, Column 1
-2.0f, -2.0f, -2.0f, // Row 1, Column 2
},
{3, 1, 1, 3}, // alpha shape
{1, 1, 1, 3}, // alpha shape
{0.0f, 1.0f, 2.0f}, // alpha values
{
0.0f, 0.0f, 0.0f, // Row 1, Column 1
@ -169,7 +169,7 @@ TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) {
0.0f, -1.0f, -2.0f, // Row 2, Column 1
0.0f, -2.0f, -4.0f, // Row 1, Column 2
},
{4, 1, 2, 2, 3}, // output shape
{1, 2, 2, 3}, // output shape
output_data);
}
@ -182,13 +182,13 @@ TF_LITE_MICRO_TEST(QuantizedPreluActivationsOpTest) {
const int output_dims_count = 12;
uint8_t output_data[output_dims_count];
tflite::testing::TestPreluQuantized(
{4, 1, 2, 2, 3}, // input shape
{1, 2, 2, 3}, // input shape
{F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax),
F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax),
F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax),
F2Q(-0.25f, kMin, kMax), F2Q(-0.25f, kMin, kMax),
F2Q(-0.25f, kMin, kMax)},
kMin, kMax, {3, 1, 1, 3}, // alpha shape
kMin, kMax, {1, 1, 1, 3}, // alpha shape
{F2Q(0.0f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(-0.5f, kMin, kMax)},
kMin, kMax,
{F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax),
@ -196,7 +196,7 @@ TF_LITE_MICRO_TEST(QuantizedPreluActivationsOpTest) {
F2Q(0.0f, kMin, kMax), F2Q(-0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax),
F2Q(0.0f, kMin, kMax), F2Q(-0.125f, kMin, kMax),
F2Q(0.125f, kMin, kMax)},
{4, 1, 2, 2, 3}, // output shape
{1, 2, 2, 3}, // output shape
kMin, kMax, output_data);
}

View File

@ -86,7 +86,7 @@ def make_prelu_tests(options):
alpha_shape.append(1 if dim in shared_axes else input_shape[dim])
alpha_values = create_tensor_data(
np.float32, alpha_shape, min_value=-1, max_value=1)
np.float32, alpha_shape, min_value=-5, max_value=5)
# There should be only 1 trainable variable tensor.
variables = tf.compat.v1.all_variables()

View File

@ -818,7 +818,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
case BuiltinOperator_PRELU:
property.inputs = {{0, {}}, {1, {}}};
property.outputs = {{0, {}}};
property.restrict_same_input_output_scale = true;
property.restrict_same_input_output_scale = false;
property.version = 1;
break;
case BuiltinOperator_LEAKY_RELU: