Merge pull request #35996 from psunn:int16_softmax

PiperOrigin-RevId: 306740044
Change-Id: I28304bf393b4444d57a76642400272edfa2502ed
TensorFlower Gardener 2020-04-15 16:17:49 -07:00
commit fd76504f5f
5 changed files with 336 additions and 8 deletions

View File

@@ -60,7 +60,13 @@ struct OpData {
struct SoftmaxOpData {
struct SoftmaxParams params = {};
-float table[256];
+float table[256]{};
const int size_of_lut = 513;
int16_t exp_lut[513]{}; // int16 LUT for exp(x), where x is uniformly
// distributed in [-10.0, 0.0]
int16_t one_over_one_plus_x_lut[513]{}; // int16 LUT for 1 / (1 + x), where
// x is uniformly distributed in
// [0.0, 1.0]
};
struct LogSoftmaxOpData : public OpData {
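Both tables store Q0.15 samples: a real value f in [-1.0, 1.0) is encoded as round(f * 32768), clamped to the int16 range. A minimal sketch of that encoding (the helper names here are illustrative, not part of the patch):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hypothetical helpers, for illustration only.
inline int16_t ToQ015(double f) {
  // Encode a real value in [-1.0, 1.0) as Q0.15, saturating at the rails.
  double v = std::round(f * 32768.0);
  return static_cast<int16_t>(std::min(std::max(v, -32768.0), 32767.0));
}
inline double FromQ015(int16_t q) { return q / 32768.0; }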
@@ -546,8 +552,9 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
if (output->type == kTfLiteInt16) {
-TF_LITE_ENSURE(context,
-input->type == kTfLiteInt8 || input->type == kTfLiteUInt8);
+TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
+input->type == kTfLiteUInt8 ||
+input->type == kTfLiteInt16);
} else {
TF_LITE_ENSURE_EQ(context, input->type, output->type);
}
@@ -562,6 +569,28 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
data->params.scale = output->params.scale;
}
if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
data->params.exp_lut = data->exp_lut;
// The exp LUT is only used for negative values;
// we consider exp(-10.0) insignificant to the accumulation.
gen_lut([](double value) { return std::exp(value); }, -10.0, 0.0,
data->params.exp_lut, data->size_of_lut);
data->params.one_over_one_plus_x_lut = data->one_over_one_plus_x_lut;
gen_lut([](double value) { return 1.0 / (1.0 + value); }, 0.0, 1.0,
data->params.one_over_one_plus_x_lut, data->size_of_lut);
data->params.zero_point = output->params.zero_point;
data->params.scale = output->params.scale;
double input_scale_beta_rescale =
input->params.scale * params->beta /
(10.0 / 65535.0); // scale input_diff such that [-65535, 0]
// corresponds to [-10.0, 0.0]
QuantizeMultiplier(input_scale_beta_rescale, &data->params.input_multiplier,
&data->params.input_left_shift);
}
return context->ResizeTensor(context, output,
TfLiteIntArrayCopy(input->dims));
}
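To make the rescale concrete, a hypothetical example: for an int16 input quantized over roughly [-10, 10] (scale 10/32768) with beta = 1, the factor works out to about 2.0, i.e. each unit of input_diff becomes about two units of LUT input; QuantizeMultiplier then decomposes this real factor into the fixed-point multiplier and shift used at eval time. A standalone sketch of the computation:

#include <cstdio>

int main() {
  // Hypothetical quantization parameters, not from the patch.
  const double input_scale = 10.0 / 32768.0;  // int16 input covering [-10, 10]
  const double beta = 1.0;
  // [-65535, 0] in the rescaled domain corresponds to [-10.0, 0.0].
  const double lut_domain_scale = 10.0 / 65535.0;
  const double input_scale_beta_rescale = input_scale * beta / lut_domain_scale;
  std::printf("rescale = %f\n", input_scale_beta_rescale);  // ~2.0
  return 0;
}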
@@ -945,6 +974,25 @@ TfLiteStatus SoftmaxQuantized(TfLiteContext* context, const TfLiteTensor* input,
return kTfLiteOk;
}
template <>
TfLiteStatus SoftmaxQuantized<int16_t, int16_t>(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
SoftmaxOpData* data) {
if (NumDimensions(input) >= 1 && NumDimensions(input) <= 4) {
reference_ops::SoftmaxInt16(
data->params, GetTensorShape(input), GetTensorData<int16_t>(input),
GetTensorShape(output), GetTensorData<int16_t>(output));
return kTfLiteOk;
} else {
TF_LITE_KERNEL_LOG(context,
"Only 1D, 2D, 3D and 4D tensors supported for int16 "
"input with int16 output, got %dD.",
NumDimensions(input));
return kTfLiteError;
}
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
SoftmaxOpData* data = reinterpret_cast<SoftmaxOpData*>(node->user_data);
@@ -987,12 +1035,15 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
}
case kTfLiteInt16: {
return SoftmaxQuantized<int16_t, int16_t>(context, input, output, data);
}
default:
-TF_LITE_KERNEL_LOG(
-context,
-"Only float32, uint8_t and int8_t are supported currently, got %s.",
-TfLiteTypeGetName(input->type));
+TF_LITE_KERNEL_LOG(context,
+"Only float32, uint8_t, int8_t and int16_t are supported "
+"currently, got %s.",
+TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}

View File

@@ -85,7 +85,16 @@ class BaseActivationsOpModel : public SingleOpModel {
output_ = AddOutput({TensorType_UINT8, {}, 0, 0, 1. / 256});
} else if (output_type == TensorType_INT8) {
output_ = AddOutput({TensorType_INT8, {}, 0, 0, 1. / 256, -128});
-} else if (output_type == TensorType_INT16) {
+} else if (input.type == TensorType_INT16 &&
+output_type == TensorType_INT16) {
output_ = AddOutput({TensorType_INT16,
{},
0,
0,
1.0f / (std::numeric_limits<int16_t>::max() + 1),
0});
} else if (input.type != TensorType_INT16 &&
output_type == TensorType_INT16) {
output_ = AddOutput({TensorType_INT16, {}, 0, 0, 1. / 32768, -16384});
} else {
output_ = AddOutput({output_type, {}});
@@ -1040,6 +1049,149 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt8) {
kQuantizedTolerance)));
}
// Test quantized softmax with int16 input and output. With the same input as in
// QuantizedActivationsOpTest.Softmax1D, the dequantized output is identical.
TEST(QuantizedActivationsOpTest, Softmax1DInt16) {
QuantizedActivationsOpModel m(1,
/*input=*/{TensorType_INT16, {3}, -3, 3},
/*output_type=*/ TensorType_INT16);
m.SetInput<int16_t>({1, 2, 3});
m.Invoke();
EXPECT_THAT(
m.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear({0.0900269, 0.2447285, 0.66524096},
kQuantizedToleranceInt16)));
}
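The expected values above are simply the float softmax of {1, 2, 3}; a quick reference computation, independent of the test harness:

#include <cmath>
#include <cstdio>

int main() {
  const double in[3] = {1.0, 2.0, 3.0};
  double e[3], sum = 0.0;
  for (int i = 0; i < 3; ++i) sum += (e[i] = std::exp(in[i]));
  for (int i = 0; i < 3; ++i)
    std::printf("%f ", e[i] / sum);  // 0.090031 0.244728 0.665241
  std::printf("\n");
  return 0;
}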
TEST(QuantizedActivationsOpTest, Softmax1DInt16ZeroElement) {
QuantizedActivationsOpModel m(0.1,
/*input=*/{TensorType_INT16, {1}, -1, 1},
TensorType_INT16);
m.SetInput<int16_t>({0});
m.Invoke();
EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear({1}, kQuantizedToleranceInt16)));
}
TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
QuantizedActivationsOpModel m(0.1,
/*input=*/{TensorType_INT16, {2, 4}, -10, 10},
TensorType_INT16);
m.SetInput<int16_t>({
0, -6, 2, 4, //
3, -2, 10, 1, //
});
m.Invoke();
EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear(
{
.23463, .12877, .28658, .35003, //
.22528, .13664, .45365, .18443, //
},
kQuantizedToleranceInt16)));
// Same input, but a different shape.
QuantizedActivationsOpModel m2(0.1,
/*input=*/{TensorType_INT16, {4, 2}, -10, 10},
TensorType_INT16);
m2.SetInput<int16_t>({
0, -6, //
2, 4, //
3, -2, //
10, 1, //
});
m2.Invoke();
EXPECT_THAT(m2.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear(
{
0.645656, 0.354344, //
0.450166, 0.549834, //
0.622459, 0.377541, //
0.710949, 0.28905, //
},
kQuantizedToleranceInt16)));
}
TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
QuantizedActivationsOpModel m(
1,
/*input=*/{TensorType_INT16, {1, 2, 4}, -10, 10}, TensorType_INT16);
m.SetInput<int16_t>({
0, -6, 2, 4, // depth = 0
3, -2, 10, 1, // depth = 1
});
m.Invoke();
EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear(
{
.0158756, .000039, .1173, .866779, //
.00091, .0000061, .998959, .000123, //
},
kQuantizedTolerance)));
// Same input, but a different shape.
QuantizedActivationsOpModel m2(
1,
/*input=*/{TensorType_INT16, {4, 1, 2}, -10, 10}, TensorType_INT16);
m2.SetInput<int16_t>({
0, -6, //
2, 4, //
3, -2, //
10, 1, //
});
m2.Invoke();
EXPECT_THAT(m2.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear(
{
0.997527, 0.0024726, //
0.11920292, 0.88079707, //
0.99330715, 0.00669285, //
0.999876605, 0.000123395, //
},
kQuantizedTolerance)));
}
// Test quantized softmax with int16 input and output. With the same input as in
// QuantizedActivationsOpTest.Softmax4D, the dequantized output is identical.
TEST(QuantizedActivationsOpTest, Softmax4DInt16) {
QuantizedActivationsOpModel m(
0.1,
/*input=*/{TensorType_INT16, {1, 2, 1, 4}, -10, 10}, TensorType_INT16);
m.SetInput<int16_t>({
0, -6, 2, 4, // depth = 0
3, -2, 10, 1, // depth = 1
});
m.Invoke();
EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear(
{
.23463, .12877, .28658, .35003, //
.22528, .13664, .45365, .18443, //
},
kQuantizedToleranceInt16)));
// Same input, but a different shape.
QuantizedActivationsOpModel m2(
0.1,
/*input=*/{TensorType_INT16, {4, 1, 1, 2}, -10, 10}, TensorType_INT16);
m2.SetInput<int16_t>({
0, -6, //
2, 4, //
3, -2, //
10, 1, //
});
m2.Invoke();
EXPECT_THAT(m2.GetDequantizedOutput<int16_t>(),
ElementsAreArray(ArrayFloatNear(
{
0.645656, 0.354344, //
0.450166, 0.549834, //
0.622459, 0.377541, //
0.710949, 0.28905, //
},
kQuantizedToleranceInt16)));
}
// Test quantized softmax with int8 input and int16 output. With the same input
// as in QuantizedActivationsOpTest.Softmax1D, the dequantized output is
// identical.

View File

@@ -21,6 +21,8 @@ limitations under the License.
#endif
#endif
#include <functional>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/types.h"
@@ -195,6 +197,49 @@ inline int CountLeadingSignBits(T integer_input) {
#endif
}
// Generates an int16 LUT for a function, e.g., the exp(x) and 1/(1 + x)
// tables used in softmax.
inline void gen_lut(const std::function<double(double)>& func, double min,
double max, int16_t* table, const int num) {
// The table holds num entries; the last entry is used
// only for slope calculation.
double step = (max - min) / (num - 1);
double half_step = step / 2.0;
for (int i = 0; i < num - 1; i++) {
double sample_val = std::round(func(min + i * step) * 32768.0);
double midpoint_interp_val =
std::round((func(min + (i + 1) * step) * 32768.0 +
std::round(func(min + i * step) * 32768.0)) /
2.0);
double midpoint_val =
std::round(func(min + i * step + half_step) * 32768.0);
double midpoint_err = midpoint_interp_val - midpoint_val;
double bias = std::round(midpoint_err / 2.0);
table[i] = std::min(std::max(sample_val - bias, -32768.0), 32767.0);
}
table[num - 1] =
std::min(std::max(std::round(func(max) * 32768.0), -32768.0), 32767.0);
}
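The bias term above splits the linear-interpolation error: each stored sample is nudged by half the midpoint error, so the deviation at a segment midpoint is shared between the endpoints rather than borne at one. A standalone sketch that measures this for one segment of the exp table (it assumes gen_lut from above is visible, e.g. via this header):

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  int16_t lut[513];
  gen_lut([](double x) { return std::exp(x); }, -10.0, 0.0, lut, 513);
  const double step = 10.0 / 512.0;        // (max - min) / (num - 1)
  const double mid_x = -5.0 + step / 2.0;  // midpoint of segment 256
  const double interp = (lut[256] + lut[257]) / 2.0;  // interpolated, Q0.15
  const double exact = std::exp(mid_x) * 32768.0;     // true value, Q0.15
  std::printf("interp=%.2f exact=%.2f err=%.2f\n", interp, exact,
              interp - exact);
  return 0;
}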
// int16 function table lookup, e.g., for the exp() and 1/(1 + x) tables used
// in softmax.
static int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
// 512 base values; the 513th entry (lut[512]) is used only for slope
// calculation.
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
assert(index < 512 && "LUT index out of range.");
int16_t offset = value & 0x7f;
// base and slope are Q0.15
int16_t base = lut[index];
int16_t slope = lut[index + 1] - lut[index];
// Q0.15 * Q0.7 = Q0.22
// Round and convert from Q0.22 to Q0.15
int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
// Q0.15 + Q0.15
return base + delta;
}
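A usage sketch for the lookup: a query value of 0 sits at the center of the symmetric int16 input range. For an exp table built over [-10.0, 0.0] that is index 256 with offset 0, i.e. the sample at x = -5.0, so the result should be close to round(exp(-5.0) * 32768) = 221, give or take the bias correction. Assumes the two helpers above are visible:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  int16_t exp_lut[513];
  gen_lut([](double x) { return std::exp(x); }, -10.0, 0.0, exp_lut, 513);
  const int16_t q = generic_int16_table_lookup(0, exp_lut);
  std::printf("lookup(0) = %d (~%f)\n", q, q / 32768.0);  // ~221, ~exp(-5)
  return 0;
}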
// Table of sigmoid(i/24) at 0.16 format - 256 elements.
// We use combined sigmoid and tanh look-up table, since

View File

@@ -16,6 +16,7 @@ limitations under the License.
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
#include <limits>
#include <vector>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
@@ -142,6 +143,83 @@ inline void Softmax(const SoftmaxParams& params,
}
}
// Quantized softmax with int16 input and int16 output.
inline void SoftmaxInt16(const SoftmaxParams& params,
const RuntimeShape& input_shape,
const int16_t* input_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
// Find the largest element
int16_t max_in_row = std::numeric_limits<int16_t>::min();
for (int c = 0; c < depth; ++c) {
max_in_row = std::max(max_in_row, input_data[i * depth + c]);
}
// Compute exp(input - max_input)
std::vector<int16_t> exp_result_Q015(depth);
for (int c = 0; c < depth; ++c) {
int32_t input_diff = input_data[i * depth + c] - max_in_row;
// scale input_diff such that [-65535, 0] corresponds to [-10.0, 0.0]
int32_t scaled_diff = MultiplyByQuantizedMultiplier(
input_diff, params.input_multiplier, params.input_left_shift);
// recenter to [-32768, 32767]
int32_t sym_scaled_diff = scaled_diff + 32767;
int16_t sat_sym_scaled_diff =
std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
static_cast<int32_t>(32767));
// apply the exp() LUT activation function
exp_result_Q015[c] =
generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
}
// sum_of_exps is accumulated in Q16.15 fixed-point format.
int32_t sum_of_exps = 0;
for (int c = 0; c < depth; ++c) {
// Q16.15 + Q0.15
sum_of_exps += exp_result_Q015[c];
}
// Compute the reciprocal 1/sum_of_exps
uint8_t headroom_plus_one =
CountLeadingZeros(static_cast<uint32_t>(sum_of_exps));
int32_t shifted_sum =
((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
(1 << 13)) >>
14;
// Since the LUT computes 1/(1 + x), we first compute x = (sum - 1).
// Also, the LUT expects a symmetric input, so we must recenter x
// from [0, 65535] to [-32768, 32767].
int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16)));
int16_t sat_sym_shifted_sum = static_cast<int16_t>(
std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
static_cast<int32_t>(32767)));
// apply 1/(1 + x) LUT activation function
int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
// Rescale exp_result with the reciprocal;
// the output range [0, 32767] corresponds to [0.0, 1.0].
for (int c = 0; c < depth; ++c) {
uint8_t right_shift = 31 - headroom_plus_one;
int64_t round = 1 << (right_shift - 1);
int32_t result = (static_cast<int64_t>(exp_result_Q015[c]) *
static_cast<int64_t>(reciprocal_scale_Q015) +
round) >>
right_shift;
output_data[i * depth + c] = static_cast<int16_t>(
std::min(std::max(result, static_cast<int32_t>(0)),
static_cast<int32_t>(32767)));
}
}
}
} // namespace reference_ops
} // namespace tflite
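To make the reciprocal bookkeeping concrete, consider a hypothetical row of three equal logits: each exp() lookup saturates near 1.0 in Q0.15 (~32767), so sum_of_exps is ~3.0 in Q16.15. The sum normalizes to 1.5 in [1.0, 2.0), the 1/(1 + x) LUT returns ~2/3 (~21845), and every output lands near 1/3 (~10922 in Q0.15). A standalone sketch of just that arithmetic, using __builtin_clz (GCC/Clang) in place of CountLeadingZeros and with the LUT value hard-coded:

#include <cstdint>
#include <cstdio>

int main() {
  const int32_t sum_of_exps = 3 * 32768;  // 3.0 in Q16.15
  const int headroom_plus_one =
      __builtin_clz(static_cast<uint32_t>(sum_of_exps));  // 15 for this sum
  // Normalize the sum into [65536, 131071], i.e. [1.0, 2.0) with 16
  // fractional bits.
  const int32_t shifted_sum = static_cast<int32_t>(
      ((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
       (1 << 13)) >> 14);  // 98304, i.e. 1.5
  // Recenter the fractional part [0, 65535] to the LUT domain [-32768, 32767].
  const int32_t sym = shifted_sum - ((1 << 15) + (1 << 16));  // 0 -> x = 0.5
  const int32_t reciprocal = 21845;  // hard-coded: ~1/(1 + 0.5) in Q0.15
  const int right_shift = 31 - headroom_plus_one;  // 16
  const int64_t round = int64_t{1} << (right_shift - 1);
  const int32_t out = static_cast<int32_t>(
      (int64_t{32767} * reciprocal + round) >> right_shift);
  std::printf("sym=%d output=%d (~%f)\n", sym, out, out / 32768.0);  // ~0.333
  return 0;
}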

View File

@@ -1030,6 +1030,8 @@ struct SoftmaxParams {
int32_t zero_point;
float scale;
float* table;
int16_t* exp_lut;
int16_t* one_over_one_plus_x_lut;
};
struct SpaceToBatchParams {