16-bit reference kernel operators MAX_POOL_2D and AVERAGE_POOL_2D

Elena Zhelezina 2020-01-22 14:01:14 +00:00
parent 1fb8fe3793
commit 756cc8f62d
3 changed files with 239 additions and 1 deletion


@@ -135,6 +135,121 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
  }
}
inline void AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape,
                        const int16* input_data,
                        const RuntimeShape& output_shape, int16* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int32 acc = 0;
          int filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              acc +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          // Round to the closest integer value.
          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                        : (acc - filter_count / 2) / filter_count;
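          // For example (illustrative values, not from the tests below):
          // acc = 11 with filter_count = 4 gives (11 + 2) / 4 = 3, i.e.
          // round(2.75); acc = -11 gives (-11 - 2) / 4 = -3, i.e.
          // round(-2.75), so halves round away from zero for either sign.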
          acc = std::max(acc, params.quantized_activation_min);
          acc = std::min(acc, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int16>(acc);
        }
      }
    }
  }
}

inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const int16* input_data, const RuntimeShape& output_shape,
                    int16* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_GE(params.quantized_activation_min,
                   std::numeric_limits<int16_t>::min());
  TFLITE_DCHECK_LE(params.quantized_activation_max,
                   std::numeric_limits<int16_t>::max());
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int16_t max = std::numeric_limits<int16_t>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          max = std::max<int16_t>(max, params.quantized_activation_min);
          max = std::min<int16_t>(max, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int16_t>(max);
        }
      }
    }
  }
}
}  // namespace reference_integer_ops
}  // namespace tflite
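Note the two extra DCHECKs at the top of MaxPool relative to AveragePool: the running max is tracked in int16_t and clamped with std::max<int16_t>/std::min<int16_t>, so the int32 activation bounds must themselves fit in the int16 range for the clamp to be meaningful.

For context, a minimal standalone sketch of driving the new int16 AveragePool kernel directly; the include path and the main() harness are assumptions (the diff does not name the file), and the input mirrors the unit test added below:

#include <cstdint>
#include <limits>

// Assumed header locations for the reference kernel and PoolParams.
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/types.h"

int main() {
  tflite::PoolParams params;
  params.stride_height = 2;
  params.stride_width = 2;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;
  params.padding_values.width = 0;
  params.quantized_activation_min = std::numeric_limits<int16_t>::min();
  params.quantized_activation_max = std::numeric_limits<int16_t>::max();
  // 1x2x4x1 input: the test values {0, 6, 2, 4, 3, 2, 10, 7} quantized with
  // scale 1/4096 and zero point -32768 (q = v * 4096 - 32768).
  const tflite::RuntimeShape input_shape({1, 2, 4, 1});
  const tflite::RuntimeShape output_shape({1, 1, 2, 1});
  const int16_t input[8] = {-32768, -8192,  -24576, -16384,
                            -20480, -24576, 8192,   -4096};
  int16_t output[2];
  tflite::reference_integer_ops::AveragePool(params, input_shape, input,
                                             output_shape, output);
  // output[0] == -21504 (real 2.75), output[1] == -9216 (real 5.75).
  return 0;
}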


@@ -197,6 +197,32 @@ void AverageEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
#undef TF_LITE_AVERAGE_POOL
}

template <KernelType kernel_type>
void AverageEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node,
                               TfLitePoolParams* params, OpData* data,
                               const TfLiteTensor* input,
                               TfLiteTensor* output) {
  int32_t activation_min;
  int32_t activation_max;
  CalculateActivationRangeQuantized(context, params->activation, output,
                                    &activation_min, &activation_max);
#define TF_LITE_AVERAGE_POOL(type)                                         \
  tflite::PoolParams op_params;                                            \
  op_params.stride_height = params->stride_height;                         \
  op_params.stride_width = params->stride_width;                           \
  op_params.filter_height = params->filter_height;                         \
  op_params.filter_width = params->filter_width;                           \
  op_params.padding_values.height = data->padding.height;                  \
  op_params.padding_values.width = data->padding.width;                    \
  op_params.quantized_activation_min = activation_min;                     \
  op_params.quantized_activation_max = activation_max;                     \
  type::AveragePool(op_params, GetTensorShape(input),                      \
                    GetTensorData<int16_t>(input), GetTensorShape(output), \
                    GetTensorData<int16_t>(output))
  TF_LITE_AVERAGE_POOL(reference_integer_ops);
#undef TF_LITE_AVERAGE_POOL
}
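Since the macro is parameterized on the kernel namespace, the single invocation TF_LITE_AVERAGE_POOL(reference_integer_ops) above expands to the equivalent of:

  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = activation_min;
  op_params.quantized_activation_max = activation_max;
  reference_integer_ops::AveragePool(op_params, GetTensorShape(input),
                                     GetTensorData<int16_t>(input),
                                     GetTensorShape(output),
                                     GetTensorData<int16_t>(output));

The same pattern is reused for MaxPool below.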
template <KernelType kernel_type>
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
                  TfLitePoolParams* params, OpData* data,
@@ -282,6 +308,31 @@ void MaxEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
#undef TF_LITE_MAX_POOL
}

template <KernelType kernel_type>
void MaxEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node,
                           TfLitePoolParams* params, OpData* data,
                           const TfLiteTensor* input, TfLiteTensor* output) {
  int32_t activation_min;
  int32_t activation_max;
  CalculateActivationRangeQuantized(context, params->activation, output,
                                    &activation_min, &activation_max);
#define TF_LITE_MAX_POOL(type)                                         \
  tflite::PoolParams op_params;                                        \
  op_params.stride_height = params->stride_height;                     \
  op_params.stride_width = params->stride_width;                       \
  op_params.filter_height = params->filter_height;                     \
  op_params.filter_width = params->filter_width;                       \
  op_params.padding_values.height = data->padding.height;              \
  op_params.padding_values.width = data->padding.width;                \
  op_params.quantized_activation_min = activation_min;                 \
  op_params.quantized_activation_max = activation_max;                 \
  type::MaxPool(op_params, GetTensorShape(input),                      \
                GetTensorData<int16_t>(input), GetTensorShape(output), \
                GetTensorData<int16_t>(output))
  TF_LITE_MAX_POOL(reference_integer_ops);
#undef TF_LITE_MAX_POOL
}
template <KernelType kernel_type>
void L2EvalFloat(TfLiteContext* context, TfLiteNode* node,
                 TfLitePoolParams* params, OpData* data,
@@ -330,6 +381,10 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
      AverageEvalQuantizedInt8<kernel_type>(context, node, params, data, input,
                                            output);
      break;
    case kTfLiteInt16:
      AverageEvalQuantizedInt16<kernel_type>(context, node, params, data,
                                             input, output);
      break;
    default:
      context->ReportError(context, "Type %d not currently supported.",
                           input->type);
@@ -357,6 +412,10 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
      MaxEvalQuantizedInt8<kernel_type>(context, node, params, data, input,
                                        output);
      break;
    case kTfLiteInt16:
      MaxEvalQuantizedInt16<kernel_type>(context, node, params, data, input,
                                         output);
      break;
    default:
      context->ReportError(context, "Type %d not currently supported.",
                           input->type);


@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <gtest/gtest.h>
#include <cstdarg>
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/kernels/test_util.h"
@@ -96,6 +96,25 @@ class SymmetricQuantizedPoolingOpModel : public BasePoolingOpModel {
  }
};

class SymmetricQuantizedPoolingOpModel16 : public BasePoolingOpModel {
 public:
  using BasePoolingOpModel::BasePoolingOpModel;
  void SetInput(std::initializer_list<float> data) {
    QuantizeAndPopulate<int16_t>(input_, data);
  }
  void SetInput(const std::vector<float>& data) {
    QuantizeAndPopulate<int16_t>(input_, data);
  }
  std::vector<int16_t> GetOutput() { return ExtractVector<int16_t>(output_); }
  std::vector<float> GetDequantizedOutput() {
    return Dequantize<int16_t>(ExtractVector<int16_t>(output_),
                               GetScale(output_), GetZeroPoint(output_));
  }
};
// Replicate each entry in a vector n times along depth (innermost dimension).
// The values are incremented by delta, creating ramps offset by each input
// value. This is used to create simple and predictable variation.
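The ReplicateDepthRamp helper itself is not part of this diff; a minimal sketch of the behavior the comment describes might look like the following (the signature, parameter names, and float element type are assumptions):

#include <vector>

std::vector<float> ReplicateDepthRamp(const std::vector<float>& image_plane,
                                      int depth, float delta) {
  std::vector<float> result;
  result.reserve(image_plane.size() * depth);
  for (const float value : image_plane) {
    // Each input entry becomes a ramp of `depth` values starting at `value`
    // and increasing by `delta`.
    for (int i = 0; i < depth; ++i) {
      result.push_back(value + i * delta);
    }
  }
  return result;
}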
@@ -398,6 +417,29 @@ TEST(QuantizedPoolingOpTest, AveragePoolLargeDepth) {
          ReplicateDepthRamp(output_image_plane, depth, 1.f / 512.f),
          1. / 32.f)));
}

// Test quantized AveragePool with int16 input and output. The input is the
// same as the uint8 test QuantizedPoolingOpTest.AveragePool but with a scale
// of 1/4096 rather than 1/16.
TEST(QuantizedPoolingOpTest, SymmetricAveragePool16) {
  const float ulp = 1.f / 4096.f;
  SymmetricQuantizedPoolingOpModel16 m(
      BuiltinOperator_AVERAGE_POOL_2D,
      /*input=*/{TensorType_INT16, {1, 2, 4, 1}, 0, 16 - ulp},
      /*filter_width=*/2, /*filter_height=*/2,
      /*output=*/{TensorType_INT16, {}, 0, 16 - ulp});
  m.SetInput({
      0, 6, 2, 4,   //
      3, 2, 10, 7,  //
  });
  m.Invoke();
  EXPECT_THAT(m.GetDequantizedOutput(),
              ElementsAreArray(ArrayFloatNear({2.75, 5.75})));
  EXPECT_THAT(m.GetOutput(),
              ElementsAreArray({(44 - 128) << 8, (92 - 128) << 8}));
}
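As a cross-check on the expected quantized values: with scale 1/4096 and zero point -32768, the dequantized averages 2.75 and 5.75 map to

  2.75 * 4096 - 32768 = -21504 = (44 - 128) << 8
  5.75 * 4096 - 32768 =  -9216 = (92 - 128) << 8

which is why the expectations are written as the uint8-style values (44, 92) recentred by the int8 zero point of 128 and shifted left by 8 bits.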
// Test quantized AveragePool with int8 input and output. The input is the same
// as the uint8 test QuantizedPoolingOpTest.AveragePool. The float output is
// identical to uint8 test and quantized output is identical to uint8 test with
@@ -858,6 +900,28 @@ TEST(QuantizedInt8PoolingOpTest, MaxPool) {
  EXPECT_THAT(m.GetOutput(), ElementsAreArray({96 - 128, 160 - 128}));
}

TEST(QuantizedInt8PoolingOpTest16, MaxPool) {
  // Choose the input ranges carefully so that the dequantized output matches
  // the results of the float model above.
  // Input Range[0, 16-(1/4096)] --> [Scale{(1/4096)}, zero_point{-32768}]
  const float ulp = 1.f / 4096.f;
  SymmetricQuantizedPoolingOpModel16 m(
      BuiltinOperator_MAX_POOL_2D,
      /*input=*/{TensorType_INT16, {1, 2, 4, 1}, 0, 16 - ulp},
      /*filter_width=*/2, /*filter_height=*/2,
      /*output=*/{TensorType_INT16, {}, 0, 16 - ulp});
  m.SetInput({
      0, 6, 2, 4,   //
      3, 2, 10, 7,  //
  });
  m.Invoke();
  EXPECT_THAT(m.GetDequantizedOutput(),
              ElementsAreArray(ArrayFloatNear({6, 10})));
  EXPECT_THAT(m.GetOutput(),
              ElementsAreArray({(96 - 128) << 8, (160 - 128) << 8}));
}
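The mapping in the comment follows from the standard quantization formulas: scale = (max - min) / (2^16 - 1) = (16 - 1/4096) / 65535 = 1/4096, and zero_point = -32768 places real 0 at the bottom of the int16 range. The window maxima 6 and 10 therefore quantize to 6 * 4096 - 32768 = -8192 = (96 - 128) << 8 and 10 * 4096 - 32768 = 8192 = (160 - 128) << 8, matching the expectations above.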

TEST(QuantizedInt8PoolingOpTest, MaxPoolActivationRelu) {
  // Choose the input ranges carefully so that the dequantized output matches
  // the results of the float model above.