Introduce rounding into depthwise conv reference / test.

PiperOrigin-RevId: 232504607
This commit is contained in:
A. Unique TensorFlower 2019-02-05 09:51:21 -08:00 committed by TensorFlower Gardener
parent 32b84541e4
commit f6a39be9f4
2 changed files with 186 additions and 92 deletions

View File

@ -57,7 +57,8 @@ enum class CoverageExtension {
// The TestParam structure below is the preferred parameterization of tests. A // The TestParam structure below is the preferred parameterization of tests. A
// tuple version is defined in order to support value-parameterized tests. // tuple version is defined in order to support value-parameterized tests.
typedef std::tuple<DepthwiseConvInvocation, int, bool, bool, bool> typedef std::tuple<DepthwiseConvInvocation, int, bool, bool, bool,
DepthwiseConvOutputRounding, bool>
TestParamTuple; TestParamTuple;
struct TestParam { struct TestParam {
@ -68,7 +69,9 @@ struct TestParam {
tests_to_run(::testing::get<1>(param_tuple)), tests_to_run(::testing::get<1>(param_tuple)),
test_stride(::testing::get<2>(param_tuple)), test_stride(::testing::get<2>(param_tuple)),
test_pad(::testing::get<3>(param_tuple)), test_pad(::testing::get<3>(param_tuple)),
test_depth_multiplier(::testing::get<4>(param_tuple)) {} test_depth_multiplier(::testing::get<4>(param_tuple)),
output_rounding(::testing::get<5>(param_tuple)),
loose_tolerance(::testing::get<6>(param_tuple)) {}
static std::string TestNameSuffix( static std::string TestNameSuffix(
const ::testing::TestParamInfo<TestParamTuple>& info) { const ::testing::TestParamInfo<TestParamTuple>& info) {
@ -84,6 +87,9 @@ struct TestParam {
bool test_stride = false; bool test_stride = false;
bool test_pad = false; bool test_pad = false;
bool test_depth_multiplier = false; bool test_depth_multiplier = false;
DepthwiseConvOutputRounding output_rounding =
DepthwiseConvOutputRounding::kNone;
bool loose_tolerance = false;
}; };
inline void DispatchDepthwiseConv( inline void DispatchDepthwiseConv(
@ -183,9 +189,30 @@ int TestOneDepthwiseConvWithGivenOutputShift(
op_params.output_offset = output_offset; op_params.output_offset = output_offset;
op_params.output_multiplier = output_multiplier; op_params.output_multiplier = output_multiplier;
op_params.output_shift = -output_shift; op_params.output_shift = -output_shift;
reference_ops::DepthwiseConv(op_params, input_shape, input_data, filter_shape, switch (test_param.output_rounding) {
filter_data, bias_shape, bias_data, output_shape, case DepthwiseConvOutputRounding::kUpward:
reference_output_data.data()); reference_ops::DepthwiseConvBasicKernel<
// The kUpward case must build its expected output with the kUpward
// reference kernel. The kAwayFromZero kernel is what the next case
// already reaches through reference_ops::DepthwiseConv, so using it
// here would leave the kUpward rounding path unchecked.
DepthwiseConvOutputRounding::kUpward>::Run(op_params,
input_shape,
input_data,
filter_shape,
filter_data,
bias_shape,
bias_data,
output_shape,
reference_output_data
.data());
break;
case DepthwiseConvOutputRounding::kAwayFromZero:
reference_ops::DepthwiseConv(
op_params, input_shape, input_data, filter_shape, filter_data,
bias_shape, bias_data, output_shape, reference_output_data.data());
break;
case DepthwiseConvOutputRounding::kNone:
default:
EXPECT_NE(test_param.output_rounding, DepthwiseConvOutputRounding::kNone);
break;
}
DispatchDepthwiseConv(test_param, op_params, input_shape, input_data, DispatchDepthwiseConv(test_param, op_params, input_shape, input_data,
filter_shape, filter_data, bias_shape, bias_data, filter_shape, filter_data, bias_shape, bias_data,
output_shape, output_data.data()); output_shape, output_data.data());
@ -221,10 +248,10 @@ int TestOneDepthwiseConvWithGivenOutputShift(
// Normally we should require bit-for-bit exact results. Unfortunately a bug // Normally we should require bit-for-bit exact results. Unfortunately a bug
// in the Intel arm_neon_sse.h translation header that we use for x86 tests // in the Intel arm_neon_sse.h translation header that we use for x86 tests
// causes 1-bit inaccuracy in // causes 1-bit inaccuracy in the vqrdmulh_n_s32 intrinsic, which causes
// the vqrdmulh_n_s32 intrinsic, which causes off-by-1 errors in quantized // off-by-1 errors in quantized DepthwiseConv ops. So we have to live with a
// DepthwiseConv ops. So we have to live with a few off-by-one errors for now, // few off-by-one errors for now, yet still ensure that no more than a small
// yet still ensure that no more than a small minority of values are wrong. // minority of values are wrong.
EXPECT_LT(std::abs(mean_diff), mean_tolerance); EXPECT_LT(std::abs(mean_diff), mean_tolerance);
EXPECT_LT(mean_abs_diff, mean_tolerance); EXPECT_LT(mean_abs_diff, mean_tolerance);
EXPECT_LE(std::abs(median_diff), diff_median_tolerance); EXPECT_LE(std::abs(median_diff), diff_median_tolerance);
@ -482,16 +509,21 @@ bool TryTestOneNeonDot3x3(const TestParam& test_param,
dilation_width_factor, dilation_height_factor, padding_type); dilation_width_factor, dilation_height_factor, padding_type);
} }
void TestOneDepthwiseConv(DepthwiseConvInvocation forced_invocation) { void TestOneDepthwiseConv(DepthwiseConvInvocation forced_invocation,
DepthwiseConvOutputRounding output_rounding) {
TestParam test_param; TestParam test_param;
test_param.forced_invocation = forced_invocation; test_param.forced_invocation = forced_invocation;
test_param.output_rounding = output_rounding;
while (!TryTestOneDepthwiseConv(test_param, ParamsSpecialization::kNone)) { while (!TryTestOneDepthwiseConv(test_param, ParamsSpecialization::kNone)) {
} }
} }
void TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation forced_invocation) { void TestOneDepthwiseConv3x3Filter(
DepthwiseConvInvocation forced_invocation,
DepthwiseConvOutputRounding output_rounding) {
TestParam test_param; TestParam test_param;
test_param.forced_invocation = forced_invocation; test_param.forced_invocation = forced_invocation;
test_param.output_rounding = output_rounding;
while (!TryTestOneDepthwiseConv3x3Filter(test_param, while (!TryTestOneDepthwiseConv3x3Filter(test_param,
ParamsSpecialization::kNone)) { ParamsSpecialization::kNone)) {
} }
@ -505,7 +537,8 @@ void TestOneNeonDot3x3(const TestParam& test_param) {
TEST(TestDepthwiseConv, TestDepthwiseConv) { TEST(TestDepthwiseConv, TestDepthwiseConv) {
const int kTestsToRun = 10 * 1000; const int kTestsToRun = 10 * 1000;
for (int i = 0; i < kTestsToRun; i++) { for (int i = 0; i < kTestsToRun; i++) {
TestOneDepthwiseConv(DepthwiseConvInvocation::kNone); TestOneDepthwiseConv(DepthwiseConvInvocation::kNone,
DepthwiseConvOutputRounding::kAwayFromZero);
} }
} }
@ -513,14 +546,16 @@ TEST(TestDepthwiseConv, TestDepthwiseConv) {
TEST(TestDepthwiseConv, TestGenericKernel) { TEST(TestDepthwiseConv, TestGenericKernel) {
const int kTestsToRun = 10 * 1000; const int kTestsToRun = 10 * 1000;
for (int i = 0; i < kTestsToRun; i++) { for (int i = 0; i < kTestsToRun; i++) {
TestOneDepthwiseConv(DepthwiseConvInvocation::kUseGenericKernel); TestOneDepthwiseConv(DepthwiseConvInvocation::kUseGenericKernel,
DepthwiseConvOutputRounding::kAwayFromZero);
} }
} }
TEST(TestDepthwiseConv, TestKernel3x3Filter) { TEST(TestDepthwiseConv, TestKernel3x3Filter) {
const int kTestsToRun = 1000; const int kTestsToRun = 1000;
for (int i = 0; i < kTestsToRun; i++) { for (int i = 0; i < kTestsToRun; i++) {
TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kNone); TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kNone,
DepthwiseConvOutputRounding::kAwayFromZero);
} }
} }
@ -529,7 +564,8 @@ TEST(TestDepthwiseConv, TestKernel3x3Filter) {
TEST(TestDepthwiseConv, TestGenericKernel3x3Filter) { TEST(TestDepthwiseConv, TestGenericKernel3x3Filter) {
const int kTestsToRun = 100; const int kTestsToRun = 100;
for (int i = 0; i < kTestsToRun; i++) { for (int i = 0; i < kTestsToRun; i++) {
TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseGenericKernel); TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseGenericKernel,
DepthwiseConvOutputRounding::kAwayFromZero);
} }
} }
@ -537,7 +573,8 @@ TEST(TestDepthwiseConv, TestGenericKernel3x3Filter) {
TEST(TestDepthwiseConv, TestNeon3x3Filter) { TEST(TestDepthwiseConv, TestNeon3x3Filter) {
const int kTestsToRun = 3 * 1000; const int kTestsToRun = 3 * 1000;
for (int i = 0; i < kTestsToRun; i++) { for (int i = 0; i < kTestsToRun; i++) {
TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseNeon3x3); TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseNeon3x3,
DepthwiseConvOutputRounding::kAwayFromZero);
} }
} }
#endif #endif
@ -559,7 +596,9 @@ INSTANTIATE_TEST_SUITE_P(
Values(1000), // tests_to_run Values(1000), // tests_to_run
Bool(), // test_stride Bool(), // test_stride
Values(false), // test_pad Values(false), // test_pad
Values(false) // test_depth_multiplier Values(false), // test_depth_multiplier
Values(DepthwiseConvOutputRounding::kAwayFromZero), // output_rounding
Values(false) // loose_tolerance
), ),
TestParam::TestNameSuffix); TestParam::TestNameSuffix);
#endif #endif
@ -574,7 +613,9 @@ INSTANTIATE_TEST_SUITE_P(
Values(100), // tests_to_run Values(100), // tests_to_run
Bool(), // test_stride Bool(), // test_stride
Bool(), // test_pad Bool(), // test_pad
Bool() // test_depth_multiplier Bool(), // test_depth_multiplier
Values(DepthwiseConvOutputRounding::kUpward), // output_rounding
Values(false) // loose_tolerance
), ),
TestParam::TestNameSuffix); TestParam::TestNameSuffix);

View File

@ -44,6 +44,14 @@ enum class DepthwiseConvInvocation {
kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics. kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics.
}; };
// Category of depthwise convolution output rounding.
//
// Selects how exact halves are resolved when the accumulator is scaled down
// to the output range. kNone is an invalid placeholder, not a rounding mode.
enum class DepthwiseConvOutputRounding {
  // Invalid: a specific method must be specified.
  kNone = 0,
  // Original method: exact halves are rounded away from zero.
  kAwayFromZero = 1,
  // Halves are rounded towards +infinity: 0.5 is added before truncating.
  kUpward = 2,
  // This is where a future kNearestEven would be placed.
};
// Category of depthwise convolution depth multiplication. // Category of depthwise convolution depth multiplication.
enum class DepthwiseConvDepthMultiplication { enum class DepthwiseConvDepthMultiplication {
kNoMultiplication = 0, // Depth multiplier = 1. kNoMultiplication = 0, // Depth multiplier = 1.
@ -52,88 +60,133 @@ enum class DepthwiseConvDepthMultiplication {
namespace reference_ops { namespace reference_ops {
// Scales the accumulator `x` by the quantized multiplier and shift.
//
// This is the generic fallback used for any rounding mode that has no
// dedicated specialization. It forwards to MultiplyByQuantizedMultiplier.
//
//   x                     accumulator value to rescale.
//   quantized_multiplier  fixed-point multiplier.
//   shift                 shift amount passed through unchanged.
template <DepthwiseConvOutputRounding output_rounding>
inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
                                int shift) {
  // kNone is documented as invalid; reject it in debug builds.
  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
  const int32 rescaled =
      MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
  return rescaled;
}
// kAwayFromZero specialization: the original rounding method.
//
// Forwards directly to MultiplyByQuantizedMultiplier with the arguments
// unchanged, so the result is whatever that helper produces.
template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
    int32 x, int32 quantized_multiplier, int shift) {
  const int32 rescaled =
      MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
  return rescaled;
}
// kUpward specialization: rescales `x` and resolves halves upward.
//
// A positive `shift` is applied as a left shift of `x` before the fixed-point
// multiply. A non-positive `shift` is applied as a right shift of the product.
// Before that right shift, half of the divisor (2^(right_shift - 1)) is added
// to the product.
template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
    int32 x, int32 quantized_multiplier, int shift) {
  // Split the signed shift into its two non-negative components.
  int left_shift = 0;
  int right_shift = 0;
  if (shift > 0) {
    left_shift = shift;
  } else {
    right_shift = -shift;
  }

  // Half of 2^right_shift; zero when no right shift takes place.
  int rounding_offset = 0;
  if (right_shift > 0) {
    rounding_offset = 1 << (right_shift - 1);
  }

  const int32 pre_scaled = x * (1 << left_shift);
  const int32 product = gemmlowp::SaturatingRoundingDoublingHighMul(
      pre_scaled, quantized_multiplier);
  return (product + rounding_offset) >> right_shift;
}
// Reference uint8 depthwise convolution, parameterized on the rounding mode
// used when the int32 accumulator is scaled down to the output range.
//
// For every output element, Run() accumulates
//   (filter + weights_offset) * (input + input_offset)
// over the filter window, skipping taps that fall outside the input image.
// It then adds the bias (if any), rescales with
// DepthwiseConvRound<output_rounding>, adds output_offset and clamps to the
// quantized activation range before storing the value as uint8.
template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
  // params        strides, dilation, padding, offsets, multiplier/shift and
  //               activation bounds.
  // input_shape   4-D; dims 1..3 are read as height, width, depth.
  // filter_shape  4-D; dims 1..3 are read as height, width, output depth.
  // bias_data     may be null, in which case no bias is added.
  // output_data   written at Offset(output_shape, b, y, x, channel).
  static inline void Run(const DepthwiseParams& params,
                         const RuntimeShape& input_shape,
                         const uint8* input_data,
                         const RuntimeShape& filter_shape,
                         const uint8* filter_data,
                         const RuntimeShape& bias_shape, const int32* bias_data,
                         const RuntimeShape& output_shape, uint8* output_data) {
    // Geometry parameters.
    const int stride_width = params.stride_width;
    const int stride_height = params.stride_height;
    const int dilation_width_factor = params.dilation_width_factor;
    const int dilation_height_factor = params.dilation_height_factor;
    const int pad_width = params.padding_values.width;
    const int pad_height = params.padding_values.height;
    const int depth_multiplier = params.depth_multiplier;

    // Quantization parameters.
    const int32 input_offset = params.input_offset;
    const int32 filter_offset = params.weights_offset;
    const int32 output_offset = params.output_offset;
    const int32 output_multiplier = params.output_multiplier;
    const int output_shift = params.output_shift;
    const int32 output_activation_min = params.quantized_activation_min;
    const int32 output_activation_max = params.quantized_activation_max;

    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
    const int input_height = input_shape.Dims(1);
    const int input_width = input_shape.Dims(2);
    const int input_depth = input_shape.Dims(3);
    const int filter_height = filter_shape.Dims(1);
    const int filter_width = filter_shape.Dims(2);
    const int output_height = output_shape.Dims(1);
    const int output_width = output_shape.Dims(2);
    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

    for (int batch = 0; batch < batches; ++batch) {
      for (int out_y = 0; out_y < output_height; ++out_y) {
        // Top edge of the filter window in input coordinates.
        const int in_y_origin = (out_y * stride_height) - pad_height;
        for (int out_x = 0; out_x < output_width; ++out_x) {
          // Left edge of the filter window in input coordinates.
          const int in_x_origin = (out_x * stride_width) - pad_width;
          for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
            for (int mult = 0; mult < depth_multiplier; ++mult) {
              // Each input channel yields depth_multiplier adjacent output
              // channels.
              const int out_channel = in_channel * depth_multiplier + mult;

              int32 acc = 0;
              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // Rows outside the input image contribute zero.
                if (in_y < 0 || in_y >= input_height) {
                  continue;
                }
                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                  const int in_x =
                      in_x_origin + dilation_width_factor * filter_x;
                  // Columns outside the input image contribute zero.
                  if (in_x < 0 || in_x >= input_width) {
                    continue;
                  }
                  const int32 input_val = input_data[Offset(
                      input_shape, batch, in_y, in_x, in_channel)];
                  const int32 filter_val = filter_data[Offset(
                      filter_shape, 0, filter_y, filter_x, out_channel)];
                  acc += (filter_val + filter_offset) *
                         (input_val + input_offset);
                }
              }

              if (bias_data != nullptr) {
                acc += bias_data[out_channel];
              }

              // Requantize, re-centre, then clamp to the activation range.
              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
                                                        output_shift);
              acc += output_offset;
              acc = std::min(std::max(acc, output_activation_min),
                             output_activation_max);
              output_data[Offset(output_shape, batch, out_y, out_x,
                                 out_channel)] = static_cast<uint8>(acc);
            }
          }
        }
      }
    }
  }
};
inline void DepthwiseConv( inline void DepthwiseConv(
const DepthwiseParams& params, const RuntimeShape& input_shape, const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape, const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape, const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape, const int32* bias_data, const RuntimeShape& output_shape,
uint8* output_data) { uint8* output_data) {
const int stride_width = params.stride_width; return DepthwiseConvBasicKernel<
const int stride_height = params.stride_height; DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
const int dilation_width_factor = params.dilation_width_factor; input_data, filter_shape,
const int dilation_height_factor = params.dilation_height_factor; filter_data, bias_shape,
const int pad_width = params.padding_values.width; bias_data, output_shape,
const int pad_height = params.padding_values.height; output_data);
const int depth_multiplier = params.depth_multiplier;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int b = 0; b < batches; ++b) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int ic = 0; ic < input_depth; ++ic) {
for (int m = 0; m < depth_multiplier; m++) {
const int oc = m + ic * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val =
input_data[Offset(input_shape, b, in_y, in_x, ic)];
int32 filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, oc)];
acc +=
(filter_val + filter_offset) * (input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[oc];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
static_cast<uint8>(acc);
}
}
}
}
}
} }
} // end namespace reference_ops } // namespace reference_ops
} // end namespace tflite } // end namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_