From 26cd1d1d0630489709e300b72a67d08d50faaf6c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 11 Jul 2018 19:13:29 -0700
Subject: [PATCH] add int32 support for mul

PiperOrigin-RevId: 204230461
---
Review notes (free-form text placed here, after the "---" separator and before
the first "diff --git" line, is ignored when the patch is applied):

  * optimized_ops.h: adds a non-template int32 Mul overload that takes an
    explicit activation min/max and passes each element-wise product through
    ActivationFunctionWithMinMax.
  * reference_ops.h: the float-only Mul becomes a template on the element
    type T; its body is unchanged hunk context.
  * mul.cc: EvalFloat is renamed to EvalMul. The TF_LITE_MUL macro gains a
    data_type argument and now declares the activation range itself, and
    EvalMul picks int32_t or float from output->type. Eval routes both
    kTfLiteFloat32 and kTfLiteInt32 to EvalMul and the error message is
    updated to mention INT32.
  * mul_test.cc: adds IntegerMulOpModel, whose GetOutput() extracts int32_t.

  NOTE(review): ActivationFunctionWithMinMax is defined outside this patch.
  If it only accepts float arguments, the int32 products and bounds would be
  implicitly converted through float -- TODO confirm it is int32-capable.
  NOTE(review): CalculateActivationRange is called with int32_t* outputs by
  the new macro; it is defined outside this patch -- confirm such an overload
  exists.
  NOTE(review): input1_data[i] * input2_data[i] is a plain int32 multiply, so
  a product outside the int32 range is signed overflow (undefined behavior in
  C++) -- confirm this is acceptable for the kernel's inputs.
  NOTE(review): no hunk changes Prepare(); verify its type checks accept
  kTfLiteInt32 tensors.
  NOTE(review): EvalMul takes no action when output->type is neither
  kTfLiteInt32 nor kTfLiteFloat32; in this patch Eval only calls it for those
  two types.

 .../internal/optimized/optimized_ops.h        | 14 ++++
 .../internal/reference/reference_ops.h        |  9 +--
 tensorflow/contrib/lite/kernels/mul.cc        | 67 ++++++++++++-------
 tensorflow/contrib/lite/kernels/mul_test.cc   | 58 ++++++++++++++++
 .../testing/generated_examples_zip_test.cc    |  1 -
 5 files changed, 118 insertions(+), 31 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index ebd3b116e17..c857fdf6995 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -3054,6 +3054,20 @@ void Mul(const float* input1_data, const Dims<4>& input1_dims,
       output_activation_max, output_data, output_dims);
 }
 
+inline void Mul(const int32* input1_data, const Dims<4>& input1_dims,
+                const int32* input2_data, const Dims<4>& input2_dims,
+                int32 output_activation_min, int32 output_activation_max,
+                int32* output_data, const Dims<4>& output_dims) {
+  gemmlowp::ScopedProfilingLabel label("Mul/int32");
+
+  const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] * input2_data[i], output_activation_min,
+        output_activation_max);
+  }
+}
+
 template <FusedActivationFunctionType Ac>
 void Mul(const int32* input1_data, const Dims<4>& input1_dims,
          const int32* input2_data, const Dims<4>& input2_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 912e455a2e9..2d40f1769ba 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1429,10 +1429,11 @@ inline void BroadcastAddFivefold(
                output_activation_max, output_data, output_dims);
 }
 
-inline void Mul(const float* input1_data, const Dims<4>& input1_dims,
-                const float* input2_data, const Dims<4>& input2_dims,
-                float output_activation_min, float output_activation_max,
-                float* output_data, const Dims<4>& output_dims) {
+template <typename T>
+inline void Mul(const T* input1_data, const Dims<4>& input1_dims,
+                const T* input2_data, const Dims<4>& input2_dims,
+                T output_activation_min, T output_activation_max,
+                T* output_data, const Dims<4>& output_dims) {
   const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
   for (int i = 0; i < flat_size; ++i) {
     output_data[i] = ActivationFunctionWithMinMax(
diff --git a/tensorflow/contrib/lite/kernels/mul.cc b/tensorflow/contrib/lite/kernels/mul.cc
index 1f72f3a3c7a..349f3e67261 100644
--- a/tensorflow/contrib/lite/kernels/mul.cc
+++ b/tensorflow/contrib/lite/kernels/mul.cc
@@ -100,29 +100,44 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 template <KernelType kernel_type>
-void EvalFloat(TfLiteContext* context, TfLiteNode* node,
-               TfLiteMulParams* params, const OpData* data,
-               const TfLiteTensor* input1, const TfLiteTensor* input2,
-               TfLiteTensor* output) {
-  float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
-                           &output_activation_max);
-#define TF_LITE_MUL(type, opname)                                   \
-  type::opname(GetTensorData<float>(input1), GetTensorDims(input1), \
-               GetTensorData<float>(input2), GetTensorDims(input2), \
-               output_activation_min, output_activation_max,        \
-               GetTensorData<float>(output), GetTensorDims(output))
-  if (kernel_type == kReference) {
-    if (data->requires_broadcast) {
-      TF_LITE_MUL(reference_ops, BroadcastMul);
+void EvalMul(TfLiteContext* context, TfLiteNode* node, TfLiteMulParams* params,
+             const OpData* data, const TfLiteTensor* input1,
+             const TfLiteTensor* input2, TfLiteTensor* output) {
+#define TF_LITE_MUL(type, opname, data_type)                             \
+  data_type output_activation_min, output_activation_max;                \
+  CalculateActivationRange(params->activation, &output_activation_min,   \
+                           &output_activation_max);                      \
+  type::opname(GetTensorData<data_type>(input1), GetTensorDims(input1),  \
+               GetTensorData<data_type>(input2), GetTensorDims(input2),  \
+               output_activation_min, output_activation_max,             \
+               GetTensorData<data_type>(output), GetTensorDims(output))
+  if (output->type == kTfLiteInt32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(reference_ops, BroadcastMul, int32_t);
+      } else {
+        TF_LITE_MUL(reference_ops, Mul, int32_t);
+      }
     } else {
-      TF_LITE_MUL(reference_ops, Mul);
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(optimized_ops, BroadcastMul, int32_t);
+      } else {
+        TF_LITE_MUL(optimized_ops, Mul, int32_t);
+      }
     }
-  } else {
-    if (data->requires_broadcast) {
-      TF_LITE_MUL(optimized_ops, BroadcastMul);
+  } else if (output->type == kTfLiteFloat32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(reference_ops, BroadcastMul, float);
+      } else {
+        TF_LITE_MUL(reference_ops, Mul, float);
+      }
     } else {
-      TF_LITE_MUL(optimized_ops, Mul);
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(optimized_ops, BroadcastMul, float);
+      } else {
+        TF_LITE_MUL(optimized_ops, Mul, float);
+      }
     }
   }
 #undef TF_LITE_MUL
@@ -194,17 +209,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
-  if (output->type == kTfLiteFloat32) {
-    EvalFloat<kernel_type>(context, node, params, data, input1, input2, output);
+  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+    EvalMul<kernel_type>(context, node, params, data, input1, input2, output);
   } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(
         context, EvalQuantized<kernel_type>(context, node, params, data, input1,
                                             input2, output));
   } else {
-    context->ReportError(
-        context,
-        "Mul only supports FLOAT32 and quantized UINT8 and INT16 now, got %d.",
-        output->type);
+    context->ReportError(context,
+                         "Mul only supports FLOAT32, INT32 and quantized UINT8 "
+                         "and INT16 now, got %d.",
+                         output->type);
     return kTfLiteError;
   }
 
diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc
index 43d56e50d26..2807550a6b0 100644
--- a/tensorflow/contrib/lite/kernels/mul_test.cc
+++ b/tensorflow/contrib/lite/kernels/mul_test.cc
@@ -52,6 +52,13 @@ class FloatMulOpModel : public BaseMulOpModel {
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
 };
 
+class IntegerMulOpModel : public BaseMulOpModel {
+ public:
+  using BaseMulOpModel::BaseMulOpModel;
+
+  std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
+};
+
 // For quantized Mul, the error shouldn't exceed (2*step + step^2).
 // The param min=-1.0 & max=1.0 is used in the following tests.
 // The tolerance value is ~0.0157.
@@ -133,6 +140,57 @@ TEST(FloatMulOpTest, WithBroadcast) {
   }
 }
 
+TEST(IntegerMulOpTest, NoActivation) {
+  IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_NONE);
+  m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40}));
+}
+
+TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) {
+  IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 1, 1, 1}));
+}
+
+TEST(IntegerMulOpTest, VariousInputShapes) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
+    m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5, 11, 1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40, 121, 20}))
+        << "With shape number " << i;
+  }
+}
+
+TEST(IntegerMulOpTest, WithBroadcast) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}},  // always a scalar
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
+    m.PopulateTensor<int32_t>(m.input2(), {2});  // non-identity scalar
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),  // exact int32 match, no float tolerance
+                ElementsAreArray({-40, 4, 14, 16, 22, 40}))
+        << "With shape number " << i;
+  }
+}
+
 TEST(QuantizedMulOpTest, NoActivation) {
   QuantizedMulOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
                         {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index 5bc6b53416d..58f6bb53826 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -53,7 +53,6 @@ tensorflow::Env* env = tensorflow::Env::Default();
 // Key is a substring of the test name and value is a bug number.
 // TODO(ahentz): make sure we clean this list up frequently.
 std::map<string, string> kBrokenTests = {
-    {R"(^\/mul.*int32)", "68808744"},
     {R"(^\/div.*int32)", "68808744"},
     {R"(^\/sub.*int32)", "68808744"},
 