From 669d17a3b4c2c9b770ffbca4345801f48c50deef Mon Sep 17 00:00:00 2001
From: Renjie Liu
Date: Sun, 30 Jun 2019 19:40:25 -0700
Subject: [PATCH] Optimize pow for single integer exponent case.

PiperOrigin-RevId: 255877194
---
 tensorflow/lite/kernels/BUILD                 |  1 +
 .../internal/optimized/optimized_ops.h        | 68 +++++++++++++++++++
 tensorflow/lite/kernels/pow.cc                |  3 +-
 tensorflow/lite/kernels/pow_test.cc           | 62 +++++++++++++++++
 4 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD
index 044ebc1b03a..a2e5dd2c2a7 100644
--- a/tensorflow/lite/kernels/BUILD
+++ b/tensorflow/lite/kernels/BUILD
@@ -1434,6 +1434,7 @@ cc_test(
         ":test_util",
         "//tensorflow/lite:framework",
         "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/kernels/internal:test_util",
         "@com_google_googletest//:gtest",
     ],
 )
diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
index 6fa5202b6cc..8433b9052ac 100644
--- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
@@ -5648,6 +5648,68 @@ inline void HardSwish(const HardSwishParams& params,
   }
 }
 
+// Computes output_data[i] = base_data[i]^exponent for an integer exponent
+// >= 1 by repeated squaring: the result for exponent / 2 is computed
+// recursively, squared in place with Mul(), and multiplied by the base once
+// more when the exponent is odd. exponent == 1 copies the base buffer as is.
+template <typename T>
+inline void IntegerExponentPow(const ArithmeticParams& params,
+                               const RuntimeShape& unextended_base_shape,
+                               const T* base_data, const int exponent,
+                               const RuntimeShape& unextended_output_shape,
+                               T* output_data) {
+  TFLITE_DCHECK_GE(exponent, 1);
+  if (exponent == 1) {
+    // Base case: x^1 == x, so copy the base into the output buffer.
+    std::memcpy(output_data, base_data,
+                unextended_base_shape.FlatSize() * sizeof(T));
+  } else {
+    // Compute base^(exponent / 2) into the output, then square it in place.
+    IntegerExponentPow(params, unextended_base_shape, base_data, exponent / 2,
+                       unextended_output_shape, output_data);
+    Mul(params, unextended_base_shape, output_data, unextended_base_shape,
+        output_data, unextended_output_shape, output_data);
+    if (exponent % 2 == 1) {
+      // Odd exponent: the halving dropped one factor of base, restore it.
+      Mul(params, unextended_base_shape, base_data, unextended_base_shape,
+          output_data, unextended_output_shape, output_data);
+    }
+  }
+}
+
+// Element-wise pow. When the exponent tensor holds exactly one value and that
+// value lies within 1e-5 of an integer >= 1, the result is computed with
+// IntegerExponentPow using the rounded exponent; every other case is forwarded
+// to reference_ops::BroadcastPow4DSlow.
+template <typename T>
+inline void BroadcastPow4D(const RuntimeShape& unextended_input1_shape,
+                           const T* input1_data,
+                           const RuntimeShape& unextended_input2_shape,
+                           const T* input2_data,
+                           const RuntimeShape& unextended_output_shape,
+                           T* output_data) {
+  gemmlowp::ScopedProfilingLabel label("PowBroadcast");
+
+  if (unextended_input2_shape.FlatSize() == 1) {
+    static const float epsilon = 1e-5;
+    const T exponent = input2_data[0];
+    const int int_exponent = static_cast<int>(std::round(exponent));
+    if ((std::abs(input2_data[0] - int_exponent) < epsilon) &&
+        (int_exponent >= 1)) {
+      ArithmeticParams params;
+      // Use the full value range of the element type as activation bounds
+      // (presumably so that Mul() applies no clamping -- confirm).
+      if (std::is_same<T, float>::value) {
+        params.float_activation_max = std::numeric_limits<float>::max();
+        params.float_activation_min = std::numeric_limits<float>::lowest();
+      } else if (std::is_same<T, int>::value) {
+        params.quantized_activation_max = std::numeric_limits<int>::max();
+        params.quantized_activation_min = std::numeric_limits<int>::lowest();
+      }
+      IntegerExponentPow(params, unextended_input1_shape, input1_data,
+                         int_exponent, unextended_output_shape, output_data);
+      return;
+    }
+  }
+  reference_ops::BroadcastPow4DSlow(unextended_input1_shape, input1_data,
+                                    unextended_input2_shape, input2_data,
+                                    unextended_output_shape, output_data);
+}
+
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/pow.cc b/tensorflow/lite/kernels/pow.cc
index 9f84e1cc5e6..3ea6b1079a0 100644
--- a/tensorflow/lite/kernels/pow.cc
+++ b/tensorflow/lite/kernels/pow.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
 #include "tensorflow/lite/kernels/internal/tensor.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
@@ -80,7 +81,7 @@ template <typename T>
 void PowImpl(const TfLiteTensor* input1, const TfLiteTensor* input2,
              TfLiteTensor* output, bool requires_broadcast) {
   if (requires_broadcast) {
-    reference_ops::BroadcastPow4DSlow(
+    optimized_ops::BroadcastPow4D(
         GetTensorShape(input1), GetTensorData<T>(input1),
         GetTensorShape(input2), GetTensorData<T>(input2),
         GetTensorShape(output), GetTensorData<T>(output));
diff --git a/tensorflow/lite/kernels/pow_test.cc b/tensorflow/lite/kernels/pow_test.cc
index cbd15b301b1..569eee8ff78 100644
--- a/tensorflow/lite/kernels/pow_test.cc
+++ b/tensorflow/lite/kernels/pow_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include <gtest/gtest.h>
 #include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/internal/test_util.h"
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/kernels/test_util.h"
 #include "tensorflow/lite/model.h"
@@ -107,5 +108,66 @@ TEST(PowOpModel, BroadcastTest) {
   EXPECT_THAT(model.GetOutput(), ElementsAre(20736, 16, 2401, 4096));
 }
 
+// Fills the first flat_size entries of *output_data with
+// std::pow(input_data[i], exponent); *output_data must already be that large.
+template <typename T>
+void CalculateTrueResults(const std::vector<T>& input_data, T exponent,
+                          int flat_size, std::vector<T>* output_data) {
+  for (int i = 0; i < flat_size; ++i) {
+    output_data->at(i) = std::pow(input_data[i], exponent);
+  }
+}
+
+// Checks float pow with a single exponent at (or about 1e-5 away from) each
+// integer in [1, 19] against std::pow, using ArrayFloatNear with 1e-2.
+TEST(PowOpModel, FloatSingleIntegerExponentTest) {
+  PowOpModel<float> model({TensorType_FLOAT32, {1, 2, 2, 1}},
+                          {TensorType_FLOAT32, {1}}, {TensorType_FLOAT32, {}});
+  const int input_size = 1 * 2 * 2 * 1;
+  for (int i = 1; i < 20; ++i) {
+    std::vector<float> input_data(input_size);
+    for (int index = 0; index < input_size; ++index) {
+      // std::pow yields NaN for a negative base with a non-integral exponent,
+      // so only non-negative bases are generated here.
+      input_data[index] = UniformRandomFloat(0, 1.5);
+    }
+    model.PopulateTensor<float>(model.input1(), input_data);
+    float exponent = static_cast<float>(i);
+    // Randomly nudge the exponent off the integer, e.g. 1.99999 or 2.00001.
+    exponent += UniformRandomInt(-1, 1) * 1e-5;
+    model.PopulateTensor<float>(model.input2(), {exponent});
+    model.Invoke();
+    EXPECT_THAT(model.GetOutputShape(), ElementsAre(1, 2, 2, 1));
+    std::vector<float> output_data(input_size);
+    CalculateTrueResults(input_data, exponent, input_size, &output_data);
+    EXPECT_THAT(model.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(output_data, 1e-2)));
+  }
+}
+
+// Checks int32 pow with a single exponent in [1, 19] against std::pow and
+// requires the outputs to match exactly.
+TEST(PowOpModel, IntSingleIntegerExponentTest) {
+  PowOpModel<int32_t> model({TensorType_INT32, {1, 2, 2, 1}},
+                            {TensorType_INT32, {1}}, {TensorType_INT32, {}});
+  const int input_size = 1 * 2 * 2 * 1;
+  for (int i = 1; i < 20; ++i) {
+    std::vector<int32_t> input_data(input_size);
+    for (int index = 0; index < input_size; ++index) {
+      // NOTE(review): both bounds are -2, so the base presumably is always -2;
+      // confirm whether a wider range such as (-2, 2) was intended.
+      input_data[index] = UniformRandomInt(-2, -2);
+    }
+    model.PopulateTensor<int32_t>(model.input1(), input_data);
+    int exponent = i;
+    model.PopulateTensor<int32_t>(model.input2(), {exponent});
+    model.Invoke();
+    EXPECT_THAT(model.GetOutputShape(), ElementsAre(1, 2, 2, 1));
+    std::vector<int32_t> output_data(input_size);
+    CalculateTrueResults(input_data, exponent, input_size, &output_data);
+    EXPECT_THAT(model.GetOutput(), ElementsAreArray(output_data));
+  }
+}
+
 }  // namespace
 }  // namespace tflite