Optimize pow for single integer exponent case.

PiperOrigin-RevId: 255877194
This commit is contained in:
Renjie Liu 2019-06-30 19:40:25 -07:00 committed by TensorFlower Gardener
parent 38df5d8ef4
commit 669d17a3b4
4 changed files with 113 additions and 1 deletion

View File

@ -1434,6 +1434,7 @@ cc_test(
":test_util",
"//tensorflow/lite:framework",
"//tensorflow/lite/c:c_api_internal",
"//tensorflow/lite/kernels/internal:test_util",
"@com_google_googletest//:gtest",
],
)

View File

@ -5648,6 +5648,62 @@ inline void HardSwish(const HardSwishParams& params,
}
}
template <typename T>
inline void IntegerExponentPow(const ArithmeticParams& params,
                               const RuntimeShape& unextended_base_shape,
                               const T* base_data, const int exponent,
                               const RuntimeShape& unextended_output_shape,
                               T* output_data) {
  // Raises every element of `base_data` to the (positive integer) power
  // `exponent` via recursive exponentiation-by-squaring, so only
  // O(log(exponent)) elementwise Mul passes are needed.
  TFLITE_DCHECK_GE(exponent, 1);
  if (exponent == 1) {
    // Base of the recursion: base^1 is just a copy of the input buffer.
    std::memcpy(output_data, base_data,
                unextended_base_shape.FlatSize() * sizeof(T));
    return;
  }
  // Compute base^(exponent/2) into the output buffer, then square it in
  // place.
  IntegerExponentPow(params, unextended_base_shape, base_data, exponent / 2,
                     unextended_output_shape, output_data);
  Mul(params, unextended_base_shape, output_data, unextended_base_shape,
      output_data, unextended_output_shape, output_data);
  const bool exponent_is_odd = (exponent % 2) == 1;
  if (exponent_is_odd) {
    // Odd exponent: fold in one extra factor of the base.
    Mul(params, unextended_base_shape, base_data, unextended_base_shape,
        output_data, unextended_output_shape, output_data);
  }
}
template <typename T>
inline void BroadcastPow4D(const RuntimeShape& unextended_input1_shape,
                           const T* input1_data,
                           const RuntimeShape& unextended_input2_shape,
                           const T* input2_data,
                           const RuntimeShape& unextended_output_shape,
                           T* output_data) {
  gemmlowp::ScopedProfilingLabel label("PowBroadcast");

  // Fast path: when the exponent is a scalar that is numerically a positive
  // integer, pow can be computed with repeated multiplications instead of
  // the slow generic broadcast kernel.
  if (unextended_input2_shape.FlatSize() == 1) {
    static const float epsilon = 1e-5;
    const T exponent = input2_data[0];
    const int int_exponent = static_cast<int>(std::round(exponent));
    const bool is_positive_integer_exponent =
        (std::abs(input2_data[0] - int_exponent) < epsilon) &&
        (int_exponent >= 1);
    if (is_positive_integer_exponent) {
      ArithmeticParams params;
      // Widen the activation range to the full numeric range so the Mul
      // calls inside IntegerExponentPow never clamp intermediate results.
      if (std::is_same<T, float>::value) {
        params.float_activation_max = std::numeric_limits<float>::max();
        params.float_activation_min = std::numeric_limits<float>::lowest();
      } else if (std::is_same<T, int>::value) {
        params.quantized_activation_max = std::numeric_limits<int>::max();
        params.quantized_activation_min = std::numeric_limits<int>::lowest();
      }
      IntegerExponentPow(params, unextended_input1_shape, input1_data,
                         int_exponent, unextended_output_shape, output_data);
      return;
    }
  }

  // General case: non-scalar or non-integer exponent falls back to the
  // reference broadcasting implementation.
  reference_ops::BroadcastPow4DSlow(unextended_input1_shape, input1_data,
                                    unextended_input2_shape, input2_data,
                                    unextended_output_shape, output_data);
}
} // namespace optimized_ops
} // namespace tflite

View File

@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
@ -80,7 +81,7 @@ template <typename T>
void PowImpl(const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output, bool requires_broadcast) {
if (requires_broadcast) {
reference_ops::BroadcastPow4DSlow(
optimized_ops::BroadcastPow4D(
GetTensorShape(input1), GetTensorData<T>(input1),
GetTensorShape(input2), GetTensorData<T>(input2),
GetTensorShape(output), GetTensorData<T>(output));

View File

@ -14,6 +14,7 @@ limitations under the License.
==============================================================================*/
#include <gtest/gtest.h>
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/test_util.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/model.h"
@ -107,5 +108,58 @@ TEST(PowOpModel, BroadcastTest) {
EXPECT_THAT(model.GetOutput(), ElementsAre(20736, 16, 2401, 4096));
}
template <typename T>
void CalculateTrueResults(const std::vector<T>& input_data, T exponent,
                          int flat_size, std::vector<T>* output_data) {
  // Reference elementwise pow used as ground truth for the kernel output.
  // Writes the first `flat_size` results into `*output_data`, which the
  // caller must have sized appropriately.
  std::transform(input_data.begin(), input_data.begin() + flat_size,
                 output_data->begin(),
                 [exponent](T base) -> T { return std::pow(base, exponent); });
}
TEST(PowOpModel, FloatSingleIntegerExponentTest) {
  PowOpModel<float> model({TensorType_FLOAT32, {1, 2, 2, 1}},
                          {TensorType_FLOAT32, {1}}, {TensorType_FLOAT32, {}});
  const int input_size = 1 * 2 * 2 * 1;
  // Sweep scalar exponents 1..19; each should exercise the optimized
  // integer-exponent path, even when slightly perturbed off the integer.
  for (int i = 1; i < 20; ++i) {
    std::vector<float> input_data(input_size);
    for (int index = 0; index < input_size; ++index) {
      // A negative base with a float exponent produces NaN, so only
      // positive bases are generated.
      input_data[index] = UniformRandomFloat(0, 1.5);
    }
    model.PopulateTensor<float>(model.input1(), input_data);
    // Randomly deviate the exponent, e.g. 1.99999 or 2.00001, to check the
    // integer-detection tolerance.
    float exponent = static_cast<float>(i);
    exponent += UniformRandomInt(-1, 1) * 1e-5;
    model.PopulateTensor<float>(model.input2(), {exponent});
    model.Invoke();
    EXPECT_THAT(model.GetOutputShape(), ElementsAre(1, 2, 2, 1));
    std::vector<float> expected(input_size);
    CalculateTrueResults(input_data, exponent, input_size, &expected);
    EXPECT_THAT(model.GetOutput(),
                ElementsAreArray(ArrayFloatNear(expected, 1e-2)));
  }
}
TEST(PowOpModel, IntSingleIntegerExponentTest) {
  PowOpModel<int32_t> model({TensorType_INT32, {1, 2, 2, 1}},
                            {TensorType_INT32, {1}}, {TensorType_INT32, {}});
  const int input_size = 1 * 2 * 2 * 1;
  // Sweep scalar integer exponents 1..19 through the optimized path.
  for (int i = 1; i < 20; ++i) {
    std::vector<int32_t> input_data(input_size);
    for (int index = 0; index < input_size; ++index) {
      // Fix: UniformRandomInt(-2, -2) always returned -2, so the "random"
      // sweep only ever tested a single base value. Sample the full
      // [-2, 2] range instead; |base| <= 2 keeps base^19 within int32
      // range and exactly representable in the double used by std::pow.
      input_data[index] = UniformRandomInt(-2, 2);
    }
    model.PopulateTensor<int32_t>(model.input1(), input_data);
    const int exponent = i;
    model.PopulateTensor<int32_t>(model.input2(), {exponent});
    model.Invoke();
    EXPECT_THAT(model.GetOutputShape(), ElementsAre(1, 2, 2, 1));
    std::vector<int32_t> output_data(input_size);
    CalculateTrueResults(input_data, exponent, input_size, &output_data);
    EXPECT_THAT(model.GetOutput(), ElementsAreArray(output_data));
  }
}
} // namespace
} // namespace tflite