Optimize pow for single integer exponent case.
PiperOrigin-RevId: 255877194
This commit is contained in:
parent
38df5d8ef4
commit
669d17a3b4
@ -1434,6 +1434,7 @@ cc_test(
|
||||
":test_util",
|
||||
"//tensorflow/lite:framework",
|
||||
"//tensorflow/lite/c:c_api_internal",
|
||||
"//tensorflow/lite/kernels/internal:test_util",
|
||||
"@com_google_googletest//:gtest",
|
||||
],
|
||||
)
|
||||
|
@ -5648,6 +5648,62 @@ inline void HardSwish(const HardSwishParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void IntegerExponentPow(const ArithmeticParams& params,
|
||||
const RuntimeShape& unextended_base_shape,
|
||||
const T* base_data, const int exponent,
|
||||
const RuntimeShape& unextended_output_shape,
|
||||
T* output_data) {
|
||||
TFLITE_DCHECK_GE(exponent, 1);
|
||||
if (exponent == 1) {
|
||||
// copy data over.
|
||||
std::memcpy(output_data, base_data,
|
||||
unextended_base_shape.FlatSize() * sizeof(T));
|
||||
} else {
|
||||
IntegerExponentPow(params, unextended_base_shape, base_data, exponent / 2,
|
||||
unextended_output_shape, output_data);
|
||||
Mul(params, unextended_base_shape, output_data, unextended_base_shape,
|
||||
output_data, unextended_output_shape, output_data);
|
||||
if (exponent % 2 == 1) {
|
||||
Mul(params, unextended_base_shape, base_data, unextended_base_shape,
|
||||
output_data, unextended_output_shape, output_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void BroadcastPow4D(const RuntimeShape& unextended_input1_shape,
|
||||
const T* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape,
|
||||
const T* input2_data,
|
||||
const RuntimeShape& unextended_output_shape,
|
||||
T* output_data) {
|
||||
gemmlowp::ScopedProfilingLabel label("PowBroadcast");
|
||||
|
||||
if (unextended_input2_shape.FlatSize() == 1) {
|
||||
static const float epsilon = 1e-5;
|
||||
const T exponent = input2_data[0];
|
||||
const int int_exponent = static_cast<int>(std::round(exponent));
|
||||
if ((std::abs(input2_data[0] - int_exponent) < epsilon) &&
|
||||
(int_exponent >= 1)) {
|
||||
ArithmeticParams params;
|
||||
if (std::is_same<T, float>::value) {
|
||||
params.float_activation_max = std::numeric_limits<float>::max();
|
||||
params.float_activation_min = std::numeric_limits<float>::lowest();
|
||||
} else if (std::is_same<T, int>::value) {
|
||||
params.quantized_activation_max = std::numeric_limits<int>::max();
|
||||
params.quantized_activation_min = std::numeric_limits<int>::lowest();
|
||||
}
|
||||
IntegerExponentPow(params, unextended_input1_shape, input1_data,
|
||||
int_exponent, unextended_output_shape, output_data);
|
||||
return;
|
||||
}
|
||||
}
|
||||
reference_ops::BroadcastPow4DSlow(unextended_input1_shape, input1_data,
|
||||
unextended_input2_shape, input2_data,
|
||||
unextended_output_shape, output_data);
|
||||
}
|
||||
|
||||
} // namespace optimized_ops
|
||||
} // namespace tflite
|
||||
|
||||
|
@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/c/c_api_internal.h"
|
||||
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
@ -80,7 +81,7 @@ template <typename T>
|
||||
void PowImpl(const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||
TfLiteTensor* output, bool requires_broadcast) {
|
||||
if (requires_broadcast) {
|
||||
reference_ops::BroadcastPow4DSlow(
|
||||
optimized_ops::BroadcastPow4D(
|
||||
GetTensorShape(input1), GetTensorData<T>(input1),
|
||||
GetTensorShape(input2), GetTensorData<T>(input2),
|
||||
GetTensorShape(output), GetTensorData<T>(output));
|
||||
|
@ -14,6 +14,7 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "tensorflow/lite/interpreter.h"
|
||||
#include "tensorflow/lite/kernels/internal/test_util.h"
|
||||
#include "tensorflow/lite/kernels/register.h"
|
||||
#include "tensorflow/lite/kernels/test_util.h"
|
||||
#include "tensorflow/lite/model.h"
|
||||
@ -107,5 +108,58 @@ TEST(PowOpModel, BroadcastTest) {
|
||||
EXPECT_THAT(model.GetOutput(), ElementsAre(20736, 16, 2401, 4096));
|
||||
}
|
||||
|
||||
// Computes the reference result std::pow(input_data[i], exponent) for the
// first `flat_size` elements and stores it in `output_data`.
//
// Both vectors are accessed through at(), so a `flat_size` larger than either
// vector raises std::out_of_range instead of silently reading past the end of
// `input_data`.
template <typename T>
void CalculateTrueResults(const std::vector<T>& input_data, T exponent,
                          int flat_size, std::vector<T>* output_data) {
  for (int i = 0; i < flat_size; ++i) {
    output_data->at(i) = std::pow(input_data.at(i), exponent);
  }
}
|
||||
|
||||
// Runs the float Pow op with a single-element exponent tensor for nominal
// exponents 1..19 and compares the output against std::pow.
TEST(PowOpModel, FloatSingleIntegerExponentTest) {
  PowOpModel<float> model({TensorType_FLOAT32, {1, 2, 2, 1}},
                          {TensorType_FLOAT32, {1}}, {TensorType_FLOAT32, {}});
  const int num_elements = 1 * 2 * 2 * 1;
  for (int nominal_exponent = 1; nominal_exponent < 20; ++nominal_exponent) {
    // A negative base raised to a non-integer float exponent yields NaN, so
    // only non-negative bases are generated.
    std::vector<float> bases(num_elements);
    for (float& base : bases) {
      base = UniformRandomFloat(0, 1.5);
    }
    model.PopulateTensor<float>(model.input1(), bases);

    // Nudge the exponent slightly off the integer, e.g. 1.99999 or 2.00001.
    // NOTE(review): presumably UniformRandomInt(-1, 1) yields -1, 0 or 1 --
    // confirm against the helper's definition.
    float exponent = static_cast<float>(nominal_exponent);
    exponent += UniformRandomInt(-1, 1) * 1e-5;
    model.PopulateTensor<float>(model.input2(), {exponent});

    model.Invoke();
    EXPECT_THAT(model.GetOutputShape(), ElementsAre(1, 2, 2, 1));

    std::vector<float> expected(num_elements);
    CalculateTrueResults(bases, exponent, num_elements, &expected);
    EXPECT_THAT(model.GetOutput(),
                ElementsAreArray(ArrayFloatNear(expected, 1e-2)));
  }
}
|
||||
|
||||
// Runs the int32 Pow op with a single-element exponent tensor for exponents
// 1..19 and requires the output to match std::pow exactly.
TEST(PowOpModel, IntSingleIntegerExponentTest) {
  PowOpModel<int32_t> model({TensorType_INT32, {1, 2, 2, 1}},
                            {TensorType_INT32, {1}}, {TensorType_INT32, {}});
  const int num_elements = 1 * 2 * 2 * 1;
  for (int power = 1; power < 20; ++power) {
    // NOTE(review): both bounds are -2, so the requested range contains only
    // the value -2 -- confirm whether a wider range such as [-2, 2] was
    // intended.
    std::vector<int32_t> bases(num_elements);
    for (int32_t& base : bases) {
      base = UniformRandomInt(-2, -2);
    }
    model.PopulateTensor<int32_t>(model.input1(), bases);

    int exponent = power;
    model.PopulateTensor<int32_t>(model.input2(), {exponent});

    model.Invoke();
    EXPECT_THAT(model.GetOutputShape(), ElementsAre(1, 2, 2, 1));

    std::vector<int32_t> expected(num_elements);
    CalculateTrueResults(bases, exponent, num_elements, &expected);
    EXPECT_THAT(model.GetOutput(), ElementsAreArray(expected));
  }
}
|
||||
|
||||
} // namespace
|
||||
} // namespace tflite
|
||||
|
Loading…
Reference in New Issue
Block a user