add int32 support for mul
PiperOrigin-RevId: 204230461
Commit: 26cd1d1d06
Parent: dcc7a506eb
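
This change templates the reference Mul kernel over its element type, adds an int32 overload of the optimized Mul, generalizes the float-only EvalFloat dispatcher into EvalMul, and covers the new path with tests. The core operation is an elementwise multiply whose products are clamped to the fused-activation range. A minimal self-contained sketch of that semantic, with hypothetical names rather than the TFLite API:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Sketch: multiply elementwise, then clamp each product into [lo, hi].
    std::vector<int32_t> MulClamped(const std::vector<int32_t>& a,
                                    const std::vector<int32_t>& b,
                                    int32_t lo, int32_t hi) {
      std::vector<int32_t> out(a.size());
      for (size_t i = 0; i < a.size(); ++i) {
        out[i] = std::min(std::max(a[i] * b[i], lo), hi);
      }
      return out;
    }

For example, MulClamped({-20, 2, 7, 8}, {1, 2, 3, 5}, -1, 1) yields {-1, 1, 1, 1}, which is exactly the RELU_N1_TO_1 test expectation further down.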
@@ -3054,6 +3054,20 @@ void Mul(const float* input1_data, const Dims<4>& input1_dims,
       output_activation_max, output_data, output_dims);
 }
 
+inline void Mul(const int32* input1_data, const Dims<4>& input1_dims,
+                const int32* input2_data, const Dims<4>& input2_dims,
+                int32 output_activation_min, int32 output_activation_max,
+                int32* output_data, const Dims<4>& output_dims) {
+  gemmlowp::ScopedProfilingLabel label("Mul/int32");
+
+  const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] * input2_data[i], output_activation_min,
+        output_activation_max);
+  }
+}
+
 template <FusedActivationFunctionType Ac>
 void Mul(const int32* input1_data, const Dims<4>& input1_dims,
          const int32* input2_data, const Dims<4>& input2_dims,
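
Note that despite its home in the optimized namespace, this int32 overload is the same scalar loop as the reference kernel; only the gemmlowp profiling label differs. ActivationFunctionWithMinMax acts as a clamp; a sketch of the behavior assumed here, not the actual TFLite helper:

    #include <algorithm>

    // Sketch: clamp x into [act_min, act_max] after the multiply.
    template <typename T>
    T ActivationWithMinMaxSketch(T x, T act_min, T act_max) {
      return std::min(std::max(x, act_min), act_max);
    }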
@@ -1429,10 +1429,11 @@ inline void BroadcastAddFivefold(
       output_activation_max, output_data, output_dims);
 }
 
-inline void Mul(const float* input1_data, const Dims<4>& input1_dims,
-                const float* input2_data, const Dims<4>& input2_dims,
-                float output_activation_min, float output_activation_max,
-                float* output_data, const Dims<4>& output_dims) {
+template <typename T>
+inline void Mul(const T* input1_data, const Dims<4>& input1_dims,
+                const T* input2_data, const Dims<4>& input2_dims,
+                T output_activation_min, T output_activation_max,
+                T* output_data, const Dims<4>& output_dims) {
   const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
   for (int i = 0; i < flat_size; ++i) {
     output_data[i] = ActivationFunctionWithMinMax(
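
The float-only reference Mul becomes a template, so one body now serves float and the new int32 path, with T deduced from the argument pointers. A simplified stand-in that drops the Dims<4>/MatchingFlatSize plumbing:

    #include <algorithm>

    // Sketch: flat-array version of the templated reference kernel.
    template <typename T>
    void MulSketch(const T* in1, const T* in2, T act_min, T act_max,
                   T* out, int flat_size) {
      for (int i = 0; i < flat_size; ++i) {
        out[i] = std::min(std::max(in1[i] * in2[i], act_min), act_max);
      }
    }

Calling MulSketch with float pointers deduces T = float; calling it with int32_t pointers deduces T = int32_t, which is how the kernels above serve the new integer path without a separate overload.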
@@ -100,29 +100,44 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 template <KernelType kernel_type>
-void EvalFloat(TfLiteContext* context, TfLiteNode* node,
-               TfLiteMulParams* params, const OpData* data,
-               const TfLiteTensor* input1, const TfLiteTensor* input2,
-               TfLiteTensor* output) {
-  float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
-                           &output_activation_max);
-#define TF_LITE_MUL(type, opname) \
-  type::opname(GetTensorData<float>(input1), GetTensorDims(input1), \
-               GetTensorData<float>(input2), GetTensorDims(input2), \
-               output_activation_min, output_activation_max, \
-               GetTensorData<float>(output), GetTensorDims(output))
-  if (kernel_type == kReference) {
-    if (data->requires_broadcast) {
-      TF_LITE_MUL(reference_ops, BroadcastMul);
+void EvalMul(TfLiteContext* context, TfLiteNode* node, TfLiteMulParams* params,
+             const OpData* data, const TfLiteTensor* input1,
+             const TfLiteTensor* input2, TfLiteTensor* output) {
+#define TF_LITE_MUL(type, opname, data_type) \
+  data_type output_activation_min, output_activation_max; \
+  CalculateActivationRange(params->activation, &output_activation_min, \
+                           &output_activation_max); \
+  type::opname(GetTensorData<data_type>(input1), GetTensorDims(input1), \
+               GetTensorData<data_type>(input2), GetTensorDims(input2), \
+               output_activation_min, output_activation_max, \
+               GetTensorData<data_type>(output), GetTensorDims(output))
+  if (output->type == kTfLiteInt32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(reference_ops, BroadcastMul, int32_t);
+      } else {
+        TF_LITE_MUL(reference_ops, Mul, int32_t);
+      }
     } else {
-      TF_LITE_MUL(reference_ops, Mul);
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(optimized_ops, BroadcastMul, int32_t);
+      } else {
+        TF_LITE_MUL(optimized_ops, Mul, int32_t);
+      }
     }
-  } else {
-    if (data->requires_broadcast) {
-      TF_LITE_MUL(optimized_ops, BroadcastMul);
+  } else if (output->type == kTfLiteFloat32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(reference_ops, BroadcastMul, float);
+      } else {
+        TF_LITE_MUL(reference_ops, Mul, float);
+      }
     } else {
-      TF_LITE_MUL(optimized_ops, Mul);
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(optimized_ops, BroadcastMul, float);
+      } else {
+        TF_LITE_MUL(optimized_ops, Mul, float);
+      }
     }
   }
 #undef TF_LITE_MUL
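
Because the activation-range variables are now declared by the macro itself, each TF_LITE_MUL expansion has to sit inside its own braced branch, which is why CalculateActivationRange moved into the macro. For reference, a branch such as TF_LITE_MUL(reference_ops, Mul, int32_t) preprocesses to roughly:

    int32_t output_activation_min, output_activation_max;
    CalculateActivationRange(params->activation, &output_activation_min,
                             &output_activation_max);
    reference_ops::Mul(GetTensorData<int32_t>(input1), GetTensorDims(input1),
                       GetTensorData<int32_t>(input2), GetTensorDims(input2),
                       output_activation_min, output_activation_max,
                       GetTensorData<int32_t>(output), GetTensorDims(output));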
@@ -194,17 +209,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
-  if (output->type == kTfLiteFloat32) {
-    EvalFloat<kernel_type>(context, node, params, data, input1, input2, output);
+  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+    EvalMul<kernel_type>(context, node, params, data, input1, input2, output);
   } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(
         context, EvalQuantized<kernel_type>(context, node, params, data, input1,
                                             input2, output));
   } else {
-    context->ReportError(
-        context,
-        "Mul only supports FLOAT32 and quantized UINT8 and INT16 now, got %d.",
-        output->type);
+    context->ReportError(context,
+                         "Mul only supports FLOAT32, INT32 and quantized UINT8 "
+                         "and INT16 now, got %d.",
+                         output->type);
     return kTfLiteError;
   }
 
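
EvalMul derives the clamping bounds per element type through CalculateActivationRange. A hypothetical sketch of the ranges it is expected to produce for an int32 output, inferred from the test expectations below rather than from the helper's source:

    #include <cstdint>
    #include <limits>
    #include <utility>

    enum class Act { kNone, kReluN1To1 };

    // Sketch: activation -> [min, max] for an int32 output.
    std::pair<int32_t, int32_t> ActivationRangeSketch(Act a) {
      if (a == Act::kReluN1To1) return {-1, 1};    // RELU_N1_TO_1
      return {std::numeric_limits<int32_t>::min(),  // NONE: no clamping
              std::numeric_limits<int32_t>::max()};
    }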
@@ -52,6 +52,13 @@ class FloatMulOpModel : public BaseMulOpModel {
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
 };
 
+class IntegerMulOpModel : public BaseMulOpModel {
+ public:
+  using BaseMulOpModel::BaseMulOpModel;
+
+  std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
+};
+
 // For quantized Mul, the error shouldn't exceed (2*step + step^2).
 // The param min=-1.0 & max=1.0 is used in the following tests.
 // The tolerance value is ~0.0157.
@@ -133,6 +140,57 @@ TEST(FloatMulOpTest, WithBroadcast) {
   }
 }
 
+TEST(IntegerMulOpTest, NoActivation) {
+  IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_NONE);
+  m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40}));
+}
+
+TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) {
+  IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 1, 1, 1}));
+}
+
+TEST(IntegerMulOpTest, VariousInputShapes) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
+    m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5, 11, 1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40, 121, 20}))
+        << "With shape number " << i;
+  }
+}
+
+TEST(IntegerMulOpTest, WithBroadcast) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}},  // always a scalar
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
+    m.PopulateTensor<int32_t>(m.input2(), {1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray(ArrayFloatNear({-20, 2, 7, 8, 11, 20})))
+        << "With shape number " << i;
+  }
+}
+
 TEST(QuantizedMulOpTest, NoActivation) {
   QuantizedMulOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
                         {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
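
The WithBroadcast cases pair each input shape with a scalar second operand, the simplest broadcast BroadcastMul must handle: the scalar is stretched across every element of the other input. A minimal sketch of that degenerate case (full Dims<4> broadcasting is more general):

    #include <cstdint>
    #include <vector>

    // Sketch: scalar-times-tensor broadcast, as exercised by the test above.
    std::vector<int32_t> BroadcastMulScalar(const std::vector<int32_t>& in,
                                            int32_t scalar) {
      std::vector<int32_t> out(in.size());
      for (size_t i = 0; i < in.size(); ++i) out[i] = in[i] * scalar;
      return out;
    }

With scalar 1, the output equals the first input, matching ElementsAreArray({-20, 2, 7, 8, 11, 20}) above.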
@@ -53,7 +53,6 @@ tensorflow::Env* env = tensorflow::Env::Default();
 // Key is a substring of the test name and value is a bug number.
 // TODO(ahentz): make sure we clean this list up frequently.
 std::map<string, string> kBrokenTests = {
-    {R"(^\/mul.*int32)", "68808744"},
     {R"(^\/div.*int32)", "68808744"},
     {R"(^\/sub.*int32)", "68808744"},