add int32 support for mul
PiperOrigin-RevId: 204230461
commit 26cd1d1d06 (parent dcc7a506eb)
Changed paths: tensorflow/contrib/lite/{kernels, testing}
@@ -3054,6 +3054,20 @@ void Mul(const float* input1_data, const Dims<4>& input1_dims,
                                  output_activation_max, output_data, output_dims);
 }
 
+inline void Mul(const int32* input1_data, const Dims<4>& input1_dims,
+                const int32* input2_data, const Dims<4>& input2_dims,
+                int32 output_activation_min, int32 output_activation_max,
+                int32* output_data, const Dims<4>& output_dims) {
+  gemmlowp::ScopedProfilingLabel label("Mul/int32");
+
+  const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = ActivationFunctionWithMinMax(
+        input1_data[i] * input2_data[i], output_activation_min,
+        output_activation_max);
+  }
+}
+
 template <FusedActivationFunctionType Ac>
 void Mul(const int32* input1_data, const Dims<4>& input1_dims,
          const int32* input2_data, const Dims<4>& input2_dims,
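Note: for readers outside the TFLite tree, here is a minimal standalone sketch of what the new kernel does, an elementwise multiply followed by a min/max clamp standing in for the fused activation. `ClampedMul` and `MulInt32` are hypothetical stand-ins; `ActivationFunctionWithMinMax`, `MatchingFlatSize`, and the gemmlowp profiling label are the real internals they replace.

// Standalone sketch (not the TFLite source): elementwise int32 multiply
// with a fused min/max clamp, mirroring the new Mul/int32 kernel above.
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Stand-in for TFLite's ActivationFunctionWithMinMax.
inline int32_t ClampedMul(int32_t a, int32_t b, int32_t lo, int32_t hi) {
  return std::min<int32_t>(std::max<int32_t>(a * b, lo), hi);
}

void MulInt32(const int32_t* in1, const int32_t* in2, int flat_size,
              int32_t act_min, int32_t act_max, int32_t* out) {
  for (int i = 0; i < flat_size; ++i) {
    out[i] = ClampedMul(in1[i], in2[i], act_min, act_max);
  }
}

int main() {
  const int32_t a[4] = {-20, 2, 7, 8};
  const int32_t b[4] = {1, 2, 3, 5};
  int32_t out[4];
  // RELU_N1_TO_1 corresponds to the clamp range [-1, 1].
  MulInt32(a, b, 4, -1, 1, out);
  for (int32_t v : out) std::printf("%d ", v);  // prints: -1 1 1 1
}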
@@ -1429,10 +1429,11 @@ inline void BroadcastAddFivefold(
                           output_activation_max, output_data, output_dims);
 }
 
-inline void Mul(const float* input1_data, const Dims<4>& input1_dims,
-                const float* input2_data, const Dims<4>& input2_dims,
-                float output_activation_min, float output_activation_max,
-                float* output_data, const Dims<4>& output_dims) {
+template <typename T>
+inline void Mul(const T* input1_data, const Dims<4>& input1_dims,
+                const T* input2_data, const Dims<4>& input2_dims,
+                T output_activation_min, T output_activation_max,
+                T* output_data, const Dims<4>& output_dims) {
   const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
   for (int i = 0; i < flat_size; ++i) {
     output_data[i] = ActivationFunctionWithMinMax(
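Note: the reference kernel is now one template that is instantiated for both float and int32. A minimal sketch of that pattern, with TFLite's `Dims<4>`/`MatchingFlatSize` plumbing reduced to a plain length parameter (a hypothetical simplification):

#include <algorithm>
#include <cstdint>

// Sketch of the templated reference kernel: one body serves float and int32.
template <typename T>
void Mul(const T* in1, const T* in2, int flat_size, T act_min, T act_max,
         T* out) {
  for (int i = 0; i < flat_size; ++i) {
    T v = in1[i] * in2[i];
    out[i] = std::min(std::max(v, act_min), act_max);
  }
}

// Both instantiations compile from the same template body:
template void Mul<float>(const float*, const float*, int, float, float,
                         float*);
template void Mul<int32_t>(const int32_t*, const int32_t*, int, int32_t,
                           int32_t, int32_t*);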
@@ -100,29 +100,44 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 template <KernelType kernel_type>
-void EvalFloat(TfLiteContext* context, TfLiteNode* node,
-               TfLiteMulParams* params, const OpData* data,
-               const TfLiteTensor* input1, const TfLiteTensor* input2,
-               TfLiteTensor* output) {
-  float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
-                           &output_activation_max);
-#define TF_LITE_MUL(type, opname)                                   \
-  type::opname(GetTensorData<float>(input1), GetTensorDims(input1), \
-               GetTensorData<float>(input2), GetTensorDims(input2), \
-               output_activation_min, output_activation_max,        \
-               GetTensorData<float>(output), GetTensorDims(output))
-  if (kernel_type == kReference) {
-    if (data->requires_broadcast) {
-      TF_LITE_MUL(reference_ops, BroadcastMul);
-    } else {
-      TF_LITE_MUL(reference_ops, Mul);
-    }
-  } else {
-    if (data->requires_broadcast) {
-      TF_LITE_MUL(optimized_ops, BroadcastMul);
-    } else {
-      TF_LITE_MUL(optimized_ops, Mul);
-    }
-  }
-#undef TF_LITE_MUL
+void EvalMul(TfLiteContext* context, TfLiteNode* node, TfLiteMulParams* params,
+             const OpData* data, const TfLiteTensor* input1,
+             const TfLiteTensor* input2, TfLiteTensor* output) {
+#define TF_LITE_MUL(type, opname, data_type)                             \
+  data_type output_activation_min, output_activation_max;                \
+  CalculateActivationRange(params->activation, &output_activation_min,   \
+                           &output_activation_max);                      \
+  type::opname(GetTensorData<data_type>(input1), GetTensorDims(input1),  \
+               GetTensorData<data_type>(input2), GetTensorDims(input2),  \
+               output_activation_min, output_activation_max,             \
+               GetTensorData<data_type>(output), GetTensorDims(output))
+  if (output->type == kTfLiteInt32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(reference_ops, BroadcastMul, int32_t);
+      } else {
+        TF_LITE_MUL(reference_ops, Mul, int32_t);
+      }
+    } else {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(optimized_ops, BroadcastMul, int32_t);
+      } else {
+        TF_LITE_MUL(optimized_ops, Mul, int32_t);
+      }
+    }
+  } else if (output->type == kTfLiteFloat32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(reference_ops, BroadcastMul, float);
+      } else {
+        TF_LITE_MUL(reference_ops, Mul, float);
+      }
+    } else {
+      if (data->requires_broadcast) {
+        TF_LITE_MUL(optimized_ops, BroadcastMul, float);
+      } else {
+        TF_LITE_MUL(optimized_ops, Mul, float);
+      }
+    }
+  }
+#undef TF_LITE_MUL
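Note: the reworked `TF_LITE_MUL` macro takes the C++ data type as a third argument, so the activation range is declared in the same type the kernel operates on and one call site serves both float and int32. A self-contained sketch of that macro pattern, with the TFLite helpers (`GetTensorData`, `CalculateActivationRange`) replaced by hypothetical stand-ins and the range hard-coded to [-1, 1]:

#include <cstdint>
#include <cstdio>

namespace reference_ops {
template <typename T>
void Mul(const T* in1, const T* in2, int n, T lo, T hi, T* out) {
  for (int i = 0; i < n; ++i) {
    T v = in1[i] * in2[i];
    out[i] = v < lo ? lo : (v > hi ? hi : v);
  }
}
}  // namespace reference_ops

// Sketch of the macro pattern: the data type is a macro argument, so the
// same expansion works for float or int32_t. The activation range is a
// hard-coded stand-in for CalculateActivationRange.
#define TF_LITE_MUL(type, opname, data_type)                    \
  do {                                                          \
    data_type lo = -1, hi = 1;                                  \
    type::opname(in1, in2, n, lo, hi, out);                     \
  } while (0)

int main() {
  const int32_t in1[] = {-20, 2, 7, 8};
  const int32_t in2[] = {1, 2, 3, 5};
  int32_t out[4];
  int n = 4;
  TF_LITE_MUL(reference_ops, Mul, int32_t);
  for (int i = 0; i < n; ++i) std::printf("%d ", out[i]);  // -1 1 1 1
}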
@@ -194,17 +209,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
-  if (output->type == kTfLiteFloat32) {
-    EvalFloat<kernel_type>(context, node, params, data, input1, input2, output);
+  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+    EvalMul<kernel_type>(context, node, params, data, input1, input2, output);
   } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(
         context, EvalQuantized<kernel_type>(context, node, params, data, input1,
                                             input2, output));
   } else {
-    context->ReportError(
-        context,
-        "Mul only supports FLOAT32 and quantized UINT8 and INT16 now, got %d.",
-        output->type);
+    context->ReportError(context,
+                         "Mul only supports FLOAT32, INT32 and quantized UINT8 "
+                         "and INT16 now, got %d.",
+                         output->type);
     return kTfLiteError;
   }
 
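Note: a minimal sketch of the dispatch Eval now performs, branching on the runtime type tag so float and int32 share the EvalMul path while uint8/int16 take the quantized path. The enum and functions below are hypothetical stand-ins, not the TfLiteType API:

#include <cstdio>

enum TfType { kFloat32, kInt32, kUInt8, kInt16 };  // stand-in type tags

// Stand-in mirroring Eval's dispatch: float and int32 share one path.
int Eval(TfType output_type) {
  if (output_type == kFloat32 || output_type == kInt32) {
    std::puts("EvalMul path");
  } else if (output_type == kUInt8 || output_type == kInt16) {
    std::puts("EvalQuantized path");
  } else {
    std::puts("unsupported type");
    return 1;  // stand-in for kTfLiteError
  }
  return 0;  // stand-in for kTfLiteOk
}

int main() { return Eval(kInt32); }  // prints: EvalMul path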
@@ -52,6 +52,13 @@ class FloatMulOpModel : public BaseMulOpModel {
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
 };
 
+class IntegerMulOpModel : public BaseMulOpModel {
+ public:
+  using BaseMulOpModel::BaseMulOpModel;
+
+  std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
+};
+
 // For quantized Mul, the error shouldn't exceed (2*step + step^2).
 // The param min=-1.0 & max=1.0 is used in the following tests.
 // The tolerance value is ~0.0157.
@@ -133,6 +140,57 @@ TEST(FloatMulOpTest, WithBroadcast) {
   }
 }
 
+TEST(IntegerMulOpTest, NoActivation) {
+  IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_NONE);
+  m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40}));
+}
+
+TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) {
+  IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 1, 1, 1}));
+}
+
+TEST(IntegerMulOpTest, VariousInputShapes) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
+    m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5, 11, 1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40, 121, 20}))
+        << "With shape number " << i;
+  }
+}
+
+TEST(IntegerMulOpTest, WithBroadcast) {
+  std::vector<std::initializer_list<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}},  // always a scalar
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
+    m.PopulateTensor<int32_t>(m.input2(), {1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray(ArrayFloatNear({-20, 2, 7, 8, 11, 20})))
+        << "With shape number " << i;
+  }
+}
+
 TEST(QuantizedMulOpTest, NoActivation) {
   QuantizedMulOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
                         {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
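Note: for the RELU_N1_TO_1 case above, the raw elementwise products are {-20, 4, 21, 40}; clamping each to the activation range [-1, 1] yields the expected {-1, 1, 1, 1}.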
@@ -53,7 +53,6 @@ tensorflow::Env* env = tensorflow::Env::Default();
 // Key is a substring of the test name and value is a bug number.
 // TODO(ahentz): make sure we clean this list up frequently.
 std::map<string, string> kBrokenTests = {
-    {R"(^\/mul.*int32)", "68808744"},
     {R"(^\/div.*int32)", "68808744"},
     {R"(^\/sub.*int32)", "68808744"},
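Note: since kBrokenTests is keyed by test-name patterns (per the comment above), dropping the mul int32 entry re-enables the generated int32 mul tests that this commit makes pass.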