add int32 support for mul

PiperOrigin-RevId: 204230461
A. Unique TensorFlower 2018-07-11 19:13:29 -07:00 committed by TensorFlower Gardener
parent dcc7a506eb
commit 26cd1d1d06
5 changed files with 118 additions and 31 deletions

@@ -3054,6 +3054,20 @@ void Mul(const float* input1_data, const Dims<4>& input1_dims,
output_activation_max, output_data, output_dims);
}
inline void Mul(const int32* input1_data, const Dims<4>& input1_dims,
const int32* input2_data, const Dims<4>& input2_dims,
int32 output_activation_min, int32 output_activation_max,
int32* output_data, const Dims<4>& output_dims) {
gemmlowp::ScopedProfilingLabel label("Mul/int32");
const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] * input2_data[i], output_activation_min,
output_activation_max);
}
}
template <FusedActivationFunctionType Ac>
void Mul(const int32* input1_data, const Dims<4>& input1_dims,
const int32* input2_data, const Dims<4>& input2_dims,

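Note: the new int32 path above is a flat elementwise product clamped to the fused-activation range (that is what ActivationFunctionWithMinMax does). A minimal standalone sketch of the same logic using plain standard-library calls rather than the TFLite helpers (the function name and the use of std::vector here are illustrative only):

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch of the int32 Mul kernel: multiply elementwise, then clamp each
// product to [activation_min, activation_max].
std::vector<int32_t> MulInt32Sketch(const std::vector<int32_t>& input1,
                                    const std::vector<int32_t>& input2,
                                    int32_t activation_min,
                                    int32_t activation_max) {
  std::vector<int32_t> output(input1.size());
  for (size_t i = 0; i < input1.size(); ++i) {
    output[i] = std::min(std::max(input1[i] * input2[i], activation_min),
                         activation_max);
  }
  return output;
}

With RELU_N1_TO_1 the clamp range is [-1, 1], which is why the products {-20, 4, 21, 40} in the tests further down come out as {-1, 1, 1, 1}.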
@@ -1429,10 +1429,11 @@ inline void BroadcastAddFivefold(
output_activation_max, output_data, output_dims);
}
inline void Mul(const float* input1_data, const Dims<4>& input1_dims,
const float* input2_data, const Dims<4>& input2_dims,
float output_activation_min, float output_activation_max,
float* output_data, const Dims<4>& output_dims) {
template <typename T>
inline void Mul(const T* input1_data, const Dims<4>& input1_dims,
const T* input2_data, const Dims<4>& input2_dims,
T output_activation_min, T output_activation_max,
T* output_data, const Dims<4>& output_dims) {
const int flat_size = MatchingFlatSize(input1_dims, input2_dims, output_dims);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(

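This hunk replaces the float-only Mul overload with a template over the element type, so a single definition now serves both float and the new int32 path; T is deduced from the arguments, so existing float call sites compile unchanged. A simplified, self-contained sketch of that shape (the real Mul additionally takes Dims<4> shapes and goes through MatchingFlatSize and ActivationFunctionWithMinMax):

#include <cstdint>

// One template body; T is deduced as float or int32_t from the pointers.
template <typename T>
void MulSketch(const T* input1, const T* input2, int size,
               T activation_min, T activation_max, T* output) {
  for (int i = 0; i < size; ++i) {
    const T product = input1[i] * input2[i];
    output[i] = product < activation_min
                    ? activation_min
                    : (product > activation_max ? activation_max : product);
  }
}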
@@ -100,29 +100,44 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
template <KernelType kernel_type>
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, const OpData* data,
const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
#define TF_LITE_MUL(type, opname) \
type::opname(GetTensorData<float>(input1), GetTensorDims(input1), \
GetTensorData<float>(input2), GetTensorDims(input2), \
output_activation_min, output_activation_max, \
GetTensorData<float>(output), GetTensorDims(output))
if (kernel_type == kReference) {
if (data->requires_broadcast) {
TF_LITE_MUL(reference_ops, BroadcastMul);
void EvalMul(TfLiteContext* context, TfLiteNode* node, TfLiteMulParams* params,
const OpData* data, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output) {
#define TF_LITE_MUL(type, opname, data_type) \
data_type output_activation_min, output_activation_max; \
CalculateActivationRange(params->activation, &output_activation_min, \
&output_activation_max); \
type::opname(GetTensorData<data_type>(input1), GetTensorDims(input1), \
GetTensorData<data_type>(input2), GetTensorDims(input2), \
output_activation_min, output_activation_max, \
GetTensorData<data_type>(output), GetTensorDims(output))
if (output->type == kTfLiteInt32) {
if (kernel_type == kReference) {
if (data->requires_broadcast) {
TF_LITE_MUL(reference_ops, BroadcastMul, int32_t);
} else {
TF_LITE_MUL(reference_ops, Mul, int32_t);
}
} else {
TF_LITE_MUL(reference_ops, Mul);
if (data->requires_broadcast) {
TF_LITE_MUL(optimized_ops, BroadcastMul, int32_t);
} else {
TF_LITE_MUL(optimized_ops, Mul, int32_t);
}
}
} else {
if (data->requires_broadcast) {
TF_LITE_MUL(optimized_ops, BroadcastMul);
} else if (output->type == kTfLiteFloat32) {
if (kernel_type == kReference) {
if (data->requires_broadcast) {
TF_LITE_MUL(reference_ops, BroadcastMul, float);
} else {
TF_LITE_MUL(reference_ops, Mul, float);
}
} else {
TF_LITE_MUL(optimized_ops, Mul);
if (data->requires_broadcast) {
TF_LITE_MUL(optimized_ops, BroadcastMul, float);
} else {
TF_LITE_MUL(optimized_ops, Mul, float);
}
}
}
#undef TF_LITE_MUL
@@ -194,17 +209,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalFloat<kernel_type>(context, node, params, data, input1, input2, output);
if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
EvalMul<kernel_type>(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(
context, EvalQuantized<kernel_type>(context, node, params, data, input1,
input2, output));
} else {
context->ReportError(
context,
"Mul only supports FLOAT32 and quantized UINT8 and INT16 now, got %d.",
output->type);
context->ReportError(context,
"Mul only supports FLOAT32, INT32 and quantized UINT8 "
"and INT16 now, got %d.",
output->type);
return kTfLiteError;
}

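For clarity, the reworked TF_LITE_MUL macro now takes the element type as its third argument, so the int32 reference branch above expands to roughly the following inside EvalMul (a hand-expanded sketch of the macro shown in the hunk, not standalone code):

// Approximate expansion of TF_LITE_MUL(reference_ops, Mul, int32_t): compute
// the activation range in the element type, then call the typed kernel.
int32_t output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
                         &output_activation_max);
reference_ops::Mul(GetTensorData<int32_t>(input1), GetTensorDims(input1),
                   GetTensorData<int32_t>(input2), GetTensorDims(input2),
                   output_activation_min, output_activation_max,
                   GetTensorData<int32_t>(output), GetTensorDims(output));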
@@ -52,6 +52,13 @@ class FloatMulOpModel : public BaseMulOpModel {
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
};
class IntegerMulOpModel : public BaseMulOpModel {
public:
using BaseMulOpModel::BaseMulOpModel;
std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
};
// For quantized Mul, the error shouldn't exceed (2*step + step^2).
// The param min=-1.0 & max=1.0 is used in the following tests.
// The tolerance value is ~0.0157.
@@ -133,6 +140,57 @@ TEST(FloatMulOpTest, WithBroadcast) {
}
}
TEST(IntegerMulOpTest, NoActivation) {
IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
{TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
ActivationFunctionType_NONE);
m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
m.Invoke();
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40}));
}
TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) {
IntegerMulOpModel m({TensorType_INT32, {1, 2, 2, 1}},
{TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
ActivationFunctionType_RELU_N1_TO_1);
m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8});
m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5});
m.Invoke();
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 1, 1, 1}));
}
TEST(IntegerMulOpTest, VariousInputShapes) {
std::vector<std::initializer_list<int>> test_shapes = {
{6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
for (int i = 0; i < test_shapes.size(); ++i) {
IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
{TensorType_INT32, test_shapes[i]},
{TensorType_INT32, {}}, ActivationFunctionType_NONE);
m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
m.PopulateTensor<int32_t>(m.input2(), {1, 2, 3, 5, 11, 1});
m.Invoke();
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 4, 21, 40, 121, 20}))
<< "With shape number " << i;
}
}
TEST(IntegerMulOpTest, WithBroadcast) {
std::vector<std::initializer_list<int>> test_shapes = {
{6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
for (int i = 0; i < test_shapes.size(); ++i) {
IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
{TensorType_INT32, {}}, // always a scalar
{TensorType_INT32, {}}, ActivationFunctionType_NONE);
m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 7, 8, 11, 20});
m.PopulateTensor<int32_t>(m.input2(), {1});
m.Invoke();
EXPECT_THAT(m.GetOutput(),
ElementsAreArray(ArrayFloatNear({-20, 2, 7, 8, 11, 20})))
<< "With shape number " << i;
}
}
TEST(QuantizedMulOpTest, NoActivation) {
QuantizedMulOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
{TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},

@@ -53,7 +53,6 @@ tensorflow::Env* env = tensorflow::Env::Default();
// Key is a substring of the test name and value is a bug number.
// TODO(ahentz): make sure we clean this list up frequently.
std::map<string, string> kBrokenTests = {
{R"(^\/mul.*int32)", "68808744"},
{R"(^\/div.*int32)", "68808744"},
{R"(^\/sub.*int32)", "68808744"},