Fix implementation of quantized ReluX.
PiperOrigin-RevId: 277208160
Change-Id: I6f92a34cd07f7451baf6fe723f2001db647d00ac
parent f0570083f5
commit d21270193b
@@ -87,6 +87,11 @@ struct HardSwishData {
   HardSwishParams params;
 };
 
+struct ReluOpData : public OpData {
+  int32_t output_multiplier = 0;
+  int output_shift = 0;
+};
+
 namespace {
 TfLiteStatus CheckOutputQuantParams(TfLiteContext* context,
                                     const TfLiteTensor* input,
@@ -136,8 +141,8 @@ void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
 
 template <typename T>
 void QuantizedReluX(float act_min, float act_max, const TfLiteTensor* input,
-                    TfLiteTensor* output) {
-  ActivationParams params;
+                    TfLiteTensor* output, const ReluOpData* data) {
+  ReluParams params;
   params.quantized_activation_min =
       std::max(static_cast<int32_t>(std::numeric_limits<T>::min()),
                output->params.zero_point +
@@ -149,6 +154,10 @@ void QuantizedReluX(float act_min, float act_max, const TfLiteTensor* input,
                static_cast<int32_t>(std::numeric_limits<T>::max()),
                output->params.zero_point +
                    static_cast<int32>(roundf(act_max / output->params.scale)));
+  params.input_offset = input->params.zero_point;
+  params.output_offset = output->params.zero_point;
+  params.output_multiplier = data->output_multiplier;
+  params.output_shift = data->output_shift;
   optimized_ops::ReluX(params, GetTensorShape(input), GetTensorData<T>(input),
                        GetTensorShape(output), GetTensorData<T>(output));
 }
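
Note (illustration, not part of the diff): with the zero points and the multiplier/shift now carried in ReluParams, the quantized path rescales each value from the input's quantization to the output's before clamping, instead of clamping the raw input as the old ActivationParams path did. A minimal scalar sketch of that math, assuming a uint8 tensor and using a plain double where the kernel uses the fixed-point multiplier/shift pair:

    // Sketch only; `requantize` is a hypothetical stand-in for
    // MultiplyByQuantizedMultiplier and uses floating point purely to keep
    // the example short.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    inline int32_t requantize(int32_t v, double real_multiplier) {
      return static_cast<int32_t>(std::lround(v * real_multiplier));
    }

    inline uint8_t QuantizedReluXScalar(uint8_t in, int32_t input_offset,
                                        int32_t output_offset,
                                        double real_multiplier,  // input_scale / output_scale
                                        int32_t act_min, int32_t act_max) {
      const int32_t centered = static_cast<int32_t>(in) - input_offset;
      int32_t out = output_offset + requantize(centered, real_multiplier);
      out = std::min(act_max, std::max(act_min, out));
      return static_cast<uint8_t>(out);
    }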
@@ -206,6 +215,32 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
                               TfLiteIntArrayCopy(input->dims));
 }
 
+void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
+  return new ReluOpData;
+}
+
+void ReluFree(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<ReluOpData*>(buffer);
+}
+
+TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
+  ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
+    double real_multiplier = input->params.scale / output->params.scale;
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
+                       &data->output_shift);
+  }
+
+  return context->ResizeTensor(context, output,
+                               TfLiteIntArrayCopy(input->dims));
+}
+
 void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
   return new LeakyReluOpData;
 }
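
Note (illustration, not part of the diff): ReluPrepare derives real_multiplier = input_scale / output_scale and hands it to QuantizeMultiplier which, in TFLite's usual convention, decomposes it into a 32-bit fixed-point multiplier and a shift such that real_multiplier ≈ output_multiplier * 2^(output_shift - 31). A rough stand-in for that decomposition (the real implementation also handles the zero and rounding-overflow edge cases):

    #include <cmath>
    #include <cstdint>

    // Rough sketch of the decomposition; not the library's QuantizeMultiplier.
    void DecomposeMultiplier(double real_multiplier,
                             int32_t* quantized_multiplier, int* shift) {
      const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
      *quantized_multiplier = static_cast<int32_t>(std::lround(q * (1ll << 31)));
    }

    // Example: input_scale = 0.25 and output_scale = 0.5 give
    // real_multiplier = 0.5, which decomposes into
    // quantized_multiplier = 1 << 30 and shift = 0.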
@@ -557,6 +592,7 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
   switch (input->type) {
     case kTfLiteFloat32: {
       optimized_ops::Relu(GetTensorShape(input), GetTensorData<float>(input),
@@ -566,11 +602,11 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
     // the unbounded upper limit is actually hard to quantize.
     case kTfLiteUInt8: {
       QuantizedReluX<uint8_t>(0.0f, std::numeric_limits<float>::infinity(),
-                              input, output);
+                              input, output, data);
     } break;
     case kTfLiteInt8: {
       QuantizedReluX<int8_t>(0.0f, std::numeric_limits<float>::infinity(),
-                             input, output);
+                             input, output, data);
     } break;
     default:
       context->ReportError(
@@ -584,6 +620,7 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
   switch (input->type) {
     case kTfLiteFloat32: {
       optimized_ops::Relu1(GetTensorShape(input), GetTensorData<float>(input),
@@ -592,11 +629,11 @@ TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     case kTfLiteUInt8: {
-      QuantizedReluX<uint8_t>(-1.0f, 1.0f, input, output);
+      QuantizedReluX<uint8_t>(-1.0f, 1.0f, input, output, data);
       return kTfLiteOk;
     } break;
     case kTfLiteInt8: {
-      QuantizedReluX<int8_t>(-1, 1, input, output);
+      QuantizedReluX<int8_t>(-1, 1, input, output, data);
       return kTfLiteOk;
     } break;
     default:
@@ -665,6 +702,7 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
   switch (input->type) {
     case kTfLiteFloat32: {
       size_t elements = input->bytes / sizeof(float);
@@ -675,10 +713,10 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     case kTfLiteUInt8:
-      QuantizedReluX<uint8_t>(0.0f, 6.0f, input, output);
+      QuantizedReluX<uint8_t>(0.0f, 6.0f, input, output, data);
       return kTfLiteOk;
     case kTfLiteInt8: {
-      QuantizedReluX<int8_t>(0.0f, 6.0f, input, output);
+      QuantizedReluX<int8_t>(0.0f, 6.0f, input, output, data);
       return kTfLiteOk;
     } break;
     default:
@@ -1074,22 +1112,22 @@ TfLiteRegistration* Register_ELU() {
 }
 
 TfLiteRegistration* Register_RELU() {
-  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
-                                 activations::GenericPrepare,
+  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
+                                 activations::ReluPrepare,
                                  activations::ReluEval};
   return &r;
 }
 
 TfLiteRegistration* Register_RELU_N1_TO_1() {
-  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
-                                 activations::GenericPrepare,
+  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
+                                 activations::ReluPrepare,
                                  activations::Relu1Eval};
   return &r;
 }
 
 TfLiteRegistration* Register_RELU6() {
-  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
-                                 activations::GenericPrepare,
+  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
+                                 activations::ReluPrepare,
                                  activations::Relu6Eval};
   return &r;
 }
@@ -213,6 +213,24 @@ inline void Relu6(const RuntimeShape& input_shape, const float* input_data,
   }
 }
 
+template <typename T>
+inline void ReluX(const tflite::ReluParams& params,
+                  const RuntimeShape& input_shape, const T* input_data,
+                  const RuntimeShape& output_shape, T* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    const int32 val = static_cast<int32_t>(input_data[i]);
+    int32 clamped = params.output_offset +
+                    MultiplyByQuantizedMultiplier(val - params.input_offset,
+                                                  params.output_multiplier,
+                                                  params.output_shift);
+    clamped = std::max(params.quantized_activation_min, clamped);
+    clamped = std::min(params.quantized_activation_max, clamped);
+    output_data[i] = static_cast<T>(clamped);
+  }
+}
+
 template <typename T>
 inline void ReluX(const tflite::ActivationParams& params,
                   const RuntimeShape& input_shape, const T* input_data,
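
Note (illustration, not part of the diff): a worked example of the new ReluX path under assumed quantization parameters — input scale 0.25 with zero point 0, output scale 0.5 with zero point 0 — so Prepare would derive real_multiplier = 0.5 and the Relu6 bounds in the output's quantized domain become [0, 12]. A double stands in for the fixed-point multiplier/shift pair:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main() {
      const double real_multiplier = 0.25 / 0.5;  // input_scale / output_scale
      const int32_t input_offset = 0;             // input zero point
      const int32_t output_offset = 0;            // output zero point
      const int32_t act_min = 0;                  // round(0.0 / 0.5)
      const int32_t act_max = 12;                 // round(6.0 / 0.5)

      const uint8_t input[] = {0, 20, 40, 255};   // real values 0.0, 5.0, 10.0, 63.75
      for (uint8_t v : input) {
        const int32_t rescaled = output_offset +
            static_cast<int32_t>(std::lround(
                (static_cast<int32_t>(v) - input_offset) * real_multiplier));
        const int32_t clamped = std::min(act_max, std::max(act_min, rescaled));
        std::cout << static_cast<int>(v) << " -> " << clamped << "\n";
        // Prints: 0 -> 0, 20 -> 10, 40 -> 12, 255 -> 12.
      }
      return 0;
    }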
@@ -734,6 +734,13 @@ struct ActivationParams {
   int32 quantized_activation_max;
 };
 
+struct ReluParams : public ActivationParams {
+  int32 input_offset;
+  int32 output_offset;
+  int32 output_multiplier;
+  int32 output_shift;
+};
+
 // Styles of resizing op usages. For example, kImageStyle can be used with a Pad
 // op for pattern-specific optimization.
 enum class ResizingCategory : uint8 {
@@ -493,6 +493,7 @@ edgetpu_ops = [
     "max_pool",
     "mul",
     "pad",  # high error
+    "relu6",
     "reshape",
     "resize_bilinear",
     "sigmoid",
@@ -32,6 +32,8 @@ def make_relu6_tests(options):
   test_parameters = [{
       "input_shape": [[], [1, 1, 1, 1], [1, 3, 4, 3], [3, 15, 14, 3],
                       [3, 1, 2, 4, 6], [2, 2, 3, 4, 5, 6]],
+      "fully_quantize": [True, False],
+      "input_range": [(-2, 8)]
   }]
 
   def build_graph(parameters):
@@ -41,8 +43,9 @@ def make_relu6_tests(options):
     return [input_tensor], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
-    input_values = create_tensor_data(
-        np.float32, parameters["input_shape"], min_value=-3, max_value=10)
+    min_value, max_value = parameters["input_range"]
+    input_values = create_tensor_data(np.float32, parameters["input_shape"],
+                                      min_value, max_value)
     return [input_values], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_values])))
 