Fix implementation of quantized ReluX.
PiperOrigin-RevId: 277208160
Change-Id: I6f92a34cd07f7451baf6fe723f2001db647d00ac
commit d21270193b (parent f0570083f5)
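Summary of the change: the quantized Relu/Relu1/Relu6 kernels previously filled an ActivationParams holding only clamp bounds, so quantized values were passed through with a clamp and no rescaling, which is only correct when the input and output tensors share the same scale and zero point. This commit adds a per-op ReluOpData (multiplier/shift computed in a new ReluPrepare), a ReluParams struct carrying the input/output offsets, and a ReluX overload that requantizes before clamping. The snippet below is only a rough floating-point reference for what the fixed-point path now computes per element; the function and parameter names are illustrative and not part of the commit.

// Sketch only: a floating-point reference for the updated quantized ReluX.
// Names here are illustrative, not taken from the commit.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

uint8_t QuantizedReluXReference(uint8_t x, float input_scale,
                                int32_t input_zero_point, float output_scale,
                                int32_t output_zero_point, float act_min,
                                float act_max) {
  // Dequantize with the input parameters, apply the activation bounds,
  // then requantize with the output parameters.
  const float real =
      input_scale * (static_cast<int32_t>(x) - input_zero_point);
  const float clamped = std::min(act_max, std::max(act_min, real));
  const float q = std::round(clamped / output_scale) + output_zero_point;
  return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
}

int main() {
  // Hypothetical Relu6 example: scale 1/8, input zero point 128, output zero
  // point 0. Input 200 represents 9.0, so the result is 48, i.e. 6.0.
  std::printf("%d\n", static_cast<int>(QuantizedReluXReference(
                          200, 0.125f, 128, 0.125f, 0, 0.0f, 6.0f)));
}

The real kernel performs the same computation in integer arithmetic: the ratio input_scale / output_scale is folded into the precomputed output_multiplier / output_shift pair, and the clamp bounds are derived from act_min / act_max and the output quantization, as the diff below shows.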
@@ -87,6 +87,11 @@ struct HardSwishData {
   HardSwishParams params;
 };
 
+struct ReluOpData : public OpData {
+  int32_t output_multiplier = 0;
+  int output_shift = 0;
+};
+
 namespace {
 TfLiteStatus CheckOutputQuantParams(TfLiteContext* context,
                                     const TfLiteTensor* input,
@@ -136,8 +141,8 @@ void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
 
 template <typename T>
 void QuantizedReluX(float act_min, float act_max, const TfLiteTensor* input,
-                    TfLiteTensor* output) {
-  ActivationParams params;
+                    TfLiteTensor* output, const ReluOpData* data) {
+  ReluParams params;
   params.quantized_activation_min =
       std::max(static_cast<int32_t>(std::numeric_limits<T>::min()),
                output->params.zero_point +
@@ -149,6 +154,10 @@ void QuantizedReluX(float act_min, float act_max, const TfLiteTensor* input,
                 static_cast<int32_t>(std::numeric_limits<T>::max()),
                 output->params.zero_point +
                     static_cast<int32>(roundf(act_max / output->params.scale)));
+  params.input_offset = input->params.zero_point;
+  params.output_offset = output->params.zero_point;
+  params.output_multiplier = data->output_multiplier;
+  params.output_shift = data->output_shift;
   optimized_ops::ReluX(params, GetTensorShape(input), GetTensorData<T>(input),
                        GetTensorShape(output), GetTensorData<T>(output));
 }
@@ -206,6 +215,32 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
                                TfLiteIntArrayCopy(input->dims));
 }
 
+void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
+  return new ReluOpData;
+}
+
+void ReluFree(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<ReluOpData*>(buffer);
+}
+
+TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
+  ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
+    double real_multiplier = input->params.scale / output->params.scale;
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
+                       &data->output_shift);
+  }
+
+  return context->ResizeTensor(context, output,
+                               TfLiteIntArrayCopy(input->dims));
+}
+
 void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
   return new LeakyReluOpData;
 }
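Note on the multiplier: ReluPrepare calls QuantizeMultiplier to turn the real-valued ratio input_scale / output_scale into a 32-bit fixed-point multiplier plus a power-of-two shift, so Eval never touches floating point. The sketch below only illustrates that decomposition; it is not the TFLite helper itself, though the frexp-based idea is the same: real_multiplier ≈ quantized * 2^(shift - 31).

// Illustration only, not the TFLite helper: represent a positive real
// multiplier as a 31-bit fixed-point value plus a power-of-two shift.
#include <cmath>
#include <cstdint>
#include <cstdio>

void DecomposeMultiplier(double real_multiplier, int32_t* quantized,
                         int* shift) {
  if (real_multiplier == 0.0) {
    *quantized = 0;
    *shift = 0;
    return;
  }
  const double fraction = std::frexp(real_multiplier, shift);  // in [0.5, 1)
  int64_t q = static_cast<int64_t>(std::round(fraction * (1ll << 31)));
  if (q == (1ll << 31)) {  // rounding can push the fraction up to exactly 1.0
    q /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q);
}

int main() {
  int32_t quantized;
  int shift;
  // e.g. input scale 0.5, output scale 0.25 -> real multiplier 2.0
  DecomposeMultiplier(0.5 / 0.25, &quantized, &shift);
  // Prints "1073741824 * 2^-29", which is 2^30 * 2^-29 = 2.0.
  std::printf("%lld * 2^%d\n", static_cast<long long>(quantized), shift - 31);
}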
@@ -557,6 +592,7 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
   switch (input->type) {
     case kTfLiteFloat32: {
       optimized_ops::Relu(GetTensorShape(input), GetTensorData<float>(input),
@@ -566,11 +602,11 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
     // the unbounded upper limit is actually hard to quantize.
     case kTfLiteUInt8: {
       QuantizedReluX<uint8_t>(0.0f, std::numeric_limits<float>::infinity(),
-                              input, output);
+                              input, output, data);
     } break;
     case kTfLiteInt8: {
       QuantizedReluX<int8_t>(0.0f, std::numeric_limits<float>::infinity(),
-                              input, output);
+                              input, output, data);
     } break;
     default:
       context->ReportError(
@@ -584,6 +620,7 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  const ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
   switch (input->type) {
     case kTfLiteFloat32: {
       optimized_ops::Relu1(GetTensorShape(input), GetTensorData<float>(input),
@@ -592,11 +629,11 @@ TfLiteStatus Relu1Eval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     case kTfLiteUInt8: {
-      QuantizedReluX<uint8_t>(-1.0f, 1.0f, input, output);
+      QuantizedReluX<uint8_t>(-1.0f, 1.0f, input, output, data);
       return kTfLiteOk;
     } break;
     case kTfLiteInt8: {
-      QuantizedReluX<int8_t>(-1, 1, input, output);
+      QuantizedReluX<int8_t>(-1, 1, input, output, data);
       return kTfLiteOk;
     } break;
     default:
@@ -665,6 +702,7 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  ReluOpData* data = reinterpret_cast<ReluOpData*>(node->user_data);
   switch (input->type) {
     case kTfLiteFloat32: {
       size_t elements = input->bytes / sizeof(float);
@@ -675,10 +713,10 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     case kTfLiteUInt8:
-      QuantizedReluX<uint8_t>(0.0f, 6.0f, input, output);
+      QuantizedReluX<uint8_t>(0.0f, 6.0f, input, output, data);
       return kTfLiteOk;
     case kTfLiteInt8: {
-      QuantizedReluX<int8_t>(0.0f, 6.0f, input, output);
+      QuantizedReluX<int8_t>(0.0f, 6.0f, input, output, data);
       return kTfLiteOk;
     } break;
     default:
@@ -1074,22 +1112,22 @@ TfLiteRegistration* Register_ELU() {
 }
 
 TfLiteRegistration* Register_RELU() {
-  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
-                                 activations::GenericPrepare,
+  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
+                                 activations::ReluPrepare,
                                  activations::ReluEval};
   return &r;
 }
 
 TfLiteRegistration* Register_RELU_N1_TO_1() {
-  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
-                                 activations::GenericPrepare,
+  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
+                                 activations::ReluPrepare,
                                  activations::Relu1Eval};
   return &r;
 }
 
 TfLiteRegistration* Register_RELU6() {
-  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
-                                 activations::GenericPrepare,
+  static TfLiteRegistration r = {activations::ReluInit, activations::ReluFree,
+                                 activations::ReluPrepare,
                                  activations::Relu6Eval};
   return &r;
 }
@@ -213,6 +213,24 @@ inline void Relu6(const RuntimeShape& input_shape, const float* input_data,
   }
 }
 
+template <typename T>
+inline void ReluX(const tflite::ReluParams& params,
+                  const RuntimeShape& input_shape, const T* input_data,
+                  const RuntimeShape& output_shape, T* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    const int32 val = static_cast<int32_t>(input_data[i]);
+    int32 clamped = params.output_offset +
+                    MultiplyByQuantizedMultiplier(val - params.input_offset,
+                                                  params.output_multiplier,
+                                                  params.output_shift);
+    clamped = std::max(params.quantized_activation_min, clamped);
+    clamped = std::min(params.quantized_activation_max, clamped);
+    output_data[i] = static_cast<T>(clamped);
+  }
+}
+
 template <typename T>
 inline void ReluX(const tflite::ActivationParams& params,
                   const RuntimeShape& input_shape, const T* input_data,
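A small worked example of what the new ReluX loop does for Relu6, using hypothetical quantization parameters with matching input and output scales, so the quantized multiplier is effectively 1 and the loop reduces to an offset shift plus a clamp:

// Hypothetical example: input uint8 with scale 0.5 and zero point 10,
// output uint8 with scale 0.5 and zero point 0, activation range [0, 6].
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const int32_t input_offset = 10;  // input zero point (hypothetical)
  const int32_t output_offset = 0;  // output zero point (hypothetical)
  const int32_t qmin = 0;           // max(0, 0 + round(0.0 / 0.5))
  const int32_t qmax = 12;          // min(255, 0 + round(6.0 / 0.5))

  const uint8_t input[] = {6, 10, 14, 30};  // real values -2, 0, 2, 10
  for (uint8_t x : input) {
    int32_t clamped =
        output_offset + (static_cast<int32_t>(x) - input_offset);
    clamped = std::max(qmin, std::min(qmax, clamped));
    // Prints 0, 0, 4, 12, i.e. real 0.0, 0.0, 2.0, 6.0 == Relu6 of the input.
    std::printf("%d\n", clamped);
  }
}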
@@ -734,6 +734,13 @@ struct ActivationParams {
   int32 quantized_activation_max;
 };
 
+struct ReluParams : public ActivationParams {
+  int32 input_offset;
+  int32 output_offset;
+  int32 output_multiplier;
+  int32 output_shift;
+};
+
 // Styles of resizing op usages. For example, kImageStyle can be used with a Pad
 // op for pattern-specific optimization.
 enum class ResizingCategory : uint8 {
@@ -493,6 +493,7 @@ edgetpu_ops = [
     "max_pool",
     "mul",
     "pad",  # high error
+    "relu6",
     "reshape",
     "resize_bilinear",
     "sigmoid",
@@ -32,6 +32,8 @@ def make_relu6_tests(options):
   test_parameters = [{
       "input_shape": [[], [1, 1, 1, 1], [1, 3, 4, 3], [3, 15, 14, 3],
                       [3, 1, 2, 4, 6], [2, 2, 3, 4, 5, 6]],
+      "fully_quantize": [True, False],
+      "input_range": [(-2, 8)]
   }]
 
   def build_graph(parameters):
@@ -41,8 +43,9 @@ def make_relu6_tests(options):
     return [input_tensor], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
-    input_values = create_tensor_data(
-        np.float32, parameters["input_shape"], min_value=-3, max_value=10)
+    min_value, max_value = parameters["input_range"]
+    input_values = create_tensor_data(np.float32, parameters["input_shape"],
+                                      min_value, max_value)
     return [input_values], sess.run(
         outputs, feed_dict=dict(zip(inputs, [input_values])))
 