TFLM: Fix double-promotion error.
Some of these double promotions are not obvious, as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1
parent 2bfa43b081
commit 884ec0ff06
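For context, a minimal sketch (not part of this change; the function names are hypothetical) of why the variadic-argument case trips -Wdouble-promotion and how an explicit cast makes the promotion intentional:

#include <cstdio>

void LogValue(float x) {
  // Default argument promotion: a float passed through "..." becomes a
  // double, so the commented-out line warns under -Wdouble-promotion.
  // printf("x: %f\n", x);
  // Casting explicitly keeps the same behavior but silences the warning.
  printf("x: %f\n", static_cast<double>(x));
}

int main() {
  LogValue(0.5f);
  return 0;
}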
@@ -35,7 +35,7 @@ struct ToString {
 template <>
 struct ToString<float, void> {
   static void Run(float value, char* buf) {
-    snprintf(buf, kValueBufSize, "%.9g", value);
+    snprintf(buf, kValueBufSize, "%.9g", static_cast<double>(value));
   }
 };
@@ -1,5 +1,5 @@
 load("//tensorflow/lite:build_def.bzl", "tflite_copts")
-load("//tensorflow/lite/micro:build_def.bzl", "cc_library")
+load("//tensorflow/lite/micro:build_def.bzl", "cc_library", "micro_copts")
 load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite_combined")
 load("//tensorflow:tensorflow.bzl", "tf_opts_nortti_if_android")
@@ -373,7 +373,7 @@ cc_library(
     hdrs = [
         "kernel_util.h",
     ],
-    copts = tflite_copts(),
+    copts = tflite_copts() + micro_copts(),
     deps = [
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/kernels/internal:quantization_util",
@@ -1,6 +1,6 @@
 load("//tensorflow:tensorflow.bzl", "transitive_hdrs")
 load("//tensorflow/lite:build_def.bzl", "tflite_copts")
-load("//tensorflow/lite/micro:build_def.bzl", "cc_library")
+load("//tensorflow/lite/micro:build_def.bzl", "cc_library", "micro_copts")
 load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite_combined")

 package(
@@ -353,7 +353,7 @@ cc_library(
     name = "quantization_util",
     srcs = ["quantization_util.cc"],
     hdrs = ["quantization_util.h"],
-    copts = tflite_copts(),
+    copts = tflite_copts() + micro_copts(),
     deps = [
         ":compatibility",
         ":round",
@@ -645,7 +645,7 @@ cc_library(
     name = "kernel_utils",
     srcs = ["kernel_utils.cc"],
     hdrs = ["kernel_utils.h"],
-    copts = tflite_copts(),
+    copts = tflite_copts() + micro_copts(),
     deps = [
         ":tensor_utils",
         "//tensorflow/lite/c:common",
@@ -183,11 +183,11 @@ double DoubleFromFractionAndShift(int64_t fraction, int shift) {
   // Detect NaNs and infinities.
   if (shift == std::numeric_limits<int>::max()) {
     if (fraction == 0) {
-      return NAN;
+      return std::numeric_limits<double>::quiet_NaN();
     } else if (fraction > 0) {
-      return INFINITY;
+      return std::numeric_limits<double>::infinity();
     } else {
-      return -INFINITY;
+      return -std::numeric_limits<double>::infinity();
     }
   }
@@ -229,7 +229,7 @@ double IntegerDoubleMultiply(double a, double b) {
   // Detect NaNs and infinities.
   if (a_shift == std::numeric_limits<int>::max() ||
       (b_shift == std::numeric_limits<int>::max())) {
-    return NAN;
+    return std::numeric_limits<double>::quiet_NaN();
   }
   const int result_shift = a_shift + b_shift + 1;
   const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
@@ -379,7 +379,7 @@ bool CheckedLog2(const float x, int* log2_result) {
   const float x_log2_fracpart = x_log2 - x_log2_rounded;

   *log2_result = static_cast<int>(x_log2_rounded);
-  return std::abs(x_log2_fracpart) < 1e-3;
+  return std::abs(x_log2_fracpart) < 1e-3f;
 }

 void QuantizeMultiplierArray(const double* effective_scales, size_t size,
@@ -36,7 +36,9 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params,

   for (int i = 0; i < flat_size; i++) {
     const float val = input_data[i];
-    int32 unclamped = static_cast<int32>(TfLiteRound(val / scale)) + zero_point;
+    int32 unclamped =
+        static_cast<int32>(TfLiteRound(val / static_cast<float>(scale))) +
+        zero_point;
     int32 clamped = std::min(std::max(unclamped, min_val), max_val);
     output_data[i] = clamped;
   }
@@ -43,16 +43,20 @@ inline void Softmax(const SoftmaxParams& params,
       max = std::max(max, input_data[i * depth + c]);
     }

+    // TODO(b/148114827): Improve this code.
     // Compute sum.
     float sum = 0.f;
     for (int c = 0; c < depth; ++c) {
-      sum += std::exp((input_data[i * depth + c] - max) * params.beta);
+      sum += std::exp(static_cast<double>(input_data[i * depth + c] - max) *
+                      params.beta);
     }

     // Compute result.
     for (int c = 0; c < depth; ++c) {
       output_data[i * depth + c] =
-          std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
+          std::exp(static_cast<double>(input_data[i * depth + c] - max) *
+                   params.beta) /
+          static_cast<double>(sum);
     }
   }
 }
@@ -118,11 +118,12 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                               const TfLiteTensor* bias,
                                               TfLiteTensor* output,
                                               double* multiplier) {
-  const double input_product_scale = input->params.scale * filter->params.scale;
+  const double input_product_scale = static_cast<double>(input->params.scale) *
+                                     static_cast<double>(filter->params.scale);
   // TODO(ahentz): The following conditions must be guaranteed by the training
   // pipeline.
   if (bias) {
-    const double bias_scale = bias->params.scale;
+    const double bias_scale = static_cast<double>(bias->params.scale);
     TF_LITE_ENSURE(context,
                    std::abs(input_product_scale - bias_scale) <=
                        1e-6 * std::min(input_product_scale, bias_scale));
@@ -136,9 +137,10 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                               const TfLiteTensor* filter,
                                               TfLiteTensor* output,
                                               double* multiplier) {
-  const double input_product_scale = input->params.scale * filter->params.scale;
+  const double input_product_scale = static_cast<double>(input->params.scale) *
+                                     static_cast<double>(filter->params.scale);
   TF_LITE_ENSURE(context, input_product_scale >= 0);
-  *multiplier = input_product_scale / output->params.scale;
+  *multiplier = input_product_scale / static_cast<double>(output->params.scale);

   return kTfLiteOk;
 }
@@ -10,10 +10,9 @@ load(
 def micro_copts():
     # TODO(b/139024129): include the followings as well:
     # -Wmissing-field-initializers
-    # -Wdouble-promotion
     # -Wunused-const-variable
     # -Wshadow
-    copts = ["-Werror", "-Wsign-compare"]
+    copts = ["-Werror", "-Wsign-compare", "-Wdouble-promotion"]
     return copts

 def cc_library(**kwargs):
@@ -18,5 +18,7 @@ limitations under the License.
 void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value,
                   float y_value) {
   // Log the current X and Y values
-  error_reporter->Report("x_value: %f, y_value: %f\n", x_value, y_value);
+  error_reporter->Report("x_value: %f, y_value: %f\n",
+                         static_cast<double>(x_value),
+                         static_cast<double>(y_value));
 }
@@ -77,14 +77,15 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
   data->output_offset = output->params.zero_point;
   data->left_shift = 20;
   const double twice_max_input_scale =
-      2 * std::max(input1->params.scale, input2->params.scale);
+      2 * static_cast<double>(
+              std::max(input1->params.scale, input2->params.scale));
   const double real_input1_multiplier =
-      input1->params.scale / twice_max_input_scale;
+      static_cast<double>(input1->params.scale) / twice_max_input_scale;
   const double real_input2_multiplier =
-      input2->params.scale / twice_max_input_scale;
+      static_cast<double>(input2->params.scale) / twice_max_input_scale;
   const double real_output_multiplier =
       twice_max_input_scale /
-      ((1 << data->left_shift) * output->params.scale);
+      ((1 << data->left_shift) * static_cast<double>(output->params.scale));

   QuantizeMultiplierSmallerThanOneExp(
       real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
@@ -43,12 +43,14 @@ constexpr int kOutputTensor = 0;
     \
     int32 input1_multiplier; \
     int input1_shift; \
-    QuantizeMultiplierSmallerThanOneExp(input1->params.scale, \
-                                        &input1_multiplier, &input1_shift); \
+    QuantizeMultiplierSmallerThanOneExp( \
+        static_cast<double>(input1->params.scale), &input1_multiplier, \
+        &input1_shift); \
     int32 input2_multiplier; \
     int input2_shift; \
-    QuantizeMultiplierSmallerThanOneExp(input2->params.scale, \
-                                        &input2_multiplier, &input2_shift); \
+    QuantizeMultiplierSmallerThanOneExp( \
+        static_cast<double>(input2->params.scale), &input2_multiplier, \
+        &input2_shift); \
     \
     ComparisonParams op_params; \
     op_params.left_shift = left_shift; \
@@ -46,7 +46,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

   tflite::DequantizationParams op_params;
   op_params.zero_point = input->params.zero_point;
-  op_params.scale = input->params.scale;
+  op_params.scale = static_cast<double>(input->params.scale);
   switch (input->type) {
     case kTfLiteUInt8:
       reference_ops::Dequantize(
@@ -55,8 +55,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                     &data->output_activation_max));

   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    double real_multiplier =
-        input1->params.scale * input2->params.scale / output->params.scale;
+    double real_multiplier = static_cast<double>(input1->params.scale) *
+                             static_cast<double>(input2->params.scale) /
+                             static_cast<double>(output->params.scale);
     QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                        &data->output_shift);
   }
@@ -152,8 +152,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     // same quantized range as the input and output tensors.
     TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
                       op_context.constant_values->params.zero_point);
-    TF_LITE_ENSURE_EQ(context, op_context.output->params.scale,
-                      op_context.constant_values->params.scale);
+    TF_LITE_ENSURE_EQ(
+        context, static_cast<double>(op_context.output->params.scale),
+        static_cast<double>(op_context.constant_values->params.scale));
     pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
   }
   if (op_context.resizing_category == ResizingCategory::kImageStyle) {
@@ -53,7 +53,7 @@ inline void BroadcastPrelu4DSlowFloat(
           auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
           auto in1_val = input1_data[in1_idx];
           auto in2_val = input2_data[in2_idx];
-          output_data[out_idx] = in1_val >= 0.0 ? in1_val : in1_val * in2_val;
+          output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
         }
       }
     }
@@ -67,8 +67,9 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
   int32_t output_multiplier = 0;
   int output_shift = 0;
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
-    double real_multiplier =
-        input->params.scale * alpha->params.scale / output->params.scale;
+    double real_multiplier = static_cast<double>(input->params.scale) *
+                             static_cast<double>(alpha->params.scale) /
+                             static_cast<double>(output->params.scale);
     QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
                                         &output_shift);
   }
@@ -60,7 +60,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

   tflite::QuantizationParams op_params;
   op_params.zero_point = output->params.zero_point;
-  op_params.scale = output->params.scale;
+  op_params.scale = static_cast<double>(output->params.scale);
   switch (output->type) {
     case kTfLiteInt8:
       reference_ops::AffineQuantize(
@@ -53,7 +53,8 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context,
     static const int kScaledDiffIntegerBits = 5;

     tflite::PreprocessSoftmaxScaling(
-        params->beta, input->params.scale, kScaledDiffIntegerBits,
+        static_cast<double>(params->beta),
+        static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
         &data->input_multiplier, &data->input_left_shift);
     data->diff_min = -1.0 * tflite::CalculateInputRadius(
                                 kScaledDiffIntegerBits, data->input_left_shift);
@@ -143,7 +144,7 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
 void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
                     TfLiteSoftmaxParams* params) {
   SoftmaxParams op_params;
-  op_params.beta = params->beta;
+  op_params.beta = static_cast<double>(params->beta);
   tflite::reference_ops::Softmax(
       op_params, GetTensorShape(input), GetTensorData<float>(input),
       GetTensorShape(output), GetTensorData<float>(output));
@@ -526,12 +526,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
         output->quantization.params);
     const double effective_scale_1 =
-        input_params->scale->data[0] *
-        weights_feature_params->scale->data[0] /
-        state_params->scale->data[0];
-    const double effective_scale_2 = state_params->scale->data[0] *
-                                     weight_time_params->scale->data[0] /
-                                     output_params->scale->data[0];
+        static_cast<double>(input_params->scale->data[0] *
+                            weights_feature_params->scale->data[0] /
+                            state_params->scale->data[0]);
+    const double effective_scale_2 = static_cast<double>(
+        state_params->scale->data[0] * weight_time_params->scale->data[0] /
+        output_params->scale->data[0]);
     QuantizeMultiplier(effective_scale_1, &op_data.effective_scale_1_a,
                        &op_data.effective_scale_1_b);
     QuantizeMultiplier(effective_scale_2, &op_data.effective_scale_2_a,
@@ -54,6 +54,7 @@ $(MAKEFILE_DIR)/downloads/$(AM_SDK_DEST)/$(SF_BSPS_DEST): $(MAKEFILE_DIR)/downlo
     -Wall \
     -Wextra \
     -Wsign-compare \
+    -Wdouble-promotion \
     -Wno-unused-parameter \
     -Wno-missing-field-initializers \
     -Wno-write-strings \
@@ -28,6 +28,7 @@ ifeq ($(TARGET), bluepill)
    -Wall \
    -Wextra \
    -Wsign-compare \
+   -Wdouble-promotion \
    -Wno-unused-parameter \
    -Wno-missing-field-initializers \
    -Wno-write-strings \
@@ -41,6 +41,7 @@ ifeq ($(TARGET), ecm3531)
    -Wall \
    -Wextra \
    -Wsign-compare \
+   -Wdouble-promotion \
    -Wno-unused-parameter \
    -Wno-missing-field-initializers \
    -Wno-write-strings \
@@ -24,6 +24,7 @@ ifeq ($(TARGET), riscv32_mcu)
    -Wall \
    -Wextra \
    -Wsign-compare \
+   -Wdouble-promotion \
    -Wno-unused-parameter \
    -Wno-missing-field-initializers \
    -Wno-write-strings \