Tweak round_to_bfloat16 to make it vectorizable.
This simplifies control flow by handling positive and negative denormals separately. Should be ~40% faster.

PiperOrigin-RevId: 312095390
Change-Id: I5b6388e48b8c217edb0fc4fe14c3add64fb52c65
This commit is contained in:
parent
9c49cda7d9
commit
cfdb943405
@@ -194,17 +194,6 @@ struct bfloat16 {
|
||||
input = f.u;
|
||||
bfloat16 output;
|
||||
|
||||
if (float_isnan(v)) {
|
||||
// If the value is a NaN, squash it to a qNaN with msb of fraction set,
|
||||
// this makes sure after truncation we don't end up with an inf.
|
||||
//
|
||||
// qNaN magic: All exponent bits set + most significant bit of fraction
|
||||
// set.
|
||||
output.value = 0x7fc0;
|
||||
} else if (std::fabs(v) < std::numeric_limits<float>::min()) {
|
||||
// Flush denormal to +/- 0.0
|
||||
output.value = std::signbit(v) ? 0x8000 : 0;
|
||||
} else {
|
||||
// Fast rounding algorithm that rounds a half value to nearest even. This
|
||||
// reduces expected error when we convert a large number of floats. Here
|
||||
// is how it works:
|
||||
@@ -359,6 +348,16 @@ struct bfloat16 {
|
||||
uint32_t rounding_bias = 0x7fff + lsb;
|
||||
input += rounding_bias;
|
||||
output.value = static_cast<uint16_t>(input >> 16);
|
||||
if ((f.u & 0xff800000u) == 0) {
|
||||
// Flush positive denormal to 0
|
||||
output.value = 0x0;
|
||||
}
|
||||
if ((f.u & 0xff800000u) == 0x80000000u) {
|
||||
// Flush negative denormal to -0
|
||||
output.value = 0x8000;
|
||||
}
|
||||
if (float_isnan(v)) {
|
||||
output.value = NAN_VALUE;
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user