Tweak round_to_bfloat16 to make it vectorizable.

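This simplifies control flow: instead of an if/else chain, the rounding is
always computed and the result is then overridden by separate checks for
positive denormals, negative denormals, and NaN. It should be ~40% faster.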

PiperOrigin-RevId: 312095390
Change-Id: I5b6388e48b8c217edb0fc4fe14c3add64fb52c65
This commit is contained in:
Ilya Tokar 2020-05-18 09:35:23 -07:00 committed by TensorFlower Gardener
parent 9c49cda7d9
commit cfdb943405

View File

@ -194,17 +194,6 @@ struct bfloat16 {
input = f.u; input = f.u;
bfloat16 output; bfloat16 output;
if (float_isnan(v)) {
// If the value is a NaN, squash it to a qNaN with msb of fraction set,
// this makes sure after truncation we don't end up with an inf.
//
// qNaN magic: All exponent bits set + most significant bit of fraction
// set.
output.value = 0x7fc0;
} else if (std::fabs(v) < std::numeric_limits<float>::min()) {
// Flush denormal to +/- 0.0
output.value = std::signbit(v) ? 0x8000 : 0;
} else {
// Fast rounding algorithm that rounds a half value to nearest even. This // Fast rounding algorithm that rounds a half value to nearest even. This
// reduces expected error when we convert a large number of floats. Here // reduces expected error when we convert a large number of floats. Here
// is how it works: // is how it works:
@ -359,6 +348,16 @@ struct bfloat16 {
uint32_t rounding_bias = 0x7fff + lsb; uint32_t rounding_bias = 0x7fff + lsb;
input += rounding_bias; input += rounding_bias;
output.value = static_cast<uint16_t>(input >> 16); output.value = static_cast<uint16_t>(input >> 16);
if ((f.u & 0xff800000u) == 0) {
// Flush positive denormal to 0
output.value = 0x0;
}
if ((f.u & 0xff800000u) == 0x80000000u) {
// Flush negative denormal to -0
output.value = 0x8000;
}
if (float_isnan(v)) {
output.value = NAN_VALUE;
} }
return output; return output;
} }