Tweak round_to_bfloat16 to make it vectorizable.

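This simplifies control flow: the rounded result is now computed unconditionally, and positive denormals, negative denormals, and NaNs are each handled by a separate, independent check afterwards instead of an if/else chain. Should be ~40% faster. PiperOrigin-RevId: 312095390 Change-Id: I5b6388e48b8c217edb0fc4fe14c3add64fb52c65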
2020-05-18 09:35:23 -07:00 · 2020-05-18 09:35:23 -07:00 · cfdb943405
commit cfdb943405
parent 9c49cda7d9
1 changed files with 163 additions and 164 deletions
--- a/tensorflow/core/lib/bfloat16/bfloat16.h
+++ b/tensorflow/core/lib/bfloat16/bfloat16.h
@@ -194,17 +194,6 @@ struct bfloat16 {
    input = f.u;
    bfloat16 output;
    if (float_isnan(v)) {
      // If the value is a NaN, squash it to a qNaN with msb of fraction set,
      // this makes sure after truncation we don't end up with an inf.
      //
      // qNaN magic: All exponent bits set + most significant bit of fraction
      // set.
      output.value = 0x7fc0;
    } else if (std::fabs(v) < std::numeric_limits<float>::min()) {
      // Flush denormal to +/- 0.0
      output.value = std::signbit(v) ? 0x8000 : 0;
    } else {
    // Fast rounding algorithm that rounds a half value to nearest even. This
    // reduces expected error when we convert a large number of floats. Here
    // is how it works:
@@ -359,6 +348,16 @@ struct bfloat16 {
    uint32_t rounding_bias = 0x7fff + lsb;
    input += rounding_bias;
    output.value = static_cast<uint16_t>(input >> 16);
    if ((f.u & 0xff800000u) == 0) {
      // Flush positive denormal to 0
      output.value = 0x0;
    }
    if ((f.u & 0xff800000u) == 0x80000000u) {
      // Flush negative denormal to -0
      output.value = 0x8000;
    }
    if (float_isnan(v)) {
      output.value = NAN_VALUE;
    }
    return output;
  }