Add fast math attributes to all generated methods when fast math is enabled.

RELNOTES: n/a
PiperOrigin-RevId: 167646637
2017-09-05 16:15:15 -07:00 · 2017-09-05 16:15:15 -07:00 · db43fe68e0
commit db43fe68e0
parent aebe8cc6f4
3 changed files with 13 additions and 0 deletions
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@ -240,6 +240,13 @@ void IrEmitter::InitializeIrFunction(const string& function_name) {
    compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize);
  }
  if (hlo_module_config_.debug_options().xla_enable_fast_math()) {
    compute_function_->addFnAttr("unsafe-fp-math", "true");
    compute_function_->addFnAttr("no-infs-fp-math", "true");
    compute_function_->addFnAttr("no-nans-fp-math", "true");
    compute_function_->addFnAttr("no-signed-zeros-fp-math", "true");
  }
  ir_builder_.SetInsertPoint(llvm::BasicBlock::Create(
      /*Context=*/module_->getContext(),
      /*Name=*/"entry",
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
@ -87,6 +87,9 @@ llvm::Function* IrEmitterNested::EmitBasePointersForNestedComputation(
    }
  }
  // TODO(b/65380986): Investigate if adding fast math flags for generated
  // kernels makes sense.
  llvm::BasicBlock* entry_bb =
      llvm::BasicBlock::Create(function->getContext(), "entry", function);
  // Emit a "return void" at entry_bb's end, and sets the insert point before
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@ -201,6 +201,9 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
  }
  kernel->addAttribute(temp_buffer_arg_no + 1, llvm::Attribute::NoAlias);
  // TODO(b/65380986): Investigate if adding fast math flags for generated
  // kernels makes sense.
  // Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX
  // treats it as a CUDA kernel.
  llvm::NamedMDNode* nvvm_annotations_node =