Add fast math attributes to all generated methods when fast math is enabled.

RELNOTES: n/a
PiperOrigin-RevId: 167646637
This commit is contained in:
A. Unique TensorFlower 2017-09-05 16:15:15 -07:00 committed by TensorFlower Gardener
parent aebe8cc6f4
commit db43fe68e0
3 changed files with 13 additions and 0 deletions

View File

@ -240,6 +240,13 @@ void IrEmitter::InitializeIrFunction(const string& function_name) {
compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize); compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize);
} }
if (hlo_module_config_.debug_options().xla_enable_fast_math()) {
compute_function_->addFnAttr("unsafe-fp-math", "true");
compute_function_->addFnAttr("no-infs-fp-math", "true");
compute_function_->addFnAttr("no-nans-fp-math", "true");
compute_function_->addFnAttr("no-signed-zeros-fp-math", "true");
}
ir_builder_.SetInsertPoint(llvm::BasicBlock::Create( ir_builder_.SetInsertPoint(llvm::BasicBlock::Create(
/*Context=*/module_->getContext(), /*Context=*/module_->getContext(),
/*Name=*/"entry", /*Name=*/"entry",

View File

@ -87,6 +87,9 @@ llvm::Function* IrEmitterNested::EmitBasePointersForNestedComputation(
} }
} }
// TODO(b/65380986): Investigate if adding fast math flags for generated
// kernels makes sense.
llvm::BasicBlock* entry_bb = llvm::BasicBlock* entry_bb =
llvm::BasicBlock::Create(function->getContext(), "entry", function); llvm::BasicBlock::Create(function->getContext(), "entry", function);
// Emit a "return void" at entry_bb's end, and sets the insert point before // Emit a "return void" at entry_bb's end, and sets the insert point before

View File

@ -201,6 +201,9 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
} }
kernel->addAttribute(temp_buffer_arg_no + 1, llvm::Attribute::NoAlias); kernel->addAttribute(temp_buffer_arg_no + 1, llvm::Attribute::NoAlias);
// TODO(b/65380986): Investigate if adding fast math flags for generated
// kernels makes sense.
// Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX // Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX
// treats it as a CUDA kernel. // treats it as a CUDA kernel.
llvm::NamedMDNode* nvvm_annotations_node = llvm::NamedMDNode* nvvm_annotations_node =