Add fast math attributes to all generated methods when fast math is enabled.
RELNOTES: n/a PiperOrigin-RevId: 167646637
This commit is contained in:
parent
aebe8cc6f4
commit
db43fe68e0
@ -240,6 +240,13 @@ void IrEmitter::InitializeIrFunction(const string& function_name) {
|
|||||||
compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize);
|
compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hlo_module_config_.debug_options().xla_enable_fast_math()) {
|
||||||
|
compute_function_->addFnAttr("unsafe-fp-math", "true");
|
||||||
|
compute_function_->addFnAttr("no-infs-fp-math", "true");
|
||||||
|
compute_function_->addFnAttr("no-nans-fp-math", "true");
|
||||||
|
compute_function_->addFnAttr("no-signed-zeros-fp-math", "true");
|
||||||
|
}
|
||||||
|
|
||||||
ir_builder_.SetInsertPoint(llvm::BasicBlock::Create(
|
ir_builder_.SetInsertPoint(llvm::BasicBlock::Create(
|
||||||
/*Context=*/module_->getContext(),
|
/*Context=*/module_->getContext(),
|
||||||
/*Name=*/"entry",
|
/*Name=*/"entry",
|
||||||
|
@ -87,6 +87,9 @@ llvm::Function* IrEmitterNested::EmitBasePointersForNestedComputation(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(b/65380986): Investigate if adding fast math flags for generated
|
||||||
|
// kernels makes sense.
|
||||||
|
|
||||||
llvm::BasicBlock* entry_bb =
|
llvm::BasicBlock* entry_bb =
|
||||||
llvm::BasicBlock::Create(function->getContext(), "entry", function);
|
llvm::BasicBlock::Create(function->getContext(), "entry", function);
|
||||||
// Emit a "return void" at entry_bb's end, and sets the insert point before
|
// Emit a "return void" at entry_bb's end, and sets the insert point before
|
||||||
|
@ -201,6 +201,9 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
|
|||||||
}
|
}
|
||||||
kernel->addAttribute(temp_buffer_arg_no + 1, llvm::Attribute::NoAlias);
|
kernel->addAttribute(temp_buffer_arg_no + 1, llvm::Attribute::NoAlias);
|
||||||
|
|
||||||
|
// TODO(b/65380986): Investigate if adding fast math flags for generated
|
||||||
|
// kernels makes sense.
|
||||||
|
|
||||||
// Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX
|
// Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX
|
||||||
// treats it as a CUDA kernel.
|
// treats it as a CUDA kernel.
|
||||||
llvm::NamedMDNode* nvvm_annotations_node =
|
llvm::NamedMDNode* nvvm_annotations_node =
|
||||||
|
Loading…
Reference in New Issue
Block a user