Merge pull request #29093 from ROCmSoftwarePlatform:rocm_kernel
PiperOrigin-RevId: 251388968
This commit is contained in:
commit
0b1f96ed96
@ -238,18 +238,11 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
|
||||
}
|
||||
}
|
||||
|
||||
AnnotateFunctionAsGpuKernel(module, kernel, &b_);
|
||||
|
||||
// TODO(b/65380986): Investigate if adding fast math flags for generated
|
||||
// kernels makes sense.
|
||||
|
||||
// Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX
|
||||
// treats it as a CUDA kernel.
|
||||
llvm::NamedMDNode* nvvm_annotations_node =
|
||||
module->getOrInsertNamedMetadata("nvvm.annotations");
|
||||
nvvm_annotations_node->addOperand(llvm::MDNode::get(
|
||||
context, {llvm::ConstantAsMetadata::get(kernel),
|
||||
llvm::MDString::get(context, "kernel"),
|
||||
llvm::ConstantAsMetadata::get(b_.getInt32(1))}));
|
||||
|
||||
// Update the insert point to the entry basic block.
|
||||
llvm::BasicBlock* entry_bb =
|
||||
llvm::BasicBlock::Create(context, /*Name=*/"entry", /*Parent=*/kernel);
|
||||
|
||||
@ -76,8 +76,7 @@ llvm::CallInst* EmitCallToTargetIntrinsic(
|
||||
llvm::Triple target_triple = llvm::Triple(module->getTargetTriple());
|
||||
llvm::Intrinsic::ID llvm_intrinsic_id = llvm::Intrinsic::not_intrinsic;
|
||||
|
||||
if ((target_triple.getArch() == llvm::Triple::nvptx) ||
|
||||
(target_triple.getArch() == llvm::Triple::nvptx64)) {
|
||||
if (target_triple.isNVPTX()) {
|
||||
llvm_intrinsic_id = gpu_intrinsic_id.nvptx_intrinsic;
|
||||
} else if (target_triple.getArch() == llvm::Triple::amdgcn) {
|
||||
llvm_intrinsic_id = gpu_intrinsic_id.amdgpu_intrinsic;
|
||||
@ -90,5 +89,28 @@ llvm::CallInst* EmitCallToTargetIntrinsic(
|
||||
return b->CreateCall(intrinsic, llvm_ir::AsArrayRef(operands));
|
||||
}
|
||||
|
||||
void AnnotateFunctionAsGpuKernel(llvm::Module* module, llvm::Function* func,
                                 llvm::IRBuilder<>* b) {
  // Marks `func` as a GPU kernel entry point, using whichever mechanism the
  // target encoded in `module`'s triple requires. LOG(FATAL)s on any triple
  // that is neither NVPTX nor AMDGCN.
  const llvm::Triple target_triple(module->getTargetTriple());
  if (target_triple.isNVPTX()) {
    // NVPTX: add the function to the module-level "nvvm.annotations" named
    // metadata so the backend treats it as a CUDA kernel.
    llvm::LLVMContext& context = module->getContext();
    llvm::Metadata* md_operands[] = {
        llvm::ConstantAsMetadata::get(func),
        llvm::MDString::get(context, "kernel"),
        llvm::ConstantAsMetadata::get(b->getInt32(1)),
    };
    module->getOrInsertNamedMetadata("nvvm.annotations")
        ->addOperand(llvm::MDNode::get(context, md_operands));
  } else if (target_triple.getArch() == llvm::Triple::amdgcn) {
    // AMDGPU: the dedicated calling convention plus the flat work-group-size
    // attribute mark the function as an AMDGPU kernel.
    func->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
    func->addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
  } else {
    LOG(FATAL) << "Invalid triple " << target_triple.str();
  }
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xla
|
||||
|
||||
@ -49,6 +49,10 @@ llvm::CallInst* EmitCallToTargetIntrinsic(
|
||||
TargetIntrinsicID intrinsic_id, absl::Span<llvm::Value* const> operands,
|
||||
absl::Span<llvm::Type* const> overloaded_types, llvm::IRBuilder<>* b);
|
||||
|
||||
// Annotates `func` as a GPU kernel entry point according to the GPU target
// (NVPTX or AMDGPU) named in `module`'s target triple.
void AnnotateFunctionAsGpuKernel(llvm::Module* module, llvm::Function* func,
                                 llvm::IRBuilder<>* b);
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xla
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user