Merge pull request #29093 from ROCmSoftwarePlatform:rocm_kernel

PiperOrigin-RevId: 251388968
This commit is contained in:
TensorFlower Gardener 2019-06-04 01:00:34 -07:00
commit 0b1f96ed96
3 changed files with 30 additions and 11 deletions

View File

@ -238,18 +238,11 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
}
}
AnnotateFunctionAsGpuKernel(module, kernel, &b_);
// TODO(b/65380986): Investigate if adding fast math flags for generated
// kernels makes sense.
// Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX
// treats it as a CUDA kernel.
llvm::NamedMDNode* nvvm_annotations_node =
module->getOrInsertNamedMetadata("nvvm.annotations");
nvvm_annotations_node->addOperand(llvm::MDNode::get(
context, {llvm::ConstantAsMetadata::get(kernel),
llvm::MDString::get(context, "kernel"),
llvm::ConstantAsMetadata::get(b_.getInt32(1))}));
// Update the insert point to the entry basic block.
llvm::BasicBlock* entry_bb =
llvm::BasicBlock::Create(context, /*Name=*/"entry", /*Parent=*/kernel);

View File

@ -76,8 +76,7 @@ llvm::CallInst* EmitCallToTargetIntrinsic(
llvm::Triple target_triple = llvm::Triple(module->getTargetTriple());
llvm::Intrinsic::ID llvm_intrinsic_id = llvm::Intrinsic::not_intrinsic;
if ((target_triple.getArch() == llvm::Triple::nvptx) ||
(target_triple.getArch() == llvm::Triple::nvptx64)) {
if (target_triple.isNVPTX()) {
llvm_intrinsic_id = gpu_intrinsic_id.nvptx_intrinsic;
} else if (target_triple.getArch() == llvm::Triple::amdgcn) {
llvm_intrinsic_id = gpu_intrinsic_id.amdgpu_intrinsic;
@ -90,5 +89,28 @@ llvm::CallInst* EmitCallToTargetIntrinsic(
return b->CreateCall(intrinsic, llvm_ir::AsArrayRef(operands));
}
// Marks `func` as a GPU kernel entry point for whichever backend the module's
// target triple selects (NVPTX or AMDGPU); aborts on any other triple.
// `b` is only consulted on the NVPTX path, to build the i32 metadata constant.
void AnnotateFunctionAsGpuKernel(llvm::Module* module, llvm::Function* func,
                                 llvm::IRBuilder<>* b) {
  const llvm::Triple triple(module->getTargetTriple());
  if (triple.isNVPTX()) {
    // NVPTX: register the function in the "nvvm.annotations" named metadata
    // node so the backend emits it as a CUDA kernel.
    llvm::LLVMContext& ctx = module->getContext();
    llvm::Metadata* annotation_operands[] = {
        llvm::ConstantAsMetadata::get(func),
        llvm::MDString::get(ctx, "kernel"),
        llvm::ConstantAsMetadata::get(b->getInt32(1))};
    module->getOrInsertNamedMetadata("nvvm.annotations")
        ->addOperand(llvm::MDNode::get(ctx, annotation_operands));
  } else if (triple.getArch() == llvm::Triple::amdgcn) {
    // AMDGPU: kernels are identified by calling convention; also bound the
    // flat work-group size so the backend can allocate resources.
    func->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
    func->addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
  } else {
    LOG(FATAL) << "Invalid triple " << triple.str();
  }
}
} // namespace gpu
} // namespace xla

View File

@ -49,6 +49,10 @@ llvm::CallInst* EmitCallToTargetIntrinsic(
TargetIntrinsicID intrinsic_id, absl::Span<llvm::Value* const> operands,
absl::Span<llvm::Type* const> overloaded_types, llvm::IRBuilder<>* b);
// Annotates the function as a GPU kernel according to the GPU target.
void AnnotateFunctionAsGpuKernel(llvm::Module* module, llvm::Function* func,
llvm::IRBuilder<>* b);
} // namespace gpu
} // namespace xla