Plug gpu.func into the GPU lowering pipelines
This updates the lowering pipelines from the GPU dialect to lower-level dialects (NVVM, SPIRV) to use the recently introduced gpu.func operation instead of a standard function annotated with an attribute. In particular, the kernel outlining is updated to produce gpu.func instead of std.func and the individual conversions are updated to consume gpu.funcs and disallow standard funcs after legalization, if necessary. The attribute "gpu.kernel" is preserved in the generic syntax, but can also be used with the custom syntax on gpu.funcs. The special kind of function for GPU allows one to use additional features such as memory attribution. PiperOrigin-RevId: 285822272 Change-Id: I35cbe2454e0f43baa4e2c4c16434ae2158f32d38
This commit is contained in:
parent
8f2154320a
commit
3ea3444ef6
@ -120,9 +120,9 @@ def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
|
|||||||
|
|
||||||
let builders = [
|
let builders = [
|
||||||
OpBuilder<"Builder *builder, OperationState &result, StringRef name, "
|
OpBuilder<"Builder *builder, OperationState &result, StringRef name, "
|
||||||
"FunctionType type, ArrayRef<Type> workgroupAttributions, "
|
"FunctionType type, ArrayRef<Type> workgroupAttributions = {}, "
|
||||||
"ArrayRef<Type> privateAttributions, "
|
"ArrayRef<Type> privateAttributions = {}, "
|
||||||
"ArrayRef<NamedAttribute> attrs">
|
"ArrayRef<NamedAttribute> attrs = {}">
|
||||||
];
|
];
|
||||||
|
|
||||||
let extraClassDeclaration = [{
|
let extraClassDeclaration = [{
|
||||||
@ -138,6 +138,17 @@ def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> {
|
|||||||
return getTypeAttr().getValue().cast<FunctionType>();
|
return getTypeAttr().getValue().cast<FunctionType>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Change the type of this function in place. This is an extremely
|
||||||
|
/// dangerous operation and it is up to the caller to ensure that this is
|
||||||
|
/// legal for this function, and to restore invariants:
|
||||||
|
/// - the entry block args must be updated to match the function params.
|
||||||
|
/// - the argument/result attributes may need an update: if the new type
|
||||||
|
/// has fewer parameters we drop the extra attributes, if there are more
|
||||||
|
/// parameters they won't have any attributes.
|
||||||
|
// TODO(b/146349912): consider removing this function thanks to rewrite
|
||||||
|
// patterns.
|
||||||
|
void setType(FunctionType newType);
|
||||||
|
|
||||||
/// Returns the number of buffers located in the workgroup memory.
|
/// Returns the number of buffers located in the workgroup memory.
|
||||||
unsigned getNumWorkgroupAttributions() {
|
unsigned getNumWorkgroupAttributions() {
|
||||||
return getAttrOfType<IntegerAttr>(getNumWorkgroupAttributionsAttrName())
|
return getAttrOfType<IntegerAttr>(getNumWorkgroupAttributionsAttrName())
|
||||||
@ -270,11 +281,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
|
|||||||
let skipDefaultBuilders = 1;
|
let skipDefaultBuilders = 1;
|
||||||
|
|
||||||
let builders = [
|
let builders = [
|
||||||
OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
|
OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, "
|
||||||
"Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ, "
|
"Value *gridSizeX, Value *gridSizeY, Value *gridSizeZ, "
|
||||||
"Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ, "
|
"Value *blockSizeX, Value *blockSizeY, Value *blockSizeZ, "
|
||||||
"ValueRange kernelOperands">,
|
"ValueRange kernelOperands">,
|
||||||
OpBuilder<"Builder *builder, OperationState &result, FuncOp kernelFunc, "
|
OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, "
|
||||||
"KernelDim3 gridSize, KernelDim3 blockSize, "
|
"KernelDim3 gridSize, KernelDim3 blockSize, "
|
||||||
"ValueRange kernelOperands">
|
"ValueRange kernelOperands">
|
||||||
];
|
];
|
||||||
|
@ -83,12 +83,6 @@ StringRef getEntryPointABIAttrName();
|
|||||||
EntryPointABIAttr getEntryPointABIAttr(ArrayRef<int32_t> localSize,
|
EntryPointABIAttr getEntryPointABIAttr(ArrayRef<int32_t> localSize,
|
||||||
MLIRContext *context);
|
MLIRContext *context);
|
||||||
|
|
||||||
/// Legalizes a function as an entry function.
|
|
||||||
FuncOp lowerAsEntryFunction(FuncOp funcOp, SPIRVTypeConverter &typeConverter,
|
|
||||||
ConversionPatternRewriter &rewriter,
|
|
||||||
spirv::EntryPointABIAttr entryPointInfo,
|
|
||||||
ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo);
|
|
||||||
|
|
||||||
/// Sets the InterfaceVarABIAttr and EntryPointABIAttr for a function and its
|
/// Sets the InterfaceVarABIAttr and EntryPointABIAttr for a function and its
|
||||||
/// arguments
|
/// arguments
|
||||||
LogicalResult setABIAttrs(FuncOp funcOp,
|
LogicalResult setABIAttrs(FuncOp funcOp,
|
||||||
|
@ -489,8 +489,6 @@ struct GPUFuncOpLowering : LLVMOpLowering {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Rewrite the original GPU function to an LLVM function.
|
// Rewrite the original GPU function to an LLVM function.
|
||||||
// TODO(zinenko): there is a hack in the std->llvm lowering that promotes
|
|
||||||
// structs to pointers that probably needs to be replicated here.
|
|
||||||
auto funcType = lowering.convertType(gpuFuncOp.getType())
|
auto funcType = lowering.convertType(gpuFuncOp.getType())
|
||||||
.cast<LLVM::LLVMType>()
|
.cast<LLVM::LLVMType>()
|
||||||
.getPointerElementTy();
|
.getPointerElementTy();
|
||||||
@ -576,16 +574,51 @@ struct GPUFuncOpLowering : LLVMOpLowering {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Move the region to the new function, update the entry block signature.
|
||||||
rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
|
rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
|
||||||
llvmFuncOp.end());
|
llvmFuncOp.end());
|
||||||
rewriter.applySignatureConversion(&llvmFuncOp.getBody(),
|
rewriter.applySignatureConversion(&llvmFuncOp.getBody(),
|
||||||
signatureConversion);
|
signatureConversion);
|
||||||
|
|
||||||
|
{
|
||||||
|
// For memref-typed arguments, insert the relevant loads in the beginning
|
||||||
|
// of the block to comply with the LLVM dialect calling convention. This
|
||||||
|
// needs to be done after signature conversion to get the right types.
|
||||||
|
OpBuilder::InsertionGuard guard(rewriter);
|
||||||
|
Block &block = llvmFuncOp.front();
|
||||||
|
rewriter.setInsertionPointToStart(&block);
|
||||||
|
|
||||||
|
for (auto en : llvm::enumerate(gpuFuncOp.getType().getInputs())) {
|
||||||
|
if (!en.value().isa<MemRefType>() &&
|
||||||
|
!en.value().isa<UnrankedMemRefType>())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
BlockArgument *arg = block.getArgument(en.index());
|
||||||
|
Value *loaded = rewriter.create<LLVM::LoadOp>(loc, arg);
|
||||||
|
rewriter.replaceUsesOfBlockArgument(arg, loaded);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
rewriter.eraseOp(gpuFuncOp);
|
rewriter.eraseOp(gpuFuncOp);
|
||||||
return matchSuccess();
|
return matchSuccess();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct GPUReturnOpLowering : public LLVMOpLowering {
|
||||||
|
GPUReturnOpLowering(LLVMTypeConverter &typeConverter)
|
||||||
|
: LLVMOpLowering(gpu::ReturnOp::getOperationName(),
|
||||||
|
typeConverter.getDialect()->getContext(),
|
||||||
|
typeConverter) {}
|
||||||
|
|
||||||
|
PatternMatchResult
|
||||||
|
matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
|
||||||
|
ConversionPatternRewriter &rewriter) const override {
|
||||||
|
rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, operands,
|
||||||
|
ArrayRef<Block *>());
|
||||||
|
return matchSuccess();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/// Import the GPU Ops to NVVM Patterns.
|
/// Import the GPU Ops to NVVM Patterns.
|
||||||
#include "GPUToNVVM.cpp.inc"
|
#include "GPUToNVVM.cpp.inc"
|
||||||
|
|
||||||
@ -632,7 +665,8 @@ void mlir::populateGpuToNVVMConversionPatterns(
|
|||||||
NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
|
NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
|
||||||
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
|
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
|
||||||
NVVM::GridDimYOp, NVVM::GridDimZOp>,
|
NVVM::GridDimYOp, NVVM::GridDimZOp>,
|
||||||
GPUAllReduceOpLowering, GPUFuncOpLowering>(converter);
|
GPUAllReduceOpLowering, GPUFuncOpLowering, GPUReturnOpLowering>(
|
||||||
|
converter);
|
||||||
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
|
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
|
||||||
"__nv_exp");
|
"__nv_exp");
|
||||||
}
|
}
|
||||||
|
@ -51,21 +51,20 @@ public:
|
|||||||
ConversionPatternRewriter &rewriter) const override;
|
ConversionPatternRewriter &rewriter) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Pattern to convert a kernel function in GPU dialect (a FuncOp with the
|
/// Pattern to convert a kernel function in GPU dialect within a spv.module.
|
||||||
/// attribute gpu.kernel) within a spv.module.
|
class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
|
||||||
class KernelFnConversion final : public SPIRVOpLowering<FuncOp> {
|
|
||||||
public:
|
public:
|
||||||
KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter,
|
KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter,
|
||||||
ArrayRef<int64_t> workGroupSize,
|
ArrayRef<int64_t> workGroupSize,
|
||||||
PatternBenefit benefit = 1)
|
PatternBenefit benefit = 1)
|
||||||
: SPIRVOpLowering<FuncOp>(context, converter, benefit) {
|
: SPIRVOpLowering<gpu::GPUFuncOp>(context, converter, benefit) {
|
||||||
auto config = workGroupSize.take_front(3);
|
auto config = workGroupSize.take_front(3);
|
||||||
workGroupSizeAsInt32.assign(config.begin(), config.end());
|
workGroupSizeAsInt32.assign(config.begin(), config.end());
|
||||||
workGroupSizeAsInt32.resize(3, 1);
|
workGroupSizeAsInt32.resize(3, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
PatternMatchResult
|
PatternMatchResult
|
||||||
matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
|
matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<Value *> operands,
|
||||||
ConversionPatternRewriter &rewriter) const override;
|
ConversionPatternRewriter &rewriter) const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -96,6 +95,17 @@ public:
|
|||||||
ConversionPatternRewriter &rewriter) const override;
|
ConversionPatternRewriter &rewriter) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Pattern to convert a gpu.return into a SPIR-V return.
|
||||||
|
// TODO: This can go to DRR when GPU return has operands.
|
||||||
|
class GPUReturnOpConversion final : public SPIRVOpLowering<gpu::ReturnOp> {
|
||||||
|
public:
|
||||||
|
using SPIRVOpLowering<gpu::ReturnOp>::SPIRVOpLowering;
|
||||||
|
|
||||||
|
PatternMatchResult
|
||||||
|
matchAndRewrite(gpu::ReturnOp returnOp, ArrayRef<Value *> operands,
|
||||||
|
ConversionPatternRewriter &rewriter) const override;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -204,11 +214,58 @@ PatternMatchResult LaunchConfigConversion<SourceOp, builtin>::matchAndRewrite(
|
|||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// FuncOp with gpu.kernel attribute.
|
// GPUFuncOp
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Legalizes a GPU function as an entry SPIR-V function.
|
||||||
|
static FuncOp
|
||||||
|
lowerAsEntryFunction(gpu::GPUFuncOp funcOp, SPIRVTypeConverter &typeConverter,
|
||||||
|
ConversionPatternRewriter &rewriter,
|
||||||
|
spirv::EntryPointABIAttr entryPointInfo,
|
||||||
|
ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
|
||||||
|
auto fnType = funcOp.getType();
|
||||||
|
if (fnType.getNumResults()) {
|
||||||
|
funcOp.emitError("SPIR-V lowering only supports entry functions"
|
||||||
|
"with no return values right now");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
if (fnType.getNumInputs() != argABIInfo.size()) {
|
||||||
|
funcOp.emitError(
|
||||||
|
"lowering as entry functions requires ABI info for all arguments");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
// For entry functions, we need to make the signature void(void). Compute the
|
||||||
|
// replacement value for all arguments and replace all uses.
|
||||||
|
TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs());
|
||||||
|
{
|
||||||
|
for (auto argType : enumerate(funcOp.getType().getInputs())) {
|
||||||
|
auto convertedType = typeConverter.convertType(argType.value());
|
||||||
|
signatureConverter.addInputs(argType.index(), convertedType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto newFuncOp = rewriter.create<FuncOp>(
|
||||||
|
funcOp.getLoc(), funcOp.getName(),
|
||||||
|
rewriter.getFunctionType(signatureConverter.getConvertedTypes(),
|
||||||
|
llvm::None),
|
||||||
|
ArrayRef<NamedAttribute>());
|
||||||
|
for (const auto &namedAttr : funcOp.getAttrs()) {
|
||||||
|
if (namedAttr.first.is(impl::getTypeAttrName()) ||
|
||||||
|
namedAttr.first.is(SymbolTable::getSymbolAttrName()))
|
||||||
|
continue;
|
||||||
|
newFuncOp.setAttr(namedAttr.first, namedAttr.second);
|
||||||
|
}
|
||||||
|
rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
|
||||||
|
newFuncOp.end());
|
||||||
|
rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
|
||||||
|
rewriter.eraseOp(funcOp);
|
||||||
|
|
||||||
|
spirv::setABIAttrs(newFuncOp, entryPointInfo, argABIInfo);
|
||||||
|
return newFuncOp;
|
||||||
|
}
|
||||||
|
|
||||||
PatternMatchResult
|
PatternMatchResult
|
||||||
KernelFnConversion::matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
|
KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
|
||||||
|
ArrayRef<Value *> operands,
|
||||||
ConversionPatternRewriter &rewriter) const {
|
ConversionPatternRewriter &rewriter) const {
|
||||||
if (!gpu::GPUDialect::isKernel(funcOp)) {
|
if (!gpu::GPUDialect::isKernel(funcOp)) {
|
||||||
return matchFailure();
|
return matchFailure();
|
||||||
@ -223,8 +280,8 @@ KernelFnConversion::matchAndRewrite(FuncOp funcOp, ArrayRef<Value *> operands,
|
|||||||
auto context = rewriter.getContext();
|
auto context = rewriter.getContext();
|
||||||
auto entryPointAttr =
|
auto entryPointAttr =
|
||||||
spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context);
|
spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context);
|
||||||
FuncOp newFuncOp = spirv::lowerAsEntryFunction(
|
FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter,
|
||||||
funcOp, typeConverter, rewriter, entryPointAttr, argABI);
|
entryPointAttr, argABI);
|
||||||
if (!newFuncOp) {
|
if (!newFuncOp) {
|
||||||
return matchFailure();
|
return matchFailure();
|
||||||
}
|
}
|
||||||
@ -274,6 +331,20 @@ PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite(
|
|||||||
return matchSuccess();
|
return matchSuccess();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// GPU return inside kernel functions to SPIR-V return.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
PatternMatchResult GPUReturnOpConversion::matchAndRewrite(
|
||||||
|
gpu::ReturnOp returnOp, ArrayRef<Value *> operands,
|
||||||
|
ConversionPatternRewriter &rewriter) const {
|
||||||
|
if (!operands.empty())
|
||||||
|
return matchFailure();
|
||||||
|
|
||||||
|
rewriter.replaceOpWithNewOp<spirv::ReturnOp>(returnOp);
|
||||||
|
return matchSuccess();
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// GPU To SPIRV Patterns.
|
// GPU To SPIRV Patterns.
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -285,7 +356,8 @@ void populateGPUToSPIRVPatterns(MLIRContext *context,
|
|||||||
ArrayRef<int64_t> workGroupSize) {
|
ArrayRef<int64_t> workGroupSize) {
|
||||||
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
|
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
|
||||||
patterns.insert<
|
patterns.insert<
|
||||||
ForOpConversion, KernelModuleConversion, KernelModuleTerminatorConversion,
|
GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
|
||||||
|
KernelModuleTerminatorConversion,
|
||||||
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
|
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
|
||||||
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
|
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
|
||||||
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
|
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
|
||||||
|
@ -94,9 +94,9 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
|
|||||||
// Check that `launch_func` refers to a well-formed kernel function.
|
// Check that `launch_func` refers to a well-formed kernel function.
|
||||||
StringRef kernelName = launchOp.kernel();
|
StringRef kernelName = launchOp.kernel();
|
||||||
Operation *kernelFunc = kernelModule.lookupSymbol(kernelName);
|
Operation *kernelFunc = kernelModule.lookupSymbol(kernelName);
|
||||||
auto kernelStdFunction = dyn_cast_or_null<::mlir::FuncOp>(kernelFunc);
|
auto kernelGPUFunction = dyn_cast_or_null<gpu::GPUFuncOp>(kernelFunc);
|
||||||
auto kernelLLVMFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(kernelFunc);
|
auto kernelLLVMFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(kernelFunc);
|
||||||
if (!kernelStdFunction && !kernelLLVMFunction)
|
if (!kernelGPUFunction && !kernelLLVMFunction)
|
||||||
return launchOp.emitOpError("kernel function '")
|
return launchOp.emitOpError("kernel function '")
|
||||||
<< kernelName << "' is undefined";
|
<< kernelName << "' is undefined";
|
||||||
if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
|
if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
|
||||||
@ -107,7 +107,7 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
|
|||||||
unsigned actualNumArguments = launchOp.getNumKernelOperands();
|
unsigned actualNumArguments = launchOp.getNumKernelOperands();
|
||||||
unsigned expectedNumArguments = kernelLLVMFunction
|
unsigned expectedNumArguments = kernelLLVMFunction
|
||||||
? kernelLLVMFunction.getNumArguments()
|
? kernelLLVMFunction.getNumArguments()
|
||||||
: kernelStdFunction.getNumArguments();
|
: kernelGPUFunction.getNumArguments();
|
||||||
if (expectedNumArguments != actualNumArguments)
|
if (expectedNumArguments != actualNumArguments)
|
||||||
return launchOp.emitOpError("got ")
|
return launchOp.emitOpError("got ")
|
||||||
<< actualNumArguments << " kernel operands but expected "
|
<< actualNumArguments << " kernel operands but expected "
|
||||||
@ -488,7 +488,7 @@ void LaunchOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
void LaunchFuncOp::build(Builder *builder, OperationState &result,
|
void LaunchFuncOp::build(Builder *builder, OperationState &result,
|
||||||
::mlir::FuncOp kernelFunc, Value *gridSizeX,
|
GPUFuncOp kernelFunc, Value *gridSizeX,
|
||||||
Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
|
Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
|
||||||
Value *blockSizeY, Value *blockSizeZ,
|
Value *blockSizeY, Value *blockSizeZ,
|
||||||
ValueRange kernelOperands) {
|
ValueRange kernelOperands) {
|
||||||
@ -505,7 +505,7 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void LaunchFuncOp::build(Builder *builder, OperationState &result,
|
void LaunchFuncOp::build(Builder *builder, OperationState &result,
|
||||||
::mlir::FuncOp kernelFunc, KernelDim3 gridSize,
|
GPUFuncOp kernelFunc, KernelDim3 gridSize,
|
||||||
KernelDim3 blockSize, ValueRange kernelOperands) {
|
KernelDim3 blockSize, ValueRange kernelOperands) {
|
||||||
build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z,
|
build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z,
|
||||||
blockSize.x, blockSize.y, blockSize.z, kernelOperands);
|
blockSize.x, blockSize.y, blockSize.z, kernelOperands);
|
||||||
@ -718,6 +718,18 @@ void printGPUFuncOp(OpAsmPrinter &p, GPUFuncOp op) {
|
|||||||
p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false);
|
p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPUFuncOp::setType(FunctionType newType) {
|
||||||
|
auto oldType = getType();
|
||||||
|
assert(newType.getNumResults() == oldType.getNumResults() &&
|
||||||
|
"unimplemented: changes to the number of results");
|
||||||
|
|
||||||
|
SmallVector<char, 16> nameBuf;
|
||||||
|
for (int i = newType.getNumInputs(), e = oldType.getNumInputs(); i < e; i++)
|
||||||
|
removeAttr(getArgAttrName(i, nameBuf));
|
||||||
|
|
||||||
|
setAttr(getTypeAttrName(), TypeAttr::get(newType));
|
||||||
|
}
|
||||||
|
|
||||||
/// Hook for FunctionLike verifier.
|
/// Hook for FunctionLike verifier.
|
||||||
LogicalResult GPUFuncOp::verifyType() {
|
LogicalResult GPUFuncOp::verifyType() {
|
||||||
Type type = getTypeAttr().getValue();
|
Type type = getTypeAttr().getValue();
|
||||||
|
@ -39,19 +39,21 @@ static void createForAllDimensions(OpBuilder &builder, Location loc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add operations generating block/thread ids and gird/block dimensions at the
|
// Add operations generating block/thread ids and grid/block dimensions at the
|
||||||
// beginning of `kernelFunc` and replace uses of the respective function args.
|
// beginning of the `body` region and replace uses of the respective function
|
||||||
static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
|
// arguments.
|
||||||
OpBuilder OpBuilder(kernelFunc.getBody());
|
static void injectGpuIndexOperations(Location loc, Region &body) {
|
||||||
|
OpBuilder builder(loc->getContext());
|
||||||
|
Block &firstBlock = body.front();
|
||||||
|
builder.setInsertionPointToStart(&firstBlock);
|
||||||
SmallVector<Value *, 12> indexOps;
|
SmallVector<Value *, 12> indexOps;
|
||||||
createForAllDimensions<gpu::BlockIdOp>(OpBuilder, loc, indexOps);
|
createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
|
||||||
createForAllDimensions<gpu::ThreadIdOp>(OpBuilder, loc, indexOps);
|
createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
|
||||||
createForAllDimensions<gpu::GridDimOp>(OpBuilder, loc, indexOps);
|
createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
|
||||||
createForAllDimensions<gpu::BlockDimOp>(OpBuilder, loc, indexOps);
|
createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
|
||||||
// Replace the leading 12 function args with the respective thread/block index
|
// Replace the leading 12 function args with the respective thread/block index
|
||||||
// operations. Iterate backwards since args are erased and indices change.
|
// operations. Iterate backwards since args are erased and indices change.
|
||||||
for (int i = 11; i >= 0; --i) {
|
for (int i = 11; i >= 0; --i) {
|
||||||
auto &firstBlock = kernelFunc.front();
|
|
||||||
firstBlock.getArgument(i)->replaceAllUsesWith(indexOps[i]);
|
firstBlock.getArgument(i)->replaceAllUsesWith(indexOps[i]);
|
||||||
firstBlock.eraseArgument(i);
|
firstBlock.eraseArgument(i);
|
||||||
}
|
}
|
||||||
@ -63,7 +65,7 @@ static bool isInliningBeneficiary(Operation *op) {
|
|||||||
|
|
||||||
// Move arguments of the given kernel function into the function if this reduces
|
// Move arguments of the given kernel function into the function if this reduces
|
||||||
// the number of kernel arguments.
|
// the number of kernel arguments.
|
||||||
static gpu::LaunchFuncOp inlineBeneficiaryOps(FuncOp kernelFunc,
|
static gpu::LaunchFuncOp inlineBeneficiaryOps(gpu::GPUFuncOp kernelFunc,
|
||||||
gpu::LaunchFuncOp launch) {
|
gpu::LaunchFuncOp launch) {
|
||||||
OpBuilder kernelBuilder(kernelFunc.getBody());
|
OpBuilder kernelBuilder(kernelFunc.getBody());
|
||||||
auto &firstBlock = kernelFunc.getBody().front();
|
auto &firstBlock = kernelFunc.getBody().front();
|
||||||
@ -107,31 +109,30 @@ static gpu::LaunchFuncOp inlineBeneficiaryOps(FuncOp kernelFunc,
|
|||||||
|
|
||||||
// Outline the `gpu.launch` operation body into a kernel function. Replace
|
// Outline the `gpu.launch` operation body into a kernel function. The
|
||||||
// `gpu.return` operations by `std.return` in the generated function.
|
// `gpu.return` operations are left untouched in the generated function.
|
||||||
static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
|
static gpu::GPUFuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
|
||||||
Location loc = launchOp.getLoc();
|
Location loc = launchOp.getLoc();
|
||||||
|
// Create a builder with no insertion point, insertion will happen separately
|
||||||
|
// due to symbol table manipulation.
|
||||||
|
OpBuilder builder(launchOp.getContext());
|
||||||
|
|
||||||
SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes());
|
SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes());
|
||||||
FunctionType type =
|
FunctionType type =
|
||||||
FunctionType::get(kernelOperandTypes, {}, launchOp.getContext());
|
FunctionType::get(kernelOperandTypes, {}, launchOp.getContext());
|
||||||
std::string kernelFuncName =
|
std::string kernelFuncName =
|
||||||
Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str();
|
Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str();
|
||||||
FuncOp outlinedFunc = FuncOp::create(loc, kernelFuncName, type);
|
auto outlinedFunc = builder.create<gpu::GPUFuncOp>(loc, kernelFuncName, type);
|
||||||
outlinedFunc.getBody().takeBody(launchOp.body());
|
|
||||||
Builder builder(launchOp.getContext());
|
|
||||||
outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
|
outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
|
||||||
builder.getUnitAttr());
|
builder.getUnitAttr());
|
||||||
injectGpuIndexOperations(loc, outlinedFunc);
|
outlinedFunc.body().takeBody(launchOp.body());
|
||||||
outlinedFunc.walk([](gpu::ReturnOp op) {
|
injectGpuIndexOperations(loc, outlinedFunc.body());
|
||||||
OpBuilder replacer(op);
|
|
||||||
replacer.create<ReturnOp>(op.getLoc());
|
|
||||||
op.erase();
|
|
||||||
});
|
|
||||||
return outlinedFunc;
|
return outlinedFunc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace `gpu.launch` operations with a `gpu.launch_func` operation launching
|
// Replace `gpu.launch` operations with a `gpu.launch_func` operation launching
|
||||||
// `kernelFunc`. The kernel func contains the body of the `gpu.launch` with
|
// `kernelFunc`. The kernel func contains the body of the `gpu.launch` with
|
||||||
// constant region arguments inlined.
|
// constant region arguments inlined.
|
||||||
static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
|
static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp,
|
||||||
|
gpu::GPUFuncOp kernelFunc) {
|
||||||
OpBuilder builder(launchOp);
|
OpBuilder builder(launchOp);
|
||||||
auto launchFuncOp = builder.create<gpu::LaunchFuncOp>(
|
auto launchFuncOp = builder.create<gpu::LaunchFuncOp>(
|
||||||
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
|
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
|
||||||
@ -160,7 +161,7 @@ public:
|
|||||||
// Insert just after the function.
|
// Insert just after the function.
|
||||||
Block::iterator insertPt(func.getOperation()->getNextNode());
|
Block::iterator insertPt(func.getOperation()->getNextNode());
|
||||||
func.walk([&](gpu::LaunchOp op) {
|
func.walk([&](gpu::LaunchOp op) {
|
||||||
FuncOp outlinedFunc = outlineKernelFunc(op);
|
gpu::GPUFuncOp outlinedFunc = outlineKernelFunc(op);
|
||||||
|
|
||||||
// Create nested module and insert outlinedFunc. The module will
|
// Create nested module and insert outlinedFunc. The module will
|
||||||
// originally get the same name as the function, but may be renamed on
|
// originally get the same name as the function, but may be renamed on
|
||||||
@ -183,7 +184,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// Returns a module containing kernelFunc and all callees (recursive).
|
// Returns a module containing kernelFunc and all callees (recursive).
|
||||||
ModuleOp createKernelModule(FuncOp kernelFunc,
|
ModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
|
||||||
const SymbolTable &parentSymbolTable) {
|
const SymbolTable &parentSymbolTable) {
|
||||||
auto context = getModule().getContext();
|
auto context = getModule().getContext();
|
||||||
Builder builder(context);
|
Builder builder(context);
|
||||||
|
@ -249,43 +249,6 @@ Value *mlir::spirv::getBuiltinVariableValue(Operation *op,
|
|||||||
// Entry Function signature Conversion
|
// Entry Function signature Conversion
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
FuncOp mlir::spirv::lowerAsEntryFunction(
|
|
||||||
FuncOp funcOp, SPIRVTypeConverter &typeConverter,
|
|
||||||
ConversionPatternRewriter &rewriter,
|
|
||||||
spirv::EntryPointABIAttr entryPointInfo,
|
|
||||||
ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
|
|
||||||
auto fnType = funcOp.getType();
|
|
||||||
if (fnType.getNumResults()) {
|
|
||||||
funcOp.emitError("SPIR-V lowering only supports entry functions"
|
|
||||||
"with no return values right now");
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
if (fnType.getNumInputs() != argABIInfo.size()) {
|
|
||||||
funcOp.emitError(
|
|
||||||
"lowering as entry functions requires ABI info for all arguments");
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
// For entry functions need to make the signature void(void). Compute the
|
|
||||||
// replacement value for all arguments and replace all uses.
|
|
||||||
TypeConverter::SignatureConversion signatureConverter(fnType.getNumInputs());
|
|
||||||
{
|
|
||||||
for (auto argType : enumerate(funcOp.getType().getInputs())) {
|
|
||||||
auto convertedType = typeConverter.convertType(argType.value());
|
|
||||||
signatureConverter.addInputs(argType.index(), convertedType);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
auto newFuncOp = rewriter.cloneWithoutRegions(funcOp);
|
|
||||||
rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
|
|
||||||
newFuncOp.end());
|
|
||||||
newFuncOp.setType(rewriter.getFunctionType(
|
|
||||||
signatureConverter.getConvertedTypes(), llvm::None));
|
|
||||||
rewriter.applySignatureConversion(&newFuncOp.getBody(), signatureConverter);
|
|
||||||
rewriter.eraseOp(funcOp);
|
|
||||||
|
|
||||||
spirv::setABIAttrs(newFuncOp, entryPointInfo, argABIInfo);
|
|
||||||
return newFuncOp;
|
|
||||||
}
|
|
||||||
|
|
||||||
LogicalResult
|
LogicalResult
|
||||||
mlir::spirv::setABIAttrs(FuncOp funcOp, spirv::EntryPointABIAttr entryPointInfo,
|
mlir::spirv::setABIAttrs(FuncOp funcOp, spirv::EntryPointABIAttr entryPointInfo,
|
||||||
ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
|
ArrayRef<spirv::InterfaceVarABIAttr> argABIInfo) {
|
||||||
|
Loading…
Reference in New Issue
Block a user