Add lowering pipeline from LHLO to NVVM.

This adds a first, simple lowering pipeline to the mlir_gpu compiler that lowers an MLIR module containing LHLO operations to a module whose kernel functions contain only the LLVM/NVVM dialects.

PiperOrigin-RevId: 266903498
parent 185a465225
commit dc48199572
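
The pipeline is exposed through two entry points, LowerLHLOToGPU and LowerKernelBodiesToNVVM, declared in kernel_lowering.h below. As a rough sketch (not part of this commit; the helper name is invented for illustration), a caller that already has an MLIR module populated with LHLO could drive the two stages back to back:

#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/core/lib/core/errors.h"

namespace xla {
namespace mlir_gpu {

// Hypothetical driver: LHLO -> affine -> gpu.launch_func + outlined kernels,
// then kernel bodies -> LLVM/NVVM dialects, all applied in place to `module`.
Status LowerLHLOModuleToNVVM(mlir::ModuleOp module) {
  TF_RETURN_IF_ERROR(LowerLHLOToGPU(module));
  TF_RETURN_IF_ERROR(LowerKernelBodiesToNVVM(module));
  return Status::OK();
}

}  // namespace mlir_gpu
}  // namespace xla

This mirrors how MlirCompiler::RunBackend invokes the two lowerings in the diff further down.
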
@@ -36,7 +36,10 @@ cc_library(
    hdrs = ["mlir_compiler.h"],
    deps = [
        ":failover_compiler",
        ":kernel_lowering",
        ":lhlo_dialect_emitter",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:compiler",
        "//tensorflow/compiler/xla/service:dump",
        "//tensorflow/compiler/xla/service:hlo",
@@ -49,6 +52,7 @@ cc_library(
        "//tensorflow/compiler/xla/service/gpu:target_constants",
        "//tensorflow/core:lib",
        "//tensorflow/stream_executor:stream_executor_headers",
        "@local_config_mlir//:GPUDialect",
        "@local_config_mlir//:IR",
        "@local_config_mlir//:LLVMDialect",
    ],
@@ -92,6 +96,29 @@ cc_library(
    ],
)

cc_library(
    name = "kernel_lowering",
    srcs = ["kernel_lowering.cc"],
    hdrs = ["kernel_lowering.h"],
    deps = [
        "//tensorflow/compiler/mlir/xla:hlo",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service/mlir_gpu/transforms:legalize_to_affine",
        "@com_google_absl//absl/memory",
        "@local_config_mlir//:GPUDialect",
        "@local_config_mlir//:GPUToNVVMTransforms",
        "@local_config_mlir//:GPUTransforms",
        "@local_config_mlir//:IR",
        "@local_config_mlir//:LLVMDialect",
        "@local_config_mlir//:LLVMTransforms",
        "@local_config_mlir//:LoopsToGPUPass",
        "@local_config_mlir//:NVVMDialect",
        "@local_config_mlir//:Pass",
        "@local_config_mlir//:Transforms",
    ],
)

cc_library(
    name = "mlir_irgen_test_base",
    testonly = True,
tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc (new file, 123 lines)
@@ -0,0 +1,123 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"

#include <memory>

#include "absl/memory/memory.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"  // TF:local_config_mlir
#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h"  // TF:local_config_mlir
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"  // TF:local_config_mlir
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"  // TF:local_config_mlir
#include "mlir/Dialect/GPU/GPUDialect.h"  // TF:local_config_mlir
#include "mlir/Dialect/GPU/Passes.h"  // TF:local_config_mlir
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"  // TF:local_config_mlir
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"  // TF:local_config_mlir
#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
#include "mlir/IR/Function.h"  // TF:local_config_mlir
#include "mlir/IR/Module.h"  // TF:local_config_mlir
#include "mlir/IR/OperationSupport.h"  // TF:local_config_mlir
#include "mlir/Pass/Pass.h"  // TF:local_config_mlir
#include "mlir/Pass/PassManager.h"  // TF:local_config_mlir
#include "mlir/Transforms/DialectConversion.h"  // TF:local_config_mlir
#include "mlir/Transforms/Passes.h"  // TF:local_config_mlir
#include "tensorflow/compiler/xla/service/mlir_gpu/transforms/legalize_to_affine.h"
#include "tensorflow/compiler/xla/util.h"

namespace xla {
namespace mlir_gpu {
namespace {

using ::mlir::ConversionTarget;
using ::mlir::FuncOp;
using ::mlir::LLVMTypeConverter;
using ::mlir::ModulePass;
using ::mlir::ModulePassBase;
using ::mlir::OwningRewritePatternList;
using ::mlir::PassManager;
using ::mlir::gpu::GPUDialect;
using ::mlir::LLVM::LLVMDialect;
using ::mlir::NVVM::NVVMDialect;

struct LowerKernelBodiesToNVVMPass
    : public ModulePass<LowerKernelBodiesToNVVMPass> {
 public:
  explicit LowerKernelBodiesToNVVMPass() = default;

  void runOnModule() override {
    auto module = getModule();
    ConversionTarget target(*module.getContext());
    LLVMTypeConverter converter(module.getContext());

    target.addLegalDialect<LLVMDialect>();
    target.addLegalDialect<NVVMDialect>();
    target.addDynamicallyLegalOp<FuncOp>(
        [&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });

    OwningRewritePatternList patterns;
    populateStdToLLVMConversionPatterns(converter, patterns);
    populateGpuToNVVMConversionPatterns(converter, patterns);

    module.walk([this, &target, &patterns, &converter](FuncOp function) {
      if (!GPUDialect::isKernel(function)) {
        return;
      }
      if (failed(applyFullConversion(function, target, patterns, &converter))) {
        signalPassFailure();
      }
    });
  }
};

}  // namespace

Status LowerLHLOToGPU(mlir::ModuleOp module) {
  PassManager pm(module.getContext());

  // Transform element-wise operations to Affine.
  pm.addPass(createLegalizeAffinePass());
  // Transform affine loops to gpu launches.
  // TODO(b/137624192) This pass requires known dimensions. Generalize it.
  pm.addPass(::mlir::createSimpleLoopsToGPUPass(/*numBlockDims=*/0,
                                                /*numThreadDims=*/2));
  // Outline launch bodies into separate kernel functions.
  pm.addPass(::mlir::createGpuKernelOutliningPass());
  // Some basic cleanup.
  pm.addPass(::mlir::createCSEPass());

  if (failed(pm.run(module))) {
    return InternalError("Lowering to GPU kernels failed.");
  }
  return Status::OK();
}

Status LowerKernelBodiesToNVVM(mlir::ModuleOp module) {
  // We cannot verify as the signature of the kernel is rewritten.
  PassManager pm(module.getContext(), /*verifyPasses=*/false);

  // Rewrite kernel functions to LLVM IR.
  pm.addPass(absl::make_unique<LowerKernelBodiesToNVVMPass>());
  // Some basic cleanup.
  pm.addPass(::mlir::createCSEPass());

  if (failed(pm.run(module))) {
    return InternalError("Lowering to NVVM IR failed.");
  }
  return Status::OK();
}

}  // namespace mlir_gpu
}  // namespace xla
tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h (new file, 32 lines)
@@ -0,0 +1,32 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_

#include "mlir/IR/Module.h"  // TF:local_config_mlir
#include "tensorflow/compiler/xla/status.h"

namespace xla {
namespace mlir_gpu {

Status LowerLHLOToGPU(mlir::ModuleOp module);

Status LowerKernelBodiesToNVVM(mlir::ModuleOp module);

}  // namespace mlir_gpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_
@@ -17,10 +17,13 @@ limitations under the License.

#include <memory>

#include "mlir/Dialect/GPU/GPUDialect.h"  // TF:local_config_mlir
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"  // TF:local_config_mlir
#include "mlir/IR/Function.h"  // TF:local_config_mlir
#include "mlir/IR/Location.h"  // TF:local_config_mlir
#include "mlir/IR/MLIRContext.h"  // TF:local_config_mlir
#include "mlir/IR/Module.h"  // TF:local_config_mlir
#include "tensorflow/compiler/xla/service/buffer_assignment.h"
#include "tensorflow/compiler/xla/service/dump.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
@@ -30,9 +33,12 @@
#include "tensorflow/compiler/xla/service/gpu/stream_assignment.h"
#include "tensorflow/compiler/xla/service/gpu/target_constants.h"
#include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_opcode.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/lib/core/errors.h"

namespace xla {
@@ -152,7 +158,22 @@ StatusOr<std::unique_ptr<Executable>> MlirCompiler::RunBackend(
  TF_RETURN_IF_ERROR(
      lhlo_emitter.EmitComputation(*module->entry_computation()));

  if (module_hook_.callback && !module_hook_.apply_on_lowered) {
  if (module_hook_.callback &&
      module_hook_.stage == IRHook::LoweringStage::LHLO) {
    module_hook_.callback(*mlir_module);
  }

  TF_RETURN_IF_ERROR(LowerLHLOToGPU(*mlir_module));

  if (module_hook_.callback &&
      module_hook_.stage == IRHook::LoweringStage::GPU) {
    module_hook_.callback(*mlir_module);
  }

  TF_RETURN_IF_ERROR(LowerKernelBodiesToNVVM(*mlir_module));

  if (module_hook_.callback &&
      module_hook_.stage == IRHook::LoweringStage::LLVM) {
    module_hook_.callback(*mlir_module);
  }

@@ -194,7 +215,9 @@ void MlirCompiler::SetModuleHook(IRHook module_hook) {
  module_hook_ = module_hook;
}

void MlirCompiler::RemoveModuleHook() { module_hook_ = {nullptr, false}; }
void MlirCompiler::RemoveModuleHook() {
  module_hook_ = {nullptr, IRHook::LoweringStage::LHLO};
}

}  // namespace mlir_gpu
}  // namespace xla
@@ -57,8 +57,10 @@ class MlirCompiler : public Compiler {
  }

  struct IRHook {
    enum class LoweringStage { LHLO, GPU, LLVM };

    std::function<void(mlir::ModuleOp)> callback;
    bool apply_on_lowered;
    LoweringStage stage;
  };

  void SetModuleHook(IRHook module_hook);
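
For reference, a minimal sketch of how a caller might use the new staged hook (illustration only; the free function and its name are invented here, and the include path for mlir_compiler.h is taken from the BUILD target above):

#include "mlir/IR/Module.h"  // TF:local_config_mlir
#include "tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h"

// Hypothetical helper: dump the MLIR module right after the LHLO->GPU stage.
void DumpGpuStageIr(xla::mlir_gpu::MlirCompiler* compiler) {
  using IRHook = xla::mlir_gpu::MlirCompiler::IRHook;
  compiler->SetModuleHook(
      {[](mlir::ModuleOp module) { module.dump(); },
       IRHook::LoweringStage::GPU});
  // ... compile here, e.g. through the usual RunBackend path ...
  compiler->RemoveModuleHook();
}
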
@@ -31,7 +31,7 @@ namespace mlir_gpu {

void MlirIrGenTestBase::CompileAndVerifyIr(
    std::unique_ptr<HloModule> hlo_module, const string& pattern,
    bool match_lowered_ir) {
    LoweringStage stage) {
  MlirCompiler* compiler = GetMLIRCompiler();
  string ir;
  compiler->SetModuleHook({[&ir](mlir::ModuleOp module) -> Status {
@@ -42,7 +42,7 @@ void MlirIrGenTestBase::CompileAndVerifyIr(
                             ir = buffer_string;
                             return Status::OK();
                           },
                           match_lowered_ir});
                           stage});
  Status status = CompileToExecutable(std::move(hlo_module)).status();
  compiler->RemoveModuleHook();
  TF_ASSERT_OK(status);
@@ -54,12 +54,12 @@ void MlirIrGenTestBase::CompileAndVerifyIr(

void MlirIrGenTestBase::CompileAndVerifyIr(const string& hlo_text,
                                           const string& expected_llvm_ir,
                                           bool match_lowered_ir) {
                                           LoweringStage stage) {
  HloModuleConfig config;
  config.set_debug_options(GetDebugOptionsForTest());
  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                          ParseAndReturnUnverifiedModule(hlo_text, config));
  CompileAndVerifyIr(std::move(module), expected_llvm_ir, match_lowered_ir);
  CompileAndVerifyIr(std::move(module), expected_llvm_ir, stage);
}

MlirCompiler* MlirIrGenTestBase::GetMLIRCompiler() {
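
The body of the hook lambda is truncated in the hunk above (only the trailing `ir = buffer_string;` is visible). A common way to capture a module's textual IR into a string for this kind of FileCheck matching is to print it into an llvm::raw_string_ostream; the sketch below is an assumption about that elided part, not the actual code:

#include <string>

#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/Module.h"  // TF:local_config_mlir

// Hypothetical helper: serialize an MLIR module to a string.
std::string ModuleToString(mlir::ModuleOp module) {
  std::string buffer_string;
  llvm::raw_string_ostream ostream(buffer_string);
  module.print(ostream);
  ostream.flush();  // Make sure everything is written into buffer_string.
  return buffer_string;
}
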
@@ -27,6 +27,8 @@ namespace mlir_gpu {
// Tests that verify IR emitted by the CPU/GPU backend is as expected.
class MlirIrGenTestBase : public CodegenTestBase {
 protected:
  using LoweringStage = MlirCompiler::IRHook::LoweringStage;

  // Compiles the given HLO module to MLIR IR and verifies the IR matches the
  // given pattern. `pattern` is in the FileCheck pattern matching syntax
  // (http://llvm.org/docs/CommandGuide/FileCheck.html).
@@ -37,13 +39,14 @@ class MlirIrGenTestBase : public CodegenTestBase {
  // steps to LLVM IR are applied; otherwise, the IR before lowering is
  // matched.
  void CompileAndVerifyIr(std::unique_ptr<HloModule> hlo_module,
                          const string& pattern, bool match_lowered_ir = false);
                          const string& pattern,
                          LoweringStage stage = LoweringStage::LHLO);

  // A thin wrapper around CompileAndVerifyIr that parses `hlo_text` to create
  // an HLO module.
  void CompileAndVerifyIr(const string& hlo_text,
                          const string& expected_llvm_ir,
                          bool match_lowered_ir = false);
                          LoweringStage stage = LoweringStage::LHLO);

  // Compiles and returns module with optimizations from a given HLO.
  StatusOr<std::unique_ptr<HloModule>> GetOptimizedModule(
@@ -47,6 +47,49 @@ ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
)");
}

TEST_F(LhloGenTest, AddInGPUDialect) {
  CompileAndVerifyIr(R"(
HloModule Add

ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
})",
                     R"(
;CHECK: func @add(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]) {
;CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ARG0]], %[[ARG1]], %[[ARG2]]
;CHECK: }
;CHECK: func @add_kernel(%[[ARG0]]: [[TYPE]], %[[ARG1]]: [[TYPE]], %[[ARG2]]: [[TYPE]]
;CHECK: load %[[ARG0]][[INDEX:.*]]
;CHECK: load %[[ARG1]][[INDEX]]
;CHECK: store %{{.*}}, %[[ARG2]][[INDEX]]
)",
                     LoweringStage::GPU);
}

TEST_F(LhloGenTest, AddInLVVMDialect) {
  CompileAndVerifyIr(R"(
HloModule Add

ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
})",
                     R"(
;CHECK: func @add_kernel(%[[ARG0:.*]]: [[TYPE:!llvm<.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]
;CHECK: %[[GEP0:.*]] = llvm.getelementptr %[[ARG0]][[INDEX:.*]]
;CHECK: %[[VAL0:.*]] = llvm.load %[[GEP0]]
;CHECK: %[[GEP1:.*]] = llvm.getelementptr %[[ARG1]][[INDEX]]
;CHECK: %[[VAL1:.*]] = llvm.load %[[GEP1]]
;CHECK: %[[VAL2:.*]] = llvm.fadd %[[VAL0]], %[[VAL1]]
;CHECK: %[[GEP2:.*]] = llvm.getelementptr %[[ARG2]][[INDEX]]
;CHECK: llvm.store %[[VAL2]], %[[GEP2]]
)",
                     LoweringStage::LLVM);
}

TEST_F(LhloGenTest, AddMultiply) {
  CompileAndVerifyIr(R"(
HloModule AddMultiply
@@ -24,7 +24,7 @@ namespace xla {
namespace mlir_gpu {

// Lowers from LHLO dialect to affine dialect.
std::unique_ptr<::mlir::FunctionPassBase> createLegalizeAffine();
std::unique_ptr<::mlir::FunctionPassBase> createLegalizeAffinePass();

// Adds patterns to convert LHLO binary ops to affine loops.
void AppendBinaryOpsPatterns(::mlir::MLIRContext* context,