Add lowering pipeline from LHLO to NVVM.

This adds a first simple lowering pipeline to the mlir_gpu compiler that
takes an mlir module containing LHLO to a module with kernel functions
containing LLVM/NVVM dialect only.

PiperOrigin-RevId: 266903498
This commit is contained in:
Stephan Herhut 2019-09-03 04:56:33 -07:00 committed by TensorFlower Gardener
parent 185a465225
commit dc48199572
9 changed files with 263 additions and 10 deletions

View File

@ -36,7 +36,10 @@ cc_library(
hdrs = ["mlir_compiler.h"], hdrs = ["mlir_compiler.h"],
deps = [ deps = [
":failover_compiler", ":failover_compiler",
":kernel_lowering",
":lhlo_dialect_emitter", ":lhlo_dialect_emitter",
"//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla/service:buffer_assignment",
"//tensorflow/compiler/xla/service:compiler", "//tensorflow/compiler/xla/service:compiler",
"//tensorflow/compiler/xla/service:dump", "//tensorflow/compiler/xla/service:dump",
"//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo",
@ -49,6 +52,7 @@ cc_library(
"//tensorflow/compiler/xla/service/gpu:target_constants", "//tensorflow/compiler/xla/service/gpu:target_constants",
"//tensorflow/core:lib", "//tensorflow/core:lib",
"//tensorflow/stream_executor:stream_executor_headers", "//tensorflow/stream_executor:stream_executor_headers",
"@local_config_mlir//:GPUDialect",
"@local_config_mlir//:IR", "@local_config_mlir//:IR",
"@local_config_mlir//:LLVMDialect", "@local_config_mlir//:LLVMDialect",
], ],
@ -92,6 +96,29 @@ cc_library(
], ],
) )
cc_library(
name = "kernel_lowering",
srcs = ["kernel_lowering.cc"],
hdrs = ["kernel_lowering.h"],
deps = [
"//tensorflow/compiler/mlir/xla:hlo",
"//tensorflow/compiler/xla:status",
"//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla/service/mlir_gpu/transforms:legalize_to_affine",
"@com_google_absl//absl/memory",
"@local_config_mlir//:GPUDialect",
"@local_config_mlir//:GPUToNVVMTransforms",
"@local_config_mlir//:GPUTransforms",
"@local_config_mlir//:IR",
"@local_config_mlir//:LLVMDialect",
"@local_config_mlir//:LLVMTransforms",
"@local_config_mlir//:LoopsToGPUPass",
"@local_config_mlir//:NVVMDialect",
"@local_config_mlir//:Pass",
"@local_config_mlir//:Transforms",
],
)
cc_library( cc_library(
name = "mlir_irgen_test_base", name = "mlir_irgen_test_base",
testonly = True, testonly = True,

View File

@ -0,0 +1,123 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
#include <memory>
#include "absl/memory/memory.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" // TF:local_config_mlir
#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h" // TF:local_config_mlir
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" // TF:local_config_mlir
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // TF:local_config_mlir
#include "mlir/Dialect/GPU/GPUDialect.h" // TF:local_config_mlir
#include "mlir/Dialect/GPU/Passes.h" // TF:local_config_mlir
#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:local_config_mlir
#include "mlir/Dialect/LLVMIR/NVVMDialect.h" // TF:local_config_mlir
#include "mlir/IR/Attributes.h" // TF:local_config_mlir
#include "mlir/IR/Function.h" // TF:local_config_mlir
#include "mlir/IR/Module.h" // TF:local_config_mlir
#include "mlir/IR/OperationSupport.h" // TF:local_config_mlir
#include "mlir/Pass/Pass.h" // TF:local_config_mlir
#include "mlir/Pass/PassManager.h" // TF:local_config_mlir
#include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir
#include "mlir/Transforms/Passes.h" // TF:local_config_mlir
#include "tensorflow/compiler/xla/service/mlir_gpu/transforms/legalize_to_affine.h"
#include "tensorflow/compiler/xla/util.h"
namespace xla {
namespace mlir_gpu {
namespace {
using ::mlir::ConversionTarget;
using ::mlir::FuncOp;
using ::mlir::LLVMTypeConverter;
using ::mlir::ModulePass;
using ::mlir::ModulePassBase;
using ::mlir::OwningRewritePatternList;
using ::mlir::PassManager;
using ::mlir::gpu::GPUDialect;
using ::mlir::LLVM::LLVMDialect;
using ::mlir::NVVM::NVVMDialect;
// Module pass that rewrites the bodies of GPU kernel functions (functions
// carrying the gpu.kernel attribute) into the LLVM and NVVM dialects.
// Functions that are not kernels are left untouched.
struct LowerKernelBodiesToNVVMPass
    : public ModulePass<LowerKernelBodiesToNVVMPass> {
 public:
  explicit LowerKernelBodiesToNVVMPass() = default;

  void runOnModule() override {
    auto module = getModule();
    // After conversion, only LLVM and NVVM dialect operations may remain.
    ConversionTarget target(*module.getContext());
    LLVMTypeConverter converter(module.getContext());
    target.addLegalDialect<LLVMDialect>();
    target.addLegalDialect<NVVMDialect>();
    // A FuncOp is legal once its signature uses only LLVM-compatible types,
    // i.e. once the signature itself has been rewritten.
    target.addDynamicallyLegalOp<FuncOp>(
        [&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
    // Standard->LLVM patterns handle the generic ops in kernel bodies;
    // GPU->NVVM patterns handle gpu dialect ops (e.g. thread/block ids).
    OwningRewritePatternList patterns;
    populateStdToLLVMConversionPatterns(converter, patterns);
    populateGpuToNVVMConversionPatterns(converter, patterns);
    // Apply the conversion per kernel function only; a full conversion fails
    // the pass if any op inside a kernel cannot be legalized.
    module.walk([this, &target, &patterns, &converter](FuncOp function) {
      if (!GPUDialect::isKernel(function)) {
        return;
      }
      if (failed(applyFullConversion(function, target, patterns, &converter))) {
        signalPassFailure();
      }
    });
  }
};
} // namespace
// Lowers a module containing LHLO dialect operations to a module with GPU
// launch operations and outlined kernel functions. Returns an error status
// if any pass in the pipeline fails.
Status LowerLHLOToGPU(mlir::ModuleOp module) {
  PassManager pm(module.getContext());
  // Transform element-wise operations to Affine.
  pm.addPass(createLegalizeAffinePass());
  // Transform affine to gpu launches.
  // TODO(b/137624192) This pass requires known dimensions. Generalize it.
  pm.addPass(::mlir::createSimpleLoopsToGPUPass(/*numBlockDims=*/0,
                                                /*numThreadDims=*/2));
  // Take launches to launches with kernels.
  pm.addPass(::mlir::createGpuKernelOutliningPass());
  // Some basic cleanup.
  pm.addPass(::mlir::createCSEPass());
  if (failed(pm.run(module))) {
    return InternalError("Lowering to NVVM IR failed.");
  }
  return Status::OK();
}
// Lowers the bodies of the kernel functions produced by LowerLHLOToGPU to
// the LLVM/NVVM dialects. Returns an error status if the conversion fails.
Status LowerKernelBodiesToNVVM(mlir::ModuleOp module) {
  // We cannot verify as the signature of the kernel is rewritten.
  PassManager pm(module.getContext(), /*verifyPasses=*/false);
  // Rewrite kernel functions to LLVM IR.
  pm.addPass(absl::make_unique<LowerKernelBodiesToNVVMPass>());
  // Some basic cleanup.
  pm.addPass(::mlir::createCSEPass());
  if (failed(pm.run(module))) {
    return InternalError("Lowering to NVVM IR failed.");
  }
  return Status::OK();
}
} // namespace mlir_gpu
} // namespace xla

View File

@ -0,0 +1,32 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_
#include "mlir/IR/Module.h" // TF:local_config_mlir
#include "tensorflow/compiler/xla/status.h"
namespace xla {
namespace mlir_gpu {

// Lowers an MLIR module containing LHLO dialect operations to a module with
// gpu.launch_func operations and outlined kernel functions.
Status LowerLHLOToGPU(mlir::ModuleOp module);

// Lowers the bodies of the kernel functions in `module` (as produced by
// LowerLHLOToGPU) to the LLVM/NVVM dialects.
Status LowerKernelBodiesToNVVM(mlir::ModuleOp module);

}  // namespace mlir_gpu
}  // namespace xla
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_KERNEL_LOWERING_H_

View File

@ -17,10 +17,13 @@ limitations under the License.
#include <memory> #include <memory>
#include "mlir/Dialect/GPU/GPUDialect.h" // TF:local_config_mlir
#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:local_config_mlir #include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:local_config_mlir
#include "mlir/IR/Function.h" // TF:local_config_mlir
#include "mlir/IR/Location.h" // TF:local_config_mlir #include "mlir/IR/Location.h" // TF:local_config_mlir
#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir
#include "mlir/IR/Module.h" // TF:local_config_mlir #include "mlir/IR/Module.h" // TF:local_config_mlir
#include "tensorflow/compiler/xla/service/buffer_assignment.h"
#include "tensorflow/compiler/xla/service/dump.h" #include "tensorflow/compiler/xla/service/dump.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h" #include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
@ -30,9 +33,12 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/stream_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_assignment.h"
#include "tensorflow/compiler/xla/service/gpu/target_constants.h" #include "tensorflow/compiler/xla/service/gpu/target_constants.h"
#include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h" #include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h" #include "tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h" #include "tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/errors.h"
namespace xla { namespace xla {
@ -152,7 +158,22 @@ StatusOr<std::unique_ptr<Executable>> MlirCompiler::RunBackend(
TF_RETURN_IF_ERROR( TF_RETURN_IF_ERROR(
lhlo_emitter.EmitComputation(*module->entry_computation())); lhlo_emitter.EmitComputation(*module->entry_computation()));
if (module_hook_.callback && !module_hook_.apply_on_lowered) { if (module_hook_.callback &&
module_hook_.stage == IRHook::LoweringStage::LHLO) {
module_hook_.callback(*mlir_module);
}
TF_RETURN_IF_ERROR(LowerLHLOToGPU(*mlir_module));
if (module_hook_.callback &&
module_hook_.stage == IRHook::LoweringStage::GPU) {
module_hook_.callback(*mlir_module);
}
TF_RETURN_IF_ERROR(LowerKernelBodiesToNVVM(*mlir_module));
if (module_hook_.callback &&
module_hook_.stage == IRHook::LoweringStage::LLVM) {
module_hook_.callback(*mlir_module); module_hook_.callback(*mlir_module);
} }
@ -194,7 +215,9 @@ void MlirCompiler::SetModuleHook(IRHook module_hook) {
module_hook_ = module_hook; module_hook_ = module_hook;
} }
void MlirCompiler::RemoveModuleHook() { module_hook_ = {nullptr, false}; } void MlirCompiler::RemoveModuleHook() {
module_hook_ = {nullptr, IRHook::LoweringStage::LHLO};
}
} // namespace mlir_gpu } // namespace mlir_gpu
} // namespace xla } // namespace xla

View File

@ -57,8 +57,10 @@ class MlirCompiler : public Compiler {
} }
struct IRHook { struct IRHook {
enum class LoweringStage { LHLO, GPU, LLVM };
std::function<void(mlir::ModuleOp)> callback; std::function<void(mlir::ModuleOp)> callback;
bool apply_on_lowered; LoweringStage stage;
}; };
void SetModuleHook(IRHook module_hook); void SetModuleHook(IRHook module_hook);

View File

@ -31,7 +31,7 @@ namespace mlir_gpu {
void MlirIrGenTestBase::CompileAndVerifyIr( void MlirIrGenTestBase::CompileAndVerifyIr(
std::unique_ptr<HloModule> hlo_module, const string& pattern, std::unique_ptr<HloModule> hlo_module, const string& pattern,
bool match_lowered_ir) { LoweringStage stage) {
MlirCompiler* compiler = GetMLIRCompiler(); MlirCompiler* compiler = GetMLIRCompiler();
string ir; string ir;
compiler->SetModuleHook({[&ir](mlir::ModuleOp module) -> Status { compiler->SetModuleHook({[&ir](mlir::ModuleOp module) -> Status {
@ -42,7 +42,7 @@ void MlirIrGenTestBase::CompileAndVerifyIr(
ir = buffer_string; ir = buffer_string;
return Status::OK(); return Status::OK();
}, },
match_lowered_ir}); stage});
Status status = CompileToExecutable(std::move(hlo_module)).status(); Status status = CompileToExecutable(std::move(hlo_module)).status();
compiler->RemoveModuleHook(); compiler->RemoveModuleHook();
TF_ASSERT_OK(status); TF_ASSERT_OK(status);
@ -54,12 +54,12 @@ void MlirIrGenTestBase::CompileAndVerifyIr(
void MlirIrGenTestBase::CompileAndVerifyIr(const string& hlo_text, void MlirIrGenTestBase::CompileAndVerifyIr(const string& hlo_text,
const string& expected_llvm_ir, const string& expected_llvm_ir,
bool match_lowered_ir) { LoweringStage stage) {
HloModuleConfig config; HloModuleConfig config;
config.set_debug_options(GetDebugOptionsForTest()); config.set_debug_options(GetDebugOptionsForTest());
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module, TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
ParseAndReturnUnverifiedModule(hlo_text, config)); ParseAndReturnUnverifiedModule(hlo_text, config));
CompileAndVerifyIr(std::move(module), expected_llvm_ir, match_lowered_ir); CompileAndVerifyIr(std::move(module), expected_llvm_ir, stage);
} }
MlirCompiler* MlirIrGenTestBase::GetMLIRCompiler() { MlirCompiler* MlirIrGenTestBase::GetMLIRCompiler() {

View File

@ -27,6 +27,8 @@ namespace mlir_gpu {
// Tests that verify IR emitted by the CPU/GPU backend is as expected. // Tests that verify IR emitted by the CPU/GPU backend is as expected.
class MlirIrGenTestBase : public CodegenTestBase { class MlirIrGenTestBase : public CodegenTestBase {
protected: protected:
using LoweringStage = MlirCompiler::IRHook::LoweringStage;
// Compiles the given HLO module to MLIR IR and verifies the IR matches the // Compiles the given HLO module to MLIR IR and verifies the IR matches the
// given pattern. `pattern` is in the FileCheck pattern matching syntax // given pattern. `pattern` is in the FileCheck pattern matching syntax
// (http://llvm.org/docs/CommandGuide/FileCheck.html). // (http://llvm.org/docs/CommandGuide/FileCheck.html).
@ -37,13 +39,14 @@ class MlirIrGenTestBase : public CodegenTestBase {
// steps to LLVM IR are applied; otherwise, the IR before lowering is // steps to LLVM IR are applied; otherwise, the IR before lowering is
// matched. // matched.
void CompileAndVerifyIr(std::unique_ptr<HloModule> hlo_module, void CompileAndVerifyIr(std::unique_ptr<HloModule> hlo_module,
const string& pattern, bool match_lowered_ir = false); const string& pattern,
LoweringStage stage = LoweringStage::LHLO);
// A thin wrapper around CompileAndVerifyIr that parses `hlo_text` to create // A thin wrapper around CompileAndVerifyIr that parses `hlo_text` to create
// an HLO module. // an HLO module.
void CompileAndVerifyIr(const string& hlo_text, void CompileAndVerifyIr(const string& hlo_text,
const string& expected_llvm_ir, const string& expected_llvm_ir,
bool match_lowered_ir = false); LoweringStage stage = LoweringStage::LHLO);
// Compiles and returns module with optimizations from a given HLO. // Compiles and returns module with optimizations from a given HLO.
StatusOr<std::unique_ptr<HloModule>> GetOptimizedModule( StatusOr<std::unique_ptr<HloModule>> GetOptimizedModule(

View File

@ -47,6 +47,49 @@ ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
)"); )");
} }
// Checks that an element-wise f32 add is lowered to a gpu.launch_func call
// plus an outlined kernel that loads both operands and stores the result
// (verified at the GPU-dialect lowering stage).
TEST_F(LhloGenTest, AddInGPUDialect) {
  CompileAndVerifyIr(R"(
HloModule Add
ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
%x = f32[2,2]{1,0} parameter(0)
%y = f32[2,2]{1,0} parameter(1)
ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
})",
                     R"(
;CHECK: func @add(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]) {
;CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ARG0]], %[[ARG1]], %[[ARG2]]
;CHECK: }
;CHECK: func @add_kernel(%[[ARG0]]: [[TYPE]], %[[ARG1]]: [[TYPE]], %[[ARG2]]: [[TYPE]]
;CHECK: load %[[ARG0]][[INDEX:.*]]
;CHECK: load %[[ARG1]][[INDEX]]
;CHECK: store %{{.*}}, %[[ARG2]][[INDEX]]
)",
                     LoweringStage::GPU);
}
// Checks that the kernel body of an element-wise f32 add is fully lowered to
// the LLVM dialect (getelementptr/load/fadd/store), verified at the LLVM
// lowering stage. Renamed from "AddInLVVMDialect": the original name had a
// typo ("LVVM" instead of "LLVM").
TEST_F(LhloGenTest, AddInLLVMDialect) {
  CompileAndVerifyIr(R"(
HloModule Add
ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
%x = f32[2,2]{1,0} parameter(0)
%y = f32[2,2]{1,0} parameter(1)
ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
})",
                     R"(
;CHECK: func @add_kernel(%[[ARG0:.*]]: [[TYPE:!llvm<.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]
;CHECK: %[[GEP0:.*]] = llvm.getelementptr %[[ARG0]][[INDEX:.*]]
;CHECK: %[[VAL0:.*]] = llvm.load %[[GEP0]]
;CHECK: %[[GEP1:.*]] = llvm.getelementptr %[[ARG1]][[INDEX]]
;CHECK: %[[VAL1:.*]] = llvm.load %[[GEP1]]
;CHECK: %[[VAL2:.*]] = llvm.fadd %[[VAL0]], %[[VAL1]]
;CHECK: %[[GEP2:.*]] = llvm.getelementptr %[[ARG2]][[INDEX]]
;CHECK: llvm.store %[[VAL2]], %[[GEP2]]
)",
                     LoweringStage::LLVM);
}
TEST_F(LhloGenTest, AddMultiply) { TEST_F(LhloGenTest, AddMultiply) {
CompileAndVerifyIr(R"( CompileAndVerifyIr(R"(
HloModule AddMultiply HloModule AddMultiply

View File

@ -24,7 +24,7 @@ namespace xla {
namespace mlir_gpu { namespace mlir_gpu {
// Lowers from LHLO dialect to affine dialect. // Lowers from LHLO dialect to affine dialect.
std::unique_ptr<::mlir::FunctionPassBase> createLegalizeAffine(); std::unique_ptr<::mlir::FunctionPassBase> createLegalizeAffinePass();
// Adds patterns to convert LHLO binary ops to affine loops. // Adds patterns to convert LHLO binary ops to affine loops.
void AppendBinaryOpsPatterns(::mlir::MLIRContext* context, void AppendBinaryOpsPatterns(::mlir::MLIRContext* context,