From 2c05a4a796a853f92df819b61792720df0859a38 Mon Sep 17 00:00:00 2001
From: Stephan Herhut
Date: Tue, 10 Nov 2020 03:26:51 -0800
Subject: [PATCH] Make use of same_shape and tensorflow abi knowledge
 propagation passes.

PiperOrigin-RevId: 341586499
Change-Id: Ifa6177fba122d53375b47ec69fa8a401d51582ac
---
 .../mlir/tools/kernel_gen/kernel_creator.cc   | 59 ++++++++++++++-----
 .../kernel_gen/tests/tf_to_kernel/tanh.mlir   |  2 +-
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
index 5692070dac0..8850a61fc3e 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
@@ -198,32 +198,57 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
   return Status::OK();
 }
 
-Status LowerGPUToLLVM(mlir::ModuleOp module, bool gpu_binary_only,
-                      llvm::ArrayRef<uint32_t> same_shape,
-                      llvm::StringRef gpu_binary_attr_name,
-                      llvm::ArrayRef<std::string> architectures,
-                      bool generate_fatbin) {
+Status AmendKernelLLVMIRWithStaticKnowledge(mlir::ModuleOp module) {
+  mlir::PassManager pm(module.getContext());
+  applyTensorflowAndCLOptions(pm);
+
+  pm.addNestedPass<mlir::FuncOp>(
+      mlir::kernel_gen::transforms::CreatePropagateShapeKnowledgeToKernels());
+  pm.addNestedPass<mlir::FuncOp>(
+      mlir::kernel_gen::transforms::CreatePropagateTfAbiKnowledgeToKernels());
+
+  return failed(pm.run(module))
+             ? InternalError("Amending LLVMIR with static knowledge failed.")
+             : Status::OK();
+}
+
+Status GenerateDeviceCode(mlir::ModuleOp module, bool gpu_binary_only,
+                          llvm::ArrayRef<uint32_t> same_shape,
+                          llvm::StringRef gpu_binary_attr_name,
+                          llvm::ArrayRef<std::string> architectures,
+                          bool generate_fatbin) {
   mlir::PassManager pm(module.getContext());
   applyTensorflowAndCLOptions(pm);
 
   auto& kernel_pm = pm.nest<mlir::gpu::GPUModuleOp>();
+  // TODO(herhut): Remove this.
   if (gpu_binary_only) {
     // Grab the original signature from the single function.
     kernel_pm.addNestedPass<mlir::LLVM::LLVMFuncOp>(
         mlir::kernel_gen::transforms::CreatePropagateTensorFlowABIKnowledgePass(
             same_shape));
   }
+  // Remove debug information to ensure we do not create debug PTX.
   kernel_pm.addPass(mlir::createStripDebugInfoPass());
   kernel_pm.addPass(mlir::kernel_gen::transforms::CreateGpuKernelToBlobPass(
       gpu_binary_attr_name, architectures, generate_fatbin));
-  if (!gpu_binary_only) {
-    pm.addPass(mlir::kernel_gen::transforms::CreateTFKernelToLLVMPass());
-    pm.addPass(mlir::createCanonicalizerPass());
-    pm.addPass(mlir::createCSEPass());
-  }
-  return failed(pm.run(module)) ? InternalError("Lowering to LLVM IR failed.")
-                                : Status::OK();
+  return failed(pm.run(module))
+             ? InternalError("Generating device code failed.")
+             : Status::OK();
+}
+
+Status LowerHostSideToFinalForm(mlir::ModuleOp module) {
+  mlir::PassManager pm(module.getContext());
+  applyTensorflowAndCLOptions(pm);
+
+  pm.addPass(mlir::kernel_gen::transforms::CreateTFKernelToLLVMPass());
+  pm.addPass(mlir::createCanonicalizerPass());
+  pm.addPass(mlir::createCSEPass());
+
+  return failed(pm.run(module))
+             ? InternalError("Final lowering of host side failed.")
+             : Status::OK();
 }
 
 }  // namespace
@@ -249,9 +274,13 @@ StatusOr<mlir::OwningModuleRef> GenerateKernelForTfCode(
 #elif GOOGLE_CUDA
   TF_RETURN_IF_ERROR(xla::mlir_gpu::LowerKernelBodiesToNVVM(module.get()));
 #endif
-  TF_RETURN_IF_ERROR(LowerGPUToLLVM(module.get(), gpu_binary_only, same_shape,
-                                    kGpuBinaryAttrName, architectures,
-                                    generate_fatbin));
+  TF_RETURN_IF_ERROR(AmendKernelLLVMIRWithStaticKnowledge(module.get()));
+  TF_RETURN_IF_ERROR(GenerateDeviceCode(module.get(), gpu_binary_only,
+                                        same_shape, kGpuBinaryAttrName,
+                                        architectures, generate_fatbin));
+  if (!gpu_binary_only) {
+    TF_RETURN_IF_ERROR(LowerHostSideToFinalForm(module.get()));
+  }
   return module;
 }
 
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir
index 85bea1795a5..3eb736a359e 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_kernel/tanh.mlir
@@ -1,6 +1,6 @@
 // RUN: tf_to_kernel --input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70,compute_75
 
-func @tanh(%arg: tensor<*xf32>) -> tensor<*xf32> {
+func @tanh(%arg: tensor<*xf32>) -> tensor<*xf32> attributes {tf_entry} {
   %0 = "tf.Tanh"(%arg) : (tensor<*xf32>) -> tensor<*xf32>
   return %0 : tensor<*xf32>
 }