Make use of the same_shape and TensorFlow ABI knowledge propagation passes.

PiperOrigin-RevId: 341586499
Change-Id: Ifa6177fba122d53375b47ec69fa8a401d51582ac
This commit is contained in:
Stephan Herhut 2020-11-10 03:26:51 -08:00 committed by TensorFlower Gardener
parent e897624fac
commit 2c05a4a796
2 changed files with 45 additions and 16 deletions

View File

@ -198,32 +198,57 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
return Status::OK(); return Status::OK();
} }
Status LowerGPUToLLVM(mlir::ModuleOp module, bool gpu_binary_only, Status AmendKernelLLVMIRWithStaticKnowledge(mlir::ModuleOp module) {
llvm::ArrayRef<uint32_t> same_shape, mlir::PassManager pm(module.getContext());
llvm::StringRef gpu_binary_attr_name, applyTensorflowAndCLOptions(pm);
llvm::ArrayRef<std::string> architectures,
bool generate_fatbin) { pm.addNestedPass<mlir::FuncOp>(
mlir::kernel_gen::transforms::CreatePropagateShapeKnowledgeToKernels());
pm.addNestedPass<mlir::FuncOp>(
mlir::kernel_gen::transforms::CreatePropagateTfAbiKnowledgeToKernels());
return failed(pm.run(module))
? InternalError("Amending LLVMIR with static knowledge failed.")
: Status::OK();
}
Status GenerateDeviceCode(mlir::ModuleOp module, bool gpu_binary_only,
llvm::ArrayRef<uint32_t> same_shape,
llvm::StringRef gpu_binary_attr_name,
llvm::ArrayRef<std::string> architectures,
bool generate_fatbin) {
mlir::PassManager pm(module.getContext()); mlir::PassManager pm(module.getContext());
applyTensorflowAndCLOptions(pm); applyTensorflowAndCLOptions(pm);
auto& kernel_pm = pm.nest<mlir::gpu::GPUModuleOp>(); auto& kernel_pm = pm.nest<mlir::gpu::GPUModuleOp>();
// TODO(herhut): Remove this.
if (gpu_binary_only) { if (gpu_binary_only) {
// Grab the original signature from the single function. // Grab the original signature from the single function.
kernel_pm.addNestedPass<mlir::LLVM::LLVMFuncOp>( kernel_pm.addNestedPass<mlir::LLVM::LLVMFuncOp>(
mlir::kernel_gen::transforms::CreatePropagateTensorFlowABIKnowledgePass( mlir::kernel_gen::transforms::CreatePropagateTensorFlowABIKnowledgePass(
same_shape)); same_shape));
} }
// Remove debug information to ensure we do not create debug PTX.
kernel_pm.addPass(mlir::createStripDebugInfoPass()); kernel_pm.addPass(mlir::createStripDebugInfoPass());
kernel_pm.addPass(mlir::kernel_gen::transforms::CreateGpuKernelToBlobPass( kernel_pm.addPass(mlir::kernel_gen::transforms::CreateGpuKernelToBlobPass(
gpu_binary_attr_name, architectures, generate_fatbin)); gpu_binary_attr_name, architectures, generate_fatbin));
if (!gpu_binary_only) { return failed(pm.run(module))
pm.addPass(mlir::kernel_gen::transforms::CreateTFKernelToLLVMPass()); ? InternalError("Generating device code failed.")
pm.addPass(mlir::createCanonicalizerPass()); : Status::OK();
pm.addPass(mlir::createCSEPass()); }
}
return failed(pm.run(module)) ? InternalError("Lowering to LLVM IR failed.") Status LowerHostSideToFinalForm(mlir::ModuleOp module) {
: Status::OK(); mlir::PassManager pm(module.getContext());
applyTensorflowAndCLOptions(pm);
pm.addPass(mlir::kernel_gen::transforms::CreateTFKernelToLLVMPass());
pm.addPass(mlir::createCanonicalizerPass());
pm.addPass(mlir::createCSEPass());
return failed(pm.run(module))
? InternalError("Final lowering of host side failed.")
: Status::OK();
} }
} // namespace } // namespace
@ -249,9 +274,13 @@ StatusOr<mlir::OwningModuleRef> GenerateKernelForTfCode(
#elif GOOGLE_CUDA #elif GOOGLE_CUDA
TF_RETURN_IF_ERROR(xla::mlir_gpu::LowerKernelBodiesToNVVM(module.get())); TF_RETURN_IF_ERROR(xla::mlir_gpu::LowerKernelBodiesToNVVM(module.get()));
#endif #endif
TF_RETURN_IF_ERROR(LowerGPUToLLVM(module.get(), gpu_binary_only, same_shape, TF_RETURN_IF_ERROR(AmendKernelLLVMIRWithStaticKnowledge(module.get()));
kGpuBinaryAttrName, architectures, TF_RETURN_IF_ERROR(GenerateDeviceCode(module.get(), gpu_binary_only,
generate_fatbin)); same_shape, kGpuBinaryAttrName,
architectures, generate_fatbin));
if (!gpu_binary_only) {
TF_RETURN_IF_ERROR(LowerHostSideToFinalForm(module.get()));
}
return module; return module;
} }

View File

@ -1,6 +1,6 @@
// RUN: tf_to_kernel --input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70,compute_75 // RUN: tf_to_kernel --input=%s --output=%t --same_shape=0,1 --unroll_factors=4 --tile_sizes=256 --arch=sm_70,compute_75
func @tanh(%arg: tensor<*xf32>) -> tensor<*xf32> { func @tanh(%arg: tensor<*xf32>) -> tensor<*xf32> attributes {tf_entry} {
%0 = "tf.Tanh"(%arg) : (tensor<*xf32>) -> tensor<*xf32> %0 = "tf.Tanh"(%arg) : (tensor<*xf32>) -> tensor<*xf32>
return %0 : tensor<*xf32> return %0 : tensor<*xf32>
} }