From f72495d54ccc0260628a42daebd2f3af9fba3b7c Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 22 Dec 2020 04:06:50 -0800 Subject: [PATCH] Move MapParallelLoopsPass to kernel_generator directory. PiperOrigin-RevId: 348611655 Change-Id: Id2341c80f639018e5b4c92ddf82e722763d22917 --- .../mlir/tools/kernel_gen/kernel_creator.cc | 3 +- .../mlir/tools/kernel_gen/transforms/BUILD | 2 + .../transforms/map_parallel_loops_to_gpu.cc | 41 +++++++++++++++++++ .../mlir/tools/kernel_gen/transforms/passes.h | 5 ++- .../tools/kernel_gen/transforms/passes.td | 9 ++++ 5 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/map_parallel_loops_to_gpu.cc diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc index 143505bd51c..51c0f353da0 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc @@ -157,7 +157,8 @@ Status LowerTFtoGPU(mlir::ModuleOp module, llvm::ArrayRef tile_sizes, pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); // Greedily map the remaining loop to GPU hardware dimensions. - pm.addNestedPass<::mlir::FuncOp>(xla::mlir_gpu::createMapParallelLoopsPass()); + pm.addNestedPass<::mlir::FuncOp>( + mlir::kernel_gen::transforms::CreateMapParallelLoopsPass()); // Now lower the shape computations, bufferize all remaining ops and insert // deallocs. 
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index 5ed47ac2416..d328c5b28cc 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -77,6 +77,7 @@ cc_library( "embed_memref_prints.cc", "embed_tf_framework_pass.cc", "gpu_kernel_to_blob_pass.cc", + "map_parallel_loops_to_gpu.cc", "parallel_loops_to_sequential.cc", "same_shape_propagation.cc", "shape_to_descriptors_pass.cc", @@ -103,6 +104,7 @@ cc_library( ":tf_framework_legalize_to_llvm", "@llvm-project//llvm:Support", "@llvm-project//mlir:GPUDialect", + "@llvm-project//mlir:GPUTransforms", "@llvm-project//mlir:GPUToGPURuntimeTransforms", "@llvm-project//mlir:IR", "@llvm-project//mlir:LLVMDialect", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/map_parallel_loops_to_gpu.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/map_parallel_loops_to_gpu.cc new file mode 100644 index 00000000000..296b3336dfc --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/map_parallel_loops_to_gpu.cc @@ -0,0 +1,41 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mlir/Dialect/GPU/ParallelLoopMapper.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" + +namespace mlir { +namespace kernel_gen { +namespace transforms { +namespace { + +#define GEN_PASS_CLASSES +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" + +struct MapParallelLoopsPass : MapParallelLoopsPassBase { + void runOnFunction() override { + mlir::greedilyMapParallelSCFToGPU(getFunction().getBody()); + } +}; + +} // namespace + +std::unique_ptr CreateMapParallelLoopsPass() { + return std::make_unique(); +} + +} // namespace transforms +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h index a29f4121e1c..4a940dde997 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h @@ -52,7 +52,7 @@ std::unique_ptr> CreateTFKernelToLLVMPass( // Pass to tranform shape computations in shape dialect to standard and scf // using memref descriptors. -std::unique_ptr > CreateShapeToDescriptorsPass(); +std::unique_ptr> CreateShapeToDescriptorsPass(); // Pass to tranform hlo-level computations on values to their corresponding // parts on buffers. @@ -80,6 +80,9 @@ std::unique_ptr CreatePropagateShapeKnowledgeToKernels(); // Pass to print content of memrefs. std::unique_ptr CreateEmbedMemRefPrintsPass(); +/// Greedily maps loops to GPU hardware dimensions. 
+std::unique_ptr CreateMapParallelLoopsPass(); + } // namespace transforms #define GEN_PASS_REGISTRATION diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td index abc1cb6ab06..af7613ecd3a 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td @@ -98,4 +98,13 @@ def EmbedMemRefPrintsPass : FunctionPass<"embed-memref-prints"> { let constructor = "transforms::CreateEmbedMemRefPrintsPass()"; } +def MapParallelLoopsPass + : FunctionPass<"map-parallel-loops-to-gpu"> { + let summary = "Greedily maps loops to GPU hardware dimensions."; + let constructor = "transforms::CreateMapParallelLoopsPass()"; + let description = [{ + Greedily maps loops to GPU hardware dimensions. + }]; +} + #endif // TF_KERNEL_GEN_PASSES