Move FuseInnerParallelLoopsPass to kernel_generator directory.

PiperOrigin-RevId: 348613506 Change-Id: I3a007d2d6e42459e6461994be018c496506ea4e2
2020-12-22 04:26:16 -08:00 · 2020-12-22 04:26:16 -08:00 · 36803f4d62
commit 36803f4d62
parent f72495d54c
6 changed files with 63 additions and 3 deletions
--- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD
@ -57,7 +57,6 @@ cc_library(
        "//tensorflow/compiler/xla/service/gpu:target_constants",
        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
        "//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering",
-        "//tensorflow/compiler/xla/service/mlir_gpu:passes",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:cuda_libdevice_path",
        "@llvm-project//llvm:Support",
--- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
@ -57,7 +57,6 @@ limitations under the License.
 #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
 #include "tensorflow/compiler/mlir/xla/transforms/passes.h"
 #include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
-#include "tensorflow/compiler/xla/service/mlir_gpu/passes.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/path.h"
@ -145,7 +144,7 @@ Status LowerTFtoGPU(mlir::ModuleOp module, llvm::ArrayRef<uint32_t> tile_sizes,
  pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
  // Fuse the inner-most loops.
  pm.addNestedPass<mlir::FuncOp>(
-      xla::mlir_gpu::createFuseInnerParallelLoopsPass());
+      mlir::kernel_gen::transforms::CreateFuseInnerParallelLoopsPass());
  // Run CSE to ensure that loads and stores to the same subview get
  // recognized as such.
  pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD
@ -76,6 +76,7 @@ cc_library(
        "bufferize_pass.cc",
        "embed_memref_prints.cc",
        "embed_tf_framework_pass.cc",
+        "fuse_inner_parallel_loops_pass.cc",
        "gpu_kernel_to_blob_pass.cc",
        "map_parallel_loops_to_gpu.cc",
        "parallel_loops_to_sequential.cc",
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/fuse_inner_parallel_loops_pass.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/fuse_inner_parallel_loops_pass.cc
@ -0,0 +1,45 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mlir/Dialect/SCF/SCF.h"  // from @llvm-project
+#include "mlir/Dialect/SCF/Transforms.h"  // from @llvm-project
+#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
+
+namespace mlir {
+namespace kernel_gen {
+namespace transforms {
+namespace {
+
+#define GEN_PASS_CLASSES
+#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"
+
+struct FuseInnerParallelLoopsPass
+    : FuseInnerParallelLoopsPassBase<FuseInnerParallelLoopsPass> {
+  void runOnFunction() override {
+    getFunction().walk([](mlir::scf::ParallelOp op) {
+      mlir::scf::naivelyFuseParallelOps(op.region());
+    });
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass() {
+  return std::make_unique<FuseInnerParallelLoopsPass>();
+}
+
+}  // namespace transforms
+}  // namespace kernel_gen
+}  // namespace mlir
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h
@ -83,6 +83,11 @@ std::unique_ptr<FunctionPass> CreateEmbedMemRefPrintsPass();
 /// Greedily maps loops to GPU hardware dimensions.
 std::unique_ptr<mlir::FunctionPass> CreateMapParallelLoopsPass();

+/// We need to direct fusion to the inner loops. This cannot be done with
+/// a passmanager alone ATM, as nested pass managers require operations to
+/// be closed from above.
+std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass();
+
 }  // namespace transforms

 #define GEN_PASS_REGISTRATION
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td
@ -107,4 +107,15 @@ def MapParallelLoopsPass
  }];
 }

+def FuseInnerParallelLoopsPass
+    : FunctionPass<"fuse-inner-parallel-loops"> {
+  let summary = "Limited pass to forward stores to loads.";
+  let constructor = "transforms::CreateFuseInnerParallelLoopsPass()";
+  let description = [{
+    Directs parallel loop fusion to the inner loops. This cannot be done with
+    a passmanager alone ATM, as nested pass managers require operations to
+    be closed from above.
+  }];
+}
+
 #endif // TF_KERNEL_GEN_PASSES