Move FuseInnerParallelLoopsPass to kernel_generator directory.

PiperOrigin-RevId: 348613506
Change-Id: I3a007d2d6e42459e6461994be018c496506ea4e2
This commit is contained in:
Stephan Herhut 2020-12-22 04:26:16 -08:00 committed by TensorFlower Gardener
parent f72495d54c
commit 36803f4d62
6 changed files with 63 additions and 3 deletions

View File

@ -57,7 +57,6 @@ cc_library(
"//tensorflow/compiler/xla/service/gpu:target_constants", "//tensorflow/compiler/xla/service/gpu:target_constants",
"//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend", "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
"//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering", "//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering",
"//tensorflow/compiler/xla/service/mlir_gpu:passes",
"//tensorflow/core:lib", "//tensorflow/core:lib",
"//tensorflow/core/platform:cuda_libdevice_path", "//tensorflow/core/platform:cuda_libdevice_path",
"@llvm-project//llvm:Support", "@llvm-project//llvm:Support",

View File

@ -57,7 +57,6 @@ limitations under the License.
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
#include "tensorflow/compiler/mlir/xla/transforms/passes.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h" #include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/passes.h"
#include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/path.h" #include "tensorflow/core/platform/path.h"
@ -145,7 +144,7 @@ Status LowerTFtoGPU(mlir::ModuleOp module, llvm::ArrayRef<uint32_t> tile_sizes,
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
// Fuse the inner-most loops. // Fuse the inner-most loops.
pm.addNestedPass<mlir::FuncOp>( pm.addNestedPass<mlir::FuncOp>(
xla::mlir_gpu::createFuseInnerParallelLoopsPass()); mlir::kernel_gen::transforms::CreateFuseInnerParallelLoopsPass());
// Run CSE to ensure that loads and stores to the same subview get // Run CSE to ensure that loads and stores to the same subview get
// recognized as such. // recognized as such.
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());

View File

@ -76,6 +76,7 @@ cc_library(
"bufferize_pass.cc", "bufferize_pass.cc",
"embed_memref_prints.cc", "embed_memref_prints.cc",
"embed_tf_framework_pass.cc", "embed_tf_framework_pass.cc",
"fuse_inner_parallel_loops_pass.cc",
"gpu_kernel_to_blob_pass.cc", "gpu_kernel_to_blob_pass.cc",
"map_parallel_loops_to_gpu.cc", "map_parallel_loops_to_gpu.cc",
"parallel_loops_to_sequential.cc", "parallel_loops_to_sequential.cc",

View File

@ -0,0 +1,45 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project
#include "mlir/Dialect/SCF/Transforms.h" // from @llvm-project
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
namespace mlir {
namespace kernel_gen {
namespace transforms {
namespace {
#define GEN_PASS_CLASSES
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"
// Function pass that visits every scf.parallel op inside the function and
// hands that op's region to MLIR's naivelyFuseParallelOps, so that fusion is
// directed at the loops nested within each parallel op rather than at the
// function's top level.
struct FuseInnerParallelLoopsPass
    : FuseInnerParallelLoopsPassBase<FuseInnerParallelLoopsPass> {
  void runOnFunction() override {
    auto function = getFunction();
    function.walk([](mlir::scf::ParallelOp parallel_op) {
      // The region of the visited parallel op is the scope in which fusion
      // candidates are searched for.
      auto &loop_body = parallel_op.region();
      mlir::scf::naivelyFuseParallelOps(loop_body);
    });
  }
};
} // namespace
// Factory for FuseInnerParallelLoopsPass; the caller receives ownership of a
// freshly constructed pass instance through the FunctionPass base type.
std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass() {
  std::unique_ptr<mlir::FunctionPass> pass =
      std::make_unique<FuseInnerParallelLoopsPass>();
  return pass;
}
} // namespace transforms
} // namespace kernel_gen
} // namespace mlir

View File

@ -83,6 +83,11 @@ std::unique_ptr<FunctionPass> CreateEmbedMemRefPrintsPass();
/// Greedily maps loops to GPU hardware dimensions. /// Greedily maps loops to GPU hardware dimensions.
std::unique_ptr<mlir::FunctionPass> CreateMapParallelLoopsPass(); std::unique_ptr<mlir::FunctionPass> CreateMapParallelLoopsPass();
/// Creates a function pass that directs parallel loop fusion to the inner
/// loops. A dedicated pass is needed because this cannot be done with a
/// passmanager alone ATM, as nested pass managers require operations to be
/// closed (i.e. isolated) from above.
std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass();
} // namespace transforms } // namespace transforms
#define GEN_PASS_REGISTRATION #define GEN_PASS_REGISTRATION

View File

@ -107,4 +107,15 @@ def MapParallelLoopsPass
}]; }];
} }
// Declarative specification of the pass that fuses parallel loops nested
// inside other parallel loops. The summary is the one-line text shown in pass
// listings, so it must describe this pass (loop fusion) and not store-to-load
// forwarding.
def FuseInnerParallelLoopsPass
    : FunctionPass<"fuse-inner-parallel-loops"> {
  let summary = "Fuse the inner-most parallel loops.";
  let constructor = "transforms::CreateFuseInnerParallelLoopsPass()";
  let description = [{
    Directs parallel loop fusion to the inner loops. This cannot be done with
    a passmanager alone ATM, as nested pass managers require operations to
    be closed from above.
  }];
}
#endif // TF_KERNEL_GEN_PASSES #endif // TF_KERNEL_GEN_PASSES