Move FuseInnerParallelLoopsPass to kernel_generator directory.
PiperOrigin-RevId: 348613506 Change-Id: I3a007d2d6e42459e6461994be018c496506ea4e2
This commit is contained in:
parent
f72495d54c
commit
36803f4d62
@ -57,7 +57,6 @@ cc_library(
|
||||
"//tensorflow/compiler/xla/service/gpu:target_constants",
|
||||
"//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
|
||||
"//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering",
|
||||
"//tensorflow/compiler/xla/service/mlir_gpu:passes",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core/platform:cuda_libdevice_path",
|
||||
"@llvm-project//llvm:Support",
|
||||
|
@ -57,7 +57,6 @@ limitations under the License.
|
||||
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
|
||||
#include "tensorflow/compiler/mlir/xla/transforms/passes.h"
|
||||
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
|
||||
#include "tensorflow/compiler/xla/service/mlir_gpu/passes.h"
|
||||
#include "tensorflow/compiler/xla/util.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/path.h"
|
||||
@ -145,7 +144,7 @@ Status LowerTFtoGPU(mlir::ModuleOp module, llvm::ArrayRef<uint32_t> tile_sizes,
|
||||
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
|
||||
// Fuse the inner-most loops.
|
||||
pm.addNestedPass<mlir::FuncOp>(
|
||||
xla::mlir_gpu::createFuseInnerParallelLoopsPass());
|
||||
mlir::kernel_gen::transforms::CreateFuseInnerParallelLoopsPass());
|
||||
// Run CSE to ensure that loads and stores to the same subview get
|
||||
// recognized as such.
|
||||
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
|
||||
|
@ -76,6 +76,7 @@ cc_library(
|
||||
"bufferize_pass.cc",
|
||||
"embed_memref_prints.cc",
|
||||
"embed_tf_framework_pass.cc",
|
||||
"fuse_inner_parallel_loops_pass.cc",
|
||||
"gpu_kernel_to_blob_pass.cc",
|
||||
"map_parallel_loops_to_gpu.cc",
|
||||
"parallel_loops_to_sequential.cc",
|
||||
|
@ -0,0 +1,45 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project
|
||||
#include "mlir/Dialect/SCF/Transforms.h" // from @llvm-project
|
||||
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace kernel_gen {
|
||||
namespace transforms {
|
||||
namespace {
|
||||
|
||||
#define GEN_PASS_CLASSES
|
||||
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"
|
||||
|
||||
struct FuseInnerParallelLoopsPass
|
||||
: FuseInnerParallelLoopsPassBase<FuseInnerParallelLoopsPass> {
|
||||
void runOnFunction() override {
|
||||
getFunction().walk([](mlir::scf::ParallelOp op) {
|
||||
mlir::scf::naivelyFuseParallelOps(op.region());
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass() {
|
||||
return std::make_unique<FuseInnerParallelLoopsPass>();
|
||||
}
|
||||
|
||||
} // namespace transforms
|
||||
} // namespace kernel_gen
|
||||
} // namespace mlir
|
@ -83,6 +83,11 @@ std::unique_ptr<FunctionPass> CreateEmbedMemRefPrintsPass();
|
||||
/// Greedily maps loops to GPU hardware dimensions.
|
||||
std::unique_ptr<mlir::FunctionPass> CreateMapParallelLoopsPass();
|
||||
|
||||
/// We need to direct fusion to the inner loops. This cannot be done with
|
||||
/// a passmanager alone ATM, as nested pass managers require operations to
|
||||
/// be isolated from above.
|
||||
std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass();
|
||||
|
||||
} // namespace transforms
|
||||
|
||||
#define GEN_PASS_REGISTRATION
|
||||
|
@ -107,4 +107,15 @@ def MapParallelLoopsPass
|
||||
}];
|
||||
}
|
||||
|
||||
// Declares the function pass that fuses parallel loops nested inside other
// parallel loops. The summary previously described store-to-load forwarding,
// which did not match this pass or its description below.
def FuseInnerParallelLoopsPass
    : FunctionPass<"fuse-inner-parallel-loops"> {
  let summary = "Fuses the inner-most parallel loops.";
  let constructor = "transforms::CreateFuseInnerParallelLoopsPass()";
  let description = [{
    Directs parallel loop fusion to the inner loops. This cannot be done with
    a passmanager alone ATM, as nested pass managers require operations to
    be closed from above.
  }];
}
|
||||
|
||||
#endif // TF_KERNEL_GEN_PASSES
|
||||
|
Loading…
Reference in New Issue
Block a user