Move FuseInnerParallelLoopsPass to kernel_generator directory.
PiperOrigin-RevId: 348613506 Change-Id: I3a007d2d6e42459e6461994be018c496506ea4e2
This commit is contained in:
parent
f72495d54c
commit
36803f4d62
@ -57,7 +57,6 @@ cc_library(
|
|||||||
"//tensorflow/compiler/xla/service/gpu:target_constants",
|
"//tensorflow/compiler/xla/service/gpu:target_constants",
|
||||||
"//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
|
"//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
|
||||||
"//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering",
|
"//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering",
|
||||||
"//tensorflow/compiler/xla/service/mlir_gpu:passes",
|
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core/platform:cuda_libdevice_path",
|
"//tensorflow/core/platform:cuda_libdevice_path",
|
||||||
"@llvm-project//llvm:Support",
|
"@llvm-project//llvm:Support",
|
||||||
|
@ -57,7 +57,6 @@ limitations under the License.
|
|||||||
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
|
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
|
||||||
#include "tensorflow/compiler/mlir/xla/transforms/passes.h"
|
#include "tensorflow/compiler/mlir/xla/transforms/passes.h"
|
||||||
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
|
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
|
||||||
#include "tensorflow/compiler/xla/service/mlir_gpu/passes.h"
|
|
||||||
#include "tensorflow/compiler/xla/util.h"
|
#include "tensorflow/compiler/xla/util.h"
|
||||||
#include "tensorflow/core/platform/logging.h"
|
#include "tensorflow/core/platform/logging.h"
|
||||||
#include "tensorflow/core/platform/path.h"
|
#include "tensorflow/core/platform/path.h"
|
||||||
@ -145,7 +144,7 @@ Status LowerTFtoGPU(mlir::ModuleOp module, llvm::ArrayRef<uint32_t> tile_sizes,
|
|||||||
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
|
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
|
||||||
// Fuse the inner-most loops.
|
// Fuse the inner-most loops.
|
||||||
pm.addNestedPass<mlir::FuncOp>(
|
pm.addNestedPass<mlir::FuncOp>(
|
||||||
xla::mlir_gpu::createFuseInnerParallelLoopsPass());
|
mlir::kernel_gen::transforms::CreateFuseInnerParallelLoopsPass());
|
||||||
// Run CSE to ensure that loads and stores to the same subview get
|
// Run CSE to ensure that loads and stores to the same subview get
|
||||||
// recognized as such.
|
// recognized as such.
|
||||||
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
|
pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
|
||||||
|
@ -76,6 +76,7 @@ cc_library(
|
|||||||
"bufferize_pass.cc",
|
"bufferize_pass.cc",
|
||||||
"embed_memref_prints.cc",
|
"embed_memref_prints.cc",
|
||||||
"embed_tf_framework_pass.cc",
|
"embed_tf_framework_pass.cc",
|
||||||
|
"fuse_inner_parallel_loops_pass.cc",
|
||||||
"gpu_kernel_to_blob_pass.cc",
|
"gpu_kernel_to_blob_pass.cc",
|
||||||
"map_parallel_loops_to_gpu.cc",
|
"map_parallel_loops_to_gpu.cc",
|
||||||
"parallel_loops_to_sequential.cc",
|
"parallel_loops_to_sequential.cc",
|
||||||
|
@ -0,0 +1,45 @@
|
|||||||
|
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project
|
||||||
|
#include "mlir/Dialect/SCF/Transforms.h" // from @llvm-project
|
||||||
|
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
|
||||||
|
|
||||||
|
namespace mlir {
|
||||||
|
namespace kernel_gen {
|
||||||
|
namespace transforms {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
#define GEN_PASS_CLASSES
|
||||||
|
#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"
|
||||||
|
|
||||||
|
struct FuseInnerParallelLoopsPass
|
||||||
|
: FuseInnerParallelLoopsPassBase<FuseInnerParallelLoopsPass> {
|
||||||
|
void runOnFunction() override {
|
||||||
|
getFunction().walk([](mlir::scf::ParallelOp op) {
|
||||||
|
mlir::scf::naivelyFuseParallelOps(op.region());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass() {
|
||||||
|
return std::make_unique<FuseInnerParallelLoopsPass>();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace transforms
|
||||||
|
} // namespace kernel_gen
|
||||||
|
} // namespace mlir
|
@ -83,6 +83,11 @@ std::unique_ptr<FunctionPass> CreateEmbedMemRefPrintsPass();
|
|||||||
/// Greedily maps loops to GPU hardware dimensions.
|
/// Greedily maps loops to GPU hardware dimensions.
|
||||||
std::unique_ptr<mlir::FunctionPass> CreateMapParallelLoopsPass();
|
std::unique_ptr<mlir::FunctionPass> CreateMapParallelLoopsPass();
|
||||||
|
|
||||||
|
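/// Creates a pass that fuses parallel loops nested inside other parallel
|
||||||
|
/// loops. Fusion has to be directed at the inner loops explicitly; this
|
||||||
|
/// cannot be done with a pass manager alone at the moment, as nested pass
|
||||||
|
/// managers require operations to be closed from above.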
|
||||||
|
std::unique_ptr<mlir::FunctionPass> CreateFuseInnerParallelLoopsPass();
|
||||||
|
|
||||||
} // namespace transforms
|
} // namespace transforms
|
||||||
|
|
||||||
#define GEN_PASS_REGISTRATION
|
#define GEN_PASS_REGISTRATION
|
||||||
|
@ -107,4 +107,15 @@ def MapParallelLoopsPass
|
|||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Declares the function pass registered as `fuse-inner-parallel-loops`.
// The pass instance is produced by transforms::CreateFuseInnerParallelLoopsPass().
def FuseInnerParallelLoopsPass
    : FunctionPass<"fuse-inner-parallel-loops"> {
  // The summary previously described a store-to-load forwarding pass; it now
  // states what this pass does.
  let summary = "Fuses parallel loops nested inside other parallel loops.";
  let constructor = "transforms::CreateFuseInnerParallelLoopsPass()";
  let description = [{
    Directs parallel loop fusion to the inner loops. This cannot be done with
    a passmanager alone ATM, as nested pass managers require operations to
    be closed from above.
  }];
}
|
||||||
|
|
||||||
#endif // TF_KERNEL_GEN_PASSES
|
#endif // TF_KERNEL_GEN_PASSES
|
||||||
|
Loading…
Reference in New Issue
Block a user