[MLIR/GPU] Move utility functions to a separate file. NFC.
PiperOrigin-RevId: 303377339
Change-Id: I64983a99f9a36aa889e0a7e9f30dd0c0e6cd0561
parent 190ceb644e
commit 22325a5c7d
@@ -27,6 +27,7 @@ cc_library(
    srcs = ["conv_emitter.cc"],
    hdrs = ["conv_emitter.h"],
    deps = [
        ":conv_emitter_transforms",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
@@ -39,6 +40,23 @@ cc_library(
    ],
)

cc_library(
    name = "conv_emitter_transforms",
    srcs = ["conv_emitter_transforms.cc"],
    hdrs = ["conv_emitter_transforms.h"],
    deps = [
        "//tensorflow/core:lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:support",
        "@llvm-project//mlir:Affine",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:StandardOps",
        "@llvm-project//mlir:TransformUtils",
    ],
)

tf_cc_test(
    name = "conv_emitter_test",
    srcs = ["conv_emitter_test.cc"],
@@ -38,6 +38,7 @@ limitations under the License.
#include "mlir/Transforms/LoopUtils.h"  // from @llvm-project
#include "mlir/Transforms/RegionUtils.h"  // from @llvm-project
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h"
#include "tensorflow/compiler/xla/window_util.h"

namespace xla {
@@ -109,48 +110,6 @@ ShapeInfo GetShapeInfo(
  return shape_info;
}

bool IsSimpleLoop(mlir::AffineForOp loop) {
  return loop.getLowerBoundMap().isSingleConstant() &&
         loop.getLowerBoundMap().getSingleConstantResult() == 0 &&
         loop.getStep() == 1 && loop.getUpperBoundMap().getNumResults() == 1 &&
         std::next(loop.region().begin()) == loop.region().end();
}

struct BoundAffineMap {
  mlir::AffineMap affine_map;
  std::vector<mlir::Value> operands;
};

BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op) {
  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
    return {load.getAffineMap(),
            std::vector<mlir::Value>(load.getMapOperands().begin(),
                                     load.getMapOperands().end())};
  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
    return {store.getAffineMap(),
            std::vector<mlir::Value>(store.getMapOperands().begin(),
                                     store.getMapOperands().end())};
  } else {
    CHECK(false);
  }
}

mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
                                       BoundAffineMap new_affine,
                                       mlir::OpBuilder builder) {
  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
    return builder.create<mlir::AffineLoadOp>(
        builder.getUnknownLoc(), load.getMemRef(), new_affine.affine_map,
        new_affine.operands);
  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
    return builder.create<mlir::AffineStoreOp>(
        builder.getUnknownLoc(), store.getValueToStore(), store.getMemRef(),
        new_affine.affine_map, new_affine.operands);
  } else {
    CHECK(false);
  }
}

void SetMemRef(mlir::Operation* op, mlir::Value memref) {
  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
    load.setMemRef(memref);
@@ -161,127 +120,6 @@ void SetMemRef(mlir::Operation* op, mlir::Value memref) {
  }
}

std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
    absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder) {
  std::vector<mlir::AffineForOp> loops;
  loops.reserve(upper_bounds.size());
  for (int64_t dim : upper_bounds) {
    auto loop =
        builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, dim);
    loops.push_back(loop);
    builder = loop.getBodyBuilder();
  }
  return loops;
}

void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
                           mlir::OpBuilder builder) {
  CHECK(IsSimpleLoop(loop));

  loop.setUpperBoundMap(mlir::AffineMap::get(
      loop.getUpperBoundMap().getNumDims(),
      loop.getUpperBoundMap().getNumSymbols(), {new_bound}));
}

// Tile a loop with trip count N by `size`. For now, N has to be a multiple of
// `size`, but this constraint will be removed later.
//
// The major loop (with trip count N / size) stays as-is, while the minor loop
// (with trip count `size`) takes over the body of `target` and becomes its
// new body.
//
// `target` has to be within the same "perfectly nested loop group" as `loop`.
// See the documentation for mlir::getPerfectlyNestedLoops.
//
// Example:
// Before tiling `loop` with tile size X:
//   for (loop in N)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         // pass loop into affine maps
// After:
//   for (loop in N / X)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         for (tiled_loop in X)
//           // rewrite all affine exprs from loop to `loop * X + tiled_loop`.
//
// Design note:
// TileLoop is different from mlir::tile. At the moment, mlir::tile does not
// clearly document its exact tiling semantics, but the observed behavior is:
//   for (i from 0 to N)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         // pass i into affine maps
// =>
//   for (i from 0 to N, step = X)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         for (j from i to min(i + X, N), step = 1)
//           // pass j into affine maps
//
// There are two differences between mlir::tile and TileLoop:
// * TileLoop always puts the tiling "stepping" logic into AffineExprs. With
//   that, all index calculation is done in AffineExprs and is easier to
//   analyze in a single place.
// * TileLoop doesn't plan to use max() and min() to resolve the issue when
//   N % X != 0. max() and min() are not representable in AffineExprs.
//   TODO(timshen): support the case where N % X != 0.
//
// TODO(timshen): consider the possibility of reusing mlir::tile's logic to
// achieve the same goal.
mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
                           mlir::AffineForOp target) {
  CHECK(IsSimpleLoop(loop));
  CHECK(IsSimpleLoop(target));
  {
    llvm::SmallVector<mlir::AffineForOp, 4> all_loops;
    getPerfectlyNestedLoops(all_loops, loop);
    CHECK(absl::c_linear_search(all_loops, target));
  }

  auto builder = target.getBodyBuilder();

  auto inner_loop =
      builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, size);
  {
    auto& inner_operations = inner_loop.getBody()->getOperations();
    auto& target_operations = target.getBody()->getOperations();

    inner_operations.splice(inner_operations.begin(), target_operations,
                            target_operations.begin(),
                            std::prev(target_operations.end(), 2));

    mlir::AffineExpr length = loop.getUpperBoundMap().getResult(0);
    CHECK_EQ(0, length.cast<mlir::AffineConstantExpr>().getValue() % size);
    SetBoundForSimpleLoop(loop, length.ceilDiv(size), builder);
  }

  for (auto& use :
       llvm::make_early_inc_range(loop.getInductionVar().getUses())) {
    mlir::Operation* owner = use.getOwner();
    BoundAffineMap affine_map = GetBoundAffineMapFrom(owner);
    unsigned new_dim = affine_map.operands.size();
    affine_map.operands.push_back(inner_loop.getInductionVar());
    std::vector<mlir::AffineExpr> replacements;
    for (int i = 0; i < affine_map.affine_map.getNumDims(); i++) {
      if (affine_map.operands[i] == loop.getInductionVar()) {
        replacements.push_back(builder.getAffineDimExpr(i) * size +
                               builder.getAffineDimExpr(new_dim));
      } else {
        replacements.push_back(builder.getAffineDimExpr(i));
      }
    }
    affine_map.affine_map = affine_map.affine_map.replaceDimsAndSymbols(
        replacements, {}, affine_map.operands.size(), 0);
    auto new_op =
        CloneWithNewAffineMap(owner, affine_map, mlir::OpBuilder(owner));
    owner->replaceAllUsesWith(new_op);
    owner->erase();
  }
  return inner_loop;
}

// Hoist operations out of `where`. [begin_op, end_op) must be the first
// operations of their parent loop, and `where` must be an ancestor of that
// parent loop.
@@ -387,21 +225,6 @@ mlir::Operation* HoistAndFix(mlir::Operation* op, mlir::AffineForOp where) {
  return HoistAndFix(op->getIterator(), std::next(op->getIterator()), where);
}

// Sinks a segment of perfectly nested loops to the bottom. It implements this
// by rotating the loop nest by rotate_amount.
void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
                              int rotate_amount) {
  CHECK_GE(rotate_amount, 0);
  std::vector<unsigned> permutation(loops.size());
  std::iota(permutation.begin(), permutation.end(), unsigned(0));
  std::rotate(permutation.begin(),
              permutation.begin() + loops.size() - rotate_amount,
              permutation.end());
  mlir::interchangeLoops(
      llvm::ArrayRef<mlir::AffineForOp>(loops.begin(), loops.end()),
      permutation);
}

struct InitialMlirConvAnchors {
  std::vector<mlir::AffineForOp> cartesian_product_loops;
  std::vector<mlir::AffineForOp> reduction_loops;
@@ -0,0 +1,152 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h"

#include "absl/algorithm/container.h"
#include "llvm/ADT/StringRef.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"  // from @llvm-project
#include "mlir/Transforms/LoopUtils.h"  // from @llvm-project
#include "tensorflow/core/platform/logging.h"

namespace xla {
namespace mlir_gpu {

BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op) {
  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
    return {load.getAffineMap(),
            std::vector<mlir::Value>(load.getMapOperands().begin(),
                                     load.getMapOperands().end())};
  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
    return {store.getAffineMap(),
            std::vector<mlir::Value>(store.getMapOperands().begin(),
                                     store.getMapOperands().end())};
  } else {
    CHECK(false);
  }
}

mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
                                       BoundAffineMap new_affine,
                                       mlir::OpBuilder builder) {
  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
    return builder.create<mlir::AffineLoadOp>(
        builder.getUnknownLoc(), load.getMemRef(), new_affine.affine_map,
        new_affine.operands);
  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
    return builder.create<mlir::AffineStoreOp>(
        builder.getUnknownLoc(), store.getValueToStore(), store.getMemRef(),
        new_affine.affine_map, new_affine.operands);
  } else {
    CHECK(false);
  }
}

bool IsSimpleLoop(mlir::AffineForOp loop) {
  return loop.getLowerBoundMap().isSingleConstant() &&
         loop.getLowerBoundMap().getSingleConstantResult() == 0 &&
         loop.getStep() == 1 && loop.getUpperBoundMap().getNumResults() == 1 &&
         std::next(loop.region().begin()) == loop.region().end();
}

std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
    absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder) {
  std::vector<mlir::AffineForOp> loops;
  loops.reserve(upper_bounds.size());
  for (int64_t dim : upper_bounds) {
    auto loop =
        builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, dim);
    loops.push_back(loop);
    builder = loop.getBodyBuilder();
  }
  return loops;
}

void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
                           mlir::OpBuilder builder) {
  CHECK(IsSimpleLoop(loop));

  loop.setUpperBoundMap(mlir::AffineMap::get(
      loop.getUpperBoundMap().getNumDims(),
      loop.getUpperBoundMap().getNumSymbols(), {new_bound}));
}

mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
                           mlir::AffineForOp target) {
  CHECK(IsSimpleLoop(loop));
  CHECK(IsSimpleLoop(target));
  {
    llvm::SmallVector<mlir::AffineForOp, 4> all_loops;
    getPerfectlyNestedLoops(all_loops, loop);
    CHECK(absl::c_linear_search(all_loops, target));
  }

  auto builder = target.getBodyBuilder();

  auto inner_loop =
      builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, size);
  {
    auto& inner_operations = inner_loop.getBody()->getOperations();
    auto& target_operations = target.getBody()->getOperations();

    inner_operations.splice(inner_operations.begin(), target_operations,
                            target_operations.begin(),
                            std::prev(target_operations.end(), 2));

    mlir::AffineExpr length = loop.getUpperBoundMap().getResult(0);
    CHECK_EQ(0, length.cast<mlir::AffineConstantExpr>().getValue() % size);
    SetBoundForSimpleLoop(loop, length.ceilDiv(size), builder);
  }

  for (auto& use :
       llvm::make_early_inc_range(loop.getInductionVar().getUses())) {
    mlir::Operation* owner = use.getOwner();
    BoundAffineMap affine_map = GetBoundAffineMapFrom(owner);
    unsigned new_dim = affine_map.operands.size();
    affine_map.operands.push_back(inner_loop.getInductionVar());
    std::vector<mlir::AffineExpr> replacements;
    for (int i = 0; i < affine_map.affine_map.getNumDims(); i++) {
      if (affine_map.operands[i] == loop.getInductionVar()) {
        replacements.push_back(builder.getAffineDimExpr(i) * size +
                               builder.getAffineDimExpr(new_dim));
      } else {
        replacements.push_back(builder.getAffineDimExpr(i));
      }
    }
    affine_map.affine_map = affine_map.affine_map.replaceDimsAndSymbols(
        replacements, {}, affine_map.operands.size(), 0);
    auto new_op =
        CloneWithNewAffineMap(owner, affine_map, mlir::OpBuilder(owner));
    owner->replaceAllUsesWith(new_op);
    owner->erase();
  }
  return inner_loop;
}

void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
                              int rotate_amount) {
  CHECK_GE(rotate_amount, 0);
  std::vector<unsigned> permutation(loops.size());
  std::iota(permutation.begin(), permutation.end(), unsigned(0));
  std::rotate(permutation.begin(),
              permutation.begin() + loops.size() - rotate_amount,
              permutation.end());
  mlir::interchangeLoops(
      llvm::ArrayRef<mlir::AffineForOp>(loops.begin(), loops.end()),
      permutation);
}

}  // namespace mlir_gpu
}  // namespace xla
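As an aside, the permutation that SinkPerfectlyNestedLoops hands to mlir::interchangeLoops is simply the identity order rotated by rotate_amount. The following is a minimal, self-contained sketch (plain C++, no MLIR) of that computation; the loop count of 4 and rotate_amount of 1 are hypothetical example values, not taken from the commit.

// Standalone illustration of the permutation built by SinkPerfectlyNestedLoops
// before it calls mlir::interchangeLoops. Values here are example inputs only.
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  const int num_loops = 4;       // hypothetical size of the loop nest
  const int rotate_amount = 1;   // hypothetical rotation

  std::vector<unsigned> permutation(num_loops);
  std::iota(permutation.begin(), permutation.end(), unsigned(0));
  std::rotate(permutation.begin(),
              permutation.begin() + num_loops - rotate_amount,
              permutation.end());

  // Prints "3 0 1 2": the identity order {0, 1, 2, 3} rotated by one position.
  for (unsigned p : permutation) std::printf("%u ", p);
  std::printf("\n");
  return 0;
}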
@@ -0,0 +1,102 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_

#include "absl/base/integral_types.h"
#include "absl/types/span.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"  // from @llvm-project
#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
#include "mlir/IR/Operation.h"  // from @llvm-project

namespace xla {
namespace mlir_gpu {

struct BoundAffineMap {
  mlir::AffineMap affine_map;
  std::vector<mlir::Value> operands;
};

BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op);
mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
                                       BoundAffineMap new_affine,
                                       mlir::OpBuilder builder);

bool IsSimpleLoop(mlir::AffineForOp loop);
std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
    absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder);
void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
                           mlir::OpBuilder builder);

// Tile a loop with trip count N by `size`. For now, N has to be a multiple of
// `size`, but this constraint will be removed later.
//
// The major loop (with trip count N / size) stays as-is, while the minor loop
// (with trip count `size`) takes over the body of `target` and becomes its
// new body.
//
// `target` has to be within the same "perfectly nested loop group" as `loop`.
// See the documentation for mlir::getPerfectlyNestedLoops.
//
// Example:
// Before tiling `loop` with tile size X:
//   for (loop in N)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         // pass loop into affine maps
// After:
//   for (loop in N / X)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         for (tiled_loop in X)
//           // rewrite all affine exprs from loop to `loop * X + tiled_loop`.
//
// Design note:
// TileLoop is different from mlir::tile. At the moment, mlir::tile does not
// clearly document its exact tiling semantics, but the observed behavior is:
//   for (i from 0 to N)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         // pass i into affine maps
// =>
//   for (i from 0 to N, step = X)
//     for (unrelated_loop in ...)
//       for (target in ...)
//         for (j from i to min(i + X, N), step = 1)
//           // pass j into affine maps
//
// There are two differences between mlir::tile and TileLoop:
// * TileLoop always puts the tiling "stepping" logic into AffineExprs. With
//   that, all index calculation is done in AffineExprs and is easier to
//   analyze in a single place.
// * TileLoop doesn't plan to use max() and min() to resolve the issue when
//   N % X != 0. max() and min() are not representable in AffineExprs.
//   TODO(timshen): support the case where N % X != 0.
//
// TODO(timshen): consider the possibility of reusing mlir::tile's logic to
// achieve the same goal.
mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
                           mlir::AffineForOp target);
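As an illustration of the rewrite described above, here is a minimal, self-contained sketch (plain C++, no MLIR) of the index mapping that TileLoop encodes into the affine maps: an access indexed by `loop` becomes `loop * X + tiled_loop`, and the outer trip count shrinks to N / X. The values N = 8 and X = 4 are hypothetical example values, not part of the commit.

// Standalone illustration of TileLoop's index rewrite: each original index in
// [0, N) is produced exactly once as major * X + minor. Example values only.
#include <cassert>
#include <cstdio>

int main() {
  constexpr int N = 8;  // original trip count (hypothetical)
  constexpr int X = 4;  // tile size (hypothetical); must divide N

  bool visited[N] = {false};
  for (int major = 0; major < N / X; ++major) {    // the tiled `loop`
    for (int minor = 0; minor < X; ++minor) {      // the new inner loop
      int original_index = major * X + minor;      // rewritten affine expr
      assert(!visited[original_index]);
      visited[original_index] = true;
      std::printf("major=%d minor=%d -> index %d\n", major, minor,
                  original_index);
    }
  }
  for (int i = 0; i < N; ++i) assert(visited[i]);  // every index hit once
  return 0;
}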

// Sinks a segment of perfectly nested loops to the bottom. It implements this
// by rotating the loop nest by rotate_amount.
void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
                              int rotate_amount);

}  // namespace mlir_gpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_