[MLIR/GPU] Move utility functions to a separate file. NFC.

PiperOrigin-RevId: 303377339
Change-Id: I64983a99f9a36aa889e0a7e9f30dd0c0e6cd0561
Tim Shen 2020-03-27 12:00:29 -07:00 committed by TensorFlower Gardener
parent 190ceb644e
commit 22325a5c7d
4 changed files with 273 additions and 178 deletions

File: tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD

@@ -27,6 +27,7 @@ cc_library(
srcs = ["conv_emitter.cc"],
hdrs = ["conv_emitter.h"],
deps = [
":conv_emitter_transforms",
"//tensorflow/compiler/xla:window_util",
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
@@ -39,6 +40,23 @@ cc_library(
],
)
cc_library(
name = "conv_emitter_transforms",
srcs = ["conv_emitter_transforms.cc"],
hdrs = ["conv_emitter_transforms.h"],
deps = [
"//tensorflow/core:lib",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/types:span",
"@llvm-project//llvm:support",
"@llvm-project//mlir:Affine",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:StandardOps",
"@llvm-project//mlir:TransformUtils",
],
)
tf_cc_test(
name = "conv_emitter_test",
srcs = ["conv_emitter_test.cc"],

File: tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc

@@ -38,6 +38,7 @@ limitations under the License.
#include "mlir/Transforms/LoopUtils.h" // from @llvm-project
#include "mlir/Transforms/RegionUtils.h" // from @llvm-project
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h"
#include "tensorflow/compiler/xla/window_util.h"
namespace xla {
@@ -109,48 +110,6 @@ ShapeInfo GetShapeInfo(
return shape_info;
}
bool IsSimpleLoop(mlir::AffineForOp loop) {
return loop.getLowerBoundMap().isSingleConstant() &&
loop.getLowerBoundMap().getSingleConstantResult() == 0 &&
loop.getStep() == 1 && loop.getUpperBoundMap().getNumResults() == 1 &&
std::next(loop.region().begin()) == loop.region().end();
}
struct BoundAffineMap {
mlir::AffineMap affine_map;
std::vector<mlir::Value> operands;
};
BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op) {
if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
return {load.getAffineMap(),
std::vector<mlir::Value>(load.getMapOperands().begin(),
load.getMapOperands().end())};
} else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
return {store.getAffineMap(),
std::vector<mlir::Value>(store.getMapOperands().begin(),
store.getMapOperands().end())};
} else {
CHECK(false);
}
}
mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
BoundAffineMap new_affine,
mlir::OpBuilder builder) {
if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
return builder.create<mlir::AffineLoadOp>(
builder.getUnknownLoc(), load.getMemRef(), new_affine.affine_map,
new_affine.operands);
} else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
return builder.create<mlir::AffineStoreOp>(
builder.getUnknownLoc(), store.getValueToStore(), store.getMemRef(),
new_affine.affine_map, new_affine.operands);
} else {
CHECK(false);
}
}
void SetMemRef(mlir::Operation* op, mlir::Value memref) {
if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
load.setMemRef(memref);
@@ -161,127 +120,6 @@ void SetMemRef(mlir::Operation* op, mlir::Value memref) {
}
}
std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder) {
std::vector<mlir::AffineForOp> loops;
loops.reserve(upper_bounds.size());
for (int64_t dim : upper_bounds) {
auto loop =
builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, dim);
loops.push_back(loop);
builder = loop.getBodyBuilder();
}
return loops;
}
void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
mlir::OpBuilder builder) {
CHECK(IsSimpleLoop(loop));
loop.setUpperBoundMap(mlir::AffineMap::get(
loop.getUpperBoundMap().getNumDims(),
loop.getUpperBoundMap().getNumSymbols(), {new_bound}));
}
// Tile a loop with trip count N by `size`. For now, N has to be a multiple of
// size, but later this constraint will be removed.
//
// The major loop (with trip count N / size) stays as-is, while the minor loop
// (with trip count `size`) will take over the body of `target`, and be placed
// as the new body of `target`.
//
// `target` has to be within the same "perfectly nested loop group" as `loop`.
// See the documentation for mlir::getPerfectlyNestedLoops.
//
// Example:
// Before tiling `loop` with tile size X:
// for (loop in N)
// for (unrelated_loop in ...)
// for (target in ...)
// // pass loop into affine maps
// After:
// for (loop in N / X)
// for (unrelated_loop in ...)
// for (target in ...)
// for (tiled_loop in X)
// // rewrite all affine exprs from loop to `loop * X + tiled_loop`.
//
// Design note:
// TileLoop is different from mlir::tile. At the moment, the exact tiling
// semantics of mlir::tile are not well documented, but the observed behavior is:
// for (i from 0 to N)
// for (unrelated_loop in ...)
// for (target in ...)
// // pass i into affine maps
// =>
// for (i from 0 to N, step = X)
// for (unrelated_loop in ...)
// for (target in ...)
// for (j from i to min(i + X, N), step = 1)
// // pass j into affine maps
//
// There are two differences between mlir::tile and TileLoop:
// * TileLoop always puts the tiling "stepping" logic into AffineExprs. With
// that, all index calculation is done in AffineExprs and is easier to
// analyze in a single place.
// * TileLoop doesn't plan to use max() and min() to resolve the issue when
// N % X != 0. max() and min() are not representable in AffineExprs.
// TODO(timshen): support the case where N % X != 0.
//
// TODO(timshen): consider the possibility to reuse mlir::tile's logic to
// achieve the same goal.
mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
mlir::AffineForOp target) {
CHECK(IsSimpleLoop(loop));
CHECK(IsSimpleLoop(target));
{
llvm::SmallVector<mlir::AffineForOp, 4> all_loops;
getPerfectlyNestedLoops(all_loops, loop);
CHECK(absl::c_linear_search(all_loops, target));
}
auto builder = target.getBodyBuilder();
auto inner_loop =
builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, size);
{
auto& inner_operations = inner_loop.getBody()->getOperations();
auto& target_operations = target.getBody()->getOperations();
inner_operations.splice(inner_operations.begin(), target_operations,
target_operations.begin(),
std::prev(target_operations.end(), 2));
mlir::AffineExpr length = loop.getUpperBoundMap().getResult(0);
CHECK_EQ(0, length.cast<mlir::AffineConstantExpr>().getValue() % size);
SetBoundForSimpleLoop(loop, length.ceilDiv(size), builder);
}
for (auto& use :
llvm::make_early_inc_range(loop.getInductionVar().getUses())) {
mlir::Operation* owner = use.getOwner();
BoundAffineMap affine_map = GetBoundAffineMapFrom(owner);
unsigned new_dim = affine_map.operands.size();
affine_map.operands.push_back(inner_loop.getInductionVar());
std::vector<mlir::AffineExpr> replacements;
for (int i = 0; i < affine_map.affine_map.getNumDims(); i++) {
if (affine_map.operands[i] == loop.getInductionVar()) {
replacements.push_back(builder.getAffineDimExpr(i) * size +
builder.getAffineDimExpr(new_dim));
} else {
replacements.push_back(builder.getAffineDimExpr(i));
}
}
affine_map.affine_map = affine_map.affine_map.replaceDimsAndSymbols(
replacements, {}, affine_map.operands.size(), 0);
auto new_op =
CloneWithNewAffineMap(owner, affine_map, mlir::OpBuilder(owner));
owner->replaceAllUsesWith(new_op);
owner->erase();
}
return inner_loop;
}
// Hoist operations out of `where`. [begin_op, end_op) must be the first
// operations of their parent loop, and `where` must be an ancestor of that
// parent loop.
@@ -387,21 +225,6 @@ mlir::Operation* HoistAndFix(mlir::Operation* op, mlir::AffineForOp where) {
return HoistAndFix(op->getIterator(), std::next(op->getIterator()), where);
}
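A minimal usage sketch of the single-operation overload, not part of this change; the names alloc_op and outer_loop are hypothetical:

// Hoist `alloc_op` out of `outer_loop`. Per the comment above, `alloc_op`
// must be the first operation of its parent loop body, and `outer_loop` must
// be an ancestor of that parent loop.
mlir::Operation* hoisted = HoistAndFix(alloc_op, outer_loop);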
// Sinks a segment of perfectly nested loops to the bottom. It implements this
// by rotating the loop nest by rotate_amount.
void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
int rotate_amount) {
CHECK_GE(rotate_amount, 0);
std::vector<unsigned> permutation(loops.size());
std::iota(permutation.begin(), permutation.end(), unsigned(0));
std::rotate(permutation.begin(),
permutation.begin() + loops.size() - rotate_amount,
permutation.end());
mlir::interchangeLoops(
llvm::ArrayRef<mlir::AffineForOp>(loops.begin(), loops.end()),
permutation);
}
struct InitialMlirConvAnchors {
std::vector<mlir::AffineForOp> cartesian_product_loops;
std::vector<mlir::AffineForOp> reduction_loops;

File: tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.cc

@@ -0,0 +1,152 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h"
#include "absl/algorithm/container.h"
#include "llvm/ADT/StringRef.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h" // from @llvm-project
#include "mlir/Transforms/LoopUtils.h" // from @llvm-project
#include "tensorflow/core/platform/logging.h"
namespace xla {
namespace mlir_gpu {
BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op) {
if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
return {load.getAffineMap(),
std::vector<mlir::Value>(load.getMapOperands().begin(),
load.getMapOperands().end())};
} else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
return {store.getAffineMap(),
std::vector<mlir::Value>(store.getMapOperands().begin(),
store.getMapOperands().end())};
} else {
CHECK(false);
}
}
mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
BoundAffineMap new_affine,
mlir::OpBuilder builder) {
if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
return builder.create<mlir::AffineLoadOp>(
builder.getUnknownLoc(), load.getMemRef(), new_affine.affine_map,
new_affine.operands);
} else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
return builder.create<mlir::AffineStoreOp>(
builder.getUnknownLoc(), store.getValueToStore(), store.getMemRef(),
new_affine.affine_map, new_affine.operands);
} else {
CHECK(false);
}
}
bool IsSimpleLoop(mlir::AffineForOp loop) {
return loop.getLowerBoundMap().isSingleConstant() &&
loop.getLowerBoundMap().getSingleConstantResult() == 0 &&
loop.getStep() == 1 && loop.getUpperBoundMap().getNumResults() == 1 &&
std::next(loop.region().begin()) == loop.region().end();
}
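To illustrate (a hedged sketch, not part of this change; the wrapper function and `builder` are hypothetical), a zero-based, unit-step loop with a single constant upper bound is "simple", while a strided loop is not:

void IsSimpleLoopExample(mlir::OpBuilder builder) {
  // Zero lower bound, constant upper bound, default step of 1: simple.
  auto simple =
      builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, 8);
  // Same bounds but step 2: not simple.
  auto strided = builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0,
                                                   8, /*step=*/2);
  CHECK(IsSimpleLoop(simple));
  CHECK(!IsSimpleLoop(strided));
}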
std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder) {
std::vector<mlir::AffineForOp> loops;
loops.reserve(upper_bounds.size());
for (int64_t dim : upper_bounds) {
auto loop =
builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, dim);
loops.push_back(loop);
builder = loop.getBodyBuilder();
}
return loops;
}
void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
mlir::OpBuilder builder) {
CHECK(IsSimpleLoop(loop));
loop.setUpperBoundMap(mlir::AffineMap::get(
loop.getUpperBoundMap().getNumDims(),
loop.getUpperBoundMap().getNumSymbols(), {new_bound}));
}
mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
mlir::AffineForOp target) {
CHECK(IsSimpleLoop(loop));
CHECK(IsSimpleLoop(target));
{
llvm::SmallVector<mlir::AffineForOp, 4> all_loops;
getPerfectlyNestedLoops(all_loops, loop);
CHECK(absl::c_linear_search(all_loops, target));
}
auto builder = target.getBodyBuilder();
auto inner_loop =
builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, size);
{
auto& inner_operations = inner_loop.getBody()->getOperations();
auto& target_operations = target.getBody()->getOperations();
inner_operations.splice(inner_operations.begin(), target_operations,
target_operations.begin(),
std::prev(target_operations.end(), 2));
mlir::AffineExpr length = loop.getUpperBoundMap().getResult(0);
CHECK_EQ(0, length.cast<mlir::AffineConstantExpr>().getValue() % size);
SetBoundForSimpleLoop(loop, length.ceilDiv(size), builder);
}
for (auto& use :
llvm::make_early_inc_range(loop.getInductionVar().getUses())) {
mlir::Operation* owner = use.getOwner();
BoundAffineMap affine_map = GetBoundAffineMapFrom(owner);
unsigned new_dim = affine_map.operands.size();
affine_map.operands.push_back(inner_loop.getInductionVar());
std::vector<mlir::AffineExpr> replacements;
for (int i = 0; i < affine_map.affine_map.getNumDims(); i++) {
if (affine_map.operands[i] == loop.getInductionVar()) {
replacements.push_back(builder.getAffineDimExpr(i) * size +
builder.getAffineDimExpr(new_dim));
} else {
replacements.push_back(builder.getAffineDimExpr(i));
}
}
affine_map.affine_map = affine_map.affine_map.replaceDimsAndSymbols(
replacements, {}, affine_map.operands.size(), 0);
auto new_op =
CloneWithNewAffineMap(owner, affine_map, mlir::OpBuilder(owner));
owner->replaceAllUsesWith(new_op);
owner->erase();
}
return inner_loop;
}
void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
int rotate_amount) {
CHECK_GE(rotate_amount, 0);
std::vector<unsigned> permutation(loops.size());
std::iota(permutation.begin(), permutation.end(), unsigned(0));
std::rotate(permutation.begin(),
permutation.begin() + loops.size() - rotate_amount,
permutation.end());
mlir::interchangeLoops(
llvm::ArrayRef<mlir::AffineForOp>(loops.begin(), loops.end()),
permutation);
}
} // namespace mlir_gpu
} // namespace xla

File: tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h

@@ -0,0 +1,102 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_
#include "absl/base/integral_types.h"
#include "absl/types/span.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h" // from @llvm-project
#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project
#include "mlir/IR/Operation.h" // from @llvm-project
namespace xla {
namespace mlir_gpu {
struct BoundAffineMap {
mlir::AffineMap affine_map;
std::vector<mlir::Value> operands;
};
BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op);
mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
BoundAffineMap new_affine,
mlir::OpBuilder builder);
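A minimal sketch of how these two helpers compose, not part of this change (the constant-offset rewrite is purely illustrative): read an affine load/store's map, edit its result expressions, and re-emit the op with the new map.

// Hypothetical example: add a constant offset to every result expression of
// an AffineLoadOp/AffineStoreOp and replace the original op.
mlir::Operation* AddConstantOffset(mlir::Operation* op, int64_t offset) {
  BoundAffineMap bound = GetBoundAffineMapFrom(op);
  std::vector<mlir::AffineExpr> results;
  for (mlir::AffineExpr expr : bound.affine_map.getResults()) {
    results.push_back(expr + offset);
  }
  bound.affine_map = mlir::AffineMap::get(bound.affine_map.getNumDims(),
                                          bound.affine_map.getNumSymbols(),
                                          results);
  mlir::OpBuilder builder(op);
  mlir::Operation* new_op = CloneWithNewAffineMap(op, bound, builder);
  op->replaceAllUsesWith(new_op);
  op->erase();
  return new_op;
}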
bool IsSimpleLoop(mlir::AffineForOp loop);
std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder);
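For example (a sketch assuming `builder` already points into a function body; the wrapper is hypothetical), a three-deep nest over an 8x32x32 iteration space:

void BuildLoopNestExample(mlir::OpBuilder builder) {
  // Creates for %i in [0, 8), for %j in [0, 32), for %k in [0, 32); every
  // returned loop satisfies IsSimpleLoop by construction.
  std::vector<mlir::AffineForOp> loops =
      CreateNestedSimpleLoops(/*upper_bounds=*/{8, 32, 32}, builder);
  // Emit the innermost body through the innermost loop's builder.
  mlir::OpBuilder body_builder = loops.back().getBodyBuilder();
  (void)body_builder;  // e.g. body_builder.create<mlir::AffineLoadOp>(...)
}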
void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
mlir::OpBuilder builder);
// Tile a loop with trip count N by `size`. For now, N has to be a multiple of
// size, but later this constraint will be removed.
//
// The major loop (with trip count N / size) stays as-is, while the minor loop
// (with trip count `size`) will take over the body of `target`, and be placed
// as the new body of `target`.
//
// `target` has to be within the same "perfectly nested loop group" as `loop`.
// See the documentation for mlir::getPerfectlyNestedLoops.
//
// Example:
// Before tiling `loop` with tile size X:
// for (loop in N)
// for (unrelated_loop in ...)
// for (target in ...)
// // pass loop into affine maps
// After:
// for (loop in N / X)
// for (unrelated_loop in ...)
// for (target in ...)
// for (tiled_loop in X)
// // rewrite all affine exprs from loop to `loop * X + tiled_loop`.
//
// Design note:
// TileLoop is different from mlir::tile. At the moment, the exact tiling
// semantics of mlir::tile are not well documented, but the observed behavior is:
// for (i from 0 to N)
// for (unrelated_loop in ...)
// for (target in ...)
// // pass i into affine maps
// =>
// for (i from 0 to N, step = X)
// for (unrelated_loop in ...)
// for (target in ...)
// for (j from i to min(i + X, N), step = 1)
// // pass j into affine maps
//
// There are two differences between mlir::tile and TileLoop:
// * TileLoop always puts the tiling "stepping" logic into AffineExprs. With
// that, all index calculation is done in AffineExprs and is easier to
// analyze in a single place.
// * TileLoop doesn't plan to use max() and min() to resolve the issue when
// N % X != 0. max() and min() are not representable in AffineExprs.
// TODO(timshen): support the case where N % X != 0.
//
// TODO(timshen): consider the possibility to reuse mlir::tile's logic to
// achieve the same goal.
mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
mlir::AffineForOp target);
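A usage sketch under the constraints above, not part of this change (names are assumed; trip counts are chosen so that N % size == 0):

void TileLoopExample(mlir::OpBuilder builder) {
  // for %i in [0, 16) { for %j in [0, 32) { ... } }
  std::vector<mlir::AffineForOp> loops =
      CreateNestedSimpleLoops(/*upper_bounds=*/{16, 32}, builder);
  mlir::AffineForOp i_loop = loops.front();
  mlir::AffineForOp j_loop = loops.back();
  // Tile %i by 4. Afterwards %i has trip count 16 / 4 = 4, and a new loop of
  // trip count 4 becomes the body of %j; affine maps that consumed %i are
  // rewritten to use (%i * 4 + %tiled).
  mlir::AffineForOp tiled = TileLoop(i_loop, /*size=*/4, /*target=*/j_loop);
  (void)tiled;
}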
// Sinks a segment of perfectly nested loops to the bottom. It implements this
// by rotating the loop nest by rotate_amount.
void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
int rotate_amount);
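For instance (a sketch, not part of this change; `root` is a hypothetical outermost loop of a perfect nest), rotating an (i, j, k, l) nest by 2 yields (k, l, i, j), i.e. the two outermost loops are sunk to the innermost positions:

void SinkLoopsExample(mlir::AffineForOp root) {
  llvm::SmallVector<mlir::AffineForOp, 4> nest;
  mlir::getPerfectlyNestedLoops(nest, root);  // e.g. {i, j, k, l}
  // rotate_amount = 2 builds the permutation {2, 3, 0, 1}, so the nest
  // becomes (k, l, i, j) after the interchange.
  SinkPerfectlyNestedLoops(absl::MakeConstSpan(nest.data(), nest.size()),
                           /*rotate_amount=*/2);
}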
} // namespace mlir_gpu
} // namespace xla
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_