From 26416a3b897cbef811969723cff6d8a83e028630 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <ntv@google.com>
Date: Fri, 18 Oct 2019 14:09:42 -0700
Subject: [PATCH] Lower vector transfer ops to loop.for operations.

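This allows mixing linalg operations with vector transfer operations (with additional modifications to affine ops) and is a step towards solving #189.

In addition to the vector transfer lowering itself, this change:
* renames the Linalg-to-LLVM pass flag from "linalg-convert-to-llvm" to "convert-linalg-to-llvm";
* removes the isValidDim assertion from the dim branch of EDSC categorizeValueByAffineType;
* replaces the identity-layout assertion in EDSC getMemRefSizes with an isStrided assertion.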

PiperOrigin-RevId: 275543361
Change-Id: Icc8ae04738d4232507d5cfd584280dd8e588deb9
---
 .../Linalg/Transforms/LowerToLLVMDialect.cpp  |  2 +-
 third_party/mlir/lib/EDSC/Builders.cpp        |  1 -
 third_party/mlir/lib/EDSC/Helpers.cpp         |  5 +---
 .../lib/Transforms/LowerVectorTransfers.cpp   | 27 ++++++++++++-------
 4 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/third_party/mlir/lib/Dialect/Linalg/Transforms/LowerToLLVMDialect.cpp b/third_party/mlir/lib/Dialect/Linalg/Transforms/LowerToLLVMDialect.cpp
index a0955d50523..90a76dedd9b 100644
--- a/third_party/mlir/lib/Dialect/Linalg/Transforms/LowerToLLVMDialect.cpp
+++ b/third_party/mlir/lib/Dialect/Linalg/Transforms/LowerToLLVMDialect.cpp
@@ -770,5 +770,5 @@ mlir::linalg::createLowerLinalgToLLVMPass() {
 }
 
 static PassRegistration<LowerLinalgToLLVMPass>
-    pass("linalg-convert-to-llvm",
+    pass("convert-linalg-to-llvm",
          "Lower the operations from the linalg dialect into the LLVM dialect");
diff --git a/third_party/mlir/lib/EDSC/Builders.cpp b/third_party/mlir/lib/EDSC/Builders.cpp
index f1dceec8f11..4046a7c1fc6 100644
--- a/third_party/mlir/lib/EDSC/Builders.cpp
+++ b/third_party/mlir/lib/EDSC/Builders.cpp
@@ -320,7 +320,6 @@ categorizeValueByAffineType(MLIRContext *context, Value *val, unsigned &numDims,
     d = getAffineSymbolExpr(numSymbols++, context);
     resultVal = val;
   } else {
-    assert(isValidDim(val) && "Must be a valid Dim");
     d = getAffineDimExpr(numDims++, context);
     resultVal = val;
   }
diff --git a/third_party/mlir/lib/EDSC/Helpers.cpp b/third_party/mlir/lib/EDSC/Helpers.cpp
index b4455c43c1e..eeb28668a34 100644
--- a/third_party/mlir/lib/EDSC/Helpers.cpp
+++ b/third_party/mlir/lib/EDSC/Helpers.cpp
@@ -24,11 +24,8 @@ using namespace mlir::edsc;
 
 static SmallVector<ValueHandle, 8> getMemRefSizes(Value *memRef) {
   MemRefType memRefType = memRef->getType().cast<MemRefType>();
+  assert(isStrided(memRefType) && "Expected strided MemRef type");
 
-  auto maps = memRefType.getAffineMaps();
-  (void)maps;
-  assert((maps.empty() || (maps.size() == 1 && maps[0].isIdentity())) &&
-         "Layout maps not supported");
   SmallVector<ValueHandle, 8> res;
   res.reserve(memRefType.getShape().size());
   const auto &shape = memRefType.getShape();
diff --git a/third_party/mlir/lib/Transforms/LowerVectorTransfers.cpp b/third_party/mlir/lib/Transforms/LowerVectorTransfers.cpp
index c7d986c283d..c517d74f221 100644
--- a/third_party/mlir/lib/Transforms/LowerVectorTransfers.cpp
+++ b/third_party/mlir/lib/Transforms/LowerVectorTransfers.cpp
@@ -25,6 +25,7 @@
 #include "mlir/Analysis/NestedMatcher.h"
 #include "mlir/Analysis/Utils.h"
 #include "mlir/Analysis/VectorAnalysis.h"
+#include "mlir/Dialect/LoopOps/LoopOps.h"
 #include "mlir/Dialect/StandardOps/Ops.h"
 #include "mlir/Dialect/VectorOps/VectorOps.h"
 #include "mlir/EDSC/Builders.h"
@@ -54,9 +55,9 @@
 ///    // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
 ///    // vector<32x256xf32> and pad with %f0 to handle the boundary case:
 ///    %f0 = constant 0.0f : f32
-///    affine.for %i0 = 0 to %0 {
-///      affine.for %i1 = 0 to %1 step 256 {
-///        affine.for %i2 = 0 to %2 step 32 {
+///    loop.for %i0 = 0 to %0 {
+///      loop.for %i1 = 0 to %1 step %c256 {
+///        loop.for %i2 = 0 to %2 step %c32 {
 ///          %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
 ///               {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 ///               memref<?x?x?xf32>, vector<32x256xf32>
@@ -68,8 +69,8 @@
 /// abstraction):
 ///
 /// ```mlir {.mlir}
-///    affine.for %d2 = 0 to 256 {
-///      affine.for %d1 = 0 to 32 {
+///    loop.for %d2 = 0 to %c32 {
+///      loop.for %d1 = 0 to %c256 {
 ///        %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
 ///        %tmp[%d2, %d1] = %s
 ///      }
@@ -126,7 +127,7 @@ struct VectorTransferRewriter : public RewritePattern {
 /// Analyzes the `transfer` to find an access dimension along the fastest remote
 /// MemRef dimension. If such a dimension with coalescing properties is found,
 /// `pivs` and `vectorView` are swapped so that the invocation of
-/// AffineLoopNestBuilder captures it in the innermost loop.
+/// LoopNestBuilder captures it in the innermost loop.
 template <typename VectorTransferOpTy>
 void coalesceCopy(VectorTransferOpTy transfer,
                   SmallVectorImpl<edsc::ValueHandle *> *pivs,
@@ -282,13 +283,16 @@ VectorTransferRewriter<VectorTransferReadOp>::matchAndRewrite(
 
   auto lbs = vectorView.getLbs();
   auto ubs = vectorView.getUbs();
-  auto steps = vectorView.getSteps();
+  SmallVector<ValueHandle, 8> steps;
+  steps.reserve(vectorView.getSteps().size());
+  for (auto step : vectorView.getSteps())
+    steps.push_back(constant_index(step));
 
   // 2. Emit alloc-copy-load-dealloc.
   ValueHandle tmp = alloc(tmpMemRefType(transfer));
   IndexedValue local(tmp);
   ValueHandle vec = vector_type_cast(tmp, vectorMemRefType(transfer));
-  AffineLoopNestBuilder(pivs, lbs, ubs, steps)([&] {
+  LoopNestBuilder(pivs, lbs, ubs, steps)([&] {
     // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
     local(ivs) = remote(clip(transfer, view, ivs));
   });
@@ -342,14 +346,17 @@ VectorTransferRewriter<VectorTransferWriteOp>::matchAndRewrite(
 
   auto lbs = vectorView.getLbs();
   auto ubs = vectorView.getUbs();
-  auto steps = vectorView.getSteps();
+  SmallVector<ValueHandle, 8> steps;
+  steps.reserve(vectorView.getSteps().size());
+  for (auto step : vectorView.getSteps())
+    steps.push_back(constant_index(step));
 
   // 2. Emit alloc-store-copy-dealloc.
   ValueHandle tmp = alloc(tmpMemRefType(transfer));
   IndexedValue local(tmp);
   ValueHandle vec = vector_type_cast(tmp, vectorMemRefType(transfer));
   std_store(vectorValue, vec, {constant_index(0)});
-  AffineLoopNestBuilder(pivs, lbs, ubs, steps)([&] {
+  LoopNestBuilder(pivs, lbs, ubs, steps)([&] {
     // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
     remote(clip(transfer, view, ivs)) = local(ivs);
   });