diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD
index ab02cfae96b..28ccf358dcf 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD
+++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD
@@ -27,6 +27,7 @@ cc_library(
     srcs = ["conv_emitter.cc"],
     hdrs = ["conv_emitter.h"],
     deps = [
+        ":conv_emitter_transforms",
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
@@ -39,6 +40,23 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "conv_emitter_transforms",
+    srcs = ["conv_emitter_transforms.cc"],
+    hdrs = ["conv_emitter_transforms.h"],
+    deps = [
+        "//tensorflow/core:lib",
+        "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/types:span",
+        "@llvm-project//llvm:support",
+        "@llvm-project//mlir:Affine",
+        "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:StandardOps",
+        "@llvm-project//mlir:TransformUtils",
+    ],
+)
+
 tf_cc_test(
     name = "conv_emitter_test",
     srcs = ["conv_emitter_test.cc"],
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc
index 5ec8d3bb334..c17d686f7dc 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc
+++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "mlir/Transforms/LoopUtils.h"  // from @llvm-project
 #include "mlir/Transforms/RegionUtils.h"  // from @llvm-project
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h"
 #include "tensorflow/compiler/xla/window_util.h"
 
 namespace xla {
@@ -109,48 +110,6 @@ ShapeInfo GetShapeInfo(
   return shape_info;
 }
 
-bool IsSimpleLoop(mlir::AffineForOp loop) {
-  return loop.getLowerBoundMap().isSingleConstant() &&
-         loop.getLowerBoundMap().getSingleConstantResult() == 0 &&
-         loop.getStep() == 1 && loop.getUpperBoundMap().getNumResults() == 1 &&
-         std::next(loop.region().begin()) == loop.region().end();
-}
-
-struct BoundAffineMap {
-  mlir::AffineMap affine_map;
-  std::vector<mlir::Value> operands;
-};
-
-BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op) {
-  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
-    return {load.getAffineMap(),
-            std::vector<mlir::Value>(load.getMapOperands().begin(),
-                                     load.getMapOperands().end())};
-  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
-    return {store.getAffineMap(),
-            std::vector<mlir::Value>(store.getMapOperands().begin(),
-                                     store.getMapOperands().end())};
-  } else {
-    CHECK(false);
-  }
-}
-
-mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
-                                       BoundAffineMap new_affine,
-                                       mlir::OpBuilder builder) {
-  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
-    return builder.create<mlir::AffineLoadOp>(
-        builder.getUnknownLoc(), load.getMemRef(), new_affine.affine_map,
-        new_affine.operands);
-  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
-    return builder.create<mlir::AffineStoreOp>(
-        builder.getUnknownLoc(), store.getValueToStore(), store.getMemRef(),
-        new_affine.affine_map, new_affine.operands);
-  } else {
-    CHECK(false);
-  }
-}
-
 void SetMemRef(mlir::Operation* op, mlir::Value memref) {
   if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
     load.setMemRef(memref);
@@ -161,127 +120,6 @@ void SetMemRef(mlir::Operation* op, mlir::Value memref) {
   }
 }
 
-std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
-    absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder) {
-  std::vector<mlir::AffineForOp> loops;
-  loops.reserve(upper_bounds.size());
-  for (int64_t dim : upper_bounds) {
-    auto loop =
-        builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, dim);
-    loops.push_back(loop);
-    builder = loop.getBodyBuilder();
-  }
-  return loops;
-}
-
-void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
-                           mlir::OpBuilder builder) {
-  CHECK(IsSimpleLoop(loop));
-
-  loop.setUpperBoundMap(mlir::AffineMap::get(
-      loop.getUpperBoundMap().getNumDims(),
-      loop.getUpperBoundMap().getNumSymbols(), {new_bound}));
-}
-
-// Tile a loop with trip count N by `size`. For now, N has to be a multiple of
-// size, but later this constraint will be removed.
-//
-// The major loop (with trip count N / size) stays as-is, while the minor loop
-// (with trip count `size`) will take over the body of `target`, and be placed
-// as the new body of `target`.
-//
-// `target` has to be within the same "perfectly nested loop group" as `loop`.
-// See the documentation for mlir::getPerfectlyNestedLoops.
-//
-// Example:
-// Before tiling `loop` with tile size X:
-//   for (loop in N)
-//     for (unrelated_loop in ...)
-//       for (target in ...)
-//         // pass loop into affine maps
-// After:
-//   for (loop in N / X)
-//     for (unrelated_loop in ...)
-//       for (target in ...)
-//         for (tiled_loop in X)
-//           // rewrite all affine exprs from loop to `loop * X + tiled_loop`.
-//
-// Design note:
-// TileLoop is different from mlir::tile. At the moment, mlir::tile is not well
-// documented about the exact tiling semantics, but the observed behavior is:
-//   for (i from 0 to N)
-//     for (unrelated_loop in ...)
-//       for (target in ...)
-//         // pass i into affine maps
-// =>
-//   for (i from 0 to N, step = X)
-//     for (unrelated_loop in ...)
-//       for (target in ...)
-//         for (j from i to min(i + X, N), step = 1)
-//           // pass j into affine maps
-//
-// There are two differences between mlir::tile and TileLoop:
-// * TileLoop always puts the tiling logic "stepping" logic into AffineExprs.
-//   With that all index calculation is done in AffineExprs and easier to
-//   analyze in a single place.
-// * TileLoop doesn't plan to use use max() and min() to resolve the issue when
-//   N % X != 0. max() and min() are not representable in AffineExprs.
-//   TODO(timshen): support the case where N % X != 0.
-//
-// TODO(timshen): consider the possibility to reuse mlir::tile's logic to
-// achieve the same goal.
-mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
-                           mlir::AffineForOp target) {
-  CHECK(IsSimpleLoop(loop));
-  CHECK(IsSimpleLoop(target));
-  {
-    llvm::SmallVector<mlir::AffineForOp, 4> all_loops;
-    getPerfectlyNestedLoops(all_loops, loop);
-    CHECK(absl::c_linear_search(all_loops, target));
-  }
-
-  auto builder = target.getBodyBuilder();
-
-  auto inner_loop =
-      builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, size);
-  {
-    auto& inner_operations = inner_loop.getBody()->getOperations();
-    auto& target_operations = target.getBody()->getOperations();
-
-    inner_operations.splice(inner_operations.begin(), target_operations,
-                            target_operations.begin(),
-                            std::prev(target_operations.end(), 2));
-
-    mlir::AffineExpr length = loop.getUpperBoundMap().getResult(0);
-    CHECK_EQ(0, length.cast<mlir::AffineConstantExpr>().getValue() % size);
-    SetBoundForSimpleLoop(loop, length.ceilDiv(size), builder);
-  }
-
-  for (auto& use :
-       llvm::make_early_inc_range(loop.getInductionVar().getUses())) {
-    mlir::Operation* owner = use.getOwner();
-    BoundAffineMap affine_map = GetBoundAffineMapFrom(owner);
-    unsigned new_dim = affine_map.operands.size();
-    affine_map.operands.push_back(inner_loop.getInductionVar());
-    std::vector<mlir::AffineExpr> replacements;
-    for (int i = 0; i < affine_map.affine_map.getNumDims(); i++) {
-      if (affine_map.operands[i] == loop.getInductionVar()) {
-        replacements.push_back(builder.getAffineDimExpr(i) * size +
-                               builder.getAffineDimExpr(new_dim));
-      } else {
-        replacements.push_back(builder.getAffineDimExpr(i));
-      }
-    }
-    affine_map.affine_map = affine_map.affine_map.replaceDimsAndSymbols(
-        replacements, {}, affine_map.operands.size(), 0);
-    auto new_op =
-        CloneWithNewAffineMap(owner, affine_map, mlir::OpBuilder(owner));
-    owner->replaceAllUsesWith(new_op);
-    owner->erase();
-  }
-  return inner_loop;
-}
-
 // Hoist operations out of `where`. [begin_op, end_op) must be the first
 // operations of their parent loop, and `where` must be an ancestor of that
 // parent loop.
@@ -387,21 +225,6 @@ mlir::Operation* HoistAndFix(mlir::Operation* op, mlir::AffineForOp where) {
   return HoistAndFix(op->getIterator(), std::next(op->getIterator()), where);
 }
 
-// Sinks a segment of perfectly nested loops to the bottom. It implements this
-// by rotating the loop nest by rotate_amount.
-void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
-                              int rotate_amount) {
-  CHECK_GE(rotate_amount, 0);
-  std::vector<unsigned> permutation(loops.size());
-  std::iota(permutation.begin(), permutation.end(), unsigned(0));
-  std::rotate(permutation.begin(),
-              permutation.begin() + loops.size() - rotate_amount,
-              permutation.end());
-  mlir::interchangeLoops(
-      llvm::ArrayRef<mlir::AffineForOp>(loops.begin(), loops.end()),
-      permutation);
-}
-
 struct InitialMlirConvAnchors {
   std::vector<mlir::AffineForOp> cartesian_product_loops;
   std::vector<mlir::AffineForOp> reduction_loops;
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.cc b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.cc
new file mode 100644
index 00000000000..ec9e1c93f83
--- /dev/null
+++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.cc
@@ -0,0 +1,152 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h"
+
+#include "absl/algorithm/container.h"
+#include "llvm/ADT/StringRef.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"  // from @llvm-project
+#include "mlir/Transforms/LoopUtils.h"  // from @llvm-project
+#include "tensorflow/core/platform/logging.h"
+
+namespace xla {
+namespace mlir_gpu {
+
+BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op) {
+  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
+    return {load.getAffineMap(),
+            std::vector<mlir::Value>(load.getMapOperands().begin(),
+                                     load.getMapOperands().end())};
+  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
+    return {store.getAffineMap(),
+            std::vector<mlir::Value>(store.getMapOperands().begin(),
+                                     store.getMapOperands().end())};
+  } else {
+    CHECK(false);
+  }
+}
+
+mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
+                                       BoundAffineMap new_affine,
+                                       mlir::OpBuilder builder) {
+  if (auto load = mlir::dyn_cast<mlir::AffineLoadOp>(op)) {
+    return builder.create<mlir::AffineLoadOp>(
+        builder.getUnknownLoc(), load.getMemRef(), new_affine.affine_map,
+        new_affine.operands);
+  } else if (auto store = mlir::dyn_cast<mlir::AffineStoreOp>(op)) {
+    return builder.create<mlir::AffineStoreOp>(
+        builder.getUnknownLoc(), store.getValueToStore(), store.getMemRef(),
+        new_affine.affine_map, new_affine.operands);
+  } else {
+    CHECK(false);
+  }
+}
+
+bool IsSimpleLoop(mlir::AffineForOp loop) {
+  return loop.getLowerBoundMap().isSingleConstant() &&
+         loop.getLowerBoundMap().getSingleConstantResult() == 0 &&
+         loop.getStep() == 1 && loop.getUpperBoundMap().getNumResults() == 1 &&
+         std::next(loop.region().begin()) == loop.region().end();
+}
+
+std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
+    absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder) {
+  std::vector<mlir::AffineForOp> loops;
+  loops.reserve(upper_bounds.size());
+  for (int64_t dim : upper_bounds) {
+    auto loop =
+        builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, dim);
+    loops.push_back(loop);
+    builder = loop.getBodyBuilder();
+  }
+  return loops;
+}
+
+void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
+                           mlir::OpBuilder builder) {
+  CHECK(IsSimpleLoop(loop));
+
+  loop.setUpperBoundMap(mlir::AffineMap::get(
+      loop.getUpperBoundMap().getNumDims(),
+      loop.getUpperBoundMap().getNumSymbols(), {new_bound}));
+}
+
+mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
+                           mlir::AffineForOp target) {
+  CHECK(IsSimpleLoop(loop));
+  CHECK(IsSimpleLoop(target));
+  {
+    llvm::SmallVector<mlir::AffineForOp, 4> all_loops;
+    getPerfectlyNestedLoops(all_loops, loop);
+    CHECK(absl::c_linear_search(all_loops, target));
+  }
+
+  auto builder = target.getBodyBuilder();
+
+  auto inner_loop =
+      builder.create<mlir::AffineForOp>(builder.getUnknownLoc(), 0, size);
+  {
+    auto& inner_operations = inner_loop.getBody()->getOperations();
+    auto& target_operations = target.getBody()->getOperations();
+
+    inner_operations.splice(inner_operations.begin(), target_operations,
+                            target_operations.begin(),
+                            std::prev(target_operations.end(), 2));
+
+    mlir::AffineExpr length = loop.getUpperBoundMap().getResult(0);
+    CHECK_EQ(0, length.cast<mlir::AffineConstantExpr>().getValue() % size);
+    SetBoundForSimpleLoop(loop, length.ceilDiv(size), builder);
+  }
+
+  for (auto& use :
+       llvm::make_early_inc_range(loop.getInductionVar().getUses())) {
+    mlir::Operation* owner = use.getOwner();
+    BoundAffineMap affine_map = GetBoundAffineMapFrom(owner);
+    unsigned new_dim = affine_map.operands.size();
+    affine_map.operands.push_back(inner_loop.getInductionVar());
+    std::vector<mlir::AffineExpr> replacements;
+    for (int i = 0; i < affine_map.affine_map.getNumDims(); i++) {
+      if (affine_map.operands[i] == loop.getInductionVar()) {
+        replacements.push_back(builder.getAffineDimExpr(i) * size +
+                               builder.getAffineDimExpr(new_dim));
+      } else {
+        replacements.push_back(builder.getAffineDimExpr(i));
+      }
+    }
+    affine_map.affine_map = affine_map.affine_map.replaceDimsAndSymbols(
+        replacements, {}, affine_map.operands.size(), 0);
+    auto new_op =
+        CloneWithNewAffineMap(owner, affine_map, mlir::OpBuilder(owner));
+    owner->replaceAllUsesWith(new_op);
+    owner->erase();
+  }
+  return inner_loop;
+}
+
+void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
+                              int rotate_amount) {
+  CHECK_GE(rotate_amount, 0);
+  std::vector<unsigned> permutation(loops.size());
+  std::iota(permutation.begin(), permutation.end(), unsigned(0));
+  std::rotate(permutation.begin(),
+              permutation.begin() + loops.size() - rotate_amount,
+              permutation.end());
+  mlir::interchangeLoops(
+      llvm::ArrayRef<mlir::AffineForOp>(loops.begin(), loops.end()),
+      permutation);
+}
+
+}  // namespace mlir_gpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h
new file mode 100644
index 00000000000..76348b376b2
--- /dev/null
+++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter_transforms.h
@@ -0,0 +1,102 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_
+
+#include "absl/base/integral_types.h"
+#include "absl/types/span.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"  // from @llvm-project
+#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
+#include "mlir/IR/Operation.h"  // from @llvm-project
+
+namespace xla {
+namespace mlir_gpu {
+
+struct BoundAffineMap {
+  mlir::AffineMap affine_map;
+  std::vector<mlir::Value> operands;
+};
+
+BoundAffineMap GetBoundAffineMapFrom(mlir::Operation* op);
+mlir::Operation* CloneWithNewAffineMap(mlir::Operation* op,
+                                       BoundAffineMap new_affine,
+                                       mlir::OpBuilder builder);
+
+bool IsSimpleLoop(mlir::AffineForOp loop);
+std::vector<mlir::AffineForOp> CreateNestedSimpleLoops(
+    absl::Span<const int64_t> upper_bounds, mlir::OpBuilder builder);
+void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
+                           mlir::OpBuilder builder);
+
+// Tile a loop with trip count N by `size`. For now, N has to be a multiple of
+// size, but later this constraint will be removed.
+//
+// The major loop (with trip count N / size) stays as-is, while the minor loop
+// (with trip count `size`) will take over the body of `target`, and be placed
+// as the new body of `target`.
+//
+// `target` has to be within the same "perfectly nested loop group" as `loop`.
+// See the documentation for mlir::getPerfectlyNestedLoops.
+//
+// Example:
+// Before tiling `loop` with tile size X:
+//   for (loop in N)
+//     for (unrelated_loop in ...)
+//       for (target in ...)
+//         // pass loop into affine maps
+// After:
+//   for (loop in N / X)
+//     for (unrelated_loop in ...)
+//       for (target in ...)
+//         for (tiled_loop in X)
+//           // rewrite all affine exprs from loop to `loop * X + tiled_loop`.
+//
+// Design note:
+// TileLoop is different from mlir::tile. At the moment, mlir::tile does not
+// clearly document its exact tiling semantics, but the observed behavior is:
+//   for (i from 0 to N)
+//     for (unrelated_loop in ...)
+//       for (target in ...)
+//         // pass i into affine maps
+// =>
+//   for (i from 0 to N, step = X)
+//     for (unrelated_loop in ...)
+//       for (target in ...)
+//         for (j from i to min(i + X, N), step = 1)
+//           // pass j into affine maps
+//
+// There are two differences between mlir::tile and TileLoop:
+// * TileLoop always puts the tiling "stepping" logic into AffineExprs, so that
+//   all index calculation is done in AffineExprs and is easier to analyze in a
+//   single place.
+// * TileLoop doesn't plan to use max() and min() to handle the case where
+//   N % X != 0, because max() and min() are not representable in AffineExprs.
+//   TODO(timshen): support the case where N % X != 0.
+//
+// TODO(timshen): consider the possibility to reuse mlir::tile's logic to
+// achieve the same goal.
+mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size,
+                           mlir::AffineForOp target);
+
+// Sinks a segment of perfectly nested loops to the bottom. It implements this
+// by rotating the loop nest by rotate_amount.
+void SinkPerfectlyNestedLoops(absl::Span<const mlir::AffineForOp> loops,
+                              int rotate_amount);
+
+}  // namespace mlir_gpu
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EXPERIMENTAL_CONV_EMITTER_CONV_EMITTER_TRANSFORMS_H_
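
The index rewrite documented for TileLoop can be sanity-checked with plain integer arithmetic. The standalone program below is an illustration only and is not part of the patch; the values N and X are hypothetical stand-ins for the loop's trip count and the tile size. It verifies that a simple loop over [0, N) and the tiled pair of loops with index `outer * X + inner` enumerate exactly the same indices when N is a multiple of X, which is the `loop * X + tiled_loop` substitution that TileLoop applies to the affine maps.

// Illustration only -- not part of the patch above. Checks the tiling index
// identity under TileLoop's current N % X == 0 constraint.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  constexpr int64_t N = 12;  // hypothetical trip count
  constexpr int64_t X = 4;   // hypothetical tile size, with N % X == 0
  std::vector<int64_t> untiled, tiled;
  for (int64_t i = 0; i < N; ++i) untiled.push_back(i);
  for (int64_t outer = 0; outer < N / X; ++outer) {
    for (int64_t inner = 0; inner < X; ++inner) {
      tiled.push_back(outer * X + inner);  // the rewritten affine expression
    }
  }
  assert(untiled == tiled);  // both orders visit 0, 1, ..., N - 1
  return 0;
}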
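SinkPerfectlyNestedLoops builds its interchange permutation with std::iota followed by std::rotate before handing it to mlir::interchangeLoops. The snippet below, again illustrative and not part of the patch, reproduces only that permutation computation for a hypothetical four-deep nest with rotate_amount = 2, so the resulting rotation of the identity permutation is easy to see.

// Illustration only -- not part of the patch above. Reproduces the
// permutation computed inside SinkPerfectlyNestedLoops.
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  const size_t num_loops = 4;   // hypothetical nest depth
  const int rotate_amount = 2;  // hypothetical rotation amount
  std::vector<unsigned> permutation(num_loops);
  std::iota(permutation.begin(), permutation.end(), unsigned(0));
  std::rotate(permutation.begin(),
              permutation.begin() + num_loops - rotate_amount,
              permutation.end());
  // Prints "2 3 0 1": the identity permutation rotated by rotate_amount.
  for (unsigned p : permutation) std::printf("%u ", p);
  std::printf("\n");
  return 0;
}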