diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 5a50a536f8e..a954d017af7 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -463,16 +463,6 @@ cc_library(
     ],
 )
 
-cc_test(
-    name = "cpu_instruction_fusion_test",
-    size = "small",
-    srcs = ["cpu_instruction_fusion_test.cc"],
-    deps = [
-        ":cpu_instruction_fusion",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-    ],
-)
-
 cc_library(
     name = "cpu_parallelization_preparation",
     srcs = ["cpu_parallelization_preparation.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
index e4fc3cdd034..dc002846e9e 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
@@ -20,28 +20,6 @@ limitations under the License.
 namespace xla {
 namespace cpu {
 
-namespace {
-
-bool CanBeLoweredIntoElementalLoop(const HloInstruction& hlo) {
-  if (hlo.IsElementwise()) {
-    return hlo.operand_count() > 0;
-  }
-
-  // These non-elementwise ops have a lowering that generates the output for a
-  // specified element at a time.
-  return (hlo.opcode() == HloOpcode::kConcatenate ||
-          hlo.opcode() == HloOpcode::kReverse ||
-          hlo.opcode() == HloOpcode::kBroadcast ||
-          hlo.opcode() == HloOpcode::kSlice ||
-          hlo.opcode() == HloOpcode::kDynamicSlice ||
-          hlo.opcode() == HloOpcode::kDynamicUpdateSlice ||
-          hlo.opcode() == HloOpcode::kReshape ||
-          hlo.opcode() == HloOpcode::kTranspose ||
-          hlo.opcode() == HloOpcode::kPad);
-}
-
-}  // namespace
-
 bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
                                       int64 operand_index) {
   HloInstruction* producer = consumer->mutable_operand(operand_index);
@@ -51,33 +29,20 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
     return false;
   }
 
-  // Condition for consumer: must act elementwise on the operand. This permits
-  // only elementwise ops or (potentially) fusion ops to act as consumers.
-  if (!consumer->IsElementwiseOnOperand(operand_index)) {
+  // Condition for consumer: must be elementwise or a fusion op
+  // (which necessarily contains only elementwise operations).
+  if (!(consumer->opcode() == HloOpcode::kFusion ||
+        consumer->IsElementwise())) {
     return false;
   }
 
-  // Producer or consumer cannot be Map. Maps are technically elementwise but of
-  // a slightly different form (call instead of a computation). These are not
+  // Producer or consumer cannot be Map. Maps are technically elementwise but
+  // of a slightly different form (call instead of a computation). These are not
   // yet supported in the CPU backend.
-  if (producer->opcode() == HloOpcode::kMap ||
-      consumer->opcode() == HloOpcode::kMap) {
-    return false;
-  }
-
-  // Avoid dragging something that could otherwise be implemented as a
-  // bitcast into the loop.
-  if (producer->CouldBeBitcast()) {
-    return false;
-  }
-
-  // Check to make sure that the producer can generate output a specified
-  // element at a time.
-  if (!CanBeLoweredIntoElementalLoop(*producer)) {
-    return false;
-  }
-
-  return InstructionFusion::ShouldFuse(consumer, operand_index);
+  return producer->IsElementwise() && producer->operand_count() > 0 &&
+         producer->opcode() != HloOpcode::kMap &&
+         consumer->opcode() != HloOpcode::kMap &&
+         InstructionFusion::ShouldFuse(consumer, operand_index);
 }
 
 }  // namespace cpu
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
deleted file mode 100644
index a8700780b6c..00000000000
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
-
-#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
-
-namespace xla {
-namespace cpu {
-
-using InstructionFusionTest = HloTestBase;
-
-TEST_F(InstructionFusionTest, BroadcastFused) {
-  HloComputation::Builder builder(TestName());
-  Shape param_shape = ShapeUtil::MakeShape(F32, {8});
-  Shape result_shape = ShapeUtil::MakeShape(F32, {8, 8});
-  auto param0 = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, param_shape, "param"));
-  auto broadcast1 = builder.AddInstruction(
-      HloInstruction::CreateBroadcast(result_shape, param0, {1}));
-  builder.AddInstruction(HloInstruction::CreateUnary(
-      result_shape, HloOpcode::kNegate, broadcast1));
-
-  auto module = CreateNewModule();
-  module->AddEntryComputation(builder.Build());
-  auto computation = module->entry_computation();
-  auto did_fusion = CpuInstructionFusion().Run(module.get());
-  ASSERT_TRUE(did_fusion.ok());
-  EXPECT_TRUE(did_fusion.ValueOrDie());
-
-  HloInstruction* root = computation->root_instruction();
-  ASSERT_EQ(HloOpcode::kFusion, root->opcode());
-  EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop);
-  HloInstruction* fused_root = root->fused_expression_root();
-  EXPECT_EQ(HloOpcode::kNegate, fused_root->opcode());
-  EXPECT_EQ(HloOpcode::kBroadcast, fused_root->operand(0)->opcode());
-}
-
-TEST_F(InstructionFusionTest, SliceBeforeReverseNotFused) {
-  HloComputation::Builder builder(TestName());
-  Shape param_shape = ShapeUtil::MakeShape(F32, {8});
-  Shape slice_shape = ShapeUtil::MakeShape(F32, {4});
-  auto param0 = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, param_shape, "param"));
-  // The (slice, reverse) pair can't be fused into a loop because reverse
-  // doesn't act elementwise on slice.
-  auto slice1 = builder.AddInstruction(
-      HloInstruction::CreateSlice(slice_shape, param0, {0}, {4}, {1}));
-  auto reverse2 = builder.AddInstruction(
-      HloInstruction::CreateReverse(slice_shape, slice1, {0}));
-  builder.AddInstruction(
-      HloInstruction::CreateUnary(slice_shape, HloOpcode::kNegate, reverse2));
-
-  auto module = CreateNewModule();
-  module->AddEntryComputation(builder.Build());
-  auto computation = module->entry_computation();
-  auto did_fusion = CpuInstructionFusion().Run(module.get());
-  ASSERT_TRUE(did_fusion.ok());
-  EXPECT_TRUE(did_fusion.ValueOrDie());
-
-  HloInstruction* root = computation->root_instruction();
-  ASSERT_EQ(HloOpcode::kFusion, root->opcode());
-  EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop);
-  HloInstruction* fused_root = root->fused_expression_root();
-  EXPECT_EQ(HloOpcode::kNegate, fused_root->opcode());
-  EXPECT_EQ(HloOpcode::kReverse, fused_root->operand(0)->opcode());
-  EXPECT_EQ(HloOpcode::kSlice, root->operand(0)->opcode());
-}
-
-}  // namespace cpu
-}  // namespace xla
-
-int main(int argc, char** argv) {
-  return xla::ParseDebugOptionsFlagsAndRunTests(argc, argv);
-}
diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc
index 5fc2b82fe41..f9676dfc19c 100644
--- a/tensorflow/compiler/xla/tests/fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/fusion_test.cc
@@ -420,182 +420,6 @@ XLA_TEST_F(FusionTest, Reverse) {
                                *ExecuteAndTransfer(std::move(hlo_module), {}));
 }
 
-XLA_TEST_F(FusionTest, ConcatenateNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
-  auto const1 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({4})));
-  auto concat2 = builder.AddInstruction(HloInstruction::CreateConcatenate(
-      ShapeUtil::MakeShape(S32, {4}), {const0, const1}, 0));
-  auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {4}), HloOpcode::kNegate, concat2));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, concat2},
-                                HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -2, -3, -4}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-XLA_TEST_F(FusionTest, ReverseNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
-  auto reverse1 = builder.AddInstruction(HloInstruction::CreateReverse(
-      ShapeUtil::MakeShape(S32, {3}), const0, {0}));
-  auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {3}), HloOpcode::kNegate, reverse1));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, reverse1},
-                                HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-3, -2, -1}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-XLA_TEST_F(FusionTest, BroadcastNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<int32>(1)));
-  auto broadcast1 = builder.AddInstruction(HloInstruction::CreateBroadcast(
-      ShapeUtil::MakeShape(S32, {2}), const0, {}));
-  auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, broadcast1));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, broadcast1},
-                                HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -1}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-XLA_TEST_F(FusionTest, SliceNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
-  auto slice1 = builder.AddInstruction(HloInstruction::CreateSlice(
-      ShapeUtil::MakeShape(S32, {2}), const0, {0}, {4}, {2}));
-  auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, slice1));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, slice1},
-                                HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -3}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-XLA_TEST_F(FusionTest, DynamicSliceNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
-  auto const1 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
-  auto dynamic_slice2 =
-      builder.AddInstruction(HloInstruction::CreateDynamicSlice(
-          ShapeUtil::MakeShape(S32, {2}), const0, const1, {2}));
-  auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, dynamic_slice2));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(
-          /*instructions_to_fuse=*/{negate3, dynamic_slice2},
-          HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-2, -3}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-XLA_TEST_F(FusionTest, DynamicUpdateSliceNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
-  auto const1 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({6, 7})));
-  auto const2 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
-  auto dynamic_update_slice3 =
-      builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
-          ShapeUtil::MakeShape(S32, {4}), const0, const1, const2));
-  auto negate4 = builder.AddInstruction(
-      HloInstruction::CreateUnary(ShapeUtil::MakeShape(S32, {4}),
-                                  HloOpcode::kNegate, dynamic_update_slice3));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(
-          /*instructions_to_fuse=*/{negate4, dynamic_update_slice3},
-          HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -6, -7, -4}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-XLA_TEST_F(FusionTest, ReshapeNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
-  auto reshape1 = builder.AddInstruction(
-      HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {2, 2}), const0));
-  auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {2, 2}), HloOpcode::kNegate, reshape1));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, reshape1},
-                                HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR2<int32>({{-1, -2}, {-3, -4}}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-// TODO(b/64070202): Investigate failure.
-XLA_TEST_F(FusionTest, DISABLED_ON_GPU(TransposeNegate)) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(HloInstruction::CreateConstant(
-      Literal::CreateR2<int32>({{1, 2}, {3, 4}})));
-  auto transpose1 = builder.AddInstruction(HloInstruction::CreateTranspose(
-      ShapeUtil::MakeShape(S32, {2, 2}), const0, {1, 0}));
-  auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {2, 2}), HloOpcode::kNegate, transpose1));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, transpose1},
-                                HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(*Literal::CreateR2<int32>({{-1, -3}, {-2, -4}}),
-                               *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
-XLA_TEST_F(FusionTest, PadNegate) {
-  auto builder = HloComputation::Builder(TestName());
-  auto hlo_module = CreateNewModule();
-  auto const0 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
-  auto const1 = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<int32>(-1)));
-  // Set up the padding configuration {low: 1, high: 1, interior: 1}.
-  PaddingConfig padding_config;
-  auto dimension = padding_config.add_dimensions();
-  dimension->set_edge_padding_low(1);
-  dimension->set_edge_padding_high(1);
-  dimension->set_interior_padding(1);
-  auto pad2 = builder.AddInstruction(HloInstruction::CreatePad(
-      ShapeUtil::MakeShape(S32, {9}), const0, const1, padding_config));
-  auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {9}), HloOpcode::kNegate, pad2));
-  hlo_module->AddEntryComputation(builder.Build())
-      ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, pad2},
-                                HloInstruction::FusionKind::kLoop);
-
-  LiteralTestUtil::ExpectEqual(
-      *Literal::CreateR1<int32>({1, -1, 1, -2, 1, -3, 1, -4, 1}),
-      *ExecuteAndTransfer(std::move(hlo_module), {}));
-}
-
 std::unique_ptr<HloComputation> MakeReduceTestComputation() {
   auto builder = HloComputation::Builder("add");
   auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(