diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 5a50a536f8e..a954d017af7 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -463,16 +463,6 @@ cc_library( ], ) -cc_test( - name = "cpu_instruction_fusion_test", - size = "small", - srcs = ["cpu_instruction_fusion_test.cc"], - deps = [ - ":cpu_instruction_fusion", - "//tensorflow/compiler/xla/tests:hlo_test_base", - ], -) - cc_library( name = "cpu_parallelization_preparation", srcs = ["cpu_parallelization_preparation.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc index e4fc3cdd034..dc002846e9e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc @@ -20,28 +20,6 @@ limitations under the License. namespace xla { namespace cpu { -namespace { - -bool CanBeLoweredIntoElementalLoop(const HloInstruction& hlo) { - if (hlo.IsElementwise()) { - return hlo.operand_count() > 0; - } - - // These non-elementwise ops have a lowering that generates the output for a - // specified element at a time. - return (hlo.opcode() == HloOpcode::kConcatenate || - hlo.opcode() == HloOpcode::kReverse || - hlo.opcode() == HloOpcode::kBroadcast || - hlo.opcode() == HloOpcode::kSlice || - hlo.opcode() == HloOpcode::kDynamicSlice || - hlo.opcode() == HloOpcode::kDynamicUpdateSlice || - hlo.opcode() == HloOpcode::kReshape || - hlo.opcode() == HloOpcode::kTranspose || - hlo.opcode() == HloOpcode::kPad); -} - -} // namespace - bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer, int64 operand_index) { HloInstruction* producer = consumer->mutable_operand(operand_index); @@ -51,33 +29,20 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer, return false; } - // Condition for consumer: must act elementwise on the operand. This permits - // only elementwise ops or (potentially) fusion ops to act as consumers. - if (!consumer->IsElementwiseOnOperand(operand_index)) { + // Condition for consumer: must be elementwise or a fusion op + // (which necessarily only contains elementwise operations) + if (!(consumer->opcode() == HloOpcode::kFusion || + consumer->IsElementwise())) { return false; } - // Producer or consumer cannot be Map. Maps are technically elementwise but of - // a slightly different form (call instead of a computation). These are not + // Producer or consumer cannot be Map. Maps are technically elementwise but + // of a slightly different form (call instead of a computation). These are not // yet supported in the CPU backend. - if (producer->opcode() == HloOpcode::kMap || - consumer->opcode() == HloOpcode::kMap) { - return false; - } - - // Avoid dragging something that could otherwise be implemented as a - // bitcast into the loop. - if (producer->CouldBeBitcast()) { - return false; - } - - // Check to make sure that the producer can generate output a specified - // element at a time. - if (!CanBeLoweredIntoElementalLoop(*producer)) { - return false; - } - - return InstructionFusion::ShouldFuse(consumer, operand_index); + return producer->IsElementwise() && producer->operand_count() > 0 && + producer->opcode() != HloOpcode::kMap && + consumer->opcode() != HloOpcode::kMap && + InstructionFusion::ShouldFuse(consumer, operand_index); } } // namespace cpu diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc deleted file mode 100644 index a8700780b6c..00000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h" - -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" - -namespace xla { -namespace cpu { - -using InstructionFusionTest = HloTestBase; - -TEST_F(InstructionFusionTest, BroadcastFused) { - HloComputation::Builder builder(TestName()); - Shape param_shape = ShapeUtil::MakeShape(F32, {8}); - Shape result_shape = ShapeUtil::MakeShape(F32, {8, 8}); - auto param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, param_shape, "param")); - auto broadcast1 = builder.AddInstruction( - HloInstruction::CreateBroadcast(result_shape, param0, {1})); - builder.AddInstruction(HloInstruction::CreateUnary( - result_shape, HloOpcode::kNegate, broadcast1)); - - auto module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - auto computation = module->entry_computation(); - auto did_fusion = CpuInstructionFusion().Run(module.get()); - ASSERT_TRUE(did_fusion.ok()); - EXPECT_TRUE(did_fusion.ValueOrDie()); - - HloInstruction* root = computation->root_instruction(); - ASSERT_EQ(HloOpcode::kFusion, root->opcode()); - EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop); - HloInstruction* fused_root = root->fused_expression_root(); - EXPECT_EQ(HloOpcode::kNegate, fused_root->opcode()); - EXPECT_EQ(HloOpcode::kBroadcast, fused_root->operand(0)->opcode()); -} - -TEST_F(InstructionFusionTest, SliceBeforeReverseNotFused) { - HloComputation::Builder builder(TestName()); - Shape param_shape = ShapeUtil::MakeShape(F32, {8}); - Shape slice_shape = ShapeUtil::MakeShape(F32, {4}); - auto param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, param_shape, "param")); - // The (slice, reverse) pair can't be fused into a loop because reverse - // doesn't act elementwise on slice. - auto slice1 = builder.AddInstruction( - HloInstruction::CreateSlice(slice_shape, param0, {0}, {4}, {1})); - auto reverse2 = builder.AddInstruction( - HloInstruction::CreateReverse(slice_shape, slice1, {0})); - builder.AddInstruction( - HloInstruction::CreateUnary(slice_shape, HloOpcode::kNegate, reverse2)); - - auto module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - auto computation = module->entry_computation(); - auto did_fusion = CpuInstructionFusion().Run(module.get()); - ASSERT_TRUE(did_fusion.ok()); - EXPECT_TRUE(did_fusion.ValueOrDie()); - - HloInstruction* root = computation->root_instruction(); - ASSERT_EQ(HloOpcode::kFusion, root->opcode()); - EXPECT_EQ(root->fusion_kind(), HloInstruction::FusionKind::kLoop); - HloInstruction* fused_root = root->fused_expression_root(); - EXPECT_EQ(HloOpcode::kNegate, fused_root->opcode()); - EXPECT_EQ(HloOpcode::kReverse, fused_root->operand(0)->opcode()); - EXPECT_EQ(HloOpcode::kSlice, root->operand(0)->opcode()); -} - -} // namespace cpu -} // namespace xla - -int main(int argc, char** argv) { - return xla::ParseDebugOptionsFlagsAndRunTests(argc, argv); -} diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc index 5fc2b82fe41..f9676dfc19c 100644 --- a/tensorflow/compiler/xla/tests/fusion_test.cc +++ b/tensorflow/compiler/xla/tests/fusion_test.cc @@ -420,182 +420,6 @@ XLA_TEST_F(FusionTest, Reverse) { *ExecuteAndTransfer(std::move(hlo_module), {})); } -XLA_TEST_F(FusionTest, ConcatenateNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3}))); - auto const1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({4}))); - auto concat2 = builder.AddInstruction(HloInstruction::CreateConcatenate( - ShapeUtil::MakeShape(S32, {4}), {const0, const1}, 0)); - auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {4}), HloOpcode::kNegate, concat2)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, concat2}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -2, -3, -4}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -XLA_TEST_F(FusionTest, ReverseNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3}))); - auto reverse1 = builder.AddInstruction(HloInstruction::CreateReverse( - ShapeUtil::MakeShape(S32, {3}), const0, {0})); - auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {3}), HloOpcode::kNegate, reverse1)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, reverse1}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-3, -2, -1}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -XLA_TEST_F(FusionTest, BroadcastNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0<int32>(1))); - auto broadcast1 = builder.AddInstruction(HloInstruction::CreateBroadcast( - ShapeUtil::MakeShape(S32, {2}), const0, {})); - auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, broadcast1)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, broadcast1}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -1}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -XLA_TEST_F(FusionTest, SliceNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4}))); - auto slice1 = builder.AddInstruction(HloInstruction::CreateSlice( - ShapeUtil::MakeShape(S32, {2}), const0, {0}, {4}, {2})); - auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, slice1)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, slice1}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -3}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -XLA_TEST_F(FusionTest, DynamicSliceNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4}))); - auto const1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1}))); - auto dynamic_slice2 = - builder.AddInstruction(HloInstruction::CreateDynamicSlice( - ShapeUtil::MakeShape(S32, {2}), const0, const1, {2})); - auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {2}), HloOpcode::kNegate, dynamic_slice2)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction( - /*instructions_to_fuse=*/{negate3, dynamic_slice2}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-2, -3}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -XLA_TEST_F(FusionTest, DynamicUpdateSliceNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4}))); - auto const1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({6, 7}))); - auto const2 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1}))); - auto dynamic_update_slice3 = - builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - ShapeUtil::MakeShape(S32, {4}), const0, const1, const2)); - auto negate4 = builder.AddInstruction( - HloInstruction::CreateUnary(ShapeUtil::MakeShape(S32, {4}), - HloOpcode::kNegate, dynamic_update_slice3)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction( - /*instructions_to_fuse=*/{negate4, dynamic_update_slice3}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR1<int32>({-1, -6, -7, -4}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -XLA_TEST_F(FusionTest, ReshapeNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4}))); - auto reshape1 = builder.AddInstruction( - HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {2, 2}), const0)); - auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {2, 2}), HloOpcode::kNegate, reshape1)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, reshape1}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR2<int32>({{-1, -2}, {-3, -4}}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -// TODO(b/64070202): Investigate failure. -XLA_TEST_F(FusionTest, DISABLED_ON_GPU(TransposeNegate)) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR2<int32>({{1, 2}, {3, 4}}))); - auto transpose1 = builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(S32, {2, 2}), const0, {1, 0})); - auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {2, 2}), HloOpcode::kNegate, transpose1)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, transpose1}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual(*Literal::CreateR2<int32>({{-1, -3}, {-2, -4}}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - -XLA_TEST_F(FusionTest, PadNegate) { - auto builder = HloComputation::Builder(TestName()); - auto hlo_module = CreateNewModule(); - auto const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4}))); - auto const1 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0<int32>(-1))); - // Set up the padding configuration {low: 1, high: 1, interior: 1}. - PaddingConfig padding_config; - auto dimension = padding_config.add_dimensions(); - dimension->set_edge_padding_low(1); - dimension->set_edge_padding_high(1); - dimension->set_interior_padding(1); - auto pad2 = builder.AddInstruction(HloInstruction::CreatePad( - ShapeUtil::MakeShape(S32, {9}), const0, const1, padding_config)); - auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {9}), HloOpcode::kNegate, pad2)); - hlo_module->AddEntryComputation(builder.Build()) - ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, pad2}, - HloInstruction::FusionKind::kLoop); - - LiteralTestUtil::ExpectEqual( - *Literal::CreateR1<int32>({1, -1, 1, -2, 1, -3, 1, -4, 1}), - *ExecuteAndTransfer(std::move(hlo_module), {})); -} - std::unique_ptr<HloComputation> MakeReduceTestComputation() { auto builder = HloComputation::Builder("add"); auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(