Undo loop fusion changes for now as they seem to be altering a few results.

END_PUBLIC
RELNOTES: n/a

BEGIN_PUBLIC
Automated g4 rollback of changelist 164825735

PiperOrigin-RevId: 164883478
This commit is contained in:
A. Unique TensorFlower 2017-08-10 11:55:59 -07:00 committed by TensorFlower Gardener
parent 36c1e8b0d2
commit 46e4de6e5f
4 changed files with 10 additions and 318 deletions

View File

@ -463,16 +463,6 @@ cc_library(
],
)
# Small-sized C++ test target built from cpu_instruction_fusion_test.cc. It
# links against the :cpu_instruction_fusion library from this package and the
# shared XLA HLO test base.
cc_test(
name = "cpu_instruction_fusion_test",
size = "small",
srcs = ["cpu_instruction_fusion_test.cc"],
deps = [
":cpu_instruction_fusion",
"//tensorflow/compiler/xla/tests:hlo_test_base",
],
)
cc_library(
name = "cpu_parallelization_preparation",
srcs = ["cpu_parallelization_preparation.cc"],

View File

@ -20,28 +20,6 @@ limitations under the License.
namespace xla {
namespace cpu {
namespace {
// Reports whether `hlo` has a lowering that can generate its output for one
// specified element at a time.
bool CanBeLoweredIntoElementalLoop(const HloInstruction& hlo) {
  // Elementwise instructions qualify as long as they have at least one
  // operand.
  if (hlo.IsElementwise()) {
    return hlo.operand_count() > 0;
  }

  // Beyond that, only this fixed set of non-elementwise opcodes qualifies.
  switch (hlo.opcode()) {
    case HloOpcode::kConcatenate:
    case HloOpcode::kReverse:
    case HloOpcode::kBroadcast:
    case HloOpcode::kSlice:
    case HloOpcode::kDynamicSlice:
    case HloOpcode::kDynamicUpdateSlice:
    case HloOpcode::kReshape:
    case HloOpcode::kTranspose:
    case HloOpcode::kPad:
      return true;
    default:
      return false;
  }
}
} // namespace
bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
int64 operand_index) {
HloInstruction* producer = consumer->mutable_operand(operand_index);
@ -51,33 +29,20 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
return false;
}
// Condition for consumer: must act elementwise on the operand. This permits
// only elementwise ops or (potentially) fusion ops to act as consumers.
if (!consumer->IsElementwiseOnOperand(operand_index)) {
// Condition for consumer: must be elementwise or a fusion op
// (which necessarily only contains elementwise operations)
if (!(consumer->opcode() == HloOpcode::kFusion ||
consumer->IsElementwise())) {
return false;
}
// Producer or consumer cannot be Map. Maps are technically elementwise but of
// a slightly different form (call instead of a computation). These are not
// Producer or consumer cannot be Map. Maps are technically elementwise but
// of a slightly different form (call instead of a computation). These are not
// yet supported in the CPU backend.
if (producer->opcode() == HloOpcode::kMap ||
consumer->opcode() == HloOpcode::kMap) {
return false;
}
// Avoid dragging something that could otherwise be implemented as a
// bitcast into the loop.
if (producer->CouldBeBitcast()) {
return false;
}
// Check to make sure that the producer can generate output a specified
// element at a time.
if (!CanBeLoweredIntoElementalLoop(*producer)) {
return false;
}
return InstructionFusion::ShouldFuse(consumer, operand_index);
return producer->IsElementwise() && producer->operand_count() > 0 &&
producer->opcode() != HloOpcode::kMap &&
consumer->opcode() != HloOpcode::kMap &&
InstructionFusion::ShouldFuse(consumer, operand_index);
}
} // namespace cpu

View File

@ -1,87 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
namespace xla {
namespace cpu {
using InstructionFusionTest = HloTestBase;
// Builds param -> broadcast -> negate and checks that running
// CpuInstructionFusion leaves a loop fusion at the root whose fused expression
// is negate(broadcast(...)).
TEST_F(InstructionFusionTest, BroadcastFused) {
  HloComputation::Builder builder(TestName());
  const Shape operand_shape = ShapeUtil::MakeShape(F32, {8});
  const Shape output_shape = ShapeUtil::MakeShape(F32, {8, 8});

  HloInstruction* param = builder.AddInstruction(
      HloInstruction::CreateParameter(0, operand_shape, "param"));
  HloInstruction* broadcast = builder.AddInstruction(
      HloInstruction::CreateBroadcast(output_shape, param, {1}));
  // The negate is added last, so it becomes the computation root.
  builder.AddInstruction(HloInstruction::CreateUnary(
      output_shape, HloOpcode::kNegate, broadcast));

  auto module = CreateNewModule();
  module->AddEntryComputation(builder.Build());
  auto entry = module->entry_computation();

  auto fusion_result = CpuInstructionFusion().Run(module.get());
  ASSERT_TRUE(fusion_result.ok());
  EXPECT_TRUE(fusion_result.ValueOrDie());

  // The root must now be a loop fusion wrapping negate(broadcast).
  HloInstruction* fusion = entry->root_instruction();
  ASSERT_EQ(HloOpcode::kFusion, fusion->opcode());
  EXPECT_EQ(fusion->fusion_kind(), HloInstruction::FusionKind::kLoop);

  HloInstruction* fused_negate = fusion->fused_expression_root();
  EXPECT_EQ(HloOpcode::kNegate, fused_negate->opcode());
  EXPECT_EQ(HloOpcode::kBroadcast, fused_negate->operand(0)->opcode());
}
// Builds param -> slice -> reverse -> negate and checks that fusion stops at
// the slice: reverse and negate end up inside a loop fusion, while the slice
// remains an operand of that fusion.
TEST_F(InstructionFusionTest, SliceBeforeReverseNotFused) {
  HloComputation::Builder builder(TestName());
  const Shape input_shape = ShapeUtil::MakeShape(F32, {8});
  const Shape sliced_shape = ShapeUtil::MakeShape(F32, {4});

  HloInstruction* param = builder.AddInstruction(
      HloInstruction::CreateParameter(0, input_shape, "param"));
  // Reverse does not act elementwise on the slice, so the (slice, reverse)
  // pair cannot be fused into a loop.
  HloInstruction* slice = builder.AddInstruction(
      HloInstruction::CreateSlice(sliced_shape, param, {0}, {4}, {1}));
  HloInstruction* reverse = builder.AddInstruction(
      HloInstruction::CreateReverse(sliced_shape, slice, {0}));
  builder.AddInstruction(
      HloInstruction::CreateUnary(sliced_shape, HloOpcode::kNegate, reverse));

  auto module = CreateNewModule();
  module->AddEntryComputation(builder.Build());
  auto entry = module->entry_computation();

  auto fusion_result = CpuInstructionFusion().Run(module.get());
  ASSERT_TRUE(fusion_result.ok());
  EXPECT_TRUE(fusion_result.ValueOrDie());

  HloInstruction* fusion = entry->root_instruction();
  ASSERT_EQ(HloOpcode::kFusion, fusion->opcode());
  EXPECT_EQ(fusion->fusion_kind(), HloInstruction::FusionKind::kLoop);

  // Inside the fusion: negate(reverse(...)). Outside: the slice feeds it.
  HloInstruction* fused_negate = fusion->fused_expression_root();
  EXPECT_EQ(HloOpcode::kNegate, fused_negate->opcode());
  EXPECT_EQ(HloOpcode::kReverse, fused_negate->operand(0)->opcode());
  EXPECT_EQ(HloOpcode::kSlice, fusion->operand(0)->opcode());
}
} // namespace cpu
} // namespace xla
// Test binary entry point: forwards the command line to
// xla::ParseDebugOptionsFlagsAndRunTests and returns whatever it returns.
int main(int argc, char** argv) {
  const int exit_code = xla::ParseDebugOptionsFlagsAndRunTests(argc, argv);
  return exit_code;
}

View File

@ -420,182 +420,6 @@ XLA_TEST_F(FusionTest, Reverse) {
*ExecuteAndTransfer(std::move(hlo_module), {}));
}
// Loop-fuses negate(concatenate({1, 2, 3}, {4})) and checks the executed
// result.
XLA_TEST_F(FusionTest, ConcatenateNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape result_shape = ShapeUtil::MakeShape(S32, {4});

  HloInstruction* head = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
  HloInstruction* tail = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({4})));
  HloInstruction* concat = builder.AddInstruction(
      HloInstruction::CreateConcatenate(result_shape, {head, tail}, 0));
  HloInstruction* negate = builder.AddInstruction(
      HloInstruction::CreateUnary(result_shape, HloOpcode::kNegate, concat));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, concat},
                                 HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR1<int32>({-1, -2, -3, -4});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(reverse({1, 2, 3})) and checks the executed result.
XLA_TEST_F(FusionTest, ReverseNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape vec3_shape = ShapeUtil::MakeShape(S32, {3});

  HloInstruction* input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
  HloInstruction* reverse = builder.AddInstruction(
      HloInstruction::CreateReverse(vec3_shape, input, {0}));
  HloInstruction* negate = builder.AddInstruction(
      HloInstruction::CreateUnary(vec3_shape, HloOpcode::kNegate, reverse));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, reverse},
                                 HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR1<int32>({-3, -2, -1});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(broadcast(1)) with a two-element result and checks the
// executed result.
XLA_TEST_F(FusionTest, BroadcastNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape vec2_shape = ShapeUtil::MakeShape(S32, {2});

  HloInstruction* scalar = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR0<int32>(1)));
  HloInstruction* broadcast = builder.AddInstruction(
      HloInstruction::CreateBroadcast(vec2_shape, scalar, {}));
  HloInstruction* negate = builder.AddInstruction(
      HloInstruction::CreateUnary(vec2_shape, HloOpcode::kNegate, broadcast));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, broadcast},
                                 HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR1<int32>({-1, -1});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(slice({1, 2, 3, 4})) where the slice covers [0, 4) with
// stride 2, and checks the executed result.
XLA_TEST_F(FusionTest, SliceNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape vec2_shape = ShapeUtil::MakeShape(S32, {2});

  HloInstruction* input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  HloInstruction* slice = builder.AddInstruction(
      HloInstruction::CreateSlice(vec2_shape, input, {0}, {4}, {2}));
  HloInstruction* negate = builder.AddInstruction(
      HloInstruction::CreateUnary(vec2_shape, HloOpcode::kNegate, slice));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, slice},
                                 HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR1<int32>({-1, -3});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(dynamic-slice({1, 2, 3, 4}, start={1}, size={2})) and
// checks the executed result.
XLA_TEST_F(FusionTest, DynamicSliceNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape vec2_shape = ShapeUtil::MakeShape(S32, {2});

  HloInstruction* input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  HloInstruction* start_index = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
  HloInstruction* dynamic_slice = builder.AddInstruction(
      HloInstruction::CreateDynamicSlice(vec2_shape, input, start_index, {2}));
  HloInstruction* negate = builder.AddInstruction(HloInstruction::CreateUnary(
      vec2_shape, HloOpcode::kNegate, dynamic_slice));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(
      /*instructions_to_fuse=*/{negate, dynamic_slice},
      HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR1<int32>({-2, -3});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(dynamic-update-slice({1, 2, 3, 4}, {6, 7}, start={1}))
// and checks the executed result.
XLA_TEST_F(FusionTest, DynamicUpdateSliceNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape vec4_shape = ShapeUtil::MakeShape(S32, {4});

  HloInstruction* input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  HloInstruction* update = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({6, 7})));
  HloInstruction* start_index = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
  HloInstruction* dynamic_update_slice =
      builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
          vec4_shape, input, update, start_index));
  HloInstruction* negate = builder.AddInstruction(HloInstruction::CreateUnary(
      vec4_shape, HloOpcode::kNegate, dynamic_update_slice));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(
      /*instructions_to_fuse=*/{negate, dynamic_update_slice},
      HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR1<int32>({-1, -6, -7, -4});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(reshape({1, 2, 3, 4} -> 2x2)) and checks the executed
// result.
XLA_TEST_F(FusionTest, ReshapeNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape matrix_shape = ShapeUtil::MakeShape(S32, {2, 2});

  HloInstruction* input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  HloInstruction* reshape = builder.AddInstruction(
      HloInstruction::CreateReshape(matrix_shape, input));
  HloInstruction* negate = builder.AddInstruction(
      HloInstruction::CreateUnary(matrix_shape, HloOpcode::kNegate, reshape));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, reshape},
                                 HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR2<int32>({{-1, -2}, {-3, -4}});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(transpose({{1, 2}, {3, 4}})) with dimensions {1, 0} and
// checks the executed result. Wrapped in DISABLED_ON_GPU.
// TODO(b/64070202): Investigate failure.
XLA_TEST_F(FusionTest, DISABLED_ON_GPU(TransposeNegate)) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape matrix_shape = ShapeUtil::MakeShape(S32, {2, 2});

  HloInstruction* input = builder.AddInstruction(HloInstruction::CreateConstant(
      Literal::CreateR2<int32>({{1, 2}, {3, 4}})));
  HloInstruction* transpose = builder.AddInstruction(
      HloInstruction::CreateTranspose(matrix_shape, input, {1, 0}));
  HloInstruction* negate = builder.AddInstruction(HloInstruction::CreateUnary(
      matrix_shape, HloOpcode::kNegate, transpose));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, transpose},
                                 HloInstruction::FusionKind::kLoop);

  const auto expected = Literal::CreateR2<int32>({{-1, -3}, {-2, -4}});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
// Loop-fuses negate(pad({1, 2, 3, 4}, padding value -1)) and checks the
// executed result.
XLA_TEST_F(FusionTest, PadNegate) {
  HloComputation::Builder builder(TestName());
  auto hlo_module = CreateNewModule();
  const Shape padded_shape = ShapeUtil::MakeShape(S32, {9});

  HloInstruction* input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  HloInstruction* padding_value = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR0<int32>(-1)));

  // Padding configuration for the single dimension:
  // {low: 1, high: 1, interior: 1}.
  PaddingConfig padding_config;
  auto dim_config = padding_config.add_dimensions();
  dim_config->set_edge_padding_low(1);
  dim_config->set_edge_padding_high(1);
  dim_config->set_interior_padding(1);

  HloInstruction* pad = builder.AddInstruction(HloInstruction::CreatePad(
      padded_shape, input, padding_value, padding_config));
  HloInstruction* negate = builder.AddInstruction(
      HloInstruction::CreateUnary(padded_shape, HloOpcode::kNegate, pad));

  HloComputation* entry = hlo_module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, pad},
                                 HloInstruction::FusionKind::kLoop);

  const auto expected =
      Literal::CreateR1<int32>({1, -1, 1, -2, 1, -3, 1, -4, 1});
  const auto actual = ExecuteAndTransfer(std::move(hlo_module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
std::unique_ptr<HloComputation> MakeReduceTestComputation() {
auto builder = HloComputation::Builder("add");
auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(