Undo loop fusion changes for now as they seem to be altering a few results.
END_PUBLIC RELNOTES: n/a BEGIN_PUBLIC Automated g4 rollback of changelist 164825735 PiperOrigin-RevId: 164883478
This commit is contained in:
parent
36c1e8b0d2
commit
46e4de6e5f
tensorflow/compiler/xla
@ -463,16 +463,6 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
# Small-sized C++ test target built from cpu_instruction_fusion_test.cc.
# It depends on the local :cpu_instruction_fusion target and on
# //tensorflow/compiler/xla/tests:hlo_test_base.
cc_test(
|
||||
name = "cpu_instruction_fusion_test",
|
||||
size = "small",
|
||||
srcs = ["cpu_instruction_fusion_test.cc"],
|
||||
deps = [
|
||||
":cpu_instruction_fusion",
|
||||
"//tensorflow/compiler/xla/tests:hlo_test_base",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cpu_parallelization_preparation",
|
||||
srcs = ["cpu_parallelization_preparation.cc"],
|
||||
|
@ -20,28 +20,6 @@ limitations under the License.
|
||||
namespace xla {
|
||||
namespace cpu {
|
||||
|
||||
namespace {
|
||||
|
||||
bool CanBeLoweredIntoElementalLoop(const HloInstruction& hlo) {
|
||||
if (hlo.IsElementwise()) {
|
||||
return hlo.operand_count() > 0;
|
||||
}
|
||||
|
||||
// These non-elementwise ops have a lowering that generates the output for a
|
||||
// specified element at a time.
|
||||
return (hlo.opcode() == HloOpcode::kConcatenate ||
|
||||
hlo.opcode() == HloOpcode::kReverse ||
|
||||
hlo.opcode() == HloOpcode::kBroadcast ||
|
||||
hlo.opcode() == HloOpcode::kSlice ||
|
||||
hlo.opcode() == HloOpcode::kDynamicSlice ||
|
||||
hlo.opcode() == HloOpcode::kDynamicUpdateSlice ||
|
||||
hlo.opcode() == HloOpcode::kReshape ||
|
||||
hlo.opcode() == HloOpcode::kTranspose ||
|
||||
hlo.opcode() == HloOpcode::kPad);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
|
||||
int64 operand_index) {
|
||||
HloInstruction* producer = consumer->mutable_operand(operand_index);
|
||||
@ -51,33 +29,20 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Condition for consumer: must act elementwise on the operand. This permits
|
||||
// only elementwise ops or (potentially) fusion ops to act as consumers.
|
||||
if (!consumer->IsElementwiseOnOperand(operand_index)) {
|
||||
// Condition for consumer: must be elementwise or a fusion op
|
||||
// (which necessarily only contains elementwise operations)
|
||||
if (!(consumer->opcode() == HloOpcode::kFusion ||
|
||||
consumer->IsElementwise())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Producer or consumer cannot be Map. Maps are technically elementwise but of
|
||||
// a slightly different form (call instead of a computation). These are not
|
||||
// Producer or consumer cannot be Map. Maps are technically elementwise but
|
||||
// of a slightly different form (call instead of a computation). These are not
|
||||
// yet supported in the CPU backend.
|
||||
if (producer->opcode() == HloOpcode::kMap ||
|
||||
consumer->opcode() == HloOpcode::kMap) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Avoid dragging something that could otherwise be implemented as a
|
||||
// bitcast into the loop.
|
||||
if (producer->CouldBeBitcast()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check to make sure that the producer can generate output a specified
|
||||
// element at a time.
|
||||
if (!CanBeLoweredIntoElementalLoop(*producer)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return InstructionFusion::ShouldFuse(consumer, operand_index);
|
||||
return producer->IsElementwise() && producer->operand_count() > 0 &&
|
||||
producer->opcode() != HloOpcode::kMap &&
|
||||
consumer->opcode() != HloOpcode::kMap &&
|
||||
InstructionFusion::ShouldFuse(consumer, operand_index);
|
||||
}
|
||||
|
||||
} // namespace cpu
|
||||
|
@ -1,87 +0,0 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
|
||||
|
||||
#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
|
||||
|
||||
namespace xla {
|
||||
namespace cpu {
|
||||
|
||||
using InstructionFusionTest = HloTestBase;
|
||||
|
||||
// Builds param -> broadcast -> negate, runs CpuInstructionFusion, and expects
// the root to become a loop fusion whose fused root is the negate fed by the
// broadcast.
TEST_F(InstructionFusionTest, BroadcastFused) {
  HloComputation::Builder builder(TestName());
  Shape input_shape = ShapeUtil::MakeShape(F32, {8});
  Shape output_shape = ShapeUtil::MakeShape(F32, {8, 8});

  auto input = builder.AddInstruction(
      HloInstruction::CreateParameter(0, input_shape, "param"));
  auto broadcast = builder.AddInstruction(
      HloInstruction::CreateBroadcast(output_shape, input, {1}));
  builder.AddInstruction(
      HloInstruction::CreateUnary(output_shape, HloOpcode::kNegate, broadcast));

  auto module = CreateNewModule();
  module->AddEntryComputation(builder.Build());
  auto entry = module->entry_computation();

  // The pass must succeed and report that it changed the module.
  auto fusion_result = CpuInstructionFusion().Run(module.get());
  ASSERT_TRUE(fusion_result.ok());
  EXPECT_TRUE(fusion_result.ValueOrDie());

  HloInstruction* fusion = entry->root_instruction();
  ASSERT_EQ(HloOpcode::kFusion, fusion->opcode());
  EXPECT_EQ(fusion->fusion_kind(), HloInstruction::FusionKind::kLoop);

  HloInstruction* fused_negate = fusion->fused_expression_root();
  EXPECT_EQ(HloOpcode::kNegate, fused_negate->opcode());
  EXPECT_EQ(HloOpcode::kBroadcast, fused_negate->operand(0)->opcode());
}
|
||||
|
||||
// Builds param -> slice -> reverse -> negate, runs CpuInstructionFusion, and
// expects only (reverse, negate) to end up inside the loop fusion while the
// slice stays outside as the fusion's operand.
TEST_F(InstructionFusionTest, SliceBeforeReverseNotFused) {
  HloComputation::Builder builder(TestName());
  Shape input_shape = ShapeUtil::MakeShape(F32, {8});
  Shape sliced_shape = ShapeUtil::MakeShape(F32, {4});

  auto input = builder.AddInstruction(
      HloInstruction::CreateParameter(0, input_shape, "param"));
  // Reverse does not act elementwise on the slice, so the (slice, reverse)
  // pair can't be fused into a loop.
  auto slice = builder.AddInstruction(
      HloInstruction::CreateSlice(sliced_shape, input, {0}, {4}, {1}));
  auto reverse = builder.AddInstruction(
      HloInstruction::CreateReverse(sliced_shape, slice, {0}));
  builder.AddInstruction(
      HloInstruction::CreateUnary(sliced_shape, HloOpcode::kNegate, reverse));

  auto module = CreateNewModule();
  module->AddEntryComputation(builder.Build());
  auto entry = module->entry_computation();

  // The pass must succeed and report that it changed the module.
  auto fusion_result = CpuInstructionFusion().Run(module.get());
  ASSERT_TRUE(fusion_result.ok());
  EXPECT_TRUE(fusion_result.ValueOrDie());

  HloInstruction* fusion = entry->root_instruction();
  ASSERT_EQ(HloOpcode::kFusion, fusion->opcode());
  EXPECT_EQ(fusion->fusion_kind(), HloInstruction::FusionKind::kLoop);

  HloInstruction* fused_negate = fusion->fused_expression_root();
  EXPECT_EQ(HloOpcode::kNegate, fused_negate->opcode());
  EXPECT_EQ(HloOpcode::kReverse, fused_negate->operand(0)->opcode());
  EXPECT_EQ(HloOpcode::kSlice, fusion->operand(0)->opcode());
}
|
||||
|
||||
} // namespace cpu
|
||||
} // namespace xla
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return xla::ParseDebugOptionsFlagsAndRunTests(argc, argv);
|
||||
}
|
@ -420,182 +420,6 @@ XLA_TEST_F(FusionTest, Reverse) {
|
||||
*ExecuteAndTransfer(std::move(hlo_module), {}));
|
||||
}
|
||||
|
||||
// Loop-fuses concatenate({1, 2, 3}, {4}) with a negate and checks that the
// executed module yields {-1, -2, -3, -4}.
XLA_TEST_F(FusionTest, ConcatenateNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto result_shape = ShapeUtil::MakeShape(S32, {4});

  auto lhs = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
  auto rhs = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({4})));
  auto concat = builder.AddInstruction(
      HloInstruction::CreateConcatenate(result_shape, {lhs, rhs}, 0));
  auto negate = builder.AddInstruction(
      HloInstruction::CreateUnary(result_shape, HloOpcode::kNegate, concat));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, concat},
                                 HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR1<int32>({-1, -2, -3, -4});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
// Loop-fuses reverse({1, 2, 3}) with a negate and checks that the executed
// module yields {-3, -2, -1}.
XLA_TEST_F(FusionTest, ReverseNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto shape = ShapeUtil::MakeShape(S32, {3});

  auto input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3})));
  auto reverse =
      builder.AddInstruction(HloInstruction::CreateReverse(shape, input, {0}));
  auto negate = builder.AddInstruction(
      HloInstruction::CreateUnary(shape, HloOpcode::kNegate, reverse));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, reverse},
                                 HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR1<int32>({-3, -2, -1});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
// Loop-fuses a broadcast of the scalar 1 to a two-element vector with a
// negate and checks that the executed module yields {-1, -1}.
XLA_TEST_F(FusionTest, BroadcastNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto shape = ShapeUtil::MakeShape(S32, {2});

  auto scalar = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR0<int32>(1)));
  auto broadcast = builder.AddInstruction(
      HloInstruction::CreateBroadcast(shape, scalar, {}));
  auto negate = builder.AddInstruction(
      HloInstruction::CreateUnary(shape, HloOpcode::kNegate, broadcast));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, broadcast},
                                 HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR1<int32>({-1, -1});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
// Loop-fuses a stride-2 slice of {1, 2, 3, 4} with a negate and checks that
// the executed module yields {-1, -3}.
XLA_TEST_F(FusionTest, SliceNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto shape = ShapeUtil::MakeShape(S32, {2});

  auto input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  auto slice = builder.AddInstruction(
      HloInstruction::CreateSlice(shape, input, {0}, {4}, {2}));
  auto negate = builder.AddInstruction(
      HloInstruction::CreateUnary(shape, HloOpcode::kNegate, slice));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, slice},
                                 HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR1<int32>({-1, -3});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
// Loop-fuses a size-2 dynamic slice of {1, 2, 3, 4} starting at index 1 with
// a negate and checks that the executed module yields {-2, -3}.
XLA_TEST_F(FusionTest, DynamicSliceNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto shape = ShapeUtil::MakeShape(S32, {2});

  auto input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  auto start_index = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
  auto dynamic_slice = builder.AddInstruction(
      HloInstruction::CreateDynamicSlice(shape, input, start_index, {2}));
  auto negate = builder.AddInstruction(
      HloInstruction::CreateUnary(shape, HloOpcode::kNegate, dynamic_slice));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(
      /*instructions_to_fuse=*/{negate, dynamic_slice},
      HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR1<int32>({-2, -3});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
// Loop-fuses a dynamic-update-slice (writing {6, 7} into {1, 2, 3, 4} at
// index 1) with a negate and checks that the executed module yields
// {-1, -6, -7, -4}.
XLA_TEST_F(FusionTest, DynamicUpdateSliceNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto shape = ShapeUtil::MakeShape(S32, {4});

  auto base = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  auto update = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({6, 7})));
  auto start_index = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1})));
  auto dynamic_update_slice = builder.AddInstruction(
      HloInstruction::CreateDynamicUpdateSlice(shape, base, update,
                                               start_index));
  auto negate = builder.AddInstruction(HloInstruction::CreateUnary(
      shape, HloOpcode::kNegate, dynamic_update_slice));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(
      /*instructions_to_fuse=*/{negate, dynamic_update_slice},
      HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR1<int32>({-1, -6, -7, -4});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
// Loop-fuses a reshape of {1, 2, 3, 4} to 2x2 with a negate and checks that
// the executed module yields {{-1, -2}, {-3, -4}}.
XLA_TEST_F(FusionTest, ReshapeNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto matrix_shape = ShapeUtil::MakeShape(S32, {2, 2});

  auto input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  auto reshape = builder.AddInstruction(
      HloInstruction::CreateReshape(matrix_shape, input));
  auto negate = builder.AddInstruction(
      HloInstruction::CreateUnary(matrix_shape, HloOpcode::kNegate, reshape));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, reshape},
                                 HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR2<int32>({{-1, -2}, {-3, -4}});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
// TODO(b/64070202): Investigate failure.
|
||||
XLA_TEST_F(FusionTest, DISABLED_ON_GPU(TransposeNegate)) {
|
||||
auto builder = HloComputation::Builder(TestName());
|
||||
auto hlo_module = CreateNewModule();
|
||||
auto const0 = builder.AddInstruction(HloInstruction::CreateConstant(
|
||||
Literal::CreateR2<int32>({{1, 2}, {3, 4}})));
|
||||
auto transpose1 = builder.AddInstruction(HloInstruction::CreateTranspose(
|
||||
ShapeUtil::MakeShape(S32, {2, 2}), const0, {1, 0}));
|
||||
auto negate2 = builder.AddInstruction(HloInstruction::CreateUnary(
|
||||
ShapeUtil::MakeShape(S32, {2, 2}), HloOpcode::kNegate, transpose1));
|
||||
hlo_module->AddEntryComputation(builder.Build())
|
||||
->CreateFusionInstruction(/*instructions_to_fuse=*/{negate2, transpose1},
|
||||
HloInstruction::FusionKind::kLoop);
|
||||
|
||||
LiteralTestUtil::ExpectEqual(*Literal::CreateR2<int32>({{-1, -3}, {-2, -4}}),
|
||||
*ExecuteAndTransfer(std::move(hlo_module), {}));
|
||||
}
|
||||
|
||||
// Loop-fuses a pad of {1, 2, 3, 4} (padding value -1) with a negate and checks
// that the executed module yields {1, -1, 1, -2, 1, -3, 1, -4, 1}.
XLA_TEST_F(FusionTest, PadNegate) {
  HloComputation::Builder builder(TestName());
  auto module = CreateNewModule();
  const auto padded_shape = ShapeUtil::MakeShape(S32, {9});

  auto input = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR1<int32>({1, 2, 3, 4})));
  auto pad_value = builder.AddInstruction(
      HloInstruction::CreateConstant(Literal::CreateR0<int32>(-1)));

  // Padding configuration {low: 1, high: 1, interior: 1}.
  PaddingConfig padding;
  auto dim = padding.add_dimensions();
  dim->set_edge_padding_low(1);
  dim->set_edge_padding_high(1);
  dim->set_interior_padding(1);

  auto pad = builder.AddInstruction(
      HloInstruction::CreatePad(padded_shape, input, pad_value, padding));
  auto negate = builder.AddInstruction(
      HloInstruction::CreateUnary(padded_shape, HloOpcode::kNegate, pad));

  auto entry = module->AddEntryComputation(builder.Build());
  entry->CreateFusionInstruction(/*instructions_to_fuse=*/{negate, pad},
                                 HloInstruction::FusionKind::kLoop);

  auto expected = Literal::CreateR1<int32>({1, -1, 1, -2, 1, -3, 1, -4, 1});
  auto actual = ExecuteAndTransfer(std::move(module), {});
  LiteralTestUtil::ExpectEqual(*expected, *actual);
}
|
||||
|
||||
std::unique_ptr<HloComputation> MakeReduceTestComputation() {
|
||||
auto builder = HloComputation::Builder("add");
|
||||
auto lhs = builder.AddInstruction(HloInstruction::CreateParameter(
|
||||
|
Loading…
Reference in New Issue
Block a user