[XLA:GPU] Split reduce ops with large but non-consecutive reduction dimensions.
PiperOrigin-RevId: 317330616 Change-Id: Icdcf320b233479c2f74c5b40ee4c8d9a73a6088a
This commit is contained in:
parent
57f9d638c0
commit
ef1cabc7a8
@ -1174,6 +1174,7 @@ cc_library(
|
|||||||
":reduction_degenerate_dim_remover",
|
":reduction_degenerate_dim_remover",
|
||||||
":reduction_dimension_grouper",
|
":reduction_dimension_grouper",
|
||||||
":reduction_layout_normalizer",
|
":reduction_layout_normalizer",
|
||||||
|
":reduction_splitter",
|
||||||
":stream_assignment",
|
":stream_assignment",
|
||||||
":stream_executor_util",
|
":stream_executor_util",
|
||||||
":target_constants",
|
":target_constants",
|
||||||
@ -1819,6 +1820,33 @@ cc_library(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "reduction_splitter",
|
||||||
|
srcs = ["reduction_splitter.cc"],
|
||||||
|
hdrs = ["reduction_splitter.h"],
|
||||||
|
deps = [
|
||||||
|
":ir_emission_utils",
|
||||||
|
"//tensorflow/compiler/xla:shape_util",
|
||||||
|
"//tensorflow/compiler/xla/service:hlo",
|
||||||
|
"//tensorflow/compiler/xla/service:hlo_pass",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
tf_cc_test(
|
||||||
|
name = "reduction_splitter_test",
|
||||||
|
srcs = ["reduction_splitter_test.cc"],
|
||||||
|
deps = [
|
||||||
|
":reduction_splitter",
|
||||||
|
"//tensorflow/compiler/xla:shape_util",
|
||||||
|
"//tensorflow/compiler/xla:test",
|
||||||
|
"//tensorflow/compiler/xla:test_helpers",
|
||||||
|
"//tensorflow/compiler/xla/service:hlo_matchers",
|
||||||
|
"//tensorflow/compiler/xla/service:hlo_parser",
|
||||||
|
"//tensorflow/compiler/xla/tests:hlo_test_base",
|
||||||
|
"//tensorflow/compiler/xla/tests:xla_internal_test_main",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "reduction_layout_normalizer",
|
name = "reduction_layout_normalizer",
|
||||||
srcs = ["reduction_layout_normalizer.cc"],
|
srcs = ["reduction_layout_normalizer.cc"],
|
||||||
|
|||||||
@ -65,6 +65,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h"
|
#include "tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h"
|
||||||
#include "tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h"
|
#include "tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h"
|
||||||
#include "tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h"
|
#include "tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h"
|
||||||
|
#include "tensorflow/compiler/xla/service/gpu/reduction_splitter.h"
|
||||||
#include "tensorflow/compiler/xla/service/gpu/stream_assignment.h"
|
#include "tensorflow/compiler/xla/service/gpu/stream_assignment.h"
|
||||||
#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
|
#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
|
||||||
#include "tensorflow/compiler/xla/service/gpu/target_constants.h"
|
#include "tensorflow/compiler/xla/service/gpu/target_constants.h"
|
||||||
@ -371,6 +372,7 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment(
|
|||||||
pipeline.AddPass<ReductionDegenerateDimRemover>();
|
pipeline.AddPass<ReductionDegenerateDimRemover>();
|
||||||
pipeline.AddPass<ReductionLayoutNormalizer>();
|
pipeline.AddPass<ReductionLayoutNormalizer>();
|
||||||
pipeline.AddPass<ReductionDimensionGrouper>();
|
pipeline.AddPass<ReductionDimensionGrouper>();
|
||||||
|
pipeline.AddPass<HloPassFix<ReductionSplitter>>();
|
||||||
|
|
||||||
// The LayoutAssignment pass may leave behind kCopy instructions which are
|
// The LayoutAssignment pass may leave behind kCopy instructions which are
|
||||||
// duplicate or NOPs, so remove them with algebraic simplification and CSE.
|
// duplicate or NOPs, so remove them with algebraic simplification and CSE.
|
||||||
|
|||||||
117
tensorflow/compiler/xla/service/gpu/reduction_splitter.cc
Normal file
117
tensorflow/compiler/xla/service/gpu/reduction_splitter.cc
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "tensorflow/compiler/xla/service/gpu/reduction_splitter.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
|
||||||
|
#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
|
||||||
|
#include "tensorflow/compiler/xla/service/hlo_instructions.h"
|
||||||
|
#include "tensorflow/compiler/xla/shape_util.h"
|
||||||
|
|
||||||
|
namespace xla {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
class ReductionSplitterVisitor : public DfsHloRewriteVisitor {
|
||||||
|
public:
|
||||||
|
Status HandleReduce(HloInstruction *reduce) override {
|
||||||
|
VLOG(4) << "Input: " << reduce->ToString();
|
||||||
|
|
||||||
|
// Reductions with contiguous dimensions are lowered to efficient code. No
|
||||||
|
// need to split such ops.
|
||||||
|
if (IsReductionFromOrToContiguousDimensions(*reduce)) {
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
if (reduce->dimensions().size() < 2) {
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
if (!reduce->shape().IsArray()) {
|
||||||
|
// TODO(cheshire): Handle variadic reduction.
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
HloInstruction *operand = reduce->mutable_operand(0);
|
||||||
|
const Shape &shape = operand->shape();
|
||||||
|
CHECK(shape == LayoutUtil::GetWithDefaultLayout(shape))
|
||||||
|
<< "Default layout should be enforced on reduction operand";
|
||||||
|
// Verify that contiguous dimensions have been grouped by the
|
||||||
|
// ReductionDimensionGrouper pass.
|
||||||
|
for (int64 i = 0; i < reduce->dimensions().size(); ++i) {
|
||||||
|
for (int64 j = i + 1; j < reduce->dimensions().size(); ++j) {
|
||||||
|
CHECK(abs(reduce->dimensions(i) - reduce->dimensions(j)) > 1)
|
||||||
|
<< "Reduction dimensions must not be consecutive";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The reduce op has non-contiguous dimensions. Look for the dimension with
|
||||||
|
// the largest shape dimension. Reducing along this dimension first will
|
||||||
|
// reduce the output size most effectively.
|
||||||
|
int64 max_shape_dim = 0;
|
||||||
|
int64 max_reduce_dim = 0;
|
||||||
|
const auto &input_shape = reduce->operand(0)->shape();
|
||||||
|
for (int64 i = 0; i < reduce->dimensions().size(); ++i) {
|
||||||
|
if (input_shape.dimensions(reduce->dimensions(i)) > max_shape_dim) {
|
||||||
|
max_reduce_dim = reduce->dimensions(i);
|
||||||
|
max_shape_dim = input_shape.dimensions(max_reduce_dim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// TODO(tjoerg): Run microbenchmarks to tune this threshold.
|
||||||
|
if (max_shape_dim < 128) {
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split the reduction into a pre-reduction and a final reduction.
|
||||||
|
VLOG(3) << "Splitting reduction " << reduce->name() << " at dimension "
|
||||||
|
<< max_reduce_dim;
|
||||||
|
std::vector<int64> pre_reduce_dims;
|
||||||
|
pre_reduce_dims.push_back(max_reduce_dim);
|
||||||
|
std::vector<int64> pre_reduce_shape_dims(input_shape.dimensions().begin(),
|
||||||
|
input_shape.dimensions().end());
|
||||||
|
pre_reduce_shape_dims.erase(pre_reduce_shape_dims.begin() + max_reduce_dim);
|
||||||
|
Shape pre_reduce_shape = ShapeUtil::MakeShape(
|
||||||
|
reduce->shape().element_type(), pre_reduce_shape_dims);
|
||||||
|
std::unique_ptr<HloInstruction> pre_reduce = HloInstruction::CreateReduce(
|
||||||
|
pre_reduce_shape, reduce->mutable_operand(0),
|
||||||
|
reduce->mutable_operand(1), pre_reduce_dims, reduce->to_apply());
|
||||||
|
pre_reduce->set_metadata(reduce->metadata());
|
||||||
|
|
||||||
|
std::vector<int64> final_reduce_dims(reduce->dimensions().begin(),
|
||||||
|
reduce->dimensions().end());
|
||||||
|
final_reduce_dims.erase(
|
||||||
|
std::remove(final_reduce_dims.begin(), final_reduce_dims.end(),
|
||||||
|
max_reduce_dim),
|
||||||
|
final_reduce_dims.end());
|
||||||
|
for (int64 i = 0; i < final_reduce_dims.size(); ++i) {
|
||||||
|
if (final_reduce_dims[i] > max_reduce_dim) {
|
||||||
|
final_reduce_dims[i]--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::unique_ptr<HloInstruction> final_reduce = HloInstruction::CreateReduce(
|
||||||
|
reduce->shape(),
|
||||||
|
reduce->parent()->AddInstruction(std::move(pre_reduce)),
|
||||||
|
reduce->mutable_operand(1), final_reduce_dims, reduce->to_apply());
|
||||||
|
return ReplaceWithNewInstruction(reduce, std::move(final_reduce));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
StatusOr<bool> ReductionSplitter::Run(HloModule *module) {
|
||||||
|
TF_ASSIGN_OR_RETURN(bool changed,
|
||||||
|
ReductionSplitterVisitor().RunOnModule(module));
|
||||||
|
return changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xla
|
||||||
49
tensorflow/compiler/xla/service/gpu/reduction_splitter.h
Normal file
49
tensorflow/compiler/xla/service/gpu/reduction_splitter.h
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_SPLITTER_H_
|
||||||
|
#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_SPLITTER_H_
|
||||||
|
|
||||||
|
#include "tensorflow/compiler/xla/service/hlo_module.h"
|
||||||
|
#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
|
||||||
|
|
||||||
|
namespace xla {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
// Splits a reduce op into two consecutive reduce ops if
|
||||||
|
// * the reduce dimensions are not contiguous and
|
||||||
|
// * at least one reduce dimension is large (i.e. corresponds to a large input
|
||||||
|
// shape dimension).
|
||||||
|
//
|
||||||
|
// Reductions with non-contiguous dimensions are emitted as simple element-wise
|
||||||
|
// loops. This is inefficient when reducing large input shape dimensions.
|
||||||
|
// Splitting such reductions allows using more efficient reduction emitters.
|
||||||
|
//
|
||||||
|
// This pass splits reduce ops into two consecutive reduce ops. Run it to a
|
||||||
|
// fixpoint to split reduce ops along multiple large dimensions.
|
||||||
|
//
|
||||||
|
// Precondition: ReductionDimensionGrouper has been run and adjacent reduce
|
||||||
|
// dimentsions have been grouped. Reduction layouts have been normalized.
|
||||||
|
|
||||||
|
class ReductionSplitter : public HloModulePass {
|
||||||
|
public:
|
||||||
|
absl::string_view name() const override { return "reduction-splitter"; }
|
||||||
|
|
||||||
|
StatusOr<bool> Run(HloModule* module) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xla
|
||||||
|
|
||||||
|
#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_SPLITTER_H_
|
||||||
140
tensorflow/compiler/xla/service/gpu/reduction_splitter_test.cc
Normal file
140
tensorflow/compiler/xla/service/gpu/reduction_splitter_test.cc
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "tensorflow/compiler/xla/service/gpu/reduction_splitter.h"
|
||||||
|
|
||||||
|
#include "tensorflow/compiler/xla/service/hlo_matchers.h"
|
||||||
|
#include "tensorflow/compiler/xla/service/hlo_parser.h"
|
||||||
|
#include "tensorflow/compiler/xla/shape_util.h"
|
||||||
|
#include "tensorflow/compiler/xla/test.h"
|
||||||
|
#include "tensorflow/compiler/xla/test_helpers.h"
|
||||||
|
#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
|
||||||
|
|
||||||
|
namespace xla {
|
||||||
|
namespace gpu {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
namespace op = xla::testing::opcode_matchers;
|
||||||
|
|
||||||
|
class ReductionSplitterTest : public HloTestBase {};
|
||||||
|
|
||||||
|
TEST_F(ReductionSplitterTest, SplitReductionAtDimensionTwo) {
|
||||||
|
auto module = ParseAndReturnVerifiedModule(R"(
|
||||||
|
HloModule test
|
||||||
|
|
||||||
|
add_computation {
|
||||||
|
x = f32[] parameter(0)
|
||||||
|
y = f32[] parameter(1)
|
||||||
|
ROOT add = f32[] add(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
ENTRY entry_computation {
|
||||||
|
param_0 = f16[6,16,512,64]{3,2,1,0} parameter(0)
|
||||||
|
transpose.1781 = f16[6,512,16,64]{3,1,2,0} transpose(param_0), dimensions={0,2,1,3}
|
||||||
|
convert.6986 = f32[6,512,16,64]{3,1,2,0} convert(transpose.1781)
|
||||||
|
bitcast.2136 = f32[6,16,512,64]{3,2,1,0} bitcast(convert.6986)
|
||||||
|
constant_11111 = f32[] constant(0)
|
||||||
|
ROOT reduce.982 = f32[16,64]{1,0} reduce(bitcast.2136, constant_11111), dimensions={0,2}, to_apply=add_computation
|
||||||
|
}
|
||||||
|
)")
|
||||||
|
.ValueOrDie();
|
||||||
|
ASSERT_TRUE(ReductionSplitter().Run(module.get()).ValueOrDie());
|
||||||
|
SCOPED_TRACE(module->ToString());
|
||||||
|
const HloInstruction* root_reduction =
|
||||||
|
module->entry_computation()->root_instruction();
|
||||||
|
ASSERT_THAT(root_reduction, op::Reduce(op::Reduce(), op::Constant()));
|
||||||
|
|
||||||
|
auto* pre_reduction = root_reduction->operand(0);
|
||||||
|
EXPECT_THAT(pre_reduction->dimensions(), std::vector<int64>({2}));
|
||||||
|
EXPECT_THAT(pre_reduction->shape(), ShapeUtil::MakeShape(F32, {6, 16, 64}));
|
||||||
|
EXPECT_THAT(root_reduction->dimensions(), std::vector<int64>({0}));
|
||||||
|
EXPECT_THAT(root_reduction->shape(), ShapeUtil::MakeShape(F32, {16, 64}));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(ReductionSplitterTest, SplitReductionAtDimensionZero) {
|
||||||
|
auto module = ParseAndReturnVerifiedModule(R"(
|
||||||
|
HloModule test
|
||||||
|
|
||||||
|
add_computation {
|
||||||
|
x = f32[] parameter(0)
|
||||||
|
y = f32[] parameter(1)
|
||||||
|
ROOT add = f32[] add(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
ENTRY entry_computation {
|
||||||
|
param_0 = f32[1024,16,512,64,128]{4,3,2,1,0} parameter(0)
|
||||||
|
constant_11111 = f32[] constant(0)
|
||||||
|
ROOT reduce.982 = f32[16,64]{1,0} reduce(param_0, constant_11111), dimensions={2,0,4}, to_apply=add_computation
|
||||||
|
}
|
||||||
|
)")
|
||||||
|
.ValueOrDie();
|
||||||
|
ASSERT_TRUE(ReductionSplitter().Run(module.get()).ValueOrDie());
|
||||||
|
SCOPED_TRACE(module->ToString());
|
||||||
|
const HloInstruction* root_reduction =
|
||||||
|
module->entry_computation()->root_instruction();
|
||||||
|
ASSERT_THAT(root_reduction, op::Reduce(op::Reduce(), op::Constant()));
|
||||||
|
|
||||||
|
auto* pre_reduction = root_reduction->operand(0);
|
||||||
|
EXPECT_THAT(pre_reduction->dimensions(), std::vector<int64>({0}));
|
||||||
|
EXPECT_THAT(pre_reduction->shape(),
|
||||||
|
ShapeUtil::MakeShape(F32, {16, 512, 64, 128}));
|
||||||
|
EXPECT_THAT(root_reduction->dimensions(), std::vector<int64>({1, 3}));
|
||||||
|
EXPECT_THAT(root_reduction->shape(), ShapeUtil::MakeShape(F32, {16, 64}));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(ReductionSplitterTest, DontSplitReductionWithSmallDimensions) {
|
||||||
|
auto module = ParseAndReturnVerifiedModule(R"(
|
||||||
|
HloModule test
|
||||||
|
|
||||||
|
add_computation {
|
||||||
|
x = f32[] parameter(0)
|
||||||
|
y = f32[] parameter(1)
|
||||||
|
ROOT add = f32[] add(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
ENTRY entry_computation {
|
||||||
|
param_0 = f32[8,1024,8]{2,1,0} parameter(0)
|
||||||
|
constant_11111 = f32[] constant(0)
|
||||||
|
ROOT reduce.982 = f32[1024]{0} reduce(param_0, constant_11111), dimensions={2,0}, to_apply=add_computation
|
||||||
|
}
|
||||||
|
)")
|
||||||
|
.ValueOrDie();
|
||||||
|
EXPECT_FALSE(ReductionSplitter().Run(module.get()).ValueOrDie());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(ReductionSplitterTest, DontSplitReductionsWithContiguousDimensions) {
|
||||||
|
auto module = ParseAndReturnVerifiedModule(R"(
|
||||||
|
HloModule test
|
||||||
|
|
||||||
|
add_computation {
|
||||||
|
x = f32[] parameter(0)
|
||||||
|
y = f32[] parameter(1)
|
||||||
|
ROOT add = f32[] add(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
ENTRY entry_computation {
|
||||||
|
param_0 = f32[128,128,64,128]{3,2,1,0} parameter(0)
|
||||||
|
constant_11111 = f32[] constant(0)
|
||||||
|
// The dimenstions to keep (1 and 2) are contiguous.
|
||||||
|
ROOT reduce.982 = f32[128,64]{1,0} reduce(param_0, constant_11111), dimensions={3,0}, to_apply=add_computation
|
||||||
|
}
|
||||||
|
)")
|
||||||
|
.ValueOrDie();
|
||||||
|
EXPECT_FALSE(ReductionSplitter().Run(module.get()).ValueOrDie());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xla
|
||||||
@ -37,6 +37,7 @@ class ReductionDegenerateDimRemoverTest : public GpuCodegenTest {
|
|||||||
DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest();
|
DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest();
|
||||||
debug_options.add_xla_disable_hlo_passes("reduction-layout-normalizer");
|
debug_options.add_xla_disable_hlo_passes("reduction-layout-normalizer");
|
||||||
debug_options.add_xla_disable_hlo_passes("reduction-dimension-grouper");
|
debug_options.add_xla_disable_hlo_passes("reduction-dimension-grouper");
|
||||||
|
debug_options.add_xla_disable_hlo_passes("reduction-splitter");
|
||||||
debug_options.add_xla_disable_hlo_passes("gpu-tree-reduction-rewriter");
|
debug_options.add_xla_disable_hlo_passes("gpu-tree-reduction-rewriter");
|
||||||
return debug_options;
|
return debug_options;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -33,6 +33,7 @@ class ReductionLayoutNormalizerTest : public GpuCodegenTest {
|
|||||||
DebugOptions GetDebugOptionsForTest() override {
|
DebugOptions GetDebugOptionsForTest() override {
|
||||||
DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest();
|
DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest();
|
||||||
debug_options.add_xla_disable_hlo_passes("reduction-dimension-grouper");
|
debug_options.add_xla_disable_hlo_passes("reduction-dimension-grouper");
|
||||||
|
debug_options.add_xla_disable_hlo_passes("reduction-splitter");
|
||||||
debug_options.add_xla_disable_hlo_passes("layout-assignment");
|
debug_options.add_xla_disable_hlo_passes("layout-assignment");
|
||||||
debug_options.add_xla_disable_hlo_passes("gpu-tree-reduction-rewriter");
|
debug_options.add_xla_disable_hlo_passes("gpu-tree-reduction-rewriter");
|
||||||
return debug_options;
|
return debug_options;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user