From 7691e99586e336c5dc4b7209f355c79019b8cf3e Mon Sep 17 00:00:00 2001
From: amoitra
Date: Wed, 3 Jul 2019 12:57:46 -0700
Subject: [PATCH 1/8] Enable use of cudnn backprop APIs for grouped convolutions

---
 .../xla/service/gpu/cudnn_conv_rewriter.cc    | 51 +++++++-----
 .../service/gpu/cudnn_conv_rewriter_test.cc   | 80 +++++++++++++++++++
 2 files changed, 111 insertions(+), 20 deletions(-)
 mode change 100644 => 100755 tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
old mode 100644
new mode 100755
index e81850db69e..21ef810e64b
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -89,13 +89,11 @@ bool CanImplementAsCudnnForwardConv(HloInstruction* conv) {
 
 // Try to match a backward filter pattern that contains "conv".
 // Precondition: "conv" is a kConvolution.
-std::tuple<bool, Window, ConvolutionDimensionNumbers> MatchBackwardFilter(
-    HloInstruction* conv) {
+std::tuple<bool, Window, ConvolutionDimensionNumbers, HloInstruction*>
+MatchBackwardFilter(HloInstruction* conv) {
   const auto no_match_result =
-      std::make_tuple(false, Window(), ConvolutionDimensionNumbers());
-  if (conv->feature_group_count() > 1) {
-    return no_match_result;
-  }
+      std::make_tuple(false, Window(), ConvolutionDimensionNumbers(), nullptr);
+
   // Step 1: match the instruction pattern without considering the paddings and
   // dimension numbers just yet. We may need some generic pattern matcher
   // similar to third_party/llvm/llvm/include/llvm/IR/PatternMatch.h
@@ -248,7 +246,29 @@ std::tuple<bool, Window, ConvolutionDimensionNumbers> MatchBackwardFilter(
     backward_conv_dnums.add_kernel_spatial_dimensions(output_spatial_dims[i]);
   }
 
-  return std::make_tuple(true, backward_conv_window, backward_conv_dnums);
+  HloInstruction* lhs = conv->mutable_operand(0);
+  if (conv->feature_group_count() == 1) {
+    return std::make_tuple(true, backward_conv_window, backward_conv_dnums,
+                           lhs);
+  }
+  Shape new_shape = lhs->shape();
+
+  int64 input_batch_dimension = backward_conv_dnums.input_batch_dimension();
+  int64 input_feature_dimension = backward_conv_dnums.input_feature_dimension();
+
+  int64 input_batch = new_shape.dimensions(input_batch_dimension);
+  int64 input_feature = new_shape.dimensions(input_feature_dimension);
+
+  // Ensure that input_batch is an exact multiple of conv->feature_group_count()
+  CHECK_EQ(input_batch % conv->feature_group_count(), 0);
+  new_shape.set_dimensions(input_batch_dimension,
+                           input_batch / conv->feature_group_count());
+  new_shape.set_dimensions(input_feature_dimension,
+                           input_feature * conv->feature_group_count());
+
+  HloComputation* c = conv->parent();
+  lhs = c->AddInstruction(HloInstruction::CreateReshape(new_shape, lhs));
+  return std::make_tuple(true, backward_conv_window, backward_conv_dnums, lhs);
 }
 
 // Try to match a backward input pattern that contains "conv".
@@ -258,15 +278,6 @@ MatchBackwardInput(HloInstruction* conv) {
   const auto no_match_result =
       std::make_tuple(false, Window(), ConvolutionDimensionNumbers(), nullptr);
 
-  // TODO(b/119479517): Theoretically cuDNN supports grouped convolutions also
-  // for the backward input convolution, but at least for now with version 7.1.4
-  // it is slower. This needs to be re-evaluated for future cuDNN versions.
-  // Note that we already have the necessary code down below, the only thing to
-  // enable it is to remove the following early return.
-  if (conv->feature_group_count() > 1) {
-    return no_match_result;
-  }
-
   // Match instruction pattern.
   CHECK_EQ(HloOpcode::kConvolution, conv->opcode());
   HloInstruction* reverse_filter = conv->mutable_operand(1);
 
@@ -503,13 +514,13 @@ StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
     Window window;
     ConvolutionDimensionNumbers dnums;
     HloInstruction* rhs;
+    HloInstruction* lhs;
 
-    std::tie(match, window, dnums) = MatchBackwardFilter(conv);
+    std::tie(match, window, dnums, lhs) = MatchBackwardFilter(conv);
     if (match) {
       return CreateCudnnConv(kCudnnConvBackwardFilterCallTarget, conv->shape(),
-                             conv->mutable_operand(0), conv->mutable_operand(1),
-                             window, dnums, conv->feature_group_count(),
-                             conv->metadata());
+                             lhs, conv->mutable_operand(1), window, dnums,
+                             conv->feature_group_count(), conv->metadata());
     }
 
     std::tie(match, window, dnums, rhs) = MatchBackwardInput(conv);

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
index dbcdc2b075b..362d8d13aab 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
@@ -135,6 +135,86 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) {
       << md_after_opt.DebugString() << " vs " << metadata.DebugString();
 }
 
+TEST_F(CudnnConvRewriterTest, BackwardFilterGroupConvolve) {
+  // In a nutshell, before the pass:
+  // Input->batch_dim: 3 input_shape(3) = 4
+  // Input->feature_dim: 0 input_shape(0) = 32
+  // Kernel(gradient)->kernel_input_feature_dim (gradient_batch_dimension): 0
+  // Kernel(gradient)->kernel_output_feature_dim (gradient_feature_dimension): 3
+  // Output(dkernel)->output_batch_dim (dkernel_input_feature_dim): 2
+  // Output(dkernel)->output_feature_dim (dkernel_output_feature_dim): 3
+
+  // After the pass: all shapes and the dimension layout are brought
+  // back to normal, as accepted by cudnn:
+  // Input->batch_dim: 0 input_shape(0) = 8
+  // Input->feature_dim: 3 input_shape(3) = 16
+  // Kernel(gradient)->kernel_input_feature_dim (gradient_batch_dimension): 2
+  // Kernel(gradient)->kernel_output_feature_dim (gradient_feature_dimension): 3
+  // Output(dkernel)->output_batch_dim (dkernel_input_feature_dim): 0
+  // Output(dkernel)->output_feature_dim (dkernel_output_feature_dim): 3
+  HloComputation::Builder builder(TestName());
+  HloInstruction* activations =
+      builder.AddInstruction(HloInstruction::CreateParameter(
+          0, ShapeUtil::MakeShape(F32, {32, 1, 3, 4}), "activations"));
+  HloInstruction* gradients =
+      builder.AddInstruction(HloInstruction::CreateParameter(
+          1, ShapeUtil::MakeShape(F32, {8, 1, 2, 16}), "gradients"));
+  Window conv_window = default_conv_window_;
+  conv_window.mutable_dimensions(1)->set_size(2);
+  conv_window.mutable_dimensions(1)->set_window_dilation(2);
+  auto* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
+      ShapeInference::InferConvolveShape(
+          activations->shape(), gradients->shape(), /*feature_group_count=*/4,
+          /*batch_group_count=*/1, conv_window,
+          tf_default_dnums_for_backward_filter_)
+          .ConsumeValueOrDie(),
+      activations, gradients, /*feature_group_count=*/4,
+      /*batch_group_count=*/1, conv_window,
+      tf_default_dnums_for_backward_filter_, DefaultPrecisionConfig(2)));
+  OpMetadata metadata;
+  metadata.set_op_name("bar");
+  conv->set_metadata(metadata);
+  auto module = CreateNewVerifiedModule();
+  HloComputation* entry_computation =
+      module->AddEntryComputation(builder.Build());
+  EXPECT_TRUE(RunPass(module.get()));
+  ASSERT_THAT(entry_computation->root_instruction(),
+              op::GetTupleElement(
+                  op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0));
+  // Check that metadata was preserved.
+  const auto& md_after_opt =
+      entry_computation->root_instruction()->operand(0)->metadata();
+  EXPECT_TRUE(protobuf_util::ProtobufEquals(md_after_opt, metadata))
+      << md_after_opt.DebugString() << " vs " << metadata.DebugString();
+  const HloInstruction* custom_call =
+      entry_computation->root_instruction()->operand(0);
+  const ConvolutionDimensionNumbers conv_dim =
+      custom_call->convolution_dimension_numbers();
+  const auto lhs_a = custom_call->operand(0);
+  const auto input_shape = lhs_a->shape();
+  // The input (lhs) batch_dim (dim 0 in the original NHWC layout) gets mapped
+  // to be the feature_dim (dim 3) with a value of N*g = 32 in tf2xla. As
+  // described in conv_grad_ops.h, this swap is required to implement backprop
+  // using fwd conv. After the pass the batch_dim gets remapped to dim 0. The
+  // batch_dim value gets scaled to N = N*g/g = 32/4 = 8 to be compatible with
+  // cudnn.
+  EXPECT_EQ(0, conv_dim.input_batch_dimension());
+  EXPECT_EQ(8, input_shape.dimensions(conv_dim.input_batch_dimension()));
+  // Similarly, the input (lhs) feature_dim (dim 3 in the original NHWC layout)
+  // gets mapped to be the batch_dim (dim 0) with a value of C/g = 4 in tf2xla.
+  // After the pass the feature_dim gets remapped to dim 3. The feature_dim
+  // value gets scaled to C = C/g*g = 4*4 = 16 to be compatible with cudnn.
+  EXPECT_EQ(3, conv_dim.input_feature_dimension());
+  EXPECT_EQ(16, input_shape.dimensions(conv_dim.input_feature_dimension()));
+  // Similarly, the feature and batch dims of the incoming gradients (used as
+  // rhs) and the in/out dims of the output of the convolution, i.e., dkernel,
+  // have been modified in tf2xla (as described in conv_grad_ops.h). This pass
+  // remaps everything back for the layout to be compatible with cudnn backprop
+  // APIs.
+  EXPECT_EQ(2, conv_dim.kernel_input_feature_dimension());
+  EXPECT_EQ(3, conv_dim.kernel_output_feature_dimension());
+  EXPECT_EQ(0, conv_dim.output_batch_dimension());
+  EXPECT_EQ(3, conv_dim.output_feature_dimension());
+}
+
 TEST_F(CudnnConvRewriterTest,
        BackwardFilterConvolveEquivalentToForwardConvolution) {
   HloComputation::Builder builder(TestName());
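Note: the dimension bookkeeping the new test checks is easiest to see as plain shape arithmetic. The sketch below mirrors what PATCH 1's MatchBackwardFilter does to the activations shape, on the test's concrete values; it assumes the NHWC-style dimension numbers set up by tf_default_dnums_for_backward_filter_ (batch dim 0, feature dim 3 after the rewrite) and is an editor's illustration, not code from the patch.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      const int64_t feature_group_count = 4;  // g, from the test
      // activations as built by the test: {N*g, H, W, C/g} = {32, 1, 3, 4}
      std::vector<int64_t> lhs_dims = {32, 1, 3, 4};
      const int input_batch_dimension = 0;    // backward_conv_dnums
      const int input_feature_dimension = 3;  // backward_conv_dnums
      // Mirrors the CHECK_EQ in the patch.
      assert(lhs_dims[input_batch_dimension] % feature_group_count == 0);
      lhs_dims[input_batch_dimension] /= feature_group_count;    // 32/4 = 8 = N
      lhs_dims[input_feature_dimension] *= feature_group_count;  //  4*4 = 16 = C
      // lhs_dims is now {8, 1, 3, 16}: exactly the values the test asserts
      // via input_shape.dimensions(...).
      return 0;
    }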
From e7c6533b7d3f1997bfabe9043210845f016ab688 Mon Sep 17 00:00:00 2001
From: amoitra
Date: Tue, 9 Jul 2019 14:40:29 -0700
Subject: [PATCH 2/8] Incorporate Thomas's comments

---
 .../compiler/xla/service/gpu/cudnn_conv_rewriter.cc | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
index 21ef810e64b..ca8d63cbcc7 100755
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -251,16 +251,17 @@ MatchBackwardFilter(HloInstruction* conv) {
     return std::make_tuple(true, backward_conv_window, backward_conv_dnums,
                            lhs);
   }
-  Shape new_shape = lhs->shape();
 
   int64 input_batch_dimension = backward_conv_dnums.input_batch_dimension();
   int64 input_feature_dimension = backward_conv_dnums.input_feature_dimension();
 
-  int64 input_batch = new_shape.dimensions(input_batch_dimension);
-  int64 input_feature = new_shape.dimensions(input_feature_dimension);
-
+  int64 input_batch = lhs->shape().dimensions(input_batch_dimension);
   // Ensure that input_batch is an exact multiple of conv->feature_group_count()
-  CHECK_EQ(input_batch % conv->feature_group_count(), 0);
+  CHECK_EQ(input_batch % conv->feature_group_count(), 0)
+      << "Input batch should be an exact multiple of feature group count";
+  int64 input_feature = lhs->shape().dimensions(input_feature_dimension);
+
+  Shape new_shape = lhs->shape();
   new_shape.set_dimensions(input_batch_dimension,
                            input_batch / conv->feature_group_count());
   new_shape.set_dimensions(input_feature_dimension,

From 0404f60b100a77059c5164d6da9953b6c18cb8f4 Mon Sep 17 00:00:00 2001
From: amoitra
Date: Tue, 16 Jul 2019 13:31:45 -0700
Subject: [PATCH 3/8] Add check for depthwise fwd conv addressing test
 failures and reverting change for MatchBackwardInput

---
 .../xla/service/gpu/cudnn_conv_rewriter.cc | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
index ca8d63cbcc7..9e59b1290ed 100755
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -153,6 +153,15 @@ MatchBackwardFilter(HloInstruction* conv) {
         "to fold it to a backward filter convolution.";
     return no_match_result;
   }
+  auto rhs_in =
+      conv->mutable_operand(1)->shape().dimensions(kernel_input_feature_dim);
+  if ((conv->feature_group_count() > 1) && (rhs_in == 1) &&
+      (input_batch_dim == output_batch_dim)) {
+    VLOG(1) << conv->ToString()
+            << " is a depthwise forward convolution. No need to fold to "
+               "backward filter.";
+    return no_match_result;
+  }
 
   // Step 3: fuse the matched HLOs into a backward convolution instruction.
   //
@@ -279,6 +288,15 @@ MatchBackwardInput(HloInstruction* conv) {
   const auto no_match_result =
       std::make_tuple(false, Window(), ConvolutionDimensionNumbers(), nullptr);
 
+  // TODO(b/119479517): Theoretically cuDNN supports grouped convolutions also
+  // for the backward input convolution, but at least for now with version 7.1.4
+  // it is slower. This needs to be re-evaluated for future cuDNN versions.
+  // Note that we already have the necessary code down below, the only thing to
+  // enable it is to remove the following early return.
+  if (conv->feature_group_count() > 1) {
+    return no_match_result;
+  }
+
   // Match instruction pattern.
   CHECK_EQ(HloOpcode::kConvolution, conv->opcode());
   HloInstruction* reverse_filter = conv->mutable_operand(1);

From 3e2958befaa22595b754018e7e2ef089420ff17d Mon Sep 17 00:00:00 2001
From: amoitra
Date: Sat, 20 Jul 2019 11:05:22 -0700
Subject: [PATCH 4/8] Added Transpose and a reshape

---
 .../xla/service/gpu/cudnn_conv_rewriter.cc | 33 +++++++++++++++----
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
index 9e59b1290ed..066e2daf52d 100755
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -265,18 +265,37 @@ MatchBackwardFilter(HloInstruction* conv) {
   int64 input_feature_dimension = backward_conv_dnums.input_feature_dimension();
 
   int64 input_batch = lhs->shape().dimensions(input_batch_dimension);
+  int64 input_feature = lhs->shape().dimensions(input_feature_dimension);
+
+  // Reshape batch_dim G*N -> [G,N]
+  std::vector<int64> reshape_dims = lhs->shape().dimensions();
+  auto num_groups = conv->feature_group_count();
   // Ensure that input_batch is an exact multiple of conv->feature_group_count()
   CHECK_EQ(input_batch % conv->feature_group_count(), 0)
       << "Input batch should be an exact multiple of feature group count";
-  int64 input_feature = lhs->shape().dimensions(input_feature_dimension);
-
-  Shape new_shape = lhs->shape();
-  new_shape.set_dimensions(input_batch_dimension,
-                           input_batch / conv->feature_group_count());
-  new_shape.set_dimensions(input_feature_dimension,
-                           input_feature * conv->feature_group_count());
+  reshape_dims[input_batch_dimension] =
+      reshape_dims[input_batch_dimension] / num_groups;
+  reshape_dims.insert(reshape_dims.begin() + input_batch_dimension, num_groups);
 
   HloComputation* c = conv->parent();
+  lhs = c->AddInstruction(HloInstruction::CreateReshape(
+      ShapeUtil::MakeShape(lhs->shape().element_type(), reshape_dims), lhs));
+
+  // Transpose G to the axis before C/G, e.g.: [G, N, C/G, H, W] -> [N, G,
+  // C/G, H, W]
+  std::vector<int64> transpose_dims(lhs->shape().dimensions_size());
+  std::iota(transpose_dims.begin(), transpose_dims.end(), 0);
+  transpose_dims.erase(transpose_dims.begin() + input_batch_dimension);
+  transpose_dims.insert(transpose_dims.begin() + input_feature_dimension,
+                        input_batch_dimension);
+  lhs = c->AddInstruction(
+      HloInstruction::CreateTranspose(lhs->shape(), lhs, transpose_dims));
+
+  // Merge [G,C/G] -> [C]
+  Shape new_shape = lhs->shape();
+  new_shape.DeleteDimension(input_feature_dimension);
+  new_shape.set_dimensions(input_feature_dimension,
+                           input_feature * conv->feature_group_count());
   lhs = c->AddInstruction(HloInstruction::CreateReshape(new_shape, lhs));
   return std::make_tuple(true, backward_conv_window, backward_conv_dnums, lhs);
 }
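Note: the reshape-transpose-reshape sequence this patch introduces is easiest to follow on concrete shapes. The sketch below replays the patch's dimension bookkeeping as plain vector manipulation, using the shapes from the test above (batch dim 0, feature dim 3, G = 4); it is an editor's illustration under those assumptions, not the pass itself.

    #include <cassert>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    int main() {
      const int64_t g = 4;  // num_groups
      const int batch_dim = 0, feature_dim = 3;
      std::vector<int64_t> dims = {32, 1, 3, 4};  // {G*N, H, W, C/G}

      // Reshape 1: split the batch G*N -> [G, N]   => {4, 8, 1, 3, 4}
      dims[batch_dim] /= g;
      dims.insert(dims.begin() + batch_dim, g);

      // Transpose: move G to just before C/G       => {8, 1, 3, 4, 4}
      std::vector<int64_t> perm(dims.size());
      std::iota(perm.begin(), perm.end(), 0);
      perm.erase(perm.begin() + batch_dim);
      perm.insert(perm.begin() + feature_dim, batch_dim);  // perm = {1,2,3,0,4}
      std::vector<int64_t> transposed(dims.size());
      for (size_t i = 0; i < perm.size(); ++i) transposed[i] = dims[perm[i]];

      // Reshape 2: merge [G, C/G] -> C             => {8, 1, 3, 16}
      transposed.erase(transposed.begin() + feature_dim);
      transposed[feature_dim] *= g;
      assert((transposed == std::vector<int64_t>{8, 1, 3, 16}));
      return 0;
    }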
From 4390c4f8463bc5fb8e52fc2b4749951cdfca64ce Mon Sep 17 00:00:00 2001
From: amoitra
Date: Sun, 21 Jul 2019 20:55:35 -0700
Subject: [PATCH 5/8] minor fix - missed something during merge conflict
 resolution

---
 tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
index ffda48872f2..25a821cb078 100755
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -561,7 +561,7 @@ StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
                              conv->feature_group_count(), conv->metadata());
   }
 
-    std::tie(match, window, dnums) = MatchBackwardFilter(conv);
+    std::tie(match, window, dnums, lhs) = MatchBackwardFilter(conv);
     if (match) {
       return CreateCudnnConv(kCudnnConvBackwardFilterCallTarget, conv->shape(),
                              lhs, conv->mutable_operand(1), window, dnums,

From 5471b5f66ed10ef49bce250746e7e73ec0ccf2be Mon Sep 17 00:00:00 2001
From: amoitra
Date: Mon, 22 Jul 2019 00:33:24 -0700
Subject: [PATCH 6/8] Few more changes

---
 .../xla/service/gpu/cudnn_conv_rewriter.cc | 30 ++++++++++++++-----
 1 file changed, 22 insertions(+), 8 deletions(-)
 mode change 100755 => 100644 tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
old mode 100755
new mode 100644
index 25a821cb078..a441e70510a
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -278,26 +278,40 @@ MatchBackwardFilter(HloInstruction* conv) {
   reshape_dims.insert(reshape_dims.begin() + input_batch_dimension, num_groups);
 
   HloComputation* c = conv->parent();
-  lhs = c->AddInstruction(HloInstruction::CreateReshape(
-      ShapeUtil::MakeShape(lhs->shape().element_type(), reshape_dims), lhs));
+  HloInstruction* lhs_reshape_1 =
+      c->AddInstruction(HloInstruction::CreateReshape(
+          ShapeUtil::MakeShape(lhs->shape().element_type(), reshape_dims),
+          lhs));
 
   // Transpose G to the axis before C/G, e.g.: [G, N, C/G, H, W] -> [N, G,
   // C/G, H, W]
-  std::vector<int64> transpose_dims(lhs->shape().dimensions_size());
+  std::vector<int64> transpose_dims(lhs_reshape_1->shape().dimensions_size());
   std::iota(transpose_dims.begin(), transpose_dims.end(), 0);
   transpose_dims.erase(transpose_dims.begin() + input_batch_dimension);
   transpose_dims.insert(transpose_dims.begin() + input_feature_dimension,
                         input_batch_dimension);
-  lhs = c->AddInstruction(
-      HloInstruction::CreateTranspose(lhs->shape(), lhs, transpose_dims));
+  std::vector<int64> transpose_reshape_dims =
+      lhs_reshape_1->shape().dimensions();
+  transpose_reshape_dims.erase(transpose_reshape_dims.begin() +
+                               input_batch_dimension);
+  transpose_reshape_dims.insert(
+      transpose_reshape_dims.begin() + input_feature_dimension, num_groups);
+
+  HloInstruction* lhs_transpose =
+      c->AddInstruction(HloInstruction::CreateTranspose(
+          ShapeUtil::MakeShape(lhs_reshape_1->shape().element_type(),
+                               transpose_reshape_dims),
+          lhs_reshape_1, transpose_dims));
 
   // Merge [G,C/G] -> [C]
-  Shape new_shape = lhs->shape();
+  Shape new_shape = lhs_transpose->shape();
   new_shape.DeleteDimension(input_feature_dimension);
   new_shape.set_dimensions(input_feature_dimension,
                            input_feature * conv->feature_group_count());
-  lhs = c->AddInstruction(HloInstruction::CreateReshape(new_shape, lhs));
-  return std::make_tuple(true, backward_conv_window, backward_conv_dnums, lhs);
+  HloInstruction* lhs_reshape_2 = c->AddInstruction(
+      HloInstruction::CreateReshape(new_shape, lhs_transpose));
+  return std::make_tuple(true, backward_conv_window, backward_conv_dnums,
+                         lhs_reshape_2);
 }
 
 // Try to match a backward input pattern that contains "conv".
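Note: beyond naming the intermediate instructions (lhs_reshape_1, lhs_transpose, lhs_reshape_2), the functional change in PATCH 6 is that CreateTranspose now receives the explicitly permuted shape (transpose_reshape_dims) rather than the operand's own shape. An HLO transpose's result shape must be the operand shape permuted by transpose_dims, which PATCH 4's lhs->shape() only satisfies when the permuted dimensions happen to coincide. On the shapes from the test above (editor's illustration):

    // Operand (after the split):  {G, N, H, W, C/G} = {4, 8, 1, 3, 4}
    // transpose_dims:             {1, 2, 3, 0, 4}
    // Result shape must be        {8, 1, 3, 4, 4}  (operand dims permuted),
    // not the operand's own {4, 8, 1, 3, 4} that PATCH 4 passed in.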
From 808a8068ad9a206d979d34b33357dd92f21ba786 Mon Sep 17 00:00:00 2001
From: amoitra
Date: Mon, 22 Jul 2019 11:51:18 -0700
Subject: [PATCH 7/8] Incorporate Adrian's comments

---
 tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
old mode 100644
new mode 100755
index a441e70510a..9c859a00dbc
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -155,8 +155,8 @@ MatchBackwardFilter(HloInstruction* conv) {
   }
   auto rhs_in =
       conv->mutable_operand(1)->shape().dimensions(kernel_input_feature_dim);
-  if ((conv->feature_group_count() > 1) && (rhs_in == 1) &&
-      (input_batch_dim == output_batch_dim)) {
+  if (conv->feature_group_count() > 1 && rhs_in == 1 &&
+      input_batch_dim == output_batch_dim) {
     VLOG(1) << conv->ToString()
             << " is a depthwise forward convolution. No need to fold to "
                "backward filter.";
@@ -270,8 +270,7 @@ MatchBackwardFilter(HloInstruction* conv) {
   // Reshape batch_dim G*N -> [G,N]
   std::vector<int64> reshape_dims = lhs->shape().dimensions();
   auto num_groups = conv->feature_group_count();
-  // Ensure that input_batch is an exact multiple of conv->feature_group_count()
-  CHECK_EQ(input_batch % conv->feature_group_count(), 0)
+  CHECK_EQ(input_batch % num_groups, 0)
       << "Input batch should be an exact multiple of feature group count";
   reshape_dims[input_batch_dimension] =
       reshape_dims[input_batch_dimension] / num_groups;

From 969a4b05b4b7bbda14c4b4b44a94137220340bb7 Mon Sep 17 00:00:00 2001
From: amoitra
Date: Mon, 22 Jul 2019 11:58:33 -0700
Subject: [PATCH 8/8] minor edit

---
 tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
index 9c859a00dbc..33486608c1c 100755
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -328,7 +328,7 @@ MatchBackwardInput(HloInstruction* conv) {
   if (conv->feature_group_count() > 1) {
     return no_match_result;
   }
-  
+
   // Match instruction pattern.
   CHECK_EQ(HloOpcode::kConvolution, conv->opcode());
   HloInstruction* reverse_filter = conv->mutable_operand(1);
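Note: why a reshape-transpose-reshape chain rather than PATCH 1's single reshape? A row-major reinterpretation of {G*N, H, W, C/G} as {N, H, W, C} would interleave group and batch indices, whereas splitting out G, moving it next to C/G, and merging places each group's channels contiguously (channel index c' = g*(C/G) + c), which is the layout a grouped convolution expects. The standalone check below demonstrates that index mapping under the same G-major batch assumption the pass makes (the split treats the batch dimension as [G, N]); it is an editor's illustration, not code from the patches.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Models the lhs buffer as row-major {G*N, C/G} (spatial dims elided) and
    // applies the only data-moving step, the {G, N, Cg} -> {N, G, Cg}
    // transpose; the two reshapes around it are layout-preserving bitcasts.
    int main() {
      const int64_t G = 2, N = 3, Cg = 2;
      std::vector<int64_t> src(G * N * Cg), dst(G * N * Cg);
      for (int64_t i = 0; i < static_cast<int64_t>(src.size()); ++i) src[i] = i;

      for (int64_t g = 0; g < G; ++g)
        for (int64_t n = 0; n < N; ++n)
          for (int64_t c = 0; c < Cg; ++c)
            dst[(n * G + g) * Cg + c] = src[(g * N + n) * Cg + c];

      // In the merged {N, G*Cg} view, batch n / channel c' = g*Cg + c holds
      // the element that was batch g*N + n / channel c of the input: each
      // group's channels end up contiguous, as cudnn's grouped convolution
      // expects.
      for (int64_t g = 0; g < G; ++g)
        for (int64_t n = 0; n < N; ++n)
          for (int64_t c = 0; c < Cg; ++c)
            assert(dst[n * (G * Cg) + (g * Cg + c)] ==
                   src[(g * N + n) * Cg + c]);
      return 0;
    }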