From c14f7d33bb6b40d04f5a1bcd9674640dee090145 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Tue, 30 Apr 2019 15:56:38 -0700 Subject: [PATCH] Support explicit padding in _FusedConv2D. This prevents a crash when a graph rewrite changes a Conv2D with explicit padding to _FusedConv2D. Also change a DCHECK to a CHECK. If the DCHECK failed but did not trigger, the program would crash with a very cryptic error. PiperOrigin-RevId: 246039310 --- tensorflow/core/framework/common_shape_fns.cc | 2 +- .../core/grappler/optimizers/remapper.cc | 1 + .../core/grappler/optimizers/remapper_test.cc | 4 +- tensorflow/core/kernels/conv_ops_fused_impl.h | 136 +++++++----- tensorflow/core/kernels/conv_ops_test.cc | 195 ++++++++++++------ tensorflow/core/ops/nn_ops.cc | 5 +- 6 files changed, 222 insertions(+), 121 deletions(-) diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 668ebbda2ee..b4fdf8ec76a 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -565,7 +565,7 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c, TF_RETURN_IF_ERROR(CheckValidPadding(padding, explicit_paddings, /*num_dims=*/4, data_format)); } else { - DCHECK(padding != Padding::EXPLICIT); + CHECK(padding != Padding::EXPLICIT); // Crash ok. 
} DimensionHandle output_rows, output_cols; diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 65611eca316..9de357a25d3 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -540,6 +540,7 @@ void CopyConv2DAttributes(const NodeDef* conv2d, NodeDef* fused_conv2d) { (*attr)["T"] = src_attr.at("T"); (*attr)["strides"] = src_attr.at("strides"); (*attr)["padding"] = src_attr.at("padding"); + (*attr)["explicit_paddings"] = src_attr.at("explicit_paddings"); (*attr)["dilations"] = src_attr.at("dilations"); (*attr)["data_format"] = src_attr.at("data_format"); (*attr)["use_cudnn_on_gpu"] = src_attr.at("use_cudnn_on_gpu"); diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index e0cd49a3208..25914231ab1 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -406,7 +406,9 @@ TEST_F(RemapperTest, FuseConv2DWithBatchNorm) { auto variance = Placeholder(s.WithOpName("variance"), DT_FLOAT, scale_shape); std::vector strides = {1, 1, 1, 1}; - auto conv = ops::Conv2D(s.WithOpName("conv"), input, filter, strides, "SAME"); + auto conv = ops::Conv2D( + s.WithOpName("conv"), input, filter, strides, "EXPLICIT", + ops::Conv2D::Attrs().ExplicitPaddings({0, 0, 1, 2, 3, 4, 0, 0})); ops::FusedBatchNorm::Attrs attrs; attrs = attrs.IsTraining(false); auto batch_norm = ops::FusedBatchNorm(s.WithOpName("batch_norm"), conv, scale, diff --git a/tensorflow/core/kernels/conv_ops_fused_impl.h b/tensorflow/core/kernels/conv_ops_fused_impl.h index 3284262777a..89e4e61c26f 100644 --- a/tensorflow/core/kernels/conv_ops_fused_impl.h +++ b/tensorflow/core/kernels/conv_ops_fused_impl.h @@ -91,18 +91,20 @@ class LaunchFusedConv2DWithOutputKernel { public: LaunchFusedConv2DWithOutputKernel(int row_stride, int col_stride, // int row_dilation, 
int col_dilation, // - Padding padding) + Padding padding, + const std::vector& explicit_paddings) : row_stride_(row_stride), col_stride_(col_stride), row_dilation_(row_dilation), col_dilation_(col_dilation), - padding_(padding) {} + padding_(padding), + explicit_paddings_(explicit_paddings) {} template void operator()(const OutputKernel& output_kernel, OpKernelContext* ctx, const Tensor& input, const Tensor& filter, Tensor* output) { if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && - row_stride_ == 1 && col_stride_ == 1) { + row_stride_ == 1 && col_stride_ == 1 && padding_ != EXPLICIT) { int conv_width = 1; // Width for the convolution step. for (int i = 0; i < 3; ++i) { conv_width *= output->dim_size(i); @@ -135,11 +137,22 @@ class LaunchFusedConv2DWithOutputKernel { output_kernel); } else { - functor::SpatialConvolution()( - ctx->eigen_device(), output->tensor(), - input.tensor(), filter.tensor(), row_stride_, col_stride_, - row_dilation_, col_dilation_, BrainPadding2EigenPadding(padding_), - output_kernel); + if (padding_ == EXPLICIT) { + functor::SpatialConvolution()( + ctx->eigen_device(), output->tensor(), + input.tensor(), filter.tensor(), row_stride_, + col_stride_, row_dilation_, col_dilation_, + static_cast(explicit_paddings_[2]), + static_cast(explicit_paddings_[3]), + static_cast(explicit_paddings_[4]), + static_cast(explicit_paddings_[5]), output_kernel); + } else { + functor::SpatialConvolution()( + ctx->eigen_device(), output->tensor(), + input.tensor(), filter.tensor(), row_stride_, + col_stride_, row_dilation_, col_dilation_, + BrainPadding2EigenPadding(padding_), output_kernel); + } } } @@ -149,6 +162,7 @@ class LaunchFusedConv2DWithOutputKernel { int row_dilation_; int col_dilation_; const Padding padding_; + const std::vector& explicit_paddings_; }; template @@ -180,7 +194,8 @@ struct LaunchFusedConv2DOp { LaunchFusedConv2DWithOutputKernel conv2d( dimensions.stride_rows, dimensions.stride_cols, - dimensions.dilation_rows, 
dimensions.dilation_cols, params.padding); + dimensions.dilation_rows, dimensions.dilation_cols, params.padding, + params.explicit_paddings); switch (fusion) { case FusedComputationType::kUndefined: @@ -371,8 +386,6 @@ struct LaunchFusedConv2DOp { const int64 patch_cols = filter.dim_size(1); const int64 patch_depths = filter.dim_size(2); - int64 padding_rows = 0; - int64 padding_cols = 0; const int64 out_batch = GetTensorDim(*output, params.data_format, 'N'); const int64 out_rows = GetTensorDim(*output, params.data_format, 'H'); const int64 out_cols = GetTensorDim(*output, params.data_format, 'W'); @@ -387,44 +400,61 @@ struct LaunchFusedConv2DOp { errors::InvalidArgument("bias depth must be equal to out depth", bias.shape().DebugString())); - if (params.padding == SAME) { - // Total padding on rows and cols is - // Pr = (R' - 1) * S + (Kr - 1) * Dr + 1 - R - // Pc = (C' - 1) * S + (Kc - 1) * Dc + 1 - C - // where (R', C') are output dimensions, (R, C) are input dimensions, S - // is stride, (Dr, Dc) are dilations, (Kr, Kc) are filter dimensions. - // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top - // and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means - // we pad more on the right and bottom than on the top and left. 
- padding_rows = std::max( - 0, (out_rows - 1) * dimensions.stride_rows + - (patch_rows - 1) * dimensions.dilation_rows + 1 - in_rows); - padding_cols = std::max( - 0, (out_cols - 1) * dimensions.stride_cols + - (patch_cols - 1) * dimensions.dilation_cols + 1 - in_cols); - const bool rows_odd = (padding_rows % 2 != 0); - const bool cols_odd = (padding_cols % 2 != 0); - if (rows_odd || cols_odd) { - Tensor transformed_input; - int64 new_in_rows = in_rows + rows_odd; - int64 new_in_cols = in_cols + cols_odd; - OP_REQUIRES_OK(context, - context->allocate_temp( - DataTypeToEnum::value, - ShapeFromFormat(params.data_format, in_batch, - new_in_rows, new_in_cols, in_depths), - &transformed_input)); + const int64 common_padding_rows = + std::min(dimensions.pad_rows_before, dimensions.pad_rows_after); + const int64 common_padding_cols = + std::min(dimensions.pad_cols_before, dimensions.pad_cols_after); + if (dimensions.pad_rows_before != dimensions.pad_rows_after || + dimensions.pad_cols_before != dimensions.pad_cols_after) { + // cuDNN only supports padding the same amount on the left and right + // sides, and on the top and bottom sides. So we manually create a new + // padded input tensor such that we can pass it to cuDNN. - functor::PadInput()( - context->eigen_device(), - To32Bit(input_param.tensor()), {{0, 0}}, - {{rows_odd, cols_odd}}, To32Bit(transformed_input.tensor()), - params.data_format); - - input = transformed_input; - in_rows = new_in_rows; - in_cols = new_in_cols; + // TODO(reedwm): In some cases, we can avoid an allocation even if the two + // padding sides are different. For example, if the input is 2x2, the + // filter is 1x1, the stride is 2, and the padding is (1, 0, 1, 0), the + // result is equivalent to as if the padding is (1, 1, 1, 1). Changing the + // padding in such a way would allow us to avoid the allocation. 
+ Tensor transformed_input; + const int64 padding_rows_diff = + std::abs(dimensions.pad_rows_after - dimensions.pad_rows_before); + const int64 padding_cols_diff = + std::abs(dimensions.pad_cols_after - dimensions.pad_cols_before); + const int64 new_in_rows = in_rows + padding_rows_diff; + const int64 new_in_cols = in_cols + padding_cols_diff; + OP_REQUIRES_OK(context, + context->allocate_temp( + DataTypeToEnum::value, + ShapeFromFormat(params.data_format, in_batch, + new_in_rows, new_in_cols, in_depths), + &transformed_input)); + const int64 input_pad_top = + dimensions.pad_rows_before - common_padding_rows; + const int64 input_pad_bottom = + dimensions.pad_rows_after - common_padding_rows; + const int64 input_pad_left = + dimensions.pad_cols_before - common_padding_cols; + const int64 input_pad_right = + dimensions.pad_cols_after - common_padding_cols; + bool in_bounds = + FastBoundsCheck(input_pad_top, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_bottom, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_left, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_right, std::numeric_limits::max()); + if (!in_bounds) { + context->SetStatus(errors::InvalidArgument("Padding is too large.")); + return; } + functor::PadInput()( + context->eigen_device(), + To32Bit(input_param.tensor()), + {{static_cast(input_pad_top), static_cast(input_pad_left)}}, + {{static_cast(input_pad_bottom), + static_cast(input_pad_right)}}, + To32Bit(transformed_input.tensor()), params.data_format); + input = transformed_input; + in_rows = new_in_rows; + in_cols = new_in_cols; } if (params.data_format == FORMAT_NHWC) { @@ -447,8 +477,8 @@ struct LaunchFusedConv2DOp { } } - CHECK(padding_rows >= 0) << "Negative padding rows"; // Crash OK - CHECK(padding_cols >= 0) << "Negative padding cols"; // Crash OK + CHECK(common_padding_rows >= 0) << "Negative padding rows"; // Crash OK + CHECK(common_padding_cols >= 0) << "Negative padding cols"; // Crash OK 
se::dnn::ActivationMode dnn_activation_mode; switch (fusion) { @@ -481,8 +511,8 @@ struct LaunchFusedConv2DOp { .set_horizontal_dilation_rate(dimensions.dilation_cols) .set_vertical_filter_stride(dimensions.stride_rows) .set_horizontal_filter_stride(dimensions.stride_cols) - .set_zero_padding_height(padding_rows / 2) - .set_zero_padding_width(padding_cols / 2) + .set_zero_padding_height(common_padding_rows) + .set_zero_padding_width(common_padding_cols) .set_group_count(in_depths / patch_depths); se::dnn::BatchDescriptor output_desc; output_desc.set_count(out_batch) @@ -547,8 +577,8 @@ struct LaunchFusedConv2DOp { dimensions.dilation_cols}}, // dilation_cols {{dimensions.stride_rows, // stride_rows dimensions.stride_cols}}, // stride_cols - {{padding_rows, // padding_rows - padding_cols}}, // padding_cols + {{common_padding_rows, // padding_rows + common_padding_cols}}, // padding_cols dtype, // tensor datatype device_id, // device_id }, diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index b566fc4f6c3..bb0cd9e26e2 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -607,15 +607,18 @@ class FusedConv2DOpTest : public OpsTestBase { } void RunConv2DWithBias(const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, Tensor* output, - bool allow_gpu_device = false, int stride = 1) { + const Tensor& bias_data, const std::string& padding, + const std::vector& explicit_paddings, + Tensor* output, bool allow_gpu_device = false, + int stride = 1) { Scope root = tensorflow::Scope::NewRootScope(); ops::Conv2D conv = ops::Conv2D( root.WithOpName("conv"), ops::Const(root.WithOpName("input"), Input::Initializer(input_data)), ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)), - {1, stride, stride, 1}, "SAME"); + {1, stride, stride, 1}, padding, + ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings)); ops::BiasAdd with_bias = 
ops::BiasAdd( root.WithOpName("with_bias"), conv, @@ -626,15 +629,17 @@ class FusedConv2DOpTest : public OpsTestBase { void RunConv2DWithBiasAndActivation( const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, const string& activation_type, Tensor* output, - bool allow_gpu_device = false, int stride = 1) { + const Tensor& bias_data, const string& activation_type, + const std::string& padding, const std::vector& explicit_paddings, + Tensor* output, bool allow_gpu_device = false, int stride = 1) { Scope root = tensorflow::Scope::NewRootScope(); ops::Conv2D conv = ops::Conv2D( root.WithOpName("conv"), ops::Const(root.WithOpName("input"), Input::Initializer(input_data)), ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)), - {1, stride, stride, 1}, "SAME"); + {1, stride, stride, 1}, padding, + ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings)); ops::BiasAdd with_bias = ops::BiasAdd( root.WithOpName("with_bias"), conv, @@ -653,20 +658,20 @@ class FusedConv2DOpTest : public OpsTestBase { RunAndFetch(root, "with_activation", output, allow_gpu_device); } - void RunConv2DWithBatchNorm(const Tensor& input_data, - const Tensor& filter_data, - const Tensor& scale_data, - const Tensor& offset_data, - const Tensor& mean_data, - const Tensor& variance_data, Tensor* output, - bool allow_gpu_device = false, int stride = 1) { + void RunConv2DWithBatchNorm( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, + const std::string& padding, const std::vector& explicit_paddings, + Tensor* output, bool allow_gpu_device = false, int stride = 1) { Scope root = tensorflow::Scope::NewRootScope(); ops::Conv2D conv = ops::Conv2D( root.WithOpName("conv"), ops::Const(root.WithOpName("input"), Input::Initializer(input_data)), ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)), - {1, stride, stride, 1}, "SAME"); + 
{1, stride, stride, 1}, padding, + ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings)); ops::FusedBatchNorm::Attrs attr; attr = attr.IsTraining(false); @@ -686,7 +691,8 @@ class FusedConv2DOpTest : public OpsTestBase { const Tensor& input_data, const Tensor& filter_data, const Tensor& scale_data, const Tensor& offset_data, const Tensor& mean_data, const Tensor& variance_data, - const string& activation_type, Tensor* output, + const string& activation_type, const std::string& padding, + const std::vector& explicit_paddings, Tensor* output, bool allow_gpu_device = false, int stride = 1) { Scope root = tensorflow::Scope::NewRootScope(); @@ -694,7 +700,8 @@ class FusedConv2DOpTest : public OpsTestBase { root.WithOpName("conv"), ops::Const(root.WithOpName("input"), Input::Initializer(input_data)), ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)), - {1, stride, stride, 1}, "SAME"); + {1, stride, stride, 1}, padding, + ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings)); ops::FusedBatchNorm::Attrs attr; attr = attr.IsTraining(false); @@ -723,8 +730,11 @@ class FusedConv2DOpTest : public OpsTestBase { void RunFusedConv2DOp(const Tensor& input_data, const Tensor& filter_data, const std::vector& args_data, - const std::vector& fused_ops, Tensor* output, - bool allow_gpu_device = false, int stride = 1) { + const std::vector& fused_ops, + const std::string& padding, + const std::vector& explicit_paddings, + Tensor* output, bool allow_gpu_device = false, + int stride = 1) { Scope root = tensorflow::Scope::NewRootScope(); DataType dtype = DataTypeToEnum::v(); @@ -750,7 +760,8 @@ class FusedConv2DOpTest : public OpsTestBase { .Attr("num_args", num_args) .Attr("T", dtype) .Attr("strides", {1, stride, stride, 1}) - .Attr("padding", "SAME") + .Attr("padding", padding) + .Attr("explicit_paddings", explicit_paddings) .Attr("fused_ops", fused_ops) .Finalize(&fused_conv2d)); @@ -851,21 +862,26 @@ class FusedConv2DOpTest : public OpsTestBase { // 
Verifies that computing Conv2D+BiasAdd in a graph is identical to // FusedConv2D. void VerifyConv2DWithBias(int filter_size, int filter_count, + const std::vector& explicit_paddings = {}, int depth = kDepth, int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { + std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT"; const BiasAddGraphRunner run_default = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, Tensor* out) { - RunConv2DWithBias(input_data, filter_data, bias_data, out); + [this, &explicit_paddings, padding]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& bias_data, Tensor* out) { + RunConv2DWithBias(input_data, filter_data, bias_data, padding, + explicit_paddings, out); }; - const BiasAddGraphRunner run_fused = [this](const Tensor& input_data, - const Tensor& filter_data, - const Tensor& bias_data, - Tensor* out) { - RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"}, out); - }; + const BiasAddGraphRunner run_fused = + [this, explicit_paddings, padding]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& bias_data, Tensor* out) { + RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"}, + padding, explicit_paddings, out); + }; VerifyBiasAddTensorsNear(depth, image_width, image_height, image_batch_count, filter_size, filter_count, @@ -876,24 +892,29 @@ class FusedConv2DOpTest : public OpsTestBase { // to FusedConv2D. void VerifyConv2DWithBiasAndActivation( const string& activation, int filter_size, int filter_count, - int depth = kDepth, int image_width = kImageWidth, - int image_height = kImageHeight, + const std::vector& explicit_paddings = {}, int depth = kDepth, + int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { + std::string padding = explicit_paddings.empty() ? 
"SAME" : "EXPLICIT"; const BiasAddGraphRunner run_default = - [this, &activation](const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, Tensor* out) { + [this, &activation, &explicit_paddings, &padding]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& bias_data, Tensor* out) { RunConv2DWithBiasAndActivation( - input_data, filter_data, bias_data, activation, out, + input_data, filter_data, bias_data, activation, padding, + explicit_paddings, out, /*allow_gpu_device=*/activation == "Relu"); }; - const BiasAddGraphRunner run_fused = - [this, &activation](const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, Tensor* out) { - RunFusedConv2DOp(input_data, filter_data, {bias_data}, - {"BiasAdd", activation}, out, - /*allow_gpu_device=*/activation == "Relu"); - }; + const BiasAddGraphRunner run_fused = [this, &activation, &explicit_paddings, + padding](const Tensor& input_data, + const Tensor& filter_data, + const Tensor& bias_data, + Tensor* out) { + RunFusedConv2DOp(input_data, filter_data, {bias_data}, + {"BiasAdd", activation}, padding, explicit_paddings, out, + /*allow_gpu_device=*/activation == "Relu"); + }; VerifyBiasAddTensorsNear(depth, image_width, image_height, image_batch_count, filter_size, filter_count, @@ -903,27 +924,30 @@ class FusedConv2DOpTest : public OpsTestBase { // Verifies that computing Conv2D+FusedBatchNorm in a graph is identical to // FusedConv2D. void VerifyConv2DWithBatchNorm(int filter_size, int filter_count, + const std::vector& explicit_paddings = {}, int depth = kDepth, int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { + std::string padding = explicit_paddings.empty() ? 
"SAME" : "EXPLICIT"; const BatchNormGraphRunner run_default = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, const Tensor& variance_data, - Tensor* out) { + [this, explicit_paddings, padding]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { RunConv2DWithBatchNorm(input_data, filter_data, scale_data, - offset_data, mean_data, variance_data, out); + offset_data, mean_data, variance_data, padding, + explicit_paddings, out); }; const BatchNormGraphRunner run_fused = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, const Tensor& variance_data, - Tensor* out) { + [this, explicit_paddings, padding]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { RunFusedConv2DOp(input_data, filter_data, {scale_data, offset_data, mean_data, variance_data}, - {"FusedBatchNorm"}, out); + {"FusedBatchNorm"}, padding, explicit_paddings, out); }; VerifyFusedBatchNormTensorsNear(depth, image_width, image_height, @@ -935,27 +959,29 @@ class FusedConv2DOpTest : public OpsTestBase { // identical to FusedConv2D. void VerifyConv2DWithBatchNormAndActivation( const string& activation, int filter_size, int filter_count, - int depth = kDepth, int image_width = kImageWidth, - int image_height = kImageHeight, + const std::vector& explicit_paddings = {}, int depth = kDepth, + int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { + std::string padding = explicit_paddings.empty() ? 
"SAME" : "EXPLICIT"; const BatchNormGraphRunner run_default = - [this, &activation](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, - const Tensor& variance_data, Tensor* out) { + [this, &activation, explicit_paddings, padding]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { RunConv2DWithBatchNormAndActivation( input_data, filter_data, scale_data, offset_data, mean_data, - variance_data, activation, out); + variance_data, activation, padding, explicit_paddings, out); }; const BatchNormGraphRunner run_fused = - [this, &activation](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, - const Tensor& variance_data, Tensor* out) { + [this, &activation, explicit_paddings, padding]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { RunFusedConv2DOp(input_data, filter_data, {scale_data, offset_data, mean_data, variance_data}, - {"FusedBatchNorm", activation}, out); + {"FusedBatchNorm", activation}, padding, + explicit_paddings, out); }; VerifyFusedBatchNormTensorsNear(depth, image_width, image_height, @@ -997,6 +1023,13 @@ TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolution) { this->VerifyConv2DWithBias(filter_size, filter_count); } +TYPED_TEST_P(FusedConv2DWithBiasOpTest, ExplicitPaddingConvolution) { + const int filter_size = 3; + const int filter_count = 12; + this->VerifyConv2DWithBias(filter_size, filter_count, + /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0}); +} + TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolutionAndActivation) { const int filter_size = 1; const int filter_count = 12; @@ -1024,6 +1057,17 @@ 
TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolutionAndActivation) { } } +TYPED_TEST_P(FusedConv2DWithBiasOpTest, + ExplicitPaddingConvolutionAndActivation) { + const int filter_size = 3; + const int filter_count = 12; + for (const string& activation : {"Relu", "Relu6", "Elu"}) { + this->VerifyConv2DWithBiasAndActivation( + activation, filter_size, filter_count, + /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0}); + } +} + // -------------------------------------------------------------------------- // // Conv2D + FusedBatchNorm + {Activation} // // -------------------------------------------------------------------------- // @@ -1046,6 +1090,14 @@ TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolution) { this->VerifyConv2DWithBatchNorm(filter_size, filter_count); } +TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ExplicitPaddingConvolution) { + const int filter_size = 3; + const int filter_count = 12; + this->VerifyConv2DWithBatchNorm( + filter_size, filter_count, + /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0}); +} + TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolutionAndActivation) { const int filter_size = 1; const int filter_count = 12; @@ -1074,21 +1126,36 @@ TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolutionAndActivation) { } } +TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, + ExplicitPaddingConvolutionAndActivation) { + const int filter_size = 3; + const int filter_count = 12; + for (const string& activation : {"Relu", "Relu6", "Elu"}) { + this->VerifyConv2DWithBatchNormAndActivation( + activation, filter_size, filter_count, + /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0}); + } +} + REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest, // OneByOneConvolution, // ImageSizeConvolution, // SpatialConvolution, // + ExplicitPaddingConvolution, // OneByOneConvolutionAndActivation, // ImageSizeConvolutionAndActivation, // - SpatialConvolutionAndActivation); + SpatialConvolutionAndActivation, // + 
ExplicitPaddingConvolutionAndActivation); REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest, // OneByOneConvolution, // ImageSizeConvolution, // SpatialConvolution, // + ExplicitPaddingConvolution, // OneByOneConvolutionAndActivation, // ImageSizeConvolutionAndActivation, // - SpatialConvolutionAndActivation); + SpatialConvolutionAndActivation, // + ExplicitPaddingConvolutionAndActivation); using FusedBiasAddDataTypes = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBiasOpTest, diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 06b53188ac0..4d248b9f0ea 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -327,7 +327,8 @@ REGISTER_OP("_FusedConv2D") .Attr("T: {float, double}") .Attr("num_args: int >= 0") .Attr("strides: list(int)") - .Attr(GetPaddingAttrString()) + .Attr(GetPaddingAttrStringWithExplicit()) + .Attr(GetExplicitPaddingsAttrString()) .Attr(GetConvnetDataFormatAttrString()) .Attr("dilations: list(int) = [1, 1, 1, 1]") .Attr("use_cudnn_on_gpu: bool = true") @@ -335,7 +336,7 @@ REGISTER_OP("_FusedConv2D") // Attributes for the FusedBatchNorm ------------------------------------ // .Attr("epsilon: float = 0.0001") // ---------------------------------------------------------------------- // - .SetShapeFn(shape_inference::Conv2DShape) + .SetShapeFn(shape_inference::Conv2DShapeWithExplicitPadding) .Doc(R"doc( *NOTE*: Do not invoke this operator directly in Python. Grappler is expected to create these operators.