Support explicit padding in _FusedConv2D.

This prevents a crash when a graph rewrite changes a Conv2D with explicit padding to _FusedConv2D.

Also change a DCHECK to a CHECK. DCHECKs are compiled out of optimized builds, so when the condition was violated nothing fired and the program later crashed with a very cryptic error.

PiperOrigin-RevId: 246039310
Author: Reed Wanderman-Milne, 2019-04-30 15:56:38 -07:00
Committed by: TensorFlower Gardener
parent f67e646bc1
commit c14f7d33bb
6 changed files with 222 additions and 121 deletions
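
For context, a minimal sketch of the kind of graph that hit the crash, built with the same C++ ops API used in the tests below (shapes and names are illustrative): a Conv2D with EXPLICIT padding feeding a BiasAdd, which Grappler's remapper rewrites into a single _FusedConv2D node.

#include "tensorflow/cc/framework/scope.h"
#include "tensorflow/cc/ops/standard_ops.h"

using namespace tensorflow;

void BuildGraph() {
  Scope root = Scope::NewRootScope();
  auto input = ops::Placeholder(root.WithOpName("input"), DT_FLOAT);
  auto filter = ops::Placeholder(root.WithOpName("filter"), DT_FLOAT);
  auto bias = ops::Placeholder(root.WithOpName("bias"), DT_FLOAT);
  // explicit_paddings lists a (before, after) pair per dimension; for NHWC:
  // {0, 0, pad_top, pad_bottom, pad_left, pad_right, 0, 0}.
  auto conv = ops::Conv2D(
      root.WithOpName("conv"), input, filter, {1, 1, 1, 1}, "EXPLICIT",
      ops::Conv2D::Attrs().ExplicitPaddings({0, 0, 1, 2, 3, 4, 0, 0}));
  auto with_bias = ops::BiasAdd(root.WithOpName("with_bias"), conv, bias);
}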

View File

@ -565,7 +565,7 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c,
TF_RETURN_IF_ERROR(CheckValidPadding(padding, explicit_paddings, TF_RETURN_IF_ERROR(CheckValidPadding(padding, explicit_paddings,
/*num_dims=*/4, data_format)); /*num_dims=*/4, data_format));
} else { } else {
DCHECK(padding != Padding::EXPLICIT); CHECK(padding != Padding::EXPLICIT); // Crash ok.
} }
DimensionHandle output_rows, output_cols; DimensionHandle output_rows, output_cols;

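With explicit padding the shape function must account for both padding values on each spatial dimension. A minimal sketch of the standard output-size arithmetic (not the literal TensorFlow implementation):

#include <cstdint>

// out = (in + pad_before + pad_after - effective_filter) / stride + 1,
// where effective_filter = (filter - 1) * dilation + 1.
std::int64_t ConvOutputSize(std::int64_t in, std::int64_t filter,
                            std::int64_t dilation, std::int64_t stride,
                            std::int64_t pad_before, std::int64_t pad_after) {
  const std::int64_t effective_filter = (filter - 1) * dilation + 1;
  return (in + pad_before + pad_after - effective_filter) / stride + 1;
}

// Example: in=5, filter=3, dilation=1, stride=1, pads (1, 2) -> output 6.
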
View File

@@ -540,6 +540,7 @@ void CopyConv2DAttributes(const NodeDef* conv2d, NodeDef* fused_conv2d) {
   (*attr)["T"] = src_attr.at("T");
   (*attr)["strides"] = src_attr.at("strides");
   (*attr)["padding"] = src_attr.at("padding");
+  (*attr)["explicit_paddings"] = src_attr.at("explicit_paddings");
   (*attr)["dilations"] = src_attr.at("dilations");
   (*attr)["data_format"] = src_attr.at("data_format");
   (*attr)["use_cudnn_on_gpu"] = src_attr.at("use_cudnn_on_gpu");

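One subtlety in the hunk above: protobuf's Map::at() dies if the key is missing, so the copy relies on the source Conv2D carrying an explicit_paddings attr. That should hold once attr defaults are materialized on the node, since Conv2D declares explicit_paddings with a default of []. A hypothetical defensive variant, shown for illustration only (not what the change does):

// Illustrative only -- the actual change uses at() unconditionally.
auto it = src_attr.find("explicit_paddings");
if (it != src_attr.end()) {
  (*attr)["explicit_paddings"] = it->second;
}
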
View File

@@ -406,7 +406,9 @@ TEST_F(RemapperTest, FuseConv2DWithBatchNorm) {
   auto variance = Placeholder(s.WithOpName("variance"), DT_FLOAT, scale_shape);
 
   std::vector<int> strides = {1, 1, 1, 1};
-  auto conv = ops::Conv2D(s.WithOpName("conv"), input, filter, strides, "SAME");
+  auto conv = ops::Conv2D(
+      s.WithOpName("conv"), input, filter, strides, "EXPLICIT",
+      ops::Conv2D::Attrs().ExplicitPaddings({0, 0, 1, 2, 3, 4, 0, 0}));
   ops::FusedBatchNorm::Attrs attrs;
   attrs = attrs.IsTraining(false);
   auto batch_norm = ops::FusedBatchNorm(s.WithOpName("batch_norm"), conv, scale,

View File

@@ -91,18 +91,20 @@ class LaunchFusedConv2DWithOutputKernel {
  public:
   LaunchFusedConv2DWithOutputKernel(int row_stride, int col_stride,      //
                                     int row_dilation, int col_dilation,  //
-                                    Padding padding)
+                                    Padding padding,
+                                    const std::vector<int64>& explicit_paddings)
       : row_stride_(row_stride),
         col_stride_(col_stride),
         row_dilation_(row_dilation),
         col_dilation_(col_dilation),
-        padding_(padding) {}
+        padding_(padding),
+        explicit_paddings_(explicit_paddings) {}
 
   template <typename OutputKernel>
   void operator()(const OutputKernel& output_kernel, OpKernelContext* ctx,
                   const Tensor& input, const Tensor& filter, Tensor* output) {
     if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 &&
-        row_stride_ == 1 && col_stride_ == 1) {
+        row_stride_ == 1 && col_stride_ == 1 && padding_ != EXPLICIT) {
       int conv_width = 1;  // Width for the convolution step.
       for (int i = 0; i < 3; ++i) {
         conv_width *= output->dim_size(i);
@@ -135,11 +137,22 @@ class LaunchFusedConv2DWithOutputKernel {
           output_kernel);
 
     } else {
-      functor::SpatialConvolution<CPUDevice, T, OutputKernel>()(
-          ctx->eigen_device<CPUDevice>(), output->tensor<T, 4>(),
-          input.tensor<T, 4>(), filter.tensor<T, 4>(), row_stride_, col_stride_,
-          row_dilation_, col_dilation_, BrainPadding2EigenPadding(padding_),
-          output_kernel);
+      if (padding_ == EXPLICIT) {
+        functor::SpatialConvolution<CPUDevice, T, OutputKernel>()(
+            ctx->eigen_device<CPUDevice>(), output->tensor<T, 4>(),
+            input.tensor<T, 4>(), filter.tensor<T, 4>(), row_stride_,
+            col_stride_, row_dilation_, col_dilation_,
+            static_cast<int>(explicit_paddings_[2]),
+            static_cast<int>(explicit_paddings_[3]),
+            static_cast<int>(explicit_paddings_[4]),
+            static_cast<int>(explicit_paddings_[5]), output_kernel);
+      } else {
+        functor::SpatialConvolution<CPUDevice, T, OutputKernel>()(
+            ctx->eigen_device<CPUDevice>(), output->tensor<T, 4>(),
+            input.tensor<T, 4>(), filter.tensor<T, 4>(), row_stride_,
+            col_stride_, row_dilation_, col_dilation_,
+            BrainPadding2EigenPadding(padding_), output_kernel);
+      }
     }
   }
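
The indices 2..5 used above follow the explicit_paddings layout, one (before, after) pair per dimension in data-format order; the CPU kernel indexes it as NHWC. A minimal sketch of the mapping (the struct and helper are illustrative, not part of the change):

#include <cstdint>
#include <vector>

// explicit_paddings for NHWC: {0, 0, pad_top, pad_bottom, pad_left,
// pad_right, 0, 0} -- one (before, after) pair per dimension.
struct SpatialPadding { int top, bottom, left, right; };

SpatialPadding UnpackNhwcPaddings(const std::vector<std::int64_t>& p) {
  // Indices 2..5 select the H and W pairs, matching explicit_paddings_[2..5]
  // in the kernel above.
  return {static_cast<int>(p[2]), static_cast<int>(p[3]),
          static_cast<int>(p[4]), static_cast<int>(p[5])};
}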
@@ -149,6 +162,7 @@ class LaunchFusedConv2DWithOutputKernel {
   int row_dilation_;
   int col_dilation_;
   const Padding padding_;
+  const std::vector<int64>& explicit_paddings_;
 };
 
 template <typename T>
@@ -180,7 +194,8 @@ struct LaunchFusedConv2DOp<CPUDevice, T> {
     LaunchFusedConv2DWithOutputKernel<T> conv2d(
         dimensions.stride_rows, dimensions.stride_cols,
-        dimensions.dilation_rows, dimensions.dilation_cols, params.padding);
+        dimensions.dilation_rows, dimensions.dilation_cols, params.padding,
+        params.explicit_paddings);
 
     switch (fusion) {
       case FusedComputationType::kUndefined:
@@ -371,8 +386,6 @@ struct LaunchFusedConv2DOp<GPUDevice, T> {
     const int64 patch_cols = filter.dim_size(1);
     const int64 patch_depths = filter.dim_size(2);
 
-    int64 padding_rows = 0;
-    int64 padding_cols = 0;
     const int64 out_batch = GetTensorDim(*output, params.data_format, 'N');
     const int64 out_rows = GetTensorDim(*output, params.data_format, 'H');
     const int64 out_cols = GetTensorDim(*output, params.data_format, 'W');
@@ -387,45 +400,62 @@ struct LaunchFusedConv2DOp<GPUDevice, T> {
                 errors::InvalidArgument("bias depth must be equal to out depth",
                                         bias.shape().DebugString()));
 
-    if (params.padding == SAME) {
-      // Total padding on rows and cols is
-      // Pr = (R' - 1) * S + (Kr - 1) * Dr + 1 - R
-      // Pc = (C' - 1) * S + (Kc - 1) * Dc + 1 - C
-      // where (R', C') are output dimensions, (R, C) are input dimensions, S
-      // is stride, (Dr, Dc) are dilations, (Kr, Kc) are filter dimensions.
-      // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
-      // and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means
-      // we pad more on the right and bottom than on the top and left.
-      padding_rows = std::max<int>(
-          0, (out_rows - 1) * dimensions.stride_rows +
-                 (patch_rows - 1) * dimensions.dilation_rows + 1 - in_rows);
-      padding_cols = std::max<int>(
-          0, (out_cols - 1) * dimensions.stride_cols +
-                 (patch_cols - 1) * dimensions.dilation_cols + 1 - in_cols);
-      const bool rows_odd = (padding_rows % 2 != 0);
-      const bool cols_odd = (padding_cols % 2 != 0);
-      if (rows_odd || cols_odd) {
-        Tensor transformed_input;
-        int64 new_in_rows = in_rows + rows_odd;
-        int64 new_in_cols = in_cols + cols_odd;
-        OP_REQUIRES_OK(context,
-                       context->allocate_temp(
-                           DataTypeToEnum<T>::value,
-                           ShapeFromFormat(params.data_format, in_batch,
-                                           new_in_rows, new_in_cols, in_depths),
-                           &transformed_input));
-        functor::PadInput<GPUDevice, T, int, 4>()(
-            context->eigen_device<GPUDevice>(),
-            To32Bit(input_param.tensor<T, 4>()), {{0, 0}},
-            {{rows_odd, cols_odd}}, To32Bit(transformed_input.tensor<T, 4>()),
-            params.data_format);
-        input = transformed_input;
-        in_rows = new_in_rows;
-        in_cols = new_in_cols;
-      }
+    const int64 common_padding_rows =
+        std::min(dimensions.pad_rows_before, dimensions.pad_rows_after);
+    const int64 common_padding_cols =
+        std::min(dimensions.pad_cols_before, dimensions.pad_cols_after);
+    if (dimensions.pad_rows_before != dimensions.pad_rows_after ||
+        dimensions.pad_cols_before != dimensions.pad_cols_after) {
+      // cuDNN only supports padding the same amount on the left and right
+      // sides, and on the top and bottom sides. So we manually create a new
+      // padded input tensor such that we can pass it to cuDNN.
+
+      // TODO(reedwm): In some cases, we can avoid an allocation even if the two
+      // padding sides are different. For example, if the input is 2x2, the
+      // filter is 1x1, the stride is 2, and the padding is (1, 0, 1, 0), the
+      // result is equivalent to as if the padding is (1, 1, 1, 1). Changing the
+      // padding in such a way would allow us to avoid the allocation.
+      Tensor transformed_input;
+      const int64 padding_rows_diff =
+          std::abs(dimensions.pad_rows_after - dimensions.pad_rows_before);
+      const int64 padding_cols_diff =
+          std::abs(dimensions.pad_cols_after - dimensions.pad_cols_before);
+      const int64 new_in_rows = in_rows + padding_rows_diff;
+      const int64 new_in_cols = in_cols + padding_cols_diff;
+      OP_REQUIRES_OK(context,
+                     context->allocate_temp(
+                         DataTypeToEnum<T>::value,
+                         ShapeFromFormat(params.data_format, in_batch,
+                                         new_in_rows, new_in_cols, in_depths),
+                         &transformed_input));
+      const int64 input_pad_top =
+          dimensions.pad_rows_before - common_padding_rows;
+      const int64 input_pad_bottom =
+          dimensions.pad_rows_after - common_padding_rows;
+      const int64 input_pad_left =
+          dimensions.pad_cols_before - common_padding_cols;
+      const int64 input_pad_right =
+          dimensions.pad_cols_after - common_padding_cols;
+      bool in_bounds =
+          FastBoundsCheck(input_pad_top, std::numeric_limits<int>::max()) &&
+          FastBoundsCheck(input_pad_bottom, std::numeric_limits<int>::max()) &&
+          FastBoundsCheck(input_pad_left, std::numeric_limits<int>::max()) &&
+          FastBoundsCheck(input_pad_right, std::numeric_limits<int>::max());
+      if (!in_bounds) {
+        context->SetStatus(errors::InvalidArgument("Padding is too large."));
+        return;
+      }
+      functor::PadInput<GPUDevice, T, int, 4>()(
+          context->eigen_device<GPUDevice>(),
+          To32Bit(input_param.tensor<T, 4>()),
+          {{static_cast<int>(input_pad_top), static_cast<int>(input_pad_left)}},
+          {{static_cast<int>(input_pad_bottom),
+            static_cast<int>(input_pad_right)}},
+          To32Bit(transformed_input.tensor<T, 4>()), params.data_format);
+      input = transformed_input;
+      in_rows = new_in_rows;
+      in_cols = new_in_cols;
     }
 
     if (params.data_format == FORMAT_NHWC) {
       // Convert the input tensor from NHWC to NCHW.
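
To make the decomposition above concrete: cuDNN accepts only one padding value per spatial dimension, so an asymmetric explicit padding is split into a symmetric part (passed to cuDNN) plus a residue that PadInput applies to the input tensor directly. A small self-contained sketch of the arithmetic (names are illustrative):

#include <algorithm>
#include <cstdint>

struct PaddingSplit {
  std::int64_t common;          // symmetric amount handed to cuDNN
  std::int64_t residue_before;  // extra padding applied before the data
  std::int64_t residue_after;   // extra padding applied after the data
};

PaddingSplit SplitPadding(std::int64_t before, std::int64_t after) {
  const std::int64_t common = std::min(before, after);
  return {common, before - common, after - common};
}

// Example: rows padded (top=1, bottom=2) split into common=1 plus a manual
// bottom pad of 1; cols padded (left=3, right=4) split into common=3 plus a
// manual right pad of 1. The total padding per side matches the request.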
@@ -447,8 +477,8 @@ struct LaunchFusedConv2DOp<GPUDevice, T> {
       }
     }
 
-    CHECK(padding_rows >= 0) << "Negative padding rows";  // Crash OK
-    CHECK(padding_cols >= 0) << "Negative padding cols";  // Crash OK
+    CHECK(common_padding_rows >= 0) << "Negative padding rows";  // Crash OK
+    CHECK(common_padding_cols >= 0) << "Negative padding cols";  // Crash OK
 
     se::dnn::ActivationMode dnn_activation_mode;
     switch (fusion) {
@@ -481,8 +511,8 @@ struct LaunchFusedConv2DOp<GPUDevice, T> {
         .set_horizontal_dilation_rate(dimensions.dilation_cols)
         .set_vertical_filter_stride(dimensions.stride_rows)
         .set_horizontal_filter_stride(dimensions.stride_cols)
-        .set_zero_padding_height(padding_rows / 2)
-        .set_zero_padding_width(padding_cols / 2)
+        .set_zero_padding_height(common_padding_rows)
+        .set_zero_padding_width(common_padding_cols)
         .set_group_count(in_depths / patch_depths);
     se::dnn::BatchDescriptor output_desc;
     output_desc.set_count(out_batch)
@@ -547,8 +577,8 @@ struct LaunchFusedConv2DOp<GPUDevice, T> {
            dimensions.dilation_cols}},  // dilation_cols
          {{dimensions.stride_rows,      // stride_rows
            dimensions.stride_cols}},    // stride_cols
-         {{padding_rows,                // padding_rows
-           padding_cols}},              // padding_cols
+         {{common_padding_rows,         // padding_rows
+           common_padding_cols}},       // padding_cols
          dtype,        // tensor datatype
          device_id,    // device_id
      },

View File

@@ -607,15 +607,18 @@ class FusedConv2DOpTest : public OpsTestBase {
   }
 
   void RunConv2DWithBias(const Tensor& input_data, const Tensor& filter_data,
-                         const Tensor& bias_data, Tensor* output,
-                         bool allow_gpu_device = false, int stride = 1) {
+                         const Tensor& bias_data, const std::string& padding,
+                         const std::vector<int>& explicit_paddings,
+                         Tensor* output, bool allow_gpu_device = false,
+                         int stride = 1) {
     Scope root = tensorflow::Scope::NewRootScope();
 
     ops::Conv2D conv = ops::Conv2D(
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
-        {1, stride, stride, 1}, "SAME");
+        {1, stride, stride, 1}, padding,
+        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
 
     ops::BiasAdd with_bias = ops::BiasAdd(
         root.WithOpName("with_bias"), conv,
@@ -626,15 +629,17 @@ class FusedConv2DOpTest : public OpsTestBase {
 
   void RunConv2DWithBiasAndActivation(
       const Tensor& input_data, const Tensor& filter_data,
-      const Tensor& bias_data, const string& activation_type, Tensor* output,
-      bool allow_gpu_device = false, int stride = 1) {
+      const Tensor& bias_data, const string& activation_type,
+      const std::string& padding, const std::vector<int>& explicit_paddings,
+      Tensor* output, bool allow_gpu_device = false, int stride = 1) {
     Scope root = tensorflow::Scope::NewRootScope();
 
     ops::Conv2D conv = ops::Conv2D(
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
-        {1, stride, stride, 1}, "SAME");
+        {1, stride, stride, 1}, padding,
+        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
 
     ops::BiasAdd with_bias = ops::BiasAdd(
         root.WithOpName("with_bias"), conv,
@@ -653,20 +658,20 @@ class FusedConv2DOpTest : public OpsTestBase {
     RunAndFetch(root, "with_activation", output, allow_gpu_device);
   }
 
-  void RunConv2DWithBatchNorm(const Tensor& input_data,
-                              const Tensor& filter_data,
-                              const Tensor& scale_data,
-                              const Tensor& offset_data,
-                              const Tensor& mean_data,
-                              const Tensor& variance_data, Tensor* output,
-                              bool allow_gpu_device = false, int stride = 1) {
+  void RunConv2DWithBatchNorm(
+      const Tensor& input_data, const Tensor& filter_data,
+      const Tensor& scale_data, const Tensor& offset_data,
+      const Tensor& mean_data, const Tensor& variance_data,
+      const std::string& padding, const std::vector<int>& explicit_paddings,
+      Tensor* output, bool allow_gpu_device = false, int stride = 1) {
     Scope root = tensorflow::Scope::NewRootScope();
 
     ops::Conv2D conv = ops::Conv2D(
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
-        {1, stride, stride, 1}, "SAME");
+        {1, stride, stride, 1}, padding,
+        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
 
     ops::FusedBatchNorm::Attrs attr;
     attr = attr.IsTraining(false);
@@ -686,7 +691,8 @@ class FusedConv2DOpTest : public OpsTestBase {
       const Tensor& input_data, const Tensor& filter_data,
       const Tensor& scale_data, const Tensor& offset_data,
       const Tensor& mean_data, const Tensor& variance_data,
-      const string& activation_type, Tensor* output,
+      const string& activation_type, const std::string& padding,
+      const std::vector<int>& explicit_paddings, Tensor* output,
       bool allow_gpu_device = false, int stride = 1) {
     Scope root = tensorflow::Scope::NewRootScope();
@@ -694,7 +700,8 @@ class FusedConv2DOpTest : public OpsTestBase {
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
-        {1, stride, stride, 1}, "SAME");
+        {1, stride, stride, 1}, padding,
+        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
 
     ops::FusedBatchNorm::Attrs attr;
     attr = attr.IsTraining(false);
@@ -723,8 +730,11 @@ class FusedConv2DOpTest : public OpsTestBase {
 
   void RunFusedConv2DOp(const Tensor& input_data, const Tensor& filter_data,
                         const std::vector<Tensor>& args_data,
-                        const std::vector<string>& fused_ops, Tensor* output,
-                        bool allow_gpu_device = false, int stride = 1) {
+                        const std::vector<string>& fused_ops,
+                        const std::string& padding,
+                        const std::vector<int>& explicit_paddings,
+                        Tensor* output, bool allow_gpu_device = false,
+                        int stride = 1) {
     Scope root = tensorflow::Scope::NewRootScope();
 
     DataType dtype = DataTypeToEnum<T>::v();
@@ -750,7 +760,8 @@ class FusedConv2DOpTest : public OpsTestBase {
             .Attr("num_args", num_args)
             .Attr("T", dtype)
             .Attr("strides", {1, stride, stride, 1})
-            .Attr("padding", "SAME")
+            .Attr("padding", padding)
+            .Attr("explicit_paddings", explicit_paddings)
             .Attr("fused_ops", fused_ops)
             .Finalize(&fused_conv2d));
@@ -851,20 +862,25 @@ class FusedConv2DOpTest : public OpsTestBase {
   // Verifies that computing Conv2D+BiasAdd in a graph is identical to
   // FusedConv2D.
   void VerifyConv2DWithBias(int filter_size, int filter_count,
+                            const std::vector<int>& explicit_paddings = {},
                             int depth = kDepth, int image_width = kImageWidth,
                             int image_height = kImageHeight,
                             int image_batch_count = kImageBatchCount) {
+    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
     const BiasAddGraphRunner run_default =
-        [this](const Tensor& input_data, const Tensor& filter_data,
-               const Tensor& bias_data, Tensor* out) {
-          RunConv2DWithBias(input_data, filter_data, bias_data, out);
+        [this, &explicit_paddings, padding](
+            const Tensor& input_data, const Tensor& filter_data,
+            const Tensor& bias_data, Tensor* out) {
+          RunConv2DWithBias(input_data, filter_data, bias_data, padding,
+                            explicit_paddings, out);
         };
 
-    const BiasAddGraphRunner run_fused = [this](const Tensor& input_data,
-                                                const Tensor& filter_data,
-                                                const Tensor& bias_data,
-                                                Tensor* out) {
-      RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"}, out);
-    };
+    const BiasAddGraphRunner run_fused =
+        [this, explicit_paddings, padding](
+            const Tensor& input_data, const Tensor& filter_data,
+            const Tensor& bias_data, Tensor* out) {
+          RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"},
+                           padding, explicit_paddings, out);
+        };
 
     VerifyBiasAddTensorsNear(depth, image_width, image_height,
@@ -876,22 +892,27 @@ class FusedConv2DOpTest : public OpsTestBase {
   // to FusedConv2D.
   void VerifyConv2DWithBiasAndActivation(
       const string& activation, int filter_size, int filter_count,
-      int depth = kDepth, int image_width = kImageWidth,
-      int image_height = kImageHeight,
+      const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
+      int image_width = kImageWidth, int image_height = kImageHeight,
       int image_batch_count = kImageBatchCount) {
+    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
     const BiasAddGraphRunner run_default =
-        [this, &activation](const Tensor& input_data, const Tensor& filter_data,
-                            const Tensor& bias_data, Tensor* out) {
+        [this, &activation, &explicit_paddings, &padding](
+            const Tensor& input_data, const Tensor& filter_data,
+            const Tensor& bias_data, Tensor* out) {
           RunConv2DWithBiasAndActivation(
-              input_data, filter_data, bias_data, activation, out,
+              input_data, filter_data, bias_data, activation, padding,
+              explicit_paddings, out,
               /*allow_gpu_device=*/activation == "Relu");
         };
 
-    const BiasAddGraphRunner run_fused =
-        [this, &activation](const Tensor& input_data, const Tensor& filter_data,
-                            const Tensor& bias_data, Tensor* out) {
-          RunFusedConv2DOp(input_data, filter_data, {bias_data},
-                           {"BiasAdd", activation}, out,
-                           /*allow_gpu_device=*/activation == "Relu");
-        };
+    const BiasAddGraphRunner run_fused = [this, &activation, &explicit_paddings,
+                                          padding](const Tensor& input_data,
+                                                   const Tensor& filter_data,
+                                                   const Tensor& bias_data,
+                                                   Tensor* out) {
+      RunFusedConv2DOp(input_data, filter_data, {bias_data},
+                       {"BiasAdd", activation}, padding, explicit_paddings, out,
+                       /*allow_gpu_device=*/activation == "Relu");
+    };
 
@@ -903,27 +924,30 @@ class FusedConv2DOpTest : public OpsTestBase {
   // Verifies that computing Conv2D+FusedBatchNorm in a graph is identical to
   // FusedConv2D.
   void VerifyConv2DWithBatchNorm(int filter_size, int filter_count,
+                                 const std::vector<int>& explicit_paddings = {},
                                  int depth = kDepth,
                                  int image_width = kImageWidth,
                                  int image_height = kImageHeight,
                                  int image_batch_count = kImageBatchCount) {
+    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
     const BatchNormGraphRunner run_default =
-        [this](const Tensor& input_data, const Tensor& filter_data,
-               const Tensor& scale_data, const Tensor& offset_data,
-               const Tensor& mean_data, const Tensor& variance_data,
-               Tensor* out) {
+        [this, explicit_paddings, padding](
+            const Tensor& input_data, const Tensor& filter_data,
+            const Tensor& scale_data, const Tensor& offset_data,
+            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
           RunConv2DWithBatchNorm(input_data, filter_data, scale_data,
-                                 offset_data, mean_data, variance_data, out);
+                                 offset_data, mean_data, variance_data, padding,
+                                 explicit_paddings, out);
         };
 
     const BatchNormGraphRunner run_fused =
-        [this](const Tensor& input_data, const Tensor& filter_data,
-               const Tensor& scale_data, const Tensor& offset_data,
-               const Tensor& mean_data, const Tensor& variance_data,
-               Tensor* out) {
+        [this, explicit_paddings, padding](
+            const Tensor& input_data, const Tensor& filter_data,
+            const Tensor& scale_data, const Tensor& offset_data,
+            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
           RunFusedConv2DOp(input_data, filter_data,
                            {scale_data, offset_data, mean_data, variance_data},
-                           {"FusedBatchNorm"}, out);
+                           {"FusedBatchNorm"}, padding, explicit_paddings, out);
         };
 
     VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
@@ -935,27 +959,29 @@ class FusedConv2DOpTest : public OpsTestBase {
   // identical to FusedConv2D.
   void VerifyConv2DWithBatchNormAndActivation(
       const string& activation, int filter_size, int filter_count,
-      int depth = kDepth, int image_width = kImageWidth,
-      int image_height = kImageHeight,
+      const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
+      int image_width = kImageWidth, int image_height = kImageHeight,
      int image_batch_count = kImageBatchCount) {
+    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
     const BatchNormGraphRunner run_default =
-        [this, &activation](const Tensor& input_data, const Tensor& filter_data,
-                            const Tensor& scale_data, const Tensor& offset_data,
-                            const Tensor& mean_data,
-                            const Tensor& variance_data, Tensor* out) {
+        [this, &activation, explicit_paddings, padding](
+            const Tensor& input_data, const Tensor& filter_data,
+            const Tensor& scale_data, const Tensor& offset_data,
+            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
           RunConv2DWithBatchNormAndActivation(
              input_data, filter_data, scale_data, offset_data, mean_data,
-              variance_data, activation, out);
+              variance_data, activation, padding, explicit_paddings, out);
         };
 
     const BatchNormGraphRunner run_fused =
-        [this, &activation](const Tensor& input_data, const Tensor& filter_data,
-                            const Tensor& scale_data, const Tensor& offset_data,
-                            const Tensor& mean_data,
-                            const Tensor& variance_data, Tensor* out) {
+        [this, &activation, explicit_paddings, padding](
+            const Tensor& input_data, const Tensor& filter_data,
+            const Tensor& scale_data, const Tensor& offset_data,
+            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data,
                           {scale_data, offset_data, mean_data, variance_data},
-                           {"FusedBatchNorm", activation}, out);
+                           {"FusedBatchNorm", activation}, padding,
+                           explicit_paddings, out);
         };
 
     VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
@@ -997,6 +1023,13 @@ TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolution) {
   this->VerifyConv2DWithBias(filter_size, filter_count);
 }
 
+TYPED_TEST_P(FusedConv2DWithBiasOpTest, ExplicitPaddingConvolution) {
+  const int filter_size = 3;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBias(filter_size, filter_count,
+                             /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
+}
+
 TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolutionAndActivation) {
   const int filter_size = 1;
   const int filter_count = 12;
@@ -1024,6 +1057,17 @@ TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolutionAndActivation) {
   }
 }
 
+TYPED_TEST_P(FusedConv2DWithBiasOpTest,
+             ExplicitPaddingConvolutionAndActivation) {
+  const int filter_size = 3;
+  const int filter_count = 12;
+  for (const string& activation : {"Relu", "Relu6", "Elu"}) {
+    this->VerifyConv2DWithBiasAndActivation(
+        activation, filter_size, filter_count,
+        /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
+  }
+}
+
 // -------------------------------------------------------------------------- //
 // Conv2D + FusedBatchNorm + {Activation}                                      //
 // -------------------------------------------------------------------------- //
@@ -1046,6 +1090,14 @@ TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolution) {
   this->VerifyConv2DWithBatchNorm(filter_size, filter_count);
 }
 
+TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ExplicitPaddingConvolution) {
+  const int filter_size = 3;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBatchNorm(
+      filter_size, filter_count,
+      /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
+}
+
 TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolutionAndActivation) {
   const int filter_size = 1;
   const int filter_count = 12;
@@ -1074,21 +1126,36 @@ TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolutionAndActivation) {
   }
 }
 
+TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,
+             ExplicitPaddingConvolutionAndActivation) {
+  const int filter_size = 3;
+  const int filter_count = 12;
+  for (const string& activation : {"Relu", "Relu6", "Elu"}) {
+    this->VerifyConv2DWithBatchNormAndActivation(
+        activation, filter_size, filter_count,
+        /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
+  }
+}
+
 REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest,          //
                             OneByOneConvolution,                //
                             ImageSizeConvolution,               //
                             SpatialConvolution,                 //
+                            ExplicitPaddingConvolution,         //
                             OneByOneConvolutionAndActivation,   //
                             ImageSizeConvolutionAndActivation,  //
-                            SpatialConvolutionAndActivation);
+                            SpatialConvolutionAndActivation,    //
+                            ExplicitPaddingConvolutionAndActivation);
 
 REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest,     //
                             OneByOneConvolution,                //
                             ImageSizeConvolution,               //
                             SpatialConvolution,                 //
+                            ExplicitPaddingConvolution,         //
                             OneByOneConvolutionAndActivation,   //
                             ImageSizeConvolutionAndActivation,  //
-                            SpatialConvolutionAndActivation);
+                            SpatialConvolutionAndActivation,    //
+                            ExplicitPaddingConvolutionAndActivation);
 
 using FusedBiasAddDataTypes = ::testing::Types<float, double>;
 INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBiasOpTest,

View File

@@ -327,7 +327,8 @@ REGISTER_OP("_FusedConv2D")
     .Attr("T: {float, double}")
     .Attr("num_args: int >= 0")
     .Attr("strides: list(int)")
-    .Attr(GetPaddingAttrString())
+    .Attr(GetPaddingAttrStringWithExplicit())
+    .Attr(GetExplicitPaddingsAttrString())
     .Attr(GetConvnetDataFormatAttrString())
     .Attr("dilations: list(int) = [1, 1, 1, 1]")
     .Attr("use_cudnn_on_gpu: bool = true")
@@ -335,7 +336,7 @@ REGISTER_OP("_FusedConv2D")
     // Attributes for the FusedBatchNorm ------------------------------------ //
     .Attr("epsilon: float = 0.0001")
     // ---------------------------------------------------------------------- //
-    .SetShapeFn(shape_inference::Conv2DShape)
+    .SetShapeFn(shape_inference::Conv2DShapeWithExplicitPadding)
     .Doc(R"doc(
 *NOTE*: Do not invoke this operator directly in Python. Grappler is
 expected to create these operators.
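
For reference, the two helpers above are shared with the plain Conv2D registration; assuming they expand the same way here, the attrs they add amount to (sketch):

// Assumed expansions, matching the regular Conv2D op registration:
//   GetPaddingAttrStringWithExplicit() -> "padding: {'SAME', 'VALID', 'EXPLICIT'}"
//   GetExplicitPaddingsAttrString()    -> "explicit_paddings: list(int) = []"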