From 8fa9b949dcfad2c3fd271fb87ac8b94869453be4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 18 Oct 2016 20:28:41 -0800
Subject: [PATCH] Add data_format support for N-D convolution and pooling.

This adds support for "NC*" data layouts for N-D convolution and pooling
(including atrous convolution and pooling); previously, only "N*C" data
layouts were supported.  This also adds support for 1-D pooling (by
forwarding to the 2-D implementation), and fixes the broken data_format
support in conv1d.
Change: 136556507
---
 .../kernel_tests/atrous_convolution_test.py |  26 +-
 tensorflow/python/kernel_tests/pool_test.py | 115 ++++-
 tensorflow/python/ops/nn_ops.py             | 419 ++++++++++++++----
 3 files changed, 453 insertions(+), 107 deletions(-)

diff --git a/tensorflow/python/kernel_tests/atrous_convolution_test.py b/tensorflow/python/kernel_tests/atrous_convolution_test.py
index ca3e04fc180..3bd076b5bbe 100644
--- a/tensorflow/python/kernel_tests/atrous_convolution_test.py
+++ b/tensorflow/python/kernel_tests/atrous_convolution_test.py
@@ -48,15 +48,15 @@ def upsample_filters(filters, rate):
 
 class AtrousConvolutionTest(tf.test.TestCase):
 
-  def _test_atrous_convolution(self, input_shape, filter_shape, padding,
-                               dilation_rate):
+  def _test_atrous_convolution(self, input_shape, filter_shape, dilation_rate,
+                               **kwargs):
     filters = np.arange(
         np.prod(filter_shape), dtype=np.float32).reshape(filter_shape)
     filters_upsampled = upsample_filters(filters, dilation_rate)
     x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape)
     y1 = tf.nn.convolution(
-        input=x, filter=filters, padding=padding, dilation_rate=dilation_rate)
-    y2 = tf.nn.convolution(input=x, filter=filters_upsampled, padding=padding)
+        input=x, filter=filters, dilation_rate=dilation_rate, **kwargs)
+    y2 = tf.nn.convolution(input=x, filter=filters_upsampled, **kwargs)
     self.assertAllClose(y1.eval(), y2.eval(), rtol=1e-2, atol=1e-2)
 
   def testAtrousConvolution2D(self):
@@ -99,6 +99,24 @@ class AtrousConvolutionTest(tf.test.TestCase):
               padding=padding,
               dilation_rate=[rate])
 
+  def testAtrousConvolutionNC(self):
+    if tf.test.is_gpu_available():
+      # "NCW" and "NCHW" formats are not currently supported on CPU.
+      with self.test_session(use_gpu=True):
+        for padding in ["SAME", "VALID"]:
+          self._test_atrous_convolution(
+              input_shape=[2, 2, 9],
+              padding=padding,
+              filter_shape=[3, 2, 2],
+              dilation_rate=[2],
+              data_format="NCW")
+          self._test_atrous_convolution(
+              input_shape=[2, 2, 9, 5],
+              padding=padding,
+              filter_shape=[3, 3, 2, 2],
+              dilation_rate=[2, 1],
+              data_format="NCHW")
+
   def testAtrousSequence(self):
     """Tests optimization of sequence of atrous convolutions.
 
diff --git a/tensorflow/python/kernel_tests/pool_test.py b/tensorflow/python/kernel_tests/pool_test.py
index 56d3000ad30..8136efe9361 100644
--- a/tensorflow/python/kernel_tests/pool_test.py
+++ b/tensorflow/python/kernel_tests/pool_test.py
@@ -89,7 +89,7 @@ def pool_direct_single_axis(input,  # pylint: disable=redefined-builtin
 
 def pool_direct(input, window_shape, pooling_type, padding,  # pylint: disable=redefined-builtin
-                dilation_rate, strides):
+                dilation_rate, strides, data_format=None):
   """Numpy implementation of pooling.
 
   This is intended for testing only, and therefore isn't particularly
   efficient.
@@ -103,6 +103,8 @@ def pool_direct(input, window_shape, pooling_type, padding,  # pylint: disable=r
     padding: either "SAME" or "VALID".
     dilation_rate: Sequence of N ints >= 1.
     strides: Sequence of N ints >= 1.
+    data_format: If specified and starts with "NC", indicates that the
+      second dimension, rather than the last dimension, is the channel
+      dimension.
 
   Returns:
     pooling output array of rank N+2.
 
   Raises:
     ValueError: if arguments are invalid.
   """
+  if data_format is None or not data_format.startswith("NC"):
+    spatial_start_dim = 1
+  else:
+    spatial_start_dim = 2
   output = input
   for i in range(len(window_shape)):
     output = pool_direct_single_axis(
         input=output,
-        axis=i + 1,
+        axis=i + spatial_start_dim,
         window_size=window_shape[i],
         pooling_type=pooling_type,
         padding=padding,
@@ -125,26 +131,13 @@ def pool_direct(input, window_shape, pooling_type, padding,  # pylint: disable=r
 
 class PoolingTest(tf.test.TestCase):
 
-  def _test(self, input_shape, window_shape, pooling_type, padding,
-            dilation_rate, strides):
+  def _test(self, input_shape, **kwargs):
     # Use negative numbers to make sure there isn't any zero padding getting
     # used.
     x = -np.arange(
         np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
-    y1 = pool_direct(
-        input=x,
-        window_shape=window_shape,
-        pooling_type=pooling_type,
-        padding=padding,
-        dilation_rate=dilation_rate,
-        strides=strides)
-    y2 = tf.nn.pool(
-        input=x,
-        window_shape=window_shape,
-        pooling_type=pooling_type,
-        padding=padding,
-        dilation_rate=dilation_rate,
-        strides=strides)
+    y1 = pool_direct(input=x, **kwargs)
+    y2 = tf.nn.pool(input=x, **kwargs)
     self.assertAllClose(y1, y2.eval(), rtol=1e-2, atol=1e-2)
 
   def testPoolSimple(self):
@@ -159,6 +152,32 @@ class PoolingTest(tf.test.TestCase):
           dilation_rate=[1, 1],
           strides=[1, 2])
 
+  def testPool1D(self):
+    with self.test_session():
+      for padding in ["SAME", "VALID"]:
+        for pooling_type in ["MAX", "AVG"]:
+          for input_shape in [[2, 9, 2], [2, 10, 2]]:
+            for window_shape in [[1], [2], [3]]:
+              if padding != "SAME":
+                for dilation_rate in [[1], [2], [3]]:
+                  self._test(
+                      input_shape=input_shape,
+                      window_shape=window_shape,
+                      padding=padding,
+                      pooling_type=pooling_type,
+                      dilation_rate=dilation_rate,
+                      strides=[1])
+              for strides in [[1], [2], [3]]:
+                if np.any(np.array(strides) > window_shape):
+                  continue
+                self._test(
+                    input_shape=input_shape,
+                    window_shape=window_shape,
+                    padding=padding,
+                    pooling_type=pooling_type,
+                    dilation_rate=[1],
+                    strides=strides)
+
   def testPool2D(self):
     with self.test_session():
       for padding in ["SAME", "VALID"]:
@@ -212,6 +231,40 @@ class PoolingTest(tf.test.TestCase):
               dilation_rate=[1, 1, 1],
               strides=strides)
 
+  def testPoolNC(self):
+    if tf.test.is_gpu_available():
+      # "NC*" format is not currently supported on CPU.
+      with self.test_session(use_gpu=True):
+        for padding in ["SAME", "VALID"]:
+          self._test(input_shape=[2, 2, 9],
+                     window_shape=[2],
+                     padding=padding,
+                     pooling_type="MAX",
+                     strides=[1],
+                     dilation_rate=[1],
+                     data_format="NCW")
+          self._test(input_shape=[2, 2, 9],
+                     window_shape=[2],
+                     padding=padding,
+                     pooling_type="MAX",
+                     strides=[2],
+                     dilation_rate=[1],
+                     data_format="NCW")
+          self._test(input_shape=[2, 2, 7, 9],
+                     window_shape=[2, 2],
+                     padding=padding,
+                     pooling_type="MAX",
+                     strides=[1, 2],
+                     dilation_rate=[1, 1],
+                     data_format="NCHW")
+          self._test(input_shape=[2, 2, 7, 9],
+                     window_shape=[2, 2],
+                     padding="VALID",
+                     pooling_type="MAX",
+                     strides=[1, 1],
+                     dilation_rate=[2, 2],
+                     data_format="NCHW")
+
   def _test_gradient(self, input_shape, **kwargs):
     x_val = -np.arange(
         np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1
@@ -224,6 +277,32 @@ class PoolingTest(tf.test.TestCase):
     err_tolerance = 1e-2
     self.assertLess(err, err_tolerance)
 
+  def testGradient1D(self):
+    with self.test_session():
+      for padding in ["SAME", "VALID"]:
+        for pooling_type in ["AVG", "MAX"]:
+          for input_shape in [[2, 5, 2], [1, 4, 1]]:
+            for window_shape in [[1], [2]]:
+              if padding != "SAME":
+                for dilation_rate in [[1], [2]]:
+                  self._test_gradient(
+                      input_shape=input_shape,
+                      window_shape=window_shape,
+                      padding=padding,
+                      pooling_type=pooling_type,
+                      dilation_rate=dilation_rate,
+                      strides=[1])
+              for strides in [[1], [2]]:
+                if np.any(np.array(strides) > window_shape):
+                  continue
+                self._test_gradient(
+                    input_shape=input_shape,
+                    window_shape=window_shape,
+                    padding=padding,
+                    pooling_type=pooling_type,
+                    dilation_rate=[1],
+                    strides=strides)
+
   def testGradient2D(self):
     with self.test_session():
       for padding in ["SAME", "VALID"]:
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 32afa8b7c5b..a49f57c8f7d 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -42,7 +42,8 @@ from tensorflow.python.ops.gen_nn_ops import *
 local_response_normalization = gen_nn_ops.lrn
 
 
-def _non_atrous_convolution(input, filter, padding, strides=None, name=None):  # pylint: disable=redefined-builtin
+def _non_atrous_convolution(input, filter, padding, data_format=None,  # pylint: disable=redefined-builtin
+                            strides=None, name=None):
   """Computes sums of N-D convolutions (actually cross correlation).
 
   It is required that 1 <= N <= 3.
 
@@ -51,12 +52,22 @@ def _non_atrous_convolution(input, filter, padding, strides=None, name=None):  #
   extends the interface of this function with a `dilation_rate` parameter.
 
   Args:
+
     input: Rank N+2 tensor of type T of shape
-      `[batch_size] + input_spatial_shape + [in_channels]`.
+      `[batch_size] + input_spatial_shape + [in_channels]` if `data_format`
+      does not start with `"NC"`, or
+      `[batch_size, in_channels] + input_spatial_shape` if `data_format` starts
+      with `"NC"`.
     filter: Rank N+2 tensor of type T of shape
       `filter_spatial_shape + [in_channels, out_channels]`.  Rank of either
       `input` or `filter` must be known.
     padding: Padding method to use, must be either "VALID" or "SAME".
+    data_format: A string or None.  Specifies whether the channel dimension of
+      the `input` and output is the last dimension (default, or if `data_format`
+      does not start with "NC"), or the second dimension (if `data_format`
+      starts with "NC").  For N=1, the valid values are "NWC" (default) and
+      "NCW".  For N=2, the valid values are "NHWC" (default) and "NCHW".  For
+      N=3, the valid value is "NDHWC".
     strides: Sequence of N positive integers, defaults to `[1] * N`.
name: Name prefix to use. @@ -89,26 +100,50 @@ def _non_atrous_convolution(input, filter, padding, strides=None, name=None): # raise ValueError("len(strides)=%d, but should be %d" % (len(strides), conv_dims)) if conv_dims == 1: - return conv1d(value=input, - filters=filter, - stride=strides[0], - padding=padding, - name=scope) + # conv1d uses the 2-d data format names + if data_format is None or data_format == "NWC": + data_format_2d = "NHWC" + elif data_format == "NCW": + data_format_2d = "NCHW" + else: + raise ValueError("data_format must be \"NWC\" or \"NCW\".") + return conv1d( + value=input, + filters=filter, + stride=strides[0], + padding=padding, + data_format=data_format_2d, + name=scope) elif conv_dims == 2: - return gen_nn_ops.conv2d(input=input, - filter=filter, - strides=[1] + list(strides) + [1], - padding=padding, - name=name) + if data_format is None or data_format == "NHWC": + data_format = "NHWC" + strides = [1] + list(strides) + [1] + elif data_format == "NCHW": + strides = [1, 1] + list(strides) + else: + raise ValueError("data_format must be \"NHWC\" or \"NCHW\".") + return gen_nn_ops.conv2d( + input=input, + filter=filter, + strides=strides, + padding=padding, + data_format=data_format, + name=name) elif conv_dims == 3: - return gen_nn_ops.conv3d(input=input, - filter=filter, - strides=[1] + list(strides) + [1], - padding=padding, - name=name) + if data_format is None or data_format == "NDHWC": + strides = [1] + list(strides) + [1] + else: + raise ValueError("data_format must be \"NDHWC\".") + return gen_nn_ops.conv3d( + input=input, + filter=filter, + strides=strides, + padding=padding, + name=name) -def with_space_to_batch(input, dilation_rate, padding, op, filter_shape=None): # pylint: disable=redefined-builtin +def with_space_to_batch(input, dilation_rate, padding, op, filter_shape=None, # pylint: disable=redefined-builtin + spatial_dims=None): """Performs `op` on the space-to-batch representation of `input`. 
This has the effect of transforming sliding window operations into the @@ -122,19 +157,27 @@ def with_space_to_batch(input, dilation_rate, padding, op, filter_shape=None): Otherwise, it returns: batch_to_space_nd( - op(space_to_batch_nd(input, dilation_rate, paddings), + op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings), num_spatial_dims, "VALID") - dilation_rate, - crops), + adjusted_dilation_rate, + adjusted_crops), - where `paddings` and `crops` are int32 [num_spatial_dims, 2] tensors that - depend on the value of `padding`: + where: + + adjusted_dilation_rate is an int64 tensor of shape [max(spatial_dims)], + adjusted_{paddings,crops} are int64 tensors of shape [max(spatial_dims), 2] + + defined as follows: + + We first define two int64 tensors `paddings` and `crops` of shape + `[num_spatial_dims, 2]` based on the value of `padding` and the spatial + dimensions of the `input`: If `padding = "VALID"`, then: paddings, crops = required_space_to_batch_paddings( - input_shape[1:num_spatial_dims+1], + input_shape[spatial_dims], dilation_rate) If `padding = "SAME"`, then: @@ -143,10 +186,30 @@ def with_space_to_batch(input, dilation_rate, padding, op, filter_shape=None): filter_shape + (filter_shape - 1) * (dilation_rate - 1) paddings, crops = required_space_to_batch_paddings( - input_shape[1:num_spatial_dims+1], + input_shape[spatial_dims], + dilation_rate, [(dilated_filter_shape - 1) // 2, dilated_filter_shape - 1 - (dilated_filter_shape - 1) // 2]) + Because `space_to_batch_nd` and `batch_to_space_nd` assume that the spatial + dimensions are contiguous starting at the second dimension, but the specified + `spatial_dims` may not be, we must adjust `dilation_rate`, `paddings` and + `crops` in order to be usable with these operations. For a given dimension, + if the block size is 1, and both the starting and ending padding and crop + amounts are 0, then space_to_batch_nd effectively leaves that dimension alone, + which is what is needed for dimensions not part of `spatial_dims`. + Furthermore, `space_to_batch_nd` and `batch_to_space_nd` handle this case + efficiently for any number of leading and trailing dimensions. + + For 0 <= i < len(spatial_dims), we assign: + + adjusted_dilation_rate[spatial_dims[i] - 1] = dilation_rate[i] + adjusted_paddings[spatial_dims[i] - 1, :] = paddings[i, :] + adjusted_crops[spatial_dims[i] - 1, :] = crops[i, :] + + All unassigned values of `adjusted_dilation_rate` default to 1, while all + unassigned values of `adjusted_paddings` and `adjusted_crops` default to 0. + Note in the case that `dilation_rate` is not uniformly 1, specifying "VALID" padding is equivalent to specifying `padding = "SAME"` with a filter_shape of `[1]*N`. @@ -189,19 +252,23 @@ def with_space_to_batch(input, dilation_rate, padding, op, filter_shape=None): net = with_space_to_batch(net, dilation_rate, "VALID", combined_op) Args: - input: Tensor of rank >= 1 + num_spatial_dims. + input: Tensor of rank > max(spatial_dims). dilation_rate: int32 Tensor of *known* shape [num_spatial_dims]. padding: str constant equal to "VALID" or "SAME" op: Function that maps (input, num_spatial_dims, padding) -> output filter_shape: If padding = "SAME", specifies the shape of the convolution kernel/pooling window as an integer Tensor of shape [>=num_spatial_dims]. If padding = "VALID", filter_shape is ignored and need not be specified. 
+    spatial_dims: Monotonically increasing sequence of `num_spatial_dims`
+      integers (which are >= 1) specifying the spatial dimensions of `input`
+      and output.  Defaults to: `range(1, num_spatial_dims+1)`.
 
   Returns:
     The output Tensor as described above.
 
   Raises:
-    ValueError: if padding is invalid or the arguments are incompatible.
+    ValueError: if `padding` is invalid or the arguments are incompatible.
+    ValueError: if `spatial_dims` are invalid.
   """
   input = ops.convert_to_tensor(input, name="input")
@@ -218,18 +285,27 @@ def with_space_to_batch(input, dilation_rate, padding, op, filter_shape=None):
 
   num_spatial_dims = rate_shape[0].value
 
+  if spatial_dims is None:
+    spatial_dims = range(1, num_spatial_dims + 1)
+  orig_spatial_dims = list(spatial_dims)
+  spatial_dims = sorted(set(int(x) for x in orig_spatial_dims))
+  if spatial_dims != orig_spatial_dims or any(x < 1 for x in spatial_dims):
+    raise ValueError(
+        "spatial_dims must be a monotonically increasing sequence of positive integers")  # pylint: disable=line-too-long
+  last_spatial_dim = spatial_dims[-1]
+
   try:
-    input.get_shape().with_rank_at_least(num_spatial_dims + 1)
+    input.get_shape().with_rank_at_least(last_spatial_dim + 1)
   except ValueError:
     ValueError("input tensor must have rank %d at least" %
-               (num_spatial_dims + 1))
+               (last_spatial_dim + 1))
 
   const_rate = tensor_util.constant_value(dilation_rate)
   rate_or_const_rate = dilation_rate
   if const_rate is not None:
     rate_or_const_rate = const_rate
     if np.any(const_rate < 1):
-      raise ValueError("rate must be positive")
+      raise ValueError("dilation_rate must be positive")
     if np.all(const_rate == 1):
       return op(input, num_spatial_dims, padding)
 
@@ -266,26 +342,88 @@ def with_space_to_batch(input, dilation_rate, padding, op, filter_shape=None):
     raise ValueError("Invalid padding method %r" % padding)
 
   # Handle input whose shape is unknown during graph creation.
-  if input.get_shape().is_fully_defined():
-    input_shape = np.array(input.get_shape().as_list())
-  else:
-    input_shape = array_ops.shape(input)
+  input_spatial_shape = None
+  if input.get_shape().ndims is not None:
+    input_shape_list = input.get_shape().as_list()
+    input_spatial_shape = [input_shape_list[i] for i in spatial_dims]
+  if input_spatial_shape is None or None in input_spatial_shape:
+    input_spatial_shape = array_ops.gather(array_ops.shape(input), spatial_dims)
 
-  input_spatial_shape = input_shape[1:num_spatial_dims+1]
   paddings, crops = array_ops.required_space_to_batch_paddings(
       input_shape=input_spatial_shape,
      base_paddings=base_paddings,
      block_shape=dilation_rate)
 
-  input_converted = array_ops.space_to_batch_nd(input=input,
-                                                block_shape=dilation_rate,
-                                                paddings=paddings)
+  def adjust(orig, fill_value):
+    """Returns an `adjusted` version of `orig` based on `spatial_dims`.
+
+    Tensor of the same type as `orig` and with shape
+    `[max(spatial_dims), ...]` where:
+
+      adjusted[spatial_dims[i] - 1, ...] = orig[i, ...]
+
+    for 0 <= i < len(spatial_dims), and
+
+      adjusted[j, ...] = fill_value
+
+    for j != spatial_dims[i] - 1 for all i.
+
+    If `orig` is a constant value, then the result will be a constant value.
+
+    Args:
+      orig: Tensor of shape `[len(spatial_dims), ...]`.
+      fill_value: Numpy scalar (of same data type as `orig`) specifying the fill
+        value for non-spatial dimensions.
+
+    Returns:
+      `adjusted` tensor.
+ """ + fill_dims = orig.get_shape().as_list()[1:] + dtype = orig.dtype.as_numpy_dtype + parts = [] + const_orig = tensor_util.constant_value(orig) + const_or_orig = const_orig if const_orig is not None else orig + prev_spatial_dim = 0 + i = 0 + while i < len(spatial_dims): + start_i = i + start_spatial_dim = spatial_dims[i] + if start_spatial_dim > 1: + # Fill in any gap from the previous spatial dimension (or dimension 1 if + # this is the first spatial dimension) with `fill_value`. + parts.append( + np.full( + [start_spatial_dim - 1 - prev_spatial_dim] + fill_dims, + fill_value, + dtype=dtype)) + # Find the largest value of i such that: + # [spatial_dims[start_i], ..., spatial_dims[i]] + # == [start_spatial_dim, ..., start_spatial_dim + i - start_i], + # i.e. the end of a contiguous group of spatial dimensions. + while (i + 1 < len(spatial_dims) and + spatial_dims[i + 1] == spatial_dims[i] + 1): + i += 1 + parts.append(const_or_orig[start_i:i + 1]) + prev_spatial_dim = spatial_dims[i] + i += 1 + if const_orig is not None: + return np.concatenate(parts) + else: + return array_ops.concat(0, parts) + + dilation_rate = adjust(dilation_rate, 1) + paddings = adjust(paddings, 0) + crops = adjust(crops, 0) + + input_converted = array_ops.space_to_batch_nd( + input=input, + block_shape=dilation_rate, + paddings=paddings) result = op(input_converted, num_spatial_dims, "VALID") - result_converted = array_ops.batch_to_space_nd(input=result, - block_shape=dilation_rate, - crops=crops) + result_converted = array_ops.batch_to_space_nd( + input=result, block_shape=dilation_rate, crops=crops) return result_converted @@ -333,7 +471,8 @@ def _get_strides_and_dilation_rate(num_spatial_dims, strides, dilation_rate): def convolution(input, filter, # pylint: disable=redefined-builtin - padding, strides=None, dilation_rate=None, name=None): + padding, strides=None, dilation_rate=None, + name=None, data_format=None): # pylint: disable=line-too-long """Computes sums of N-D convolutions (actually cross-correlation). @@ -343,7 +482,8 @@ def convolution(input, filter, # pylint: disable=redefined-builtin the optional `dilation_rate` parameter. Currently, however, output striding is not supported for atrous convolutions. - Specifically, given rank (N+2) `input` Tensor of shape + Specifically, in the case that `data_format` does not start with "NC", given + a rank (N+2) `input` Tensor of shape [num_batches, input_spatial_shape[0], @@ -368,23 +508,34 @@ def convolution(input, filter, # pylint: disable=redefined-builtin sum_{z[0], ..., z[N-1], q} - filters[z[0], ..., z[N-1], q, k] * + filter[z[0], ..., z[N-1], q, k] * padded_input[b, x[0]*strides[0] + dilation_rate[0]*z[0], ..., x[N-1]*strides[N-1] + dilation_rate[N-1]*z[N-1], - q], + q] where `padded_input` is obtained by zero padding the input using an effective spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and output striding `strides` as described in the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution). + In the case that `data_format` does start with `"NC"`, the `input` and output + (but not the `filter`) are simply transposed as follows: + + convolution(input, data_format, **kwargs) = + tf.transpose(convolution(tf.transpose(input, [0] + range(2,N+2) + [1]), + **kwargs), + [0, N+1] + range(1, N+1)) + It is required that 1 <= N <= 3. Args: input: An N-D `Tensor` of type `T`, of shape - `[batch_size] + input_spatial_shape + [in_channels]`. 
+ `[batch_size] + input_spatial_shape + [in_channels]` if data_format does + not start with "NC" (default), or + `[batch_size, in_channels] + input_spatial_shape` if data_format starts + with "NC". filter: An N-D `Tensor` with the same type as `input` and shape `spatial_filter_shape + [in_channels, out_channels]`. padding: A string, either `"VALID"` or `"SAME"`. The padding algorithm. @@ -400,12 +551,23 @@ def convolution(input, filter, # pylint: disable=redefined-builtin filter in each spatial dimension i. If any value of dilation_rate is > 1, then all values of strides must be 1. name: Optional name for the returned tensor. + data_format: A string or None. Specifies whether the channel dimension of + the `input` and output is the last dimension (default, or if `data_format` + does not start with "NC"), or the second dimension (if `data_format` + starts with "NC"). For N=1, the valid values are "NWC" (default) and + "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". For + N=3, the valid value is "NDHWC". Returns: - A `Tensor` with the same type as `value` of shape + A `Tensor` with the same type as `input` of shape - `[batch_size] + output_spatial_shape + [out_channels]`, + `[batch_size] + output_spatial_shape + [out_channels]` + if data_format is None or does not start with "NC", or + + `[batch_size, out_channels] + output_spatial_shape` + + if data_format starts with "NC", where `output_spatial_shape` depends on the value of `padding`. If padding == "SAME": @@ -418,8 +580,8 @@ def convolution(input, filter, # pylint: disable=redefined-builtin / strides[i]). Raises: - ValueError: If input/output depth does not match `filter` shape, or if - padding is other than `"VALID"` or `"SAME"`. + ValueError: If input/output depth does not match `filter` shape, if padding + is other than `"VALID"` or `"SAME"`, or if data_format is invalid. """ # pylint: enable=line-too-long @@ -444,12 +606,19 @@ def convolution(input, filter, # pylint: disable=redefined-builtin except ValueError: ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2)) - if not input.get_shape()[num_spatial_dims + 1].is_compatible_with( - filter.get_shape()[num_spatial_dims]): + if data_format is None or not data_format.startswith("NC"): + input_channels_dim = input.get_shape()[num_spatial_dims + 1] + spatial_dims = range(1, num_spatial_dims+1) + else: + input_channels_dim = input.get_shape()[1] + spatial_dims = range(2, num_spatial_dims+2) + + if not input_channels_dim.is_compatible_with(filter.get_shape()[ + num_spatial_dims]): raise ValueError( "number of input channels does not match corresponding dimension of filter, " - "{} != {}".format(input.get_shape()[num_spatial_dims + 1], - filter.get_shape()[num_spatial_dims])) + "{} != {}".format(input_channels_dim, filter.get_shape()[ + num_spatial_dims])) strides, dilation_rate = _get_strides_and_dilation_rate( num_spatial_dims, strides, dilation_rate) @@ -459,12 +628,14 @@ def convolution(input, filter, # pylint: disable=redefined-builtin input=input_converted, filter=filter, padding=padding, + data_format=data_format, strides=strides, name=name) return with_space_to_batch( input=input, filter_shape=array_ops.shape(filter), + spatial_dims=spatial_dims, dilation_rate=dilation_rate, padding=padding, op=op) @@ -476,11 +647,12 @@ def pool(input, # pylint: disable=redefined-builtin padding, dilation_rate=None, strides=None, - name=None): + name=None, + data_format=None): # pylint: disable=line-too-long """Performs an N-D pooling operation. 
- Computes for + In the case that `data_format` does not start with "NC", computes for 0 <= b < batch_size, 0 <= x[i] < output_spatial_shape[i], 0 <= c < num_channels: @@ -498,10 +670,20 @@ def pool(input, # pylint: disable=redefined-builtin [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution). The reduction never includes out-of-bounds positions. + In the case that `data_format` starts with `"NC"`, the `input` and output are + simply transposed as follows: + + pool(input, data_format, **kwargs) = + tf.transpose(pool(tf.transpose(input, [0] + range(2,N+2) + [1]), + **kwargs), + [0, N+1] + range(1, N+1)) + Args: input: Tensor of rank N+2, of shape - [batch_size] + input_spatial_shape + [num_channels]. - Pooling happens over the spatial dimensions only. + `[batch_size] + input_spatial_shape + [num_channels]` if data_format does + not start with "NC" (default), or + `[batch_size, num_channels] + input_spatial_shape` if data_format starts + with "NC". Pooling happens over the spatial dimensions only. window_shape: Sequence of N ints >= 1. pooling_type: Specifies pooling operation, must be "AVG" or "MAX". padding: The padding algorithm, must be "SAME" or "VALID". @@ -513,10 +695,22 @@ def pool(input, # pylint: disable=redefined-builtin If any value of strides is > 1, then all values of dilation_rate must be 1. name: Optional. Name of the op. + data_format: A string or None. Specifies whether the channel dimension of + the `input` and output is the last dimension (default, or if `data_format` + does not start with "NC"), or the second dimension (if `data_format` + starts with "NC"). For N=1, the valid values are "NWC" (default) and + "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". For + N=3, the valid value is "NDHWC". Returns: Tensor of rank N+2, of shape - [batch_size] + output_spatial_shape + [num_channels], + [batch_size] + output_spatial_shape + [num_channels] + + if data_format is None or does not start with "NC", or + + [batch_size, num_channels] + output_spatial_shape + + if data_format starts with "NC", where `output_spatial_shape` depends on the value of padding: If padding = "SAME": @@ -536,8 +730,8 @@ def pool(input, # pylint: disable=redefined-builtin input = ops.convert_to_tensor(input, name="input") num_spatial_dims = len(window_shape) - if num_spatial_dims < 2 or num_spatial_dims > 3: - raise ValueError("It is required that 2 <= num_spatial_dims <= 3.") + if num_spatial_dims < 1 or num_spatial_dims > 3: + raise ValueError("It is required that 1 <= num_spatial_dims <= 3.") input.get_shape().with_rank(num_spatial_dims + 2) @@ -553,8 +747,10 @@ def pool(input, # pylint: disable=redefined-builtin "strides > window_shape not supported due to inconsistency between " "CPU and GPU implementations") - pooling_ops = {("MAX", 2): max_pool, + pooling_ops = {("MAX", 1): max_pool, + ("MAX", 2): max_pool, ("MAX", 3): max_pool3d, # pylint: disable=undefined-variable + ("AVG", 1): avg_pool, ("AVG", 2): avg_pool, ("AVG", 3): avg_pool3d, # pylint: disable=undefined-variable } @@ -563,18 +759,52 @@ def pool(input, # pylint: disable=redefined-builtin raise ValueError("%d-D %s pooling is not supported." 
% (op_key[1], op_key[0])) - def op(converted_input, _, converted_padding): - return pooling_ops[op_key](converted_input, - [1] + list(window_shape) + [1], - [1] + list(strides) + [1], - converted_padding, - name=scope) + if data_format is None or not data_format.startswith("NC"): + adjusted_window_shape = [1] + list(window_shape) + [1] + adjusted_strides = [1] + list(strides) + [1] + spatial_dims = range(1, num_spatial_dims + 1) + else: + adjusted_window_shape = [1, 1] + list(window_shape) + adjusted_strides = [1, 1] + list(strides) + spatial_dims = range(2, num_spatial_dims + 2) - return with_space_to_batch(input=input, - dilation_rate=dilation_rate, - padding=padding, - op=op, - filter_shape=window_shape) + if num_spatial_dims == 3: + if data_format is not None and data_format != "NDHWC": + raise ValueError("data_format must be \"NDHWC\".") + data_format_kwargs = dict() + elif num_spatial_dims == 1: + if data_format is None or data_format == "NWC": + data_format_kwargs = dict(data_format="NHWC") + elif data_format == "NCW": + data_format_kwargs = dict(data_format="NCHW") + else: + raise ValueError("data_format must be either \"NWC\" or \"NCW\".") + adjusted_window_shape = [1] + adjusted_window_shape + adjusted_strides = [1] + adjusted_strides + else: + data_format_kwargs = dict(data_format=data_format) + + def op(converted_input, _, converted_padding): # pylint: disable=missing-docstring + if num_spatial_dims == 1: + converted_input = array_ops.expand_dims(converted_input, + spatial_dims[0]) + result = pooling_ops[op_key](converted_input, + adjusted_window_shape, + adjusted_strides, + converted_padding, + name=scope, + **data_format_kwargs) + if num_spatial_dims == 1: + result = array_ops.squeeze(result, [spatial_dims[0]]) + return result + + return with_space_to_batch( + input=input, + dilation_rate=dilation_rate, + padding=padding, + op=op, + spatial_dims=spatial_dims, + filter_shape=window_shape) def atrous_conv2d(value, filters, rate, padding, name=None): @@ -1794,19 +2024,27 @@ def conv1d(value, filters, stride, padding, name=None): """Computes a 1-D convolution given 3-D input and filter tensors. - Given an input tensor of shape [batch, in_width, in_channels] + Given an input tensor of shape + [batch, in_width, in_channels] + if data_format is "NHWC", or + [batch, in_channels, in_width] + if data_format is "NCHW", and a filter / kernel tensor of shape [filter_width, in_channels, out_channels], this op reshapes the arguments to pass them to conv2d to perform the equivalent convolution operation. - Internally, this op reshapes the input tensors and invokes - `tf.nn.conv2d`. A tensor of shape [batch, in_width, in_channels] - is reshaped to [batch, 1, in_width, in_channels], and the filter - is reshaped to [1, filter_width, in_channels, out_channels]. - The result is then reshaped back to [batch, out_width, out_channels] - (where out_width is a function of the stride and padding as in - conv2d) and returned to the caller. + Internally, this op reshapes the input tensors and invokes `tf.nn.conv2d`. + For example, if `data_format` does not start with "NC", a tensor of shape + [batch, in_width, in_channels] + is reshaped to + [batch, 1, in_width, in_channels], + and the filter is reshaped to + [1, filter_width, in_channels, out_channels]. + The result is then reshaped back to + [batch, out_width, out_channels] + (where out_width is a function of the stride and padding as in conv2d) and + returned to the caller. Args: value: A 3D `Tensor`. Must be of type `float32` or `float64`. 
@@ -1823,16 +2061,27 @@ def conv1d(value, filters, stride, padding,
 
   Returns:
     A `Tensor`.  Has the same type as input.
+
+  Raises:
+    ValueError: if `data_format` is invalid.
   """
   with ops.name_scope(name, "conv1d", [value, filters]) as name:
-    # Reshape the input tensor to [batch, 1, in_width, in_channels]
-    value = array_ops.expand_dims(value, 1)
-    # And reshape the filter to [1, filter_width, in_channels, out_channels]
+    # Reshape the input tensor to [batch, 1, in_width, in_channels] ("NHWC")
+    # or [batch, in_channels, 1, in_width] ("NCHW"), so that the convolution
+    # can be computed by conv2d.
+    if data_format is None or data_format == "NHWC":
+      data_format = "NHWC"
+      spatial_start_dim = 1
+      strides = [1, 1, stride, 1]
+    elif data_format == "NCHW":
+      spatial_start_dim = 2
+      strides = [1, 1, 1, stride]
+    else:
+      raise ValueError("data_format must be \"NHWC\" or \"NCHW\".")
+    value = array_ops.expand_dims(value, spatial_start_dim)
     filters = array_ops.expand_dims(filters, 0)
-    result = gen_nn_ops.conv2d(value, filters, [1, 1, stride, 1], padding,
+    result = gen_nn_ops.conv2d(value, filters, strides, padding,
                                use_cudnn_on_gpu=use_cudnn_on_gpu,
                                data_format=data_format)
-    return array_ops.squeeze(result, [1])
+    return array_ops.squeeze(result, [spatial_start_dim])
 
 
 ops.RegisterShape("Dilation2D")(common_shapes.call_cpp_shape_fn)
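
Usage sketch (a minimal, hedged example, not part of the diff above): the
following exercises the new `data_format` arguments, mirroring the "NCW"
cases in the tests. It assumes a GPU is available, since the tests note that
"NC*" layouts are not currently supported on CPU, and it uses the TF 0.x-era
session API that this patch targets.

    import numpy as np
    import tensorflow as tf

    # NCW input: batch=2, channels=2, width=9.
    x = np.arange(2 * 2 * 9, dtype=np.float32).reshape([2, 2, 9])
    # 1-D filter: [filter_width, in_channels, out_channels].
    w = np.ones([3, 2, 2], dtype=np.float32)

    # Atrous 1-D convolution directly on the channels-first layout.
    y = tf.nn.convolution(input=x, filter=w, padding="SAME",
                          dilation_rate=[2], data_format="NCW")
    # 1-D max pooling on the same layout (forwarded to the 2-D kernel).
    p = tf.nn.pool(input=x, window_shape=[2], pooling_type="MAX",
                   padding="SAME", strides=[2], data_format="NCW")
    # conv1d's repaired data_format takes the 2-D names, per the patch.
    c = tf.nn.conv1d(value=x, filters=w, stride=1, padding="SAME",
                     data_format="NCHW")

    with tf.Session() as sess:
      print(sess.run(y).shape)  # (2, 2, 9)
      print(sess.run(p).shape)  # (2, 2, 5)
      print(sess.run(c).shape)  # (2, 2, 9)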
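
The convolution and pool docstrings above define the "NC*" behavior by a
transpose identity; spelled out for N=1 (reusing `x`, `w`, and `y` from the
sketch above), a check along these lines could be:

    # Channels-last reference path: NCW -> NWC, convolve, NWC -> NCW.
    x_nwc = tf.transpose(x, [0, 2, 1])
    y_ref = tf.transpose(
        tf.nn.convolution(input=x_nwc, filter=w, padding="SAME",
                          dilation_rate=[2]),
        [0, 2, 1])

    with tf.Session() as sess:
      # Should agree element-wise with the data_format="NCW" result above.
      np.testing.assert_allclose(sess.run(y), sess.run(y_ref), rtol=1e-3)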
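
Finally, a small numpy illustration of the contract documented for the
`adjust` helper inside with_space_to_batch: entries of `orig` land at
positions `spatial_dims[i] - 1`, and all other positions take the fill value.
With an "NC*" layout and N=2, `spatial_dims = [2, 3]`, so index 0 (the
channel dimension) receives the fill value 1. The helper name below is
hypothetical; only the mapping follows the docstring.

    import numpy as np

    def adjust_sketch(orig, fill_value, spatial_dims):
      # adjusted[spatial_dims[i] - 1, ...] = orig[i, ...]; fill elsewhere.
      out = np.full((max(spatial_dims),) + orig.shape[1:], fill_value,
                    dtype=orig.dtype)
      for i, d in enumerate(spatial_dims):
        out[d - 1] = orig[i]
      return out

    dilation_rate = np.array([2, 1], dtype=np.int64)
    print(adjust_sketch(dilation_rate, 1, [2, 3]))  # -> [1 2 1]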