Adds fractional_max_pool and fractional_avg_pool ops. Fixes #2953.

Change: 131754627
2016-08-30 13:23:06 -08:00 · 2016-08-30 13:23:06 -08:00 · 8b667b7d4b
commit 8b667b7d4b
parent 79d8721bf2
17 changed files with 2627 additions and 0 deletions
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@ -1448,6 +1448,9 @@ tf_kernel_library(
    srcs = [
        "avgpooling_op.cc",
        "cudnn_pooling_gpu.cc",
        "fractional_avg_pool_op.cc",
        "fractional_max_pool_op.cc",
        "fractional_pool_common.cc",
        "maxpooling_op.cc",
        "pooling_ops_3d.cc",
        "pooling_ops_common.cc",
@ -1455,6 +1458,7 @@ tf_kernel_library(
    hdrs = [
        "avgpooling_op.h",
        "cudnn_pooling_gpu.h",
        "fractional_pool_common.h",
        "maxpooling_op.h",
        "pooling_ops_common.h",
    ],
--- a/tensorflow/core/kernels/fractional_avg_pool_op.cc
+++ b/tensorflow/core/kernels/fractional_avg_pool_op.cc
@ -0,0 +1,354 @@
 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #define EIGEN_USE_THREADS
 #include <algorithm>
 #include <cmath>
 #include <random>
 #include <vector>
 #include "tensorflow/core/kernels/fractional_pool_common.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/util/guarded_philox_random.h"
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 template <typename T>
 class FractionalAvgPoolOp : public OpKernel {
 public:
  explicit FractionalAvgPoolOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pooling_ratio", &pooling_ratio_));
    OP_REQUIRES_OK(context, context->GetAttr("pseudo_random", &pseudo_random_));
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
    OP_REQUIRES(context, pooling_ratio_.size() == 4,
                errors::InvalidArgument(
                    "pooling_ratio field must specify 4 dimensions"));
    OP_REQUIRES(
        context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1,
        errors::Unimplemented("Fractional average pooling is not yet "
                              "supported on the batch nor channel dimension."));
    OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
    pooling_region_generated_ = false;
    // Initialize philox random generator.
    OP_REQUIRES_OK(context, generator_.Init(context));
  }
  void Compute(OpKernelContext* context) override {
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;
    constexpr int tensor_in_and_out_dims = 4;
    const Tensor& tensor_in = context->input(0);
    OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      input_size_.push_back(tensor_in.dim_size(i));
    }
    // Output size.
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      output_size_.push_back(
          static_cast<int>(floor(input_size_[i] / pooling_ratio_[i])));
      DCHECK_GT(output_size_[i], 0);
    }
    // Generate pooling sequence.
    std::vector<int64> row_cum_seq;
    std::vector<int64> col_cum_seq;
    if (deterministic_) {
      if (pooling_region_generated_) {
        row_cum_seq = row_cum_seq_;
        col_cum_seq = col_cum_seq_;
      } else {
        row_cum_seq = GeneratePoolingSequence(input_size_[1], output_size_[1],
                                              &generator_, pseudo_random_);
        col_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
                                              &generator_, pseudo_random_);
        mutex_lock lock(mu_);
        row_cum_seq_ = row_cum_seq;
        col_cum_seq_ = col_cum_seq;
        pooling_region_generated_ = true;
      }
    } else {
      row_cum_seq = GeneratePoolingSequence(input_size_[1], output_size_[1],
                                            &generator_, pseudo_random_);
      col_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
                                            &generator_, pseudo_random_);
    }
    // Prepare output.
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       0, TensorShape({output_size_[0], output_size_[1],
                                       output_size_[2], output_size_[3]}),
                       &output_tensor));
    Tensor* output_row_seq_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       1, TensorShape({static_cast<int64>(row_cum_seq.size())}),
                       &output_row_seq_tensor));
    Tensor* output_col_seq_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       2, TensorShape({static_cast<int64>(col_cum_seq.size())}),
                       &output_col_seq_tensor));
    ConstEigenMatrixMap in_mat(
        tensor_in.flat<T>().data(), input_size_[3],
        input_size_[2] * input_size_[1] * input_size_[0]);
    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size_[3],
                           output_size_[2] * output_size_[1] * output_size_[0]);
    // out_count corresponds to number of elements in each pooling cell.
    Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols());
    // Initializes the output tensor and out_count with 0.
    out_mat.setZero();
    out_count.setZero();
    auto output_row_seq_flat = output_row_seq_tensor->flat<int64>();
    auto output_col_seq_flat = output_col_seq_tensor->flat<int64>();
    // Set output tensors.
    for (int i = 0; i < row_cum_seq.size(); ++i) {
      output_row_seq_flat(i) = row_cum_seq[i];
    }
    for (int i = 0; i < col_cum_seq.size(); ++i) {
      output_col_seq_flat(i) = col_cum_seq[i];
    }
    // For both input and output,
    // 0: batch
    // 1: row / row
    // 2: col / col
    // 3: depth / channel
    const int64 row_max = input_size_[1] - 1;
    const int64 col_max = input_size_[2] - 1;
    for (int64 b = 0; b < input_size_[0]; ++b) {
      // row sequence.
      for (int64 hs = 0; hs < row_cum_seq.size() - 1; ++hs) {
        // row start and end.
        const int64 row_start = row_cum_seq[hs];
        int64 row_end =
            overlapping_ ? row_cum_seq[hs + 1] : row_cum_seq[hs + 1] - 1;
        row_end = std::min(row_end, row_max);
        // col sequence.
        for (int64 ws = 0; ws < col_cum_seq.size() - 1; ++ws) {
          const int64 out_offset =
              (b * output_size_[1] + hs) * output_size_[2] + ws;
          // col start and end.
          const int64 col_start = col_cum_seq[ws];
          int64 col_end =
              overlapping_ ? col_cum_seq[ws + 1] : col_cum_seq[ws + 1] - 1;
          col_end = std::min(col_end, col_max);
          for (int64 h = row_start; h <= row_end; ++h) {
            for (int64 w = col_start; w <= col_end; ++w) {
              const int64 in_offset =
                  (b * input_size_[1] + h) * input_size_[2] + w;
              out_mat.col(out_offset) += in_mat.col(in_offset);
              out_count(out_offset)++;
            }
          }
        }
      }
    }
    DCHECK_GT(out_count.minCoeff(), 0);
    out_mat.array().rowwise() /= out_count.transpose().array();
  }
 private:
  bool deterministic_;
  // meaningful only when deterministic_ is true.
  mutex mu_;
  std::vector<int64> row_cum_seq_;
  std::vector<int64> col_cum_seq_;
  bool pooling_region_generated_;
  std::vector<int32> input_size_;
  std::vector<int32> output_size_;
  std::vector<float> pooling_ratio_;
  bool pseudo_random_;
  bool overlapping_;
  GuardedPhiloxRandom generator_;
 };
 // Registers FractionalAvgPoolOp<type> as the CPU kernel of the
 // "FractionalAvgPool" op for the given value of the "T" type attribute.
 #define REGISTER_FRACTIONALAVGPOOL(type)                                      \
  REGISTER_KERNEL_BUILDER(                                                    \
      Name("FractionalAvgPool").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      FractionalAvgPoolOp<type>)
 // One registration per supported element type.
 REGISTER_FRACTIONALAVGPOOL(int32);
 REGISTER_FRACTIONALAVGPOOL(int64);
 REGISTER_FRACTIONALAVGPOOL(float);
 REGISTER_FRACTIONALAVGPOOL(double);
 // The helper macro is local to this file; drop it once it has been used.
 #undef REGISTER_FRACTIONALAVGPOOL
 template <class T>
 class FractionalAvgPoolGradOp : public OpKernel {
 public:
  explicit FractionalAvgPoolGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
  }
  void Compute(OpKernelContext* context) override {
    // Here's the basic idea:
    // Batch and depth dimension are independent from row and col dimension. And
    // because FractionalAvgPool currently only support pooling along row and
    // col, we can basically think of this 4D tensor backpropagation as
    // operation of a series of 2D planes.
    //
    // For each element of a 'slice' (2D plane) of output_backprop, we need to
    // figure out its contributors when doing FractionalAvgPool operation. This
    // can be done based on row_pooling_sequence, col_pooling_seq and
    // overlapping.
    // Once we figure out the original contributors, we just need to evenly
    // divide the value of this element among these contributors.
    //
    // Internally, we divide the out_backprop tensor and store it in a temparary
    // tensor of double type. And cast it to the corresponding type.
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>>
        EigenDoubleMatrixMap;
    // Grab the inputs.
    const Tensor& orig_input_tensor_shape = context->input(0);
    OP_REQUIRES(context, orig_input_tensor_shape.dims() == 1 &&
                             orig_input_tensor_shape.NumElements() == 4,
                errors::InvalidArgument("original input tensor shape must be"
                                        "1-dimensional and 4 elements"));
    const Tensor& out_backprop = context->input(1);
    const Tensor& row_seq_tensor = context->input(2);
    const Tensor& col_seq_tensor = context->input(3);
    const int64 out_batch = out_backprop.dim_size(0);
    const int64 out_rows = out_backprop.dim_size(1);
    const int64 out_cols = out_backprop.dim_size(2);
    const int64 out_depth = out_backprop.dim_size(3);
    auto row_seq_tensor_flat = row_seq_tensor.flat<int64>();
    auto col_seq_tensor_flat = col_seq_tensor.flat<int64>();
    auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64>();
    const int64 in_batch = orig_input_tensor_shape_flat(0);
    const int64 in_rows = orig_input_tensor_shape_flat(1);
    const int64 in_cols = orig_input_tensor_shape_flat(2);
    const int64 in_depth = orig_input_tensor_shape_flat(3);
    constexpr int tensor_in_and_out_dims = 4;
    // Transform orig_input_tensor_shape into TensorShape
    TensorShape in_shape;
    for (auto i = 0; i < tensor_in_and_out_dims; ++i) {
      in_shape.AddDim(orig_input_tensor_shape_flat(i));
    }
    // Create intermediate in_backprop.
    Tensor in_backprop_tensor_temp;
    OP_REQUIRES_OK(context,
                   context->allocate_temp(DataTypeToEnum<double>::v(), in_shape,
                                          &in_backprop_tensor_temp));
    in_backprop_tensor_temp.flat<double>().setZero();
    // Transform 4D tensor to 2D matrix.
    EigenDoubleMatrixMap in_backprop_tensor_temp_mat(
        in_backprop_tensor_temp.flat<double>().data(), in_depth,
        in_cols * in_rows * in_batch);
    ConstEigenMatrixMap out_backprop_mat(out_backprop.flat<T>().data(),
                                         out_depth,
                                         out_cols * out_rows * out_batch);
    // Loop through each element of out_backprop and evenly distribute the
    // element to the corresponding pooling cell.
    const int64 in_max_row_index = in_rows - 1;
    const int64 in_max_col_index = in_cols - 1;
    for (int64 b = 0; b < out_batch; ++b) {
      for (int64 r = 0; r < out_rows; ++r) {
        const int64 in_row_start = row_seq_tensor_flat(r);
        int64 in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1)
                                        : row_seq_tensor_flat(r + 1) - 1;
        in_row_end = std::min(in_row_end, in_max_row_index);
        for (int64 c = 0; c < out_cols; ++c) {
          const int64 in_col_start = col_seq_tensor_flat(c);
          int64 in_col_end = overlapping_ ? col_seq_tensor_flat(c + 1)
                                          : col_seq_tensor_flat(c + 1) - 1;
          in_col_end = std::min(in_col_end, in_max_col_index);
          const int64 num_elements_in_pooling_cell =
              (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1);
          const int64 out_index = (b * out_rows + r) * out_cols + c;
          // Now we can evenly distribute out_backprop(b, h, w, *) to
          // in_backprop(b, hs:he, ws:we, *).
          for (int64 in_r = in_row_start; in_r <= in_row_end; ++in_r) {
            for (int64 in_c = in_col_start; in_c <= in_col_end; ++in_c) {
              const int64 in_index = (b * in_rows + in_r) * in_cols + in_c;
              // Walk through each channel (depth).
              for (int64 d = 0; d < out_depth; ++d) {
                const double out_backprop_element = static_cast<double>(
                    out_backprop_mat.coeffRef(d, out_index));
                double& in_backprop_ref =
                    in_backprop_tensor_temp_mat.coeffRef(d, in_index);
                in_backprop_ref +=
                    out_backprop_element / num_elements_in_pooling_cell;
              }
            }
          }
        }
      }
    }
    // Depending on the type, cast double to type T.
    Tensor* in_backprop_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(0, in_shape, &in_backprop_tensor));
    auto in_backprop_tensor_flat = in_backprop_tensor->flat<T>();
    auto in_backprop_tensor_temp_flat = in_backprop_tensor_temp.flat<double>();
    for (int64 i = 0; i < in_backprop_tensor_flat.size(); ++i) {
      in_backprop_tensor_flat(i) =
          static_cast<T>(in_backprop_tensor_temp_flat(i));
    }
  }
 private:
  bool overlapping_;
 };
 // Registers FractionalAvgPoolGradOp<type> as the CPU kernel of the
 // "FractionalAvgPoolGrad" op for the given value of the "T" type attribute.
 #define REGISTER_FRACTIONALAVGPOOLGRAD(type)              \
  REGISTER_KERNEL_BUILDER(Name("FractionalAvgPoolGrad")   \
                              .Device(DEVICE_CPU)         \
                              .TypeConstraint<type>("T"), \
                          FractionalAvgPoolGradOp<type>)
 // One registration per supported element type.
 REGISTER_FRACTIONALAVGPOOLGRAD(int32);
 REGISTER_FRACTIONALAVGPOOLGRAD(int64);
 REGISTER_FRACTIONALAVGPOOLGRAD(float);
 REGISTER_FRACTIONALAVGPOOLGRAD(double);
 // The helper macro is local to this file; drop it once it has been used.
 #undef REGISTER_FRACTIONALAVGPOOLGRAD
 }  // namespace tensorflow
--- a/tensorflow/core/kernels/fractional_max_pool_op.cc
+++ b/tensorflow/core/kernels/fractional_max_pool_op.cc
@ -0,0 +1,381 @@
 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #define EIGEN_USE_THREADS
 #include <algorithm>
 #include <cmath>
 #include <random>
 #include <vector>
 #include "tensorflow/core/kernels/fractional_pool_common.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/util/guarded_philox_random.h"
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 template <typename T>
 class FractionalMaxPoolOp : public OpKernel {
 public:
  explicit FractionalMaxPoolOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pooling_ratio", &pooling_ratio_));
    OP_REQUIRES_OK(context, context->GetAttr("pseudo_random", &pseudo_random_));
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
    OP_REQUIRES(context, pooling_ratio_.size() == 4,
                errors::InvalidArgument("pooling_ratio field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES(
        context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1,
        errors::Unimplemented("Fractional max pooling is not yet "
                              "supported on the batch nor channel dimension."));
    OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
    pooling_region_generated_ = false;
    // Initialize philox random generator.
    OP_REQUIRES_OK(context, generator_.Init(context));
  }
  void Compute(OpKernelContext* context) override {
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;
    constexpr int tensor_in_and_out_dims = 4;
    const Tensor& tensor_in = context->input(0);
    OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      input_size_.push_back(tensor_in.dim_size(i));
    }
    // Output size.
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      output_size_.push_back(
          static_cast<int>(floor(input_size_[i] / pooling_ratio_[i])));
      DCHECK_GT(output_size_[i], 0);
    }
    // Generate pooling sequence.
    std::vector<int64> height_cum_seq;
    std::vector<int64> width_cum_seq;
    if (deterministic_) {
      if (pooling_region_generated_) {
        height_cum_seq = height_cum_seq_;
        width_cum_seq = width_cum_seq_;
      } else {
        height_cum_seq = GeneratePoolingSequence(
            input_size_[1], output_size_[1], &generator_, pseudo_random_);
        width_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
                                                &generator_, pseudo_random_);
        mutex_lock lock(mu_);
        height_cum_seq_ = height_cum_seq;
        width_cum_seq_ = width_cum_seq;
        pooling_region_generated_ = true;
      }
    } else {
      height_cum_seq = GeneratePoolingSequence(input_size_[1], output_size_[1],
                                               &generator_, pseudo_random_);
      width_cum_seq = GeneratePoolingSequence(input_size_[2], output_size_[2],
                                              &generator_, pseudo_random_);
    }
    // Prepare output.
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       0, TensorShape({output_size_[0], output_size_[1],
                                       output_size_[2], output_size_[3]}),
                       &output_tensor));
    Tensor* output_height_seq_tensor = nullptr;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(
            1, TensorShape({static_cast<int64>(height_cum_seq.size())}),
            &output_height_seq_tensor));
    Tensor* output_width_seq_tensor = nullptr;
    OP_REQUIRES_OK(
        context, context->allocate_output(
                     2, TensorShape({static_cast<int64>(width_cum_seq.size())}),
                     &output_width_seq_tensor));
    ConstEigenMatrixMap in_mat(
        tensor_in.flat<T>().data(), input_size_[3],
        input_size_[2] * input_size_[1] * input_size_[0]);
    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size_[3],
                           output_size_[2] * output_size_[1] * output_size_[0]);
    // Initializes the output tensor with MIN<T>.
    output_tensor->flat<T>().setConstant(Eigen::NumTraits<T>::lowest());
    auto output_height_seq_flat = output_height_seq_tensor->flat<int64>();
    auto output_width_seq_flat = output_width_seq_tensor->flat<int64>();
    // Set output tensors.
    for (int i = 0; i < height_cum_seq.size(); ++i) {
      output_height_seq_flat(i) = height_cum_seq[i];
    }
    for (int i = 0; i < width_cum_seq.size(); ++i) {
      output_width_seq_flat(i) = width_cum_seq[i];
    }
    // For both input and output,
    // 0: batch
    // 1: height / row
    // 2: width / col
    // 3: depth / channel
    const int64 height_max = input_size_[1] - 1;
    const int64 width_max = input_size_[2] - 1;
    for (int64 b = 0; b < input_size_[0]; ++b) {
      // height sequence.
      for (int64 hs = 0; hs < height_cum_seq.size() - 1; ++hs) {
        // height start and end.
        const int64 height_start = height_cum_seq[hs];
        int64 height_end =
            overlapping_ ? height_cum_seq[hs + 1] : height_cum_seq[hs + 1] - 1;
        height_end = std::min(height_end, height_max);
        // width sequence.
        for (int64 ws = 0; ws < width_cum_seq.size() - 1; ++ws) {
          const int64 out_offset =
              (b * output_size_[1] + hs) * output_size_[2] + ws;
          // width start and end.
          const int64 width_start = width_cum_seq[ws];
          int64 width_end =
              overlapping_ ? width_cum_seq[ws + 1] : width_cum_seq[ws + 1] - 1;
          width_end = std::min(width_end, width_max);
          for (int64 h = height_start; h <= height_end; ++h) {
            for (int64 w = width_start; w <= width_end; ++w) {
              const int64 in_offset =
                  (b * input_size_[1] + h) * input_size_[2] + w;
              out_mat.col(out_offset) =
                  out_mat.col(out_offset).cwiseMax(in_mat.col(in_offset));
            }
          }
        }
      }
    }
  }
 private:
  bool deterministic_;
  // meaningful only when deterministic_ is true.
  mutex mu_;
  std::vector<int64> height_cum_seq_;
  std::vector<int64> width_cum_seq_;
  bool pooling_region_generated_;
  std::vector<int32> input_size_;
  std::vector<int32> output_size_;
  std::vector<float> pooling_ratio_;
  bool pseudo_random_;
  bool overlapping_;
  GuardedPhiloxRandom generator_;
 };
 // Registers FractionalMaxPoolOp<type> as the CPU kernel of the
 // "FractionalMaxPool" op for the given value of the "T" type attribute.
 #define REGISTER_FRACTIONALMAXPOOL(type)                                      \
  REGISTER_KERNEL_BUILDER(                                                    \
      Name("FractionalMaxPool").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      FractionalMaxPoolOp<type>)
 // One registration per supported element type.
 REGISTER_FRACTIONALMAXPOOL(int32);
 REGISTER_FRACTIONALMAXPOOL(int64);
 REGISTER_FRACTIONALMAXPOOL(float);
 REGISTER_FRACTIONALMAXPOOL(double);
 // The helper macro is local to this file; drop it once it has been used.
 #undef REGISTER_FRACTIONALMAXPOOL
 static const int kInvalidMaxPoolingIndex = -1;
 template <class T>
 class FractionalMaxPoolGradOp : public OpKernel {
 public:
  explicit FractionalMaxPoolGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
  }
  void Compute(OpKernelContext* context) override {
    // There are two steps when calculating gradient for FractionalMaxPool.
    // 1) Walk through the process of calculating fractional pooling given
    //    pooling region; however, in the process, keep track of where the max
    //    element comes from. (arg_max)
    // 2) Populate the value of out_backprop to where arg_max indicates. If
    //    we support overlapping, it is likely to have multiple out_backprop[i]
    //    propagates back to the same arg_max value.
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<int64, Eigen::Dynamic, Eigen::Dynamic>>
        EigenIndexMatrixMap;
    const Tensor& tensor_in = context->input(0);
    const Tensor& tensor_out = context->input(1);
    const Tensor& out_backprop = context->input(2);
    const Tensor& height_seq_tensor = context->input(3);
    const Tensor& width_seq_tensor = context->input(4);
    // Just to make it similar to FractionalMaxPoolOp.
    constexpr int tensor_in_and_out_dims = 4;
    std::vector<int64> input_size;
    std::vector<int64> output_size;
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      input_size.push_back(tensor_in.dim_size(i));
    }
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      output_size.push_back(tensor_out.dim_size(i));
    }
    // ---------
    // Step 1
    // ---------
    Tensor tensor_out_dup;
    OP_REQUIRES_OK(context,
                   context->allocate_temp(DataTypeToEnum<T>::v(),
                                          tensor_out.shape(), &tensor_out_dup));
    Tensor tensor_out_arg_max;
    OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<int64>::v(),
                                                   tensor_out.shape(),
                                                   &tensor_out_arg_max));
    // Find arg_max for each tensor_out
    ConstEigenMatrixMap tensor_in_mat(
        tensor_in.flat<T>().data(), input_size[3],
        input_size[2] * input_size[1] * input_size[0]);
    EigenMatrixMap tensor_out_dup_mat(
        tensor_out_dup.flat<T>().data(), output_size[3],
        output_size[2] * output_size[1] * output_size[0]);
    EigenIndexMatrixMap tensor_out_arg_max_mat(
        tensor_out_arg_max.flat<int64>().data(), output_size[3],
        output_size[2] * output_size[1] * output_size[0]);
    tensor_out_arg_max.flat<int64>().setConstant(kInvalidMaxPoolingIndex);
    // Initializes the duplicate output tensor with MIN<T>.
    tensor_out_dup.flat<T>().setConstant(Eigen::NumTraits<T>::lowest());
    auto height_seq_tensor_flat = height_seq_tensor.flat<int64>();
    auto width_seq_tensor_flat = width_seq_tensor.flat<int64>();
    // Now walk through the process of fractional max pooling again.
    // For both input and output,
    // 0: batch
    // 1: height / row
    // 2: width / col
    // 3: depth / channel
    const int64 height_max = input_size[1] - 1;
    const int64 width_max = input_size[2] - 1;
    for (int64 b = 0; b < input_size[0]; ++b) {
      // height sequence.
      for (int64 hs = 0; hs < height_seq_tensor.dim_size(0) - 1; ++hs) {
        // height start and end.
        const int64 height_start = height_seq_tensor_flat(hs);
        int64 height_end = overlapping_ ? height_seq_tensor_flat(hs + 1)
                                        : height_seq_tensor_flat(hs + 1) - 1;
        height_end = std::min(height_end, height_max);
        // width sequence.
        for (int64 ws = 0; ws < width_seq_tensor.dim_size(0) - 1; ++ws) {
          const int64 out_index =
              (b * output_size[1] + hs) * output_size[2] + ws;
          // width start and end.
          const int64 width_start = width_seq_tensor_flat(ws);
          int64 width_end = overlapping_ ? width_seq_tensor_flat(ws + 1)
                                         : width_seq_tensor_flat(ws + 1) - 1;
          width_end = std::min(width_end, width_max);
          for (int64 h = height_start; h <= height_end; ++h) {
            for (int64 w = width_start; w <= width_end; ++w) {
              const int64 in_index =
                  (b * input_size[1] + h) * input_size[2] + w;
              // Walk through each channel (depth).
              for (int64 d = 0; d < input_size[3]; ++d) {
                const T& input_ref = tensor_in_mat.coeffRef(d, in_index);
                T& output_ref = tensor_out_dup_mat.coeffRef(d, out_index);
                int64& out_arg_max_ref =
                    tensor_out_arg_max_mat.coeffRef(d, out_index);
                if (output_ref < input_ref ||
                    out_arg_max_ref == kInvalidMaxPoolingIndex) {
                  output_ref = input_ref;
                  int input_offset = in_index * input_size[3] + d;
                  out_arg_max_ref = input_offset;
                }
              }
            }
          }
        }
      }
    }
    // Check tensor_out_dup is the same as tensor_out.
    ConstEigenMatrixMap tensor_out_mat(
        tensor_out.flat<T>().data(), output_size[3],
        output_size[2] * output_size[1] * output_size[0]);
    const int64 num_reshaped_cols =
        output_size[2] * output_size[1] * output_size[0];
    for (int64 i = 0; i < num_reshaped_cols; ++i) {
      for (int64 j = 0; j < output_size[3]; ++j) {
        DCHECK_EQ(tensor_out_dup_mat(j, i), tensor_out_mat(j, i));
      }
    }
    Tensor* output = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(0, tensor_in.shape(), &output));
    output->flat<T>().setZero();
    auto out_backprop_flat = out_backprop.flat<T>();
    auto input_backprop_flat = output->flat<T>();
    auto out_arg_max_flat = tensor_out_arg_max.flat<int64>();
    int num_total_outputs = out_backprop_flat.size();
    int num_total_inputs = input_backprop_flat.size();
    for (int index = 0; index < num_total_outputs; ++index) {
      int input_backprop_index = out_arg_max_flat(index);
      // According to maxpooling_op.cc, the performance impact below is small.
      CHECK(input_backprop_index >= 0 &&
            input_backprop_index < num_total_inputs)
          << "Invalid input backprop index: " << input_backprop_index << ", "
          << num_total_inputs;
      input_backprop_flat(input_backprop_index) += out_backprop_flat(index);
    }
  }
 private:
  bool overlapping_;
 };
 // Registers FractionalMaxPoolGradOp<type> as the CPU kernel of the
 // "FractionalMaxPoolGrad" op for the given value of the "T" type attribute.
 #define REGISTER_FRACTIONALMAXPOOLGRAD(type)              \
  REGISTER_KERNEL_BUILDER(Name("FractionalMaxPoolGrad")   \
                              .Device(DEVICE_CPU)         \
                              .TypeConstraint<type>("T"), \
                          FractionalMaxPoolGradOp<type>)
 // One registration per supported element type.
 REGISTER_FRACTIONALMAXPOOLGRAD(int32);
 REGISTER_FRACTIONALMAXPOOLGRAD(int64);
 REGISTER_FRACTIONALMAXPOOLGRAD(float);
 REGISTER_FRACTIONALMAXPOOLGRAD(double);
 // The helper macro is local to this file; drop it once it has been used.
 #undef REGISTER_FRACTIONALMAXPOOLGRAD
 }  // namespace tensorflow
--- a/tensorflow/core/kernels/fractional_pool_common.cc
+++ b/tensorflow/core/kernels/fractional_pool_common.cc
@ -0,0 +1,134 @@
 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/kernels/fractional_pool_common.h"
 #include "tensorflow/core/lib/random/simple_philox.h"
 namespace tensorflow {
 static std::vector<int64> GeneratePoolingSequencePseudoRandom(
    int input_length, int output_length, GuardedPhiloxRandom* generator) {
  std::vector<int64> cum_seq(output_length + 1, 0);
  std::vector<int64> diff(output_length, 0);
  double alpha = static_cast<double>(input_length) / output_length;
  int k = input_length / output_length;
  // In the paper [1], author proposes the following procedure to generate a
  // pseudo random pooling region:
  //   1) Set a_0 = 1, a_Nout = Nin;
  //   2) a_i = ceil(alpha*(u+i))
  //      in which, i = 1, 2, ... , Nout-1
  //                u is a random number in (0,1) for all i
  //                alpha = Nin/Nout in (1,2)
  // The sequence {a_i} should satisfy a_i-a_{i-1} = 1 or 2
  // Note: for step 1), it makes more sense to make a_Nout = Nin+1, that way,
  //    a_i-a_{i-1} = 1 or 2 is also true for i = Nout.
  //
  // However, there are choices of alpha and u that will make
  // a_i - a_{i-1} > 2. This happens at the left boundary. For example, with
  // alpha = 1.732, u = 0.8, then a_1 = 4, a_1-a_0 = 3.
  // This is why u_max1 is needed, i.e. u is a random number in (0,u_max1)
  // instead of (0,1).
  // Define k = ceil(alpha)-1, then we require:
  //   a_1 = alpha*(u+1) <= a_0+(k+1)
  // ===> This gives u_max1 = (k+2)/alpha - 1.
  //
  // In addition, when extending the pooling sequence generation process for
  // alpha beyond (1,2), e.g. (k,k+1); a check on the right boundary is also
  // needed to make sure the last gap a_Nout-a_{Nout-1} >= k. Solving it gives
  // u_max2 = (Nin+1-k)/alpha - (Nout-1)
  // Here is an example where u > u_max2, alpha = 2.3, u = 0.7, u_max2 = 0.565,
  // Nin = 23, Nout = 10; the sequence
  // from a_0 to a_10 is:
  // [1, 4, 7, 9, 11, 14, 16, 18, 21, 23, 24]
  // The last gap is only 1.
  //
  // [1]: https://arxiv.org/abs/1412.6071
  double u_max1 = (k + 2) / alpha - 1;
  double u_max2 = (input_length + 1 - k) / alpha - (output_length - 1);
  double max_u = std::min(u_max1, u_max2);
  // Generate random number in parallel.
  auto local_gen = generator->ReserveSamples32(2);
  random::SimplePhilox random(&local_gen);
  const double u = random.RandDouble() * max_u;
  cum_seq[0] = 1;
  cum_seq[output_length] = input_length + 1;
  for (int i = 1; i < output_length; ++i) {
    cum_seq[i] = static_cast<int>(ceil(alpha * (i + u)));
  }
  for (int i = 0; i < output_length; ++i) {
    diff[i] = cum_seq[i + 1] - cum_seq[i];
  }
  return diff;
 }
 static std::vector<int64> GeneratePoolingSequenceRandom(
    int input_length, int output_length, GuardedPhiloxRandom* generator) {
  int k = input_length / output_length;
  int num_random_spot = input_length % output_length;
  std::vector<int64> diff(output_length, k);
  for (int i = 0; i < num_random_spot; ++i) {
    diff[i] += 1;
  }
  // Randomly shuffle this vector.
  auto local_gen = generator->ReserveSamples32(diff.size());
  random::SingleSampleAdapter<random::PhiloxRandom> single(&local_gen);
  const auto uniform = [&single](uint32 n) { return single() % n; };
  RandomShuffle(diff.begin(), diff.end(), uniform);
  return diff;
 }
 // Generates the cumulative pooling sequence for one dimension.
 //
 // Args:
 //   input_length:  Size of the input along this dimension.
 //   output_length:  Size of the output along this dimension. Must be non-zero:
 //     it is used as a divisor below.
 //   generator:  Source of random samples; only consulted when input_length is
 //     not an integer multiple of output_length.
 //   pseudo_random:  Selects the pseudo-random scheme over the random one.
 // Returns:
 //   A vector of output_length + 1 boundaries starting at 0, where
 //   boundary[i + 1] - boundary[i] is the size of pooling cell i.
 std::vector<int64> GeneratePoolingSequence(int input_length, int output_length,
                                           GuardedPhiloxRandom* generator,
                                           bool pseudo_random) {
  std::vector<int64> diff;
  if (input_length % output_length == 0) {
    // This is a case that regular pooling can handle: every cell has size
    // input_length / output_length. The branches are chained with `else if`
    // so that this result is actually kept; previously it was unconditionally
    // overwritten by one of the random generators below. No random samples
    // are reserved from `generator` on this path.
    diff = std::vector<int64>(output_length, input_length / output_length);
  } else if (pseudo_random) {
    diff = GeneratePoolingSequencePseudoRandom(input_length, output_length,
                                               generator);
  } else {
    diff =
        GeneratePoolingSequenceRandom(input_length, output_length, generator);
  }

  // Sanity check: k <= diff[i] <= k+1 for every cell.
  const int k = input_length / output_length;
  for (int i = 0; i < output_length; ++i) {
    DCHECK_GE(diff[i], k);
    DCHECK_LE(diff[i], k + 1);
  }

  // Return the cumulative sequence. Indexing by output_length (an int) avoids
  // comparing a signed index against the unsigned cum_seq.size().
  std::vector<int64> cum_seq(output_length + 1, 0);
  for (int i = 0; i < output_length; ++i) {
    cum_seq[i + 1] = cum_seq[i] + diff[i];
  }
  return cum_seq;
 }
 }  // namespace tensorflow
--- a/tensorflow/core/kernels/fractional_pool_common.h
+++ b/tensorflow/core/kernels/fractional_pool_common.h
@ -0,0 +1,78 @@
 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #ifndef TENSORFLOW_KERNELS_FRACTIONAL_POOL_COMMON_H_
 #define TENSORFLOW_KERNELS_FRACTIONAL_POOL_COMMON_H_
 #include <algorithm>
 #include <vector>
 #include "tensorflow/core/util/guarded_philox_random.h"
 namespace tensorflow {
 // Randomly permutes the range [first, last) in place (implementation copied
 // from random_shuffle_op.cc). `uniform(n)` is called with the number of
 // elements not yet fixed and its result is used as an offset from the current
 // position, so it is expected to return a value in [0, n).
 template <class Iter, class Random>
 static inline void RandomShuffle(Iter first, Iter last, const Random& uniform) {
  auto remaining = last - first;
  if (remaining == 0) {
    return;
  }
  // Fix one position per step; the final element needs no swap, hence the
  // loop stops once a single element is left.
  for (Iter current = first; remaining > 1; ++current, --remaining) {
    using std::iter_swap;
    iter_swap(current, current + uniform(remaining));
  }
 }
 // Generates the pooling sequence for fractional pooling along one dimension.
 //
 // Regular max/avg pooling can be viewed as a special case: given
 //     * input_length: e.g. 10
 //     * output_length: e.g. 5
 // it generates the pooling sequence
 //     diff sequence: [2, 2, 2, 2, 2]
 // or, equivalently,
 //     cumulative sequence: [0, 2, 4, 6, 8, 10]
 //
 // In fractional pooling, input_length is not an integer multiple of
 // output_length, so randomness plays a role when generating the pooling
 // sequence. The paper http://arxiv.org/abs/1412.6071 defines two types of
 // randomness (random vs. pseudo-random); see the paper for the difference
 // between them.
 //
 // In summary, the generated diff sequence satisfies the following properties
 // for both types of randomness:
 //     * length(generated_diff_pooling_sequence) = output_length
 //     * sum(generated_diff_pooling_sequence) = input_length
 //     * With K defined as floor(input_length / output_length),
 //       K <= generated_diff_pooling_sequence[i] <= K+1
 // For example, when input_length = 10 and output_length = 6, the following
 // are valid pooling sequences:
 //     * [1, 2, 2, 1, 2, 2]
 //     * [1, 1, 2, 2, 2, 2]
 // [1, 3, 2, 2, 2, 2] is not valid.
 //
 // Args:
 //   input_length:  See the explanation above.
 //   output_length:  See the explanation above.
 //   generator:  Parallel version of random number generator.
 //   pseudo_random:  Whether to use the pseudo-random scheme instead of the
 //     random one.
 // Returns:
 //   pooling_sequence:  The cumulative pooling sequence.
 std::vector<int64> GeneratePoolingSequence(int input_length, int output_length,
                                           GuardedPhiloxRandom* generator,
                                           bool pseudo_random);
 }  // namespace tensorflow
 #endif  // TENSORFLOW_KERNELS_FRACTIONAL_POOL_COMMON_H_
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@ -1515,4 +1515,205 @@ values: The `k` largest elements along each last dimensional slice.
 indices: The indices of `values` within the last dimension of `input`.
 )doc");
 // --------------------------------------------------------------------------
 // Op definition for fractional max pooling. Besides the pooled tensor, the op
 // outputs the row and column pooling sequences (int64); FractionalMaxPoolGrad
 // takes inputs of the same names. The text inside .Doc() is the user-facing
 // API documentation for this op.
 REGISTER_OP("FractionalMaxPool")
    .Input("value: T")
    .Output("output: T")
    .Output("row_pooling_sequence: int64")
    .Output("col_pooling_sequence: int64")
    .Attr("pooling_ratio: list(float) >=4")
    .Attr("pseudo_random: bool = false")
    .Attr("overlapping: bool = false")
    .Attr("deterministic: bool = false")
    .Attr("seed: int = 0")
    .Attr("seed2: int = 0")
    .Attr("T: {float, double, int32, int64}")
    .Doc(R"doc(
 Performs fractional max pooling on the input.
 Fractional max pooling is slightly different than regular max pooling.  In
 regular max pooling, you downsize an input set by taking the maximum value of
 smaller N x N subsections of the set (often 2x2), and try to reduce the set by
 a factor of N, where N is an integer.  Fractional max pooling, as you might
 expect from the word "fractional", means that the overall reduction ratio N
 does not have to be an integer.
 The sizes of the pooling regions are generated randomly but are fairly uniform.
 For example, let's look at the height dimension, and the constraints on the
 list of rows that will be pool boundaries.
 First we define the following:
 1.  input_row_length : the number of rows from the input set
 2.  output_row_length : which will be smaller than the input
 3.  alpha = input_row_length / output_row_length : our reduction ratio
 4.  K = floor(alpha)
 5.  row_pooling_sequence : this is the result list of pool boundary rows
 Then, row_pooling_sequence should satisfy:
 1.  a[0] = 0 : the first value of the sequence is 0
 2.  a[end] = input_row_length : the last value of the sequence is the size
 3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
 4.  length(row_pooling_sequence) = output_row_length+1
 For more details on fractional max pooling, see this paper:
 [Benjamin Graham, Fractional Max-Pooling]
 (http://arxiv.org/abs/1412.6071)
 value: 4-D with shape `[batch, height, width, channels]`.
 pooling_ratio: Pooling ratio for each dimension of `value`, currently only
  supports row and col dimension and should be >= 1.0. For example, a valid
  pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
  must be 1.0 because we don't allow pooling on batch and channels
  dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
  respectively.
 pseudo_random: When set to True, generates the pooling sequence in a
  pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
  Graham, Fractional Max-Pooling] (http://arxiv.org/abs/1412.6071) for
  difference between pseudorandom and random.
 overlapping: When set to True, it means when pooling, the values at the boundary
  of adjacent pooling cells are used by both cells. For example:
  `index  0  1  2  3  4`
  `value  20 5  16 3  7`
  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
  The result would be [20, 16] for fractional max pooling.
 deterministic: When set to True, a fixed pooling region will be used when
  iterating over a FractionalMaxPool node in the computation graph. Mainly used
  in unit test to make FractionalMaxPool deterministic.
 seed: If either seed or seed2 are set to be non-zero, the random number
  generator is seeded by the given seed.  Otherwise, it is seeded by a
  random seed.
 seed2: An second seed to avoid seed collision.
 output: output tensor after fractional max pooling.
 row_pooling_sequence: row pooling sequence, needed to calculate gradient.
 col_pooling_sequence: column pooling sequence, needed to calculate gradient.
 )doc");
 // Op definition for the gradient of FractionalMaxPool. Inputs are the forward
 // op's original input and output, the incoming gradient, and the row/column
 // pooling sequences; the single output is the gradient w.r.t. the forward
 // input.
 // NOTE(review): `overlapping` is declared independently here; presumably it
 // has to match the value used by the forward op - confirm against the kernel.
 REGISTER_OP("FractionalMaxPoolGrad")
    .Input("orig_input: T")
    .Input("orig_output: T")
    .Input("out_backprop: T")
    .Input("row_pooling_sequence: int64")
    .Input("col_pooling_sequence: int64")
    .Output("output: T")
    .Attr("overlapping: bool = false")
    .Attr("T: {float, double, int32, int64}")
    .Doc(R"doc(
 Computes gradient of the FractionalMaxPool function.
 orig_input: Original input for `fractional_max_pool`
 orig_output: Original output for `fractional_max_pool`
 out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
  w.r.t. the output of `fractional_max_pool`.
 row_pooling_sequence: row pooling sequence, form pooling region with
  col_pooling_sequence.
 col_pooling_sequence: column pooling sequence, form pooling region with
  row_pooling sequence.
 overlapping: When set to True, it means when pooling, the values at the boundary
  of adjacent pooling cells are used by both cells. For example:
  `index  0  1  2  3  4`
  `value  20 5  16 3  7`
  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
  The result would be [20, 16] for fractional max pooling.
 output: 4-D.  Gradients w.r.t. the input of `fractional_max_pool`.
 )doc");
 // --------------------------------------------------------------------------
 // Op definition for fractional average pooling. It declares the same inputs,
 // outputs and attributes as FractionalMaxPool; besides the pooled tensor it
 // outputs the row and column pooling sequences (int64), and
 // FractionalAvgPoolGrad takes inputs of the same names. The text inside
 // .Doc() is the user-facing API documentation for this op.
 REGISTER_OP("FractionalAvgPool")
    .Input("value: T")
    .Output("output: T")
    .Output("row_pooling_sequence: int64")
    .Output("col_pooling_sequence: int64")
    .Attr("pooling_ratio: list(float) >=4")
    .Attr("pseudo_random: bool = false")
    .Attr("overlapping: bool = false")
    .Attr("deterministic: bool = false")
    .Attr("seed: int = 0")
    .Attr("seed2: int = 0")
    .Attr("T: {float, double, int32, int64}")
    .Doc(R"doc(
 Performs fractional average pooling on the input.
 Fractional average pooling is similar to Fractional max pooling in the pooling
 region generation step. The only difference is that after pooling regions are
 generated, a mean operation is performed instead of a max operation in each
 pooling region.
 value: 4-D with shape `[batch, height, width, channels]`.
 pooling_ratio: Pooling ratio for each dimension of `value`, currently only
  supports row and col dimension and should be >= 1.0. For example, a valid
  pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
  must be 1.0 because we don't allow pooling on batch and channels
  dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
  respectively.
 pseudo_random: When set to True, generates the pooling sequence in a
  pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
  Graham, Fractional Max-Pooling] (http://arxiv.org/abs/1412.6071) for
  difference between pseudorandom and random.
 overlapping: When set to True, it means when pooling, the values at the boundary
  of adjacent pooling cells are used by both cells. For example:
  `index  0  1  2  3  4`
  `value  20 5  16 3  7`
  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
  The result would be [41/3, 26/3] for fractional avg pooling.
 deterministic: When set to True, a fixed pooling region will be used when
  iterating over a FractionalAvgPool node in the computation graph. Mainly used
  in unit test to make FractionalAvgPool deterministic.
 seed: If either seed or seed2 are set to be non-zero, the random number
  generator is seeded by the given seed.  Otherwise, it is seeded by a
  random seed.
 seed2: An second seed to avoid seed collision.
 output: output tensor after fractional avg pooling.
 row_pooling_sequence: row pooling sequence, needed to calculate gradient.
 col_pooling_sequence: column pooling sequence, needed to calculate gradient.
 )doc");
 // Op definition for the gradient of FractionalAvgPool. Unlike
 // FractionalMaxPoolGrad, the first input is only the shape of the original
 // input (int64), not the input tensor itself; the remaining inputs are the
 // incoming gradient and the row/column pooling sequences.
 // NOTE(review): `overlapping` is declared independently here; presumably it
 // has to match the value used by the forward op - confirm against the kernel.
 REGISTER_OP("FractionalAvgPoolGrad")
    .Input("orig_input_tensor_shape: int64")
    .Input("out_backprop: T")
    .Input("row_pooling_sequence: int64")
    .Input("col_pooling_sequence: int64")
    .Output("output: T")
    .Attr("overlapping: bool = false")
    .Attr("T: {float, double, int32, int64}")
    .Doc(R"doc(
 Computes gradient of the FractionalAvgPool function.
 Unlike FractionalMaxPoolGrad, we don't need to find arg_max for
 FractionalAvgPoolGrad, we just need to evenly back-propagate each element of
 out_backprop to those indices that form the same pooling cell. Therefore, we
 just need to know the shape of original input tensor, instead of the whole
 tensor.
 orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool`
 out_backprop: 4-D with shape `[batch, height, width, channels]`.  Gradients
  w.r.t. the output of `fractional_avg_pool`.
 row_pooling_sequence: row pooling sequence, form pooling region with
  col_pooling_sequence.
 col_pooling_sequence: column pooling sequence, form pooling region with
  row_pooling sequence.
 overlapping: When set to True, it means when pooling, the values at the boundary
  of adjacent pooling cells are used by both cells. For example:
  `index  0  1  2  3  4`
  `value  20 5  16 3  7`
  If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
  The result would be [41/3, 26/3] for fractional avg pooling.
 output: 4-D.  Gradients w.r.t. the input of `fractional_avg_pool`.
 )doc");
 }  // namespace tensorflow
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.nn.fractional_max_pool.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.nn.fractional_max_pool.md
@ -0,0 +1,82 @@
 ### `tf.nn.fractional_max_pool(value, pooling_ratio, pseudo_random=None, overlapping=None, deterministic=None, seed=None, seed2=None, name=None)` {#fractional_max_pool}
 Performs fractional max pooling on the input.
 Fractional max pooling is slightly different than regular max pooling.  In
 regular max pooling, you downsize an input set by taking the maximum value of
 smaller N x N subsections of the set (often 2x2), and try to reduce the set by
 a factor of N, where N is an integer.  Fractional max pooling, as you might
 expect from the word "fractional", means that the overall reduction ratio N
 does not have to be an integer.
 The sizes of the pooling regions are generated randomly but are fairly uniform.
 For example, let's look at the height dimension, and the constraints on the
 list of rows that will be pool boundaries.
 First we define the following:
 1.  input_row_length : the number of rows from the input set
 2.  output_row_length : which will be smaller than the input
 3.  alpha = input_row_length / output_row_length : our reduction ratio
 4.  K = floor(alpha)
 5.  row_pooling_sequence : this is the result list of pool boundary rows
 Then, row_pooling_sequence should satisfy:
 1.  a[0] = 0 : the first value of the sequence is 0
 2.  a[end] = input_row_length : the last value of the sequence is the size
 3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
 4.  length(row_pooling_sequence) = output_row_length+1
 For more details on fractional max pooling, see this paper:
 [Benjamin Graham, Fractional Max-Pooling]
 (http://arxiv.org/abs/1412.6071)
 ##### Args:
 *  <b>`value`</b>: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `int64`.
    4-D with shape `[batch, height, width, channels]`.
 *  <b>`pooling_ratio`</b>: A list of `floats` that has length `>= 4`.
    Pooling ratio for each dimension of `value`, currently only
    supports row and col dimension and should be >= 1.0. For example, a valid
    pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
    must be 1.0 because we don't allow pooling on batch and channels
    dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
    respectively.
 *  <b>`pseudo_random`</b>: An optional `bool`. Defaults to `False`.
    When set to True, generates the pooling sequence in a
    pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
    Graham, Fractional Max-Pooling] (http://arxiv.org/abs/1412.6071) for
    difference between pseudorandom and random.
 *  <b>`overlapping`</b>: An optional `bool`. Defaults to `False`.
    When set to True, it means when pooling, the values at the boundary
    of adjacent pooling cells are used by both cells. For example:
    `index  0  1  2  3  4`
    `value  20 5  16 3  7`
    If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
    The result would be [20, 16] for fractional max pooling.
 *  <b>`deterministic`</b>: An optional `bool`. Defaults to `False`.
    When set to True, a fixed pooling region will be used when
    iterating over a FractionalMaxPool node in the computation graph. Mainly used
    in unit test to make FractionalMaxPool deterministic.
 *  <b>`seed`</b>: An optional `int`. Defaults to `0`.
    If either seed or seed2 are set to be non-zero, the random number
    generator is seeded by the given seed.  Otherwise, it is seeded by a
    random seed.
 *  <b>`seed2`</b>: An optional `int`. Defaults to `0`.
    A second seed to avoid seed collision.
 *  <b>`name`</b>: A name for the operation (optional).
 ##### Returns:
  A tuple of `Tensor` objects (output, row_pooling_sequence, col_pooling_sequence).
 *  <b>`output`</b>: A `Tensor`. Has the same type as `value`. output tensor after fractional max pooling.
 *  <b>`row_pooling_sequence`</b>: A `Tensor` of type `int64`. row pooling sequence, needed to calculate gradient.
 *  <b>`col_pooling_sequence`</b>: A `Tensor` of type `int64`. column pooling sequence, needed to calculate gradient.
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.fractional_avg_pool.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.fractional_avg_pool.md
@ -0,0 +1,57 @@
 ### `tf.nn.fractional_avg_pool(value, pooling_ratio, pseudo_random=None, overlapping=None, deterministic=None, seed=None, seed2=None, name=None)` {#fractional_avg_pool}
 Performs fractional average pooling on the input.
 Fractional average pooling is similar to Fractional max pooling in the pooling
 region generation step. The only difference is that after pooling regions are
 generated, a mean operation is performed instead of a max operation in each
 pooling region.
 ##### Args:
 *  <b>`value`</b>: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `int64`.
    4-D with shape `[batch, height, width, channels]`.
 *  <b>`pooling_ratio`</b>: A list of `floats` that has length `>= 4`.
    Pooling ratio for each dimension of `value`, currently only
    supports row and col dimension and should be >= 1.0. For example, a valid
    pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
    must be 1.0 because we don't allow pooling on batch and channels
    dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
    respectively.
 *  <b>`pseudo_random`</b>: An optional `bool`. Defaults to `False`.
    When set to True, generates the pooling sequence in a
    pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
    Graham, Fractional Max-Pooling] (http://arxiv.org/abs/1412.6071) for
    difference between pseudorandom and random.
 *  <b>`overlapping`</b>: An optional `bool`. Defaults to `False`.
    When set to True, it means when pooling, the values at the boundary
    of adjacent pooling cells are used by both cells. For example:
    `index  0  1  2  3  4`
    `value  20 5  16 3  7`
    If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
    The result would be [41/3, 26/3] for fractional avg pooling.
 *  <b>`deterministic`</b>: An optional `bool`. Defaults to `False`.
    When set to True, a fixed pooling region will be used when
    iterating over a FractionalAvgPool node in the computation graph. Mainly used
    in unit test to make FractionalAvgPool deterministic.
 *  <b>`seed`</b>: An optional `int`. Defaults to `0`.
    If either seed or seed2 are set to be non-zero, the random number
    generator is seeded by the given seed.  Otherwise, it is seeded by a
    random seed.
 *  <b>`seed2`</b>: An optional `int`. Defaults to `0`.
    A second seed to avoid seed collision.
 *  <b>`name`</b>: A name for the operation (optional).
 ##### Returns:
  A tuple of `Tensor` objects (output, row_pooling_sequence, col_pooling_sequence).
 *  <b>`output`</b>: A `Tensor`. Has the same type as `value`. output tensor after fractional avg pooling.
 *  <b>`row_pooling_sequence`</b>: A `Tensor` of type `int64`. row pooling sequence, needed to calculate gradient.
 *  <b>`col_pooling_sequence`</b>: A `Tensor` of type `int64`. column pooling sequence, needed to calculate gradient.
--- a/tensorflow/g3doc/api_docs/python/index.md
+++ b/tensorflow/g3doc/api_docs/python/index.md
@ -473,6 +473,8 @@
  * [`embedding_lookup_sparse`](../../api_docs/python/nn.md#embedding_lookup_sparse)
  * [`erosion2d`](../../api_docs/python/nn.md#erosion2d)
  * [`fixed_unigram_candidate_sampler`](../../api_docs/python/nn.md#fixed_unigram_candidate_sampler)
  * [`fractional_avg_pool`](../../api_docs/python/nn.md#fractional_avg_pool)
  * [`fractional_max_pool`](../../api_docs/python/nn.md#fractional_max_pool)
  * [`in_top_k`](../../api_docs/python/nn.md#in_top_k)
  * [`l2_loss`](../../api_docs/python/nn.md#l2_loss)
  * [`l2_normalize`](../../api_docs/python/nn.md#l2_normalize)
--- a/tensorflow/g3doc/api_docs/python/nn.md
+++ b/tensorflow/g3doc/api_docs/python/nn.md
@ -828,6 +828,151 @@ Performs 3D max pooling on the input.
  A `Tensor`. Has the same type as `input`. The max pooled output tensor.
 - - -
 ### `tf.nn.fractional_avg_pool(value, pooling_ratio, pseudo_random=None, overlapping=None, deterministic=None, seed=None, seed2=None, name=None)` {#fractional_avg_pool}
 Performs fractional average pooling on the input.
 Fractional average pooling is similar to Fractional max pooling in the pooling
 region generation step. The only difference is that after pooling regions are
 generated, a mean operation is performed instead of a max operation in each
 pooling region.
 ##### Args:
 *  <b>`value`</b>: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `int64`.
    4-D with shape `[batch, height, width, channels]`.
 *  <b>`pooling_ratio`</b>: A list of `floats` that has length `>= 4`.
    Pooling ratio for each dimension of `value`, currently only
    supports row and col dimension and should be >= 1.0. For example, a valid
    pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
    must be 1.0 because we don't allow pooling on batch and channels
    dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
    respectively.
 *  <b>`pseudo_random`</b>: An optional `bool`. Defaults to `False`.
    When set to True, generates the pooling sequence in a
    pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
    Graham, Fractional Max-Pooling] (http://arxiv.org/abs/1412.6071) for
    difference between pseudorandom and random.
 *  <b>`overlapping`</b>: An optional `bool`. Defaults to `False`.
    When set to True, it means when pooling, the values at the boundary
    of adjacent pooling cells are used by both cells. For example:
    `index  0  1  2  3  4`
    `value  20 5  16 3  7`
    If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
    The result would be [41/3, 26/3] for fractional avg pooling.
 *  <b>`deterministic`</b>: An optional `bool`. Defaults to `False`.
    When set to True, a fixed pooling region will be used when
    iterating over a FractionalAvgPool node in the computation graph. Mainly used
    in unit test to make FractionalAvgPool deterministic.
 *  <b>`seed`</b>: An optional `int`. Defaults to `0`.
    If either seed or seed2 are set to be non-zero, the random number
    generator is seeded by the given seed.  Otherwise, it is seeded by a
    random seed.
 *  <b>`seed2`</b>: An optional `int`. Defaults to `0`.
    A second seed to avoid seed collision.
 *  <b>`name`</b>: A name for the operation (optional).
 ##### Returns:
  A tuple of `Tensor` objects (output, row_pooling_sequence, col_pooling_sequence).
 *  <b>`output`</b>: A `Tensor`. Has the same type as `value`. output tensor after fractional avg pooling.
 *  <b>`row_pooling_sequence`</b>: A `Tensor` of type `int64`. row pooling sequence, needed to calculate gradient.
 *  <b>`col_pooling_sequence`</b>: A `Tensor` of type `int64`. column pooling sequence, needed to calculate gradient.
 - - -
 ### `tf.nn.fractional_max_pool(value, pooling_ratio, pseudo_random=None, overlapping=None, deterministic=None, seed=None, seed2=None, name=None)` {#fractional_max_pool}
 Performs fractional max pooling on the input.
 Fractional max pooling is slightly different than regular max pooling.  In
 regular max pooling, you downsize an input set by taking the maximum value of
 smaller N x N subsections of the set (often 2x2), and try to reduce the set by
 a factor of N, where N is an integer.  Fractional max pooling, as you might
 expect from the word "fractional", means that the overall reduction ratio N
 does not have to be an integer.
 The sizes of the pooling regions are generated randomly but are fairly uniform.
 For example, let's look at the height dimension, and the constraints on the
 list of rows that will be pool boundaries.
 First we define the following:
 1.  input_row_length : the number of rows from the input set
 2.  output_row_length : which will be smaller than the input
 3.  alpha = input_row_length / output_row_length : our reduction ratio
 4.  K = floor(alpha)
 5.  row_pooling_sequence : this is the result list of pool boundary rows
 Then, row_pooling_sequence should satisfy:
 1.  a[0] = 0 : the first value of the sequence is 0
 2.  a[end] = input_row_length : the last value of the sequence is the size
 3.  K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size
 4.  length(row_pooling_sequence) = output_row_length+1
 For more details on fractional max pooling, see this paper:
 [Benjamin Graham, Fractional Max-Pooling]
 (http://arxiv.org/abs/1412.6071)
 ##### Args:
 *  <b>`value`</b>: A `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `int64`.
    4-D with shape `[batch, height, width, channels]`.
 *  <b>`pooling_ratio`</b>: A list of `floats` that has length `>= 4`.
    Pooling ratio for each dimension of `value`, currently only
    supports row and col dimension and should be >= 1.0. For example, a valid
    pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements
    must be 1.0 because we don't allow pooling on batch and channels
    dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions
    respectively.
 *  <b>`pseudo_random`</b>: An optional `bool`. Defaults to `False`.
    When set to True, generates the pooling sequence in a
    pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin
    Graham, Fractional Max-Pooling] (http://arxiv.org/abs/1412.6071) for
    difference between pseudorandom and random.
 *  <b>`overlapping`</b>: An optional `bool`. Defaults to `False`.
    When set to True, it means when pooling, the values at the boundary
    of adjacent pooling cells are used by both cells. For example:
    `index  0  1  2  3  4`
    `value  20 5  16 3  7`
    If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice.
    The result would be [20, 16] for fractional max pooling.
 *  <b>`deterministic`</b>: An optional `bool`. Defaults to `False`.
    When set to True, a fixed pooling region will be used when
    iterating over a FractionalMaxPool node in the computation graph. Mainly used
    in unit test to make FractionalMaxPool deterministic.
 *  <b>`seed`</b>: An optional `int`. Defaults to `0`.
    If either seed or seed2 are set to be non-zero, the random number
    generator is seeded by the given seed.  Otherwise, it is seeded by a
    random seed.
 *  <b>`seed2`</b>: An optional `int`. Defaults to `0`.
    A second seed to avoid seed collision.
 *  <b>`name`</b>: A name for the operation (optional).
 ##### Returns:
  A tuple of `Tensor` objects (output, row_pooling_sequence, col_pooling_sequence).
 *  <b>`output`</b>: A `Tensor`. Has the same type as `value`. output tensor after fractional max pooling.
 *  <b>`row_pooling_sequence`</b>: A `Tensor` of type `int64`. row pooling sequence, needed to calculate gradient.
 *  <b>`col_pooling_sequence`</b>: A `Tensor` of type `int64`. column pooling sequence, needed to calculate gradient.
 ## Morphological filtering
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@ -36,6 +36,8 @@ py_tests(
        "determinant_op_test.py",
        "edit_distance_op_test.py",
        "fifo_queue_test.py",
        "fractional_avg_pool_op_test.py",
        "fractional_max_pool_op_test.py",
        "identity_op_py_test.py",
        "in_topk_op_test.py",
        "io_ops_test.py",
--- a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
@ -0,0 +1,521 @@
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Tests for fractional average pool operation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import math
 import numpy as np
 import tensorflow as tf
 from tensorflow.python.ops import gen_nn_ops
 class FractionalAvgTest(tf.test.TestCase):
  # Random number generate with seed.
  _PRNG = np.random.RandomState(341261000)
  _SEED = 341261001
  _SEED2 = 341261002
  def _AvgPoolAlongRows(self, input_matrix, row_seq, overlapping):
    """Perform average pool along row of a 2-D matrix based on row_seq.
    Args:
      input_matrix: A 2-D matrix.
      row_seq: Cumulative pooling sequence along row.
      overlapping: Whether or not use overlapping when pooling.
    Returns:
      A 2-D matrix, with
        * num_rows = len(row_seq)-1
        * num_cols = input_matrix.num_cols.
    """
    output_image = np.zeros(input_matrix.shape[1])
    row_max = row_seq[-1]
    for i in range(row_seq.shape[0] - 1):
      row_start = row_seq[i]
      row_end = row_seq[i + 1] + 1 if overlapping else row_seq[i + 1]
      row_end = min(row_end, row_max)
      output_image = np.vstack((output_image,
                                np.mean(input_matrix[row_start:row_end, :],
                                        axis=0)))  # axis 0 is along row
    # remove the sentinel row
    return output_image[1:, :]
  def _AvgPoolAlongCols(self, input_matrix, col_seq, overlapping):
    """Perform average pool along column of a 2-D matrix based on col_seq.
    Args:
      input_matrix: A 2-D matrix.
      col_seq: Cumulative pooling sequence along column.
      overlapping: Whether or not use overlapping when pooling.
    Returns:
      A 2-D matrix, with
        * num_rows = input_matrix.num_rows
        * num_cols = len(col_seq)-1.
    """
    input_matrix = input_matrix.transpose()
    output_matrix = self._AvgPoolAlongRows(input_matrix, col_seq, overlapping)
    return output_matrix.transpose()
  def _GetExpectedFractionalAvgPoolResult(self, input_tensor, row_seq, col_seq,
                                          overlapping):
    """Get expected fractional average pooling result.
    row_seq and col_seq together defines the fractional pooling region.
    Args:
      input_tensor: Original input tensor, assuming it is a 4-D tensor, with
        dimension as [batch, height/row, width/column, channels/depth].
      row_seq: Cumulative pooling sequence along row.
      col_seq: Cumulative pooling sequence along column.
      overlapping: Use overlapping when doing pooling.
    Returns:
      A 4-D tensor that is the result of average pooling on input_tensor based
        on pooling region defined by row_seq and col_seq, conditioned on whether
        or not overlapping is used.
    """
    input_shape = input_tensor.shape
    output_shape = (input_shape[0], len(row_seq) - 1, len(col_seq) - 1,
                    input_shape[3])
    output_tensor = np.zeros(shape=output_shape, dtype=input_tensor.dtype)
    for batch in range(input_shape[0]):
      for channel in range(input_shape[3]):
        two_dim_slice = input_tensor[batch, :, :, channel]
        tmp = self._AvgPoolAlongRows(two_dim_slice, row_seq, overlapping)
        output_tensor[batch, :, :, channel] = self._AvgPoolAlongCols(
            tmp, col_seq, overlapping)
    return output_tensor
  def _ValidateFractionalAvgPoolResult(self, input_tensor, pooling_ratio,
                                       pseudo_random, overlapping):
    """Run FractionalAvgPool and compare it with the numpy reference.

    The op is evaluated together with the row/column pooling sequences it
    chose; those sequences are then fed to the numpy reference so both sides
    pool over the same regions.

    Args:
      input_tensor: A tensor or numpy ndarray.
      pooling_ratio: A list or tuple of length 4, first and last element be 1.
      pseudo_random: Use pseudo random method to generate pooling sequence.
      overlapping: Use overlapping when pooling.

    Returns:
      None
    """
    with self.test_session() as sess:
      pooled_op, row_seq_op, col_seq_op = tf.nn.fractional_avg_pool(
          input_tensor,
          pooling_ratio,
          pseudo_random=pseudo_random,
          overlapping=overlapping,
          deterministic=True,
          seed=self._SEED,
          seed2=self._SEED2)
      actual, row_seq, col_seq = sess.run([pooled_op, row_seq_op, col_seq_op])
      expected = self._GetExpectedFractionalAvgPoolResult(
          input_tensor, row_seq, col_seq, overlapping)
      self.assertShapeEqual(expected, pooled_op)
      self.assertAllClose(expected, actual)
  def _testVisually(self):
    """Manual test that prints the intermediate results for a small tensor.

    _GetExpectedFractionalAvgPoolResult is itself 'automated', so it feels
    safer to also have a case whose steps can be inspected by eye.

    A small random integer 2-D matrix is generated and passed both to
    FractionalAvgPool and to _GetExpectedFractionalAvgPoolResult; the input
    (with pooling boundaries drawn in), the op output and the expected result
    are printed.
    """
    num_rows = 6
    num_cols = 6
    tensor_shape = (1, num_rows, num_cols, 1)
    for overlapping in (True, False):
      print("-" * 70)
      print("Testing FractionalAvgPool with overlapping = {}".format(
          overlapping))
      rand_mat = self._PRNG.randint(10, size=tensor_shape)
      float_mat = rand_mat.astype(np.float32)
      with self.test_session() as sess:
        pooled_op, row_seq_op, col_seq_op = tf.nn.fractional_avg_pool(
            float_mat,
            [1, math.sqrt(2), math.sqrt(2), 1],
            pseudo_random=False,
            overlapping=overlapping,
            deterministic=True,
            seed=self._SEED,
            seed2=self._SEED2)
        tensor_output, row_seq, col_seq = sess.run(
            [pooled_op, row_seq_op, col_seq_op])
        expected_result = self._GetExpectedFractionalAvgPoolResult(
            float_mat, row_seq, col_seq, overlapping)
        print("row sequence:")
        print(row_seq)
        print("column sequence:")
        print(col_seq)
        print("Input:")
        # Print input with pooling region marked: '|' before every column
        # that starts a cell, a dashed line before every row that does.
        for i in range(num_rows):
          cells = []
          for j in range(num_cols):
            if j in col_seq:
              cells.append("|")
            cells.append(str(rand_mat[0, i, j, 0]))
          cells.append("|")
          if i in row_seq:
            print("--" * len(cells))
          print(" ".join(cells))
        print("--" * len(cells))
        print("Output from FractionalAvgPool:")
        print(tensor_output[0, :, :, 0])
        print("Expected result:")
        print(expected_result[0, :, :, 0])
  def testAllInputOptions(self):
    """Exercise every pseudo_random/overlapping combination of the op."""
    tensor_shape = (5, 20, 30, 3)  # (batches, rows, cols, channels)
    for pseudo_random in (True, False):
      for overlapping in (True, False):
        # random tensor with value in [-500.0, 500.0)
        rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
        pooling_ratio = [1, math.sqrt(3), math.sqrt(2), 1]
        self._ValidateFractionalAvgPoolResult(rand_mat, pooling_ratio,
                                              pseudo_random, overlapping)
  def testIntegerTensorInput(self):
    """Check FractionalAvgPool on an integer tensor against a fixed answer.

    _ValidateFractionalAvgPoolResult cannot automate this case because of a
    rounding mismatch: numpy.mean casts integer input to numpy.float64 for
    intermediate use, while fractional_avg_pool computes the mean with
    (truncated) integer division. A small hard-coded matrix with a
    precomputed result is used instead.
    """
    # pyformat: disable
    mat = np.array([
        [2, 6, 4, 1, 3, 6],
        [8, 9, 1, 6, 6, 8],
        [3, 9, 8, 2, 5, 6],
        [2, 7, 9, 5, 4, 5],
        [8, 5, 0, 5, 7, 4],
        [4, 4, 5, 9, 7, 2]
    ])
    # pyformat: enable
    input_data = mat.reshape((1, 6, 6, 1))
    with self.test_session() as sess:
      # deterministic=True with fixed seed and seed2 makes the pooling
      # sequences identical on every run, so the result can be precomputed.
      # r = [0, 2, 4, 6]
      # c = [0, 1, 3, 4, 6]
      # pyformat: disable
      expected = np.array([
          [6, 5, 3, 5],
          [5, 5, 4, 5],
          [5, 4, 7, 5]
      ]).reshape((1, 3, 4, 1))
      # pyformat: enable
      pooled_op, unused_r, unused_c = tf.nn.fractional_avg_pool(
          input_data,
          [1, math.sqrt(3), math.sqrt(2), 1],
          pseudo_random=True,
          overlapping=True,
          deterministic=True,
          seed=self._SEED,
          seed2=self._SEED2)
      actual = sess.run(pooled_op)
      self.assertShapeEqual(expected, pooled_op)
      self.assertAllClose(expected, actual)
  def testDifferentTensorShapes(self):
    """Validate the op over a grid of input tensor shapes.

    Mainly covers different combinations of num_rows and num_cols.
    """
    # Same visiting order as four nested loops: batches, channels, rows, cols.
    tensor_shapes = [(num_batches, num_rows, num_cols, num_channels)
                     for num_batches in [1, 3]
                     for num_channels in [1, 3]
                     for num_rows in [10, 20, 50]
                     for num_cols in [10, 20, 50]]
    for tensor_shape in tensor_shapes:
      # random tensor with value in [-500.0, 500.0)
      rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
      self._ValidateFractionalAvgPoolResult(
          rand_mat, [1, math.sqrt(3), math.sqrt(2), 1], True, True)
  def testLargePoolingRatio(self):
    """Validate the op when the pooling ratio lies outside [1, 2)."""
    tensor_shape = (3, 30, 50, 3)  # (batches, rows, cols, channels)
    ratio_pairs = [(row_ratio, col_ratio)
                   for row_ratio in [math.sqrt(11), math.sqrt(37)]
                   for col_ratio in [math.sqrt(11), math.sqrt(27)]]
    for row_ratio, col_ratio in ratio_pairs:
      # random tensor with value in [-500.0, 500.0)
      rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
      self._ValidateFractionalAvgPoolResult(
          rand_mat, [1, row_ratio, col_ratio, 1], True, True)
  def testDivisiblePoolingRatio(self):
    """Validate the op when the pooling ratio evenly divides rows and cols.

    Regular average pooling can handle this case, so fractional pooling
    should handle it as well.
    """
    tensor_shape = (3, 30, 50, 3)  # (batches, rows, cols, channels)
    # random tensor with value in [-500.0, 500.0)
    rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
    self._ValidateFractionalAvgPoolResult(
        rand_mat, [1, 2, 2, 1], True, True)
 class FractionalAvgPoolGradTest(tf.test.TestCase):
  """Tests for FractionalAvgPoolGrad.
  Two types of tests for FractionalAvgPoolGrad.
  1) Test fractional_avg_pool_grad() directly.
    This type of test relies on gen_nn_ops._avg_pool_grad() returns the
  correct result. For example:
    * input_tensor_shape = (1, 10, 10, 1)
    * window_size = (1, 2, 2, 1)
    * stride_size = (1, 2, 2, 1)
    * padding: not really important, since 10/2 is divisible
  avg pooling should generate the same result as fractional avg pooling with:
    * row_sequence = [0, 2, 4, 6, 8, 10]
    * col_sequence = [0, 2, 4, 6, 8, 10]
    * overlapping = False
  This also means their gradients in such case will be the same.
  Similarly, when
    * input_tensor_shape = (1, 7, 7, 1)
    * window_size = (1, 3, 3, 1)
    * stride_size = (1, 2, 2, 1)
    * padding: not important
  avg pooling should generate the same result as fractional avg pooling with:
    * row_sequence = [0, 2, 4, 7]
    * col_sequence = [0, 2, 4, 7]
    * overlapping = True
  2) Test through compute_gradient_error()
  """
  _PRNG = np.random.RandomState(341261004)
  _SEED = 341261005
  _SEED2 = 341261006
  def _GenerateRandomInputTensor(self, shape):
    num_elements = 1
    for dim_size in shape:
      num_elements *= dim_size
    x = self._PRNG.rand(num_elements) * 1000
    return x.reshape(shape)
  def testDirectNotUseOverlapping(self):
    """Compare _fractional_avg_pool_grad with _avg_pool_grad, no overlap.

    With window == stride and input sizes that are exact multiples of the
    window, regular average pooling uses the same regions as fractional
    pooling with evenly spaced sequences, so the two gradients must agree.
    """
    # Same visiting order as nested loops: batches, row window, col window,
    # channels.
    configs = [(num_batches, row_window_size, col_window_size, num_channels)
               for num_batches in [1, 3]
               for row_window_size in [2, 5]
               for col_window_size in [2, 4]
               for num_channels in [1, 2]]
    for num_batches, row_window_size, col_window_size, num_channels in configs:
      num_rows = row_window_size * 5
      num_cols = col_window_size * 7
      input_shape = (num_batches, num_rows, num_cols, num_channels)
      with self.test_session() as _:
        input_tensor = tf.constant(self._GenerateRandomInputTensor(
            input_shape).astype(np.float32))
        window_size = [1, row_window_size, col_window_size, 1]
        stride_size = [1, row_window_size, col_window_size, 1]
        padding = "VALID"
        output_data = tf.nn.avg_pool(input_tensor, window_size, stride_size,
                                     padding).eval()
        # Random upstream gradient with the pooled output's shape.
        output_backprop = (self._PRNG.rand(output_data.size) *
                           1000).reshape(output_data.shape)
        input_backprop = gen_nn_ops._avg_pool_grad(
            input_tensor.get_shape(), output_backprop, window_size,
            stride_size, padding).eval()
        row_seq = list(range(0, num_rows + 1, row_window_size))
        col_seq = list(range(0, num_cols + 1, col_window_size))
        fap_input_backprop_tensor = gen_nn_ops._fractional_avg_pool_grad(
            input_tensor.get_shape(),
            output_backprop,
            row_seq,
            col_seq,
            overlapping=False)
        fap_input_backprop = fap_input_backprop_tensor.eval()
        self.assertShapeEqual(input_backprop, fap_input_backprop_tensor)
        self.assertAllClose(input_backprop, fap_input_backprop)
  def testDirectUseOverlapping(self):
    """Compare _fractional_avg_pool_grad with _avg_pool_grad, with overlap.

    With stride == window - 1, adjacent average pooling windows share one
    row/column, which matches overlapping fractional pooling whose sequences
    step by window - 1 and whose last boundary is pushed out by one.
    """
    # Same visiting order as nested loops: batches, row window, col window,
    # channels.
    configs = [(num_batches, row_window_size, col_window_size, num_channels)
               for num_batches in [1, 3]
               for row_window_size in [2, 5]
               for col_window_size in [2, 4]
               for num_channels in [1, 2]]
    for num_batches, row_window_size, col_window_size, num_channels in configs:
      num_rows = (row_window_size - 1) * 5 + 1
      num_cols = (col_window_size - 1) * 7 + 1
      input_shape = (num_batches, num_rows, num_cols, num_channels)
      with self.test_session() as _:
        input_tensor = tf.constant(self._GenerateRandomInputTensor(
            input_shape).astype(np.float32))
        window_size = [1, row_window_size, col_window_size, 1]
        stride_size = [1, row_window_size - 1, col_window_size - 1, 1]
        padding = "VALID"
        output_data = tf.nn.avg_pool(input_tensor, window_size, stride_size,
                                     padding).eval()
        # Random upstream gradient with the pooled output's shape.
        output_backprop = (self._PRNG.rand(output_data.size) *
                           1000).reshape(output_data.shape)
        input_backprop = gen_nn_ops._avg_pool_grad(
            input_tensor.get_shape(), output_backprop, window_size,
            stride_size, padding).eval()
        row_seq = list(range(0, num_rows, row_window_size - 1))
        col_seq = list(range(0, num_cols, col_window_size - 1))
        # Push the final boundary out by one.
        row_seq[-1] += 1
        col_seq[-1] += 1
        fap_input_backprop_tensor = gen_nn_ops._fractional_avg_pool_grad(
            input_tensor.get_shape(),
            output_backprop,
            row_seq,
            col_seq,
            overlapping=True)
        fap_input_backprop = fap_input_backprop_tensor.eval()
        self.assertShapeEqual(input_backprop, fap_input_backprop_tensor)
        self.assertAllClose(input_backprop, fap_input_backprop)
  def testAllInputOptionsThroughGradientError(self):
    """Gradient-check every pseudo_random/overlapping combination."""
    input_shape = (1, 7, 13, 1)
    input_data = self._GenerateRandomInputTensor(input_shape)
    pooling_ratio = [1, math.sqrt(2), math.sqrt(3), 1]
    # error_margin and delta setting is similar to avg_pool_grad.
    error_margin = 1e-4
    for pseudo_random in (True, False):
      for overlapping in (True, False):
        with self.test_session() as _:
          input_tensor = tf.constant(input_data, shape=input_shape)
          output_tensor, unused_a, unused_b = tf.nn.fractional_avg_pool(
              input_tensor,
              pooling_ratio,
              pseudo_random=pseudo_random,
              overlapping=overlapping,
              deterministic=True,
              seed=self._SEED,
              seed2=self._SEED2)
          # Evaluate once to learn the concrete output shape.
          output_shape = output_tensor.eval().shape
          gradient_error = tf.test.compute_gradient_error(
              input_tensor,
              input_shape,
              output_tensor,
              output_shape,
              x_init_value=input_data.reshape(input_shape),
              delta=1e-2)
          self.assertLess(gradient_error, error_margin)
  def testDifferentTensorShapesThroughGradientError(self):
    """Gradient-check the op over a grid of input tensor shapes."""
    pooling_ratio = [1, math.sqrt(3), math.sqrt(2), 1]
    # error_margin and delta setting is similar to avg_pool_grad.
    error_margin = 1e-4
    # Same visiting order as nested loops: batches, rows, cols, channels.
    input_shapes = [(num_batches, num_rows, num_cols, num_channels)
                    for num_batches in [1, 2]
                    for num_rows in [5, 13]
                    for num_cols in [5, 11]
                    for num_channels in [1, 3]]
    for input_shape in input_shapes:
      input_data = self._GenerateRandomInputTensor(input_shape)
      with self.test_session() as _:
        input_tensor = tf.constant(input_data, shape=input_shape)
        output_tensor, unused_a, unused_b = tf.nn.fractional_avg_pool(
            input_tensor,
            pooling_ratio,
            pseudo_random=True,
            overlapping=True,
            deterministic=True,
            seed=self._SEED,
            seed2=self._SEED2)
        # Evaluate once to learn the concrete output shape.
        output_shape = output_tensor.eval().shape
        gradient_error = tf.test.compute_gradient_error(
            input_tensor,
            input_shape,
            output_tensor,
            output_shape,
            x_init_value=input_data.reshape(input_shape),
            delta=1e-2)
        self.assertLess(gradient_error, error_margin)
  def testLargePoolingRatioThroughGradientError(self):
    """Gradient-check the op with pooling ratios larger than 2."""
    input_shape = (1, 17, 23, 1)
    pooling_ratio = (1, math.sqrt(13), math.sqrt(7), 1)
    input_data = self._GenerateRandomInputTensor(input_shape)
    # Output size per dimension is the input size divided by the ratio,
    # truncated to an int.
    output_shape = [
        int(dim / ratio) for dim, ratio in zip(input_shape, pooling_ratio)
    ]
    with self.test_session() as _:
      input_tensor = tf.constant(input_data, shape=input_shape)
      output_tensor, unused_a, unused_b = tf.nn.fractional_avg_pool(
          input_tensor,
          pooling_ratio,
          pseudo_random=False,
          overlapping=True,
          deterministic=True,
          seed=self._SEED,
          seed2=self._SEED2)
      gradient_error = tf.test.compute_gradient_error(
          input_tensor,
          input_shape,
          output_tensor,
          output_shape,
          x_init_value=input_data.reshape(input_shape),
          delta=1e-2)
      # error_margin and delta setting is similar to avg_pool_grad.
      self.assertLess(gradient_error, 1e-4)
 if __name__ == "__main__":
  tf.test.main()
--- a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
@ -0,0 +1,582 @@
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Tests for fractional max pool operation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import math
 import numpy as np
 import tensorflow as tf
 from tensorflow.python.ops import gen_nn_ops
 class FractionalMaxPoolTest(tf.test.TestCase):
  # Random number generate with seed.
  _PRNG = np.random.RandomState(341261)
  _SEED = 123456
  _SEED2 = 654321
  def _MaxPoolAlongRows(self, input_matrix, row_seq, overlapping):
    """Perform max pool along row of a 2-D matrix based on row_seq.
    Args:
      input_matrix: A 2-D matrix.
      row_seq: Cumulative pooling sequence along row.
      overlapping: Whether or not use overlapping when pooling.
    Returns:
      A 2-D matrix, with
        * num_rows = len(row_seq)-1
        * num_cols = input_matrix.num_cols.
    """
    output_image = np.zeros(input_matrix.shape[1])
    row_max = row_seq[-1]
    for i in range(row_seq.shape[0] - 1):
      row_start = row_seq[i]
      row_end = row_seq[i + 1] + 1 if overlapping else row_seq[i + 1]
      row_end = min(row_end, row_max)
      output_image = np.vstack((output_image,
                                np.amax(input_matrix[row_start:row_end, :],
                                        axis=0)))  # axis 0 is along row
    # remove the sentinel row
    return output_image[1:, :]
  def _MaxPoolAlongCols(self, input_matrix, col_seq, overlapping):
    """Perform max pool along column of a 2-D matrix based on col_seq.
    Args:
      input_matrix: A 2-D matrix.
      col_seq: Cumulative pooling sequence along column.
      overlapping: Whether or not use overlapping when pooling.
    Returns:
      A 2-D matrix, with
        * num_rows = input_matrix.num_rows
        * num_cols = len(col_seq)-1.
    """
    input_matrix = input_matrix.transpose()
    output_matrix = self._MaxPoolAlongRows(input_matrix, col_seq, overlapping)
    return output_matrix.transpose()
  def _GetExpectedFractionalMaxPoolResult(self, input_tensor, row_seq, col_seq,
                                          overlapping):
    """Get expected fractional max pool result.
    row_seq and col_seq together defines the fractional pooling region.
    Args:
      input_tensor: Original input tensor, assuming it is a 4-D tensor, with
        dimension as [batch, height/row, width/column, channels/depth].
      row_seq: Cumulative pooling sequence along row.
      col_seq: Cumulative pooling sequence along column.
      overlapping: Use overlapping when doing pooling.
    Returns:
      A 4-D tensor that is the result of max pooling on input_tensor based on
        pooling region defined by row_seq and col_seq, conditioned on whether or
        not overlapping is used.
    """
    input_shape = input_tensor.shape
    output_shape = (input_shape[0], len(row_seq) - 1, len(col_seq) - 1,
                    input_shape[3])
    output_tensor = np.zeros(shape=output_shape, dtype=input_tensor.dtype)
    for batch in range(input_shape[0]):
      for channel in range(input_shape[3]):
        two_dim_slice = input_tensor[batch, :, :, channel]
        tmp = self._MaxPoolAlongRows(two_dim_slice, row_seq, overlapping)
        output_tensor[batch, :, :, channel] = self._MaxPoolAlongCols(
            tmp, col_seq, overlapping)
    return output_tensor
  def _ValidateFractionalMaxPoolResult(self, input_tensor, pooling_ratio,
                                       pseudo_random, overlapping):
    """Run FractionalMaxPool and compare it with the numpy reference.

    The op is evaluated together with the row/column pooling sequences it
    chose; those sequences are then fed to the numpy reference so both sides
    pool over the same regions.

    Args:
      input_tensor: A tensor or numpy ndarray.
      pooling_ratio: A list or tuple of length 4, first and last element be 1.
      pseudo_random: Use pseudo random method to generate pooling sequence.
      overlapping: Use overlapping when pooling.

    Returns:
      None
    """
    with self.test_session() as sess:
      pooled_op, row_seq_op, col_seq_op = tf.nn.fractional_max_pool(
          input_tensor,
          pooling_ratio,
          pseudo_random=pseudo_random,
          overlapping=overlapping,
          deterministic=True,
          seed=self._SEED,
          seed2=self._SEED2)
      actual, row_seq, col_seq = sess.run([pooled_op, row_seq_op, col_seq_op])
      expected = self._GetExpectedFractionalMaxPoolResult(
          input_tensor, row_seq, col_seq, overlapping)
      self.assertShapeEqual(expected, pooled_op)
      self.assertAllClose(expected, actual)
  def _testVisually(self):
    """Manual test that prints the intermediate results for a small tensor.

    _GetExpectedFractionalMaxPoolResult is itself 'automated', so it feels
    safer to also have a case whose steps can be inspected by eye.

    A small random integer 2-D matrix is generated and passed both to
    FractionalMaxPool and to _GetExpectedFractionalMaxPoolResult; the input
    (with pooling boundaries drawn in), the op output and the expected result
    are printed.
    """
    num_rows = 6
    num_cols = 6
    tensor_shape = (1, num_rows, num_cols, 1)
    for overlapping in (True, False):
      print("-" * 70)
      print("Testing FractionalMaxPool with overlapping = {}".format(
          overlapping))
      rand_mat = self._PRNG.randint(10, size=tensor_shape)
      with self.test_session() as sess:
        pooled_op, row_seq_op, col_seq_op = tf.nn.fractional_max_pool(
            rand_mat,
            [1, math.sqrt(2), math.sqrt(2), 1],
            pseudo_random=False,
            overlapping=overlapping,
            deterministic=True,
            seed=self._SEED,
            seed2=self._SEED2)
        tensor_output, row_seq, col_seq = sess.run(
            [pooled_op, row_seq_op, col_seq_op])
        expected_result = self._GetExpectedFractionalMaxPoolResult(
            rand_mat, row_seq, col_seq, overlapping)
        print("row sequence:")
        print(row_seq)
        print("column sequence:")
        print(col_seq)
        print("Input:")
        # Print input with pooling region marked: '|' before every column
        # that starts a cell, a dashed line before every row that does.
        for i in range(num_rows):
          cells = []
          for j in range(num_cols):
            if j in col_seq:
              cells.append("|")
            cells.append(str(rand_mat[0, i, j, 0]))
          cells.append("|")
          if i in row_seq:
            print("--" * len(cells))
          print(" ".join(cells))
        print("--" * len(cells))
        print("Output from FractionalMaxPool:")
        print(tensor_output[0, :, :, 0])
        print("Expected result:")
        print(expected_result[0, :, :, 0])
  def testAllInputOptions(self):
    """Exercise every pseudo_random/overlapping combination of the op."""
    tensor_shape = (5, 20, 30, 3)  # (batches, rows, cols, channels)
    for pseudo_random in (True, False):
      for overlapping in (True, False):
        # random tensor with value in [-500.0, 500.0)
        rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
        pooling_ratio = [1, math.sqrt(3), math.sqrt(2), 1]
        self._ValidateFractionalMaxPoolResult(rand_mat, pooling_ratio,
                                              pseudo_random, overlapping)
  def testIntegerTensorInput(self):
    """Validate the op when the input tensor has an integer dtype."""
    tensor_shape = (5, 20, 30, 3)  # (batches, rows, cols, channels)
    # random integer tensor with value in [0, 1000)
    rand_mat = self._PRNG.randint(1000, size=tensor_shape)
    self._ValidateFractionalMaxPoolResult(
        rand_mat, [1, math.sqrt(3), math.sqrt(2), 1], True, True)
  def testDifferentTensorShapes(self):
    """Validate the op over a grid of input tensor shapes.

    Mainly covers different combinations of num_rows and num_cols.
    """
    # Same visiting order as four nested loops: batches, channels, rows, cols.
    tensor_shapes = [(num_batches, num_rows, num_cols, num_channels)
                     for num_batches in [1, 3]
                     for num_channels in [1, 3]
                     for num_rows in [10, 20, 50]
                     for num_cols in [10, 20, 50]]
    for tensor_shape in tensor_shapes:
      # random tensor with value in [-500.0, 500.0)
      rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
      self._ValidateFractionalMaxPoolResult(
          rand_mat, [1, math.sqrt(3), math.sqrt(2), 1], True, True)
  def testLargePoolingRatio(self):
    """Validate the op when the pooling ratio lies outside [1, 2)."""
    tensor_shape = (3, 30, 50, 3)  # (batches, rows, cols, channels)
    ratio_pairs = [(row_ratio, col_ratio)
                   for row_ratio in [math.sqrt(11), math.sqrt(37)]
                   for col_ratio in [math.sqrt(11), math.sqrt(27)]]
    for row_ratio, col_ratio in ratio_pairs:
      # random tensor with value in [-500.0, 500.0)
      rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
      self._ValidateFractionalMaxPoolResult(
          rand_mat, [1, row_ratio, col_ratio, 1], True, True)
  def testDivisiblePoolingRatio(self):
    """Validate the op when the pooling ratio evenly divides rows and cols.

    Regular max pooling can handle this case, so fractional pooling should
    handle it as well.
    """
    tensor_shape = (3, 30, 50, 3)  # (batches, rows, cols, channels)
    # random tensor with value in [-500.0, 500.0)
    rand_mat = self._PRNG.random_sample(tensor_shape) * 1000 - 500
    self._ValidateFractionalMaxPoolResult(
        rand_mat, [1, 2, 2, 1], True, True)
 class FractionalMaxPoolGradTest(tf.test.TestCase):
  """Tests for FractionalMaxPoolGrad.
  Two types of tests for FractionalMaxPoolGrad.
  1) Test fractional_max_pool_grad() directly.
    This type of test relies on gen_nn_ops._max_pool_grad() returning the
  correct result. For example:
    * input_tensor_shape = (1, 10, 10, 1)
    * window_size = (1, 2, 2, 1)
    * stride_size = (1, 2, 2, 1)
    * padding: not really important, since 10 is divisible by 2
  max pooling should generate the same result as fractional max pooling with:
    * row_sequence = [0, 2, 4, 6, 8, 10]
    * col_sequence = [0, 2, 4, 6, 8, 10]
    * overlapping = False
  This also means their gradients in such case will be the same.
    Similarly, when
    * input_tensor_shape = (1, 7, 7, 1)
    * window_size = (1, 3, 3, 1)
    * stride_size = (1, 2, 2, 1)
    * padding: not important
  max pooling should generate the same result as fractional max pooling with:
    * row_sequence = [0, 2, 4, 7]
    * col_sequence = [0, 2, 4, 7]
    * overlapping = True
  2) Test through compute_gradient_error()
  """
  _PRNG = np.random.RandomState(341261)
  _SEED = 123456
  _SEED2 = 654321
  def _GenerateUniqueRandomInputTensor(self, shape):
    """Generate 'unqiue' random input tensor.
    'Unique' means there's no collision values in the tensor, all elements are
    different. This is done by generating sequence of integers with step of 1
    and then randomly shuffle these integers.
    Args:
      shape: Shape of the tensor desired.
    Returns:
      A numpy ndarray with size = shape and dtype = numpy.float32.
    """
    num_elements = 1
    for size in shape:
      num_elements *= size
    x = np.arange(num_elements, dtype=np.float32)
    self._PRNG.shuffle(x)
    return x.reshape(shape)
  def testDirectNotUseOverlapping(self):
    """Compares FractionalMaxPoolGrad with MaxPoolGrad for disjoint windows.

    The input extents are exact multiples of the window size and the stride
    equals the window size. Regular max pooling and fractional max pooling
    driven by evenly spaced pooling sequences with overlapping=False are then
    fed the same random backprop values, and their input gradients must match.
    """
    # Same visiting order as the nested loops: batches, row window,
    # col window, channels.
    cases = [(batches, row_win, col_win, channels)
             for batches in (1, 3)
             for row_win in (2, 5)
             for col_win in (2, 4)
             for channels in (1, 2)]
    for batches, row_win, col_win, channels in cases:
      height, width = row_win * 5, col_win * 7
      with self.test_session() as _:
        pool_input = tf.constant(self._GenerateUniqueRandomInputTensor(
            (batches, height, width, channels)))
        ksize = [1, row_win, col_win, 1]
        strides = [1, row_win, col_win, 1]
        pooled = tf.nn.max_pool(pool_input, ksize, strides, "VALID")
        # Random integer backprop values, one per pooled output element.
        pooled_grad = self._PRNG.randint(100, size=pooled.eval().shape)
        # Reference gradient computed by the regular max pooling grad op.
        expected_op = gen_nn_ops._max_pool_grad(pool_input, pooled,
                                                pooled_grad, ksize, strides,
                                                "VALID")
        expected = expected_op.eval()
        # Pooling boundaries 0, win, 2 * win, ..., extent.
        row_bounds = list(range(0, height + 1, row_win))
        col_bounds = list(range(0, width + 1, col_win))
        actual_op = gen_nn_ops._fractional_max_pool_grad(
            pool_input,
            pooled,
            pooled_grad,
            row_bounds,
            col_bounds,
            overlapping=False)
        actual = actual_op.eval()
        self.assertShapeEqual(expected, actual_op)
        self.assertAllClose(expected, actual)
  def testDirectUseOverlapping(self):
    """Compares FractionalMaxPoolGrad with MaxPoolGrad for overlapping windows.

    The stride is one less than the window size, so adjacent windows share one
    row/column. Fractional max pooling with overlapping=True is driven by
    sequences spaced by the stride (with the last boundary bumped by one), is
    fed the same random backprop values as regular max pooling, and must
    produce the same input gradient.
    """
    # Same visiting order as the nested loops: batches, row window,
    # col window, channels.
    cases = [(batches, row_win, col_win, channels)
             for batches in (1, 3)
             for row_win in (2, 5)
             for col_win in (2, 4)
             for channels in (1, 2)]
    for batches, row_win, col_win, channels in cases:
      row_stride = row_win - 1
      col_stride = col_win - 1
      height = row_stride * 5 + 1
      width = col_stride * 7 + 1
      with self.test_session() as _:
        pool_input = tf.constant(self._GenerateUniqueRandomInputTensor(
            (batches, height, width, channels)))
        ksize = [1, row_win, col_win, 1]
        strides = [1, row_stride, col_stride, 1]
        pooled = tf.nn.max_pool(pool_input, ksize, strides, "VALID")
        # Random integer backprop values, one per pooled output element.
        pooled_grad = self._PRNG.randint(100, size=pooled.eval().shape)
        # Reference gradient computed by the regular max pooling grad op.
        expected_op = gen_nn_ops._max_pool_grad(pool_input, pooled,
                                                pooled_grad, ksize, strides,
                                                "VALID")
        expected = expected_op.eval()
        # Boundaries spaced by the stride; the final boundary is raised by one.
        row_starts = list(range(0, height, row_stride))
        col_starts = list(range(0, width, col_stride))
        row_bounds = row_starts[:-1] + [row_starts[-1] + 1]
        col_bounds = col_starts[:-1] + [col_starts[-1] + 1]
        actual_op = gen_nn_ops._fractional_max_pool_grad(
            pool_input,
            pooled,
            pooled_grad,
            row_bounds,
            col_bounds,
            overlapping=True)
        actual = actual_op.eval()
        self.assertShapeEqual(expected, actual_op)
        self.assertAllClose(expected, actual)
  def testAllInputOptionsThroughGradientError(self):
    """Gradient-checks every pseudo_random / overlapping combination.

    One fixed input is pooled with each of the four option combinations and
    the error reported by tf.test.compute_gradient_error must stay below the
    error margin.
    """
    shape = (1, 7, 13, 1)
    values = self._GenerateUniqueRandomInputTensor(shape)
    # Add some randomness to make the input values not so 'integer'.
    values += self._PRNG.random_sample(shape)
    ratio = [1, math.sqrt(2), math.sqrt(3), 1]
    option_grid = [(use_pseudo_random, use_overlapping)
                   for use_pseudo_random in (True, False)
                   for use_overlapping in (True, False)]
    for use_pseudo_random, use_overlapping in option_grid:
      with self.test_session() as _:
        x = tf.constant(values, shape=shape)
        pooled, unused_row_seq, unused_col_seq = tf.nn.fractional_max_pool(
            x,
            ratio,
            pseudo_random=use_pseudo_random,
            overlapping=use_overlapping,
            deterministic=True,
            seed=self._SEED,
            seed2=self._SEED2)
        pooled_shape = pooled.eval().shape
        # error_margin and delta setting is similar to max_pool_grad.
        max_error = 1e-3
        error = tf.test.compute_gradient_error(
            x,
            shape,
            pooled,
            pooled_shape,
            x_init_value=values.reshape(shape),
            delta=1e-2)
        self.assertLess(error, max_error)
  def testDifferentTensorShapesThroughGradientError(self):
    """Gradient-checks the op over a grid of input tensor shapes.

    pseudo_random and overlapping are both enabled; for each shape the error
    reported by tf.test.compute_gradient_error must stay below the error
    margin.
    """
    ratio = [1, math.sqrt(3), math.sqrt(2), 1]
    # Same visiting order as the nested loops: batches, rows, cols, channels.
    shapes = [(batches, rows, cols, channels)
              for batches in (1, 2)
              for rows in (5, 13)
              for cols in (5, 11)
              for channels in (1, 3)]
    for shape in shapes:
      values = self._GenerateUniqueRandomInputTensor(shape)
      # Add some randomness to make the input values not so 'integer'.
      values += self._PRNG.random_sample(shape)
      with self.test_session() as _:
        x = tf.constant(values, shape=shape)
        pooled, unused_row_seq, unused_col_seq = tf.nn.fractional_max_pool(
            x,
            ratio,
            pseudo_random=True,
            overlapping=True,
            deterministic=True,
            seed=self._SEED,
            seed2=self._SEED2)
        pooled_shape = pooled.eval().shape
        # error_margin and delta setting is similar to max_pool_grad.
        max_error = 1e-3
        error = tf.test.compute_gradient_error(
            x,
            shape,
            pooled,
            pooled_shape,
            x_init_value=values.reshape(shape),
            delta=1e-2)
        self.assertLess(error, max_error)
  def testLargePoolingRatioThroughGradientError(self):
    """Gradient-checks the op with pooling ratios larger than 2.

    Uses overlapping=True and pseudo_random=False. The output shape handed to
    tf.test.compute_gradient_error is derived here by truncating
    input extent / pooling ratio for each dimension.
    """
    shape = (1, 17, 23, 1)
    values = self._GenerateUniqueRandomInputTensor(shape)
    # Add some randomness to make the input values not so 'integer'.
    values += self._PRNG.random_sample(shape)
    ratio = (1, math.sqrt(13), math.sqrt(7), 1)
    pooled_shape = [int(extent / factor)
                    for extent, factor in zip(shape, ratio)]
    with self.test_session() as _:
      x = tf.constant(values, shape=shape)
      pooled, unused_row_seq, unused_col_seq = tf.nn.fractional_max_pool(
          x,
          ratio,
          pseudo_random=False,
          overlapping=True,
          deterministic=True,
          seed=self._SEED,
          seed2=self._SEED2)
      # error_margin and delta setting is similar to max_pool_grad.
      max_error = 1e-3
      error = tf.test.compute_gradient_error(
          x,
          shape,
          pooled,
          pooled_shape,
          x_init_value=values.reshape(shape),
          delta=1e-2)
      self.assertLess(error, max_error)
  def testWhenRepeatedMaxValueInPoolingRegion(self):
    """Pins the gradient routing when a pooling cell has tied max values.

    There is no formal definition of what the gradient should be when the
    maximum occurs more than once inside a pooling cell, such as
        | 1 5 |
        | 5 3 |
    The result depends heavily on the implementation: swapping the order of a
    nested for loop that walks the tensor would change it considerably.

    The purpose of this test is to raise an alert when the implementation
    changes. The current implementation scans row-by-row.
    """
    in_shape = [1, 4, 4, 1]
    out_shape = [1, 3, 2, 1]
    in_values = [5.0, 4.0, 6.0, 7.0,
                 3.0, 5.0, 9.0, 6.0,
                 8.0, 8.0, 9.0, 5.0,
                 7.0, 4.0, 0.0, 0.0]  # pyformat: disable
    grad_values = [12.0, 15.0,
                   17.0, -5.0,
                   6.0, 21.0]  # pyformat: disable
    row_bounds = [0, 1, 3, 4]
    col_bounds = [0, 2, 4]
    # Each case: (overlapping flag, pooled output handed to the grad op,
    # expected input backprop).
    cases = [
        (False,
         [5.0, 7.0,
          8.0, 9.0,
          7.0, 0.0],
         np.reshape(
             [12.0, 0.0, 0.0, 15.0,
              0.0, 0.0, -5.0, 0.0,
              17.0, 0.0, 0.0, 0.0,
              6.0, 0.0, 21.0, 0.0],
             in_shape)),
        (True,
         [9.0, 9.0,
          9.0, 9.0,
          7.0, 0.0],
         np.reshape(
             [0.0, 0.0, 0.0, 0.0,
              0.0, 0.0, 39.0, 0.0,
              0.0, 0.0, 0.0, 0.0,
              6.0, 0.0, 21.0, 0.0],
             in_shape)),
    ]  # pyformat: disable
    with self.test_session() as _:
      pool_input = tf.constant(in_values, shape=in_shape)
      pooled_grad = tf.constant(grad_values, shape=out_shape)
      for use_overlapping, pooled_values, expected in cases:
        pooled = tf.constant(pooled_values, shape=out_shape)
        grad_op = gen_nn_ops._fractional_max_pool_grad(
            pool_input,
            pooled,
            pooled_grad,
            row_bounds,
            col_bounds,
            overlapping=use_overlapping)
        actual = grad_op.eval()
        self.assertShapeEqual(np.reshape(expected, in_shape), grad_op)
        self.assertAllClose(expected, actual)
 # When this file is executed as a script, hand control to tf.test.main().
 if __name__ == "__main__":
  tf.test.main()
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@ -181,6 +181,8 @@ AvgPool
 MaxPool
 Softmax
 LogSoftmax
 FractionalAvgPoolGrad
 FractionalMaxPoolGrad
 # parsing_ops
 ParseExample
--- a/tensorflow/python/ops/nn.py
+++ b/tensorflow/python/ops/nn.py
@ -132,6 +132,8 @@ to the `Convolution` section for details about the padding calculation.
@@max_pool_with_argmax
@@avg_pool3d
@@max_pool3d
@@fractional_avg_pool
@@fractional_max_pool
 ## Morphological filtering
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@ -361,6 +361,53 @@ def _MaxPoolGrad(op, grad):
                                   data_format=op.get_attr("data_format"))
@ops.RegisterGradient("FractionalMaxPool")
def _FractionalMaxPoolGrad(op, grad_0, unused_grad_1, unused_grad_2):
  """Computes the input gradient of a FractionalMaxPool op.

  FractionalMaxPool has three outputs, so three incoming gradients are
  supplied. Only the gradient of the pooled output is used; the gradients for
  the row and column pooling sequences are ignored.

  Args:
    op: The FractionalMaxPoolOp.
    grad_0: Gradient with respect to op.outputs[0]
    unused_grad_1: Gradient with respect to op.outputs[1]/row_seq. Not used.
    unused_grad_2: Gradient with respect to op.outputs[2]/col_seq. Not used.

  Returns:
    Input backprop for FractionalMaxPool op.
  """
  orig_input = op.inputs[0]
  pooled_output = op.outputs[0]
  row_pooling_sequence = op.outputs[1]
  col_pooling_sequence = op.outputs[2]
  use_overlapping = op.get_attr("overlapping")
  # pylint: disable=protected-access
  return gen_nn_ops._fractional_max_pool_grad(
      orig_input, pooled_output, grad_0, row_pooling_sequence,
      col_pooling_sequence, use_overlapping)
@ops.RegisterGradient("FractionalAvgPool")
def _FractionalAvgPoolGrad(op, grad_0, unused_grad_1, unused_grad_2):
  """Computes the input gradient of a FractionalAvgPool op.

  FractionalAvgPool has three outputs, so three incoming gradients are
  supplied. Only the gradient of the pooled output is used; the gradients for
  the row and column pooling sequences are ignored.

  Args:
    op: The FractionalAvgPoolOp.
    grad_0: Gradient with respect to op.outputs[0]
    unused_grad_1: Gradient with respect to op.outputs[1]/row_seq. Not used.
    unused_grad_2: Gradient with respect to op.outputs[2]/col_seq. Not used.

  Returns:
    Input backprop for FractionalAvgPool op.
  """
  # The grad op receives the statically inferred shape of the original input.
  # NOTE(review): this presumably requires that shape to be fully defined at
  # graph-construction time -- confirm.
  orig_input_shape = op.inputs[0].get_shape()
  row_pooling_sequence = op.outputs[1]
  col_pooling_sequence = op.outputs[2]
  use_overlapping = op.get_attr("overlapping")
  # pylint: disable=protected-access
  return gen_nn_ops._fractional_avg_pool_grad(
      orig_input_shape, grad_0, row_pooling_sequence, col_pooling_sequence,
      use_overlapping)
@ops.RegisterGradient("BatchNormWithGlobalNormalization")
 def _BatchNormWithGlobalNormalizationGrad(op, grad):
  """Return the gradients for the 5 inputs of BatchNormWithGlobalNormalization.
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@ -941,6 +941,39 @@ def _AvgPoolGradShape(op):
    return [tensor_shape.unknown_shape(ndims=4)]
@ops.RegisterShape("FractionalMaxPool")
@ops.RegisterShape("FractionalAvgPool")
def _fractional_pool_shape(op):
  """Shape function for the FractionalMaxPool and FractionalAvgPool ops.

  Each output dimension is the matching input dimension divided by the
  corresponding `pooling_ratio` entry, truncated to an integer. Input
  dimensions that are not statically known stay unknown in the outputs.

  Args:
    op: A FractionalMaxPool or FractionalAvgPool operation. Its first input
      must be compatible with rank 4.

  Returns:
    A list of three TensorShapes: the pooled output, the row pooling sequence
    (length output_dims[1]) and the column pooling sequence (length
    output_dims[2]).
  """
  input_dims = op.inputs[0].get_shape().with_rank(4).as_list()
  pooling_ratio = op.get_attr("pooling_ratio")
  # as_list() reports unknown dimensions as None, and dividing None raises a
  # TypeError. Substitute a placeholder for the arithmetic and restore the
  # unknown entries afterwards.
  known_dims = [0 if dim is None else dim for dim in input_dims]
  scaled_dims = np.divide(known_dims, pooling_ratio).astype(int)
  output_dims = [
      None if dim is None else int(scaled)
      for dim, scaled in zip(input_dims, scaled_dims)
  ]
  return [
      # output.
      tensor_shape.TensorShape(output_dims),
      # row_pooling_sequence.
      tensor_shape.TensorShape([output_dims[1]]),
      # col_pooling_sequence.
      tensor_shape.TensorShape([output_dims[2]])
  ]
@ops.RegisterShape("FractionalMaxPoolGrad")
def _fractional_max_pool_grad_shape(op):
  """Shape function for the FractionalMaxPoolGrad op.

  Args:
    op: A FractionalMaxPoolGrad operation.

  Returns:
    A single-element list holding the shape of the op's first input,
    constrained to rank 4.
  """
  return [op.inputs[0].get_shape().with_rank(4)]
@ops.RegisterShape("FractionalAvgPoolGrad")
def _fractional_avg_pool_grad_shape(op):
  """Shape function for the FractionalAvgPoolGrad op.

  Args:
    op: A FractionalAvgPoolGrad operation whose first input carries the shape
      of the original input tensor.

  Returns:
    A single-element list: the shape given by the first input when its value
    is available as a constant, otherwise an unknown shape of rank 4.
  """
  static_shape_value = tensor_util.constant_value(op.inputs[0])
  if static_shape_value is None:
    return [tensor_shape.unknown_shape(ndims=4)]
  return [tensor_shape.TensorShape(static_shape_value.tolist())]
@ops.RegisterShape("Conv2DBackpropFilter")
 def _Conv2DBackpropFilterShape(op):
  """Shape function for the Conv2DBackpropFilter op."""