From 8f7e34982dde766b3fc73c90bcdbfccc001fe8e3 Mon Sep 17 00:00:00 2001
From: Anna R
Date: Tue, 3 Mar 2020 14:17:57 -0800
Subject: [PATCH] Splitting out common_shape_fns.cc:

* Shape functions under the shape_inference:: namespace stay in
  common_shape_fns.cc.
* Other functions are moved to kernel_shape_util.h.

Also, I removed the common_shape_fns.h include from ops_util.h, since it is
not used by ops_util.h itself, only by files that include ops_util.h.

PiperOrigin-RevId: 298692539
Change-Id: I79df418a377d4eaed67aeec6f3736acb9d5b111d
---
 .../mlir/xla/transforms/legalize_tf.cc          |   2 +-
 .../tf2xla/kernels/conv_op_helpers.cc           |   2 +-
 .../kernels/extract_image_patches_op.cc         |   1 +
 tensorflow/core/BUILD                           |   2 +
 tensorflow/core/framework/BUILD                 |  17 ++
 tensorflow/core/framework/common_shape_fns.cc   | 112 -------------
 tensorflow/core/framework/common_shape_fns.h    | 123 ---------------
 .../core/framework/kernel_shape_util.cc         | 131 +++++++++++++++
 tensorflow/core/framework/kernel_shape_util.h   | 149 ++++++++++++++++++
 tensorflow/core/framework/ops_util.h            |   1 -
 .../optimizers/layout_optimizer_test.cc         |   2 +
 tensorflow/core/kernels/avgpooling_op.cc        |   2 +
 .../core/kernels/conv_grad_filter_ops.cc        |   2 +-
 .../core/kernels/conv_grad_input_ops.cc         |   2 +-
 tensorflow/core/kernels/conv_grad_ops_3d.cc     |   2 +-
 .../core/kernels/conv_grad_shape_utils.cc       |   1 +
 tensorflow/core/kernels/conv_ops.cc             |   1 +
 tensorflow/core/kernels/conv_ops_3d.cc          |   1 +
 .../kernels/conv_ops_fused_image_transform.cc   |   3 +-
 .../core/kernels/conv_ops_using_gemm.cc         |   4 +-
 .../core/kernels/depthwise_conv_grad_op.cc      |   2 +-
 tensorflow/core/kernels/depthwise_conv_op.cc    |   5 +-
 tensorflow/core/kernels/dilation_ops.cc         |   6 +-
 .../core/kernels/extract_image_patches_op.cc    |   3 +
 .../core/kernels/extract_volume_patches_op.cc   |   3 +
 .../core/kernels/mkl_pooling_ops_common.cc      |   2 +-
 .../kernels/neon/neon_depthwise_conv_op.cc      |   2 +-
 tensorflow/core/kernels/nn_ops_test.cc          |   2 +-
 tensorflow/core/kernels/ops_util_test.cc        |   2 +
 tensorflow/core/kernels/pooling_ops_3d.cc       |   1 +
 tensorflow/core/kernels/pooling_ops_common.cc   |   1 +
 tensorflow/core/kernels/quantized_conv_ops.cc   |   2 +-
 tensorflow/core/ops/array_ops.cc                |   1 +
 tensorflow/core/ops/nn_ops.cc                   |   2 +
 34 files changed, 341 insertions(+), 253 deletions(-)
 create mode 100644 tensorflow/core/framework/kernel_shape_util.cc
 create mode 100644 tensorflow/core/framework/kernel_shape_util.h

diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
index 8f955d6944a..59a4556868b 100644
--- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
+++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
@@ -46,7 +46,7 @@ limitations under the License.
 #include "tensorflow/compiler/mlir/xla/transforms/passes.h"
 #include "tensorflow/compiler/xla/client/padding.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/kernel_shape_util.h"
 #include "tensorflow/core/kernels/conv_grad_shape_utils.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
index 9f0ec65bb71..b60a13972a7 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
@@ -29,10 +29,10 @@ limitations under the License.
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/ops_util.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" diff --git a/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc b/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc index ba11b12fa2a..63e3f185421 100644 --- a/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/util/tensor_format.h" diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b02eb89ebfc..bf8f3e13811 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -469,6 +469,7 @@ tf_cuda_library( "//tensorflow/core/framework:graph_to_functiondef.h", "//tensorflow/core/framework:kernel_def_builder.h", "//tensorflow/core/framework:kernel_def_util.h", + "//tensorflow/core/framework:kernel_shape_util.h", "//tensorflow/core/framework:log_memory.h", "//tensorflow/core/framework:logging.h", "//tensorflow/core/framework:lookup_interface.h", @@ -2351,6 +2352,7 @@ tf_cuda_library( "//tensorflow/core/framework:attr_value_util", "//tensorflow/core/framework:bfloat16", "//tensorflow/core/framework:common_shape_fns", + "//tensorflow/core/framework:kernel_shape_util", "//tensorflow/core/framework:node_def_util", "//tensorflow/core/framework:node_properties", "//tensorflow/core/framework:numeric_types", diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index bff95bc60a5..e72f8ef2693 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -129,6 +129,7 @@ exports_files( [ "attr_value_util.h", "common_shape_fns.h", + "kernel_shape_util.h", "node_def_util.h", "node_properties.h", "op.h", @@ -166,6 +167,7 @@ filegroup( "graph_to_functiondef.h", "kernel_def_builder.h", "kernel_def_util.h", + "kernel_shape_util.h", "local_rendezvous.h", "log_memory.h", "logging.h", @@ -269,6 +271,8 @@ filegroup( "bfloat16.h", "bounds_check.h", "cpu_allocator_impl.cc", + "kernel_shape_util.cc", + "kernel_shape_util.h", "log_memory.cc", "log_memory.h", "numeric_types.h", @@ -775,6 +779,19 @@ cc_library( ], ) +cc_library( + name = "kernel_shape_util", + srcs = ["kernel_shape_util.cc"], + hdrs = ["kernel_shape_util.h"], + deps = [ + ":tensor", + ":tensor_shape", + "//tensorflow/core/lib/core:errors", + "//tensorflow/core/platform:status", + "//tensorflow/core/util:padding", + ], +) + cc_library( name = "common_shape_fns", srcs = ["common_shape_fns.cc"], diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 2d39be1379e..9da77699e2c 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -27,118 +27,6 @@ limitations under the License. 
namespace tensorflow { -Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size, - int64 dilation_rate, int64 stride, - Padding padding_type, int64* output_size, - int64* padding_before, - int64* padding_after) { - if (stride <= 0) { - return errors::InvalidArgument("Stride must be > 0, but got ", stride); - } - if (dilation_rate < 1) { - return errors::InvalidArgument("Dilation rate must be >= 1, but got ", - dilation_rate); - } - - // See also the parallel implementation in GetWindowedOutputSizeFromDimsV2. - int64 effective_filter_size = (filter_size - 1) * dilation_rate + 1; - switch (padding_type) { - case Padding::VALID: - *output_size = (input_size - effective_filter_size + stride) / stride; - *padding_before = *padding_after = 0; - break; - case Padding::EXPLICIT: - *output_size = (input_size + *padding_before + *padding_after - - effective_filter_size + stride) / - stride; - break; - case Padding::SAME: - *output_size = (input_size + stride - 1) / stride; - const int64 padding_needed = - std::max(int64{0}, (*output_size - 1) * stride + - effective_filter_size - input_size); - // For odd values of total padding, add more padding at the 'right' - // side of the given dimension. - *padding_before = padding_needed / 2; - *padding_after = padding_needed - *padding_before; - break; - } - if (*output_size < 0) { - return errors::InvalidArgument( - "Computed output size would be negative: ", *output_size, - " [input_size: ", input_size, - ", effective_filter_size: ", effective_filter_size, - ", stride: ", stride, "]"); - } - return Status::OK(); -} - -Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, - int64 stride, Padding padding_type, - int64* output_size, int64* padding_before, - int64* padding_after) { - return GetWindowedOutputSizeVerboseV2(input_size, filter_size, - /*dilation_rate=*/1, stride, - padding_type, output_size, - padding_before, padding_after); -} - -Status GetWindowedOutputSize(int64 input_size, int64 filter_size, int64 stride, - Padding padding_type, int64* output_size, - int64* padding_size) { - if (padding_type == Padding::EXPLICIT) { - return errors::Internal( - "GetWindowedOutputSize does not handle EXPLICIT padding; call " - "GetWindowedOutputSizeVerbose instead"); - } - int64 padding_after_unused; - return GetWindowedOutputSizeVerbose(input_size, filter_size, stride, - padding_type, output_size, padding_size, - &padding_after_unused); -} - -Status GetWindowedOutputSizeV2(int64 input_size, int64 filter_size, - int64 dilation_rate, int64 stride, - Padding padding_type, int64* output_size, - int64* padding_size) { - if (padding_type == Padding::EXPLICIT) { - return errors::Internal( - "GetWindowedOutputSizeV2 does not handle EXPLICIT padding; call " - "GetWindowedOutputSizeVerboseV2 instead"); - } - int64 padding_after_unused; - return GetWindowedOutputSizeVerboseV2(input_size, filter_size, dilation_rate, - stride, padding_type, output_size, - padding_size, &padding_after_unused); -} - -Status Get3dOutputSize(const std::array& input, - const std::array& window, - const std::array& strides, - Padding padding_type, std::array* output_ptr, - std::array* padding_ptr) { - for (size_t i = 0; i < input.size(); ++i) { - TF_RETURN_IF_ERROR(GetWindowedOutputSize(input[i], window[i], strides[i], - padding_type, &(*output_ptr)[i], - &(*padding_ptr)[i])); - } - return Status::OK(); -} - -Status Get3dOutputSizeV2(const std::array& input, - const std::array& window, - const std::array& dilations, - const std::array& strides, - Padding 
padding_type, std::array* output_ptr, - std::array* padding_ptr) { - for (size_t i = 0; i < input.size(); ++i) { - TF_RETURN_IF_ERROR(GetWindowedOutputSizeV2( - input[i], window[i], dilations[i], strides[i], padding_type, - &(*output_ptr)[i], &(*padding_ptr)[i])); - } - return Status::OK(); -} - namespace shape_inference { // The V2 version computes windowed output size with arbitrary dilation_rate, diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h index 715eb2ad018..0d869ee7ba6 100644 --- a/tensorflow/core/framework/common_shape_fns.h +++ b/tensorflow/core/framework/common_shape_fns.h @@ -23,129 +23,6 @@ limitations under the License. namespace tensorflow { -// GetWindowedOutputSize(): Given an input tensor, kernel, stride and padding -// type, the function computes the output and padding dimensions. -// -// For example, ignoring batches or multiple features, a 1D convolution -// takes as input a 1D tensor of shape (H), and convolves it with a filter of -// shape (K). -// -// It also takes in a few additional parameters: -// -// Stride (S): the stride with which we apply the filters. This is the offset -// between locations where we apply the filters. A larger stride -// means that the output will be spatially smaller. -// -// Padding (P): the padding we apply to the input tensor along each -// dimension. This is usually used to make sure that the spatial dimensions -// do not shrink when we progress with convolutions. This function supports two -// types of padding. -// SAME: the pad value is computed so that the output will have size H/S. -// VALID: no padding is carried out. -// If you want to use EXPLICIT padding, GetWindowedOutputSizeVerbose must be -// called instead. Note the padded area is zero-filled. -// -// The output dimensions for convolution and many other operations, when given -// all the parameters above, are as follows: -// - When Padding = SAME: the output size is (H'), where -// H' = ceil(float(H) / float(S)) -// where ceil is the ceiling function. The number of padded cells -// is computed as: -// Pc = ((H' - 1) * S + K - H) / 2 -// When the stride is 1, the expression simplifies to -// H' = H, Pc = (K-1)/2. -// This is where SAME comes from - the output has the same size as the input -// has. -// -// - When Padding = VALID: the output size is computed as -// H' = ceil(float(H - K + 1) / float(S)) -// and the number of padded cells is always zero. -// When the stride is 1, the expression simplifies to -// H' = H-K+1. -// -// For convolution, mathematically, the output value at location (r') -// is the inner product of two vectors: the chunk of input at -// ((r'*S-Pr) : (r'*S-Pr+K)), -// and the filter. -// -// For 2D and 3D convolutions, the spatial dimensions are orthogonal, so the -// size and padding of each spatial dimension can be computed by calling -// GetWindowedOutputSize separately for each dimension. -// -Status GetWindowedOutputSize(int64 input_size, int64 filter_size, int64 stride, - Padding padding_type, int64* output_size, - int64* padding_size); - -// The V2 version computes the same outputs with arbitrary dilation_rate. -// The output dimensions are computed as follows: -// - When adding dilation_rate (D), we compute an effective filter size (K'): -// K' = (K - 1) * D + 1 -// - When Padding = SAME: the output size is (H'), where -// H' = ceil(float(H) / float(S)) -// where ceil is the ceiling function. 
The number of padded cells -// is computed as: -// Pc = ((H' - 1) * S + K' - H) / 2 -// When the stride is 1, the expression simplifies to -// H' = H, Pc = (K'-1)/2. -// This is where SAME comes from - the output has the same size as the input -// has. -// -// - When Padding = VALID: the output size is computed as -// H' = ceil(float(H - K' + 1) / float(S)) -// and the number of padded cells is always zero. -// When the stride is 1, the expression simplifies to -// H' = H-K'+1. -// -// If you want to use EXPLICIT padding, GetWindowedOutputSizeVerboseV2 must be -// called instead -// -// TODO(b/67112639): Merge V2 versions and the original versions eventually. -Status GetWindowedOutputSizeV2(int64 input_size, int64 filter_size, - int64 dilation_rate, int64 stride, - Padding padding_type, int64* output_size, - int64* padding_size); - -// Returns the same output dimensions as in GetWindowedOutputSize, but returns -// verbose padding dimensions (before/after), and EXPLICIT padding is supported. -// When padding_type is EXPLICIT, *padding_before and *padding_after must -// already point to initialized integers with the padding amounts. Otherwise, -// *padding_before and *padding_after are set by this function, and any -// excess padding (caused by an odd padding size value) is added to the -// 'padding_after' dimension. -Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, - int64 stride, Padding padding_type, - int64* output_size, int64* padding_before, - int64* padding_after); - -// The V2 version computes the same outputs with arbitrary dilation_rate. For -// detailed equations, refer to the comments for GetWindowedOutputSizeV2(). -Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size, - int64 dilation_rate, int64 stride, - Padding padding_type, int64* output_size, - int64* padding_before, - int64* padding_after); - -// Given an input tensor, kernel, stride and padding type, populates the 3D size -// of the output tensor and padding to be applied to the input tensor at the -// lower end of every dimension. Use for 3D convolutions, where the input data -// is padded with zeros, as well as for 3D avg/max pooling, where the input data -// is padded with invalid values that are not considered for pooling. EXPLICIT -// padding is not supported. -Status Get3dOutputSize(const std::array& input, - const std::array& window, - const std::array& strides, - Padding padding_type, std::array* output_ptr, - std::array* padding_ptr); - -// The V2 version computes the same outputs with arbitrary dilation_rate. For -// detailed equations, refer to the comments for GetWindowedOutputSizeV2(). -Status Get3dOutputSizeV2(const std::array& input, - const std::array& window, - const std::array& dilations, - const std::array& strides, - Padding padding_type, std::array* output_ptr, - std::array* padding_ptr); - namespace shape_inference { // Like GetWindowedOutputSize, but deals with DimensionHandles. Does not support diff --git a/tensorflow/core/framework/kernel_shape_util.cc b/tensorflow/core/framework/kernel_shape_util.cc new file mode 100644 index 00000000000..c63a477bd0f --- /dev/null +++ b/tensorflow/core/framework/kernel_shape_util.cc @@ -0,0 +1,131 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/kernel_shape_util.h" + +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { +Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size, + int64 dilation_rate, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_before, + int64* padding_after) { + if (stride <= 0) { + return errors::InvalidArgument("Stride must be > 0, but got ", stride); + } + if (dilation_rate < 1) { + return errors::InvalidArgument("Dilation rate must be >= 1, but got ", + dilation_rate); + } + + // See also the parallel implementation in GetWindowedOutputSizeFromDimsV2. + int64 effective_filter_size = (filter_size - 1) * dilation_rate + 1; + switch (padding_type) { + case Padding::VALID: + *output_size = (input_size - effective_filter_size + stride) / stride; + *padding_before = *padding_after = 0; + break; + case Padding::EXPLICIT: + *output_size = (input_size + *padding_before + *padding_after - + effective_filter_size + stride) / + stride; + break; + case Padding::SAME: + *output_size = (input_size + stride - 1) / stride; + const int64 padding_needed = + std::max(int64{0}, (*output_size - 1) * stride + + effective_filter_size - input_size); + // For odd values of total padding, add more padding at the 'right' + // side of the given dimension. 
+ *padding_before = padding_needed / 2; + *padding_after = padding_needed - *padding_before; + break; + } + if (*output_size < 0) { + return errors::InvalidArgument( + "Computed output size would be negative: ", *output_size, + " [input_size: ", input_size, + ", effective_filter_size: ", effective_filter_size, + ", stride: ", stride, "]"); + } + return Status::OK(); +} + +Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, + int64 stride, Padding padding_type, + int64* output_size, int64* padding_before, + int64* padding_after) { + return GetWindowedOutputSizeVerboseV2(input_size, filter_size, + /*dilation_rate=*/1, stride, + padding_type, output_size, + padding_before, padding_after); +} + +Status GetWindowedOutputSize(int64 input_size, int64 filter_size, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_size) { + if (padding_type == Padding::EXPLICIT) { + return errors::Internal( + "GetWindowedOutputSize does not handle EXPLICIT padding; call " + "GetWindowedOutputSizeVerbose instead"); + } + int64 padding_after_unused; + return GetWindowedOutputSizeVerbose(input_size, filter_size, stride, + padding_type, output_size, padding_size, + &padding_after_unused); +} + +Status GetWindowedOutputSizeV2(int64 input_size, int64 filter_size, + int64 dilation_rate, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_size) { + if (padding_type == Padding::EXPLICIT) { + return errors::Internal( + "GetWindowedOutputSizeV2 does not handle EXPLICIT padding; call " + "GetWindowedOutputSizeVerboseV2 instead"); + } + int64 padding_after_unused; + return GetWindowedOutputSizeVerboseV2(input_size, filter_size, dilation_rate, + stride, padding_type, output_size, + padding_size, &padding_after_unused); +} + +Status Get3dOutputSize(const std::array& input, + const std::array& window, + const std::array& strides, + Padding padding_type, std::array* output_ptr, + std::array* padding_ptr) { + for (size_t i = 0; i < input.size(); ++i) { + TF_RETURN_IF_ERROR(GetWindowedOutputSize(input[i], window[i], strides[i], + padding_type, &(*output_ptr)[i], + &(*padding_ptr)[i])); + } + return Status::OK(); +} + +Status Get3dOutputSizeV2(const std::array& input, + const std::array& window, + const std::array& dilations, + const std::array& strides, + Padding padding_type, std::array* output_ptr, + std::array* padding_ptr) { + for (size_t i = 0; i < input.size(); ++i) { + TF_RETURN_IF_ERROR(GetWindowedOutputSizeV2( + input[i], window[i], dilations[i], strides[i], padding_type, + &(*output_ptr)[i], &(*padding_ptr)[i])); + } + return Status::OK(); +} +} // namespace tensorflow diff --git a/tensorflow/core/framework/kernel_shape_util.h b/tensorflow/core/framework/kernel_shape_util.h new file mode 100644 index 00000000000..adb1dca1156 --- /dev/null +++ b/tensorflow/core/framework/kernel_shape_util.h @@ -0,0 +1,149 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ +#define TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ + +#include + +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/util/padding.h" + +namespace tensorflow { +// GetWindowedOutputSize(): Given an input tensor, kernel, stride and padding +// type, the function computes the output and padding dimensions. +// +// For example, ignoring batches or multiple features, a 1D convolution +// takes as input a 1D tensor of shape (H), and convolves it with a filter of +// shape (K). +// +// It also takes in a few additional parameters: +// +// Stride (S): the stride with which we apply the filters. This is the offset +// between locations where we apply the filters. A larger stride +// means that the output will be spatially smaller. +// +// Padding (P): the padding we apply to the input tensor along each +// dimension. This is usually used to make sure that the spatial dimensions +// do not shrink when we progress with convolutions. This function supports two +// types of padding. +// SAME: the pad value is computed so that the output will have size H/S. +// VALID: no padding is carried out. +// If you want to use EXPLICIT padding, GetWindowedOutputSizeVerbose must be +// called instead. Note the padded area is zero-filled. +// +// The output dimensions for convolution and many other operations, when given +// all the parameters above, are as follows: +// - When Padding = SAME: the output size is (H'), where +// H' = ceil(float(H) / float(S)) +// where ceil is the ceiling function. The number of padded cells +// is computed as: +// Pc = ((H' - 1) * S + K - H) / 2 +// When the stride is 1, the expression simplifies to +// H' = H, Pc = (K-1)/2. +// This is where SAME comes from - the output has the same size as the input +// has. +// +// - When Padding = VALID: the output size is computed as +// H' = ceil(float(H - K + 1) / float(S)) +// and the number of padded cells is always zero. +// When the stride is 1, the expression simplifies to +// H' = H-K+1. +// +// For convolution, mathematically, the output value at location (r') +// is the inner product of two vectors: the chunk of input at +// ((r'*S-Pr) : (r'*S-Pr+K)), +// and the filter. +// +// For 2D and 3D convolutions, the spatial dimensions are orthogonal, so the +// size and padding of each spatial dimension can be computed by calling +// GetWindowedOutputSize separately for each dimension. +// +Status GetWindowedOutputSize(int64 input_size, int64 filter_size, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_size); + +// The V2 version computes the same outputs with arbitrary dilation_rate. +// The output dimensions are computed as follows: +// - When adding dilation_rate (D), we compute an effective filter size (K'): +// K' = (K - 1) * D + 1 +// - When Padding = SAME: the output size is (H'), where +// H' = ceil(float(H) / float(S)) +// where ceil is the ceiling function. The number of padded cells +// is computed as: +// Pc = ((H' - 1) * S + K' - H) / 2 +// When the stride is 1, the expression simplifies to +// H' = H, Pc = (K'-1)/2. +// This is where SAME comes from - the output has the same size as the input +// has. +// +// - When Padding = VALID: the output size is computed as +// H' = ceil(float(H - K' + 1) / float(S)) +// and the number of padded cells is always zero. +// When the stride is 1, the expression simplifies to +// H' = H-K'+1. 
+//
+// If you want to use EXPLICIT padding, GetWindowedOutputSizeVerboseV2 must be
+// called instead
+//
+// TODO(b/67112639): Merge V2 versions and the original versions eventually.
+Status GetWindowedOutputSizeV2(int64 input_size, int64 filter_size,
+                               int64 dilation_rate, int64 stride,
+                               Padding padding_type, int64* output_size,
+                               int64* padding_size);
+
+// Returns the same output dimensions as in GetWindowedOutputSize, but returns
+// verbose padding dimensions (before/after), and EXPLICIT padding is supported.
+// When padding_type is EXPLICIT, *padding_before and *padding_after must
+// already point to initialized integers with the padding amounts. Otherwise,
+// *padding_before and *padding_after are set by this function, and any
+// excess padding (caused by an odd padding size value) is added to the
+// 'padding_after' dimension.
+Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size,
+                                    int64 stride, Padding padding_type,
+                                    int64* output_size, int64* padding_before,
+                                    int64* padding_after);
+
+// The V2 version computes the same outputs with arbitrary dilation_rate. For
+// detailed equations, refer to the comments for GetWindowedOutputSizeV2().
+Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size,
+                                      int64 dilation_rate, int64 stride,
+                                      Padding padding_type, int64* output_size,
+                                      int64* padding_before,
+                                      int64* padding_after);
+
+// Given an input tensor, kernel, stride and padding type, populates the 3D size
+// of the output tensor and padding to be applied to the input tensor at the
+// lower end of every dimension. Use for 3D convolutions, where the input data
+// is padded with zeros, as well as for 3D avg/max pooling, where the input data
+// is padded with invalid values that are not considered for pooling. EXPLICIT
+// padding is not supported.
+Status Get3dOutputSize(const std::array<int64, 3>& input,
+                       const std::array<int64, 3>& window,
+                       const std::array<int64, 3>& strides,
+                       Padding padding_type, std::array<int64, 3>* output_ptr,
+                       std::array<int64, 3>* padding_ptr);
+
+// The V2 version computes the same outputs with arbitrary dilation_rate. For
+// detailed equations, refer to the comments for GetWindowedOutputSizeV2().
+Status Get3dOutputSizeV2(const std::array<int64, 3>& input,
+                         const std::array<int64, 3>& window,
+                         const std::array<int64, 3>& dilations,
+                         const std::array<int64, 3>& strides,
+                         Padding padding_type, std::array<int64, 3>* output_ptr,
+                         std::array<int64, 3>* padding_ptr);
+
+}  // namespace tensorflow
+#endif  // TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_
diff --git a/tensorflow/core/framework/ops_util.h b/tensorflow/core/framework/ops_util.h
index feaab10b366..b323109abfc 100644
--- a/tensorflow/core/framework/ops_util.h
+++ b/tensorflow/core/framework/ops_util.h
@@ -21,7 +21,6 @@ limitations under the License.
 #include
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/lib/core/status.h"
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
index ba3b77a7c36..2d6e201f084 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer_test.cc
@@ -14,8 +14,10 @@ limitations under the License.
==============================================================================*/ #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" + #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/grappler/clusters/single_machine.h" diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index 0f22daba43c..20df833a934 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -20,7 +20,9 @@ limitations under the License. #include "tensorflow/core/kernels/avgpooling_op.h" #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index f9bf64f2df3..169fc2e1f63 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -34,7 +35,6 @@ limitations under the License. #ifdef TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS #include "tensorflow/core/kernels/xsmm_conv2d.h" #endif -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index be5d821fc32..d479963556f 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -24,6 +24,7 @@ limitations under the License. #include "absl/base/dynamic_annotations.h" #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -36,7 +37,6 @@ limitations under the License. #ifdef TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS #include "tensorflow/core/kernels/xsmm_conv2d.h" #endif -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index bc6c64963ad..4fe112f7b5e 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -16,6 +16,7 @@ limitations under the License. #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -28,7 +29,6 @@ limitations under the License. 
#include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/conv_grad_shape_utils.h" #include "tensorflow/core/kernels/conv_ops_gpu.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/util/padding.h" diff --git a/tensorflow/core/kernels/conv_grad_shape_utils.cc b/tensorflow/core/kernels/conv_grad_shape_utils.cc index 7857257658f..81c20ab0c7f 100644 --- a/tensorflow/core/kernels/conv_grad_shape_utils.cc +++ b/tensorflow/core/kernels/conv_grad_shape_utils.cc @@ -24,6 +24,7 @@ limitations under the License. #include #include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index d265e9d8f8b..55bfa35e8a5 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index e9e11aebf61..71d22bc91c5 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -16,6 +16,7 @@ limitations under the License. #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc index 21c151d3b67..9055639aaaf 100644 --- a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc +++ b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc @@ -21,8 +21,9 @@ limitations under the License. #include #include + #include "tensorflow/core/framework/bounds_check.h" -#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/conv_ops_using_gemm.cc b/tensorflow/core/kernels/conv_ops_using_gemm.cc index 05df9e0207e..dff1a533ee0 100644 --- a/tensorflow/core/kernels/conv_ops_using_gemm.cc +++ b/tensorflow/core/kernels/conv_ops_using_gemm.cc @@ -47,10 +47,12 @@ limitations under the License. 
#define EIGEN_USE_THREADS #include + #include #include + #include "tensorflow/core/framework/bounds_check.h" -#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index de472d5d4fe..f81065803d3 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -28,7 +29,6 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/depthwise_conv_op.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index e85b6df721a..4c1811791a5 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -15,11 +15,14 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "tensorflow/core/kernels/depthwise_conv_op.h" + #include #include #include #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -28,8 +31,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/conv_ops.h" -#include "tensorflow/core/kernels/depthwise_conv_op.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/dilation_ops.cc b/tensorflow/core/kernels/dilation_ops.cc index f2e7b8a857a..738ea31d555 100644 --- a/tensorflow/core/kernels/dilation_ops.cc +++ b/tensorflow/core/kernels/dilation_ops.cc @@ -17,20 +17,20 @@ limitations under the License. 
#define EIGEN_USE_THREADS +#include "tensorflow/core/kernels/dilation_ops.h" + #include #include -#include "tensorflow/core/kernels/dilation_ops.h" - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/util/padding.h" diff --git a/tensorflow/core/kernels/extract_image_patches_op.cc b/tensorflow/core/kernels/extract_image_patches_op.cc index 2cc9933965e..4e87dfc93a4 100644 --- a/tensorflow/core/kernels/extract_image_patches_op.cc +++ b/tensorflow/core/kernels/extract_image_patches_op.cc @@ -19,8 +19,11 @@ limitations under the License. #define EIGEN_USE_THREADS #include "tensorflow/core/kernels/extract_image_patches_op.h" + #include + #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/extract_volume_patches_op.cc index 904d2a8ac26..3f003b6f7f6 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op.cc +++ b/tensorflow/core/kernels/extract_volume_patches_op.cc @@ -25,8 +25,11 @@ when rates are to be added. #define EIGEN_USE_THREADS #include "tensorflow/core/kernels/extract_volume_patches_op.h" + #include + #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc index 904866f8223..438721f85fd 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -22,7 +22,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/bounds_check.h" -#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/kernel_shape_util.h" namespace tensorflow { using mkldnn::prop_kind; diff --git a/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc b/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc index b218f62ddd9..8e853f2338b 100644 --- a/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc +++ b/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc @@ -20,6 +20,7 @@ limitations under the License. #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK #include "public/gemmlowp.h" #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -28,7 +29,6 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/neon/depthwiseconv_float.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mem.h" diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc index 292be65bc15..331bbe25b17 100644 --- a/tensorflow/core/kernels/nn_ops_test.cc +++ b/tensorflow/core/kernels/nn_ops_test.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" @@ -44,7 +45,6 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/ops_util_test.cc b/tensorflow/core/kernels/ops_util_test.cc index 13427d71ff6..02f460d50e4 100644 --- a/tensorflow/core/kernels/ops_util_test.cc +++ b/tensorflow/core/kernels/ops_util_test.cc @@ -14,7 +14,9 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/kernels/ops_util.h" + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc index 7345ccf69ee..31ead11dd34 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.cc +++ b/tensorflow/core/kernels/pooling_ops_3d.cc @@ -20,6 +20,7 @@ limitations under the License. #include "third_party/eigen3/Eigen/Core" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index 2c56d8d8bfb..4bd710546fe 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc index 5b3570edff5..a4d36cca3e4 100644 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ b/tensorflow/core/kernels/quantized_conv_ops.cc @@ -22,11 +22,11 @@ limitations under the License. 
#define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK #include "public/gemmlowp.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/conv_ops.h" #include "tensorflow/core/kernels/meta_support.h" -#include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 73718061488..9c4c59872f9 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor.pb.h" diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 90c3f246593..f69782c00c1 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -14,7 +14,9 @@ limitations under the License. ==============================================================================*/ #include + #include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h"
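
Note: the snippet below is not part of the patch. It is a minimal, illustrative
sketch of how the relocated helpers are typically called once
"tensorflow/core/framework/kernel_shape_util.h" is included; the wrapper
function name and the concrete sizes are made up for illustration.

    #include "tensorflow/core/framework/kernel_shape_util.h"
    #include "tensorflow/core/lib/core/errors.h"

    namespace tensorflow {

    // Hypothetical example: SAME-padded output width for a width-7 input
    // convolved with a width-3 filter at stride 2.
    Status ExampleWindowedOutputSize() {
      int64 output_size = 0;
      int64 padding_before = 0;
      int64 padding_after = 0;
      TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerboseV2(
          /*input_size=*/7, /*filter_size=*/3, /*dilation_rate=*/1,
          /*stride=*/2, Padding::SAME, &output_size, &padding_before,
          &padding_after));
      // Per the formulas in kernel_shape_util.h: H' = ceil(7 / 2) = 4, and the
      // total padding is (4 - 1) * 2 + 3 - 7 = 2, split as 1 before, 1 after.
      return Status::OK();
    }

    }  // namespace tensorflow

For Padding::EXPLICIT the same function is used, but *padding_before and
*padding_after must already hold the desired padding amounts before the call,
as described in the header comment. GetWindowedOutputSize and
GetWindowedOutputSizeV2 reject EXPLICIT padding with an Internal error and
direct callers to the Verbose variants.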