Remove changes made to support TFRT-based OpKernel classes in Conv3d kernel.

This is essentially a rollback of 2f10fa7810 since we decided not to follow this approach. PiperOrigin-RevId: 346660110 Change-Id: I1c03d47d4f4b42f74ef23f4c8e1d6d3f8207b240
2020-12-09 16:13:53 -08:00 · 2020-12-09 16:13:53 -08:00 · cc5ea84696
commit cc5ea84696
parent b43da1eb10
6 changed files with 163 additions and 298 deletions
--- a/tensorflow/core/framework/BUILD
+++ b/tensorflow/core/framework/BUILD
@ -68,7 +68,6 @@ exports_files(
        "model.h",
        "node_def_builder.h",
        "numeric_op.h",
-        "numeric_op_base.h",
        "op_kernel.h",
        "op_requires.h",
        "op_segment.h",
@ -204,7 +203,6 @@ filegroup(
        "node_def_util.h",
        "node_properties.h",
        "numeric_op.h",
-        "numeric_op_base.h",
        "numeric_types.h",
        "op.h",
        "op_def_builder.h",
@ -305,7 +303,6 @@ filegroup(
        "kernel_shape_util.h",
        "log_memory.cc",
        "log_memory.h",
-        "numeric_op_base.h",
        "numeric_types.h",
        "op_requires.h",
        "ops_util.cc",
--- a/tensorflow/core/framework/numeric_op.h
+++ b/tensorflow/core/framework/numeric_op.h
@ -12,22 +12,38 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+
 #ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_
 #define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_

-#include "tensorflow/core/framework/numeric_op_base.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"

 namespace tensorflow {

+// One input and one output, both the same type.
 template <class T>
-using UnaryOp = UnaryOpBase<T, OpKernel, OpKernelConstruction>;
+class UnaryOp : public OpKernel {
+ public:
+  explicit UnaryOp(OpKernelConstruction* context) : OpKernel(context) {
+    const DataType dt = DataTypeToEnum<T>::v();
+    OP_REQUIRES_OK(context, context->MatchSignature({dt}, {dt}));
+  }
+};

+// Two inputs and one output, all the same type.
 template <class T>
-using BinaryOp = BinaryOpBase<T, OpKernel, OpKernelConstruction>;
+class BinaryOp : public OpKernel {
+ public:
+  explicit BinaryOp(OpKernelConstruction* context) : OpKernel(context) {
+    const DataType dt = DataTypeToEnum<T>::v();
+    OP_REQUIRES_OK(context, context->MatchSignature({dt, dt}, {dt}));
+  }
+};

 // For operations where the input and output are the same shape.
 //
--- a/tensorflow/core/framework/numeric_op_base.h
+++ b/tensorflow/core/framework/numeric_op_base.h
@ -1,49 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
-#define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
-
-#include "tensorflow/core/framework/op_requires.h"
-#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/framework/types.pb.h"
-#include "tensorflow/core/lib/core/status.h"
-
-namespace tensorflow {
-
-// One input and one output, both the same type.
-template <class T, class OpKernelT, class OpKernelConstructionT>
-class UnaryOpBase : public OpKernelT {
- public:
-  explicit UnaryOpBase(OpKernelConstructionT* construction) :
-      OpKernelT(construction) {
-    const DataType dt = DataTypeToEnum<T>::v();
-    OP_REQUIRES_OK(construction, construction->MatchSignature({dt}, {dt}));
-  }
-};
-
-// Two inputs and one output, all the same type.
-template <class T, class OpKernelT, class OpKernelConstructionT>
-class BinaryOpBase : public OpKernelT {
- public:
-  explicit BinaryOpBase(OpKernelConstructionT* construction) :
-      OpKernelT(construction) {
-    const DataType dt = DataTypeToEnum<T>::v();
-    OP_REQUIRES_OK(construction, construction->MatchSignature({dt, dt}, {dt}));
-  }
-};
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@ -3297,48 +3297,6 @@ cc_library(
    }),
 )

-# TODO(annarev): conv_ops_3d_headers currently depends on android target build
-# from selected sources. We should switch to use granular dependencies instead.
-# Then, we can just depend on "conv3d".
-cc_library(
-    name = "conv_3d_mobile",
-    hdrs = [
-        "conv_3d.h",
-        "eigen_backward_cuboid_convolutions.h",
-        "eigen_convolution_helpers.h",
-        "eigen_cuboid_convolution.h",
-        "eigen_volume_patch.h",
-    ],
-    deps = [
-        ":eigen_spatial_convolutions-inl",
-    ] + select({
-        "//tensorflow:android": [
-            "//tensorflow/core:portable_tensorflow_lib_lite",  # TODO(annarev): exclude runtime srcs
-        ],
-        "//conditions:default": [
-            "//tensorflow/core:framework",
-        ],
-    }),
-)
-
-cc_library(
-    name = "conv_ops_3d_headers",
-    hdrs = [
-        "conv_ops_3d.h",
-    ],
-    deps = select({
-        "//tensorflow:android": [
-            ":conv_3d_mobile",
-            "//tensorflow/core:portable_tensorflow_lib_lite",  # TODO(annarev): exclude runtime srcs
-        ],
-        "//conditions:default": [
-            ":conv_3d",
-            "//third_party/eigen3",
-            "//tensorflow/core:framework",
-        ],
-    }),
-)
-
 tf_kernel_library(
    name = "argmax_op",
    prefix = "argmax_op",
@ -3810,6 +3768,7 @@ tf_kernel_library(
        "deep_conv2d.h",
        "gemm_functors.h",
        "winograd_transform.h",
+        "conv_ops_fused_impl.h",
    ] + select({
        ":xsmm_convolutions": ["xsmm_conv2d.h"],
        "//conditions:default": [],
@ -3824,7 +3783,6 @@ tf_kernel_library(
    prefix = "conv_ops",
    deps = [
        ":conv_grad_shape_utils",
-        ":conv_ops_3d_headers",
        ":conv_2d",
        ":conv_3d",
        ":eigen_contraction_kernel",
@ -5948,7 +5906,6 @@ filegroup(
        "conv_2d.h",
        "conv_3d.h",
        "conv_ops.h",
-        "conv_ops_3d.h",
        "conv_ops_gpu.h",
        "data_format_ops.h",
        "depthtospace_op.h",
@ -6445,7 +6402,6 @@ filegroup(
        "stateful_random_ops_cpu_gpu.h",
        # Allows conv_3d ops for android but excluded from *_3d* rule above.
        "conv_3d.h",
-        "conv_ops_3d.h",
        "conv_ops_3d.cc",
        "conv_ops_gpu.h",
    ],
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@ -16,8 +16,7 @@ limitations under the License.
 #define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS

-#include "tensorflow/core/kernels/conv_ops_3d.h"
-
+#include "tensorflow/core/framework/kernel_shape_util.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@ -51,11 +50,147 @@ namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;

+template <typename Device, typename T>
+struct LaunchConvOp;
+
+template <typename T>
+struct LaunchConvOp<CPUDevice, T> {
+  static void launch(OpKernelContext* context, bool cudnn_use_autotune,
+                     const Tensor& input, const Tensor& filter,
+                     const std::array<int64, 3>& dilations,
+                     const std::array<int64, 3>& strides, const Padding padding,
+                     TensorFormat data_format, Tensor* output) {
+    OP_REQUIRES(context, data_format == FORMAT_NHWC,
+                errors::InvalidArgument("CPU implementation of Conv3D "
+                                        "currently only supports the NHWC "
+                                        "tensor format."));
+    OP_REQUIRES(context,
+                dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1,
+                errors::InvalidArgument("CPU implementation of Conv3D "
+                                        "currently only supports dilated rates "
+                                        "of 1."));
+    functor::CuboidConvolution<CPUDevice, T>()(
+        context->eigen_device<CPUDevice>(), output->tensor<T, 5>(),
+        input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
+        strides[0], BrainPadding2EigenPadding(padding));
+  }
+};
+
+template <typename Device, typename T>
+class Conv3DOp : public BinaryOp<T> {
+ public:
+  explicit Conv3DOp(OpKernelConstruction* context) : BinaryOp<T>(context) {
+    string data_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+    OP_REQUIRES(context, stride_.size() == 5,
+                errors::InvalidArgument("Sliding window strides field must "
+                                        "specify 5 dimensions"));
+    OP_REQUIRES(
+        context,
+        (GetTensorDim(stride_, data_format_, 'N') == 1 &&
+         GetTensorDim(stride_, data_format_, 'C') == 1),
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context,
+        (GetTensorDim(stride_, data_format_, '0') > 0 &&
+         GetTensorDim(stride_, data_format_, '1') > 0 &&
+         GetTensorDim(stride_, data_format_, '2') > 0),
+        errors::InvalidArgument("Spatial strides should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_));
+    OP_REQUIRES(context, dilation_.size() == 5,
+                errors::InvalidArgument("Dilation rates field must "
+                                        "specify 5 dimensions"));
+    OP_REQUIRES(context,
+                (GetTensorDim(dilation_, data_format_, 'N') == 1 &&
+                 GetTensorDim(dilation_, data_format_, 'C') == 1),
+                errors::InvalidArgument(
+                    "Current implementation does not yet support "
+                    "dilation rates in the batch and depth dimensions."));
+    OP_REQUIRES(
+        context,
+        (GetTensorDim(dilation_, data_format_, '0') > 0 &&
+         GetTensorDim(dilation_, data_format_, '1') > 0 &&
+         GetTensorDim(dilation_, data_format_, '2') > 0),
+        errors::InvalidArgument("Dilated rates should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    cudnn_use_autotune_ = CudnnUseAutotune();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // Input tensor is of the following dimensions:
+    // [ batch, in_z, in_y, in_x, in_channels ]
+    const Tensor& input = context->input(0);
+
+    // Input filter is of the following dimensions:
+    // [ filter_z, filter_y, filter_x, in_channels, out_channels]
+    const Tensor& filter = context->input(1);
+
+    // NOTE: The ordering of the spatial dimensions is arbitrary, but has to be
+    // kept consistent between input/filter/output.
+    OP_REQUIRES(context, input.dims() == 5,
+                errors::InvalidArgument("input must be 5-dimensional"));
+    OP_REQUIRES(context, filter.dims() == 5,
+                errors::InvalidArgument("filter must be 5-dimensional"));
+
+    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
+    const int64 in_batch = GetTensorDim(input, data_format_, 'N');
+
+    const int64 filter_depth = filter.dim_size(3);
+    const int64 out_depth = filter.dim_size(4);
+
+    OP_REQUIRES(context, in_depth % filter_depth == 0,
+                errors::InvalidArgument(
+                    "Input depth must be evenly divisible by filter depth: ",
+                    in_depth, " vs ", filter_depth));
+
+    // Dimension order for these arrays is: z, y, x.
+    std::array<int64, 3> input_size = {
+        {GetTensorDim(input, data_format_, '0'),
+         GetTensorDim(input, data_format_, '1'),
+         GetTensorDim(input, data_format_, '2')}};
+    std::array<int64, 3> filter_size = {
+        {filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}};
+    std::array<int64, 3> dilations = {
+        {GetTensorDim(dilation_, data_format_, '0'),
+         GetTensorDim(dilation_, data_format_, '1'),
+         GetTensorDim(dilation_, data_format_, '2')}};
+    std::array<int64, 3> strides = {{GetTensorDim(stride_, data_format_, '0'),
+                                     GetTensorDim(stride_, data_format_, '1'),
+                                     GetTensorDim(stride_, data_format_, '2')}};
+    std::array<int64, 3> out, padding;
+
+    OP_REQUIRES_OK(
+        context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides,
+                                   padding_, &out, &padding));
+    TensorShape out_shape = ShapeFromFormat(
+        data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth);
+    Tensor* output;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+    // Return early if nothing to do.
+    if (out_shape.num_elements() == 0) return;
+
+    LaunchConvOp<Device, T>::launch(context, cudnn_use_autotune_, input, filter,
+                                    dilations, strides, padding_, data_format_,
+                                    output);
+  }
+
+ private:
+  std::vector<int32> dilation_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_;
+  bool cudnn_use_autotune_;
+};
+
 #define REGISTER_CPU_KERNEL(T)                                  \
  REGISTER_KERNEL_BUILDER(                                      \
      Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
-      Conv3DOp<CPUDevice, T, OpKernel, OpKernelConstruction,    \
-               OpKernelContext>);
+      Conv3DOp<CPUDevice, T>);
 TF_CALL_half(REGISTER_CPU_KERNEL);
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
@ -73,7 +208,7 @@ typedef AutoTuneSingleton<Conv3dAutoTuneGroup, ConvParameters,

 // TODO(mjanusz): Share logic with 2d implementation as much as possible.
 template <typename T>
-struct LaunchConvOp<GPUDevice, T, OpKernelContext> {
+struct LaunchConvOp<GPUDevice, T> {
  static void launch(OpKernelContext* ctx, bool cudnn_use_autotune,
                     const Tensor& input_param, const Tensor& filter,
                     const std::array<int64, 3>& dilations,
@ -548,16 +683,13 @@ DECLARE_GPU_SPEC(double);
 // Registration of the GPU implementations.
 REGISTER_KERNEL_BUILDER(
    Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
-    Conv3DOp<GPUDevice, Eigen::half, OpKernel, OpKernelConstruction,
-             OpKernelContext>);
+    Conv3DOp<GPUDevice, Eigen::half>);
 REGISTER_KERNEL_BUILDER(
    Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
-    Conv3DOp<GPUDevice, float, OpKernel, OpKernelConstruction,
-             OpKernelContext>);
+    Conv3DOp<GPUDevice, float>);
 REGISTER_KERNEL_BUILDER(
    Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<double>("T"),
-    Conv3DOp<GPUDevice, double, OpKernel, OpKernelConstruction,
-             OpKernelContext>);
+    Conv3DOp<GPUDevice, double>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 }  // namespace tensorflow
--- a/tensorflow/core/kernels/conv_ops_3d.h
+++ b/tensorflow/core/kernels/conv_ops_3d.h
@ -1,187 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_
-#define TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_
-
-#include <vector>
-
-#define USE_EIGEN_TENSOR
-#define EIGEN_USE_THREADS
-
-#include "tensorflow/core/framework/numeric_op_base.h"
-#include "tensorflow/core/framework/kernel_shape_util.h"
-#include "tensorflow/core/framework/op_requires.h"
-#include "tensorflow/core/framework/ops_util.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/conv_3d.h"
-#include "tensorflow/core/platform/errors.h"
-#include "tensorflow/core/util/padding.h"
-#include "tensorflow/core/util/tensor_format.h"
-#if GOOGLE_CUDA
-#include "tensorflow/core/util/use_cudnn.h"
-#endif
-
-namespace tensorflow {
-typedef Eigen::ThreadPoolDevice CPUDevice;
-
-template <typename Device, typename T, class OpKernelContextT>
-struct LaunchConvOp;
-
-template <typename T, class OpKernelContextT>
-struct LaunchConvOp<CPUDevice, T, OpKernelContextT> {
-  static void launch(OpKernelContextT* context, bool cudnn_use_autotune,
-                     const Tensor& input, const Tensor& filter,
-                     const std::array<int64, 3>& dilations,
-                     const std::array<int64, 3>& strides, const Padding padding,
-                     TensorFormat data_format, Tensor* output) {
-    OP_REQUIRES(context, data_format == FORMAT_NHWC,
-                errors::InvalidArgument("CPU implementation of Conv3D "
-                                        "currently only supports the NHWC "
-                                        "tensor format."));
-    OP_REQUIRES(context,
-                dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1,
-                errors::InvalidArgument("CPU implementation of Conv3D "
-                                        "currently only supports dilated rates "
-                                        "of 1."));
-    functor::CuboidConvolution<CPUDevice, T>()(
-        context->template eigen_device<CPUDevice>(), output->tensor<T, 5>(),
-        input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
-        strides[0], BrainPadding2EigenPadding(padding));
-  }
-};
-
-template <typename Device, typename T, class OpKernelT,
-          class OpKernelConstructionT, class OpKernelContextT>
-class Conv3DOp : public BinaryOpBase<T, OpKernelT, OpKernelConstructionT> {
- public:
-  explicit Conv3DOp(OpKernelConstructionT* context) :
-      BinaryOpBase<T, OpKernelT, OpKernelConstructionT>(context) {
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
-    OP_REQUIRES(context, stride_.size() == 5,
-                errors::InvalidArgument("Sliding window strides field must "
-                                        "specify 5 dimensions"));
-    OP_REQUIRES(
-        context,
-        (GetTensorDim(stride_, data_format_, 'N') == 1 &&
-         GetTensorDim(stride_, data_format_, 'C') == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES(
-        context,
-        (GetTensorDim(stride_, data_format_, '0') > 0 &&
-         GetTensorDim(stride_, data_format_, '1') > 0 &&
-         GetTensorDim(stride_, data_format_, '2') > 0),
-        errors::InvalidArgument("Spatial strides should be larger than 0."));
-    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_));
-    OP_REQUIRES(context, dilation_.size() == 5,
-                errors::InvalidArgument("Dilation rates field must "
-                                        "specify 5 dimensions"));
-    OP_REQUIRES(context,
-                (GetTensorDim(dilation_, data_format_, 'N') == 1 &&
-                 GetTensorDim(dilation_, data_format_, 'C') == 1),
-                errors::InvalidArgument(
-                    "Current implementation does not yet support "
-                    "dilation rates in the batch and depth dimensions."));
-    OP_REQUIRES(
-        context,
-        (GetTensorDim(dilation_, data_format_, '0') > 0 &&
-         GetTensorDim(dilation_, data_format_, '1') > 0 &&
-         GetTensorDim(dilation_, data_format_, '2') > 0),
-        errors::InvalidArgument("Dilated rates should be larger than 0."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
-#if GOOGLE_CUDA
-    cudnn_use_autotune_ = CudnnUseAutotune();
-#else
-    cudnn_use_autotune_ = false;
-#endif
-  }
-
-  void Compute(OpKernelContextT* context) override {
-    // Input tensor is of the following dimensions:
-    // [ batch, in_z, in_y, in_x, in_channels ]
-    const Tensor& input = context->input(0);
-
-    // Input filter is of the following dimensions:
-    // [ filter_z, filter_y, filter_x, in_channels, out_channels]
-    const Tensor& filter = context->input(1);
-
-    // NOTE: The ordering of the spatial dimensions is arbitrary, but has to be
-    // kept consistent between input/filter/output.
-    OP_REQUIRES(context, input.dims() == 5,
-                errors::InvalidArgument("input must be 5-dimensional"));
-    OP_REQUIRES(context, filter.dims() == 5,
-                errors::InvalidArgument("filter must be 5-dimensional"));
-
-    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
-    const int64 in_batch = GetTensorDim(input, data_format_, 'N');
-
-    const int64 filter_depth = filter.dim_size(3);
-    const int64 out_depth = filter.dim_size(4);
-
-    OP_REQUIRES(context, in_depth % filter_depth == 0,
-                errors::InvalidArgument(
-                    "Input depth must be evenly divisible by filter depth: ",
-                    in_depth, " vs ", filter_depth));
-
-    // Dimension order for these arrays is: z, y, x.
-    std::array<int64, 3> input_size = {
-        {GetTensorDim(input, data_format_, '0'),
-         GetTensorDim(input, data_format_, '1'),
-         GetTensorDim(input, data_format_, '2')}};
-    std::array<int64, 3> filter_size = {
-        {filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}};
-    std::array<int64, 3> dilations = {
-        {GetTensorDim(dilation_, data_format_, '0'),
-         GetTensorDim(dilation_, data_format_, '1'),
-         GetTensorDim(dilation_, data_format_, '2')}};
-    std::array<int64, 3> strides = {{GetTensorDim(stride_, data_format_, '0'),
-                                     GetTensorDim(stride_, data_format_, '1'),
-                                     GetTensorDim(stride_, data_format_, '2')}};
-    std::array<int64, 3> out, padding;
-
-    OP_REQUIRES_OK(
-        context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides,
-                                   padding_, &out, &padding));
-    TensorShape out_shape = ShapeFromFormat(
-        data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth);
-    Tensor* output;
-    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
-
-    // Return early if nothing to do.
-    if (out_shape.num_elements() == 0) return;
-
-    LaunchConvOp<Device, T, OpKernelContextT>::launch(
-        context, cudnn_use_autotune_, input, filter,
-        dilations, strides, padding_, data_format_,
-        output);
-  }
-
- private:
-  std::vector<int32> dilation_;
-  std::vector<int32> stride_;
-  Padding padding_;
-  TensorFormat data_format_;
-  bool cudnn_use_autotune_;
-};
-
-}  // namespace tensorflow
-
-
-#endif  // TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_