Remove changes made to support TFRT-based OpKernel classes in Conv3d kernel.

This is essentially a rollback of 2f10fa7810 since we decided not to follow this approach.

PiperOrigin-RevId: 346660110
Change-Id: I1c03d47d4f4b42f74ef23f4c8e1d6d3f8207b240
This commit is contained in:
Anna R 2020-12-09 16:13:53 -08:00 committed by TensorFlower Gardener
parent b43da1eb10
commit cc5ea84696
6 changed files with 163 additions and 298 deletions

View File

@ -68,7 +68,6 @@ exports_files(
"model.h",
"node_def_builder.h",
"numeric_op.h",
"numeric_op_base.h",
"op_kernel.h",
"op_requires.h",
"op_segment.h",
@ -204,7 +203,6 @@ filegroup(
"node_def_util.h",
"node_properties.h",
"numeric_op.h",
"numeric_op_base.h",
"numeric_types.h",
"op.h",
"op_def_builder.h",
@ -305,7 +303,6 @@ filegroup(
"kernel_shape_util.h",
"log_memory.cc",
"log_memory.h",
"numeric_op_base.h",
"numeric_types.h",
"op_requires.h",
"ops_util.cc",

View File

@ -12,22 +12,38 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_
#define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_
#include "tensorflow/core/framework/numeric_op_base.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
namespace tensorflow {
// One input and one output, both the same type.
template <class T>
using UnaryOp = UnaryOpBase<T, OpKernel, OpKernelConstruction>;
class UnaryOp : public OpKernel {
public:
explicit UnaryOp(OpKernelConstruction* context) : OpKernel(context) {
const DataType dt = DataTypeToEnum<T>::v();
OP_REQUIRES_OK(context, context->MatchSignature({dt}, {dt}));
}
};
// Two inputs and one output, all the same type.
template <class T>
using BinaryOp = BinaryOpBase<T, OpKernel, OpKernelConstruction>;
class BinaryOp : public OpKernel {
public:
explicit BinaryOp(OpKernelConstruction* context) : OpKernel(context) {
const DataType dt = DataTypeToEnum<T>::v();
OP_REQUIRES_OK(context, context->MatchSignature({dt, dt}, {dt}));
}
};
// For operations where the input and output are the same shape.
//

View File

@ -1,49 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
#define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
#include "tensorflow/core/framework/op_requires.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/lib/core/status.h"
namespace tensorflow {
// One input and one output, both the same type.
template <class T, class OpKernelT, class OpKernelConstructionT>
class UnaryOpBase : public OpKernelT {
public:
explicit UnaryOpBase(OpKernelConstructionT* construction) :
OpKernelT(construction) {
const DataType dt = DataTypeToEnum<T>::v();
OP_REQUIRES_OK(construction, construction->MatchSignature({dt}, {dt}));
}
};
// Two inputs and one output, all the same type.
template <class T, class OpKernelT, class OpKernelConstructionT>
class BinaryOpBase : public OpKernelT {
public:
explicit BinaryOpBase(OpKernelConstructionT* construction) :
OpKernelT(construction) {
const DataType dt = DataTypeToEnum<T>::v();
OP_REQUIRES_OK(construction, construction->MatchSignature({dt, dt}, {dt}));
}
};
} // namespace tensorflow
#endif // TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_

View File

@ -3297,48 +3297,6 @@ cc_library(
}),
)
# TODO(annarev): conv_ops_3d_headers currently depends on android target build
# from selected sources. We should switch to use granular dependencies instead.
# Then, we can just depend on "conv3d".
cc_library(
name = "conv_3d_mobile",
hdrs = [
"conv_3d.h",
"eigen_backward_cuboid_convolutions.h",
"eigen_convolution_helpers.h",
"eigen_cuboid_convolution.h",
"eigen_volume_patch.h",
],
deps = [
":eigen_spatial_convolutions-inl",
] + select({
"//tensorflow:android": [
"//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs
],
"//conditions:default": [
"//tensorflow/core:framework",
],
}),
)
cc_library(
name = "conv_ops_3d_headers",
hdrs = [
"conv_ops_3d.h",
],
deps = select({
"//tensorflow:android": [
":conv_3d_mobile",
"//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs
],
"//conditions:default": [
":conv_3d",
"//third_party/eigen3",
"//tensorflow/core:framework",
],
}),
)
tf_kernel_library(
name = "argmax_op",
prefix = "argmax_op",
@ -3810,6 +3768,7 @@ tf_kernel_library(
"deep_conv2d.h",
"gemm_functors.h",
"winograd_transform.h",
"conv_ops_fused_impl.h",
] + select({
":xsmm_convolutions": ["xsmm_conv2d.h"],
"//conditions:default": [],
@ -3824,7 +3783,6 @@ tf_kernel_library(
prefix = "conv_ops",
deps = [
":conv_grad_shape_utils",
":conv_ops_3d_headers",
":conv_2d",
":conv_3d",
":eigen_contraction_kernel",
@ -5948,7 +5906,6 @@ filegroup(
"conv_2d.h",
"conv_3d.h",
"conv_ops.h",
"conv_ops_3d.h",
"conv_ops_gpu.h",
"data_format_ops.h",
"depthtospace_op.h",
@ -6445,7 +6402,6 @@ filegroup(
"stateful_random_ops_cpu_gpu.h",
# Allows conv_3d ops for android but excluded from *_3d* rule above.
"conv_3d.h",
"conv_ops_3d.h",
"conv_ops_3d.cc",
"conv_ops_gpu.h",
],

View File

@ -16,8 +16,7 @@ limitations under the License.
#define USE_EIGEN_TENSOR
#define EIGEN_USE_THREADS
#include "tensorflow/core/kernels/conv_ops_3d.h"
#include "tensorflow/core/framework/kernel_shape_util.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
@ -51,11 +50,147 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
template <typename Device, typename T>
struct LaunchConvOp;
template <typename T>
struct LaunchConvOp<CPUDevice, T> {
static void launch(OpKernelContext* context, bool cudnn_use_autotune,
const Tensor& input, const Tensor& filter,
const std::array<int64, 3>& dilations,
const std::array<int64, 3>& strides, const Padding padding,
TensorFormat data_format, Tensor* output) {
OP_REQUIRES(context, data_format == FORMAT_NHWC,
errors::InvalidArgument("CPU implementation of Conv3D "
"currently only supports the NHWC "
"tensor format."));
OP_REQUIRES(context,
dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1,
errors::InvalidArgument("CPU implementation of Conv3D "
"currently only supports dilated rates "
"of 1."));
functor::CuboidConvolution<CPUDevice, T>()(
context->eigen_device<CPUDevice>(), output->tensor<T, 5>(),
input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
strides[0], BrainPadding2EigenPadding(padding));
}
};
template <typename Device, typename T>
class Conv3DOp : public BinaryOp<T> {
public:
explicit Conv3DOp(OpKernelConstruction* context) : BinaryOp<T>(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
OP_REQUIRES(context, stride_.size() == 5,
errors::InvalidArgument("Sliding window strides field must "
"specify 5 dimensions"));
OP_REQUIRES(
context,
(GetTensorDim(stride_, data_format_, 'N') == 1 &&
GetTensorDim(stride_, data_format_, 'C') == 1),
errors::InvalidArgument("Current implementation does not yet support "
"strides in the batch and depth dimensions."));
OP_REQUIRES(
context,
(GetTensorDim(stride_, data_format_, '0') > 0 &&
GetTensorDim(stride_, data_format_, '1') > 0 &&
GetTensorDim(stride_, data_format_, '2') > 0),
errors::InvalidArgument("Spatial strides should be larger than 0."));
OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_));
OP_REQUIRES(context, dilation_.size() == 5,
errors::InvalidArgument("Dilation rates field must "
"specify 5 dimensions"));
OP_REQUIRES(context,
(GetTensorDim(dilation_, data_format_, 'N') == 1 &&
GetTensorDim(dilation_, data_format_, 'C') == 1),
errors::InvalidArgument(
"Current implementation does not yet support "
"dilation rates in the batch and depth dimensions."));
OP_REQUIRES(
context,
(GetTensorDim(dilation_, data_format_, '0') > 0 &&
GetTensorDim(dilation_, data_format_, '1') > 0 &&
GetTensorDim(dilation_, data_format_, '2') > 0),
errors::InvalidArgument("Dilated rates should be larger than 0."));
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
cudnn_use_autotune_ = CudnnUseAutotune();
}
void Compute(OpKernelContext* context) override {
// Input tensor is of the following dimensions:
// [ batch, in_z, in_y, in_x, in_channels ]
const Tensor& input = context->input(0);
// Input filter is of the following dimensions:
// [ filter_z, filter_y, filter_x, in_channels, out_channels]
const Tensor& filter = context->input(1);
// NOTE: The ordering of the spatial dimensions is arbitrary, but has to be
// kept consistent between input/filter/output.
OP_REQUIRES(context, input.dims() == 5,
errors::InvalidArgument("input must be 5-dimensional"));
OP_REQUIRES(context, filter.dims() == 5,
errors::InvalidArgument("filter must be 5-dimensional"));
const int64 in_depth = GetTensorDim(input, data_format_, 'C');
const int64 in_batch = GetTensorDim(input, data_format_, 'N');
const int64 filter_depth = filter.dim_size(3);
const int64 out_depth = filter.dim_size(4);
OP_REQUIRES(context, in_depth % filter_depth == 0,
errors::InvalidArgument(
"Input depth must be evenly divisible by filter depth: ",
in_depth, " vs ", filter_depth));
// Dimension order for these arrays is: z, y, x.
std::array<int64, 3> input_size = {
{GetTensorDim(input, data_format_, '0'),
GetTensorDim(input, data_format_, '1'),
GetTensorDim(input, data_format_, '2')}};
std::array<int64, 3> filter_size = {
{filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}};
std::array<int64, 3> dilations = {
{GetTensorDim(dilation_, data_format_, '0'),
GetTensorDim(dilation_, data_format_, '1'),
GetTensorDim(dilation_, data_format_, '2')}};
std::array<int64, 3> strides = {{GetTensorDim(stride_, data_format_, '0'),
GetTensorDim(stride_, data_format_, '1'),
GetTensorDim(stride_, data_format_, '2')}};
std::array<int64, 3> out, padding;
OP_REQUIRES_OK(
context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides,
padding_, &out, &padding));
TensorShape out_shape = ShapeFromFormat(
data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth);
Tensor* output;
OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
// Return early if nothing to do.
if (out_shape.num_elements() == 0) return;
LaunchConvOp<Device, T>::launch(context, cudnn_use_autotune_, input, filter,
dilations, strides, padding_, data_format_,
output);
}
private:
std::vector<int32> dilation_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
bool cudnn_use_autotune_;
};
#define REGISTER_CPU_KERNEL(T) \
REGISTER_KERNEL_BUILDER( \
Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
Conv3DOp<CPUDevice, T, OpKernel, OpKernelConstruction, \
OpKernelContext>);
Conv3DOp<CPUDevice, T>);
TF_CALL_half(REGISTER_CPU_KERNEL);
TF_CALL_float(REGISTER_CPU_KERNEL);
TF_CALL_double(REGISTER_CPU_KERNEL);
@ -73,7 +208,7 @@ typedef AutoTuneSingleton<Conv3dAutoTuneGroup, ConvParameters,
// TODO(mjanusz): Share logic with 2d implementation as much as possible.
template <typename T>
struct LaunchConvOp<GPUDevice, T, OpKernelContext> {
struct LaunchConvOp<GPUDevice, T> {
static void launch(OpKernelContext* ctx, bool cudnn_use_autotune,
const Tensor& input_param, const Tensor& filter,
const std::array<int64, 3>& dilations,
@ -548,16 +683,13 @@ DECLARE_GPU_SPEC(double);
// Registration of the GPU implementations.
REGISTER_KERNEL_BUILDER(
Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
Conv3DOp<GPUDevice, Eigen::half, OpKernel, OpKernelConstruction,
OpKernelContext>);
Conv3DOp<GPUDevice, Eigen::half>);
REGISTER_KERNEL_BUILDER(
Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
Conv3DOp<GPUDevice, float, OpKernel, OpKernelConstruction,
OpKernelContext>);
Conv3DOp<GPUDevice, float>);
REGISTER_KERNEL_BUILDER(
Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<double>("T"),
Conv3DOp<GPUDevice, double, OpKernel, OpKernelConstruction,
OpKernelContext>);
Conv3DOp<GPUDevice, double>);
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
} // namespace tensorflow

View File

@ -1,187 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_
#define TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_
#include <vector>
#define USE_EIGEN_TENSOR
#define EIGEN_USE_THREADS
#include "tensorflow/core/framework/numeric_op_base.h"
#include "tensorflow/core/framework/kernel_shape_util.h"
#include "tensorflow/core/framework/op_requires.h"
#include "tensorflow/core/framework/ops_util.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/kernels/conv_3d.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
#if GOOGLE_CUDA
#include "tensorflow/core/util/use_cudnn.h"
#endif
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
template <typename Device, typename T, class OpKernelContextT>
struct LaunchConvOp;
template <typename T, class OpKernelContextT>
struct LaunchConvOp<CPUDevice, T, OpKernelContextT> {
static void launch(OpKernelContextT* context, bool cudnn_use_autotune,
const Tensor& input, const Tensor& filter,
const std::array<int64, 3>& dilations,
const std::array<int64, 3>& strides, const Padding padding,
TensorFormat data_format, Tensor* output) {
OP_REQUIRES(context, data_format == FORMAT_NHWC,
errors::InvalidArgument("CPU implementation of Conv3D "
"currently only supports the NHWC "
"tensor format."));
OP_REQUIRES(context,
dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1,
errors::InvalidArgument("CPU implementation of Conv3D "
"currently only supports dilated rates "
"of 1."));
functor::CuboidConvolution<CPUDevice, T>()(
context->template eigen_device<CPUDevice>(), output->tensor<T, 5>(),
input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
strides[0], BrainPadding2EigenPadding(padding));
}
};
template <typename Device, typename T, class OpKernelT,
class OpKernelConstructionT, class OpKernelContextT>
class Conv3DOp : public BinaryOpBase<T, OpKernelT, OpKernelConstructionT> {
public:
explicit Conv3DOp(OpKernelConstructionT* context) :
BinaryOpBase<T, OpKernelT, OpKernelConstructionT>(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
OP_REQUIRES(context, stride_.size() == 5,
errors::InvalidArgument("Sliding window strides field must "
"specify 5 dimensions"));
OP_REQUIRES(
context,
(GetTensorDim(stride_, data_format_, 'N') == 1 &&
GetTensorDim(stride_, data_format_, 'C') == 1),
errors::InvalidArgument("Current implementation does not yet support "
"strides in the batch and depth dimensions."));
OP_REQUIRES(
context,
(GetTensorDim(stride_, data_format_, '0') > 0 &&
GetTensorDim(stride_, data_format_, '1') > 0 &&
GetTensorDim(stride_, data_format_, '2') > 0),
errors::InvalidArgument("Spatial strides should be larger than 0."));
OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_));
OP_REQUIRES(context, dilation_.size() == 5,
errors::InvalidArgument("Dilation rates field must "
"specify 5 dimensions"));
OP_REQUIRES(context,
(GetTensorDim(dilation_, data_format_, 'N') == 1 &&
GetTensorDim(dilation_, data_format_, 'C') == 1),
errors::InvalidArgument(
"Current implementation does not yet support "
"dilation rates in the batch and depth dimensions."));
OP_REQUIRES(
context,
(GetTensorDim(dilation_, data_format_, '0') > 0 &&
GetTensorDim(dilation_, data_format_, '1') > 0 &&
GetTensorDim(dilation_, data_format_, '2') > 0),
errors::InvalidArgument("Dilated rates should be larger than 0."));
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
#if GOOGLE_CUDA
cudnn_use_autotune_ = CudnnUseAutotune();
#else
cudnn_use_autotune_ = false;
#endif
}
void Compute(OpKernelContextT* context) override {
// Input tensor is of the following dimensions:
// [ batch, in_z, in_y, in_x, in_channels ]
const Tensor& input = context->input(0);
// Input filter is of the following dimensions:
// [ filter_z, filter_y, filter_x, in_channels, out_channels]
const Tensor& filter = context->input(1);
// NOTE: The ordering of the spatial dimensions is arbitrary, but has to be
// kept consistent between input/filter/output.
OP_REQUIRES(context, input.dims() == 5,
errors::InvalidArgument("input must be 5-dimensional"));
OP_REQUIRES(context, filter.dims() == 5,
errors::InvalidArgument("filter must be 5-dimensional"));
const int64 in_depth = GetTensorDim(input, data_format_, 'C');
const int64 in_batch = GetTensorDim(input, data_format_, 'N');
const int64 filter_depth = filter.dim_size(3);
const int64 out_depth = filter.dim_size(4);
OP_REQUIRES(context, in_depth % filter_depth == 0,
errors::InvalidArgument(
"Input depth must be evenly divisible by filter depth: ",
in_depth, " vs ", filter_depth));
// Dimension order for these arrays is: z, y, x.
std::array<int64, 3> input_size = {
{GetTensorDim(input, data_format_, '0'),
GetTensorDim(input, data_format_, '1'),
GetTensorDim(input, data_format_, '2')}};
std::array<int64, 3> filter_size = {
{filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}};
std::array<int64, 3> dilations = {
{GetTensorDim(dilation_, data_format_, '0'),
GetTensorDim(dilation_, data_format_, '1'),
GetTensorDim(dilation_, data_format_, '2')}};
std::array<int64, 3> strides = {{GetTensorDim(stride_, data_format_, '0'),
GetTensorDim(stride_, data_format_, '1'),
GetTensorDim(stride_, data_format_, '2')}};
std::array<int64, 3> out, padding;
OP_REQUIRES_OK(
context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides,
padding_, &out, &padding));
TensorShape out_shape = ShapeFromFormat(
data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth);
Tensor* output;
OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
// Return early if nothing to do.
if (out_shape.num_elements() == 0) return;
LaunchConvOp<Device, T, OpKernelContextT>::launch(
context, cudnn_use_autotune_, input, filter,
dilations, strides, padding_, data_format_,
output);
}
private:
std::vector<int32> dilation_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
bool cudnn_use_autotune_;
};
} // namespace tensorflow
#endif // TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_