Shard CPU implementation for mirror_pad to improve compile times

Change: 130444346
This commit is contained in:
A. Unique TensorFlower 2016-08-16 13:09:45 -08:00 committed by TensorFlower Gardener
parent 9d62d40f9f
commit 67b1a4b488
8 changed files with 182 additions and 0 deletions

View File

@ -52,6 +52,11 @@ tensorflow/core/kernels/pack_op.cc
tensorflow/core/kernels/ops_util.cc
tensorflow/core/kernels/no_op.cc
tensorflow/core/kernels/mirror_pad_op.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_1.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_2.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_3.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_4.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_5.cc
tensorflow/core/kernels/maxpooling_op.cc
tensorflow/core/kernels/matmul_op.cc
tensorflow/core/kernels/lrn_op.cc

View File

@ -149,6 +149,30 @@ class MirrorPadOp : public OpKernel {
using CpuDevice = Eigen::ThreadPoolDevice;
using GpuDevice = Eigen::GpuDevice;
namespace functor {

// The CPU MirrorPad functors are compiled in the per-rank shard files rather
// than in this translation unit. For every POD element type and every rank
// from 1 through 5, declare the operator() specialization and mark the struct
// `extern template` so that no implicit instantiation is generated here.
#define DECLARE_CPU_SPEC(T, Rank)                                 \
  template <>                                                     \
  void MirrorPad<CpuDevice, T, Rank>::operator()(                 \
      const CpuDevice&, typename TTypes<T, Rank, int32>::Tensor,  \
      typename TTypes<T, Rank, int32>::ConstTensor,               \
      TTypes<int32>::ConstMatrix, int);                           \
  extern template struct MirrorPad<CpuDevice, T, Rank>;

// Expands DECLARE_CPU_SPEC once per supported rank for element type T.
#define DECLARE_CPU_SPECS(T) \
  DECLARE_CPU_SPEC(T, 1);    \
  DECLARE_CPU_SPEC(T, 2);    \
  DECLARE_CPU_SPEC(T, 3);    \
  DECLARE_CPU_SPEC(T, 4);    \
  DECLARE_CPU_SPEC(T, 5);

TF_CALL_POD_TYPES(DECLARE_CPU_SPECS);

// The helper macros are only needed for the expansion above.
#undef DECLARE_CPU_SPECS
#undef DECLARE_CPU_SPEC

}  // namespace functor
#define REGISTER_KERNEL(type) \
REGISTER_KERNEL_BUILDER(Name("MirrorPad") \
.Device(DEVICE_CPU) \
@ -308,6 +332,29 @@ class MirrorPadGradOp : public OpKernel {
int offset_;
};
namespace functor {

// The CPU MirrorPadGrad functors are compiled in the per-rank shard files
// rather than in this translation unit. For every number type and every rank
// from 1 through 5, declare the operator() specialization and mark the struct
// `extern template` so that no implicit instantiation is generated here.
#define DECLARE_CPU_SPEC(T, Rank)                                 \
  template <>                                                     \
  void MirrorPadGrad<CpuDevice, T, Rank>::operator()(             \
      const CpuDevice&, typename TTypes<T, Rank, int32>::Tensor,  \
      typename TTypes<T, Rank, int32>::ConstTensor,               \
      TTypes<int32>::ConstMatrix, int,                            \
      typename TTypes<T, Rank, int32>::Tensor);                   \
  extern template struct MirrorPadGrad<CpuDevice, T, Rank>;

// Expands DECLARE_CPU_SPEC once per supported rank for element type T.
#define DECLARE_CPU_SPECS(T) \
  DECLARE_CPU_SPEC(T, 1);    \
  DECLARE_CPU_SPEC(T, 2);    \
  DECLARE_CPU_SPEC(T, 3);    \
  DECLARE_CPU_SPEC(T, 4);    \
  DECLARE_CPU_SPEC(T, 5);

TF_CALL_NUMBER_TYPES(DECLARE_CPU_SPECS);

// The helper macros are only needed for the expansion above.
#undef DECLARE_CPU_SPEC
#undef DECLARE_CPU_SPECS

}  // namespace functor
#define REGISTER_KERNEL(type) \
REGISTER_KERNEL_BUILDER(Name("MirrorPadGrad") \
.Device(DEVICE_CPU) \

View File

@ -0,0 +1,40 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_CPU_IMPL_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_CPU_IMPL_H_

// Shared body of the mirror_pad_op_cpu_impl_<N>.cc shards.
//
// Each shard defines CPU_PROVIDED_IXDIM to a single rank and then includes
// this header, which explicitly instantiates the CPU MirrorPad and
// MirrorPadGrad functors for that rank only. Splitting the instantiations by
// rank spreads the template compilation over several translation units.

// Defined ahead of the includes below; presumably required so that
// Eigen::ThreadPoolDevice is available -- confirm against the Eigen headers.
#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/kernels/mirror_pad_op.h"

// Fail with a readable message instead of an obscure template error when the
// header is included without selecting a rank.
#ifndef CPU_PROVIDED_IXDIM
#error "CPU_PROVIDED_IXDIM must be defined before including mirror_pad_op_cpu_impl.h"
#endif

namespace tensorflow {

using CpuDevice = Eigen::ThreadPoolDevice;

// Explicitly instantiate the forward functor for every POD element type at the
// rank chosen by the including shard.
#define DEFINE_CPU_SPECS(T) \
  template struct functor::MirrorPad<CpuDevice, T, CPU_PROVIDED_IXDIM>;
TF_CALL_POD_TYPES(DEFINE_CPU_SPECS);
#undef DEFINE_CPU_SPECS

// Explicitly instantiate the gradient functor for every number type at the
// rank chosen by the including shard.
#define DEFINE_CPU_SPECS(T) \
  template struct functor::MirrorPadGrad<CpuDevice, T, CPU_PROVIDED_IXDIM>;
TF_CALL_NUMBER_TYPES(DEFINE_CPU_SPECS);
#undef DEFINE_CPU_SPECS

}  // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_CPU_IMPL_H_

View File

@ -0,0 +1,18 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Rank-1 shard: the included header explicitly instantiates the CPU MirrorPad
// and MirrorPadGrad functors for the rank given by CPU_PROVIDED_IXDIM, so the
// macro must be defined before the include.
#define CPU_PROVIDED_IXDIM 1
#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h"
#undef CPU_PROVIDED_IXDIM

View File

@ -0,0 +1,18 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Rank-2 shard: the included header explicitly instantiates the CPU MirrorPad
// and MirrorPadGrad functors for the rank given by CPU_PROVIDED_IXDIM, so the
// macro must be defined before the include.
#define CPU_PROVIDED_IXDIM 2
#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h"
#undef CPU_PROVIDED_IXDIM

View File

@ -0,0 +1,18 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Rank-3 shard: the included header explicitly instantiates the CPU MirrorPad
// and MirrorPadGrad functors for the rank given by CPU_PROVIDED_IXDIM, so the
// macro must be defined before the include.
#define CPU_PROVIDED_IXDIM 3
#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h"
#undef CPU_PROVIDED_IXDIM

View File

@ -0,0 +1,18 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Rank-4 shard: the included header explicitly instantiates the CPU MirrorPad
// and MirrorPadGrad functors for the rank given by CPU_PROVIDED_IXDIM, so the
// macro must be defined before the include.
#define CPU_PROVIDED_IXDIM 4
#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h"
#undef CPU_PROVIDED_IXDIM

View File

@ -0,0 +1,18 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Rank-5 shard: the included header explicitly instantiates the CPU MirrorPad
// and MirrorPadGrad functors for the rank given by CPU_PROVIDED_IXDIM, so the
// macro must be defined before the include.
#define CPU_PROVIDED_IXDIM 5
#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h"
#undef CPU_PROVIDED_IXDIM