diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index f859631888a..12cc2b01606 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -7,6 +7,11 @@ tensorflow/core/kernels/transpose_functor_cpu.cc tensorflow/core/kernels/training_ops.cc tensorflow/core/kernels/topk_op.cc tensorflow/core/kernels/tile_ops.cc +tensorflow/core/kernels/tile_ops_cpu_impl_1.cc +tensorflow/core/kernels/tile_ops_cpu_impl_2.cc +tensorflow/core/kernels/tile_ops_cpu_impl_3.cc +tensorflow/core/kernels/tile_ops_cpu_impl_4.cc +tensorflow/core/kernels/tile_ops_cpu_impl_5.cc tensorflow/core/kernels/strided_slice_op_inst_6.cc tensorflow/core/kernels/strided_slice_op_inst_5.cc tensorflow/core/kernels/strided_slice_op_inst_4.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 7f8b2b439d6..7735e97842a 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1940,7 +1940,8 @@ filegroup( "save_restore_tensor.h", "softplus_op.h", "softsign_op.h", - "tile_ops.h", + "tile_ops_cpu_impl.h", + "tile_ops_impl.h", "training_ops.h", "transpose_functor.h", "transpose_op.h", @@ -2019,6 +2020,11 @@ filegroup( "stack_ops.cc", "summary_op.cc", "tile_ops.cc", + "tile_ops_cpu_impl_1.cc", + "tile_ops_cpu_impl_2.cc", + "tile_ops_cpu_impl_3.cc", + "tile_ops_cpu_impl_4.cc", + "tile_ops_cpu_impl_5.cc", "topk_op.cc", "training_ops.cc", "transpose_functor_cpu.cc", diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc index 5990bfbcf3c..4b2d2fa589c 100644 --- a/tensorflow/core/kernels/tile_ops.cc +++ b/tensorflow/core/kernels/tile_ops.cc @@ -21,23 +21,70 @@ limitations under the License. 
#define EIGEN_USE_GPU #endif // GOOGLE_CUDA -#include "tensorflow/core/kernels/tile_ops.h" - #include + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/type_index.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; +// Forward declarations of functors that will be defined in +// tile_ops_cpu_impl*.cc and tile_ops_gpu.cu.cc. +namespace functor { +template +struct Tile { + void operator()(const Device& d, typename TTypes::Tensor out, + typename TTypes::ConstTensor in, + const Eigen::array& broadcast_array) const; +}; + +template +struct Tile { + void operator()(const Device& d, typename TTypes::Tensor out, + typename TTypes::ConstTensor in, + const Eigen::array&) const; +}; + +template +struct TileGrad { + void operator()(const Device& d, typename TTypes::Tensor out, + typename TTypes::ConstTensor in, + const Eigen::DSizes& indices, + const Eigen::DSizes& sizes, + bool first) const; +}; + +template +struct TileGrad { + void operator()(const Device& d, typename TTypes::Tensor out, + typename TTypes::ConstTensor in, + const Eigen::DSizes&, + const Eigen::DSizes&, bool first) const; +}; + +template +struct ReduceAndReshape { + void operator()( + const Device& d, typename TTypes::Tensor out, + typename TTypes::ConstTensor in, + const Eigen::DSizes& reduce_dim, + const Eigen::DSizes& reshape_dim) const; +}; +} // namespace functor + // -------------------------------------------------------------------------- template class TileOp : public 
OpKernel { @@ -153,7 +200,7 @@ inline void TileOp::HandleCase( << DataTypeString(DT) << ", " << NDIM; } -#define HANDLE_CASE(device, dtype, ndim) \ +#define HANDLE_CASE(device, T, dtype, ndim) \ template <> \ template <> \ void TileOp::HandleCase( \ @@ -163,15 +210,18 @@ inline void TileOp::HandleCase( } // 0-D handled above -#define HANDLE_CASE_DIM(device, dtype) \ - HANDLE_CASE(device, dtype, 1); \ - HANDLE_CASE(device, dtype, 2); \ - HANDLE_CASE(device, dtype, 3); \ - HANDLE_CASE(device, dtype, 4); \ - HANDLE_CASE(device, dtype, 5); +#define HANDLE_CASE_DIM(device, T, dtype) \ + HANDLE_CASE(device, T, dtype, 1); \ + HANDLE_CASE(device, T, dtype, 2); \ + HANDLE_CASE(device, T, dtype, 3); \ + HANDLE_CASE(device, T, dtype, 4); \ + HANDLE_CASE(device, T, dtype, 5); #define HANDLE_TYPE_NAME_CPU(T) \ - HANDLE_CASE_DIM(CPUDevice, DataTypeToEnum::value); + HANDLE_CASE_DIM(CPUDevice, T, DataTypeToEnum::value); + +#define HANDLE_TYPE_NAME_GPU(T) \ + HANDLE_CASE_DIM(GPUDevice, T, DataTypeToEnum::value); TF_CALL_bool(HANDLE_TYPE_NAME_CPU); TF_CALL_float(HANDLE_TYPE_NAME_CPU); @@ -186,15 +236,16 @@ TF_CALL_complex128(HANDLE_TYPE_NAME_CPU); TF_CALL_string(HANDLE_TYPE_NAME_CPU); #if GOOGLE_CUDA -HANDLE_CASE_DIM(GPUDevice, DT_FLOAT); -HANDLE_CASE_DIM(GPUDevice, DT_DOUBLE); -HANDLE_CASE_DIM(GPUDevice, DT_INT16); -HANDLE_CASE_DIM(GPUDevice, DT_INT32); -HANDLE_CASE_DIM(GPUDevice, DT_INT64); -HANDLE_CASE_DIM(GPUDevice, DT_HALF); +TF_CALL_float(HANDLE_TYPE_NAME_GPU); +TF_CALL_double(HANDLE_TYPE_NAME_GPU); +TF_CALL_int16(HANDLE_TYPE_NAME_GPU); +TF_CALL_int32(HANDLE_TYPE_NAME_GPU); +TF_CALL_int64(HANDLE_TYPE_NAME_GPU); +TF_CALL_half(HANDLE_TYPE_NAME_GPU); #endif // GOOGLE_CUDA #undef HANDLE_TYPE_NAME_CPU +#undef HANDLE_TYPE_NAME_GPU #undef HANDLE_CASE_DIM #undef HANDLE_CASE @@ -385,7 +436,7 @@ inline void TileGradientOp::HandleCase( << ", " << NDIM; } -#define HANDLE_CASE(device, dtype, ndim) \ +#define HANDLE_CASE(device, T, dtype, ndim) \ template <> \ template <> \ void 
TileGradientOp::HandleCase( \ @@ -395,15 +446,18 @@ inline void TileGradientOp::HandleCase( } // 0-D handled specially above -#define HANDLE_CASE_DIM(device, dtype) \ - HANDLE_CASE(device, dtype, 1); \ - HANDLE_CASE(device, dtype, 2); \ - HANDLE_CASE(device, dtype, 3); \ - HANDLE_CASE(device, dtype, 4); \ - HANDLE_CASE(device, dtype, 5); +#define HANDLE_CASE_DIM(device, T, dtype) \ + HANDLE_CASE(device, T, dtype, 1); \ + HANDLE_CASE(device, T, dtype, 2); \ + HANDLE_CASE(device, T, dtype, 3); \ + HANDLE_CASE(device, T, dtype, 4); \ + HANDLE_CASE(device, T, dtype, 5); #define HANDLE_TYPE_NAME_CPU(T) \ - HANDLE_CASE_DIM(CPUDevice, DataTypeToEnum::value); + HANDLE_CASE_DIM(CPUDevice, T, DataTypeToEnum::value); + +#define HANDLE_TYPE_NAME_GPU(T) \ + HANDLE_CASE_DIM(GPUDevice, T, DataTypeToEnum::value); TF_CALL_float(HANDLE_TYPE_NAME_CPU); TF_CALL_double(HANDLE_TYPE_NAME_CPU); @@ -415,16 +469,16 @@ TF_CALL_complex64(HANDLE_TYPE_NAME_CPU); TF_CALL_complex128(HANDLE_TYPE_NAME_CPU); #if GOOGLE_CUDA -HANDLE_CASE_DIM(GPUDevice, DT_FLOAT); -HANDLE_CASE_DIM(GPUDevice, DT_DOUBLE); -HANDLE_CASE_DIM(GPUDevice, DT_INT16); -HANDLE_CASE_DIM(GPUDevice, DT_INT32); -HANDLE_CASE_DIM(GPUDevice, DT_INT64); -HANDLE_CASE_DIM(GPUDevice, DT_HALF); - +TF_CALL_float(HANDLE_TYPE_NAME_GPU); +TF_CALL_double(HANDLE_TYPE_NAME_GPU); +TF_CALL_int16(HANDLE_TYPE_NAME_GPU); +TF_CALL_int32(HANDLE_TYPE_NAME_GPU); +TF_CALL_int64(HANDLE_TYPE_NAME_GPU); +TF_CALL_half(HANDLE_TYPE_NAME_GPU); #endif // GOOGLE_CUDA #undef HANDLE_TYPE_NAME_CPU +#undef HANDLE_TYPE_NAME_GPU #undef HANDLE_CASE_DIM #undef HANDLE_CASE @@ -436,46 +490,6 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad") TileGradientOp); #if GOOGLE_CUDA -#define DEFINE_GPU_TYPE(T) \ - DEFINE_GPU_DIM(T, 1) \ - DEFINE_GPU_DIM(T, 2) \ - DEFINE_GPU_DIM(T, 3) \ - DEFINE_GPU_DIM(T, 4) \ - DEFINE_GPU_DIM(T, 5) - -#define DEFINE_GPU_DIM(T, NDIM) \ - template <> \ - void Tile::operator()( \ - const GPUDevice& d, typename TTypes::Tensor out, \ - typename 
TTypes::ConstTensor in, \ - const Eigen::array& broadcast_array) const; \ - extern template struct Tile; \ - template <> \ - void TileGrad::operator()( \ - const GPUDevice& d, typename TTypes::Tensor out, \ - typename TTypes::ConstTensor in, \ - const Eigen::DSizes& indices, \ - const Eigen::DSizes& sizes, bool first) const; \ - extern template struct TileGrad; \ - template <> \ - void ReduceAndReshape::operator()( \ - const GPUDevice& d, typename TTypes::Tensor out, \ - typename TTypes::ConstTensor in, \ - const Eigen::DSizes& reduce_dim, \ - const Eigen::DSizes& reshape_dim) const; \ - extern template struct ReduceAndReshape; - -namespace functor { -DEFINE_GPU_TYPE(float); -DEFINE_GPU_TYPE(double); -DEFINE_GPU_TYPE(int64); -DEFINE_GPU_TYPE(int32); -DEFINE_GPU_TYPE(int16); -DEFINE_GPU_TYPE(Eigen::half); -} // end namespace functor - -#undef DEFINE_GPU_DIM -#undef DEFINE_GPU_TYPE REGISTER_KERNEL_BUILDER(Name("Tile") .Device(DEVICE_GPU) diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl.h b/tensorflow/core/kernels/tile_ops_cpu_impl.h new file mode 100644 index 00000000000..9cdf69ad0bc --- /dev/null +++ b/tensorflow/core/kernels/tile_ops_cpu_impl.h @@ -0,0 +1,68 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_TILE_OPS_CPU_IMPL_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_TILE_OPS_CPU_IMPL_H_ + +#define EIGEN_USE_THREADS + +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/tile_ops_impl.h" + +namespace tensorflow { +namespace functor { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +// Register functors used for TileOp. +#define DEFINE_DIM(T, NDIM) template struct Tile; +#define DEFINE_TYPE(T) DEFINE_DIM(T, CPU_PROVIDED_IXDIM) + +TF_CALL_bool(DEFINE_TYPE); +TF_CALL_float(DEFINE_TYPE); +TF_CALL_double(DEFINE_TYPE); +TF_CALL_uint8(DEFINE_TYPE); +TF_CALL_int32(DEFINE_TYPE); +TF_CALL_int16(DEFINE_TYPE); +TF_CALL_int64(DEFINE_TYPE); +TF_CALL_half(DEFINE_TYPE); +TF_CALL_complex64(DEFINE_TYPE); +TF_CALL_complex128(DEFINE_TYPE); +TF_CALL_string(DEFINE_TYPE); + +#undef DEFINE_DIM +#undef DEFINE_TYPE + +// Register functors used for TileGradientOp. +#define DEFINE_DIM(T, NDIM) \ + template struct TileGrad; \ + template struct ReduceAndReshape; +#define DEFINE_TYPE(T) DEFINE_DIM(T, CPU_PROVIDED_IXDIM) + +TF_CALL_float(DEFINE_TYPE); +TF_CALL_double(DEFINE_TYPE); +TF_CALL_int16(DEFINE_TYPE); +TF_CALL_int32(DEFINE_TYPE); +TF_CALL_int64(DEFINE_TYPE); +TF_CALL_half(DEFINE_TYPE); +TF_CALL_complex64(DEFINE_TYPE); +TF_CALL_complex128(DEFINE_TYPE); + +#undef DEFINE_DIM +#undef DEFINE_TYPE + +} // end namespace functor +} // end namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_TILE_OPS_CPU_IMPL_H_ diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl_1.cc b/tensorflow/core/kernels/tile_ops_cpu_impl_1.cc new file mode 100644 index 00000000000..47955057490 --- /dev/null +++ b/tensorflow/core/kernels/tile_ops_cpu_impl_1.cc @@ -0,0 +1,18 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define CPU_PROVIDED_IXDIM 1 +#include "tensorflow/core/kernels/tile_ops_cpu_impl.h" +#undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl_2.cc b/tensorflow/core/kernels/tile_ops_cpu_impl_2.cc new file mode 100644 index 00000000000..7fcd31c783b --- /dev/null +++ b/tensorflow/core/kernels/tile_ops_cpu_impl_2.cc @@ -0,0 +1,18 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#define CPU_PROVIDED_IXDIM 2 +#include "tensorflow/core/kernels/tile_ops_cpu_impl.h" +#undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl_3.cc b/tensorflow/core/kernels/tile_ops_cpu_impl_3.cc new file mode 100644 index 00000000000..3e835b43d2a --- /dev/null +++ b/tensorflow/core/kernels/tile_ops_cpu_impl_3.cc @@ -0,0 +1,18 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define CPU_PROVIDED_IXDIM 3 +#include "tensorflow/core/kernels/tile_ops_cpu_impl.h" +#undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl_4.cc b/tensorflow/core/kernels/tile_ops_cpu_impl_4.cc new file mode 100644 index 00000000000..872f654cb9f --- /dev/null +++ b/tensorflow/core/kernels/tile_ops_cpu_impl_4.cc @@ -0,0 +1,18 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define CPU_PROVIDED_IXDIM 4 +#include "tensorflow/core/kernels/tile_ops_cpu_impl.h" +#undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl_5.cc b/tensorflow/core/kernels/tile_ops_cpu_impl_5.cc new file mode 100644 index 00000000000..91e332e53ae --- /dev/null +++ b/tensorflow/core/kernels/tile_ops_cpu_impl_5.cc @@ -0,0 +1,18 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define CPU_PROVIDED_IXDIM 5 +#include "tensorflow/core/kernels/tile_ops_cpu_impl.h" +#undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/tile_ops_gpu.cu.cc b/tensorflow/core/kernels/tile_ops_gpu.cu.cc index 3870c1a7bb8..787ffb4ea79 100644 --- a/tensorflow/core/kernels/tile_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/tile_ops_gpu.cu.cc @@ -17,8 +17,8 @@ limitations under the License. 
#define EIGEN_USE_GPU -#include "tensorflow/core/kernels/tile_ops.h" #include +#include "tensorflow/core/kernels/tile_ops_impl.h" namespace tensorflow { namespace functor { diff --git a/tensorflow/core/kernels/tile_ops.h b/tensorflow/core/kernels/tile_ops_impl.h similarity index 95% rename from tensorflow/core/kernels/tile_ops.h rename to tensorflow/core/kernels/tile_ops_impl.h index b79ac4586c9..c41e4bd74b0 100644 --- a/tensorflow/core/kernels/tile_ops.h +++ b/tensorflow/core/kernels/tile_ops_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_KERNELS_TILE_OPS_H_ -#define TENSORFLOW_KERNELS_TILE_OPS_H_ +#ifndef TENSORFLOW_KERNELS_TILE_OPS_IMPL_H_ +#define TENSORFLOW_KERNELS_TILE_OPS_IMPL_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" @@ -91,4 +91,4 @@ struct ReduceAndReshape { } // end namespace functor } // end namespace tensorflow -#endif // TENSORFLOW_KERNELS_TILE_OPS_H_ +#endif // TENSORFLOW_KERNELS_TILE_OPS_IMPL_H_