From 4e7ce793d996dfed173bd46e90f489df9464a540 Mon Sep 17 00:00:00 2001
From: Gaurav Jain
Date: Mon, 8 Jun 2020 00:34:46 -0700
Subject: [PATCH] Merge various kernel registrations with macros

We add the TF_CALL_COMPLEX_TYPES macro and update related kernel
registrations to use the more compact macros rather than individual
dtype listings. This should be a no-op and should give better
visibility into the dtype coverage of many of our kernels.

PiperOrigin-RevId: 315224662
Change-Id: I14aad07711a407fa632a94d891238a48ae89bcab
---
 tensorflow/core/framework/register_types.h | 21 ++--
 tensorflow/core/kernels/aggregate_ops.cc | 5 +-
 .../core/kernels/aggregate_ops_gpu.cu.cc | 5 +-
 .../core/kernels/batch_matmul_op_complex.cc | 6 +-
 tensorflow/core/kernels/cholesky_op.cc | 3 +-
 tensorflow/core/kernels/concat_lib.h | 5 +-
 tensorflow/core/kernels/concat_lib_gpu.cc | 5 +-
 tensorflow/core/kernels/concat_lib_gpu.h | 5 +-
 .../core/kernels/concat_lib_gpu_impl.cu.cc | 28 ++---
 tensorflow/core/kernels/concat_op.cc | 7 +-
 tensorflow/core/kernels/diag_op.cc | 12 +-
 .../kernels/dynamic_partition_op_gpu.cu.cc | 3 +-
 tensorflow/core/kernels/dynamic_stitch_op.cc | 15 ++-
 .../core/kernels/dynamic_stitch_op_gpu.cu.cc | 7 +-
 tensorflow/core/kernels/einsum_op_gpu.cu.cc | 7 +-
 tensorflow/core/kernels/gather_functor.cc | 3 +-
 .../core/kernels/gather_functor_batched.cc | 3 +-
 .../kernels/gather_functor_batched_gpu.cu.cc | 5 +-
 .../core/kernels/gather_functor_gpu.cu.cc | 5 +-
 tensorflow/core/kernels/gather_nd_op.cc | 6 +-
 .../core/kernels/gather_nd_op_gpu.cu.cc | 3 +-
 tensorflow/core/kernels/gather_op.cc | 5 +-
 tensorflow/core/kernels/list_kernels.cu.cc | 7 +-
 tensorflow/core/kernels/matmul_op.cc | 15 +--
 .../core/kernels/matrix_band_part_op.cc | 10 +-
 .../kernels/matrix_band_part_op_gpu.cu.cc | 5 +-
 tensorflow/core/kernels/matrix_diag_op.cc | 10 +-
 .../core/kernels/matrix_diag_op_gpu.cu.cc | 5 +-
 tensorflow/core/kernels/matrix_set_diag_op.cc | 10 +-
 .../core/kernels/matrix_set_diag_op_gpu.cu.cc | 5 +-
 .../matrix_triangular_solve_op_complex.cc | 7 +-
 .../core/kernels/mkl_batch_matmul_op.cc | 6 +-
 tensorflow/core/kernels/one_hot_op.cc | 10 +-
 tensorflow/core/kernels/one_hot_op_gpu.cu.cc | 5 +-
 tensorflow/core/kernels/pack_op.cc | 5 +-
 .../core/kernels/reduction_ops_euclidean.cc | 3 +-
 tensorflow/core/kernels/reduction_ops_mean.cc | 3 +-
 tensorflow/core/kernels/reduction_ops_prod.cc | 5 +-
 tensorflow/core/kernels/reduction_ops_sum.cc | 5 +-
 tensorflow/core/kernels/reverse_op.cc | 14 +--
 tensorflow/core/kernels/reverse_op_gpu.cu.cc | 7 +-
 tensorflow/core/kernels/roll_op.cc | 5 +-
 tensorflow/core/kernels/roll_op_gpu.cu.cc | 5 +-
 tensorflow/core/kernels/scatter_nd_op.cc | 9 +-
 .../core/kernels/scatter_nd_op_gpu.cu.cc | 3 +-
 .../kernels/segment_reduction_ops_gpu.cu.cc | 3 +-
 .../kernels/segment_reduction_ops_impl_3.cc | 3 +-
 .../kernels/segment_reduction_ops_impl_4.cc | 3 +-
 tensorflow/core/kernels/slice_op.cc | 12 +-
 tensorflow/core/kernels/slice_op_gpu.cu.cc | 9 +-
 tensorflow/core/kernels/split_lib_gpu.cu.cc | 14 +--
 tensorflow/core/kernels/split_lib_gpu.h | 5 +-
 tensorflow/core/kernels/split_op.cc | 5 +-
 tensorflow/core/kernels/split_v_op.cc | 5 +-
 tensorflow/core/kernels/strided_slice_op.cc | 5 +-
 .../strided_slice_op_gpu_complex.cu.cc | 3 +-
 .../core/kernels/strided_slice_op_impl.h | 12 +-
 tensorflow/core/kernels/tensor_array.cc | 6 +-
 tensorflow/core/kernels/tensor_array.h | 6 +-
 tensorflow/core/kernels/tensor_array_ops.cc | 37 +++---
 tensorflow/core/kernels/training_ops.cc | 105 +++++-------------
tensorflow/core/kernels/unpack_op.cc | 5 +- tensorflow/core/kernels/variable_ops.cc | 5 +- 63 files changed, 169 insertions(+), 397 deletions(-) diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h index 47aab2efb61..bc3e5e1743b 100644 --- a/tensorflow/core/framework/register_types.h +++ b/tensorflow/core/framework/register_types.h @@ -179,13 +179,14 @@ limitations under the License. TF_CALL_int64(m) TF_CALL_uint16(m) TF_CALL_int16(m) TF_CALL_uint8(m) \ TF_CALL_int8(m) -// Call "m" for all number types, including complex64 and complex128. +#define TF_CALL_COMPLEX_TYPES(m) TF_CALL_complex64(m) TF_CALL_complex128(m) + +// Call "m" for all number types, including complex types #define TF_CALL_NUMBER_TYPES(m) \ - TF_CALL_REAL_NUMBER_TYPES(m) TF_CALL_complex64(m) TF_CALL_complex128(m) + TF_CALL_REAL_NUMBER_TYPES(m) TF_CALL_COMPLEX_TYPES(m) #define TF_CALL_NUMBER_TYPES_NO_INT32(m) \ - TF_CALL_REAL_NUMBER_TYPES_NO_INT32(m) \ - TF_CALL_complex64(m) TF_CALL_complex128(m) + TF_CALL_REAL_NUMBER_TYPES_NO_INT32(m) TF_CALL_COMPLEX_TYPES(m) #define TF_CALL_POD_TYPES(m) TF_CALL_NUMBER_TYPES(m) TF_CALL_bool(m) @@ -202,8 +203,7 @@ limitations under the License. // Call "m" on all types supported on GPU. #define TF_CALL_GPU_ALL_TYPES(m) \ - TF_CALL_GPU_NUMBER_TYPES(m) \ - TF_CALL_bool(m) TF_CALL_complex64(m) TF_CALL_complex128(m) + TF_CALL_GPU_NUMBER_TYPES(m) TF_CALL_COMPLEX_TYPES(m) TF_CALL_bool(m) #define TF_CALL_GPU_NUMBER_TYPES_NO_HALF(m) TF_CALL_float(m) TF_CALL_double(m) @@ -213,11 +213,10 @@ limitations under the License. TF_CALL_qint8(m) TF_CALL_quint8(m) TF_CALL_qint32(m) // Types used for save and restore ops. -#define TF_CALL_SAVE_RESTORE_TYPES(m) \ - TF_CALL_INTEGRAL_TYPES(m) \ - TF_CALL_half(m) TF_CALL_float(m) TF_CALL_double(m) TF_CALL_complex64(m) \ - TF_CALL_complex128(m) TF_CALL_bool(m) TF_CALL_tstring(m) \ - TF_CALL_QUANTIZED_TYPES(m) +#define TF_CALL_SAVE_RESTORE_TYPES(m) \ + TF_CALL_REAL_NUMBER_TYPES_NO_BFLOAT16(m) \ + TF_CALL_COMPLEX_TYPES(m) \ + TF_CALL_QUANTIZED_TYPES(m) TF_CALL_bool(m) TF_CALL_tstring(m) #ifdef TENSORFLOW_SYCL_NO_DOUBLE #define TF_CALL_SYCL_double(m) diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc index 511a5f77a66..79062aee156 100644 --- a/tensorflow/core/kernels/aggregate_ops.cc +++ b/tensorflow/core/kernels/aggregate_ops.cc @@ -48,11 +48,10 @@ REGISTER_ADDN_CPU(Variant); #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define REGISTER_ADDN_GPU(type) REGISTER_ADDN(type, GPU) -TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU); TF_CALL_int64(REGISTER_ADDN_GPU); -TF_CALL_complex64(REGISTER_ADDN_GPU); -TF_CALL_complex128(REGISTER_ADDN_GPU); TF_CALL_variant(REGISTER_ADDN_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_ADDN_GPU); #undef REGISTER_ADDN_GPU // A special GPU kernel for int32. 
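
Note on the pattern above: TF_CALL_COMPLEX_TYPES(m) is an X-macro. It takes another macro m and invokes it once per complex dtype, so a single TF_CALL_COMPLEX_TYPES(REGISTER_ADDN_GPU) line now replaces the paired TF_CALL_complex64/TF_CALL_complex128 calls that used to appear in each kernel file. The standalone sketch below mirrors that expansion with simplified stand-ins; AddNKernel and REGISTER_ADDN are illustrative placeholders, not TensorFlow's real registration machinery (which goes through REGISTER_KERNEL_BUILDER).

#include <complex>
#include <iostream>
#include <typeinfo>

using complex64 = std::complex<float>;
using complex128 = std::complex<double>;

// Per-dtype helpers: each applies the caller-supplied macro "m" to one type.
#define TF_CALL_complex64(m) m(complex64)
#define TF_CALL_complex128(m) m(complex128)

// The macro added by this change: one call covers both complex dtypes.
#define TF_CALL_COMPLEX_TYPES(m) TF_CALL_complex64(m) TF_CALL_complex128(m)

// Stand-in "kernel" and "registration" macro, for illustration only.
template <typename T>
struct AddNKernel {
  static void Run() { std::cout << "AddN registered for " << typeid(T).name() << "\n"; }
};
#define REGISTER_ADDN(type) template struct AddNKernel<type>;

// Expands to:
//   template struct AddNKernel<complex64>; template struct AddNKernel<complex128>;
TF_CALL_COMPLEX_TYPES(REGISTER_ADDN)

int main() {
  AddNKernel<complex64>::Run();
  AddNKernel<complex128>::Run();
  return 0;
}

In the real kernels, m is something like REGISTER_ADDN_GPU or DEFINE_GPU_SPECS, and composed macros such as TF_CALL_NUMBER_TYPES and TF_CALL_GPU_ALL_TYPES are built the same way by chaining the per-dtype helpers.
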
diff --git a/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc b/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc index 85bdc2447cd..c73323a795a 100644 --- a/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc @@ -154,10 +154,9 @@ struct Add9Functor { template struct functor::Add8pFunctor; \ template struct functor::Add9Functor; -TF_CALL_GPU_NUMBER_TYPES(REGISTER_FUNCTORS); TF_CALL_int64(REGISTER_FUNCTORS); -TF_CALL_complex64(REGISTER_FUNCTORS); -TF_CALL_complex128(REGISTER_FUNCTORS); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_FUNCTORS); +TF_CALL_COMPLEX_TYPES(REGISTER_FUNCTORS); #undef REGISTER_FUNCTORS diff --git a/tensorflow/core/kernels/batch_matmul_op_complex.cc b/tensorflow/core/kernels/batch_matmul_op_complex.cc index 2cf163be0d4..bc36b95d6a1 100644 --- a/tensorflow/core/kernels/batch_matmul_op_complex.cc +++ b/tensorflow/core/kernels/batch_matmul_op_complex.cc @@ -17,12 +17,10 @@ limitations under the License. namespace tensorflow { -TF_CALL_complex64(REGISTER_BATCH_MATMUL_CPU); -TF_CALL_complex128(REGISTER_BATCH_MATMUL_CPU); +TF_CALL_COMPLEX_TYPES(REGISTER_BATCH_MATMUL_CPU); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -TF_CALL_complex64(REGISTER_BATCH_MATMUL_GPU); -TF_CALL_complex128(REGISTER_BATCH_MATMUL_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_BATCH_MATMUL_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace tensorflow diff --git a/tensorflow/core/kernels/cholesky_op.cc b/tensorflow/core/kernels/cholesky_op.cc index 71e2604afce..ff8fd08f228 100644 --- a/tensorflow/core/kernels/cholesky_op.cc +++ b/tensorflow/core/kernels/cholesky_op.cc @@ -87,8 +87,7 @@ namespace functor { extern template struct MatrixBandPartFunctor; TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); -TF_CALL_complex64(DECLARE_GPU_SPEC); -TF_CALL_complex128(DECLARE_GPU_SPEC); +TF_CALL_COMPLEX_TYPES(DECLARE_GPU_SPEC); } // namespace functor template diff --git a/tensorflow/core/kernels/concat_lib.h b/tensorflow/core/kernels/concat_lib.h index 1d6d219f7ba..35da7afe3f5 100644 --- a/tensorflow/core/kernels/concat_lib.h +++ b/tensorflow/core/kernels/concat_lib.h @@ -64,15 +64,12 @@ void ConcatGPU( inputs_flat, \ Tensor* output, typename TTypes::Tensor* output_flat); -TF_CALL_GPU_NUMBER_TYPES(REGISTER); -TF_CALL_complex64(REGISTER); -TF_CALL_complex128(REGISTER); TF_CALL_int32(REGISTER); // Needed for TensorLists. TF_CALL_int64(REGISTER); TF_CALL_int16(REGISTER); TF_CALL_bfloat16(REGISTER); -TF_CALL_bool(REGISTER); TF_CALL_uint8(REGISTER); +TF_CALL_GPU_ALL_TYPES(REGISTER); #undef REGISTER #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/concat_lib_gpu.cc b/tensorflow/core/kernels/concat_lib_gpu.cc index 154b1a31c3a..de029397847 100644 --- a/tensorflow/core/kernels/concat_lib_gpu.cc +++ b/tensorflow/core/kernels/concat_lib_gpu.cc @@ -98,15 +98,12 @@ void ConcatGPU( inputs_flat, \ Tensor* output, typename TTypes::Tensor* output_flat); -TF_CALL_GPU_NUMBER_TYPES(REGISTER); -TF_CALL_complex64(REGISTER); -TF_CALL_complex128(REGISTER); TF_CALL_int32(REGISTER); // Needed for TensorLists. 
TF_CALL_int64(REGISTER); TF_CALL_int16(REGISTER); TF_CALL_bfloat16(REGISTER); -TF_CALL_bool(REGISTER); TF_CALL_uint8(REGISTER); +TF_CALL_GPU_ALL_TYPES(REGISTER); #undef REGISTER diff --git a/tensorflow/core/kernels/concat_lib_gpu.h b/tensorflow/core/kernels/concat_lib_gpu.h index 2db66a7c5a8..a83b6b63d9b 100644 --- a/tensorflow/core/kernels/concat_lib_gpu.h +++ b/tensorflow/core/kernels/concat_lib_gpu.h @@ -66,15 +66,12 @@ void ConcatGPUImpl(const Eigen::GpuDevice& d, const GpuDeviceArrayStruct& ptr_offsets, bool fixed_size, \ int split_size, typename TTypes::Matrix* output); -TF_CALL_GPU_NUMBER_TYPES(REGISTER); -TF_CALL_complex64(REGISTER); -TF_CALL_complex128(REGISTER); TF_CALL_int32(REGISTER); // Needed for TensorLists. TF_CALL_int64(REGISTER); TF_CALL_int16(REGISTER); TF_CALL_bfloat16(REGISTER); -TF_CALL_bool(REGISTER); TF_CALL_uint8(REGISTER); +TF_CALL_GPU_ALL_TYPES(REGISTER); #undef REGISTER } // namespace tensorflow diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc index 8f5458c9b56..859aca2f932 100644 --- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc +++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc @@ -201,45 +201,33 @@ void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device, const GpuDeviceArrayStruct& ptr_offsets, bool fixed_size, \ int split_size, typename TTypes::Matrix* output); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPUCONCAT32); -TF_CALL_complex64(REGISTER_GPUCONCAT32); -TF_CALL_complex128(REGISTER_GPUCONCAT32); TF_CALL_int32(REGISTER_GPUCONCAT32); // Needed for TensorLists. TF_CALL_int64(REGISTER_GPUCONCAT32); TF_CALL_int16(REGISTER_GPUCONCAT32); TF_CALL_uint8(REGISTER_GPUCONCAT32); -REGISTER_GPUCONCAT32(bfloat16); -REGISTER_GPUCONCAT32(bool); +TF_CALL_bfloat16(REGISTER_GPUCONCAT32); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPUCONCAT32); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPUCONCAT64); -TF_CALL_complex64(REGISTER_GPUCONCAT64); -TF_CALL_complex128(REGISTER_GPUCONCAT64); TF_CALL_int32(REGISTER_GPUCONCAT64); // Needed for TensorLists. TF_CALL_int64(REGISTER_GPUCONCAT64); TF_CALL_int16(REGISTER_GPUCONCAT64); TF_CALL_uint8(REGISTER_GPUCONCAT64); -REGISTER_GPUCONCAT64(bfloat16); -REGISTER_GPUCONCAT64(bool); +TF_CALL_bfloat16(REGISTER_GPUCONCAT64); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPUCONCAT64); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU32); -TF_CALL_complex64(REGISTER_GPU32); -TF_CALL_complex128(REGISTER_GPU32); TF_CALL_int32(REGISTER_GPU32); // Needed for TensorLists. TF_CALL_int64(REGISTER_GPU32); TF_CALL_int16(REGISTER_GPU32); TF_CALL_uint8(REGISTER_GPU32); -REGISTER_GPU32(bfloat16); -REGISTER_GPU32(bool); +TF_CALL_bfloat16(REGISTER_GPU32); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU32); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU64); -TF_CALL_complex64(REGISTER_GPU64); -TF_CALL_complex128(REGISTER_GPU64); TF_CALL_int32(REGISTER_GPU64); // Needed for TensorLists. 
TF_CALL_int64(REGISTER_GPU64); TF_CALL_int16(REGISTER_GPU64); TF_CALL_uint8(REGISTER_GPU64); -REGISTER_GPU64(bfloat16); -REGISTER_GPU64(bool); +TF_CALL_bfloat16(REGISTER_GPU64); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU64); #undef REGISTER_GPUCONCAT32 #undef REGISTER_GPUCONCAT64 diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc index 19ed267b441..be3e9a67c5f 100644 --- a/tensorflow/core/kernels/concat_op.cc +++ b/tensorflow/core/kernels/concat_op.cc @@ -227,13 +227,10 @@ REGISTER_CONCAT(uint64); .HostMemory("axis"), \ ConcatV2Op) -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -REGISTER_GPU(bfloat16); +TF_CALL_bfloat16(REGISTER_GPU); TF_CALL_uint8(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); -REGISTER_GPU(bool); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU); #undef REGISTER_GPU // A special GPU kernel for int32. diff --git a/tensorflow/core/kernels/diag_op.cc b/tensorflow/core/kernels/diag_op.cc index 811d48af091..f34fc6c6be7 100644 --- a/tensorflow/core/kernels/diag_op.cc +++ b/tensorflow/core/kernels/diag_op.cc @@ -176,8 +176,7 @@ TF_CALL_double(REGISTER_DIAGOP); TF_CALL_float(REGISTER_DIAGOP); TF_CALL_int32(REGISTER_DIAGOP); TF_CALL_int64(REGISTER_DIAGOP); -TF_CALL_complex64(REGISTER_DIAGOP); -TF_CALL_complex128(REGISTER_DIAGOP); +TF_CALL_COMPLEX_TYPES(REGISTER_DIAGOP); TF_CALL_half(REGISTER_DIAGOP); #undef REGISTER_DIAGOP @@ -190,8 +189,7 @@ TF_CALL_double(REGISTER_DIAGPARTOP); TF_CALL_float(REGISTER_DIAGPARTOP); TF_CALL_int32(REGISTER_DIAGPARTOP); TF_CALL_int64(REGISTER_DIAGPARTOP); -TF_CALL_complex64(REGISTER_DIAGPARTOP); -TF_CALL_complex128(REGISTER_DIAGPARTOP); +TF_CALL_COMPLEX_TYPES(REGISTER_DIAGPARTOP); TF_CALL_half(REGISTER_DIAGPARTOP); #undef REGISTER_DIAGPARTOP @@ -217,8 +215,7 @@ TF_CALL_double(REGISTER_DIAGOP_GPU); TF_CALL_float(REGISTER_DIAGOP_GPU); TF_CALL_int32(REGISTER_DIAGOP_GPU); TF_CALL_int64(REGISTER_DIAGOP_GPU); -TF_CALL_complex64(REGISTER_DIAGOP_GPU); -TF_CALL_complex128(REGISTER_DIAGOP_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_DIAGOP_GPU); TF_CALL_half(REGISTER_DIAGOP_GPU); #undef REGISTER_DIAGOP_GPU @@ -242,8 +239,7 @@ TF_CALL_double(REGISTER_DIAGPARTOP_GPU); TF_CALL_float(REGISTER_DIAGPARTOP_GPU); TF_CALL_int32(REGISTER_DIAGPARTOP_GPU); TF_CALL_int64(REGISTER_DIAGPARTOP_GPU); -TF_CALL_complex64(REGISTER_DIAGPARTOP_GPU); -TF_CALL_complex128(REGISTER_DIAGPARTOP_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_DIAGPARTOP_GPU); TF_CALL_half(REGISTER_DIAGPARTOP_GPU); #undef REGISTER_DIAGPARTOP_GPU diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc index 98c2fb57833..7b64d9e8484 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc @@ -467,8 +467,7 @@ class DynamicPartitionOpGPU : public AsyncOpKernel { DynamicPartitionOpGPU) TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_PARTITION_GPU); -TF_CALL_complex64(REGISTER_DYNAMIC_PARTITION_GPU); -TF_CALL_complex128(REGISTER_DYNAMIC_PARTITION_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_DYNAMIC_PARTITION_GPU); #undef REGISTER_DYNAMIC_PARTITION_GPU } // namespace tensorflow diff --git a/tensorflow/core/kernels/dynamic_stitch_op.cc b/tensorflow/core/kernels/dynamic_stitch_op.cc index 86f9c3e4621..5f6b0357f95 100644 --- a/tensorflow/core/kernels/dynamic_stitch_op.cc +++ b/tensorflow/core/kernels/dynamic_stitch_op.cc @@ -147,11 +147,11 @@ void DynamicStitchGPUImpl(const Eigen::GpuDevice& gpu_device, const int32 first_dim_size, 
\ const GpuDeviceArrayStruct& input_indices, \ const GpuDeviceArrayStruct& input_ptrs, T* output); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); + TF_CALL_int32(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU template @@ -357,11 +357,10 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_DYNAMIC_STITCH); .HostMemory("merged"), \ ParallelDynamicStitchOpCPU) -TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_STITCH_GPU); -TF_CALL_complex64(REGISTER_DYNAMIC_STITCH_GPU); -TF_CALL_complex128(REGISTER_DYNAMIC_STITCH_GPU); -TF_CALL_int64(REGISTER_DYNAMIC_STITCH_GPU); TF_CALL_int32(REGISTER_DYNAMIC_STITCH_GPU); +TF_CALL_int64(REGISTER_DYNAMIC_STITCH_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_DYNAMIC_STITCH_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_DYNAMIC_STITCH_GPU); #undef REGISTER_DYNAMIC_STITCH_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/dynamic_stitch_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_stitch_op_gpu.cu.cc index 22fed448c1f..c0a3df38b5d 100644 --- a/tensorflow/core/kernels/dynamic_stitch_op_gpu.cu.cc +++ b/tensorflow/core/kernels/dynamic_stitch_op_gpu.cu.cc @@ -70,11 +70,10 @@ void DynamicStitchGPUImpl(const Eigen::GpuDevice& gpu_device, const GpuDeviceArrayStruct& input_indices, \ const GpuDeviceArrayStruct& input_ptrs, T* output); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int32(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); -TF_CALL_int32(REGISTER_GPU) +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU diff --git a/tensorflow/core/kernels/einsum_op_gpu.cu.cc b/tensorflow/core/kernels/einsum_op_gpu.cu.cc index 36a97691297..2935b7fd02a 100644 --- a/tensorflow/core/kernels/einsum_op_gpu.cu.cc +++ b/tensorflow/core/kernels/einsum_op_gpu.cu.cc @@ -33,11 +33,8 @@ namespace tensorflow { DECLARE_GPU_SPECS_NDIM(T, 5); \ DECLARE_GPU_SPECS_NDIM(T, 6); -TF_CALL_half(DECLARE_GPU_SPECS); -TF_CALL_float(DECLARE_GPU_SPECS); -TF_CALL_double(DECLARE_GPU_SPECS); -TF_CALL_complex64(DECLARE_GPU_SPECS); -TF_CALL_complex128(DECLARE_GPU_SPECS); +TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPECS_NDIM #undef DECLARE_GPU_SPECS diff --git a/tensorflow/core/kernels/gather_functor.cc b/tensorflow/core/kernels/gather_functor.cc index e4f2182be3c..a0293951660 100644 --- a/tensorflow/core/kernels/gather_functor.cc +++ b/tensorflow/core/kernels/gather_functor.cc @@ -39,8 +39,7 @@ namespace functor { TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); -TF_CALL_complex64(DECLARE_GPU_SPECS); -TF_CALL_complex128(DECLARE_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPECS #undef DECLARE_GPU_SPECS_INDEX diff --git a/tensorflow/core/kernels/gather_functor_batched.cc b/tensorflow/core/kernels/gather_functor_batched.cc index 0960b3a2472..d1ef076260b 100644 --- a/tensorflow/core/kernels/gather_functor_batched.cc +++ b/tensorflow/core/kernels/gather_functor_batched.cc @@ -39,8 +39,7 @@ namespace functor { TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); -TF_CALL_complex64(DECLARE_GPU_SPECS); -TF_CALL_complex128(DECLARE_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPECS #undef DECLARE_GPU_SPECS_INDEX diff --git 
a/tensorflow/core/kernels/gather_functor_batched_gpu.cu.cc b/tensorflow/core/kernels/gather_functor_batched_gpu.cu.cc index f118d8dc72b..40b9894776d 100644 --- a/tensorflow/core/kernels/gather_functor_batched_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_functor_batched_gpu.cu.cc @@ -31,12 +31,9 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); -TF_CALL_bool(DEFINE_GPU_SPECS); TF_CALL_int32(DEFINE_GPU_SPECS); TF_CALL_int64(DEFINE_GPU_SPECS); -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); -TF_CALL_complex64(DEFINE_GPU_SPECS); -TF_CALL_complex128(DEFINE_GPU_SPECS); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_SPECS); #undef DEFINE_GPU_SPECS #undef DEFINE_GPU_SPECS_INDEX diff --git a/tensorflow/core/kernels/gather_functor_gpu.cu.cc b/tensorflow/core/kernels/gather_functor_gpu.cu.cc index c548abb8bde..39402ebacec 100644 --- a/tensorflow/core/kernels/gather_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_functor_gpu.cu.cc @@ -31,12 +31,9 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); -TF_CALL_bool(DEFINE_GPU_SPECS); TF_CALL_int32(DEFINE_GPU_SPECS); TF_CALL_int64(DEFINE_GPU_SPECS); -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); -TF_CALL_complex64(DEFINE_GPU_SPECS); -TF_CALL_complex128(DEFINE_GPU_SPECS); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_SPECS); #undef DEFINE_GPU_SPECS #undef DEFINE_GPU_SPECS_INDEX diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc index 517f78ff232..ad759489dc6 100644 --- a/tensorflow/core/kernels/gather_nd_op.cc +++ b/tensorflow/core/kernels/gather_nd_op.cc @@ -105,8 +105,7 @@ namespace functor { TF_CALL_int32(DECLARE_GPU_SPECS); TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); -TF_CALL_complex64(DECLARE_GPU_SPECS); -TF_CALL_complex128(DECLARE_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPECS #undef DECLARE_GPU_SPECS_INDEX @@ -118,8 +117,7 @@ TF_CALL_complex128(DECLARE_GPU_SPECS); TF_CALL_int32(REGISTER_GATHER_ND_GPU); TF_CALL_int64(REGISTER_GATHER_ND_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); -TF_CALL_complex64(REGISTER_GATHER_ND_GPU); -TF_CALL_complex128(REGISTER_GATHER_ND_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GATHER_ND_GPU); #undef REGISTER_GATHER_ND_GPU diff --git a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc index 95ead6b0fd3..216ca2de114 100644 --- a/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/gather_nd_op_gpu.cu.cc @@ -121,8 +121,7 @@ struct GatherNdSlice { TF_CALL_int32(DEFINE_GPU_SPECS); TF_CALL_int64(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); -TF_CALL_complex64(DEFINE_GPU_SPECS); -TF_CALL_complex128(DEFINE_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DEFINE_GPU_SPECS); #undef DEFINE_GPU_SPECS #undef DEFINE_GPU_SPECS_INDEX diff --git a/tensorflow/core/kernels/gather_op.cc b/tensorflow/core/kernels/gather_op.cc index 5e6bd1de9d6..6d493a5f2ea 100644 --- a/tensorflow/core/kernels/gather_op.cc +++ b/tensorflow/core/kernels/gather_op.cc @@ -221,12 +221,9 @@ TF_CALL_uint64(REGISTER_GATHER_CPU); // Registration of the GPU implementations. 
#define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type) -TF_CALL_bool(REGISTER_GATHER_GPU); TF_CALL_int32(REGISTER_GATHER_GPU); TF_CALL_int64(REGISTER_GATHER_GPU); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU); -TF_CALL_complex64(REGISTER_GATHER_GPU); -TF_CALL_complex128(REGISTER_GATHER_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_GATHER_GPU); #undef REGISTER_GATHER_GPU diff --git a/tensorflow/core/kernels/list_kernels.cu.cc b/tensorflow/core/kernels/list_kernels.cu.cc index 1a1447ef1e6..b95a065edb8 100644 --- a/tensorflow/core/kernels/list_kernels.cu.cc +++ b/tensorflow/core/kernels/list_kernels.cu.cc @@ -105,13 +105,10 @@ typedef Eigen::GpuDevice GPUDevice; .HostMemory("lengths"), \ TensorListSplit) -TF_CALL_GPU_NUMBER_TYPES(REGISTER_TENSOR_LIST_OPS_GPU); -REGISTER_TENSOR_LIST_OPS_GPU(bfloat16); -TF_CALL_complex64(REGISTER_TENSOR_LIST_OPS_GPU); -TF_CALL_complex128(REGISTER_TENSOR_LIST_OPS_GPU); TF_CALL_int32(REGISTER_TENSOR_LIST_OPS_GPU); TF_CALL_int64(REGISTER_TENSOR_LIST_OPS_GPU); -REGISTER_TENSOR_LIST_OPS_GPU(bool); +TF_CALL_bfloat16(REGISTER_TENSOR_LIST_OPS_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_TENSOR_LIST_OPS_GPU); #undef REGISTER_TENSOR_LIST_OPS_GPU diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc index 148540a3d82..2e3c120248f 100644 --- a/tensorflow/core/kernels/matmul_op.cc +++ b/tensorflow/core/kernels/matmul_op.cc @@ -581,21 +581,14 @@ struct MatMulFunctor { .Label("cublas"), \ MatMulOp) -TF_CALL_float(REGISTER_CPU); -TF_CALL_double(REGISTER_CPU); -TF_CALL_half(REGISTER_CPU); -TF_CALL_bfloat16(REGISTER_CPU); TF_CALL_int32(REGISTER_CPU); TF_CALL_int64(REGISTER_CPU); -TF_CALL_complex64(REGISTER_CPU); -TF_CALL_complex128(REGISTER_CPU); +TF_CALL_FLOAT_TYPES(REGISTER_CPU); +TF_CALL_COMPLEX_TYPES(REGISTER_CPU); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -TF_CALL_float(REGISTER_GPU); -TF_CALL_double(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); -TF_CALL_half(REGISTER_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/matrix_band_part_op.cc b/tensorflow/core/kernels/matrix_band_part_op.cc index 5254ade87b9..4dcce5a8f58 100644 --- a/tensorflow/core/kernels/matrix_band_part_op.cc +++ b/tensorflow/core/kernels/matrix_band_part_op.cc @@ -210,10 +210,7 @@ namespace functor { }; \ extern template struct MatrixBandPartFunctor; -TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); -TF_CALL_bool(DECLARE_GPU_SPEC); -TF_CALL_complex64(DECLARE_GPU_SPEC); -TF_CALL_complex128(DECLARE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPEC); #undef DECLARE_GPU_SPEC } // namespace functor @@ -225,10 +222,7 @@ TF_CALL_complex128(DECLARE_GPU_SPEC); .HostMemory("num_lower") \ .HostMemory("num_upper"), \ MatrixBandPartOp); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_MATRIX_BAND_PART_GPU); -TF_CALL_bool(REGISTER_MATRIX_BAND_PART_GPU); -TF_CALL_complex64(REGISTER_MATRIX_BAND_PART_GPU); -TF_CALL_complex128(REGISTER_MATRIX_BAND_PART_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_MATRIX_BAND_PART_GPU); #undef REGISTER_MATRIX_BAND_PART_GPU // Registration of the deprecated kernel. 
diff --git a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc b/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc index 4a94c51e878..9eb3e4f72a2 100644 --- a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc +++ b/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc @@ -68,10 +68,7 @@ struct MatrixBandPartFunctor { #define DEFINE_GPU_SPEC(T) template struct MatrixBandPartFunctor; -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPEC); -TF_CALL_bool(DEFINE_GPU_SPEC); -TF_CALL_complex64(DEFINE_GPU_SPEC); -TF_CALL_complex128(DEFINE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_SPEC); #undef DEFINE_GPU_SPEC } // namespace functor diff --git a/tensorflow/core/kernels/matrix_diag_op.cc b/tensorflow/core/kernels/matrix_diag_op.cc index 9796fd25e39..05d7e4e6f86 100644 --- a/tensorflow/core/kernels/matrix_diag_op.cc +++ b/tensorflow/core/kernels/matrix_diag_op.cc @@ -469,10 +469,7 @@ namespace functor { const bool left_align_subdiagonal); \ extern template struct MatrixDiagPart; -TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); -TF_CALL_bool(DECLARE_GPU_SPEC); -TF_CALL_complex64(DECLARE_GPU_SPEC); -TF_CALL_complex128(DECLARE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPEC); } // namespace functor @@ -513,10 +510,7 @@ TF_CALL_complex128(DECLARE_GPU_SPEC); .HostMemory("padding_value"), \ MatrixDiagPartOp); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_MATRIX_DIAG_GPU); -TF_CALL_bool(REGISTER_MATRIX_DIAG_GPU); -TF_CALL_complex64(REGISTER_MATRIX_DIAG_GPU); -TF_CALL_complex128(REGISTER_MATRIX_DIAG_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_MATRIX_DIAG_GPU); #undef REGISTER_MATRIX_DIAG_GPU // Registration of the deprecated kernel. diff --git a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc b/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc index 53cd2d2dc46..76271798d5f 100644 --- a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc @@ -163,10 +163,7 @@ struct MatrixDiagPart { template struct MatrixDiag; \ template struct MatrixDiagPart; -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPEC); -TF_CALL_bool(DEFINE_GPU_SPEC); -TF_CALL_complex64(DEFINE_GPU_SPEC); -TF_CALL_complex128(DEFINE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_SPEC); } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/matrix_set_diag_op.cc b/tensorflow/core/kernels/matrix_set_diag_op.cc index 2701ff788f7..bf98fd0d47d 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op.cc +++ b/tensorflow/core/kernels/matrix_set_diag_op.cc @@ -272,10 +272,7 @@ namespace functor { const bool left_align_superdiagonal, const bool left_align_subdiagonal); \ extern template struct MatrixSetDiag; -TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); -TF_CALL_bool(DECLARE_GPU_SPEC); -TF_CALL_complex64(DECLARE_GPU_SPEC); -TF_CALL_complex128(DECLARE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPEC); } // namespace functor @@ -295,10 +292,7 @@ TF_CALL_complex128(DECLARE_GPU_SPEC); .HostMemory("k"), \ MatrixSetDiagOp); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_MATRIX_SET_DIAG_GPU); -TF_CALL_bool(REGISTER_MATRIX_SET_DIAG_GPU); -TF_CALL_complex64(REGISTER_MATRIX_SET_DIAG_GPU); -TF_CALL_complex128(REGISTER_MATRIX_SET_DIAG_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_MATRIX_SET_DIAG_GPU); #undef REGISTER_MATRIX_SET_DIAG_GPU // Registration of the deprecated kernel. 
diff --git a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc b/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc index 4f742b90bff..4e32f8a52e8 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc @@ -136,10 +136,7 @@ struct MatrixSetDiag { #define DEFINE_GPU_SPEC(T) template struct MatrixSetDiag; -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPEC); -TF_CALL_bool(DEFINE_GPU_SPEC); -TF_CALL_complex64(DEFINE_GPU_SPEC); -TF_CALL_complex128(DEFINE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_SPEC); } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc b/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc index 47f958ff6a9..ae3702078a0 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc +++ b/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc @@ -13,16 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" namespace tensorflow { -TF_CALL_complex64(REGISTER_BATCH_MATRIX_TRIANGULAR_SOLVE_CPU); -TF_CALL_complex128(REGISTER_BATCH_MATRIX_TRIANGULAR_SOLVE_CPU); +TF_CALL_COMPLEX_TYPES(REGISTER_BATCH_MATRIX_TRIANGULAR_SOLVE_CPU); #if GOOGLE_CUDA -TF_CALL_complex64(REGISTER_BATCH_MATRIX_TRIANGULAR_SOLVE_GPU); -TF_CALL_complex128(REGISTER_BATCH_MATRIX_TRIANGULAR_SOLVE_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_BATCH_MATRIX_TRIANGULAR_SOLVE_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index 37888656020..2d0b18edb27 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -316,13 +316,11 @@ class BatchMatMulMkl : public OpKernel { #ifdef ENABLE_MKL TF_CALL_float(REGISTER_BATCH_MATMUL_MKL); TF_CALL_double(REGISTER_BATCH_MATMUL_MKL); -TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL); -TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL); +TF_CALL_COMPLEX_TYPES(REGISTER_BATCH_MATMUL_MKL); TF_CALL_float(REGISTER_BATCH_MATMUL_MKL_V2); TF_CALL_double(REGISTER_BATCH_MATMUL_MKL_V2); -TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL_V2); -TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL_V2); +TF_CALL_COMPLEX_TYPES(REGISTER_BATCH_MATMUL_MKL_V2); #if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16) TF_CALL_bfloat16(REGISTER_BATCH_MATMUL_MKL); diff --git a/tensorflow/core/kernels/one_hot_op.cc b/tensorflow/core/kernels/one_hot_op.cc index 3badbc294b7..e6cb84bab74 100644 --- a/tensorflow/core/kernels/one_hot_op.cc +++ b/tensorflow/core/kernels/one_hot_op.cc @@ -160,12 +160,9 @@ namespace functor { DECLARE_GPU_SPEC_INDEX(T, int32); \ DECLARE_GPU_SPEC_INDEX(T, int64); -TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); -TF_CALL_bool(DECLARE_GPU_SPEC); TF_CALL_int32(DECLARE_GPU_SPEC); TF_CALL_int64(DECLARE_GPU_SPEC); -TF_CALL_complex64(DECLARE_GPU_SPEC); -TF_CALL_complex128(DECLARE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPEC); #undef DECLARE_GPU_SPEC_INDEX #undef DECLARE_GPU_SPEC @@ -186,12 +183,9 @@ TF_CALL_complex128(DECLARE_GPU_SPEC); REGISTER_ONE_HOT_GPU_INDEX(type, int32); \ REGISTER_ONE_HOT_GPU_INDEX(type, int64); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_ONE_HOT_GPU); 
-TF_CALL_bool(REGISTER_ONE_HOT_GPU); TF_CALL_int32(REGISTER_ONE_HOT_GPU); TF_CALL_int64(REGISTER_ONE_HOT_GPU); -TF_CALL_complex64(REGISTER_ONE_HOT_GPU); -TF_CALL_complex128(REGISTER_ONE_HOT_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_ONE_HOT_GPU); #undef REGISTER_ONE_HOT_GPU_INDEX #undef REGISTER_ONE_HOT_GPU diff --git a/tensorflow/core/kernels/one_hot_op_gpu.cu.cc b/tensorflow/core/kernels/one_hot_op_gpu.cu.cc index 8df7284caed..47af41477c7 100644 --- a/tensorflow/core/kernels/one_hot_op_gpu.cu.cc +++ b/tensorflow/core/kernels/one_hot_op_gpu.cu.cc @@ -37,12 +37,9 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPEC_INDEX(T, int32); \ DEFINE_GPU_SPEC_INDEX(T, int64) -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPEC); -TF_CALL_bool(DEFINE_GPU_SPEC); TF_CALL_int32(DEFINE_GPU_SPEC); TF_CALL_int64(DEFINE_GPU_SPEC); -TF_CALL_complex64(DEFINE_GPU_SPEC); -TF_CALL_complex128(DEFINE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_SPEC); #undef DEFINE_GPU_SPEC_INDEX #undef DEFINE_GPU_SPEC diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc index cf2b6bb1100..04b5c72b3cf 100644 --- a/tensorflow/core/kernels/pack_op.cc +++ b/tensorflow/core/kernels/pack_op.cc @@ -152,13 +152,10 @@ REGISTER_PACK(tstring); Name("Pack").Device(DEVICE_GPU).TypeConstraint("T"), \ PackOp) -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_bfloat16(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); TF_CALL_int16(REGISTER_GPU); -TF_CALL_bool(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU); #undef REGISTER_GPU // A special GPU kernel for int32. diff --git a/tensorflow/core/kernels/reduction_ops_euclidean.cc b/tensorflow/core/kernels/reduction_ops_euclidean.cc index cf719e76cd8..9bc11e29069 100644 --- a/tensorflow/core/kernels/reduction_ops_euclidean.cc +++ b/tensorflow/core/kernels/reduction_ops_euclidean.cc @@ -52,8 +52,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS); functor::EuclideanNormReducer>); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #if GOOGLE_CUDA -TF_CALL_complex64(REGISTER_GPU_KERNELS); -TF_CALL_complex128(REGISTER_GPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); #endif #undef REGISTER_GPU_KERNELS diff --git a/tensorflow/core/kernels/reduction_ops_mean.cc b/tensorflow/core/kernels/reduction_ops_mean.cc index d314f1953dc..e96d6f829ac 100644 --- a/tensorflow/core/kernels/reduction_ops_mean.cc +++ b/tensorflow/core/kernels/reduction_ops_mean.cc @@ -52,8 +52,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS); ReductionOp>); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #if GOOGLE_CUDA -TF_CALL_complex64(REGISTER_GPU_KERNELS); -TF_CALL_complex128(REGISTER_GPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); #endif #undef REGISTER_GPU_KERNELS diff --git a/tensorflow/core/kernels/reduction_ops_prod.cc b/tensorflow/core/kernels/reduction_ops_prod.cc index 0642bad9218..33742e97146 100644 --- a/tensorflow/core/kernels/reduction_ops_prod.cc +++ b/tensorflow/core/kernels/reduction_ops_prod.cc @@ -50,11 +50,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .HostMemory("reduction_indices"), \ ReductionOp>); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); TF_CALL_int32(REGISTER_GPU_KERNELS); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #if GOOGLE_CUDA -TF_CALL_complex64(REGISTER_GPU_KERNELS); -TF_CALL_complex128(REGISTER_GPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); #endif #undef REGISTER_GPU_KERNELS diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc 
index d79684df290..b5f7a5d7089 100644 --- a/tensorflow/core/kernels/reduction_ops_sum.cc +++ b/tensorflow/core/kernels/reduction_ops_sum.cc @@ -50,11 +50,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .TypeConstraint("Tidx") \ .HostMemory("reduction_indices"), \ ReductionOp>); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); TF_CALL_int64(REGISTER_GPU_KERNELS); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #if GOOGLE_CUDA -TF_CALL_complex64(REGISTER_GPU_KERNELS); -TF_CALL_complex128(REGISTER_GPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); #endif #undef REGISTER_GPU_KERNELS diff --git a/tensorflow/core/kernels/reverse_op.cc b/tensorflow/core/kernels/reverse_op.cc index 98bf8bf8e91..7ec6923492f 100644 --- a/tensorflow/core/kernels/reverse_op.cc +++ b/tensorflow/core/kernels/reverse_op.cc @@ -342,12 +342,7 @@ namespace functor { TF_CALL_uint8(DECLARE_GPU_SPEC); TF_CALL_int8(DECLARE_GPU_SPEC); -TF_CALL_bool(DECLARE_GPU_SPEC); -TF_CALL_half(DECLARE_GPU_SPEC); -TF_CALL_float(DECLARE_GPU_SPEC); -TF_CALL_double(DECLARE_GPU_SPEC); -TF_CALL_complex64(DECLARE_GPU_SPEC); -TF_CALL_complex128(DECLARE_GPU_SPEC); +TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPEC); #undef DECLARE_GPU_SPEC #undef DECLARE_GPU_SPEC_DIM } // namespace functor @@ -373,12 +368,7 @@ TF_CALL_complex128(DECLARE_GPU_SPEC); ReverseV2Op) TF_CALL_uint8(REGISTER_GPU_KERNELS); TF_CALL_int8(REGISTER_GPU_KERNELS); -TF_CALL_bool(REGISTER_GPU_KERNELS); -TF_CALL_half(REGISTER_GPU_KERNELS); -TF_CALL_float(REGISTER_GPU_KERNELS); -TF_CALL_double(REGISTER_GPU_KERNELS); -TF_CALL_complex64(REGISTER_GPU_KERNELS); -TF_CALL_complex128(REGISTER_GPU_KERNELS); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNEL // A special GPU kernel for int32. diff --git a/tensorflow/core/kernels/reverse_op_gpu.cu.cc b/tensorflow/core/kernels/reverse_op_gpu.cu.cc index 2917a0d5f11..28c50bc66df 100644 --- a/tensorflow/core/kernels/reverse_op_gpu.cu.cc +++ b/tensorflow/core/kernels/reverse_op_gpu.cu.cc @@ -40,12 +40,7 @@ typedef Eigen::GpuDevice GPUDevice; TF_CALL_uint8(DEFINE_REVERSE_ALL_DIMS); TF_CALL_int8(DEFINE_REVERSE_ALL_DIMS); -TF_CALL_bool(DEFINE_REVERSE_ALL_DIMS); -TF_CALL_half(DEFINE_REVERSE_ALL_DIMS); -TF_CALL_float(DEFINE_REVERSE_ALL_DIMS); -TF_CALL_double(DEFINE_REVERSE_ALL_DIMS); -TF_CALL_complex64(DEFINE_REVERSE_ALL_DIMS); -TF_CALL_complex128(DEFINE_REVERSE_ALL_DIMS); +TF_CALL_GPU_ALL_TYPES(DEFINE_REVERSE_ALL_DIMS); #undef DEFINE_REVERSE #undef DEFINE_REVERSE_ALL_DIMS diff --git a/tensorflow/core/kernels/roll_op.cc b/tensorflow/core/kernels/roll_op.cc index 8d6147801e7..2a141864e18 100644 --- a/tensorflow/core/kernels/roll_op.cc +++ b/tensorflow/core/kernels/roll_op.cc @@ -397,10 +397,9 @@ TF_CALL_ALL_TYPES(REGISTER_CPU); TF_CALL_int32(REGISTER_KERNEL); TF_CALL_int64(REGISTER_KERNEL); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL); -TF_CALL_complex64(REGISTER_KERNEL); -TF_CALL_complex128(REGISTER_KERNEL); TF_CALL_uint32(REGISTER_KERNEL); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL); +TF_CALL_COMPLEX_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/roll_op_gpu.cu.cc b/tensorflow/core/kernels/roll_op_gpu.cu.cc index 7ba37e2f59c..0e81b576330 100644 --- a/tensorflow/core/kernels/roll_op_gpu.cu.cc +++ b/tensorflow/core/kernels/roll_op_gpu.cu.cc @@ -93,10 +93,9 @@ struct Roll { TF_CALL_int32(DEFINE_GPU_SPECS); TF_CALL_int64(DEFINE_GPU_SPECS); +TF_CALL_uint32(DEFINE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); -TF_CALL_complex64(DEFINE_GPU_SPECS); 
-TF_CALL_complex128(DEFINE_GPU_SPECS); -TF_CALL_uint32(DEFINE_GPU_SPECS) +TF_CALL_COMPLEX_TYPES(DEFINE_GPU_SPECS); #undef DEFINE_GPU_SPECS } // namespace functor diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 96711fce643..c6c93077f01 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -502,8 +502,7 @@ TF_CALL_int64(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_int64(REGISTER_SCATTER_ND_MIN_MAX_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_ND_MIN_MAX_GPU); -TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU); -TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_SCATTER_ND_ALL_GPU); #undef REGISTER_SCATTER_ND_ALL_GPU @@ -563,8 +562,7 @@ TF_CALL_int64(REGISTER_SCATTER_ND_TENSOR_GPU); TF_CALL_int64(REGISTER_SCATTER_ND_TENSOR_GPU_MIN_MAX); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_TENSOR_GPU); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_TENSOR_GPU_MIN_MAX); -TF_CALL_complex64(REGISTER_SCATTER_ND_TENSOR_GPU); -TF_CALL_complex128(REGISTER_SCATTER_ND_TENSOR_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_SCATTER_ND_TENSOR_GPU); #undef REGISTER_SCATTER_ND_ADD #undef REGISTER_SCATTER_ND_ADD_SUB @@ -862,8 +860,7 @@ TF_CALL_int32(DECLARE_GPU_SPECS); TF_CALL_int32(DECLARE_GPU_SPECS_MIN_MAX); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS_MIN_MAX); -TF_CALL_complex64(DECLARE_GPU_SPECS); -TF_CALL_complex128(DECLARE_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPECS_MIN_MAX #undef DECLARE_GPU_SPECS diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc index 7fb2d3916e3..64b69af423f 100644 --- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc @@ -200,8 +200,7 @@ TF_CALL_int64(DECLARE_GPU_SPECS); TF_CALL_int64(DECLARE_GPU_SPECS_MINMAX); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS_MINMAX); -TF_CALL_complex64(DECLARE_GPU_SPECS); -TF_CALL_complex128(DECLARE_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPECS #undef DECLARE_GPU_SPECS_MINMAX diff --git a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc index bc28e64c4d7..418af1d6b6d 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc @@ -244,8 +244,7 @@ TF_CALL_int32(DEFINE_SUM_GPU_SPECS); // TODO(rocm): support atomicAdd for complex numbers on ROCm #if GOOGLE_CUDA -TF_CALL_complex64(DEFINE_SUM_GPU_SPECS); -TF_CALL_complex128(DEFINE_SUM_GPU_SPECS); +TF_CALL_COMPLEX_TYPES(DEFINE_SUM_GPU_SPECS); #endif #undef DEFINE_SORTED_GPU_SPECS_INDEX diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl_3.cc b/tensorflow/core/kernels/segment_reduction_ops_impl_3.cc index d0d46938629..eef5a532b29 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl_3.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_impl_3.cc @@ -113,8 +113,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); TF_CALL_int32(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); // TODO(rocm): support atomicAdd for complex numbers on ROCm #if GOOGLE_CUDA -TF_CALL_complex64(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); -TF_CALL_complex128(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); 
+TF_CALL_COMPLEX_TYPES(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); #endif #undef REGISTER_GPU_KERNEL_UNSORTEDSEGMENT diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl_4.cc b/tensorflow/core/kernels/segment_reduction_ops_impl_4.cc index 92caeb3c544..cad6f8a5e08 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl_4.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_impl_4.cc @@ -113,8 +113,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); TF_CALL_int32(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); // TODO(rocm): support atomicAdd for complex numbers on ROCm #if GOOGLE_CUDA -TF_CALL_complex64(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); -TF_CALL_complex128(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); +TF_CALL_COMPLEX_TYPES(REGISTER_SUM_GPU_UNSORTED_KERNELS_ALL); #endif #undef REGISTER_GPU_KERNEL_UNSORTEDSEGMENT diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 62c63263d48..6d7cd6f2a3d 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -300,14 +300,11 @@ namespace functor { DECLARE_GPU_SPEC(T, 7); \ DECLARE_GPU_SPEC(T, 8); -TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N); -TF_CALL_complex64(DECLARE_FOR_N); -TF_CALL_complex128(DECLARE_FOR_N); TF_CALL_bfloat16(DECLARE_FOR_N); -TF_CALL_bool(DECLARE_FOR_N); TF_CALL_int8(DECLARE_FOR_N); +TF_CALL_int32(DECLARE_FOR_N); TF_CALL_int64(DECLARE_FOR_N); -DECLARE_FOR_N(int32); +TF_CALL_GPU_ALL_TYPES(DECLARE_FOR_N); #undef DECLARE_FOR_N #undef DECLARE_GPU_SPEC @@ -321,13 +318,10 @@ DECLARE_FOR_N(int32); .HostMemory("size"), \ SliceOp) -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); TF_CALL_bfloat16(REGISTER_GPU); -TF_CALL_bool(REGISTER_GPU); TF_CALL_int8(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. 
This kernel diff --git a/tensorflow/core/kernels/slice_op_gpu.cu.cc b/tensorflow/core/kernels/slice_op_gpu.cu.cc index 5a9d2ff950a..c20d01751d9 100644 --- a/tensorflow/core/kernels/slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/slice_op_gpu.cu.cc @@ -37,14 +37,11 @@ typedef Eigen::GpuDevice GPUDevice; template struct functor::Slice; \ template struct functor::Slice; -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); -TF_CALL_complex64(DEFINE_GPU_KERNELS); -TF_CALL_complex128(DEFINE_GPU_KERNELS); TF_CALL_bfloat16(DEFINE_GPU_KERNELS); -TF_CALL_bool(DEFINE_GPU_KERNELS); TF_CALL_int8(DEFINE_GPU_KERNELS); -DEFINE_GPU_KERNELS(int32); -DEFINE_GPU_KERNELS(int64); +TF_CALL_int32(DEFINE_GPU_KERNELS); +TF_CALL_int64(DEFINE_GPU_KERNELS); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc index b094a5320f7..b4379a01ce1 100644 --- a/tensorflow/core/kernels/split_lib_gpu.cu.cc +++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc @@ -51,20 +51,16 @@ void SplitCustom::operator()( template struct Split; \ template struct Split; -TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); -TF_CALL_complex64(DEFINE_GPU_KERNELS); -TF_CALL_complex128(DEFINE_GPU_KERNELS); TF_CALL_int64(DEFINE_GPU_KERNELS); TF_CALL_bfloat16(DEFINE_GPU_KERNELS); TF_CALL_uint8(DEFINE_GPU_KERNELS); -TF_CALL_bool(DEFINE_GPU_KERNELS); +TF_CALL_GPU_ALL_TYPES(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS #define DEFINE_GPU_KERNELS(T) template struct SplitCustom; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); -TF_CALL_complex64(DEFINE_GPU_KERNELS); -TF_CALL_complex128(DEFINE_GPU_KERNELS); +TF_CALL_COMPLEX_TYPES(DEFINE_GPU_KERNELS); TF_CALL_bfloat16(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS @@ -248,8 +244,7 @@ void SplitVOpGPULaunch::Run( #define REGISTER_GPU_KERNEL(T) template struct SplitOpGPULaunch; TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); -TF_CALL_complex64(REGISTER_GPU_KERNEL); -TF_CALL_complex128(REGISTER_GPU_KERNEL); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #define REGISTER_GPU_KERNEL(T) \ @@ -257,8 +252,7 @@ TF_CALL_bfloat16(REGISTER_GPU_KERNEL); template struct SplitVOpGPULaunch; TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); -TF_CALL_complex64(REGISTER_GPU_KERNEL); -TF_CALL_complex128(REGISTER_GPU_KERNEL); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL diff --git a/tensorflow/core/kernels/split_lib_gpu.h b/tensorflow/core/kernels/split_lib_gpu.h index 20feb7df143..f2c343ae5fd 100644 --- a/tensorflow/core/kernels/split_lib_gpu.h +++ b/tensorflow/core/kernels/split_lib_gpu.h @@ -50,12 +50,9 @@ struct SplitVOpGPULaunch { extern template struct SplitVOpGPULaunch; \ extern template struct SplitVOpGPULaunch; -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); -TF_CALL_complex64(REGISTER_GPU_KERNEL); -TF_CALL_complex128(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); TF_CALL_uint8(REGISTER_GPU_KERNEL); -TF_CALL_bool(REGISTER_GPU_KERNEL); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL } // namespace tensorflow diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc index f8bf5bce197..f09740c6198 100644 --- a/tensorflow/core/kernels/split_op.cc +++ b/tensorflow/core/kernels/split_op.cc @@ -417,10 +417,9 @@ REGISTER_SPLIT(uint64); .HostMemory("split_dim"), \ SplitOpGPU) +TF_CALL_bfloat16(REGISTER_GPU); 
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); -REGISTER_GPU(bfloat16); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 1eaeda927f8..4569e11dd13 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -471,10 +471,9 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT_LEN); REGISTER_GPU(type, int32); \ REGISTER_GPU(type, int64); +TF_CALL_bfloat16(REGISTER_GPU_LEN); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_LEN); -TF_CALL_complex64(REGISTER_GPU_LEN); -TF_CALL_complex128(REGISTER_GPU_LEN); -REGISTER_GPU_LEN(bfloat16); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU_LEN); #undef REGISTER_GPU_LEN #undef REGISTER_GPU diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index dd23c251897..ccc1984bb98 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -486,12 +486,9 @@ TF_CALL_uint64(REGISTER_STRIDED_SLICE); .HostMemory("strides"), \ StridedSliceAssignOp) -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_bool(REGISTER_GPU); TF_CALL_int8(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); +TF_CALL_GPU_ALL_TYPES(REGISTER_GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/strided_slice_op_gpu_complex.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu_complex.cu.cc index a930b8a3fac..33d94f4fc71 100644 --- a/tensorflow/core/kernels/strided_slice_op_gpu_complex.cu.cc +++ b/tensorflow/core/kernels/strided_slice_op_gpu_complex.cu.cc @@ -21,8 +21,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/strided_slice_op_gpu_impl.h" namespace tensorflow { -TF_CALL_complex64(DEFINE_GPU_KERNELS); -TF_CALL_complex128(DEFINE_GPU_KERNELS); +TF_CALL_COMPLEX_TYPES(DEFINE_GPU_KERNELS); } // end namespace tensorflow #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index bf69a19abc5..1ae959b7b3f 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -278,16 +278,12 @@ class HandleStridedSliceAssignCase { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TF_CALL_GPU_PROXY_TYPES(PREVENT_FOR_N_GPU); -TF_CALL_complex64(PREVENT_FOR_N_GPU); -TF_CALL_complex128(PREVENT_FOR_N_GPU); +TF_CALL_COMPLEX_TYPES(PREVENT_FOR_N_GPU); -TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N_GPU); -TF_CALL_complex64(DECLARE_FOR_N_GPU); -TF_CALL_complex128(DECLARE_FOR_N_GPU); -TF_CALL_bool(DECLARE_FOR_N_GPU); TF_CALL_int8(DECLARE_FOR_N_GPU); -DECLARE_FOR_N_GPU(int32); -DECLARE_FOR_N_GPU(int64); +TF_CALL_int32(DECLARE_FOR_N_GPU); +TF_CALL_int64(DECLARE_FOR_N_GPU); +TF_CALL_GPU_ALL_TYPES(DECLARE_FOR_N_GPU); #endif // END GOOGLE_CUDA || TENSORFLOW_USE_ROCM TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); diff --git a/tensorflow/core/kernels/tensor_array.cc b/tensorflow/core/kernels/tensor_array.cc index 69efc016a1f..8613ed8c80d 100644 --- a/tensorflow/core/kernels/tensor_array.cc +++ b/tensorflow/core/kernels/tensor_array.cc @@ -46,8 +46,7 @@ TF_CALL_NUMBER_TYPES(TENSOR_ARRAY_WRITE_OR_ADD_CPU) #define TENSOR_ARRAY_WRITE_OR_ADD_GPU(T) TENSOR_ARRAY_WRITE_OR_ADD(GPUDevice, T) TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_WRITE_OR_ADD_GPU); -TF_CALL_complex64(TENSOR_ARRAY_WRITE_OR_ADD_GPU); -TF_CALL_complex128(TENSOR_ARRAY_WRITE_OR_ADD_GPU); +TF_CALL_COMPLEX_TYPES(TENSOR_ARRAY_WRITE_OR_ADD_GPU); #undef TENSOR_ARRAY_WRITE_OR_ADD_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -71,8 +70,7 @@ TF_CALL_bool(TENSOR_ARRAY_SET_ZERO_CPU); #define TENSOR_ARRAY_SET_ZERO_GPU(T) TENSOR_ARRAY_SET_ZERO(GPUDevice, T) TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_GPU); -TF_CALL_complex64(TENSOR_ARRAY_SET_ZERO_GPU); -TF_CALL_complex128(TENSOR_ARRAY_SET_ZERO_GPU); +TF_CALL_COMPLEX_TYPES(TENSOR_ARRAY_SET_ZERO_GPU); #undef TENSOR_ARRAY_SET_ZERO_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/tensor_array.h b/tensorflow/core/kernels/tensor_array.h index 405eba6c542..9bed5c533cf 100644 --- a/tensorflow/core/kernels/tensor_array.h +++ b/tensorflow/core/kernels/tensor_array.h @@ -61,8 +61,7 @@ TF_CALL_NUMBER_TYPES(TENSOR_ARRAY_WRITE_OR_ADD_CPU) #define TENSOR_ARRAY_WRITE_OR_ADD_GPU(T) TENSOR_ARRAY_WRITE_OR_ADD(GPUDevice, T) TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_WRITE_OR_ADD_GPU); -TF_CALL_complex64(TENSOR_ARRAY_WRITE_OR_ADD_GPU); -TF_CALL_complex128(TENSOR_ARRAY_WRITE_OR_ADD_GPU); +TF_CALL_COMPLEX_TYPES(TENSOR_ARRAY_WRITE_OR_ADD_GPU); #undef TENSOR_ARRAY_WRITE_OR_ADD_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -89,8 +88,7 @@ TF_CALL_bool(TENSOR_ARRAY_SET_ZERO_CPU); #define TENSOR_ARRAY_SET_ZERO_GPU(T) TENSOR_ARRAY_SET_ZERO(GPUDevice, T) TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_GPU); -TF_CALL_complex64(TENSOR_ARRAY_SET_ZERO_GPU); -TF_CALL_complex128(TENSOR_ARRAY_SET_ZERO_GPU); +TF_CALL_COMPLEX_TYPES(TENSOR_ARRAY_SET_ZERO_GPU); #undef TENSOR_ARRAY_SET_ZERO_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index ea8e04a33f4..048048cea9e 100644 
--- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -256,11 +256,10 @@ REGISTER_KERNEL_BUILDER(Name("TensorArrayV3").Device(DEVICE_CPU), .HostMemory("handle"), \ TensorArrayOp); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); -REGISTER_GPU(bfloat16); +TF_CALL_bfloat16(REGISTER_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -483,10 +482,9 @@ TF_CALL_ALL_TYPES(REGISTER_WRITE); .HostMemory("index"), \ TensorArrayWriteOp); +TF_CALL_bfloat16(REGISTER_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); -REGISTER_GPU(bfloat16); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -572,11 +570,10 @@ TF_CALL_ALL_TYPES(REGISTER_READ) .HostMemory("index"), \ TensorArrayReadOp); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); -REGISTER_GPU(bfloat16); +TF_CALL_bfloat16(REGISTER_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -774,10 +771,9 @@ REGISTER_GATHER_AND_PACK(qint32); .HostMemory("handle"), \ TensorArrayPackOrGatherOp); +TF_CALL_bfloat16(REGISTER_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); -REGISTER_GPU(bfloat16); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU // A special GPU kernel for int32. @@ -995,10 +991,9 @@ REGISTER_CONCAT(qint32); .HostMemory("handle"), \ TensorArrayConcatOp) +TF_CALL_bfloat16(REGISTER_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); -REGISTER_GPU(bfloat16); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU // A special GPU kernel for int32. 
@@ -1215,10 +1210,9 @@ TF_CALL_ALL_TYPES(REGISTER_SCATTER_AND_UNPACK); TensorArrayUnpackOrScatterOp); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -1387,8 +1381,7 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT); TensorArraySplitOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); -TF_CALL_complex64(REGISTER_GPU); -TF_CALL_complex128(REGISTER_GPU); +TF_CALL_COMPLEX_TYPES(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 7e2a8b363c5..b3714e4d27e 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -727,12 +727,8 @@ class ApplyGradientDescentOp : public OpKernel { ApplyGradientDescentOp); #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); -TF_CALL_half(REGISTER_CPU_KERNELS); -TF_CALL_bfloat16(REGISTER_CPU_KERNELS); -TF_CALL_float(REGISTER_CPU_KERNELS); -TF_CALL_double(REGISTER_CPU_KERNELS); -TF_CALL_complex64(REGISTER_CPU_KERNELS); -TF_CALL_complex128(REGISTER_CPU_KERNELS); +TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // Forward declarations of the functor specializations for GPU. @@ -898,12 +894,8 @@ class ApplyAdadeltaOp : public OpKernel { ApplyAdadeltaOp); #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); -TF_CALL_half(REGISTER_CPU_KERNELS); -TF_CALL_bfloat16(REGISTER_CPU_KERNELS); -TF_CALL_float(REGISTER_CPU_KERNELS); -TF_CALL_double(REGISTER_CPU_KERNELS); -TF_CALL_complex64(REGISTER_CPU_KERNELS); -TF_CALL_complex128(REGISTER_CPU_KERNELS); +TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // Forward declarations of the functor specializations for GPU. @@ -1089,12 +1081,8 @@ class SparseApplyAdadeltaOp : public OpKernel { REGISTER_KERNELS(T, int32); \ REGISTER_KERNELS(T, int64); -TF_CALL_half(REGISTER_CPU_KERNELS); -TF_CALL_bfloat16(REGISTER_CPU_KERNELS); -TF_CALL_float(REGISTER_CPU_KERNELS); -TF_CALL_double(REGISTER_CPU_KERNELS); -TF_CALL_complex64(REGISTER_CPU_KERNELS); -TF_CALL_complex128(REGISTER_CPU_KERNELS); +TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS #undef REGISTER_KERNELS @@ -1383,12 +1371,8 @@ class ApplyAdagradOp : public OpKernel { ApplyAdagradOp); #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); -TF_CALL_half(REGISTER_CPU_KERNELS); -TF_CALL_bfloat16(REGISTER_CPU_KERNELS); -TF_CALL_float(REGISTER_CPU_KERNELS); -TF_CALL_double(REGISTER_CPU_KERNELS); -TF_CALL_complex64(REGISTER_CPU_KERNELS); -TF_CALL_complex128(REGISTER_CPU_KERNELS); +TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS); +TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // Forward declarations of the functor specializations for GPU. 
@@ -1492,12 +1476,8 @@ class ApplyAdagradV2Op : public OpKernel {
                         ApplyAdagradV2Op);
 #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // Forward declarations of the functor specializations for GPU.
@@ -1801,12 +1781,8 @@ class SparseApplyAdagradOp : public OpKernel {
   REGISTER_KERNELS(T, int32); \
   REGISTER_KERNELS(T, int64);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #undef REGISTER_CPU_KERNELS
 #undef REGISTER_KERNELS
@@ -1976,12 +1952,8 @@ class SparseApplyAdagradV2Op : public OpKernel {
   REGISTER_KERNELS(T, int32); \
   REGISTER_KERNELS(T, int64);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #undef REGISTER_CPU_KERNELS
 #undef REGISTER_KERNELS
@@ -3054,12 +3026,8 @@ class ApplyMomentumOp : public OpKernel {
                         ApplyMomentumOp);
 #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // Forward declarations of the functor specializations for GPU.
@@ -3209,12 +3177,8 @@ class SparseApplyMomentumOp : public OpKernel {
   REGISTER_KERNELS(T, int32); \
   REGISTER_KERNELS(T, int64);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #undef REGISTER_CPU_KERNELS
 #undef REGISTER_KERNELS
@@ -3288,12 +3252,8 @@ class ApplyKerasMomentumOp : public OpKernel {
                         ApplyKerasMomentumOp);
 #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // Forward declarations of the functor specializations for GPU.
@@ -3423,12 +3383,9 @@ class SparseApplyKerasMomentumOp : public OpKernel {
   REGISTER_KERNELS(T, CPU, int32); \
   REGISTER_KERNELS(T, CPU, int64);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
+
 #undef REGISTER_CPU_KERNELS
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -3699,12 +3656,8 @@ class ApplyAdamOp : public OpKernel {
                         ApplyAdamOp);
 #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #ifdef TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T);
@@ -4226,12 +4179,8 @@ class ApplyCenteredRMSPropOp : public OpKernel {
                         ApplyCenteredRMSPropOp);
 #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
 
-TF_CALL_half(REGISTER_CPU_KERNELS);
-TF_CALL_bfloat16(REGISTER_CPU_KERNELS);
-TF_CALL_float(REGISTER_CPU_KERNELS);
-TF_CALL_double(REGISTER_CPU_KERNELS);
-TF_CALL_complex64(REGISTER_CPU_KERNELS);
-TF_CALL_complex128(REGISTER_CPU_KERNELS);
+TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // Forward declarations of the functor specializations for GPU.
diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc
index ce02aa17225..7ac02e8b4d4 100644
--- a/tensorflow/core/kernels/unpack_op.cc
+++ b/tensorflow/core/kernels/unpack_op.cc
@@ -144,12 +144,9 @@ TF_CALL_ALL_TYPES(REGISTER_UNPACK);
       Name("Unpack").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
       UnpackOp<GPUDevice, type>)
 
-TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU);
 TF_CALL_bfloat16(REGISTER_GPU);
 TF_CALL_uint8(REGISTER_GPU);
-TF_CALL_bool(REGISTER_GPU);
-TF_CALL_complex64(REGISTER_GPU);
-TF_CALL_complex128(REGISTER_GPU);
+TF_CALL_GPU_ALL_TYPES(REGISTER_GPU);
 #undef REGISTER_GPU
 
 // A special GPU kernel for int32.
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index e527c1a2e3f..6f5e0b94eca 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -250,11 +250,10 @@ TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
                             .HostMemory("is_initialized"), \
                         IsVariableInitializedOp);
 
-TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
-TF_CALL_complex64(REGISTER_GPU_KERNELS);
-TF_CALL_complex128(REGISTER_GPU_KERNELS);
 TF_CALL_int64(REGISTER_GPU_KERNELS);
 TF_CALL_uint32(REGISTER_GPU_KERNELS);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
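
For readers unfamiliar with the dispatch pattern used throughout the hunks above, here is a minimal, self-contained C++ sketch of the TF_CALL_* "X-macro" expansion. This is not TensorFlow code: the TF_CALL_* names mirror tensorflow/core/framework/register_types.h, but the dtype aliases and the registration body are hypothetical stand-ins that record dtype names instead of registering OpKernel factories.

// Minimal sketch of the TF_CALL_* X-macro dispatch pattern (illustrative only;
// the real definitions live in tensorflow/core/framework/register_types.h and
// expand into REGISTER_KERNEL_BUILDER calls rather than a string registry).
#include <complex>
#include <iostream>
#include <string>
#include <vector>

using complex64 = std::complex<float>;    // stand-in for tensorflow::complex64
using complex128 = std::complex<double>;  // stand-in for tensorflow::complex128

// Per-dtype dispatch macros: each TF_CALL_<dtype>(m) invokes the macro `m`
// once with that dtype, so a grouping macro expands into one call per dtype.
#define TF_CALL_complex64(m) m(complex64)
#define TF_CALL_complex128(m) m(complex128)
#define TF_CALL_COMPLEX_TYPES(m) TF_CALL_complex64(m) TF_CALL_complex128(m)

// Hypothetical stand-in for kernel registration: records the dtype name.
std::vector<std::string>& Registry() {
  static std::vector<std::string> registry;
  return registry;
}

template <typename T>
void RegisterKernel(const char* dtype_name) {
  Registry().push_back(dtype_name);
}

#define REGISTER_CPU_KERNELS(T) RegisterKernel<T>(#T);

int main() {
  // Expands to:
  //   RegisterKernel<complex64>("complex64");
  //   RegisterKernel<complex128>("complex128");
  TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
  for (const auto& name : Registry()) {
    std::cout << "registered: " << name << "\n";
  }
  return 0;
}

Compiling and running this sketch prints "registered: complex64" and "registered: complex128", i.e. the single TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS) line expands to the same per-dtype invocations that were previously written out by hand.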