From 29ecfbf1e7ab2f073e69770753174667079d10b5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" <gardener@tensorflow.org> Date: Wed, 24 Apr 2019 16:51:43 -0700 Subject: [PATCH] Check if GOOGLE_CUDA and TENSORFLOW_USE_ROCM are defined before using them. This is necessary to avoid errors with stricter clang checks PiperOrigin-RevId: 245143683 --- .../common_runtime/gpu/gpu_bfc_allocator_test.cc | 3 ++- .../gpu/gpu_debug_allocator_test.cc | 3 ++- tensorflow/core/common_runtime/gpu/gpu_device.cc | 3 ++- .../core/common_runtime/gpu/gpu_device_factory.cc | 3 ++- .../core/common_runtime/gpu/gpu_process_state.cc | 15 ++++++++++----- tensorflow/core/framework/types.cc | 3 ++- tensorflow/core/framework/types.h | 3 ++- tensorflow/core/kernels/adjust_contrast_op.cc | 6 ++++-- .../kernels/adjust_contrast_op_benchmark_test.cc | 3 ++- .../core/kernels/adjust_contrast_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/aggregate_ops.cc | 3 ++- tensorflow/core/kernels/aggregate_ops_gpu.cu.cc | 3 ++- tensorflow/core/kernels/argmax_op.cc | 6 ++++-- tensorflow/core/kernels/argmax_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/batch_kernels.cc | 9 ++++++--- tensorflow/core/kernels/betainc_op.cc | 3 ++- tensorflow/core/kernels/betainc_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/broadcast_to_op.cc | 6 ++++-- tensorflow/core/kernels/broadcast_to_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/cast_op.cc | 6 ++++-- tensorflow/core/kernels/cast_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/cast_op_impl.h | 3 ++- tensorflow/core/kernels/cast_op_impl_bfloat.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_bool.cc | 3 ++- .../core/kernels/cast_op_impl_complex128.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_complex64.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_double.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_float.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_half.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_int16.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_int32.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_int64.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_int8.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_uint16.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_uint32.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_uint64.cc | 3 ++- tensorflow/core/kernels/cast_op_impl_uint8.cc | 3 ++- tensorflow/core/kernels/colorspace_op.cc | 3 ++- tensorflow/core/kernels/colorspace_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/concat_lib.h | 3 ++- tensorflow/core/kernels/constant_op.cc | 15 ++++++++++----- tensorflow/core/kernels/constant_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/constant_op_test.cc | 3 ++- .../core/kernels/extract_image_patches_op.cc | 3 ++- .../kernels/extract_image_patches_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/fake_quant_ops.cc | 15 ++++++++++----- tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc | 3 ++- tensorflow/core/kernels/fft_ops.cc | 6 ++++-- tensorflow/core/kernels/fill_functor.cu.cc | 3 ++- tensorflow/core/kernels/gpu_device_array.h | 3 ++- tensorflow/core/kernels/gpu_device_array_gpu.h | 3 ++- tensorflow/core/kernels/identity_op.cc | 3 ++- tensorflow/core/kernels/nn_ops_test.cc | 3 ++- tensorflow/core/kernels/one_hot_op.cc | 6 ++++-- tensorflow/core/kernels/one_hot_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/pad_op.cc | 3 ++- tensorflow/core/kernels/pad_op_gpu.cu.cc | 3 ++- .../core/kernels/quantize_and_dequantize_op.cc | 6 ++++-- .../kernels/quantize_and_dequantize_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/reshape_op.cc | 3 ++- tensorflow/core/kernels/softplus_op.cc | 3 ++- tensorflow/core/kernels/softplus_op_gpu.cu.cc | 3 ++- tensorflow/core/kernels/stage_op.cc | 15 ++++++++++----- tensorflow/core/kernels/xent_op.cc | 3 ++- tensorflow/core/kernels/xent_op_gpu.cu.cc | 3 ++- tensorflow/core/util/port.cc | 3 ++- 66 files changed, 182 insertions(+), 91 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 2e44a37c68f..a97874aef60 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h" diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc index a0728c017e1..06b01fe9c6a 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h" diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 416ca602a5f..c888d76926a 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -15,7 +15,8 @@ limitations under the License. // TODO(opensource): Use a more generic sounding preprocessor name than // GOOGLE_CUDA -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #if TENSORFLOW_USE_ROCM #include "rocm/include/hip/hip_runtime.h" diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc index 72ff4cc9e80..e6b25209661 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc index ea45bfaef53..81333fe40f6 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc @@ -81,7 +81,8 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, TfGpuId tf_gpu_id, size_t total_bytes) { CHECK(process_state_); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) const string& allocator_type = options.allocator_type(); mutex_lock lock(mu_); GpuIdUtil::CheckValidTfGpuId(tf_gpu_id); @@ -163,7 +164,8 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, SharedCounter* GPUProcessState::GPUAllocatorCounter(TfGpuId tf_gpu_id) { DCHECK(process_state_); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) GpuIdUtil::CheckValidTfGpuId(tf_gpu_id); mutex_lock l(mu_); if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) { @@ -275,7 +277,8 @@ Allocator* GPUProcessState::GetGpuHostAllocator(int numa_node) { void GPUProcessState::AddGPUAllocVisitor(int bus_id, const SubAllocator::Visitor& visitor) { -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) mutex_lock lock(mu_); CHECK(gpu_allocators_.empty()) // Crash OK << "AddGPUAllocVisitor must be called before " @@ -290,7 +293,8 @@ void GPUProcessState::AddGPUAllocVisitor(int bus_id, void GPUProcessState::AddGpuHostAllocVisitor( int numa_node, const SubAllocator::Visitor& visitor) { -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) mutex_lock lock(mu_); CHECK(gpu_host_allocators_.empty()) // Crash OK << "AddGpuHostAllocVisitor must be called before " @@ -304,7 +308,8 @@ void GPUProcessState::AddGpuHostAllocVisitor( void GPUProcessState::AddGpuHostFreeVisitor( int numa_node, const SubAllocator::Visitor& visitor) { -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) mutex_lock lock(mu_); CHECK(gpu_host_allocators_.empty()) // Crash OK << "AddGpuHostFreeVisitor must be called before " diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc index ef10ba16d5f..39346df11d5 100644 --- a/tensorflow/core/framework/types.cc +++ b/tensorflow/core/framework/types.cc @@ -40,7 +40,8 @@ const char* const DEVICE_GPU = "GPU"; const char* const DEVICE_SYCL = "SYCL"; const std::string DeviceName<Eigen::ThreadPoolDevice>::value = DEVICE_CPU; -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) const std::string DeviceName<Eigen::GpuDevice>::value = DEVICE_GPU; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h index 5b6eb872658..d91188cf43f 100644 --- a/tensorflow/core/framework/types.h +++ b/tensorflow/core/framework/types.h @@ -83,7 +83,8 @@ struct DeviceName<Eigen::ThreadPoolDevice> { static const std::string value; }; -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) template <> struct DeviceName<Eigen::GpuDevice> { static const std::string value; diff --git a/tensorflow/core/kernels/adjust_contrast_op.cc b/tensorflow/core/kernels/adjust_contrast_op.cc index 631702f4a7c..7fa33950bb2 100644 --- a/tensorflow/core/kernels/adjust_contrast_op.cc +++ b/tensorflow/core/kernels/adjust_contrast_op.cc @@ -99,7 +99,8 @@ REGISTER_KERNEL(float); REGISTER_KERNEL(double); #undef REGISTER_KERNEL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the function specializations for GPU (to prevent // building the GPU versions here, they will be built compiling _gpu.cu.cc). namespace functor { @@ -382,7 +383,8 @@ REGISTER_KERNEL_BUILDER( Name("AdjustContrastv2").Device(DEVICE_CPU).TypeConstraint<float>("T"), AdjustContrastOpv2<CPUDevice, float>); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the function specializations for GPU (to prevent // building the GPU versions here, they will be built compiling _gpu.cu.cc). namespace functor { diff --git a/tensorflow/core/kernels/adjust_contrast_op_benchmark_test.cc b/tensorflow/core/kernels/adjust_contrast_op_benchmark_test.cc index 9030b9db793..0b9142ce1b5 100644 --- a/tensorflow/core/kernels/adjust_contrast_op_benchmark_test.cc +++ b/tensorflow/core/kernels/adjust_contrast_op_benchmark_test.cc @@ -56,7 +56,8 @@ static Graph* BM_AdjustContrast(int batches, int width, int height) { // BM_AdjustContrast_cpu_1_299_299 179084 340186 2181 751.9M items/s // BM_AdjustContrast_gpu_32_299_299 85276 123665 4189 2.9G items/s BM_AdjustContrastDev(cpu, 1, 299, 299); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) BM_AdjustContrastDev(gpu, 32, 299, 299); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/adjust_contrast_op_gpu.cu.cc b/tensorflow/core/kernels/adjust_contrast_op_gpu.cu.cc index 2c20d3db747..e072dc46f5f 100644 --- a/tensorflow/core/kernels/adjust_contrast_op_gpu.cu.cc +++ b/tensorflow/core/kernels/adjust_contrast_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc index 3b8d339747f..afe34b087ba 100644 --- a/tensorflow/core/kernels/aggregate_ops.cc +++ b/tensorflow/core/kernels/aggregate_ops.cc @@ -211,7 +211,8 @@ REGISTER_ADDN_CPU(Variant); #undef REGISTER_ADDN_CPU -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define REGISTER_ADDN_GPU(type) REGISTER_ADDN(type, GPU) TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU); TF_CALL_int64(REGISTER_ADDN_GPU); diff --git a/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc b/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc index 1dcf46047c8..85bdc2447cd 100644 --- a/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/argmax_op.cc b/tensorflow/core/kernels/argmax_op.cc index d840b2df2b0..59c8fff082c 100644 --- a/tensorflow/core/kernels/argmax_op.cc +++ b/tensorflow/core/kernels/argmax_op.cc @@ -17,7 +17,8 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -150,7 +151,8 @@ class ArgMinOp TF_CALL_REAL_NUMBER_TYPES(REGISTER_ARGMAX); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the functor specializations for GPU. namespace functor { diff --git a/tensorflow/core/kernels/argmax_op_gpu.cu.cc b/tensorflow/core/kernels/argmax_op_gpu.cu.cc index 1b82ae6105f..bd7c4b4027c 100644 --- a/tensorflow/core/kernels/argmax_op_gpu.cu.cc +++ b/tensorflow/core/kernels/argmax_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 199f551d5aa..d0be9227078 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -83,7 +83,8 @@ Status Concat(OpKernelContext* context, const gtl::ArraySlice<Tensor>& inputs, context->allocate_temp(DataTypeToEnum<T>::value, output_shape, output)); if (output->NumElements() > 0) { auto output_flat = output->shaped<T, 2>({1, output->NumElements()}); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) if (std::is_same<Device, GPUDevice>::value) { ConcatGPU<T>(context, inputs_flat, output, &output_flat); return Status::OK(); @@ -173,7 +174,8 @@ Status SplitCPU(OpKernelContext* context, const Tensor& input, return Status::OK(); } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Handles the general case, on GPU. template <typename T> @@ -198,7 +200,8 @@ Status Split(OpKernelContext* context, const Tensor& input, return Status::OK(); } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // TODO(olston, apassos): Handle non-CPU cases. // return SplitGPU<T>(context, input, sizes, outputs); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/betainc_op.cc b/tensorflow/core/kernels/betainc_op.cc index 8683006ed38..f9f6d9f8dfa 100644 --- a/tensorflow/core/kernels/betainc_op.cc +++ b/tensorflow/core/kernels/betainc_op.cc @@ -122,7 +122,8 @@ REGISTER_KERNELS(float); REGISTER_KERNELS(double); #undef REGISTER_KERNELS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the functor specializations for GPU. namespace functor { #define DECLARE_GPU_SPEC_NDIM(T, NDIM) \ diff --git a/tensorflow/core/kernels/betainc_op_gpu.cu.cc b/tensorflow/core/kernels/betainc_op_gpu.cu.cc index 4c8b0aa10b4..2b7ce398696 100644 --- a/tensorflow/core/kernels/betainc_op_gpu.cu.cc +++ b/tensorflow/core/kernels/betainc_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/broadcast_to_op.cc b/tensorflow/core/kernels/broadcast_to_op.cc index 29c47b0e00d..51caca50ebd 100644 --- a/tensorflow/core/kernels/broadcast_to_op.cc +++ b/tensorflow/core/kernels/broadcast_to_op.cc @@ -15,7 +15,8 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -100,7 +101,8 @@ class BroadcastToOp : public OpKernel { TF_CALL_ALL_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) namespace functor { #define DECLARE_GPU_TEMPLATE(Type) \ diff --git a/tensorflow/core/kernels/broadcast_to_op_gpu.cu.cc b/tensorflow/core/kernels/broadcast_to_op_gpu.cu.cc index 4d9a8b073c3..aae1fb718d4 100644 --- a/tensorflow/core/kernels/broadcast_to_op_gpu.cu.cc +++ b/tensorflow/core/kernels/broadcast_to_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/cast_op.cc b/tensorflow/core/kernels/cast_op.cc index 05ca1cec957..91a9dbc7a9a 100644 --- a/tensorflow/core/kernels/cast_op.cc +++ b/tensorflow/core/kernels/cast_op.cc @@ -168,7 +168,8 @@ Status CpuCastOp::Prepare() { return work_ == nullptr ? Unimplemented() : Status::OK(); } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) class GpuCastOp : public CastOpBase { public: explicit GpuCastOp(OpKernelConstruction* ctx) : CastOpBase(ctx) { @@ -222,7 +223,8 @@ class GpuCastOp : public CastOpBase { REGISTER_KERNEL_BUILDER(Name("Cast").Device(DEVICE_CPU), CpuCastOp); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define REGISTER_CAST_GPU(srctype, dsttype) \ REGISTER_KERNEL_BUILDER(Name("Cast") \ .TypeConstraint<srctype>("SrcT") \ diff --git a/tensorflow/core/kernels/cast_op_gpu.cu.cc b/tensorflow/core/kernels/cast_op_gpu.cu.cc index 504697182e2..85cedfcd867 100644 --- a/tensorflow/core/kernels/cast_op_gpu.cu.cc +++ b/tensorflow/core/kernels/cast_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/cast_op_impl.h b/tensorflow/core/kernels/cast_op_impl.h index 154bedea753..266e2cec47a 100644 --- a/tensorflow/core/kernels/cast_op_impl.h +++ b/tensorflow/core/kernels/cast_op_impl.h @@ -99,7 +99,8 @@ CastFunctorType GetCpuCastFromComplex128(DataType dst_dtype); CastFunctorType GetCpuCastFromBfloat(DataType dst_dtype); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Same, for GPU. CastFunctorType GetGpuCastFromBool(DataType dst_dtype); diff --git a/tensorflow/core/kernels/cast_op_impl_bfloat.cc b/tensorflow/core/kernels/cast_op_impl_bfloat.cc index 2b41956ed76..0a399d011e9 100644 --- a/tensorflow/core/kernels/cast_op_impl_bfloat.cc +++ b/tensorflow/core/kernels/cast_op_impl_bfloat.cc @@ -27,7 +27,8 @@ CastFunctorType GetCpuCastFromBfloat(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromBfloat(DataType dst_dtype) { if (dst_dtype == DT_FLOAT) { return [](OpKernelContext* ctx, const Tensor& inp, Tensor* out, diff --git a/tensorflow/core/kernels/cast_op_impl_bool.cc b/tensorflow/core/kernels/cast_op_impl_bool.cc index 98cb49f5e66..d08a45a0745 100644 --- a/tensorflow/core/kernels/cast_op_impl_bool.cc +++ b/tensorflow/core/kernels/cast_op_impl_bool.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromBool(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromBool(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, bool); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_complex128.cc b/tensorflow/core/kernels/cast_op_impl_complex128.cc index d610bd3c3ea..9bd0e11b97b 100644 --- a/tensorflow/core/kernels/cast_op_impl_complex128.cc +++ b/tensorflow/core/kernels/cast_op_impl_complex128.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromComplex128(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromComplex128(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, std::complex<double>); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_complex64.cc b/tensorflow/core/kernels/cast_op_impl_complex64.cc index cb1018f6a5d..bb7fd86aa75 100644 --- a/tensorflow/core/kernels/cast_op_impl_complex64.cc +++ b/tensorflow/core/kernels/cast_op_impl_complex64.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromComplex64(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromComplex64(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, std::complex<float>); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_double.cc b/tensorflow/core/kernels/cast_op_impl_double.cc index a9a129bfa47..8637f3dbabf 100644 --- a/tensorflow/core/kernels/cast_op_impl_double.cc +++ b/tensorflow/core/kernels/cast_op_impl_double.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromDouble(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromDouble(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, double); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_float.cc b/tensorflow/core/kernels/cast_op_impl_float.cc index c7a918ea0ee..c2418e93f9b 100644 --- a/tensorflow/core/kernels/cast_op_impl_float.cc +++ b/tensorflow/core/kernels/cast_op_impl_float.cc @@ -27,7 +27,8 @@ CastFunctorType GetCpuCastFromFloat(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromFloat(DataType dst_dtype) { CURRY_TYPES3(CAST_CASE, GPUDevice, float); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_half.cc b/tensorflow/core/kernels/cast_op_impl_half.cc index 041c2054e6a..1581b6b6a2e 100644 --- a/tensorflow/core/kernels/cast_op_impl_half.cc +++ b/tensorflow/core/kernels/cast_op_impl_half.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromHalf(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromHalf(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, Eigen::half); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_int16.cc b/tensorflow/core/kernels/cast_op_impl_int16.cc index a3fdef777c5..b32200615fa 100644 --- a/tensorflow/core/kernels/cast_op_impl_int16.cc +++ b/tensorflow/core/kernels/cast_op_impl_int16.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt16(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromInt16(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int16); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_int32.cc b/tensorflow/core/kernels/cast_op_impl_int32.cc index cc43c745455..154fd148ce2 100644 --- a/tensorflow/core/kernels/cast_op_impl_int32.cc +++ b/tensorflow/core/kernels/cast_op_impl_int32.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt32(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromInt32(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int32); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_int64.cc b/tensorflow/core/kernels/cast_op_impl_int64.cc index acc550f5a24..1f4ebc96b46 100644 --- a/tensorflow/core/kernels/cast_op_impl_int64.cc +++ b/tensorflow/core/kernels/cast_op_impl_int64.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt64(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromInt64(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int64); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_int8.cc b/tensorflow/core/kernels/cast_op_impl_int8.cc index e0cad338da4..00a72ab9868 100644 --- a/tensorflow/core/kernels/cast_op_impl_int8.cc +++ b/tensorflow/core/kernels/cast_op_impl_int8.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromInt8(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromInt8(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, int8); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_uint16.cc b/tensorflow/core/kernels/cast_op_impl_uint16.cc index 31d171a16bc..2981fe99e3c 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint16.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint16.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint16(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromUint16(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint16); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_uint32.cc b/tensorflow/core/kernels/cast_op_impl_uint32.cc index fc6c67bcbc0..b94540dfe7d 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint32.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint32.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint32(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromUint32(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint32); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_uint64.cc b/tensorflow/core/kernels/cast_op_impl_uint64.cc index 70bf90ce948..e04c0a28cd8 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint64.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint64.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint64(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromUint64(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint64); return nullptr; diff --git a/tensorflow/core/kernels/cast_op_impl_uint8.cc b/tensorflow/core/kernels/cast_op_impl_uint8.cc index da365261022..20c572980c3 100644 --- a/tensorflow/core/kernels/cast_op_impl_uint8.cc +++ b/tensorflow/core/kernels/cast_op_impl_uint8.cc @@ -25,7 +25,8 @@ CastFunctorType GetCpuCastFromUint8(DataType dst_dtype) { return nullptr; } -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromUint8(DataType dst_dtype) { CURRY_TYPES3_NO_BF16(CAST_CASE, GPUDevice, uint8); return nullptr; diff --git a/tensorflow/core/kernels/colorspace_op.cc b/tensorflow/core/kernels/colorspace_op.cc index eb172f872b0..6c817f73058 100644 --- a/tensorflow/core/kernels/colorspace_op.cc +++ b/tensorflow/core/kernels/colorspace_op.cc @@ -119,7 +119,8 @@ class HSVToRGBOp : public OpKernel { TF_CALL_float(REGISTER_CPU); TF_CALL_double(REGISTER_CPU); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the function specializations for GPU (to prevent // building the GPU versions here, they will be built compiling _gpu.cu.cc). namespace functor { diff --git a/tensorflow/core/kernels/colorspace_op_gpu.cu.cc b/tensorflow/core/kernels/colorspace_op_gpu.cu.cc index 6427d2014be..227490a2056 100644 --- a/tensorflow/core/kernels/colorspace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/colorspace_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/concat_lib.h b/tensorflow/core/kernels/concat_lib.h index 7303a3409a2..5e338fe4c68 100644 --- a/tensorflow/core/kernels/concat_lib.h +++ b/tensorflow/core/kernels/concat_lib.h @@ -47,7 +47,8 @@ void ConcatCPU( const std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>& inputs, typename TTypes<T, 2>::Matrix* output); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) template <typename T> void ConcatGPU( OpKernelContext* c, diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index c3421e44b69..09180d61db6 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -17,7 +17,8 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU #endif @@ -92,7 +93,8 @@ ConstantOp::~ConstantOp() {} REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define REGISTER_KERNEL(D, TYPE) \ REGISTER_KERNEL_BUILDER( \ Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \ @@ -216,7 +218,8 @@ REGISTER_KERNEL_BUILDER(Name("Fill") #undef REGISTER_KERNEL_SYCL #endif // TENSORFLOW_USE_SYCL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL(GPU, Eigen::half); REGISTER_KERNEL(GPU, bfloat16); REGISTER_KERNEL(GPU, float); @@ -300,7 +303,8 @@ REGISTER_KERNEL_BUILDER(Name("ZerosLike") ZerosLikeOp<CPUDevice, int32>); #endif // TENSORFLOW_USE_SYCL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL(bool, GPU); REGISTER_KERNEL(Eigen::half, GPU); REGISTER_KERNEL(bfloat16, GPU); @@ -353,7 +357,8 @@ REGISTER_KERNEL_BUILDER(Name("OnesLike") OnesLikeOp<CPUDevice, int32>); #endif // TENSORFLOW_USE_SYCL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL(bool, GPU); REGISTER_KERNEL(Eigen::half, GPU); REGISTER_KERNEL(bfloat16, GPU); diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc index 4599232cb62..36c30da7faf 100644 --- a/tensorflow/core/kernels/constant_op_gpu.cu.cc +++ b/tensorflow/core/kernels/constant_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/constant_op_test.cc b/tensorflow/core/kernels/constant_op_test.cc index c618c36b719..e0171ea9001 100644 --- a/tensorflow/core/kernels/constant_op_test.cc +++ b/tensorflow/core/kernels/constant_op_test.cc @@ -86,7 +86,8 @@ void ConstantOpTest::PersistentMemoryTrackingTest(bool on_gpu) { TEST_F(ConstantOpTest, PersistentMemoryTracking) { PersistentMemoryTrackingTest(false); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) PersistentMemoryTrackingTest(true); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } diff --git a/tensorflow/core/kernels/extract_image_patches_op.cc b/tensorflow/core/kernels/extract_image_patches_op.cc index 7192fec37e6..0fc1f567a92 100644 --- a/tensorflow/core/kernels/extract_image_patches_op.cc +++ b/tensorflow/core/kernels/extract_image_patches_op.cc @@ -130,7 +130,8 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER); #undef REGISTER -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the functor specializations for GPU. namespace functor { diff --git a/tensorflow/core/kernels/extract_image_patches_op_gpu.cu.cc b/tensorflow/core/kernels/extract_image_patches_op_gpu.cu.cc index 465b7acd475..650c51fc765 100644 --- a/tensorflow/core/kernels/extract_image_patches_op_gpu.cu.cc +++ b/tensorflow/core/kernels/extract_image_patches_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/fake_quant_ops.cc b/tensorflow/core/kernels/fake_quant_ops.cc index 5a3c571fbea..01e3468c93d 100644 --- a/tensorflow/core/kernels/fake_quant_ops.cc +++ b/tensorflow/core/kernels/fake_quant_ops.cc @@ -15,7 +15,8 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -28,7 +29,8 @@ limitations under the License. using tensorflow::BinaryElementWiseOp; using tensorflow::DEVICE_CPU; -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) using tensorflow::DEVICE_GPU; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM using tensorflow::OpKernel; @@ -143,7 +145,8 @@ REGISTER_KERNEL_BUILDER( Name("FakeQuantWithMinMaxArgsGradient").Device(DEVICE_CPU), FakeQuantWithMinMaxArgsGradientOp<CPUDevice>); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) typedef Eigen::GpuDevice GPUDevice; // Forward declarations for functor specializations for GPU. @@ -265,7 +268,8 @@ REGISTER_KERNEL_BUILDER( Name("FakeQuantWithMinMaxVarsGradient").Device(DEVICE_CPU), FakeQuantWithMinMaxVarsGradientOp<CPUDevice>); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) template <> void FakeQuantWithMinMaxVarsFunctor<GPUDevice>::operator()( const GPUDevice& d, typename TTypes<float>::ConstFlat inputs, @@ -411,7 +415,8 @@ REGISTER_KERNEL_BUILDER( Name("FakeQuantWithMinMaxVarsPerChannelGradient").Device(DEVICE_CPU), FakeQuantWithMinMaxVarsPerChannelGradientOp<CPUDevice>); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) template <> void FakeQuantWithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()( const GPUDevice& d, typename TTypes<float>::ConstMatrix inputs, diff --git a/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc b/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc index dc258f43c8e..b3bd44000ea 100644 --- a/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define FAKE_QUANT_NO_DEBUG diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc index 29b5e89b728..e0f326dcea3 100644 --- a/tensorflow/core/kernels/fft_ops.cc +++ b/tensorflow/core/kernels/fft_ops.cc @@ -28,7 +28,8 @@ limitations under the License. #include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/work_sharder.h" -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #include "tensorflow/core/platform/stream_executor.h" #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -286,7 +287,8 @@ REGISTER_KERNEL_BUILDER(Name("IRFFT3D").Device(DEVICE_CPU).Label(FFT_LABEL), #undef FFT_LABEL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) namespace { template <typename T> diff --git a/tensorflow/core/kernels/fill_functor.cu.cc b/tensorflow/core/kernels/fill_functor.cu.cc index c759be12d0f..4e47de45c3e 100644 --- a/tensorflow/core/kernels/fill_functor.cu.cc +++ b/tensorflow/core/kernels/fill_functor.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/gpu_device_array.h b/tensorflow/core/kernels/gpu_device_array.h index 3961cee043b..51eb8bba60c 100644 --- a/tensorflow/core/kernels/gpu_device_array.h +++ b/tensorflow/core/kernels/gpu_device_array.h @@ -15,7 +15,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_H_ #define TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_H_ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" diff --git a/tensorflow/core/kernels/gpu_device_array_gpu.h b/tensorflow/core/kernels/gpu_device_array_gpu.h index ca2051c70db..3d81712dd76 100644 --- a/tensorflow/core/kernels/gpu_device_array_gpu.h +++ b/tensorflow/core/kernels/gpu_device_array_gpu.h @@ -18,7 +18,8 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_GPU_H_ #define TENSORFLOW_CORE_KERNELS_CUDA_DEVICE_ARRAY_GPU_H_ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) namespace tensorflow { diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc index 541099760b1..cf63a975cc8 100644 --- a/tensorflow/core/kernels/identity_op.cc +++ b/tensorflow/core/kernels/identity_op.cc @@ -112,7 +112,8 @@ REGISTER_GPU_KERNEL(Variant); #undef REGISTER_GPU_KERNEL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // A special GPU kernel for int32 and bool. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc index c506af95704..e977aa51afb 100644 --- a/tensorflow/core/kernels/nn_ops_test.cc +++ b/tensorflow/core/kernels/nn_ops_test.cc @@ -15,7 +15,8 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/one_hot_op.cc b/tensorflow/core/kernels/one_hot_op.cc index 49695bdcb35..0548e389b7a 100644 --- a/tensorflow/core/kernels/one_hot_op.cc +++ b/tensorflow/core/kernels/one_hot_op.cc @@ -17,7 +17,8 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -140,7 +141,8 @@ class OneHotOp : public OpKernel { TF_CALL_ALL_TYPES(REGISTER_ONE_HOT); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the functor specializations for GPU. namespace functor { diff --git a/tensorflow/core/kernels/one_hot_op_gpu.cu.cc b/tensorflow/core/kernels/one_hot_op_gpu.cu.cc index fc97962210e..83ba272433f 100644 --- a/tensorflow/core/kernels/one_hot_op_gpu.cu.cc +++ b/tensorflow/core/kernels/one_hot_op_gpu.cu.cc @@ -15,7 +15,8 @@ limitations under the License. // See docs in ../ops/array_ops.cc -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index d215756e9b8..a55b4afb9c8 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -294,7 +294,8 @@ TF_CALL_POD_TYPES(REGISTER_KERNEL); TF_CALL_string(REGISTER_KERNEL); #undef REGISTER_KERNEL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the functor specializations for GPU. namespace functor { #define DECLARE_GPU_SPEC(T, Dims) \ diff --git a/tensorflow/core/kernels/pad_op_gpu.cu.cc b/tensorflow/core/kernels/pad_op_gpu.cu.cc index bd4b0f04e70..ddc12417a91 100644 --- a/tensorflow/core/kernels/pad_op_gpu.cu.cc +++ b/tensorflow/core/kernels/pad_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc index d39331cba18..43f1c6ea2af 100644 --- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc +++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc @@ -15,7 +15,8 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -241,7 +242,8 @@ TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define REGISTER_GPU_KERNEL(T) \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV2") \ .Device(DEVICE_GPU) \ diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc b/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc index 290b639a364..00d2a3b1b30 100644 --- a/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc +++ b/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/reshape_op.cc b/tensorflow/core/kernels/reshape_op.cc index ead95a39a29..9860448947a 100644 --- a/tensorflow/core/kernels/reshape_op.cc +++ b/tensorflow/core/kernels/reshape_op.cc @@ -86,7 +86,8 @@ REGISTER_KERNEL_BUILDER(Name("Reshape") #undef REGISTER_SYCL_KERNEL #endif // TENSORFLOW_USE_SYCL -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. diff --git a/tensorflow/core/kernels/softplus_op.cc b/tensorflow/core/kernels/softplus_op.cc index fb00e1bb08c..0c0f33093e3 100644 --- a/tensorflow/core/kernels/softplus_op.cc +++ b/tensorflow/core/kernels/softplus_op.cc @@ -87,7 +87,8 @@ void SoftplusGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context, TF_CALL_FLOAT_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the functor specializations for GPU. namespace functor { #define DECLARE_GPU_SPEC(T) \ diff --git a/tensorflow/core/kernels/softplus_op_gpu.cu.cc b/tensorflow/core/kernels/softplus_op_gpu.cu.cc index 900df277a5b..0cf169da85e 100644 --- a/tensorflow/core/kernels/softplus_op_gpu.cu.cc +++ b/tensorflow/core/kernels/softplus_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/kernels/stage_op.cc b/tensorflow/core/kernels/stage_op.cc index 925c9266395..9c0f370de3b 100644 --- a/tensorflow/core/kernels/stage_op.cc +++ b/tensorflow/core/kernels/stage_op.cc @@ -216,7 +216,8 @@ class StageOp : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_CPU), StageOp); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_GPU), StageOp); #endif #ifdef TENSORFLOW_USE_SYCL @@ -249,7 +250,8 @@ class UnstageOp : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_CPU), UnstageOp); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_GPU), UnstageOp); #endif #ifdef TENSORFLOW_USE_SYCL @@ -284,7 +286,8 @@ class StagePeekOp : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("StagePeek").Device(DEVICE_CPU), StagePeekOp); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER( Name("StagePeek").HostMemory("index").Device(DEVICE_GPU), StagePeekOp); #endif @@ -314,7 +317,8 @@ class StageSizeOp : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("StageSize").Device(DEVICE_CPU), StageSizeOp); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("StageSize").HostMemory("size").Device(DEVICE_GPU), StageSizeOp); #endif @@ -339,7 +343,8 @@ class StageClearOp : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_CPU), StageClearOp); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_GPU), StageClearOp); #endif #ifdef TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/xent_op.cc b/tensorflow/core/kernels/xent_op.cc index 19eb320b4f6..8a7c16349a7 100644 --- a/tensorflow/core/kernels/xent_op.cc +++ b/tensorflow/core/kernels/xent_op.cc @@ -134,7 +134,8 @@ TF_CALL_half(REGISTER_CPU); TF_CALL_float(REGISTER_CPU); TF_CALL_double(REGISTER_CPU); -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits") .Device(DEVICE_GPU) .TypeConstraint<Eigen::half>("T"), diff --git a/tensorflow/core/kernels/xent_op_gpu.cu.cc b/tensorflow/core/kernels/xent_op_gpu.cu.cc index 6c7a9d7ba0a..2b1ac45ab4c 100644 --- a/tensorflow/core/kernels/xent_op_gpu.cu.cc +++ b/tensorflow/core/kernels/xent_op_gpu.cu.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) #define EIGEN_USE_GPU diff --git a/tensorflow/core/util/port.cc b/tensorflow/core/util/port.cc index fd391488c42..0ec78153016 100644 --- a/tensorflow/core/util/port.cc +++ b/tensorflow/core/util/port.cc @@ -35,7 +35,8 @@ bool IsBuiltWithROCm() { } bool GpuSupportsHalfMatMulAndConv() { -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) return true; #else return false;