diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc
index 1920c54e807..d19965b45fe 100644
--- a/tensorflow/core/kernels/cwise_op_abs.cc
+++ b/tensorflow/core/kernels/cwise_op_abs.cc
@@ -20,7 +20,7 @@ REGISTER5(UnaryOp, CPU, "Abs", functor::abs, float, Eigen::half, double, int32,
           int64);
 REGISTER2(UnaryOp, CPU, "ComplexAbs", functor::abs, complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(UnaryOp, GPU, "Abs", functor::abs, float, Eigen::half, double, int64);
 REGISTER2(UnaryOp, GPU, "ComplexAbs", functor::abs, complex64, complex128);
 
diff --git a/tensorflow/core/kernels/cwise_op_acos.cc b/tensorflow/core/kernels/cwise_op_acos.cc
index 49191226074..8449f0661da 100644
--- a/tensorflow/core/kernels/cwise_op_acos.cc
+++ b/tensorflow/core/kernels/cwise_op_acos.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Acos", functor::acos, float, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Acos", functor::acos, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc
index c2b355ab7f4..06aee8671bc 100644
--- a/tensorflow/core/kernels/cwise_op_acosh.cc
+++ b/tensorflow/core/kernels/cwise_op_acosh.cc
@@ -24,7 +24,7 @@ REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double, complex64,
 REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double);
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Acosh", functor::acosh, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc
index 9e4ffe950c9..6668464bac1 100644
--- a/tensorflow/core/kernels/cwise_op_add_1.cc
+++ b/tensorflow/core/kernels/cwise_op_add_1.cc
@@ -21,7 +21,7 @@ REGISTER6(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32,
 REGISTER6(BinaryOp, CPU, "AddV2", functor::add, float, Eigen::half, double,
           int32, int64, bfloat16);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double);
 REGISTER3(BinaryOp, GPU, "AddV2", functor::add, float, Eigen::half, double);
 
diff --git a/tensorflow/core/kernels/cwise_op_add_2.cc b/tensorflow/core/kernels/cwise_op_add_2.cc
index e8acbac2853..1fa453ddb09 100644
--- a/tensorflow/core/kernels/cwise_op_add_2.cc
+++ b/tensorflow/core/kernels/cwise_op_add_2.cc
@@ -28,12 +28,12 @@ REGISTER6(BinaryOp, CPU, "Add", functor::add, int8, int16, complex64, uint8,
 // is_aggregate.
 REGISTER5(BinaryOp, CPU, "AddV2", functor::add, int8, int16, complex64, uint8,
           complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "Add", functor::add, uint8, int64, complex64,
           complex128);
 REGISTER4(BinaryOp, GPU, "AddV2", functor::add, uint8, int64, complex64,
           complex128);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #endif  // !defined(__ANDROID_TYPES_SLIM__)
 
diff --git a/tensorflow/core/kernels/cwise_op_asin.cc b/tensorflow/core/kernels/cwise_op_asin.cc
index fe8dfea1173..9089dfce23b 100644
--- a/tensorflow/core/kernels/cwise_op_asin.cc
+++ b/tensorflow/core/kernels/cwise_op_asin.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Asin", functor::asin, float, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Asin", functor::asin, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index 7cf0405f524..9801b31af48 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -24,7 +24,7 @@ REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, complex64,
 REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_atan.cc b/tensorflow/core/kernels/cwise_op_atan.cc
index 09f0448874f..d8f84f01034 100644
--- a/tensorflow/core/kernels/cwise_op_atan.cc
+++ b/tensorflow/core/kernels/cwise_op_atan.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Atan", functor::atan, float, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Atan", functor::atan, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
diff --git a/tensorflow/core/kernels/cwise_op_atan2.cc b/tensorflow/core/kernels/cwise_op_atan2.cc
index 68f67c444ef..9856a98b8e6 100644
--- a/tensorflow/core/kernels/cwise_op_atan2.cc
+++ b/tensorflow/core/kernels/cwise_op_atan2.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER2(BinaryOp, CPU, "Atan2", functor::atan2, float, double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(BinaryOp, GPU, "Atan2", functor::atan2, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index 6170683fa64..e58adb57833 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -24,7 +24,7 @@ REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, complex64,
 REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bessel.cc b/tensorflow/core/kernels/cwise_op_bessel.cc
index 4372f56408b..dedc961ffea 100644
--- a/tensorflow/core/kernels/cwise_op_bessel.cc
+++ b/tensorflow/core/kernels/cwise_op_bessel.cc
@@ -20,7 +20,7 @@ REGISTER3(UnaryOp, CPU, "BesselI0e", functor::bessel_i0e, Eigen::half, float,
           double);
 REGISTER3(UnaryOp, CPU, "BesselI1e", functor::bessel_i1e, Eigen::half, float,
           double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "BesselI0e", functor::bessel_i0e, Eigen::half, float,
           double);
 REGISTER3(UnaryOp, GPU, "BesselI1e", functor::bessel_i1e, Eigen::half, float,
diff --git a/tensorflow/core/kernels/cwise_op_bessel.cu.cc b/tensorflow/core/kernels/cwise_op_bessel.cu.cc
index 30de8b1fdc4..3d47dddcdcb 100644
--- a/tensorflow/core/kernels/cwise_op_bessel.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_bessel.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_UNARY3(bessel_i1e, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_and.cc b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
index 5a6cf4bad16..49d5044f289 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_and.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
@@ -36,9 +36,9 @@ REGISTER_SYCL_KERNEL(uint64);
 
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_or.cc b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
index 201a10198a6..f448968860d 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_or.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
@@ -36,9 +36,9 @@ REGISTER_SYCL_KERNEL(uint64);
 
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
index 2a7cd269959..b4387c2e8fd 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
@@ -36,9 +36,9 @@ REGISTER_SYCL_KERNEL(uint64);
 
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc
index 816eadc80eb..4b1847d758c 100644
--- a/tensorflow/core/kernels/cwise_op_ceil.cc
+++ b/tensorflow/core/kernels/cwise_op_ceil.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc
index 49b90e855be..c0c71c5f638 100644
--- a/tensorflow/core/kernels/cwise_op_clip.cc
+++ b/tensorflow/core/kernels/cwise_op_clip.cc
@@ -181,7 +181,7 @@ REGISTER_CPU_KERNEL(uint8);
 REGISTER_CPU_KERNEL(uint16);
 #undef REGISTER_CPU_KERNEL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #define REGISTER_GPU_KERNEL(type)                                       \
   REGISTER_KERNEL_BUILDER(                                              \
diff --git a/tensorflow/core/kernels/cwise_op_complex.cc b/tensorflow/core/kernels/cwise_op_complex.cc
index e796bc7fe8e..309e65a7621 100644
--- a/tensorflow/core/kernels/cwise_op_complex.cc
+++ b/tensorflow/core/kernels/cwise_op_complex.cc
@@ -26,7 +26,7 @@ namespace tensorflow {
 REGISTER_COMPLEX(CPU, float, complex64);
 REGISTER_COMPLEX(CPU, double, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_COMPLEX(GPU, float, complex64);
 REGISTER_COMPLEX(GPU, double, complex128);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_conj.cc b/tensorflow/core/kernels/cwise_op_conj.cc
index 929c54a9a15..4e37f554716 100644
--- a/tensorflow/core/kernels/cwise_op_conj.cc
+++ b/tensorflow/core/kernels/cwise_op_conj.cc
@@ -22,7 +22,7 @@ REGISTER2(UnaryOp, CPU, "Conj", functor::conj, complex64, complex128);
 
 REGISTER_VARIANT(UnaryVariantOp, CPU, "Conj", CONJ_VARIANT_UNARY_OP);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(
     Name("Conj").Device(DEVICE_GPU).TypeConstraint<Variant>("T"),
     UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);
diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc
index 71ad0ff0dc2..7b434ce4294 100644
--- a/tensorflow/core/kernels/cwise_op_cos.cc
+++ b/tensorflow/core/kernels/cwise_op_cos.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, double,
           complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
diff --git a/tensorflow/core/kernels/cwise_op_cosh.cc b/tensorflow/core/kernels/cwise_op_cosh.cc
index 31b4bb3cadd..3388df0096a 100644
--- a/tensorflow/core/kernels/cwise_op_cosh.cc
+++ b/tensorflow/core/kernels/cwise_op_cosh.cc
@@ -29,7 +29,7 @@ REGISTER_SYCL_KERNEL(double);
 #undef REGISTER_SYCL_KERNEL
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Cosh", functor::cosh, float, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_digamma.cc b/tensorflow/core/kernels/cwise_op_digamma.cc
index 1536478d215..4bd00c36312 100644
--- a/tensorflow/core/kernels/cwise_op_digamma.cc
+++ b/tensorflow/core/kernels/cwise_op_digamma.cc
@@ -18,8 +18,8 @@ limitations under the License.
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Digamma", functor::digamma, float, Eigen::half,
           double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Digamma", functor::digamma, float, Eigen::half,
           double);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_equal_to_1.cc
index 931f59014b6..ac66e558d03 100644
--- a/tensorflow/core/kernels/cwise_op_equal_to_1.cc
+++ b/tensorflow/core/kernels/cwise_op_equal_to_1.cc
@@ -24,7 +24,7 @@ REGISTER_KERNEL_BUILDER(
 REGISTER_KERNEL_BUILDER(
     Name("ApproximateEqual").Device(DEVICE_CPU).TypeConstraint<double>("T"),
     ApproximateEqualOp<CPUDevice, double>);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "Equal", functor::equal_to, float, Eigen::half, double,
           uint8);
 REGISTER_KERNEL_BUILDER(
diff --git a/tensorflow/core/kernels/cwise_op_equal_to_2.cc b/tensorflow/core/kernels/cwise_op_equal_to_2.cc
index 57e19c7202d..77810338697 100644
--- a/tensorflow/core/kernels/cwise_op_equal_to_2.cc
+++ b/tensorflow/core/kernels/cwise_op_equal_to_2.cc
@@ -24,10 +24,10 @@ namespace tensorflow {
 
 REGISTER6(BinaryOp, CPU, "Equal", functor::equal_to, int32, int64, complex64,
           complex128, string, bool);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER6(BinaryOp, GPU, "Equal", functor::equal_to, int8, int16, int64,
           complex64, complex128, bool);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #endif  // !defined(__ANDROID_TYPES_SLIM__)
 
diff --git a/tensorflow/core/kernels/cwise_op_erf.cc b/tensorflow/core/kernels/cwise_op_erf.cc
index 524ca13e67a..85048f48deb 100644
--- a/tensorflow/core/kernels/cwise_op_erf.cc
+++ b/tensorflow/core/kernels/cwise_op_erf.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Erf", functor::erf, float, Eigen::half, double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Erf", functor::erf, float, Eigen::half, double);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_erfc.cc b/tensorflow/core/kernels/cwise_op_erfc.cc
index f3256dc1f55..7abcdc1c89c 100644
--- a/tensorflow/core/kernels/cwise_op_erfc.cc
+++ b/tensorflow/core/kernels/cwise_op_erfc.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Erfc", functor::erfc, float, Eigen::half, double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Erfc", functor::erfc, float, Eigen::half, double);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc
index 8f4ac98016c..2b157f0e7a9 100644
--- a/tensorflow/core/kernels/cwise_op_exp.cc
+++ b/tensorflow/core/kernels/cwise_op_exp.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double,
           complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double,
           complex64, complex128);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_expm1.cc b/tensorflow/core/kernels/cwise_op_expm1.cc
index ce03ad5de62..55fdc4763d3 100644
--- a/tensorflow/core/kernels/cwise_op_expm1.cc
+++ b/tensorflow/core/kernels/cwise_op_expm1.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, double,
           complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Expm1", functor::expm1, float, Eigen::half, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc
index d554d41c412..25210a0fa51 100644
--- a/tensorflow/core/kernels/cwise_op_floor.cc
+++ b/tensorflow/core/kernels/cwise_op_floor.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc
index 24da61fdf6c..11869e43eaa 100644
--- a/tensorflow/core/kernels/cwise_op_floor_div.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_div.cc
@@ -21,14 +21,14 @@ REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
 REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
           Eigen::half, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
           int64);
 REGISTER3(BinaryOp, GPU, "FloorDiv", functor::floor_div_real, float,
           Eigen::half, double);
-#endif
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
 // registration requires all int32 inputs and outputs to be in host memory.
diff --git a/tensorflow/core/kernels/cwise_op_floor_mod.cc b/tensorflow/core/kernels/cwise_op_floor_mod.cc
index 29340b88506..481fc3b8989 100644
--- a/tensorflow/core/kernels/cwise_op_floor_mod.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_mod.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER2(BinaryOp, CPU, "FloorMod", functor::safe_floor_mod, int32, int64);
 REGISTER2(BinaryOp, CPU, "FloorMod", functor::floor_fmod, float, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
 // registration requires all int32 inputs and outputs to be in host memory.
diff --git a/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc
index caec29986a6..f5b4f014c2c 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY6(abs, Eigen::half, float, double, int64, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_acos.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_acos.cu.cc
index abcc954a441..a68ed861a21 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_acos.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_acos.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(acos, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_acosh.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_acosh.cu.cc
index a29c9a374d9..0b24808db93 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_acosh.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_acosh.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -24,4 +24,4 @@ DEFINE_UNARY2(acosh, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_add.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_add.cu.cc
index 61079ebab39..f2ac4c51407 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_add.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_add.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY7(add, Eigen::half, float, double, uint8, int64, complex64,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_asin.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_asin.cu.cc
index 916fbeeaa79..95a9eb68663 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_asin.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_asin.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(asin, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_asinh.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_asinh.cu.cc
index c78f09e5e9a..bb9e9a09168 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_asinh.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_asinh.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -24,4 +24,4 @@ DEFINE_UNARY2(asinh, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_atan.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_atan.cu.cc
index 455991e1d56..618fe2e4b58 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_atan.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_atan.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(atan, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_atan2.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_atan2.cu.cc
index 137e14ef840..7de20c68b4c 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_atan2.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_atan2.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY2(atan2, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_atanh.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_atanh.cu.cc
index 895dcbff020..25c6b0c55e3 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_atanh.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_atanh.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -24,4 +24,4 @@ DEFINE_UNARY2(atanh, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
index 3fbf69c114d..fb1416cebf6 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_and.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY8(bitwise_and, int8, int16, int32, int64, uint8, uint16, uint32,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
index 8bcb82266a2..0d7fca5f87a 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_or.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY8(bitwise_or, int8, int16, int32, int64, uint8, uint16, uint32,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
index e62a87aba44..686f9dd94a8 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_bitwise_xor.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY8(bitwise_xor, int8, int16, int32, int64, uint8, uint16, uint32,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_ceil.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_ceil.cu.cc
index cc4fa120eb3..64d7497271d 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_ceil.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_ceil.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(ceil, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_complex.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_complex.cu.cc
index 328d412df39..b1436738538 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_complex.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_complex.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY2(make_complex, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
index 77723b3169f..e674d5af227 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_conj.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_UNARY1(conj, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_cos.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_cos.cu.cc
index fa5b7eee4ac..d427f8dff3f 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_cos.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_cos.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(cos, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_cosh.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_cosh.cu.cc
index 267a381d1a5..8f43c59547d 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_cosh.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_cosh.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(cosh, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_digamma.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_digamma.cu.cc
index a3ff067c5fe..beb9475e003 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_digamma.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_digamma.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(digamma, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_equal_to.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_equal_to.cu.cc
index 26748ef0e72..f668db420cb 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_equal_to.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_equal_to.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -25,4 +25,4 @@ DEFINE_APPROXIMATE_EQUAL2(float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_erf.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_erf.cu.cc
index b18d6b0161e..29e03240e9b 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_erf.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_erf.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(erf, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_erfc.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_erfc.cu.cc
index 5b6303da9ea..006e5ecd310 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_erfc.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_erfc.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(erfc, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
index 417e5da7588..7ac82ee73ab 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_exp.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY5(exp, Eigen::half, float, double, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_expm1.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_expm1.cu.cc
index 755b942295a..5ac000c43b5 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_expm1.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_expm1.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(expm1, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_floor.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_floor.cu.cc
index 0823fcdd444..6f75f5f1bb7 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_floor.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_floor.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(floor, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_floor_div.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_floor_div.cu.cc
index 0e4887eafd6..137e6459142 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_floor_div.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_floor_div.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY3(floor_div_real, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_floor_mod.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_floor_mod.cu.cc
index bbe97e4b4d8..b5a2d4d2794 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_floor_mod.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_floor_mod.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ namespace functor {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_greater.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_greater.cu.cc
index 22337f7f22b..0afffbd4942 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_greater.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_greater.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY7(greater, Eigen::half, float, double, int64, uint8, int8, int16);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_greater_equal.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_greater_equal.cu.cc
index 10d87393a88..195cb472cf4 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_greater_equal.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_greater_equal.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY7(greater_equal, Eigen::half, float, double, int64, uint8, int8,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_igammas_double.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_igammas_double.cu.cc
index 2bcc7aa8855..146e94650a6 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_igammas_double.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_igammas_double.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -26,4 +26,4 @@ DEFINE_BINARY1(igammac, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_igammas_float.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_igammas_float.cu.cc
index e6412216e93..f0aef797e31 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_igammas_float.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_igammas_float.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -26,4 +26,4 @@ DEFINE_BINARY1(igammac, float);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_imag.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_imag.cu.cc
index df05575df17..ca7fea8a79e 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_imag.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_imag.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(get_imag, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_inverse.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_inverse.cu.cc
index 2d8438f7e02..a57632471d1 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_inverse.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_inverse.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -25,4 +25,4 @@ DEFINE_SIMPLE_BINARY3(inverse_grad, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_invert.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_invert.cu.cc
index 1072ef3aa68..db720372667 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_invert.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_invert.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY8(invert, int8, int16, int32, int64, uint8, uint16, uint32, uint64);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_isfinite.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_isfinite.cu.cc
index 556d5411e34..58a8a4a9ae6 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_isfinite.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_isfinite.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(isfinite, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_isinf.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_isinf.cu.cc
index f223c14ace1..e65a184ced5 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_isinf.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_isinf.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(isinf, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_isnan.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_isnan.cu.cc
index b3587a26903..9e3bf52dc2d 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_isnan.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_isnan.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(isnan, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_left_shift.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_left_shift.cu.cc
index 740048795a9..ac4db971280 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_left_shift.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_left_shift.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY8(left_shift, int8, int16, int32, int64, uint8, uint16, uint32,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_less.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_less.cu.cc
index 8c7fc89556d..3ed3710477c 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_less.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_less.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY7(less, Eigen::half, float, double, int64, uint8, int8, int16);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_less_equal.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_less_equal.cu.cc
index 7d85da29514..a472bd4f261 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_less_equal.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_less_equal.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY7(less_equal, Eigen::half, float, double, int64, uint8, int8,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_lgamma.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_lgamma.cu.cc
index d370237b997..ddcef1497e7 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_lgamma.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_lgamma.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(lgamma, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_log.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_log.cu.cc
index 1b2959a1130..a17d310bc8d 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_log.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_log.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(log, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_log1p.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_log1p.cu.cc
index d09224c70e0..842aaf5d08e 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_log1p.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_log1p.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(log1p, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_logical_and.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_logical_and.cu.cc
index 1943779d212..9f0253db2fb 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_logical_and.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_logical_and.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -27,4 +27,4 @@ template struct BinaryFunctor<GPUDevice, logical_and, 5>;
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_logical_not.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_logical_not.cu.cc
index e847fa6ce95..f619b9b5d77 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_logical_not.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_logical_not.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ template struct UnaryFunctor<GPUDevice, logical_not>;
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_logical_or.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_logical_or.cu.cc
index 8b397706526..135ba7cec40 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_logical_or.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_logical_or.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -27,4 +27,4 @@ template struct BinaryFunctor<GPUDevice, logical_or, 5>;
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_maximum.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_maximum.cu.cc
index 56ca2a7a55a..522df1ba664 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_maximum.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_maximum.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY4(maximum, Eigen::half, float, double, int64);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_minimum.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_minimum.cu.cc
index 2e65240cde9..e9413e62f39 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_minimum.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_minimum.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY4(minimum, Eigen::half, float, double, int64);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_mod.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_mod.cu.cc
index bbe97e4b4d8..b5a2d4d2794 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_mod.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_mod.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ namespace functor {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_mul.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_mul.cu.cc
index 27f197f4b4f..a199fe93c1f 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_mul.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_mul.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -25,4 +25,4 @@ DEFINE_BINARY5(mul_no_nan, Eigen::half, float, double, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_neg.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_neg.cu.cc
index d74cab6edf5..ea1ca623560 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_neg.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_neg.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_UNARY7(neg, Eigen::half, float, double, int32, int64, complex64,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_not_equal_to.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_not_equal_to.cu.cc
index ec4b28a09cf..67a6057995d 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_not_equal_to.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_not_equal_to.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY10(not_equal_to, float, Eigen::half, double, uint8, int8, int16,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_pow.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_pow.cu.cc
index ac031b3604b..4d48e20d7b1 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_pow.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_pow.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY4(pow, Eigen::half, float, double, int64);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_random_grad.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_random_grad.cu.cc
index fd0a95ecc59..af1dca51e23 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_random_grad.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_random_grad.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY2(random_gamma_grad, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_real.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_real.cu.cc
index 120209edbc7..9b7bc624ed7 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_real.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_real.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(get_real, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_right_shift.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_right_shift.cu.cc
index bb6772772c8..55d8a8885df 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_right_shift.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_right_shift.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY8(right_shift, int8, int16, int32, int64, uint8, uint16, uint32,
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
index 028b944d27d..bb6d7447221 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_rint.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(rint, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc
index 03d2b2c4423..72fab1ed95c 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY5(round, Eigen::half, float, double, int32, int64);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_rsqrt.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_rsqrt.cu.cc
index 6a361cfeec5..5c243cff294 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_rsqrt.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_rsqrt.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -25,4 +25,4 @@ DEFINE_SIMPLE_BINARY3(rsqrt_grad, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
index ba872db2172..1f0e460b9db 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #define EIGEN_USE_GPU
 
@@ -129,4 +129,4 @@ SELECT_FUNCTOR(complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_sigmoid.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_sigmoid.cu.cc
index b59d22310e0..ec52392b8bb 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_sigmoid.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_sigmoid.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -25,4 +25,4 @@ DEFINE_SIMPLE_BINARY3(sigmoid_grad, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_sign.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_sign.cu.cc
index 26e669550d1..eebd16f283f 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_sign.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_sign.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY6(sign, Eigen::half, float, double, int64, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_sin.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_sin.cu.cc
index fb8b3816405..53c886e7ccf 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_sin.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_sin.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(sin, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_sinh.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_sinh.cu.cc
index f8329e50d62..768fca5fbc0 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_sinh.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_sinh.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY2(sinh, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_sqrt.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_sqrt.cu.cc
index dae93a07665..99d30e7fe18 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_sqrt.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_sqrt.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -25,4 +25,4 @@ DEFINE_SIMPLE_BINARY3(sqrt_grad, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_square.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_square.cu.cc
index 038a995d93d..b04a9d092fc 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_square.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_square.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY4(square, Eigen::half, float, double, int64);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_squared_difference.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_squared_difference.cu.cc
index 3c90d00b5e9..0fa916efafd 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_squared_difference.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_squared_difference.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY4(squared_difference, float, Eigen::half, double, int64);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_sub.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_sub.cu.cc
index 2200f316c1e..aa5d09f8835 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_sub.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_sub.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY6(sub, Eigen::half, float, double, int64, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_tan.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_tan.cu.cc
index e09f12d4124..532a8cc26da 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_tan.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_tan.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_UNARY3(tan, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_tanh.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_tanh.cu.cc
index 66ee3c193e0..6118e95120b 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_tanh.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_tanh.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 #include "tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h"
@@ -25,4 +25,4 @@ DEFINE_SIMPLE_BINARY3(tanh_grad, Eigen::half, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc
index e4b21a66c62..9fac1a7ff40 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_xdivy.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -23,4 +23,4 @@ DEFINE_BINARY5(xdivy, Eigen::half, float, double, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_gpu_zeta.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_zeta.cu.cc
index 8f64a904473..41499ea096f 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_zeta.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_zeta.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
@@ -24,4 +24,4 @@ DEFINE_BINARY2(polygamma, float, double);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/tensorflow/core/kernels/cwise_op_greater.cc b/tensorflow/core/kernels/cwise_op_greater.cc
index b385e9e5450..d70233dc55c 100644
--- a/tensorflow/core/kernels/cwise_op_greater.cc
+++ b/tensorflow/core/kernels/cwise_op_greater.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER9(BinaryOp, CPU, "Greater", functor::greater, float, Eigen::half,
           double, int32, int64, uint8, int8, int16, bfloat16);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER7(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half,
           double, int64, uint8, int8, int16);
 
diff --git a/tensorflow/core/kernels/cwise_op_greater_equal.cc b/tensorflow/core/kernels/cwise_op_greater_equal.cc
index 8bfc018052f..7f6b788eb2e 100644
--- a/tensorflow/core/kernels/cwise_op_greater_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_greater_equal.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER9(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, float,
           Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER7(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float,
           Eigen::half, double, int64, uint8, int8, int16);
 
diff --git a/tensorflow/core/kernels/cwise_op_igammas.cc b/tensorflow/core/kernels/cwise_op_igammas.cc
index cadda3b7230..e8638ca640e 100644
--- a/tensorflow/core/kernels/cwise_op_igammas.cc
+++ b/tensorflow/core/kernels/cwise_op_igammas.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 REGISTER2(BinaryOp, CPU, "Igamma", functor::igamma, float, double);
 REGISTER2(BinaryOp, CPU, "IgammaGradA", functor::igamma_grad_a, float, double);
 REGISTER2(BinaryOp, CPU, "Igammac", functor::igammac, float, double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(BinaryOp, GPU, "Igamma", functor::igamma, float, double);
 REGISTER2(BinaryOp, GPU, "IgammaGradA", functor::igamma_grad_a, float, double);
 REGISTER2(BinaryOp, GPU, "Igammac", functor::igammac, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_imag.cc b/tensorflow/core/kernels/cwise_op_imag.cc
index 9dbfb2f94e7..bda9c19e3c2 100644
--- a/tensorflow/core/kernels/cwise_op_imag.cc
+++ b/tensorflow/core/kernels/cwise_op_imag.cc
@@ -26,7 +26,7 @@ namespace tensorflow {
 REGISTER_COMPLEX(CPU, float, complex64);
 REGISTER_COMPLEX(CPU, double, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_COMPLEX(GPU, float, complex64);
 REGISTER_COMPLEX(GPU, double, complex128);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_invert.cc b/tensorflow/core/kernels/cwise_op_invert.cc
index 98c8d7e9b2e..7bdc3d02a42 100644
--- a/tensorflow/core/kernels/cwise_op_invert.cc
+++ b/tensorflow/core/kernels/cwise_op_invert.cc
@@ -24,9 +24,9 @@ REGISTER6(UnaryOp, SYCL, "Invert", functor::invert, int8, int16, int32, int64,
           uint8, uint16, uint32, uint64);
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(UnaryOp, GPU, "Invert", functor::invert, int8, int16, int32, int64,
           uint8, uint16, uint32, uint64);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc
index ae1e590d242..061dc8367e2 100644
--- a/tensorflow/core/kernels/cwise_op_isfinite.cc
+++ b/tensorflow/core/kernels/cwise_op_isfinite.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
           double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
           double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc
index f22ca21e1ca..f87a24d2085 100644
--- a/tensorflow/core/kernels/cwise_op_isinf.cc
+++ b/tensorflow/core/kernels/cwise_op_isinf.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
 #endif
 
diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc
index 707dc9e49ca..2867b16e39a 100644
--- a/tensorflow/core/kernels/cwise_op_isnan.cc
+++ b/tensorflow/core/kernels/cwise_op_isnan.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double,
           bfloat16);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
 #endif
 
diff --git a/tensorflow/core/kernels/cwise_op_left_shift.cc b/tensorflow/core/kernels/cwise_op_left_shift.cc
index ccb68139dec..38109a14c5d 100644
--- a/tensorflow/core/kernels/cwise_op_left_shift.cc
+++ b/tensorflow/core/kernels/cwise_op_left_shift.cc
@@ -36,9 +36,9 @@ REGISTER_SYCL_KERNEL(uint64);
 
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "LeftShift", functor::left_shift, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_less.cc b/tensorflow/core/kernels/cwise_op_less.cc
index e369fdcf8ab..563bb7d4566 100644
--- a/tensorflow/core/kernels/cwise_op_less.cc
+++ b/tensorflow/core/kernels/cwise_op_less.cc
@@ -21,7 +21,7 @@ REGISTER5(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double,
 REGISTER5(BinaryOp, CPU, "Less", functor::less, int64, uint8, int8, int16,
           bfloat16);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER7(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double,
           int64, uint8, int8, int16);
 
diff --git a/tensorflow/core/kernels/cwise_op_less_equal.cc b/tensorflow/core/kernels/cwise_op_less_equal.cc
index 3353e117cdf..1998fc0b2ad 100644
--- a/tensorflow/core/kernels/cwise_op_less_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_less_equal.cc
@@ -21,7 +21,7 @@ REGISTER5(BinaryOp, CPU, "LessEqual", functor::less_equal, float, Eigen::half,
 REGISTER5(BinaryOp, CPU, "LessEqual", functor::less_equal, int64, uint8, int8,
           int16, bfloat16);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER7(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half,
           double, int64, uint8, int8, int16);
 
diff --git a/tensorflow/core/kernels/cwise_op_lgamma.cc b/tensorflow/core/kernels/cwise_op_lgamma.cc
index 737c10e7235..1446393921b 100644
--- a/tensorflow/core/kernels/cwise_op_lgamma.cc
+++ b/tensorflow/core/kernels/cwise_op_lgamma.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER3(UnaryOp, CPU, "Lgamma", functor::lgamma, float, Eigen::half, double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Lgamma", functor::lgamma, float, Eigen::half, double);
 #endif
 
diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc
index 5d17c890cfe..e4ff3808a93 100644
--- a/tensorflow/core/kernels/cwise_op_log.cc
+++ b/tensorflow/core/kernels/cwise_op_log.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER6(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double,
           bfloat16, complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double);
 #endif
 
diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc
index 162ca9e07cd..06fc764fc75 100644
--- a/tensorflow/core/kernels/cwise_op_log1p.cc
+++ b/tensorflow/core/kernels/cwise_op_log1p.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, double,
           complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double);
 #endif
 
diff --git a/tensorflow/core/kernels/cwise_op_logical_and.cc b/tensorflow/core/kernels/cwise_op_logical_and.cc
index fa4b9f09d25..32a67c59a26 100644
--- a/tensorflow/core/kernels/cwise_op_logical_and.cc
+++ b/tensorflow/core/kernels/cwise_op_logical_and.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER_KERNEL_BUILDER(Name("LogicalAnd").Device(DEVICE_CPU),
                         BinaryOp<CPUDevice, functor::logical_and>);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(Name("LogicalAnd").Device(DEVICE_GPU),
                         BinaryOp<GPUDevice, functor::logical_and>);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_logical_not.cc b/tensorflow/core/kernels/cwise_op_logical_not.cc
index 145b9cafca8..4c8fb0023d6 100644
--- a/tensorflow/core/kernels/cwise_op_logical_not.cc
+++ b/tensorflow/core/kernels/cwise_op_logical_not.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER_KERNEL_BUILDER(Name("LogicalNot").Device(DEVICE_CPU),
                         UnaryOp<CPUDevice, functor::logical_not>);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(Name("LogicalNot").Device(DEVICE_GPU),
                         UnaryOp<GPUDevice, functor::logical_not>);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_logical_or.cc b/tensorflow/core/kernels/cwise_op_logical_or.cc
index 2a164019f34..9476393fe72 100644
--- a/tensorflow/core/kernels/cwise_op_logical_or.cc
+++ b/tensorflow/core/kernels/cwise_op_logical_or.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER_KERNEL_BUILDER(Name("LogicalOr").Device(DEVICE_CPU),
                         BinaryOp<CPUDevice, functor::logical_or>);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(Name("LogicalOr").Device(DEVICE_GPU),
                         BinaryOp<GPUDevice, functor::logical_or>);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_maximum.cc b/tensorflow/core/kernels/cwise_op_maximum.cc
index e8a58eea80e..347022c208f 100644
--- a/tensorflow/core/kernels/cwise_op_maximum.cc
+++ b/tensorflow/core/kernels/cwise_op_maximum.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER6(BinaryOp, CPU, "Maximum", functor::maximum, float, Eigen::half,
           bfloat16, double, int32, int64);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "Maximum", functor::maximum, float, Eigen::half,
           double, int64);
 
diff --git a/tensorflow/core/kernels/cwise_op_minimum.cc b/tensorflow/core/kernels/cwise_op_minimum.cc
index 9bc37003879..15491b8258c 100644
--- a/tensorflow/core/kernels/cwise_op_minimum.cc
+++ b/tensorflow/core/kernels/cwise_op_minimum.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER6(BinaryOp, CPU, "Minimum", functor::minimum, float, Eigen::half,
           bfloat16, double, int32, int64);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "Minimum", functor::minimum, float, Eigen::half,
           double, int64);
 
diff --git a/tensorflow/core/kernels/cwise_op_mod.cc b/tensorflow/core/kernels/cwise_op_mod.cc
index 2d191566099..5df0ca4adf2 100644
--- a/tensorflow/core/kernels/cwise_op_mod.cc
+++ b/tensorflow/core/kernels/cwise_op_mod.cc
@@ -21,7 +21,7 @@ REGISTER2(BinaryOp, CPU, "Mod", functor::fmod, float, double);
 REGISTER2(BinaryOp, CPU, "TruncateMod", functor::safe_mod, int32, int64);
 REGISTER2(BinaryOp, CPU, "TruncateMod", functor::fmod, float, double);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
 // registration requires all int32 inputs and outputs to be in host memory.
diff --git a/tensorflow/core/kernels/cwise_op_mul_2.cc b/tensorflow/core/kernels/cwise_op_mul_2.cc
index 6aa8f883640..c4a2f6364f3 100644
--- a/tensorflow/core/kernels/cwise_op_mul_2.cc
+++ b/tensorflow/core/kernels/cwise_op_mul_2.cc
@@ -24,11 +24,11 @@ namespace tensorflow {
 
 REGISTER6(BinaryOp, CPU, "Mul", functor::mul, int8, uint16, int16, int64,
           complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER6(BinaryOp, GPU, "Mul", functor::mul, int8, uint16, int16, int64,
           complex64, complex128);
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #endif  // !defined(__ANDROID_TYPES_SLIM__)
 
diff --git a/tensorflow/core/kernels/cwise_op_neg.cc b/tensorflow/core/kernels/cwise_op_neg.cc
index bb7d22e4dd4..f52cf6c8b91 100644
--- a/tensorflow/core/kernels/cwise_op_neg.cc
+++ b/tensorflow/core/kernels/cwise_op_neg.cc
@@ -29,7 +29,7 @@ REGISTER_KERNEL_BUILDER(Name("Neg")
                         UnaryOp<CPUDevice, functor::neg<int32>>);
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER6(UnaryOp, GPU, "Neg", functor::neg, float, Eigen::half, double, int64,
           complex64, complex128);
 
diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
index 9f1e5758054..f207158b843 100644
--- a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
+++ b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, float, Eigen::half,
           double, uint8, int8, int16, bfloat16);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "NotEqual", functor::not_equal_to, float, Eigen::half,
           double, uint8);
 // A special GPU kernel for int32.
diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc
index 05bdea66367..0ecc70c4f2b 100644
--- a/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc
+++ b/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc
@@ -24,11 +24,11 @@ namespace tensorflow {
 
 REGISTER6(BinaryOp, CPU, "NotEqual", functor::not_equal_to, int32, int64,
           complex64, complex128, string, bool);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER6(BinaryOp, GPU, "NotEqual", functor::not_equal_to, int8, int16, int64,
           complex64, complex128, bool);
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #endif  // !defined(__ANDROID_TYPES_SLIM__)
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc
index cf86478b0fe..1b1d626aa57 100644
--- a/tensorflow/core/kernels/cwise_op_pow.cc
+++ b/tensorflow/core/kernels/cwise_op_pow.cc
@@ -20,7 +20,7 @@ REGISTER5(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, double,
           complex64, complex128);
 REGISTER2(BinaryOp, CPU, "Pow", functor::safe_pow, int32, int64);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "Pow", functor::pow, float, Eigen::half, double,
           int64);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_random_grad.cc b/tensorflow/core/kernels/cwise_op_random_grad.cc
index 8e388ead9e4..bf23c05fa4d 100644
--- a/tensorflow/core/kernels/cwise_op_random_grad.cc
+++ b/tensorflow/core/kernels/cwise_op_random_grad.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER2(BinaryOp, CPU, "RandomGammaGrad", functor::random_gamma_grad, float,
           double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(BinaryOp, GPU, "RandomGammaGrad", functor::random_gamma_grad, float,
           double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_real.cc b/tensorflow/core/kernels/cwise_op_real.cc
index e7942d02454..453f2801132 100644
--- a/tensorflow/core/kernels/cwise_op_real.cc
+++ b/tensorflow/core/kernels/cwise_op_real.cc
@@ -27,7 +27,7 @@ namespace tensorflow {
 REGISTER_COMPLEX(CPU, float, complex64);
 REGISTER_COMPLEX(CPU, double, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_COMPLEX(GPU, float, complex64);
 REGISTER_COMPLEX(GPU, double, complex128);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_reciprocal.cc b/tensorflow/core/kernels/cwise_op_reciprocal.cc
index aee25747b86..a08499d7456 100644
--- a/tensorflow/core/kernels/cwise_op_reciprocal.cc
+++ b/tensorflow/core/kernels/cwise_op_reciprocal.cc
@@ -18,21 +18,21 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Inv", functor::inverse, float, Eigen::half, double,
           complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(UnaryOp, GPU, "Inv", functor::inverse, float, Eigen::half, double,
           int64);
 #endif
 
 REGISTER5(SimpleBinaryOp, CPU, "InvGrad", functor::inverse_grad, float,
           Eigen::half, double, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "InvGrad", functor::inverse_grad, float,
           Eigen::half, double);
 #endif
 
 REGISTER5(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half,
           double, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half,
           double, int64);
 #endif
@@ -42,7 +42,7 @@ REGISTER(UnaryOp, SYCL, "Reciprocal", functor::inverse, float);
 
 REGISTER5(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float,
           Eigen::half, double, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "ReciprocalGrad", functor::inverse_grad, float,
           Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_right_shift.cc b/tensorflow/core/kernels/cwise_op_right_shift.cc
index 6dc6b97e354..8165662e53f 100644
--- a/tensorflow/core/kernels/cwise_op_right_shift.cc
+++ b/tensorflow/core/kernels/cwise_op_right_shift.cc
@@ -36,9 +36,9 @@ REGISTER_SYCL_KERNEL(uint64);
 
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "RightShift", functor::right_shift, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_rint.cc b/tensorflow/core/kernels/cwise_op_rint.cc
index a741b3d718c..f9fe8321947 100644
--- a/tensorflow/core/kernels/cwise_op_rint.cc
+++ b/tensorflow/core/kernels/cwise_op_rint.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 namespace tensorflow {
 REGISTER2(UnaryOp, CPU, "Rint", functor::rint, float, double);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Rint", functor::rint, float, double);
 #endif
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_round.cc b/tensorflow/core/kernels/cwise_op_round.cc
index 163814aac4b..86e709b01e1 100644
--- a/tensorflow/core/kernels/cwise_op_round.cc
+++ b/tensorflow/core/kernels/cwise_op_round.cc
@@ -23,7 +23,7 @@ REGISTER5(UnaryOp, CPU, "Round", functor::round, Eigen::half, float, double,
 REGISTER2(UnaryOp, SYCL, "Round", functor::round, float, double);
 #endif
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(UnaryOp, GPU, "Round", functor::round, Eigen::half, float, double,
           int32, int64);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_rsqrt.cc b/tensorflow/core/kernels/cwise_op_rsqrt.cc
index a434538fbf8..20d81a66bbf 100644
--- a/tensorflow/core/kernels/cwise_op_rsqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_rsqrt.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double,
           complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double);
 #endif
 #ifdef TENSORFLOW_USE_SYCL
@@ -28,7 +28,7 @@ REGISTER2(UnaryOp, SYCL, "Rsqrt", functor::rsqrt, float, double);
 
 REGISTER5(SimpleBinaryOp, CPU, "RsqrtGrad", functor::rsqrt_grad, float,
           Eigen::half, double, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "RsqrtGrad", functor::rsqrt_grad, float,
           Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_select.cc b/tensorflow/core/kernels/cwise_op_select.cc
index c85c9d0599f..f999546c8a3 100644
--- a/tensorflow/core/kernels/cwise_op_select.cc
+++ b/tensorflow/core/kernels/cwise_op_select.cc
@@ -15,9 +15,9 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -268,7 +268,7 @@ class SelectV2Op : public OpKernel {
 
 TF_CALL_ALL_TYPES(REGISTER_SELECT);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 // Registration of the GPU implementations.
 #define REGISTER_SELECT_GPU(type)                                    \
@@ -290,7 +290,7 @@ REGISTER_SELECT_GPU(complex128);
 
 #undef REGISTER_SELECT_GPU
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #ifdef TENSORFLOW_USE_SYCL
 // Registration of the SYCL implementations.
diff --git a/tensorflow/core/kernels/cwise_op_sigmoid.cc b/tensorflow/core/kernels/cwise_op_sigmoid.cc
index c132fdb63f2..926284571ed 100644
--- a/tensorflow/core/kernels/cwise_op_sigmoid.cc
+++ b/tensorflow/core/kernels/cwise_op_sigmoid.cc
@@ -19,7 +19,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Sigmoid", functor::sigmoid, float, Eigen::half, double,
           complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Sigmoid", functor::sigmoid, float, Eigen::half,
           double);
 #endif
@@ -29,7 +29,7 @@ REGISTER(UnaryOp, SYCL, "Sigmoid", functor::sigmoid, float);
 
 REGISTER5(SimpleBinaryOp, CPU, "SigmoidGrad", functor::sigmoid_grad, float,
           Eigen::half, double, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "SigmoidGrad", functor::sigmoid_grad, float,
           Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_sign.cc b/tensorflow/core/kernels/cwise_op_sign.cc
index 02915ff4ce4..983cee4c944 100644
--- a/tensorflow/core/kernels/cwise_op_sign.cc
+++ b/tensorflow/core/kernels/cwise_op_sign.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER7(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64,
           complex64, Eigen::half, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER6(UnaryOp, GPU, "Sign", functor::sign, float, Eigen::half, double,
           int64, complex64, complex128);
 
diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc
index 16c60578640..ab6fb1ccd5e 100644
--- a/tensorflow/core/kernels/cwise_op_sin.cc
+++ b/tensorflow/core/kernels/cwise_op_sin.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, double,
           complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double);
 #endif
 
diff --git a/tensorflow/core/kernels/cwise_op_sinh.cc b/tensorflow/core/kernels/cwise_op_sinh.cc
index 26b7a940aa8..114a6142bdc 100644
--- a/tensorflow/core/kernels/cwise_op_sinh.cc
+++ b/tensorflow/core/kernels/cwise_op_sinh.cc
@@ -29,7 +29,7 @@ REGISTER_SYCL_KERNEL(double);
 #undef REGISTER_SYCL_KERNEL
 #endif  // TENSORFLOW_USE_SYC
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Sinh", functor::sinh, float, double);
 #endif
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc
index 205070761f1..976f8b0954d 100644
--- a/tensorflow/core/kernels/cwise_op_sqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_sqrt.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER6(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double,
           bfloat16, complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double);
 #endif
 
@@ -29,7 +29,7 @@ REGISTER2(UnaryOp, SYCL, "Sqrt", functor::sqrt, float, double);
 
 REGISTER6(SimpleBinaryOp, CPU, "SqrtGrad", functor::sqrt_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "SqrtGrad", functor::sqrt_grad, float,
           Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc
index 84f695ddc29..40dea5a5fa3 100644
--- a/tensorflow/core/kernels/cwise_op_square.cc
+++ b/tensorflow/core/kernels/cwise_op_square.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER8(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double,
           int32, int64, complex64, complex128, bfloat16);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(UnaryOp, GPU, "Square", functor::square, float, Eigen::half, double,
           int64);
 
diff --git a/tensorflow/core/kernels/cwise_op_squared_difference.cc b/tensorflow/core/kernels/cwise_op_squared_difference.cc
index d0ff271df6a..154c6adf258 100644
--- a/tensorflow/core/kernels/cwise_op_squared_difference.cc
+++ b/tensorflow/core/kernels/cwise_op_squared_difference.cc
@@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER7(BinaryOp, CPU, "SquaredDifference", functor::squared_difference,
           float, Eigen::half, double, int32, int64, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "SquaredDifference", functor::squared_difference,
           float, Eigen::half, double, int64);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index eb27bddb78d..b2fb9d84b14 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -28,7 +28,7 @@ REGISTER4(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16);
 REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
 #endif  // __ANDROID_TYPES_SLIM__
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER6(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
           complex64, complex128);
 
diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc
index 4338d75219e..d9793501a09 100644
--- a/tensorflow/core/kernels/cwise_op_tan.cc
+++ b/tensorflow/core/kernels/cwise_op_tan.cc
@@ -19,7 +19,7 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, float, double,
           complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Tan", functor::tan, Eigen::half, float, double);
 #endif
 
diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc
index c5005f5ea8a..9a6766e581a 100644
--- a/tensorflow/core/kernels/cwise_op_tanh.cc
+++ b/tensorflow/core/kernels/cwise_op_tanh.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 REGISTER5(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, double,
           complex64, complex128);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double);
 #endif
 
@@ -30,7 +30,7 @@ REGISTER2(UnaryOp, SYCL, "Tanh", functor::tanh, float, double);
 
 REGISTER5(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float,
           Eigen::half, double, complex64, complex128);
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "TanhGrad", functor::tanh_grad, float,
           Eigen::half, double);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_xdivy.cc b/tensorflow/core/kernels/cwise_op_xdivy.cc
index 6a6aec5e865..dbd0a69347b 100644
--- a/tensorflow/core/kernels/cwise_op_xdivy.cc
+++ b/tensorflow/core/kernels/cwise_op_xdivy.cc
@@ -30,9 +30,9 @@ REGISTER_SYCL_KERNEL(double);
 
 #endif  // TENSORFLOW_USE_SYCL
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(BinaryOp, GPU, "Xdivy", functor::xdivy, float, Eigen::half, double,
           complex64, complex128);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
index cfae273bf43..cb042fb7a55 100644
--- a/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
+++ b/tensorflow/core/kernels/cwise_ops_gpu_common.cu.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if !GOOGLE_CUDA
-#error This file must only be included when building with Cuda support
+#if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
+#error This file must only be included when building with Cuda or ROCm support
 #endif
 
 #ifndef TENSORFLOW_CORE_KERNELS_CWISE_OPS_GPU_COMMON_CU_H_
diff --git a/tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h b/tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h
index 15e5de0f724..8cef2c07350 100644
--- a/tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h
+++ b/tensorflow/core/kernels/cwise_ops_gpu_gradients.cu.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if !GOOGLE_CUDA
-#error This file must only be included when building with Cuda support
+#if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
+#error This file must only be included when building with Cuda or ROCm support
 #endif
 
 #ifndef TENSORFLOW_CORE_KERNELS_CWISE_OPS_GPU_GRADIENTS_CU_H_