diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index c13517bd4ef..1f8e708883e 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -477,7 +477,6 @@ tf_xla_py_test(
     python_version = "PY3",
     tags = [
         "no_pip",  # TODO(b/149738646): fix pip install so these tests run on kokoro pip
-        "no_rocm",
     ],
     deps = [
         ":xla_test",
@@ -1629,7 +1628,6 @@ cuda_py_test(
     shard_count = 5,
     tags = [
         "no_pip",  # TODO(b/149738646): fix pip install so these tests run on kokoro pip
-        "no_rocm",
     ],
     xla_enable_strict_auto_jit = False,
     xla_enabled = True,
@@ -1654,7 +1652,6 @@ cuda_py_test(
     srcs = ["dense_layer_test.py"],
     tags = [
         "no_pip",  # TODO(b/149738646): fix pip install so these tests run on kokoro pip
-        "no_rocm",
     ],
     xla_enable_strict_auto_jit = False,
     xla_enabled = True,
@@ -1749,7 +1746,6 @@ cuda_py_test(
     srcs = ["lstm_test.py"],
     tags = [
         "no_pip",  # TODO(b/149738646): fix pip install so these tests run on kokoro pip
-        "no_rocm",
     ],
     xla_enable_strict_auto_jit = False,
     xla_enabled = True,
@@ -1873,7 +1869,6 @@ tf_xla_py_test(
     tags = [
         "no_oss",  # TODO(b/148108508): Re-enable this test in OSS.
         "no_pip",  # TODO(b/149738646): fix pip install so these tests run on kokoro pip
-        "no_rocm",
     ],
     deps = [
         ":xla_test",
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index 681e025ba1f..4e941769391 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -172,7 +172,7 @@ tf_cc_test(
     srcs = [
         "reduction_vectorization_test.cc",
     ],
-    tags = tf_cuda_tests_tags() + ["no_rocm"],
+    tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla:debug_options_flags",
@@ -410,7 +410,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_unrolling_test",
     srcs = ["gpu_unrolling_test.cc"],
-    tags = tf_cuda_tests_tags(),
+    tags = tf_cuda_tests_tags() + ["no_rocm"],
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla/service:hlo_module_config",
@@ -441,7 +441,7 @@ tf_cc_test(
 tf_cc_test(
     name = "gpu_atomic_test",
     srcs = ["gpu_atomic_test.cc"],
-    tags = tf_cuda_tests_tags(),
+    tags = tf_cuda_tests_tags() + ["no_rocm"],
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla/tests:filecheck",
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD
index 74eef71870e..f0197c720d8 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD
+++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/BUILD
@@ -63,7 +63,6 @@ tf_cc_test(
     srcs = ["conv_emitter_test.cc"],
     tags = [
         "no_oss",  # TODO(b/148143101): Test should pass in OSS.
-        "no_rocm",
     ],
     deps = [
         ":conv_emitter",
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 9923a6494c4..dffdeff7084 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -367,6 +367,7 @@ xla_test(
         "conv_depthwise_test.cc",
     ],
     shard_count = 50,
+    tags = ["no_rocm"],  # ROCm 3.9 regression
     deps = [
         ":conv_depthwise_common",
         ":test_macros_header",
@@ -388,6 +389,7 @@ xla_test(
     timeout = "long",
     srcs = ["conv_depthwise_backprop_filter_test.cc"],
     shard_count = 40,
+    tags = ["no_rocm"],  # ROCm 3.9 regression
     deps = [
         ":test_macros_header",
         "//tensorflow/compiler/xla:execution_options_util",
@@ -412,6 +414,7 @@ xla_test(
         "cpu",
     ],
     shard_count = 50,
+    tags = ["no_rocm"],  # ROCm 3.9 regression
     deps = [
         ":client_library_test_base",
         ":hlo_test_base",
@@ -626,7 +629,6 @@ xla_test(
     name = "conditional_test",
     srcs = ["conditional_test.cc"],
     shard_count = 2,
-    tags = ["no_rocm"],
     deps = [
         ":test_macros_header",
         "//tensorflow/compiler/xla:xla_data_proto_cc",
@@ -665,7 +667,6 @@ xla_test(
     name = "scalar_computations_test",
     srcs = ["scalar_computations_test.cc"],
     shard_count = 32,
-    tags = ["no_rocm"],
     deps = [
         ":test_macros_header",
         "//tensorflow/compiler/xla:literal",
@@ -923,7 +924,7 @@ xla_test(
     srcs = ["dot_operation_test.cc"],
     shard_count = 20,
     tags = [
-        "no_rocm",
+        "no_rocm",  # ROCm 3.9 regression
         "optonly",
     ],
     deps = [
@@ -957,7 +958,7 @@ xla_test(
     backends = ["gpu"],
     shard_count = 20,
     tags = [
-        "no_rocm",
+        "no_rocm",  # ROCm 3.9 regression
         "optonly",
         # TODO(b/151340488): Timed out on 2020-03-12.
         "nozapfhahn",
@@ -1024,7 +1025,7 @@ xla_test(
     },
     shard_count = 20,
     tags = [
-        "no_rocm",
+        "no_rocm",  # ROCm 3.9 regression
         "optonly",
     ],
     deps = [
@@ -1252,6 +1253,7 @@ xla_test(
         "cpu": ["nomsan"],
     },
     shard_count = 30,
+    tags = ["no_rocm"],  # ROCm 3.9 regression
     deps = [
         ":test_macros_header",
         "//tensorflow/compiler/xla:array3d",
@@ -1276,6 +1278,7 @@ xla_test(
     timeout = "long",
     srcs = ["convolution_dimension_numbers_test.cc"],
     shard_count = 20,
+    tags = ["no_rocm"],  # ROCm 3.9 regression
     deps = [
         ":test_macros_header",
         "//tensorflow/compiler/xla:array4d",
@@ -1514,7 +1517,6 @@ xla_test(
     srcs = ["reduce_test.cc"],
     shard_count = 31,
     tags = [
-        "no_rocm",
         "optonly",
     ],
     deps = [
@@ -1594,7 +1596,6 @@ xla_test(
     timeout = "long",
     srcs = ["select_and_scatter_test.cc"],
     tags = [
-        "no_rocm",
         "nozapfhahn",
         "optonly",
     ],
@@ -2321,6 +2322,7 @@ xla_test(
     name = "multioutput_fusion_test",
     srcs = ["multioutput_fusion_test.cc"],
     backends = ["gpu"],
+    tags = ["no_rocm"],  # ROCm 3.9 regression
     deps = [
         ":test_macros_header",
         "//tensorflow/compiler/xla:literal",
diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD
index 248b30c4c24..5d686219484 100644
--- a/tensorflow/core/common_runtime/BUILD
+++ b/tensorflow/core/common_runtime/BUILD
@@ -2127,7 +2127,6 @@ tf_cuda_cc_test(
     size = "small",
     srcs = ["process_function_library_runtime_test.cc"],
     linkstatic = tf_kernel_tests_linkstatic(),
-    tags = ["no_rocm"],
     deps = [
         ":core_cpu",
         ":core_cpu_internal",
diff --git a/tensorflow/core/nccl/BUILD b/tensorflow/core/nccl/BUILD
index 9b1447f53c1..70fcce9ab77 100644
--- a/tensorflow/core/nccl/BUILD
+++ b/tensorflow/core/nccl/BUILD
@@ -64,8 +64,6 @@ tf_cuda_cc_test(
         "manual",
         "multi_gpu",
         "no_oss",
-        # TODO(b/147451637): Replace 'no_rocm' with 'rocm_multi_gpu'.
-        "no_rocm",
         "notap",
     ],
     deps = [
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index d141220888e..ab19be973fe 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2018,6 +2018,7 @@ cuda_py_test(
     python_version = "PY3",
     shard_count = 10,
     tags = [
+        "no_rocm",
         "noasan",
         "optonly",
     ],
@@ -2063,6 +2064,7 @@ tf_py_test(
     srcs = ["framework/importer_test.py"],
     main = "framework/importer_test.py",
     python_version = "PY3",
+    tags = ["no_rocm"],
     deps = [
         ":array_ops",
         ":client_testlib",
@@ -4883,6 +4885,7 @@ cuda_py_test(
     srcs = ["ops/nn_fused_batchnorm_test.py"],
     python_version = "PY3",
     shard_count = 24,
+    tags = ["no_rocm"],
     deps = [
         ":array_ops",
         ":client_testlib",
diff --git a/tensorflow/python/compiler/tensorrt/BUILD b/tensorflow/python/compiler/tensorrt/BUILD
index 6b3f32cadc4..9237ad1392e 100644
--- a/tensorflow/python/compiler/tensorrt/BUILD
+++ b/tensorflow/python/compiler/tensorrt/BUILD
@@ -97,7 +97,6 @@ cuda_py_test(
     tags = [
         "no_cuda_on_cpu_tap",
         "no_pip",
-        "no_rocm",
         "no_windows",
         "nomac",
     ],
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 56361b3e226..b4a6ce60f8b 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -1119,7 +1119,6 @@ py_test(
     srcs = ["cli/debugger_cli_common_test.py"],
     python_version = "PY3",
     srcs_version = "PY2AND3",
-    tags = ["no_rocm"],
     deps = [
         ":debugger_cli_common",
         "//tensorflow/python:framework_test_lib",
diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD
index 99eeae580dd..168a58f6b01 100644
--- a/tensorflow/python/distribute/BUILD
+++ b/tensorflow/python/distribute/BUILD
@@ -162,7 +162,6 @@ py_test(
     srcs = ["distribute_lib_test.py"],
     python_version = "PY3",
     srcs_version = "PY2AND3",
-    tags = ["no_rocm"],
     deps = [
         ":combinations",
         ":distribute_lib",
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 4e8fed1b4e3..f58a8751a34 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -582,7 +582,6 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 8,
     tags = [
-        "no_rocm",
         "notsan",  # b/67509773
     ],
     deps = [
diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD
index ec450c32ead..4b563db6716 100644
--- a/tensorflow/python/keras/distribute/BUILD
+++ b/tensorflow/python/keras/distribute/BUILD
@@ -808,7 +808,6 @@ distribute_py_test(
     tags = [
         "multi_and_single_gpu",
         "no_cuda_asan",  # times out
-        "no_rocm",
     ],
     xla_tags = [
         "no_cuda_asan",  # times out
@@ -828,7 +827,6 @@ distribute_py_test(
     shard_count = 7,
     tags = [
         "multi_and_single_gpu",
-        "no_rocm",
     ],
     xla_tags = [
         "no_cuda_asan",  # times out
diff --git a/tensorflow/python/keras/engine/BUILD b/tensorflow/python/keras/engine/BUILD
index 49e3fcfb178..185f15a9331 100644
--- a/tensorflow/python/keras/engine/BUILD
+++ b/tensorflow/python/keras/engine/BUILD
@@ -394,7 +394,6 @@ tf_py_test(
     shard_count = 20,
     tags = [
         "manual",
-        "no_rocm",
         "nomac",  # TODO(mihaimaruseac): b/127695564
         "notsan",
     ],
@@ -519,7 +518,6 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 30,
     tags = [
-        "no_rocm",
         "nomac",  # TODO(mihaimaruseac): b/127695564
     ],
     deps = [
@@ -609,6 +607,7 @@ tf_py_test(
     shard_count = 8,
     tags = [
         "no-internal-py3",
+        "no_rocm",
         "nomac",  # TODO(mihaimaruseac): b/127695564
     ],
     deps = [
@@ -663,7 +662,6 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 8,
     tags = [
-        "no_rocm",
         "nomac",  # TODO(mihaimaruseac): b/127695564
     ],
     deps = [
diff --git a/tensorflow/python/keras/layers/BUILD b/tensorflow/python/keras/layers/BUILD
index 54e815dc60a..1caff66c651 100644
--- a/tensorflow/python/keras/layers/BUILD
+++ b/tensorflow/python/keras/layers/BUILD
@@ -750,7 +750,6 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 4,
     tags = [
-        "no_rocm",
         "notsan",  # http://b/62136390
     ],
     deps = [
@@ -769,7 +768,6 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 4,
     tags = [
-        "no_rocm",
         "noasan",  # times out b/63678675
         "notsan",  # http://b/62189182
     ],
diff --git a/tensorflow/python/keras/tests/BUILD b/tensorflow/python/keras/tests/BUILD
index 53ac326d1f2..7718884cc91 100644
--- a/tensorflow/python/keras/tests/BUILD
+++ b/tensorflow/python/keras/tests/BUILD
@@ -147,7 +147,6 @@ tf_py_test(
     python_version = "PY3",
     shard_count = 16,
     tags = [
-        "no_rocm",
         "notsan",
     ],
     deps = [
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 4f7c6ccefb2..dc00408b9d8 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -241,7 +241,6 @@ cuda_py_test(
     srcs = ["cholesky_op_test.py"],
     shard_count = 5,
     tags = [
-        "no_rocm",  # TODO(rocm): feature not supported on ROCm platform
         "nomsan",  # TODO(b/131773093): Re-enable.
     ],
     deps = [
@@ -1720,6 +1719,7 @@ cuda_py_test(
     name = "betainc_op_test",
     size = "small",
     srcs = ["betainc_op_test.py"],
+    tags = ["no_rocm"],  # ROCm 3.9 regression
     xla_tags = [
         "no_cuda_asan",  # times out
     ],
@@ -3851,7 +3851,6 @@ cuda_py_test(
     size = "medium",
     srcs = ["tridiagonal_matmul_op_test.py"],
     shard_count = 10,
-    tags = ["no_rocm"],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD
index d4f29d4837f..7bdcdef4858 100644
--- a/tensorflow/python/kernel_tests/distributions/BUILD
+++ b/tensorflow/python/kernel_tests/distributions/BUILD
@@ -61,6 +61,7 @@ cuda_py_test(
     size = "small",
     srcs = ["beta_test.py"],
     tags = [
+        "no_rocm",  # ROCm 3.9 regression
         "notsan",  # b/173653918
     ],
     xla_tags = [
diff --git a/tensorflow/python/kernel_tests/linalg/BUILD b/tensorflow/python/kernel_tests/linalg/BUILD
index 097183d1025..0da8a0e6880 100644
--- a/tensorflow/python/kernel_tests/linalg/BUILD
+++ b/tensorflow/python/kernel_tests/linalg/BUILD
@@ -149,7 +149,6 @@ cuda_py_test(
     srcs = ["linear_operator_circulant_test.py"],
     shard_count = 10,
     tags = [
-        "no_rocm",  # calls BLAS ops for complex types
         "noasan",  # times out, b/63678675
         "optonly",  # times out, b/79171797
     ],
diff --git a/tensorflow/python/kernel_tests/linalg/sparse/BUILD b/tensorflow/python/kernel_tests/linalg/sparse/BUILD
index 96ebc38ce5a..0352ae764b4 100644
--- a/tensorflow/python/kernel_tests/linalg/sparse/BUILD
+++ b/tensorflow/python/kernel_tests/linalg/sparse/BUILD
@@ -40,7 +40,10 @@ cuda_py_test(
     srcs = ["csr_sparse_matrix_ops_test.py"],
     main = "csr_sparse_matrix_ops_test.py",
     shard_count = 10,
-    tags = ["notsan"],  # b/149115441
+    tags = [
+        "no_rocm",  # ROCm 3.8 regression
+        "notsan",  # b/149115441
+    ],
     deps = [
         "//tensorflow/python/ops/linalg/sparse",
         "//tensorflow/python/ops/linalg/sparse:gen_sparse_csr_matrix_ops",
diff --git a/tensorflow/python/kernel_tests/signal/BUILD b/tensorflow/python/kernel_tests/signal/BUILD
index bd893184570..d2d6296aef4 100644
--- a/tensorflow/python/kernel_tests/signal/BUILD
+++ b/tensorflow/python/kernel_tests/signal/BUILD
@@ -125,6 +125,7 @@ cuda_py_tests(
     srcs = ["spectral_ops_test.py"],
     python_version = "PY3",
     tags = [
+        "no_rocm",
         "nomac",
     ],
     deps = [
diff --git a/tensorflow/python/ops/ragged/BUILD b/tensorflow/python/ops/ragged/BUILD
index 2934491e69a..d726c3b832d 100644
--- a/tensorflow/python/ops/ragged/BUILD
+++ b/tensorflow/python/ops/ragged/BUILD
@@ -1104,7 +1104,6 @@ py_test(
     srcs = ["ragged_map_fn_op_test.py"],
     python_version = "PY3",
     srcs_version = "PY2AND3",
-    tags = ["no_rocm"],
     deps = [
         ":ragged",  # fixdeps: keep
         ":ragged_factory_ops",
diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD
index 772ec7b817f..0de43370259 100644
--- a/tensorflow/python/saved_model/BUILD
+++ b/tensorflow/python/saved_model/BUILD
@@ -367,7 +367,6 @@ py_strict_library(
 tf_py_test(
     name = "save_test",
     srcs = ["save_test.py"],
-    tags = ["no_rocm"],
     deps = [
         ":loader",
         ":save",
diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD
index dc126a168e4..ba8d623c4b7 100644
--- a/tensorflow/python/tools/BUILD
+++ b/tensorflow/python/tools/BUILD
@@ -444,7 +444,6 @@ saved_model_compile_aot(
         "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     ],
     force_without_xla_support_flag = False,
-    tags = ["no_rocm"],
 )
 
 saved_model_compile_aot(
@@ -455,7 +454,6 @@ saved_model_compile_aot(
         "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     ],
     force_without_xla_support_flag = False,
-    tags = ["no_rocm"],
 )
 
 saved_model_compile_aot(
@@ -466,7 +464,6 @@ saved_model_compile_aot(
         "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     ],
     force_without_xla_support_flag = False,
-    tags = ["no_rocm"],
     variables_to_feed = "variable_x",
 )
 
@@ -503,7 +500,6 @@ tf_cc_test(
     srcs = if_xla_available([
         "aot_compiled_test.cc",
     ]),
-    tags = ["no_rocm"],
     deps = [
         "//tensorflow/core:test_main",
     ] + if_xla_available([
diff --git a/tensorflow/python/tpu/BUILD b/tensorflow/python/tpu/BUILD
index 0d5f30d3a0e..44838e6b05c 100644
--- a/tensorflow/python/tpu/BUILD
+++ b/tensorflow/python/tpu/BUILD
@@ -34,7 +34,6 @@ py_test(
         "no_oss_py2",
         "no_oss_py35",
         "no_pip",
-        "no_rocm",
     ],
     deps = [
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/tools/compatibility/BUILD b/tensorflow/tools/compatibility/BUILD
index 1aa76fb5a9b..238b6909324 100644
--- a/tensorflow/tools/compatibility/BUILD
+++ b/tensorflow/tools/compatibility/BUILD
@@ -270,7 +270,6 @@ py_test(
     srcs = ["test_file_v2_0.py"],
     python_version = "PY3",
     srcs_version = "PY2AND3",
-    tags = ["no_rocm"],
     deps = [
         "//tensorflow:tensorflow_py",
     ],