diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 86c9d1fc665..161a0a95856 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1010,7 +1010,7 @@ cc_library(
         "//tensorflow/core/kernels:histogram_op",
         "//tensorflow/core/kernels:image",
         "//tensorflow/core/kernels:io",
-        "//tensorflow/core/kernels:linalg",
+        "//tensorflow/core/kernels/linalg:linalg",
         "//tensorflow/core/kernels:lookup",
         "//tensorflow/core/kernels:logging",
         "//tensorflow/core/kernels:manip",
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 34a3ee800d8..12d4f1c5574 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1039,9 +1039,6 @@ cc_library(
         ":immutable_constant_op",
         ":inplace_ops",
         ":listdiff_op",
-        ":matrix_band_part_op",
-        ":matrix_diag_op",
-        ":matrix_set_diag_op",
         ":mirror_pad_op",
         ":one_hot_op",
         ":pack_op",
@@ -1174,26 +1171,6 @@ tf_kernel_library(
     deps = ARRAY_DEPS,
 )
 
-tf_kernel_library(
-    name = "matrix_band_part_op",
-    prefix = "matrix_band_part_op",
-    deps = if_cuda([
-        ":cuda_solvers",
-    ]) + ARRAY_DEPS,
-)
-
-tf_kernel_library(
-    name = "matrix_diag_op",
-    prefix = "matrix_diag_op",
-    deps = ARRAY_DEPS,
-)
-
-tf_kernel_library(
-    name = "matrix_set_diag_op",
-    prefix = "matrix_set_diag_op",
-    deps = ARRAY_DEPS + [":matrix_diag_op"],
-)
-
 tf_kernel_library(
     name = "mirror_pad_op",
     prefix = "mirror_pad_op",
@@ -1405,7 +1382,7 @@ tf_kernel_library(
         "where_op_gpu_impl_8.cu.cc",
     ],
     deps = if_cuda_or_rocm([
-               ":cuda_solvers",
+               "//tensorflow/core/util:cuda_solvers",
            ]) + [":gpu_prim_hdrs"] +
            ARRAY_DEPS,
 )
@@ -2785,21 +2762,6 @@ tf_cuda_cc_tests(
     ],
 )
 
-tf_kernel_library(
-    name = "eye_functor",
-    hdrs = ["eye_functor.h"],
-    gpu_srcs = [
-        "eye_functor_gpu.cu.cc",
-        "eye_functor.h",
-    ],
-    visibility = [":friends"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//third_party/eigen3",
-    ],
-    alwayslink = 0,
-)
-
 cc_library(
     name = "fifo_queue",
     srcs = ["fifo_queue.cc"],
@@ -3558,289 +3520,6 @@ tf_cc_tests(
     ],
 )
 
-cc_library(
-    name = "linalg",
-    deps = [
-        ":banded_triangular_solve_op",
-        ":cholesky_grad",
-        ":cholesky_op",
-        ":determinant_op",
-        ":eig_op",
-        ":einsum_op",
-        ":lu_op",
-        ":matrix_exponential_op",
-        ":matrix_inverse_op",
-        ":matrix_logarithm_op",
-        ":matrix_solve_ls_op",
-        ":matrix_solve_op",
-        ":matrix_square_root_op",
-        ":matrix_triangular_solve_op",
-        ":qr_op",
-        ":self_adjoint_eig_op",
-        ":self_adjoint_eig_v2_op",
-        ":svd_op",
-        ":tridiagonal_matmul_op",
-        ":tridiagonal_solve_op",
-    ],
-)
-
-tf_kernel_library(
-    name = "cuda_solvers",
-    srcs = ["cuda_solvers.cc"],
-    hdrs = ["cuda_solvers.h"],
-    # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas,
-    # and //third_party/libf2c all contain various parts of BLAS, LAPACK,
-    # and f2c helper functions in global namespace. Tell the compiler to
-    # allow multiple definitions when linking this.
-    linkopts = select({
-        "//tensorflow:macos": [],
-        "//tensorflow:windows": [],
-        "//conditions:default": ["-Wl,-z,muldefs"],
-    }),
-    visibility = [":friends"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/platform/default/build_config:cublas_plugin",
-        "//tensorflow/stream_executor/cuda:cublas_lib",
-        "//tensorflow/stream_executor/cuda:cusolver_lib",
-    ],
-)
-
-tf_kernel_library(
-    name = "rocm_solvers",
-    srcs = ["rocm_solvers.cc"],
-    hdrs = ["rocm_solvers.h"],
-    visibility = [":friends"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:framework_internal",
-        "//tensorflow/core:lib",
-        "//tensorflow/stream_executor/lib",
-        "//tensorflow/stream_executor/platform:dso_loader",
-        "//tensorflow/stream_executor/rocm:rocblas_plugin",
-        "//tensorflow/stream_executor/rocm:rocm_gpu_executor",
-    ] + if_rocm([
-        "@local_config_rocm//rocm:rocprim",
-    ]),
-)
-
-tf_kernel_library(
-    name = "cuda_sparse",
-    srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]),
-    hdrs = ["cuda_sparse.h"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/kernels:cuda_solvers",
-    ] + if_cuda([
-        "//tensorflow/stream_executor/cuda:cusparse_lib",
-        "@cub_archive//:cub",
-    ]) + if_rocm([
-        "@local_config_rocm//rocm:hipsparse",
-    ]),
-)
-
-LINALG_DEPS = [
-    ":linalg_ops_common",
-    "//third_party/eigen3",
-    "//tensorflow/core:framework",
-    "//tensorflow/core:lib",
-] + if_cuda([
-    ":cuda_solvers",
-    ":transpose_functor",
-]) + if_rocm([
-    ":rocm_solvers",
-])
-
-tf_kernel_library(
-    name = "cholesky_op",
-    prefix = "cholesky_op",
-    deps = if_cuda([
-        ":matrix_band_part_op",
-    ]) + LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "cholesky_grad",
-    prefix = "cholesky_grad",
-    deps = LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "determinant_op",
-    prefix = "determinant_op",
-    deps = if_cuda([
-        ":fill_functor",
-    ]) + LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "matrix_exponential_op",
-    prefix = "matrix_exponential_op",
-    deps = LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "matrix_logarithm_op",
-    prefix = "matrix_logarithm_op",
-    deps = LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "self_adjoint_eig_op",
-    prefix = "self_adjoint_eig_op",
-    deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"],
-)
-
-tf_kernel_library(
-    name = "self_adjoint_eig_v2_op",
-    prefix = "self_adjoint_eig_v2_op",
-    deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([
-        ":cast_op",
-        ":cwise_op",
-    ]),
-)
-
-tf_kernel_library(
-    name = "eig_op",
-    prefix = "eig_op",
-    deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([
-        ":cast_op",
-        ":cwise_op",
-    ]),
-)
-
-tf_kernel_library(
-    name = "matrix_inverse_op",
-    prefix = "matrix_inverse_op",
-    deps = LINALG_DEPS + if_cuda([":eye_functor"]),
-)
-
-tf_kernel_library(
-    name = "matrix_solve_ls_op",
-    prefix = "matrix_solve_ls_op",
-    deps = LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "matrix_solve_op",
-    prefix = "matrix_solve_op",
-    deps = LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "matrix_square_root_op",
-    prefix = "matrix_square_root_op",
-    deps = LINALG_DEPS,
-)
-
-tf_kernel_library(
-    name = "banded_triangular_solve_op",
-    prefix = "banded_triangular_solve_op",
-    deps = LINALG_DEPS + [":fill_functor"],
-)
-
-tf_kernel_library(
-    name = "matrix_triangular_solve_op",
-    hdrs = ["matrix_triangular_solve_op_impl.h"],
-    prefix = "matrix_triangular_solve_op",
-    deps = [
-        ":linalg_ops_common",
-        "//third_party/eigen3",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        ":fill_functor",
-        "//tensorflow/core:stream_executor",
-    ] + if_cuda([
-        "//tensorflow/core/platform/default/build_config:cublas_plugin",
-        ":cuda_solvers",
-    ]) + if_rocm([
-        "@local_config_rocm//rocm:rocprim",
-        ":rocm_solvers",
-    ]) + if_cuda_or_rocm([
-        ":transpose_functor",
-    ]),
-)
-
-tf_kernel_library(
-    name = "tridiagonal_matmul_op",
-    srcs = ["tridiagonal_matmul_op.cc"],
-    gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"],
-    deps = LINALG_DEPS + if_cuda([
-        ":cuda_sparse",
-    ]),
-)
-
-tf_kernel_library(
-    name = "tridiagonal_solve_op",
-    srcs = ["tridiagonal_solve_op.cc"],
-    gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"],
-    deps = LINALG_DEPS + if_cuda([
-        ":cuda_sparse",
-    ]),
-)
-
-tf_kernel_library(
-    name = "qr_op",
-    prefix = "qr_op",
-    deps = LINALG_DEPS + if_cuda([
-        ":cwise_op",
-        ":eye_functor",
-        ":matrix_band_part_op",
-    ]),
-)
-
-tf_kernel_library(
-    name = "svd_op",
-    prefix = "svd_op",
-    deps = LINALG_DEPS + if_cuda([
-        ":eye_functor",
-    ]),
-)
-
-tf_kernel_library(
-    name = "lu_op",
-    prefix = "lu_op",
-    deps = if_cuda([
-        ":cuda_solvers",
-        ":transpose_functor",
-    ]) + [
-        "//third_party/eigen3",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-    ],
-)
-
-tf_kernel_library(
-    name = "einsum_op",
-    prefix = "einsum_op",
-    deps = [
-        ":batch_matmul_op",
-        ":fill_functor",
-        ":reduction_ops",
-        ":transpose_functor",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/profiler/lib:traceme",
-        "//third_party/eigen3",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@com_google_absl//absl/strings",
-    ],
-)
-
-cc_library(
-    name = "linalg_ops_common",
-    srcs = ["linalg_ops_common.cc"],
-    hdrs = ["linalg_ops_common.h"],
-    visibility = ["//visibility:private"],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//third_party/eigen3",
-    ],
-)
-
 cc_library(
     name = "logging",
     deps = [
@@ -4208,7 +3887,7 @@ tf_kernel_library(
     name = "segment_reduction_ops",
     prefix = "segment_reduction_ops",
     deps = MATH_DEPS + if_cuda_or_rocm([
-        ":cuda_solvers",
+        "//tensorflow/core/util:cuda_solvers",
     ]),
 )
 
@@ -4405,45 +4084,6 @@ tf_cuda_cc_test(
     ],
 )
 
-tf_cuda_cc_test(
-    name = "banded_triangular_solve_op_test",
-    size = "small",
-    srcs = ["banded_triangular_solve_op_test.cc"],
-    deps = [
-        ":banded_triangular_solve_op",
-        ":matrix_set_diag_op",
-        ":matrix_triangular_solve_op",
-        ":ops_testutil",
-        ":ops_util",
-        "//tensorflow/core:core_cpu",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ],
-)
-
-tf_cuda_cc_test(
-    name = "matrix_triangular_solve_op_test",
-    size = "small",
-    srcs = ["matrix_triangular_solve_op_test.cc"],
-    deps = [
-        ":broadcast_to_op",
-        ":matrix_triangular_solve_op",
-        ":ops_testutil",
-        ":ops_util",
-        "//tensorflow/core:core_cpu",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ],
-)
-
 tf_cuda_cc_test(
     name = "scan_ops_test",
     size = "small",
@@ -6672,10 +6312,7 @@ filegroup(
         "lookup_table_init_op.h",
         "lookup_table_op.h",
         "lookup_util.h",
-        "linalg_ops_common.h",
         "list_kernels.h",
-        "matrix_diag_op.h",
-        "matrix_set_diag_op.h",
         "maxpooling_op.h",
         "mfcc.h",
         "mfcc_dct.h",
@@ -6723,6 +6360,9 @@ filegroup(
         "xent_op.h",
     ] + [
         "//tensorflow/core/kernels/boosted_trees/quantiles:weighted_quantiles_hdrs",
+        "//tensorflow/core/kernels/linalg:linalg_ops_common.h",
+        "//tensorflow/core/kernels/linalg:matrix_diag_op.h",
+        "//tensorflow/core/kernels/linalg:matrix_set_diag_op.h",
     ],
 )
 
@@ -6823,16 +6463,6 @@ filegroup(
         "encode_wav_op.cc",
         "eigen_contraction_kernel.cc",
         "eigen_contraction_kernel.h",
-        "einsum_op_impl_half.cc",
-        "einsum_op_impl_bfloat16.cc",
-        "einsum_op_impl_int32.cc",
-        "einsum_op_impl_int64.cc",
-        "einsum_op_impl_float.cc",
-        "einsum_op_impl_double.cc",
-        "einsum_op_impl_complex64.cc",
-        "einsum_op_impl_complex128.cc",
-        "einsum_op_impl.h",
-        "einsum_op.h",
         "fake_quant_ops.cc",
         "fifo_queue.cc",
         "fifo_queue_op.cc",
@@ -6844,6 +6474,17 @@ filegroup(
         "population_count_op.h",
         "winograd_transform.h",
         ":android_extended_ops_headers",
+    ] + [
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_half.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_bfloat16.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_int32.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_int64.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_float.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_double.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_complex64.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl_complex128.cc",
+        "//tensorflow/core/kernels/linalg:einsum_op_impl.h",
+        "//tensorflow/core/kernels/linalg:einsum_op.h",
     ] + select({
         ":xsmm_convolutions": [
             "xsmm_conv2d.h",
@@ -6874,7 +6515,6 @@ filegroup(
         "in_topk_op.cc",
         "in_topk_op.h",
         "initializable_lookup_table.cc",
-        "linalg_ops_common.cc",
         "list_kernels.cc",
         "logging_ops.cc",
         "logging_ops.h",
@@ -6882,9 +6522,6 @@ filegroup(
         "lookup_table_op.cc",
         "lookup_util.cc",
         "lrn_op.cc",
-        "matrix_diag_op.cc",
-        "matrix_inverse_op.cc",
-        "matrix_set_diag_op.cc",
         "maxpooling_op.cc",
         "mfcc.cc",
         "mfcc_dct.cc",
@@ -7006,6 +6643,10 @@ filegroup(
         ":android_extended_ops_headers",
     ] + [
         "//tensorflow/core/kernels/boosted_trees:quantile_ops.cc",
+        "//tensorflow/core/kernels/linalg:linalg_ops_common.cc",
+        "//tensorflow/core/kernels/linalg:matrix_diag_op.cc",
+        "//tensorflow/core/kernels/linalg:matrix_inverse_op.cc",
+        "//tensorflow/core/kernels/linalg:matrix_set_diag_op.cc",
     ],
 )
 
@@ -7059,6 +6700,7 @@ filegroup(
     srcs = [
         "//tensorflow/c/kernels:android_all_op_kernels",
         "//tensorflow/core/kernels/data:android_all_op_kernels",
+        "//tensorflow/core/kernels/linalg:android_all_op_kernels",
     ] + glob(
         [
             "*.cc",
@@ -8827,3 +8469,15 @@ tf_kernel_library(
         "@sobol_data",
     ],
 )
+
+# ---- temporary forwarding declaration for libraries in linalg
+# TODO(b/160344057): Remove after updating dependencies.
+tf_kernel_library(
+    name = "matrix_inverse_op",
+    deps = ["//tensorflow/core/kernels/linalg:matrix_inverse_op"],
+)
+
+tf_kernel_library(
+    name = "einsum_op",
+    deps = ["//tensorflow/core/kernels/linalg:einsum_op"],
+)
diff --git a/tensorflow/core/kernels/linalg/BUILD b/tensorflow/core/kernels/linalg/BUILD
new file mode 100644
index 00000000000..c735f58ae51
--- /dev/null
+++ b/tensorflow/core/kernels/linalg/BUILD
@@ -0,0 +1,376 @@
+load(
+    "//tensorflow:tensorflow.bzl",
+    "if_cuda_or_rocm",
+    "tf_kernel_library",
+)
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load(
+    "@local_config_rocm//rocm:build_defs.bzl",
+    "if_rocm",
+)
+load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
+
+# Description:
+# Op kernel implementations for TensorFlow.
+#
+# Note: Any test that uses GPU support and which we would like to
+# benchmark should be linked statically so that it can be executed
+# from a py_binary or cuda_py_test test logger.  For such a test,
+# append "_gpu" to the test name to invoke the GPU benchmarks.  Example:
+#
+#   # for CPU tests
+#   $ bazel test --config opt //third_party/tensorflow/core/kernels:my_op_test
+#   # for GPU benchmarks
+#   $ bazel run --config opt --config=cuda //third_party/tensorflow/core/kernels:my_op_test_gpu -- --benchmarks=..
+#
+package(
+    default_visibility = [
+        "//tensorflow:__subpackages__",
+        "//tensorflow:internal",
+    ],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+# TODO(rmlarsen): Remove ASAP.
+package_group(
+    name = "friends",
+    packages = ["//tensorflow/..."],
+)
+
+# Export a few files for use on Android.
+exports_files([
+    "einsum_op_impl_half.cc",
+    "einsum_op_impl_bfloat16.cc",
+    "einsum_op_impl_int32.cc",
+    "einsum_op_impl_int64.cc",
+    "einsum_op_impl_float.cc",
+    "einsum_op_impl_double.cc",
+    "einsum_op_impl_complex64.cc",
+    "einsum_op_impl_complex128.cc",
+    "einsum_op_impl.h",
+    "einsum_op.h",
+    "linalg_ops_common.h",
+    "linalg_ops_common.cc",
+    "matrix_diag_op.h",
+    "matrix_diag_op.cc",
+    "matrix_inverse_op.cc",
+    "matrix_set_diag_op.h",
+    "matrix_set_diag_op.cc",
+])
+
+# Public support libraries ----------------------------------------------------
+
+cc_library(
+    name = "linalg",
+    deps = [
+        ":banded_triangular_solve_op",
+        ":cholesky_grad",
+        ":cholesky_op",
+        ":determinant_op",
+        ":eig_op",
+        ":einsum_op",
+        ":lu_op",
+        ":matrix_band_part_op",
+        ":matrix_diag_op",
+        ":matrix_exponential_op",
+        ":matrix_inverse_op",
+        ":matrix_logarithm_op",
+        ":matrix_set_diag_op",
+        ":matrix_solve_ls_op",
+        ":matrix_solve_op",
+        ":matrix_square_root_op",
+        ":matrix_triangular_solve_op",
+        ":qr_op",
+        ":self_adjoint_eig_op",
+        ":self_adjoint_eig_v2_op",
+        ":svd_op",
+        ":tridiagonal_matmul_op",
+        ":tridiagonal_solve_op",
+    ],
+)
+
+LINALG_DEPS = [
+    ":linalg_ops_common",
+    "//third_party/eigen3",
+    "//tensorflow/core:framework",
+    "//tensorflow/core:lib",
+    "//tensorflow/core/kernels:cast_op",
+    "//tensorflow/core/kernels:fill_functor",
+] + if_cuda([
+    ":eye_functor",
+    "//tensorflow/core/util:cuda_solvers",
+    "//tensorflow/core/kernels:transpose_functor",
+]) + if_rocm([
+    "//tensorflow/core/util:rocm_solvers",
+])
+
+tf_kernel_library(
+    name = "matrix_band_part_op",
+    prefix = "matrix_band_part_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_diag_op",
+    prefix = "matrix_diag_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_set_diag_op",
+    prefix = "matrix_set_diag_op",
+    deps = LINALG_DEPS + [":matrix_diag_op"],
+)
+
+tf_kernel_library(
+    name = "cholesky_op",
+    prefix = "cholesky_op",
+    deps = if_cuda([
+        ":matrix_band_part_op",
+    ]) + LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "cholesky_grad",
+    prefix = "cholesky_grad",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "determinant_op",
+    prefix = "determinant_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_exponential_op",
+    prefix = "matrix_exponential_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_logarithm_op",
+    prefix = "matrix_logarithm_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "self_adjoint_eig_op",
+    prefix = "self_adjoint_eig_op",
+    deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"],
+)
+
+tf_kernel_library(
+    name = "self_adjoint_eig_v2_op",
+    prefix = "self_adjoint_eig_v2_op",
+    deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([
+        "//tensorflow/core/kernels:cwise_op",
+    ]),
+)
+
+tf_kernel_library(
+    name = "eig_op",
+    prefix = "eig_op",
+    deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([
+        "//tensorflow/core/kernels:cwise_op",
+    ]),
+)
+
+tf_kernel_library(
+    name = "matrix_inverse_op",
+    prefix = "matrix_inverse_op",
+    visibility = [":friends"],
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_solve_ls_op",
+    prefix = "matrix_solve_ls_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_solve_op",
+    prefix = "matrix_solve_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_square_root_op",
+    prefix = "matrix_square_root_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "banded_triangular_solve_op",
+    prefix = "banded_triangular_solve_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "matrix_triangular_solve_op",
+    hdrs = ["matrix_triangular_solve_op_impl.h"],
+    prefix = "matrix_triangular_solve_op",
+    deps = [
+        ":linalg_ops_common",
+        "//third_party/eigen3",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:fill_functor",
+        "//tensorflow/core:stream_executor",
+    ] + if_cuda([
+        "//tensorflow/core/platform/default/build_config:cublas_plugin",
+        "//tensorflow/core/util:cuda_solvers",
+    ]) + if_rocm([
+        "@local_config_rocm//rocm:rocprim",
+        "//tensorflow/core/util:rocm_solvers",
+    ]) + if_cuda_or_rocm([
+        "//tensorflow/core/kernels:transpose_functor",
+    ]),
+)
+
+tf_kernel_library(
+    name = "tridiagonal_matmul_op",
+    srcs = ["tridiagonal_matmul_op.cc"],
+    gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"],
+    deps = LINALG_DEPS + if_cuda([
+        "//tensorflow/core/util:cuda_sparse",
+    ]),
+)
+
+tf_kernel_library(
+    name = "tridiagonal_solve_op",
+    srcs = ["tridiagonal_solve_op.cc"],
+    gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"],
+    deps = LINALG_DEPS + if_cuda([
+        "//tensorflow/core/util:cuda_sparse",
+    ]),
+)
+
+tf_kernel_library(
+    name = "qr_op",
+    prefix = "qr_op",
+    deps = LINALG_DEPS + if_cuda([
+        "//tensorflow/core/kernels:cwise_op",
+        ":matrix_band_part_op",
+    ]),
+)
+
+tf_kernel_library(
+    name = "svd_op",
+    prefix = "svd_op",
+    deps = LINALG_DEPS,
+)
+
+tf_kernel_library(
+    name = "lu_op",
+    prefix = "lu_op",
+    deps = if_cuda([
+        "//tensorflow/core/util:cuda_solvers",
+        "//tensorflow/core/kernels:transpose_functor",
+    ]) + [
+        "//third_party/eigen3",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "einsum_op",
+    prefix = "einsum_op",
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:batch_matmul_op",
+        "//tensorflow/core/kernels:fill_functor",
+        "//tensorflow/core/kernels:reduction_ops",
+        "//tensorflow/core/kernels:transpose_functor",
+        "//tensorflow/core/profiler/lib:traceme",
+        "//third_party/eigen3",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+cc_library(
+    name = "linalg_ops_common",
+    srcs = ["linalg_ops_common.cc"],
+    hdrs = ["linalg_ops_common.h"],
+    visibility = ["//visibility:private"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//third_party/eigen3",
+    ],
+)
+
+tf_cuda_cc_test(
+    name = "banded_triangular_solve_op_test",
+    size = "small",
+    srcs = ["banded_triangular_solve_op_test.cc"],
+    deps = [
+        ":banded_triangular_solve_op",
+        ":matrix_set_diag_op",
+        ":matrix_triangular_solve_op",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
+
+tf_kernel_library(
+    name = "eye_functor",
+    hdrs = ["eye_functor.h"],
+    gpu_srcs = [
+        "eye_functor_gpu.cu.cc",
+        "eye_functor.h",
+    ],
+    visibility = ["//tensorflow/core/kernels:friends"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//third_party/eigen3",
+    ],
+    alwayslink = 0,
+)
+
+tf_cuda_cc_test(
+    name = "matrix_triangular_solve_op_test",
+    size = "small",
+    srcs = ["matrix_triangular_solve_op_test.cc"],
+    deps = [
+        ":matrix_triangular_solve_op",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:broadcast_to_op",
+        "//tensorflow/core/kernels:ops_testutil",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
+
+# A file group which contains all operators which are known to work on mobile.
+filegroup(
+    name = "android_all_op_kernels",
+    srcs = glob(
+        [
+            "*.cc",
+            "*.h",
+        ],
+        exclude = [
+            "*test.cc",
+            "*test.h",
+            "*_test_*",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/core/kernels/banded_triangular_solve_op.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc
similarity index 99%
rename from tensorflow/core/kernels/banded_triangular_solve_op.cc
rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc
index d01a015502a..6758dcf5b8b 100644
--- a/tensorflow/core/kernels/banded_triangular_solve_op.cc
+++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/kernels/fill_functor.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
diff --git a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc
similarity index 99%
rename from tensorflow/core/kernels/banded_triangular_solve_op_test.cc
rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc
index 37e904a3e0e..7c20b88845f 100644
--- a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc
+++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc
@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/graph/testlib.h"
-#include "tensorflow/core/kernels/matrix_set_diag_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
diff --git a/tensorflow/core/kernels/cholesky_grad.cc b/tensorflow/core/kernels/linalg/cholesky_grad.cc
similarity index 99%
rename from tensorflow/core/kernels/cholesky_grad.cc
rename to tensorflow/core/kernels/linalg/cholesky_grad.cc
index eac66e580dd..31a5570cddf 100644
--- a/tensorflow/core/kernels/cholesky_grad.cc
+++ b/tensorflow/core/kernels/linalg/cholesky_grad.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/cholesky_op.cc b/tensorflow/core/kernels/linalg/cholesky_op.cc
similarity index 98%
rename from tensorflow/core/kernels/cholesky_op.cc
rename to tensorflow/core/kernels/linalg/cholesky_op.cc
index ff8fd08f228..eae09124b36 100644
--- a/tensorflow/core/kernels/cholesky_op.cc
+++ b/tensorflow/core/kernels/linalg/cholesky_op.cc
@@ -25,16 +25,16 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
 #if GOOGLE_CUDA
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/matrix_band_part_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h"
 #include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/determinant_op.cc b/tensorflow/core/kernels/linalg/determinant_op.cc
similarity index 99%
rename from tensorflow/core/kernels/determinant_op.cc
rename to tensorflow/core/kernels/linalg/determinant_op.cc
index b06f42384eb..8f0b0b618cf 100644
--- a/tensorflow/core/kernels/determinant_op.cc
+++ b/tensorflow/core/kernels/linalg/determinant_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #if GOOGLE_CUDA
 #define EIGEN_USE_GPU
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/determinant_op.h"
+#include "tensorflow/core/kernels/linalg/determinant_op.h"
 #endif
 
 #include "third_party/eigen3/Eigen/LU"
@@ -28,14 +28,14 @@ limitations under the License.
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
 #if GOOGLE_CUDA
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/determinant_op.h b/tensorflow/core/kernels/linalg/determinant_op.h
similarity index 90%
rename from tensorflow/core/kernels/determinant_op.h
rename to tensorflow/core/kernels/linalg/determinant_op.h
index eefdfe0ae40..6ace1bef44b 100644
--- a/tensorflow/core/kernels/determinant_op.h
+++ b/tensorflow/core/kernels/linalg/determinant_op.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_
-#define TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_
 
 #include "tensorflow/core/framework/tensor_types.h"
 
@@ -44,4 +44,4 @@ struct LogDeterminantFromPivotedLUFunctor {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_
diff --git a/tensorflow/core/kernels/determinant_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc
similarity index 98%
rename from tensorflow/core/kernels/determinant_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc
index 9aa64b3a7da..f6ab327bce0 100644
--- a/tensorflow/core/kernels/determinant_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc
@@ -21,8 +21,8 @@ limitations under the License.
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/determinant_op.h"
+#include "tensorflow/core/kernels/linalg/determinant_op.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/eig_op_complex128.cc b/tensorflow/core/kernels/linalg/eig_op_complex128.cc
similarity index 93%
rename from tensorflow/core/kernels/eig_op_complex128.cc
rename to tensorflow/core/kernels/linalg/eig_op_complex128.cc
index 988cc2f98d9..bd4b6fe36d0 100644
--- a/tensorflow/core/kernels/eig_op_complex128.cc
+++ b/tensorflow/core/kernels/linalg/eig_op_complex128.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/eig_op_impl.h"
+#include "tensorflow/core/kernels/linalg/eig_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/eig_op_complex64.cc b/tensorflow/core/kernels/linalg/eig_op_complex64.cc
similarity index 93%
rename from tensorflow/core/kernels/eig_op_complex64.cc
rename to tensorflow/core/kernels/linalg/eig_op_complex64.cc
index 6a3f7928715..b5b4a26ee85 100644
--- a/tensorflow/core/kernels/eig_op_complex64.cc
+++ b/tensorflow/core/kernels/linalg/eig_op_complex64.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/eig_op_impl.h"
+#include "tensorflow/core/kernels/linalg/eig_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/eig_op_double.cc b/tensorflow/core/kernels/linalg/eig_op_double.cc
similarity index 93%
rename from tensorflow/core/kernels/eig_op_double.cc
rename to tensorflow/core/kernels/linalg/eig_op_double.cc
index 2cd931cc135..c360637c84a 100644
--- a/tensorflow/core/kernels/eig_op_double.cc
+++ b/tensorflow/core/kernels/linalg/eig_op_double.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/eig_op_impl.h"
+#include "tensorflow/core/kernels/linalg/eig_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/eig_op_float.cc b/tensorflow/core/kernels/linalg/eig_op_float.cc
similarity index 93%
rename from tensorflow/core/kernels/eig_op_float.cc
rename to tensorflow/core/kernels/linalg/eig_op_float.cc
index a06f76e935f..18f576fcc19 100644
--- a/tensorflow/core/kernels/eig_op_float.cc
+++ b/tensorflow/core/kernels/linalg/eig_op_float.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/eig_op_impl.h"
+#include "tensorflow/core/kernels/linalg/eig_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/eig_op_impl.h b/tensorflow/core/kernels/linalg/eig_op_impl.h
similarity index 93%
rename from tensorflow/core/kernels/eig_op_impl.h
rename to tensorflow/core/kernels/linalg/eig_op_impl.h
index 4ebb6bde08b..a7aff7c2a5d 100644
--- a/tensorflow/core/kernels/eig_op_impl.h
+++ b/tensorflow/core/kernels/linalg/eig_op_impl.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_
-#define TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_
 
 // See docs in ../ops/linalg_ops.cc.
 
@@ -23,7 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/denormal.h"
 #include "tensorflow/core/platform/logging.h"
@@ -95,4 +95,4 @@ class EigOp : public LinearAlgebraOp<InputScalar, OutputScalar> {
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_
diff --git a/tensorflow/core/kernels/einsum_op.h b/tensorflow/core/kernels/linalg/einsum_op.h
similarity index 94%
rename from tensorflow/core/kernels/einsum_op.h
rename to tensorflow/core/kernels/linalg/einsum_op.h
index 31d1109004c..f22f33c600a 100644
--- a/tensorflow/core/kernels/einsum_op.h
+++ b/tensorflow/core/kernels/linalg/einsum_op.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_
-#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/tensor_types.h"
diff --git a/tensorflow/core/kernels/einsum_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc
similarity index 96%
rename from tensorflow/core/kernels/einsum_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc
index 2935b7fd02a..5461e43e0ab 100644
--- a/tensorflow/core/kernels/einsum_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/kernels/einsum_op.h"
+#include "tensorflow/core/kernels/linalg/einsum_op.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl.h b/tensorflow/core/kernels/linalg/einsum_op_impl.h
similarity index 99%
rename from tensorflow/core/kernels/einsum_op_impl.h
rename to tensorflow/core/kernels/linalg/einsum_op_impl.h
index 312738442b8..b9b2d1f0eae 100644
--- a/tensorflow/core/kernels/einsum_op_impl.h
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_
-#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_
 
 #define EIGEN_USE_THREADS
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -31,8 +31,8 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/kernels/batch_matmul_op_impl.h"
-#include "tensorflow/core/kernels/einsum_op.h"
 #include "tensorflow/core/kernels/fill_functor.h"
+#include "tensorflow/core/kernels/linalg/einsum_op.h"
 #include "tensorflow/core/kernels/reduction_ops_common.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -780,4 +780,4 @@ DECLARE_GPU_SPECS(complex128);
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_
diff --git a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc
similarity index 94%
rename from tensorflow/core/kernels/einsum_op_impl_bfloat16.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc
index 44508f86a5e..e2e13052df5 100644
--- a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl_complex128.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc
similarity index 95%
rename from tensorflow/core/kernels/einsum_op_impl_complex128.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc
index 8473cbf545d..ff78d460acf 100644
--- a/tensorflow/core/kernels/einsum_op_impl_complex128.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl_complex64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc
similarity index 95%
rename from tensorflow/core/kernels/einsum_op_impl_complex64.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc
index bd506a04f5f..cd3788846b2 100644
--- a/tensorflow/core/kernels/einsum_op_impl_complex64.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl_double.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc
similarity index 95%
rename from tensorflow/core/kernels/einsum_op_impl_double.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_double.cc
index f994590779b..e0c093fa4a9 100644
--- a/tensorflow/core/kernels/einsum_op_impl_double.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl_float.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc
similarity index 95%
rename from tensorflow/core/kernels/einsum_op_impl_float.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_float.cc
index 1875310b687..ad9135c991c 100644
--- a/tensorflow/core/kernels/einsum_op_impl_float.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl_half.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc
similarity index 95%
rename from tensorflow/core/kernels/einsum_op_impl_half.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_half.cc
index 0486b133e62..72a9f6bec4f 100644
--- a/tensorflow/core/kernels/einsum_op_impl_half.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl_int32.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc
similarity index 94%
rename from tensorflow/core/kernels/einsum_op_impl_int32.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc
index db5169498d9..7569c979c59 100644
--- a/tensorflow/core/kernels/einsum_op_impl_int32.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/einsum_op_impl_int64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc
similarity index 94%
rename from tensorflow/core/kernels/einsum_op_impl_int64.cc
rename to tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc
index 7f1a1eac411..6ee0ebc9637 100644
--- a/tensorflow/core/kernels/einsum_op_impl_int64.cc
+++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/einsum_op_impl.h"
+#include "tensorflow/core/kernels/linalg/einsum_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/eye_functor.h b/tensorflow/core/kernels/linalg/eye_functor.h
similarity index 90%
rename from tensorflow/core/kernels/eye_functor.h
rename to tensorflow/core/kernels/linalg/eye_functor.h
index 3799cfba9ae..c77372f089a 100644
--- a/tensorflow/core/kernels/eye_functor.h
+++ b/tensorflow/core/kernels/linalg/eye_functor.h
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_
-#define TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_
 
 #include "tensorflow/core/framework/tensor_types.h"
 
diff --git a/tensorflow/core/kernels/eye_functor_gpu.cu.cc b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc
similarity index 97%
rename from tensorflow/core/kernels/eye_functor_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc
index 90df538dd2c..85865588f2c 100644
--- a/tensorflow/core/kernels/eye_functor_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/type_traits.h"
-#include "tensorflow/core/kernels/eye_functor.h"
+#include "tensorflow/core/kernels/linalg/eye_functor.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/linalg_ops_common.cc b/tensorflow/core/kernels/linalg/linalg_ops_common.cc
similarity index 99%
rename from tensorflow/core/kernels/linalg_ops_common.cc
rename to tensorflow/core/kernels/linalg/linalg_ops_common.cc
index 56a941fbd1f..c8d33e435c7 100644
--- a/tensorflow/core/kernels/linalg_ops_common.cc
+++ b/tensorflow/core/kernels/linalg/linalg_ops_common.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 
 #include <utility>
 
diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.h b/tensorflow/core/kernels/linalg/linalg_ops_common.h
new file mode 100644
index 00000000000..3ab37480c90
--- /dev/null
+++ b/tensorflow/core/kernels/linalg/linalg_ops_common.h
@@ -0,0 +1,221 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
+
+// Classes to support linear algebra functionality, similar to the numpy.linalg
+// module. Supports batch computation on several matrices at once, sharding the
+// computations across different threads if necessary.
+#include <algorithm>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+
+// Base class for linear algebra operators.
+template <class InputScalar, class OutputScalar = InputScalar>
+class LinearAlgebraOp : public OpKernel {
+ public:
+  explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override;
+
+ protected:
+  using TensorShapes = gtl::InlinedVector<TensorShape, 4>;
+  // Returns the number of leading inputs that are to be treated as matrix
+  // inputs. By default this is all the inputs. Derived classes can override
+  // this to tell the base class to ignore one or more trailing inputs.
+  virtual int NumMatrixInputs(const OpKernelContext* context) const {
+    return context->num_inputs();
+  }
+
+  // Returns true if the number of inputs and their shapes are as expected.
+  // Many ops take a single square input matrix, so we provide that as a default
+  // implementation for convenience.
+  virtual void ValidateInputMatrixShapes(
+      OpKernelContext* context, const TensorShapes& input_matrix_shapes) const {
+    ValidateSingleSquareMatrix(context, input_matrix_shapes);
+  }
+
+  // Convenience validators for common cases:
+  //
+  // Validate op taking a single matrix A.
+  static void ValidateSingleMatrix(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+  // Validate op taking a single square matrix A.
+  static void ValidateSingleSquareMatrix(
+      OpKernelContext* context, const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows.
+  static void ValidateSolver(OpKernelContext* context,
+                             const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows
+  // and A is square.
+  static void ValidateSquareSolver(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+
+  // Returns the output shapes of each individual matrix operation. Output
+  // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0.
+  //
+  // The derived class may return a number of shapes (N) less than
+  // context->num_outputs() (M) to indicate that a only leading subset of
+  // the outputs will be populated. In this case, a dummy scalar tensor with
+  // value zero will be return for the last M-N outputs.
+  //
+  // For many ops, the output dimensions are the same as the input dimensions,
+  // so we provide that as a default implementation for convenience.
+  virtual TensorShapes GetOutputMatrixShapes(
+      const TensorShapes& input_matrix_shapes) const {
+    return input_matrix_shapes;
+  }
+
+  // Returns the cost per matrix operation. This is used to determine the
+  // number of threads to use for parallelizing calls to ComputeMatrix in
+  // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments
+  // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n)
+  // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a
+  // default implementation for convenience.
+  virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const {
+    double m = static_cast<double>(input_matrix_shapes[0].dim_size(0));
+    double n = static_cast<double>(input_matrix_shapes[0].dim_size(1));
+    double cost = std::max(m, n) * std::min(m, n) * std::min(m, n);
+    return cost >= static_cast<double>(kint64max) ? kint64max
+                                                  : static_cast<int64>(cost);
+  }
+
+  // Returns true if it is safe to forward (alias) input to output buffer
+  // and expect the kernel to perform the computation inplace.
+  virtual bool EnableInputForwarding() const { return true; }
+
+  using InputMatrix = Eigen::Matrix<InputScalar, Eigen::Dynamic, Eigen::Dynamic,
+                                    Eigen::RowMajor>;
+  using InputConstMatrixMap = Eigen::Map<const InputMatrix>;
+  using InputMatrixMap = Eigen::Map<InputMatrix>;
+  using InputConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<InputScalar, 1, Eigen::Dynamic>>;
+  using InputConstMatrixMaps = gtl::InlinedVector<InputConstMatrixMap, 4>;
+  using InputMatrixMaps = gtl::InlinedVector<InputMatrixMap, 4>;
+  using InputRealScalar = typename Eigen::NumTraits<InputScalar>::Real;
+
+  using OutputMatrix = Eigen::Matrix<OutputScalar, Eigen::Dynamic,
+                                     Eigen::Dynamic, Eigen::RowMajor>;
+  using OutputConstMatrixMap = Eigen::Map<const OutputMatrix>;
+  using OutputMatrixMap = Eigen::Map<OutputMatrix>;
+  using OutputConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<OutputScalar, 1, Eigen::Dynamic>>;
+  using OutputConstMatrixMaps = gtl::InlinedVector<OutputConstMatrixMap, 4>;
+  using OutputMatrixMaps = gtl::InlinedVector<OutputMatrixMap, 4>;
+  using OutputRealScalar = typename Eigen::NumTraits<OutputScalar>::Real;
+
+  // backward compatibility
+  using Scalar = OutputScalar;
+  using Matrix =
+      Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+  using ConstMatrixMap = Eigen::Map<const Matrix>;
+  using MatrixMap = Eigen::Map<Matrix>;
+  using ConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<Scalar, 1, Eigen::Dynamic>>;
+  using ConstMatrixMaps = gtl::InlinedVector<ConstMatrixMap, 4>;
+  using MatrixMaps = gtl::InlinedVector<MatrixMap, 4>;
+  using RealScalar = typename Eigen::NumTraits<Scalar>::Real;
+
+  // Performs a single matrix computation given input matrices, and
+  // stores the result in outputs. For batch operations, this will be called
+  // repeatedly for a single call to Compute() when multiple matrices exist in
+  // input Tensors with rank > 2. In this case the calls to ComputeMatrix are
+  // parallelized. The number of threads used is determined by a cost model from
+  // the value returned by GetCostPerUnit().
+  virtual void ComputeMatrix(OpKernelContext* context,
+                             const InputConstMatrixMaps& inputs,
+                             OutputMatrixMaps* outputs) = 0;
+
+ private:
+  using TensorInputs = gtl::InlinedVector<const Tensor*, 4>;
+  using TensorOutputs = gtl::InlinedVector<Tensor*, 4>;
+  // This function maps 2-d slices (matrices) of the input and output tensors
+  // using Eigen::Map and calls ComputeMatrix implemented in terms of the
+  // Eigen::MatrixBase API by the derived class.
+  //
+  // The 'matrix_index' parameter specifies the index of the matrix to be used
+  // from each input tensor, and the index of the matrix to be written to each
+  // output tensor. The input matrices are in row major order, and located at
+  // the memory addresses
+  //   inputs[i].flat<Scalar>().data() +
+  //   matrix_index * input_matrix_shapes[i].num_elements()
+  // for i in 0...inputs.size()-1.
+  // The output matrices are in row major order, and located at the memory
+  // address
+  //   outputs[i]->flat<Scalar>().data() +
+  //   matrix_index * output_matrix_shapes[i].num_elements().
+  // for i in 0...outputs.size()-1.
+  //
+  void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index,
+                          const TensorInputs& inputs,
+                          const TensorShapes& input_matrix_shapes,
+                          const TensorOutputs& outputs,
+                          const TensorShapes& output_matrix_shapes);
+
+  void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs,
+                     TensorShapes* input_matrix_shapes,
+                     TensorShape* batch_shape);
+
+  void PrepareOutputs(OpKernelContext* context,
+                      const TensorShapes& input_matrix_shapes,
+                      const TensorShape& batch_shape, TensorOutputs* outputs,
+                      TensorShapes* output_matrix_shapes);
+};
+
+// Declare LinearAlgebraOp, which is explicitly instantiated in
+// linalg_ops_common.cc for float, double, complex64, and complex128.
+extern template class LinearAlgebraOp<float>;
+extern template class LinearAlgebraOp<double>;
+extern template class LinearAlgebraOp<complex64>;
+extern template class LinearAlgebraOp<complex128>;
+
+}  // namespace tensorflow
+
+#define INHERIT_LINALG_TYPEDEFS(Scalar)                       \
+  typedef LinearAlgebraOp<Scalar> Base;                       \
+  using RealScalar = typename Eigen::NumTraits<Scalar>::Real; \
+  using Matrix = typename Base::Matrix;                       \
+  using MatrixMap = typename Base::MatrixMap;                 \
+  using MatrixMaps = typename Base::MatrixMaps;               \
+  using ConstMatrixMap = typename Base::ConstMatrixMap;       \
+  using ConstMatrixMaps = typename Base::ConstMatrixMaps;     \
+  using ConstVectorMap = typename Base::ConstVectorMap;       \
+  using TensorShapes = typename Base::TensorShapes;
+
+#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) \
+  REGISTER_KERNEL_BUILDER(                              \
+      Name(OpName).Device(DEVICE_CPU).TypeConstraint<Scalar>("T"), OpClass)
+
+#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar) \
+  REGISTER_KERNEL_BUILDER(                              \
+      Name(OpName).Device(DEVICE_GPU).TypeConstraint<Scalar>("T"), OpClass)
+
+// Deprecated, use one of the device-specific macros above.
+#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \
+  REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)
+
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
diff --git a/tensorflow/core/kernels/lu_op.cc b/tensorflow/core/kernels/linalg/lu_op.cc
similarity index 100%
rename from tensorflow/core/kernels/lu_op.cc
rename to tensorflow/core/kernels/linalg/lu_op.cc
diff --git a/tensorflow/core/kernels/lu_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/lu_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
index 47b37ed7f7a..9d23a35057d 100644
--- a/tensorflow/core/kernels/lu_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
@@ -25,9 +25,9 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_band_part_op.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_band_part_op.cc
rename to tensorflow/core/kernels/linalg/matrix_band_part_op.cc
index 4dcce5a8f58..23619bacc33 100644
--- a/tensorflow/core/kernels/matrix_band_part_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc
@@ -21,11 +21,12 @@ limitations under the License.
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#include "tensorflow/core/kernels/matrix_band_part_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h"
 
 #include <algorithm>
 #include <memory>
 #include <vector>
+
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
diff --git a/tensorflow/core/kernels/matrix_band_part_op.h b/tensorflow/core/kernels/linalg/matrix_band_part_op.h
similarity index 86%
rename from tensorflow/core/kernels/matrix_band_part_op.h
rename to tensorflow/core/kernels/linalg/matrix_band_part_op.h
index b04e36db8ed..2f68eba6dcd 100644
--- a/tensorflow/core/kernels/matrix_band_part_op.h
+++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_
-#define TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_
 
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_types.h"
@@ -34,4 +34,4 @@ struct MatrixBandPartFunctor {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_
diff --git a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc
similarity index 97%
rename from tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc
index 9eb3e4f72a2..9c734b7fd6e 100644
--- a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc
@@ -21,7 +21,7 @@ limitations under the License.
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/kernels/matrix_band_part_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_diag_op.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_diag_op.cc
rename to tensorflow/core/kernels/linalg/matrix_diag_op.cc
index 05d7e4e6f86..69cc8170793 100644
--- a/tensorflow/core/kernels/matrix_diag_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_diag_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#include "tensorflow/core/kernels/matrix_diag_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_diag_op.h"
 
 #include <algorithm>
 #include <memory>
diff --git a/tensorflow/core/kernels/matrix_diag_op.h b/tensorflow/core/kernels/linalg/matrix_diag_op.h
similarity index 94%
rename from tensorflow/core/kernels/matrix_diag_op.h
rename to tensorflow/core/kernels/linalg/matrix_diag_op.h
index 707fd9b6c14..5758ba664cc 100644
--- a/tensorflow/core/kernels/matrix_diag_op.h
+++ b/tensorflow/core/kernels/linalg/matrix_diag_op.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_
-#define TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_
 
 // Generator definition for MatrixDiagOp, must be compilable by nvcc.
 
@@ -69,4 +69,4 @@ struct MatrixDiag {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_
diff --git a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc
index 76271798d5f..6b52e70716d 100644
--- a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/kernels/matrix_diag_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_diag_op.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_exponential_op.cc b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc
similarity index 97%
rename from tensorflow/core/kernels/matrix_exponential_op.cc
rename to tensorflow/core/kernels/linalg/matrix_exponential_op.cc
index 01d4894438c..73407614955 100644
--- a/tensorflow/core/kernels/matrix_exponential_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc
similarity index 98%
rename from tensorflow/core/kernels/matrix_inverse_op.cc
rename to tensorflow/core/kernels/linalg/matrix_inverse_op.cc
index 52afdd15ba6..dc51776f2fe 100644
--- a/tensorflow/core/kernels/matrix_inverse_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc
@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -32,9 +32,9 @@ limitations under the License.
 
 #if GOOGLE_CUDA
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/eye_functor.h"
+#include "tensorflow/core/kernels/linalg/eye_functor.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_logarithm_op.cc b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc
similarity index 97%
rename from tensorflow/core/kernels/matrix_logarithm_op.cc
rename to tensorflow/core/kernels/linalg/matrix_logarithm_op.cc
index 22ca094e243..79d5472f140 100644
--- a/tensorflow/core/kernels/matrix_logarithm_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
diff --git a/tensorflow/core/kernels/matrix_set_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_set_diag_op.cc
rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.cc
index bf98fd0d47d..df32228d0f2 100644
--- a/tensorflow/core/kernels/matrix_set_diag_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc
@@ -21,7 +21,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#include "tensorflow/core/kernels/matrix_set_diag_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h"
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -30,7 +30,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/matrix_diag_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_diag_op.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
diff --git a/tensorflow/core/kernels/matrix_set_diag_op.h b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h
similarity index 89%
rename from tensorflow/core/kernels/matrix_set_diag_op.h
rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.h
index 04877cd34ca..449a3607ede 100644
--- a/tensorflow/core/kernels/matrix_set_diag_op.h
+++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_
-#define TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_
 
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_types.h"
@@ -39,4 +39,4 @@ struct MatrixSetDiag {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_
diff --git a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc
index 4e32f8a52e8..0cdb457db03 100644
--- a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #define EIGEN_USE_GPU
 
 #include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/kernels/matrix_set_diag_op.h"
+#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc
similarity index 92%
rename from tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc
rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc
index 22274cc3daf..4e64eb42371 100644
--- a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc
+++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h"
+#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc
similarity index 92%
rename from tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc
rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc
index c8421a3efba..719201f3f9e 100644
--- a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc
+++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h"
+#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc
similarity index 92%
rename from tensorflow/core/kernels/matrix_solve_ls_op_double.cc
rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc
index c7d03cb1052..614ecee4e23 100644
--- a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc
+++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h"
+#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc
similarity index 92%
rename from tensorflow/core/kernels/matrix_solve_ls_op_float.cc
rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc
index c98a84beded..809cff8148c 100644
--- a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc
+++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h"
+#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h
similarity index 96%
rename from tensorflow/core/kernels/matrix_solve_ls_op_impl.h
rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h
index 00a05a87a3a..1c8101a05b4 100644
--- a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h
+++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_
-#define TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_
 
 // See docs in ../ops/linalg_ops.cc.
 
@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
@@ -163,4 +163,4 @@ class MatrixSolveLsOp : public LinearAlgebraOp<Scalar> {
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_
diff --git a/tensorflow/core/kernels/matrix_solve_op.cc b/tensorflow/core/kernels/linalg/matrix_solve_op.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_solve_op.cc
rename to tensorflow/core/kernels/linalg/matrix_solve_op.cc
index 3a75054f4ea..70f02bddf9b 100644
--- a/tensorflow/core/kernels/matrix_solve_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_solve_op.cc
@@ -25,7 +25,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -33,8 +33,8 @@ limitations under the License.
 
 #if GOOGLE_CUDA
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_square_root_op.cc b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc
similarity index 97%
rename from tensorflow/core/kernels/matrix_square_root_op.cc
rename to tensorflow/core/kernels/linalg/matrix_square_root_op.cc
index fe3d3043c26..ce43e358350 100644
--- a/tensorflow/core/kernels/matrix_square_root_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc
similarity index 92%
rename from tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc
rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc
index ae3702078a0..27f3e77e29c 100644
--- a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc
+++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h"
+#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h
similarity index 97%
rename from tensorflow/core/kernels/matrix_triangular_solve_op_impl.h
rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h
index fb7e6f0f5ff..99249f792b6 100644
--- a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h
+++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h
@@ -15,8 +15,8 @@ limitations under the License.
 
 // See docs in ../ops/linalg_ops.cc.
 //
-#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_
-#define TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_
 
 #include "third_party/eigen3/Eigen/Core"
 #include "tensorflow/core/framework/kernel_def_builder.h"
@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/kernels/fill_functor.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -38,9 +38,9 @@ limitations under the License.
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #if GOOGLE_CUDA
-#include "tensorflow/core/kernels/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #elif TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/rocm_solvers.h"
+#include "tensorflow/core/util/rocm_solvers.h"
 #endif
 
 namespace tensorflow {
@@ -434,4 +434,4 @@ struct LaunchBatchMatrixTriangularSolve<GPUDevice, Scalar> {
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_
diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc
similarity index 93%
rename from tensorflow/core/kernels/matrix_triangular_solve_op_real.cc
rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc
index 0f92964dd72..71a62441dc4 100644
--- a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc
+++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h"
+#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h"
 
 #if GOOGLE_CUDA
 #include "third_party/gpus/cuda/include/cuda.h"
diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc
similarity index 100%
rename from tensorflow/core/kernels/matrix_triangular_solve_op_test.cc
rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc
diff --git a/tensorflow/core/kernels/qr_op_complex128.cc b/tensorflow/core/kernels/linalg/qr_op_complex128.cc
similarity index 96%
rename from tensorflow/core/kernels/qr_op_complex128.cc
rename to tensorflow/core/kernels/linalg/qr_op_complex128.cc
index 8a3e3dc0a92..0c14c6d2818 100644
--- a/tensorflow/core/kernels/qr_op_complex128.cc
+++ b/tensorflow/core/kernels/linalg/qr_op_complex128.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/qr_op_impl.h"
+#include "tensorflow/core/kernels/linalg/qr_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/qr_op_complex64.cc b/tensorflow/core/kernels/linalg/qr_op_complex64.cc
similarity index 95%
rename from tensorflow/core/kernels/qr_op_complex64.cc
rename to tensorflow/core/kernels/linalg/qr_op_complex64.cc
index 467fa6c2d6a..fc0227ef7f9 100644
--- a/tensorflow/core/kernels/qr_op_complex64.cc
+++ b/tensorflow/core/kernels/linalg/qr_op_complex64.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/qr_op_impl.h"
+#include "tensorflow/core/kernels/linalg/qr_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/qr_op_double.cc b/tensorflow/core/kernels/linalg/qr_op_double.cc
similarity index 96%
rename from tensorflow/core/kernels/qr_op_double.cc
rename to tensorflow/core/kernels/linalg/qr_op_double.cc
index 05537a0eaa3..ae00b3e7921 100644
--- a/tensorflow/core/kernels/qr_op_double.cc
+++ b/tensorflow/core/kernels/linalg/qr_op_double.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/qr_op_impl.h"
+#include "tensorflow/core/kernels/linalg/qr_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/qr_op_float.cc b/tensorflow/core/kernels/linalg/qr_op_float.cc
similarity index 96%
rename from tensorflow/core/kernels/qr_op_float.cc
rename to tensorflow/core/kernels/linalg/qr_op_float.cc
index 6aebd981865..77b8eeb0286 100644
--- a/tensorflow/core/kernels/qr_op_float.cc
+++ b/tensorflow/core/kernels/linalg/qr_op_float.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/qr_op_impl.h"
+#include "tensorflow/core/kernels/linalg/qr_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/qr_op_impl.h b/tensorflow/core/kernels/linalg/qr_op_impl.h
similarity index 96%
rename from tensorflow/core/kernels/qr_op_impl.h
rename to tensorflow/core/kernels/linalg/qr_op_impl.h
index 535df9d160d..876594bc511 100644
--- a/tensorflow/core/kernels/qr_op_impl.h
+++ b/tensorflow/core/kernels/linalg/qr_op_impl.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_
-#define TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_
 
 // See docs in ../ops/linalg_ops.cc.
 //
@@ -33,7 +33,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -41,11 +41,11 @@ limitations under the License.
 
 #if GOOGLE_CUDA
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/cwise_ops.h"
-#include "tensorflow/core/kernels/eye_functor.h"
-#include "tensorflow/core/kernels/matrix_band_part_op.h"
+#include "tensorflow/core/kernels/linalg/eye_functor.h"
+#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #endif
 
 namespace tensorflow {
@@ -299,4 +299,4 @@ class QrOpGpu : public AsyncOpKernel {
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_
diff --git a/tensorflow/core/kernels/self_adjoint_eig_op.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc
similarity index 98%
rename from tensorflow/core/kernels/self_adjoint_eig_op.cc
rename to tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc
index cea5883db7b..ebf1955b8ff 100644
--- a/tensorflow/core/kernels/self_adjoint_eig_op.cc
+++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/denormal.h"
 #include "tensorflow/core/platform/logging.h"
diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc
similarity index 93%
rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc
rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc
index 4c7a391d56c..424c33a7ac1 100644
--- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc
+++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h"
+#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc
similarity index 93%
rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc
rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc
index 0ec5ec24dd1..bdd20998e3c 100644
--- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc
+++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h"
+#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc
similarity index 92%
rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc
rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc
index 7f81bb69021..afc50500d40 100644
--- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc
+++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h"
+#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc
similarity index 92%
rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc
rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc
index bf30952d1e7..1f795777a2e 100644
--- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc
+++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h"
+#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc
similarity index 99%
rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc
rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc
index 3a84df07a9a..778c50ff408 100644
--- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc
+++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc
@@ -26,12 +26,12 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/kernels/cast_op.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/cwise_ops.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h
similarity index 91%
rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h
rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h
index b5274f8788b..56f2936a66e 100644
--- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h
+++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_
-#define TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_
 
 // See docs in ../ops/linalg_ops.cc.
 
@@ -23,7 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/denormal.h"
 #include "tensorflow/core/platform/logging.h"
@@ -89,4 +89,4 @@ class SelfAdjointEigV2Op : public LinearAlgebraOp<Scalar> {
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_
diff --git a/tensorflow/core/kernels/svd_op_complex128.cc b/tensorflow/core/kernels/linalg/svd_op_complex128.cc
similarity index 93%
rename from tensorflow/core/kernels/svd_op_complex128.cc
rename to tensorflow/core/kernels/linalg/svd_op_complex128.cc
index a0f39418aca..36ac629e38a 100644
--- a/tensorflow/core/kernels/svd_op_complex128.cc
+++ b/tensorflow/core/kernels/linalg/svd_op_complex128.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/svd_op_impl.h"
+#include "tensorflow/core/kernels/linalg/svd_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/svd_op_complex64.cc b/tensorflow/core/kernels/linalg/svd_op_complex64.cc
similarity index 93%
rename from tensorflow/core/kernels/svd_op_complex64.cc
rename to tensorflow/core/kernels/linalg/svd_op_complex64.cc
index a8fd50c67d1..50d940b534a 100644
--- a/tensorflow/core/kernels/svd_op_complex64.cc
+++ b/tensorflow/core/kernels/linalg/svd_op_complex64.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/svd_op_impl.h"
+#include "tensorflow/core/kernels/linalg/svd_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/svd_op_double.cc b/tensorflow/core/kernels/linalg/svd_op_double.cc
similarity index 93%
rename from tensorflow/core/kernels/svd_op_double.cc
rename to tensorflow/core/kernels/linalg/svd_op_double.cc
index 539dae3a081..85bbe08d8c9 100644
--- a/tensorflow/core/kernels/svd_op_double.cc
+++ b/tensorflow/core/kernels/linalg/svd_op_double.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/svd_op_impl.h"
+#include "tensorflow/core/kernels/linalg/svd_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/svd_op_float.cc b/tensorflow/core/kernels/linalg/svd_op_float.cc
similarity index 93%
rename from tensorflow/core/kernels/svd_op_float.cc
rename to tensorflow/core/kernels/linalg/svd_op_float.cc
index 03839aa49c3..961d131293b 100644
--- a/tensorflow/core/kernels/svd_op_float.cc
+++ b/tensorflow/core/kernels/linalg/svd_op_float.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/svd_op_impl.h"
+#include "tensorflow/core/kernels/linalg/svd_op_impl.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/svd_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc
index 482fd057e4e..06d1efe6dd5 100644
--- a/tensorflow/core/kernels/svd_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc
@@ -36,14 +36,14 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/eye_functor.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/eye_functor.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/svd_op_impl.h b/tensorflow/core/kernels/linalg/svd_op_impl.h
similarity index 95%
rename from tensorflow/core/kernels/svd_op_impl.h
rename to tensorflow/core/kernels/linalg/svd_op_impl.h
index 675826a057c..c43aaaa4b7b 100644
--- a/tensorflow/core/kernels/svd_op_impl.h
+++ b/tensorflow/core/kernels/linalg/svd_op_impl.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_
-#define TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_
 
 // See docs in ../ops/linalg_ops.cc.
 //
@@ -27,7 +27,7 @@ limitations under the License.
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -118,4 +118,4 @@ class SvdOp : public LinearAlgebraOp<Scalar> {
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_
diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc
similarity index 98%
rename from tensorflow/core/kernels/tridiagonal_matmul_op.cc
rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc
index 3ddf22012de..9d17c574148 100644
--- a/tensorflow/core/kernels/tridiagonal_matmul_op.cc
+++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc
@@ -19,7 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc
similarity index 96%
rename from tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc
index 1c82cc18e32..a65db40d822 100644
--- a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc
@@ -22,11 +22,11 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #include "tensorflow/core/util/gpu_device_functions.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 #include "tensorflow/core/util/gpu_launch_config.h"
diff --git a/tensorflow/core/kernels/tridiagonal_solve_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc
similarity index 99%
rename from tensorflow/core/kernels/tridiagonal_solve_op.cc
rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc
index 88931ff3e66..8fe04125f9a 100644
--- a/tensorflow/core/kernels/tridiagonal_solve_op.cc
+++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc
@@ -19,7 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc
index 089fa8c040f..86514cfb033 100644
--- a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc
@@ -23,11 +23,11 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
-#include "tensorflow/core/kernels/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #include "tensorflow/core/util/gpu_device_functions.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 #include "tensorflow/core/util/gpu_launch_config.h"
diff --git a/tensorflow/core/kernels/linalg_ops_common.h b/tensorflow/core/kernels/linalg_ops_common.h
index 65c2fb90f0e..0aa69801f19 100644
--- a/tensorflow/core/kernels/linalg_ops_common.h
+++ b/tensorflow/core/kernels/linalg_ops_common.h
@@ -12,211 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-
 #ifndef TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_
 #define TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_
 
-// Classes to support linear algebra functionality, similar to the numpy.linalg
-// module. Supports batch computation on several matrices at once, sharding the
-// computations across different threads if necessary.
-#include <algorithm>
-
-#include "third_party/eigen3/Eigen/Core"
-#include "tensorflow/core/framework/kernel_def_builder.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/inlined_vector.h"
-#include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/work_sharder.h"
-
-namespace tensorflow {
-
-// Base class for linear algebra operators.
-template <class InputScalar, class OutputScalar = InputScalar>
-class LinearAlgebraOp : public OpKernel {
- public:
-  explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override;
-
- protected:
-  using TensorShapes = gtl::InlinedVector<TensorShape, 4>;
-  // Returns the number of leading inputs that are to be treated as matrix
-  // inputs. By default this is all the inputs. Derived classes can override
-  // this to tell the base class to ignore one or more trailing inputs.
-  virtual int NumMatrixInputs(const OpKernelContext* context) const {
-    return context->num_inputs();
-  }
-
-  // Returns true if the number of inputs and their shapes are as expected.
-  // Many ops take a single square input matrix, so we provide that as a default
-  // implementation for convenience.
-  virtual void ValidateInputMatrixShapes(
-      OpKernelContext* context, const TensorShapes& input_matrix_shapes) const {
-    ValidateSingleSquareMatrix(context, input_matrix_shapes);
-  }
-
-  // Convenience validators for common cases:
-  //
-  // Validate op taking a single matrix A.
-  static void ValidateSingleMatrix(OpKernelContext* context,
-                                   const TensorShapes& input_matrix_shapes);
-  // Validate op taking a single square matrix A.
-  static void ValidateSingleSquareMatrix(
-      OpKernelContext* context, const TensorShapes& input_matrix_shapes);
-  // Validate op taking two matrices A and B that have the same number of rows.
-  static void ValidateSolver(OpKernelContext* context,
-                             const TensorShapes& input_matrix_shapes);
-  // Validate op taking two matrices A and B that have the same number of rows
-  // and A is square.
-  static void ValidateSquareSolver(OpKernelContext* context,
-                                   const TensorShapes& input_matrix_shapes);
-
-  // Returns the output shapes of each individual matrix operation. Output
-  // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0.
-  //
-  // The derived class may return a number of shapes (N) less than
-  // context->num_outputs() (M) to indicate that a only leading subset of
-  // the outputs will be populated. In this case, a dummy scalar tensor with
-  // value zero will be return for the last M-N outputs.
-  //
-  // For many ops, the output dimensions are the same as the input dimensions,
-  // so we provide that as a default implementation for convenience.
-  virtual TensorShapes GetOutputMatrixShapes(
-      const TensorShapes& input_matrix_shapes) const {
-    return input_matrix_shapes;
-  }
-
-  // Returns the cost per matrix operation. This is used to determine the
-  // number of threads to use for parallelizing calls to ComputeMatrix in
-  // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments
-  // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n)
-  // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a
-  // default implementation for convenience.
-  virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const {
-    double m = static_cast<double>(input_matrix_shapes[0].dim_size(0));
-    double n = static_cast<double>(input_matrix_shapes[0].dim_size(1));
-    double cost = std::max(m, n) * std::min(m, n) * std::min(m, n);
-    return cost >= static_cast<double>(kint64max) ? kint64max
-                                                  : static_cast<int64>(cost);
-  }
-
-  // Returns true if it is safe to forward (alias) input to output buffer
-  // and expect the kernel to perform the computation inplace.
-  virtual bool EnableInputForwarding() const { return true; }
-
-  using InputMatrix = Eigen::Matrix<InputScalar, Eigen::Dynamic, Eigen::Dynamic,
-                                    Eigen::RowMajor>;
-  using InputConstMatrixMap = Eigen::Map<const InputMatrix>;
-  using InputMatrixMap = Eigen::Map<InputMatrix>;
-  using InputConstVectorMap =
-      Eigen::Map<const Eigen::Matrix<InputScalar, 1, Eigen::Dynamic>>;
-  using InputConstMatrixMaps = gtl::InlinedVector<InputConstMatrixMap, 4>;
-  using InputMatrixMaps = gtl::InlinedVector<InputMatrixMap, 4>;
-  using InputRealScalar = typename Eigen::NumTraits<InputScalar>::Real;
-
-  using OutputMatrix = Eigen::Matrix<OutputScalar, Eigen::Dynamic,
-                                     Eigen::Dynamic, Eigen::RowMajor>;
-  using OutputConstMatrixMap = Eigen::Map<const OutputMatrix>;
-  using OutputMatrixMap = Eigen::Map<OutputMatrix>;
-  using OutputConstVectorMap =
-      Eigen::Map<const Eigen::Matrix<OutputScalar, 1, Eigen::Dynamic>>;
-  using OutputConstMatrixMaps = gtl::InlinedVector<OutputConstMatrixMap, 4>;
-  using OutputMatrixMaps = gtl::InlinedVector<OutputMatrixMap, 4>;
-  using OutputRealScalar = typename Eigen::NumTraits<OutputScalar>::Real;
-
-  // backward compatibility
-  using Scalar = OutputScalar;
-  using Matrix =
-      Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
-  using ConstMatrixMap = Eigen::Map<const Matrix>;
-  using MatrixMap = Eigen::Map<Matrix>;
-  using ConstVectorMap =
-      Eigen::Map<const Eigen::Matrix<Scalar, 1, Eigen::Dynamic>>;
-  using ConstMatrixMaps = gtl::InlinedVector<ConstMatrixMap, 4>;
-  using MatrixMaps = gtl::InlinedVector<MatrixMap, 4>;
-  using RealScalar = typename Eigen::NumTraits<Scalar>::Real;
-
-  // Performs a single matrix computation given input matrices, and
-  // stores the result in outputs. For batch operations, this will be called
-  // repeatedly for a single call to Compute() when multiple matrices exist in
-  // input Tensors with rank > 2. In this case the calls to ComputeMatrix are
-  // parallelized. The number of threads used is determined by a cost model from
-  // the value returned by GetCostPerUnit().
-  virtual void ComputeMatrix(OpKernelContext* context,
-                             const InputConstMatrixMaps& inputs,
-                             OutputMatrixMaps* outputs) = 0;
-
- private:
-  using TensorInputs = gtl::InlinedVector<const Tensor*, 4>;
-  using TensorOutputs = gtl::InlinedVector<Tensor*, 4>;
-  // This function maps 2-d slices (matrices) of the input and output tensors
-  // using Eigen::Map and calls ComputeMatrix implemented in terms of the
-  // Eigen::MatrixBase API by the derived class.
-  //
-  // The 'matrix_index' parameter specifies the index of the matrix to be used
-  // from each input tensor, and the index of the matrix to be written to each
-  // output tensor. The input matrices are in row major order, and located at
-  // the memory addresses
-  //   inputs[i].flat<Scalar>().data() +
-  //   matrix_index * input_matrix_shapes[i].num_elements()
-  // for i in 0...inputs.size()-1.
-  // The output matrices are in row major order, and located at the memory
-  // address
-  //   outputs[i]->flat<Scalar>().data() +
-  //   matrix_index * output_matrix_shapes[i].num_elements().
-  // for i in 0...outputs.size()-1.
-  //
-  void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index,
-                          const TensorInputs& inputs,
-                          const TensorShapes& input_matrix_shapes,
-                          const TensorOutputs& outputs,
-                          const TensorShapes& output_matrix_shapes);
-
-  void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs,
-                     TensorShapes* input_matrix_shapes,
-                     TensorShape* batch_shape);
-
-  void PrepareOutputs(OpKernelContext* context,
-                      const TensorShapes& input_matrix_shapes,
-                      const TensorShape& batch_shape, TensorOutputs* outputs,
-                      TensorShapes* output_matrix_shapes);
-};
-
-// Declare LinearAlgebraOp, which is explicitly instantiated in
-// linalg_ops_common.cc for float, double, complex64, and complex128.
-extern template class LinearAlgebraOp<float>;
-extern template class LinearAlgebraOp<double>;
-extern template class LinearAlgebraOp<complex64>;
-extern template class LinearAlgebraOp<complex128>;
-
-}  // namespace tensorflow
-
-#define INHERIT_LINALG_TYPEDEFS(Scalar)                       \
-  typedef LinearAlgebraOp<Scalar> Base;                       \
-  using RealScalar = typename Eigen::NumTraits<Scalar>::Real; \
-  using Matrix = typename Base::Matrix;                       \
-  using MatrixMap = typename Base::MatrixMap;                 \
-  using MatrixMaps = typename Base::MatrixMaps;               \
-  using ConstMatrixMap = typename Base::ConstMatrixMap;       \
-  using ConstMatrixMaps = typename Base::ConstMatrixMaps;     \
-  using ConstVectorMap = typename Base::ConstVectorMap;       \
-  using TensorShapes = typename Base::TensorShapes;
-
-#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) \
-  REGISTER_KERNEL_BUILDER(                              \
-      Name(OpName).Device(DEVICE_CPU).TypeConstraint<Scalar>("T"), OpClass)
-
-#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar) \
-  REGISTER_KERNEL_BUILDER(                              \
-      Name(OpName).Device(DEVICE_GPU).TypeConstraint<Scalar>("T"), OpClass)
-
-// Deprecated, use one of the device-specific macros above.
-#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \
-  REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)
+// Temporary forwarding header.
+#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
 
 #endif  // TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_
diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl.h b/tensorflow/core/kernels/segment_reduction_ops_impl.h
index 6c3fad668ae..7cf15ef5b72 100644
--- a/tensorflow/core/kernels/segment_reduction_ops_impl.h
+++ b/tensorflow/core/kernels/segment_reduction_ops_impl.h
@@ -45,13 +45,13 @@ limitations under the License.
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #if GOOGLE_CUDA
-#include "tensorflow/core/kernels/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #include "tensorflow/stream_executor/cuda/cuda_activation.h"
 
 using stream_executor::cuda::ScopedActivateExecutorContext;
 #elif TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/platform/rocm.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 using stream_executor::rocm::ScopedActivateExecutorContext;
 #endif  // GOOGLE_CUDA
 
diff --git a/tensorflow/core/kernels/sparse/BUILD b/tensorflow/core/kernels/sparse/BUILD
index 1d281bc1d61..bfb6c4934bb 100644
--- a/tensorflow/core/kernels/sparse/BUILD
+++ b/tensorflow/core/kernels/sparse/BUILD
@@ -80,8 +80,8 @@ tf_kernel_library(
         "//tensorflow/core/kernels:transpose_functor",
         "//tensorflow/core/kernels:gpu_prim_hdrs",
     ] + if_cuda_or_rocm([
-        "//tensorflow/core/kernels:cuda_solvers",
-        "//tensorflow/core/kernels:cuda_sparse",
+        "//tensorflow/core/util:cuda_solvers",
+        "//tensorflow/core/util:cuda_sparse",
     ]),
     alwayslink = 1,
 )
diff --git a/tensorflow/core/kernels/sparse/add_op.cc b/tensorflow/core/kernels/sparse/add_op.cc
index b6265a1412c..06fe1cd042e 100644
--- a/tensorflow/core/kernels/sparse/add_op.cc
+++ b/tensorflow/core/kernels/sparse/add_op.cc
@@ -32,8 +32,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/fill_functor.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/conj_op.cc b/tensorflow/core/kernels/sparse/conj_op.cc
index 7275262c1f0..147160fbe6c 100644
--- a/tensorflow/core/kernels/sparse/conj_op.cc
+++ b/tensorflow/core/kernels/sparse/conj_op.cc
@@ -32,8 +32,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/sparse/sparse_matrix.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc
index 364c2c07bd8..2e5afbdcad7 100644
--- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc
+++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc
@@ -34,8 +34,8 @@ limitations under the License.
 #include "tensorflow/core/util/work_sharder.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc
index 55ebfa4fc10..a81ccfa562e 100644
--- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc
+++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc
@@ -32,8 +32,8 @@ limitations under the License.
 #include "tensorflow/core/util/work_sharder.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc
index 459bb219343..5c62a44f9ba 100644
--- a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc
+++ b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc
@@ -35,8 +35,8 @@ limitations under the License.
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc
index 1c014db3d0a..6b11e64307a 100644
--- a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc
+++ b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc
@@ -20,13 +20,13 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
 #include "tensorflow/core/kernels/gpu_device_array.h"
 #include "tensorflow/core/kernels/gpu_device_array_gpu.h"
 #include "tensorflow/core/kernels/gpu_prim.h"
 #include "tensorflow/core/kernels/sparse/kernels.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/mat_mul_op.cc b/tensorflow/core/kernels/sparse/mat_mul_op.cc
index 50fa0ec88ea..bf9de570fbf 100644
--- a/tensorflow/core/kernels/sparse/mat_mul_op.cc
+++ b/tensorflow/core/kernels/sparse/mat_mul_op.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #include "tensorflow/core/platform/threadpool.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/mul_op.cc b/tensorflow/core/kernels/sparse/mul_op.cc
index 33c3756ce58..d08f1568db1 100644
--- a/tensorflow/core/kernels/sparse/mul_op.cc
+++ b/tensorflow/core/kernels/sparse/mul_op.cc
@@ -29,7 +29,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/sparse/sparse_matrix.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/nnz_op.cc b/tensorflow/core/kernels/sparse/nnz_op.cc
index ebc48c3e9a4..d67620443f0 100644
--- a/tensorflow/core/kernels/sparse/nnz_op.cc
+++ b/tensorflow/core/kernels/sparse/nnz_op.cc
@@ -29,8 +29,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/sparse/sparse_matrix.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/softmax_op.cc b/tensorflow/core/kernels/sparse/softmax_op.cc
index 25025bfe2a6..f1a5db8d0f0 100644
--- a/tensorflow/core/kernels/sparse/softmax_op.cc
+++ b/tensorflow/core/kernels/sparse/softmax_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #define EIGEN_USE_GPU
 #endif
 
diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc
index fb652e13d15..fecee9e4555 100644
--- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc
+++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc
@@ -36,8 +36,8 @@ limitations under the License.
 #include "tensorflow/core/util/work_sharder.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc
index 59540f63846..2eaf9bd5310 100644
--- a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc
+++ b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc
@@ -30,8 +30,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/sparse/sparse_matrix.h"
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc
index e1a4b4194d2..2548ceaa57c 100644
--- a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc
+++ b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc
@@ -33,8 +33,8 @@ limitations under the License.
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #endif
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/sparse/transpose_op.cc b/tensorflow/core/kernels/sparse/transpose_op.cc
index 3158eb5016d..08d37fa1692 100644
--- a/tensorflow/core/kernels/sparse/transpose_op.cc
+++ b/tensorflow/core/kernels/sparse/transpose_op.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 #define EIGEN_USE_GPU
 #endif
 
diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc
index 598cb526d77..d504ec9b2ed 100644
--- a/tensorflow/core/kernels/where_op.cc
+++ b/tensorflow/core/kernels/where_op.cc
@@ -39,7 +39,7 @@ limitations under the License.
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #if GOOGLE_CUDA
 #include "tensorflow/stream_executor/cuda/cuda_activation.h"
 using stream_executor::cuda::ScopedActivateExecutorContext;
diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD
index bb2b9ff429e..dcb2787e309 100644
--- a/tensorflow/core/util/BUILD
+++ b/tensorflow/core/util/BUILD
@@ -14,6 +14,7 @@ load(
     "tf_copts",
     "tf_cuda_library",
     "tf_cuda_only_cc_test",
+    "tf_kernel_library",
 )
 load("//tensorflow:tensorflow.bzl", "tf_version_info_genrule")
 load(
@@ -24,6 +25,11 @@ load(
     "//tensorflow/core/platform:build_config_root.bzl",
     "if_static",
 )
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load(
+    "@local_config_rocm//rocm:build_defs.bzl",
+    "if_rocm",
+)
 
 default_package_visibility = [
     "//tensorflow/core:__subpackages__",
@@ -567,6 +573,63 @@ cc_library(
     ],
 )
 
+tf_kernel_library(
+    name = "cuda_solvers",
+    srcs = ["cuda_solvers.cc"],
+    hdrs = ["cuda_solvers.h"],
+    # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas,
+    # and //third_party/libf2c all contain various parts of BLAS, LAPACK,
+    # and f2c helper functions in global namespace. Tell the compiler to
+    # allow multiple definitions when linking this.
+    linkopts = select({
+        "//tensorflow:macos": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": ["-Wl,-z,muldefs"],
+    }),
+    visibility = ["//tensorflow/core/kernels:friends"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/platform/default/build_config:cublas_plugin",
+        "//tensorflow/stream_executor/cuda:cublas_lib",
+        "//tensorflow/stream_executor/cuda:cusolver_lib",
+    ],
+)
+
+tf_kernel_library(
+    name = "rocm_solvers",
+    srcs = ["rocm_solvers.cc"],
+    hdrs = ["rocm_solvers.h"],
+    visibility = ["//tensorflow/core/kernels:friends"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/stream_executor/lib",
+        "//tensorflow/stream_executor/platform:dso_loader",
+        "//tensorflow/stream_executor/rocm:rocblas_plugin",
+        "//tensorflow/stream_executor/rocm:rocm_gpu_executor",
+    ] + if_rocm([
+        "@local_config_rocm//rocm:rocprim",
+    ]),
+)
+
+tf_kernel_library(
+    name = "cuda_sparse",
+    srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]),
+    hdrs = ["cuda_sparse.h"],
+    deps = [
+        ":cuda_solvers",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ] + if_cuda([
+        "//tensorflow/stream_executor/cuda:cusparse_lib",
+        "@cub_archive//:cub",
+    ]) + if_rocm([
+        "@local_config_rocm//rocm:hipsparse",
+    ]),
+)
+
 # Tests.
 
 tf_cc_test(
diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/util/cuda_solvers.cc
similarity index 99%
rename from tensorflow/core/kernels/cuda_solvers.cc
rename to tensorflow/core/util/cuda_solvers.cc
index f41ce2a5d27..3e4d2a05ac6 100644
--- a/tensorflow/core/kernels/cuda_solvers.cc
+++ b/tensorflow/core/util/cuda_solvers.cc
@@ -14,7 +14,7 @@
    ==============================================================================
 */
 #ifdef GOOGLE_CUDA
-#include "tensorflow/core/kernels/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 
 #include <chrono>
 #include <complex>
diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/util/cuda_solvers.h
similarity index 99%
rename from tensorflow/core/kernels/cuda_solvers.h
rename to tensorflow/core/util/cuda_solvers.h
index eb1d5c8a200..79f45c9b0ea 100644
--- a/tensorflow/core/kernels/cuda_solvers.h
+++ b/tensorflow/core/util/cuda_solvers.h
@@ -14,8 +14,8 @@ limitations under the License.
 ==============================================================================
 */
 
-#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_
-#define TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_
 
 // This header declares the class CudaSolver, which contains wrappers of linear
 // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow
@@ -435,7 +435,7 @@ class HostLapackInfo : public ScratchSpace<int> {
  public:
   HostLapackInfo(OpKernelContext* context, int64 size,
                  const std::string& debug_info)
-      : ScratchSpace<int>(context, size, debug_info, /* on_host */ true){};
+      : ScratchSpace<int>(context, size, debug_info, /* on_host */ true) {}
 };
 
 class DeviceLapackInfo : public ScratchSpace<int> {
@@ -489,4 +489,4 @@ inline DeviceLapackInfo CudaSolver::GetDeviceLapackInfo(
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#endif  // TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_
diff --git a/tensorflow/core/kernels/cuda_sparse.cc b/tensorflow/core/util/cuda_sparse.cc
similarity index 99%
rename from tensorflow/core/kernels/cuda_sparse.cc
rename to tensorflow/core/util/cuda_sparse.cc
index 141aae61571..47e018560e1 100644
--- a/tensorflow/core/kernels/cuda_sparse.cc
+++ b/tensorflow/core/util/cuda_sparse.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #ifdef GOOGLE_CUDA
 
-#include "tensorflow/core/kernels/cuda_sparse.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 
 #include <complex>
 #include <memory>
@@ -28,7 +28,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
@@ -38,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 
 // TODO(rmlarsen,penporn): Investigate using newer kernels in CUDA 10.1+.
 
diff --git a/tensorflow/core/kernels/cuda_sparse.h b/tensorflow/core/util/cuda_sparse.h
similarity index 93%
rename from tensorflow/core/kernels/cuda_sparse.h
rename to tensorflow/core/util/cuda_sparse.h
index 978bc9005ed..76580766d69 100644
--- a/tensorflow/core/kernels/cuda_sparse.h
+++ b/tensorflow/core/util/cuda_sparse.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_
-#define TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_
 
 // This header declares the class GpuSparse, which contains wrappers of
 // cuSparse libraries for use in TensorFlow kernels.
@@ -75,8 +75,7 @@ using gpuStream_t = hipStream_t;
 
 namespace tensorflow {
 
-inline std::string ConvertGPUSparseErrorToString(
-    const gpusparseStatus_t status) {
+inline string ConvertGPUSparseErrorToString(const gpusparseStatus_t status) {
   switch (status) {
 #define STRINGIZE(q) #q
 #define RETURN_IF_STATUS(err) \
@@ -206,49 +205,49 @@ class GpuSparse {
   // Solves tridiagonal system of equations.
   // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2
   template <typename Scalar>
-  Status Gtsv2(int m, int n, const Scalar *dl, const Scalar *d,
-               const Scalar *du, Scalar *B, int ldb, void *pBuffer) const;
+  Status Gtsv2(int m, int n, const Scalar* dl, const Scalar* d,
+               const Scalar* du, Scalar* B, int ldb, void* pBuffer) const;
 
   // Computes the size of a temporary buffer used by Gtsv2.
   // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_bufferSize
   template <typename Scalar>
-  Status Gtsv2BufferSizeExt(int m, int n, const Scalar *dl, const Scalar *d,
-                            const Scalar *du, const Scalar *B, int ldb,
-                            size_t *bufferSizeInBytes) const;
+  Status Gtsv2BufferSizeExt(int m, int n, const Scalar* dl, const Scalar* d,
+                            const Scalar* du, const Scalar* B, int ldb,
+                            size_t* bufferSizeInBytes) const;
 
   // Solves tridiagonal system of equations without partial pivoting.
   // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_nopivot
   template <typename Scalar>
-  Status Gtsv2NoPivot(int m, int n, const Scalar *dl, const Scalar *d,
-                      const Scalar *du, Scalar *B, int ldb,
-                      void *pBuffer) const;
+  Status Gtsv2NoPivot(int m, int n, const Scalar* dl, const Scalar* d,
+                      const Scalar* du, Scalar* B, int ldb,
+                      void* pBuffer) const;
 
   // Computes the size of a temporary buffer used by Gtsv2NoPivot.
   // See:
   // https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_nopivot_bufferSize
   template <typename Scalar>
-  Status Gtsv2NoPivotBufferSizeExt(int m, int n, const Scalar *dl,
-                                   const Scalar *d, const Scalar *du,
-                                   const Scalar *B, int ldb,
-                                   size_t *bufferSizeInBytes) const;
+  Status Gtsv2NoPivotBufferSizeExt(int m, int n, const Scalar* dl,
+                                   const Scalar* d, const Scalar* du,
+                                   const Scalar* B, int ldb,
+                                   size_t* bufferSizeInBytes) const;
 
   // Solves a batch of tridiagonal systems of equations. Doesn't support
   // multiple right-hand sides per each system. Doesn't do pivoting.
   // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2stridedbatch
   template <typename Scalar>
-  Status Gtsv2StridedBatch(int m, const Scalar *dl, const Scalar *d,
-                           const Scalar *du, Scalar *x, int batchCount,
-                           int batchStride, void *pBuffer) const;
+  Status Gtsv2StridedBatch(int m, const Scalar* dl, const Scalar* d,
+                           const Scalar* du, Scalar* x, int batchCount,
+                           int batchStride, void* pBuffer) const;
 
   // Computes the size of a temporary buffer used by Gtsv2StridedBatch.
   // See:
   // https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2stridedbatch_bufferSize
   template <typename Scalar>
-  Status Gtsv2StridedBatchBufferSizeExt(int m, const Scalar *dl,
-                                        const Scalar *d, const Scalar *du,
-                                        const Scalar *x, int batchCount,
+  Status Gtsv2StridedBatchBufferSizeExt(int m, const Scalar* dl,
+                                        const Scalar* d, const Scalar* du,
+                                        const Scalar* x, int batchCount,
                                         int batchStride,
-                                        size_t *bufferSizeInBytes) const;
+                                        size_t* bufferSizeInBytes) const;
 
   // Compresses the indices of rows or columns. It can be interpreted as a
   // conversion from COO to CSR sparse storage format. See:
@@ -449,7 +448,7 @@ class GpuSparse {
 
  private:
   bool initialized_;
-  OpKernelContext *context_;  // not owned.
+  OpKernelContext* context_;  // not owned.
   gpuStream_t gpu_stream_;
   gpusparseHandle_t* gpusparse_handle_;  // not owned.
 
@@ -585,4 +584,4 @@ class GpuSparseCsrSortingConversionInfo {
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#endif  // TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_
diff --git a/tensorflow/core/kernels/rocm_solvers.cc b/tensorflow/core/util/rocm_solvers.cc
similarity index 99%
rename from tensorflow/core/kernels/rocm_solvers.cc
rename to tensorflow/core/util/rocm_solvers.cc
index 5faf718332e..13dadf602a7 100644
--- a/tensorflow/core/kernels/rocm_solvers.cc
+++ b/tensorflow/core/util/rocm_solvers.cc
@@ -14,7 +14,7 @@
    ==============================================================================
 */
 #if TENSORFLOW_USE_ROCM
-#include "tensorflow/core/kernels/rocm_solvers.h"
+#include "tensorflow/core/util/rocm_solvers.h"
 
 #include <complex>
 #include <unordered_map>
diff --git a/tensorflow/core/kernels/rocm_solvers.h b/tensorflow/core/util/rocm_solvers.h
similarity index 96%
rename from tensorflow/core/kernels/rocm_solvers.h
rename to tensorflow/core/util/rocm_solvers.h
index 94d3c82a497..afc8b936d05 100644
--- a/tensorflow/core/kernels/rocm_solvers.h
+++ b/tensorflow/core/util/rocm_solvers.h
@@ -14,8 +14,8 @@ limitations under the License.
 ==============================================================================
 */
 
-#ifndef TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_
-#define TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_
 
 // This header declares the class ROCmSolver, which contains wrappers of linear
 // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow
@@ -158,4 +158,4 @@ class ScratchSpace {
 
 #endif  // TENSORFLOW_USE_ROCM
 
-#endif  // TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_
diff --git a/tensorflow/core/kernels/rocm_sparse.cc b/tensorflow/core/util/rocm_sparse.cc
similarity index 99%
rename from tensorflow/core/kernels/rocm_sparse.cc
rename to tensorflow/core/util/rocm_sparse.cc
index 97488692bc1..cc7b56fdc01 100644
--- a/tensorflow/core/kernels/rocm_sparse.cc
+++ b/tensorflow/core/util/rocm_sparse.cc
@@ -24,8 +24,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
-#include "tensorflow/core/kernels/cuda_sparse.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
@@ -35,6 +33,8 @@ limitations under the License.
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_solvers.h"
+#include "tensorflow/core/util/cuda_sparse.h"
 
 namespace tensorflow {
 namespace {