diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 86c9d1fc665..161a0a95856 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1010,7 +1010,7 @@ cc_library( "//tensorflow/core/kernels:histogram_op", "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:io", - "//tensorflow/core/kernels:linalg", + "//tensorflow/core/kernels/linalg:linalg", "//tensorflow/core/kernels:lookup", "//tensorflow/core/kernels:logging", "//tensorflow/core/kernels:manip", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 34a3ee800d8..12d4f1c5574 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1039,9 +1039,6 @@ cc_library( ":immutable_constant_op", ":inplace_ops", ":listdiff_op", - ":matrix_band_part_op", - ":matrix_diag_op", - ":matrix_set_diag_op", ":mirror_pad_op", ":one_hot_op", ":pack_op", @@ -1174,26 +1171,6 @@ tf_kernel_library( deps = ARRAY_DEPS, ) -tf_kernel_library( - name = "matrix_band_part_op", - prefix = "matrix_band_part_op", - deps = if_cuda([ - ":cuda_solvers", - ]) + ARRAY_DEPS, -) - -tf_kernel_library( - name = "matrix_diag_op", - prefix = "matrix_diag_op", - deps = ARRAY_DEPS, -) - -tf_kernel_library( - name = "matrix_set_diag_op", - prefix = "matrix_set_diag_op", - deps = ARRAY_DEPS + [":matrix_diag_op"], -) - tf_kernel_library( name = "mirror_pad_op", prefix = "mirror_pad_op", @@ -1405,7 +1382,7 @@ tf_kernel_library( "where_op_gpu_impl_8.cu.cc", ], deps = if_cuda_or_rocm([ - ":cuda_solvers", + "//tensorflow/core/util:cuda_solvers", ]) + [":gpu_prim_hdrs"] + ARRAY_DEPS, ) @@ -2785,21 +2762,6 @@ tf_cuda_cc_tests( ], ) -tf_kernel_library( - name = "eye_functor", - hdrs = ["eye_functor.h"], - gpu_srcs = [ - "eye_functor_gpu.cu.cc", - "eye_functor.h", - ], - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//third_party/eigen3", - ], - alwayslink = 0, -) - cc_library( name = "fifo_queue", srcs = ["fifo_queue.cc"], @@ -3558,289 +3520,6 @@ tf_cc_tests( 
], ) -cc_library( - name = "linalg", - deps = [ - ":banded_triangular_solve_op", - ":cholesky_grad", - ":cholesky_op", - ":determinant_op", - ":eig_op", - ":einsum_op", - ":lu_op", - ":matrix_exponential_op", - ":matrix_inverse_op", - ":matrix_logarithm_op", - ":matrix_solve_ls_op", - ":matrix_solve_op", - ":matrix_square_root_op", - ":matrix_triangular_solve_op", - ":qr_op", - ":self_adjoint_eig_op", - ":self_adjoint_eig_v2_op", - ":svd_op", - ":tridiagonal_matmul_op", - ":tridiagonal_solve_op", - ], -) - -tf_kernel_library( - name = "cuda_solvers", - srcs = ["cuda_solvers.cc"], - hdrs = ["cuda_solvers.h"], - # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, - # and //third_party/libf2c all contain various parts of BLAS, LAPACK, - # and f2c helper functions in global namespace. Tell the compiler to - # allow multiple definitions when linking this. - linkopts = select({ - "//tensorflow:macos": [], - "//tensorflow:windows": [], - "//conditions:default": ["-Wl,-z,muldefs"], - }), - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/platform/default/build_config:cublas_plugin", - "//tensorflow/stream_executor/cuda:cublas_lib", - "//tensorflow/stream_executor/cuda:cusolver_lib", - ], -) - -tf_kernel_library( - name = "rocm_solvers", - srcs = ["rocm_solvers.cc"], - hdrs = ["rocm_solvers.h"], - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/stream_executor/lib", - "//tensorflow/stream_executor/platform:dso_loader", - "//tensorflow/stream_executor/rocm:rocblas_plugin", - "//tensorflow/stream_executor/rocm:rocm_gpu_executor", - ] + if_rocm([ - "@local_config_rocm//rocm:rocprim", - ]), -) - -tf_kernel_library( - name = "cuda_sparse", - srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), - hdrs = ["cuda_sparse.h"], - deps = [ - "//tensorflow/core:framework", - 
"//tensorflow/core:lib", - "//tensorflow/core/kernels:cuda_solvers", - ] + if_cuda([ - "//tensorflow/stream_executor/cuda:cusparse_lib", - "@cub_archive//:cub", - ]) + if_rocm([ - "@local_config_rocm//rocm:hipsparse", - ]), -) - -LINALG_DEPS = [ - ":linalg_ops_common", - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", -] + if_cuda([ - ":cuda_solvers", - ":transpose_functor", -]) + if_rocm([ - ":rocm_solvers", -]) - -tf_kernel_library( - name = "cholesky_op", - prefix = "cholesky_op", - deps = if_cuda([ - ":matrix_band_part_op", - ]) + LINALG_DEPS, -) - -tf_kernel_library( - name = "cholesky_grad", - prefix = "cholesky_grad", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "determinant_op", - prefix = "determinant_op", - deps = if_cuda([ - ":fill_functor", - ]) + LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_exponential_op", - prefix = "matrix_exponential_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_logarithm_op", - prefix = "matrix_logarithm_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "self_adjoint_eig_op", - prefix = "self_adjoint_eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], -) - -tf_kernel_library( - name = "self_adjoint_eig_v2_op", - prefix = "self_adjoint_eig_v2_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - ":cast_op", - ":cwise_op", - ]), -) - -tf_kernel_library( - name = "eig_op", - prefix = "eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - ":cast_op", - ":cwise_op", - ]), -) - -tf_kernel_library( - name = "matrix_inverse_op", - prefix = "matrix_inverse_op", - deps = LINALG_DEPS + if_cuda([":eye_functor"]), -) - -tf_kernel_library( - name = "matrix_solve_ls_op", - prefix = "matrix_solve_ls_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_solve_op", - prefix = "matrix_solve_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_square_root_op", - 
prefix = "matrix_square_root_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "banded_triangular_solve_op", - prefix = "banded_triangular_solve_op", - deps = LINALG_DEPS + [":fill_functor"], -) - -tf_kernel_library( - name = "matrix_triangular_solve_op", - hdrs = ["matrix_triangular_solve_op_impl.h"], - prefix = "matrix_triangular_solve_op", - deps = [ - ":linalg_ops_common", - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - ":fill_functor", - "//tensorflow/core:stream_executor", - ] + if_cuda([ - "//tensorflow/core/platform/default/build_config:cublas_plugin", - ":cuda_solvers", - ]) + if_rocm([ - "@local_config_rocm//rocm:rocprim", - ":rocm_solvers", - ]) + if_cuda_or_rocm([ - ":transpose_functor", - ]), -) - -tf_kernel_library( - name = "tridiagonal_matmul_op", - srcs = ["tridiagonal_matmul_op.cc"], - gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - ":cuda_sparse", - ]), -) - -tf_kernel_library( - name = "tridiagonal_solve_op", - srcs = ["tridiagonal_solve_op.cc"], - gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - ":cuda_sparse", - ]), -) - -tf_kernel_library( - name = "qr_op", - prefix = "qr_op", - deps = LINALG_DEPS + if_cuda([ - ":cwise_op", - ":eye_functor", - ":matrix_band_part_op", - ]), -) - -tf_kernel_library( - name = "svd_op", - prefix = "svd_op", - deps = LINALG_DEPS + if_cuda([ - ":eye_functor", - ]), -) - -tf_kernel_library( - name = "lu_op", - prefix = "lu_op", - deps = if_cuda([ - ":cuda_solvers", - ":transpose_functor", - ]) + [ - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - ], -) - -tf_kernel_library( - name = "einsum_op", - prefix = "einsum_op", - deps = [ - ":batch_matmul_op", - ":fill_functor", - ":reduction_ops", - ":transpose_functor", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/profiler/lib:traceme", - "//third_party/eigen3", - 
"@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/strings", - ], -) - -cc_library( - name = "linalg_ops_common", - srcs = ["linalg_ops_common.cc"], - hdrs = ["linalg_ops_common.h"], - visibility = ["//visibility:private"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//third_party/eigen3", - ], -) - cc_library( name = "logging", deps = [ @@ -4208,7 +3887,7 @@ tf_kernel_library( name = "segment_reduction_ops", prefix = "segment_reduction_ops", deps = MATH_DEPS + if_cuda_or_rocm([ - ":cuda_solvers", + "//tensorflow/core/util:cuda_solvers", ]), ) @@ -4405,45 +4084,6 @@ tf_cuda_cc_test( ], ) -tf_cuda_cc_test( - name = "banded_triangular_solve_op_test", - size = "small", - srcs = ["banded_triangular_solve_op_test.cc"], - deps = [ - ":banded_triangular_solve_op", - ":matrix_set_diag_op", - ":matrix_triangular_solve_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "matrix_triangular_solve_op_test", - size = "small", - srcs = ["matrix_triangular_solve_op_test.cc"], - deps = [ - ":broadcast_to_op", - ":matrix_triangular_solve_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cuda_cc_test( name = "scan_ops_test", size = "small", @@ -6672,10 +6312,7 @@ filegroup( "lookup_table_init_op.h", "lookup_table_op.h", "lookup_util.h", - "linalg_ops_common.h", "list_kernels.h", - "matrix_diag_op.h", - "matrix_set_diag_op.h", "maxpooling_op.h", "mfcc.h", "mfcc_dct.h", @@ -6723,6 +6360,9 @@ filegroup( "xent_op.h", ] + [ 
"//tensorflow/core/kernels/boosted_trees/quantiles:weighted_quantiles_hdrs", + "//tensorflow/core/kernels/linalg:linalg_ops_common.h", + "//tensorflow/core/kernels/linalg:matrix_diag_op.h", + "//tensorflow/core/kernels/linalg:matrix_set_diag_op.h", ], ) @@ -6823,16 +6463,6 @@ filegroup( "encode_wav_op.cc", "eigen_contraction_kernel.cc", "eigen_contraction_kernel.h", - "einsum_op_impl_half.cc", - "einsum_op_impl_bfloat16.cc", - "einsum_op_impl_int32.cc", - "einsum_op_impl_int64.cc", - "einsum_op_impl_float.cc", - "einsum_op_impl_double.cc", - "einsum_op_impl_complex64.cc", - "einsum_op_impl_complex128.cc", - "einsum_op_impl.h", - "einsum_op.h", "fake_quant_ops.cc", "fifo_queue.cc", "fifo_queue_op.cc", @@ -6844,6 +6474,17 @@ filegroup( "population_count_op.h", "winograd_transform.h", ":android_extended_ops_headers", + ] + [ + "//tensorflow/core/kernels/linalg:einsum_op_impl_half.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_bfloat16.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_int32.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_int64.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_float.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_double.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_complex64.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_complex128.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl.h", + "//tensorflow/core/kernels/linalg:einsum_op.h", ] + select({ ":xsmm_convolutions": [ "xsmm_conv2d.h", @@ -6874,7 +6515,6 @@ filegroup( "in_topk_op.cc", "in_topk_op.h", "initializable_lookup_table.cc", - "linalg_ops_common.cc", "list_kernels.cc", "logging_ops.cc", "logging_ops.h", @@ -6882,9 +6522,6 @@ filegroup( "lookup_table_op.cc", "lookup_util.cc", "lrn_op.cc", - "matrix_diag_op.cc", - "matrix_inverse_op.cc", - "matrix_set_diag_op.cc", "maxpooling_op.cc", "mfcc.cc", "mfcc_dct.cc", @@ -7006,6 +6643,10 @@ filegroup( ":android_extended_ops_headers", ] + [ 
"//tensorflow/core/kernels/boosted_trees:quantile_ops.cc", + "//tensorflow/core/kernels/linalg:linalg_ops_common.cc", + "//tensorflow/core/kernels/linalg:matrix_diag_op.cc", + "//tensorflow/core/kernels/linalg:matrix_inverse_op.cc", + "//tensorflow/core/kernels/linalg:matrix_set_diag_op.cc", ], ) @@ -7059,6 +6700,7 @@ filegroup( srcs = [ "//tensorflow/c/kernels:android_all_op_kernels", "//tensorflow/core/kernels/data:android_all_op_kernels", + "//tensorflow/core/kernels/linalg:android_all_op_kernels", ] + glob( [ "*.cc", @@ -8827,3 +8469,15 @@ tf_kernel_library( "@sobol_data", ], ) + +# ---- temporary forwarding declaration for libraries in linalg +# TODO(b/160344057): Remove after updating dependencies. +tf_kernel_library( + name = "matrix_inverse_op", + deps = ["//tensorflow/core/kernels/linalg:matrix_inverse_op"], +) + +tf_kernel_library( + name = "einsum_op", + deps = ["//tensorflow/core/kernels/linalg:einsum_op"], +) diff --git a/tensorflow/core/kernels/linalg/BUILD b/tensorflow/core/kernels/linalg/BUILD new file mode 100644 index 00000000000..c735f58ae51 --- /dev/null +++ b/tensorflow/core/kernels/linalg/BUILD @@ -0,0 +1,376 @@ +load( + "//tensorflow:tensorflow.bzl", + "if_cuda_or_rocm", + "tf_kernel_library", +) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", +) +load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") + +# Description: +# Op kernel implementations for TensorFlow. +# +# Note: Any test that uses GPU support and which we would like to +# benchmark should be linked statically so that it can be executed +# from a py_binary or cuda_py_test test logger. For such a test, +# append "_gpu" to the test name to invoke the GPU benchmarks. 
Example: +# +# # for CPU tests +# $ bazel test --config opt //third_party/tensorflow/core/kernels/linalg:my_op_test +# # for GPU benchmarks +# $ bazel run --config opt --config=cuda //third_party/tensorflow/core/kernels/linalg:my_op_test_gpu -- --benchmarks=.. +# +package( + default_visibility = [ + "//tensorflow:__subpackages__", + "//tensorflow:internal", + ], + licenses = ["notice"], # Apache 2.0 +) + +# TODO(rmlarsen): Remove ASAP. +package_group( + name = "friends", + packages = ["//tensorflow/..."], +) + +# Export a few files for use on Android. +exports_files([ + "einsum_op_impl_half.cc", + "einsum_op_impl_bfloat16.cc", + "einsum_op_impl_int32.cc", + "einsum_op_impl_int64.cc", + "einsum_op_impl_float.cc", + "einsum_op_impl_double.cc", + "einsum_op_impl_complex64.cc", + "einsum_op_impl_complex128.cc", + "einsum_op_impl.h", + "einsum_op.h", + "linalg_ops_common.h", + "linalg_ops_common.cc", + "matrix_diag_op.h", + "matrix_diag_op.cc", + "matrix_inverse_op.cc", + "matrix_set_diag_op.h", + "matrix_set_diag_op.cc", +]) + +# Public support libraries ---------------------------------------------------- + +cc_library( + name = "linalg", + deps = [ + ":banded_triangular_solve_op", + ":cholesky_grad", + ":cholesky_op", + ":determinant_op", + ":eig_op", + ":einsum_op", + ":lu_op", + ":matrix_band_part_op", + ":matrix_diag_op", + ":matrix_exponential_op", + ":matrix_inverse_op", + ":matrix_logarithm_op", + ":matrix_set_diag_op", + ":matrix_solve_ls_op", + ":matrix_solve_op", + ":matrix_square_root_op", + ":matrix_triangular_solve_op", + ":qr_op", + ":self_adjoint_eig_op", + ":self_adjoint_eig_v2_op", + ":svd_op", + ":tridiagonal_matmul_op", + ":tridiagonal_solve_op", + ], +) + +LINALG_DEPS = [ + ":linalg_ops_common", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:cast_op", + "//tensorflow/core/kernels:fill_functor", +] + if_cuda([ + ":eye_functor", + "//tensorflow/core/util:cuda_solvers", + 
"//tensorflow/core/kernels:transpose_functor", +]) + if_rocm([ + "//tensorflow/core/util:rocm_solvers", +]) + +tf_kernel_library( + name = "matrix_band_part_op", + prefix = "matrix_band_part_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_diag_op", + prefix = "matrix_diag_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_set_diag_op", + prefix = "matrix_set_diag_op", + deps = LINALG_DEPS + [":matrix_diag_op"], +) + +tf_kernel_library( + name = "cholesky_op", + prefix = "cholesky_op", + deps = if_cuda([ + ":matrix_band_part_op", + ]) + LINALG_DEPS, +) + +tf_kernel_library( + name = "cholesky_grad", + prefix = "cholesky_grad", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "determinant_op", + prefix = "determinant_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_exponential_op", + prefix = "matrix_exponential_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_logarithm_op", + prefix = "matrix_logarithm_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "self_adjoint_eig_op", + prefix = "self_adjoint_eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], +) + +tf_kernel_library( + name = "self_adjoint_eig_v2_op", + prefix = "self_adjoint_eig_v2_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ]), +) + +tf_kernel_library( + name = "eig_op", + prefix = "eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ]), +) + +tf_kernel_library( + name = "matrix_inverse_op", + prefix = "matrix_inverse_op", + visibility = [":friends"], + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_solve_ls_op", + prefix = "matrix_solve_ls_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_solve_op", + prefix = "matrix_solve_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_square_root_op", + prefix = 
"matrix_square_root_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "banded_triangular_solve_op", + prefix = "banded_triangular_solve_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_triangular_solve_op", + hdrs = ["matrix_triangular_solve_op_impl.h"], + prefix = "matrix_triangular_solve_op", + deps = [ + ":linalg_ops_common", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:fill_functor", + "//tensorflow/core:stream_executor", + ] + if_cuda([ + "//tensorflow/core/platform/default/build_config:cublas_plugin", + "//tensorflow/core/util:cuda_solvers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + "//tensorflow/core/util:rocm_solvers", + ]) + if_cuda_or_rocm([ + "//tensorflow/core/kernels:transpose_functor", + ]), +) + +tf_kernel_library( + name = "tridiagonal_matmul_op", + srcs = ["tridiagonal_matmul_op.cc"], + gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/util:cuda_sparse", + ]), +) + +tf_kernel_library( + name = "tridiagonal_solve_op", + srcs = ["tridiagonal_solve_op.cc"], + gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/util:cuda_sparse", + ]), +) + +tf_kernel_library( + name = "qr_op", + prefix = "qr_op", + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ":matrix_band_part_op", + ]), +) + +tf_kernel_library( + name = "svd_op", + prefix = "svd_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "lu_op", + prefix = "lu_op", + deps = if_cuda([ + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/kernels:transpose_functor", + ]) + [ + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +tf_kernel_library( + name = "einsum_op", + prefix = "einsum_op", + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:batch_matmul_op", + 
"//tensorflow/core/kernels:fill_functor", + "//tensorflow/core/kernels:reduction_ops", + "//tensorflow/core/kernels:transpose_functor", + "//tensorflow/core/profiler/lib:traceme", + "//third_party/eigen3", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "linalg_ops_common", + srcs = ["linalg_ops_common.cc"], + hdrs = ["linalg_ops_common.h"], + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//third_party/eigen3", + ], +) + +tf_cuda_cc_test( + name = "banded_triangular_solve_op_test", + size = "small", + srcs = ["banded_triangular_solve_op_test.cc"], + deps = [ + ":banded_triangular_solve_op", + ":matrix_set_diag_op", + ":matrix_triangular_solve_op", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_kernel_library( + name = "eye_functor", + hdrs = ["eye_functor.h"], + gpu_srcs = [ + "eye_functor_gpu.cu.cc", + "eye_functor.h", + ], + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + "//tensorflow/core:framework", + "//third_party/eigen3", + ], + alwayslink = 0, +) + +tf_cuda_cc_test( + name = "matrix_triangular_solve_op_test", + size = "small", + srcs = ["matrix_triangular_solve_op_test.cc"], + deps = [ + ":matrix_triangular_solve_op", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:broadcast_to_op", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +# A file group which contains all operators which are known to 
work on mobile. +filegroup( + name = "android_all_op_kernels", + srcs = glob( + [ + "*.cc", + "*.h", + ], + exclude = [ + "*test.cc", + "*test.h", + "*_test_*", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/core/kernels/banded_triangular_solve_op.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/banded_triangular_solve_op.cc rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc index d01a015502a..6758dcf5b8b 100644 --- a/tensorflow/core/kernels/banded_triangular_solve_op.cc +++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc similarity index 99% rename from tensorflow/core/kernels/banded_triangular_solve_op_test.cc rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc index 37e904a3e0e..7c20b88845f 100644 --- a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc +++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/testlib.h" -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" diff --git a/tensorflow/core/kernels/cholesky_grad.cc b/tensorflow/core/kernels/linalg/cholesky_grad.cc similarity index 99% rename from tensorflow/core/kernels/cholesky_grad.cc rename to tensorflow/core/kernels/linalg/cholesky_grad.cc index eac66e580dd..31a5570cddf 100644 --- a/tensorflow/core/kernels/cholesky_grad.cc +++ b/tensorflow/core/kernels/linalg/cholesky_grad.cc @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/cholesky_op.cc b/tensorflow/core/kernels/linalg/cholesky_op.cc similarity index 98% rename from tensorflow/core/kernels/cholesky_op.cc rename to tensorflow/core/kernels/linalg/cholesky_op.cc index ff8fd08f228..eae09124b36 100644 --- a/tensorflow/core/kernels/cholesky_op.cc +++ b/tensorflow/core/kernels/linalg/cholesky_op.cc @@ -25,16 +25,16 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/platform/stream_executor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/determinant_op.cc b/tensorflow/core/kernels/linalg/determinant_op.cc similarity index 99% rename from tensorflow/core/kernels/determinant_op.cc rename to tensorflow/core/kernels/linalg/determinant_op.cc index b06f42384eb..8f0b0b618cf 100644 --- a/tensorflow/core/kernels/determinant_op.cc +++ b/tensorflow/core/kernels/linalg/determinant_op.cc @@ -20,7 +20,7 @@ limitations under the License. #if GOOGLE_CUDA #define EIGEN_USE_GPU #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/determinant_op.h" +#include "tensorflow/core/kernels/linalg/determinant_op.h" #endif #include "third_party/eigen3/Eigen/LU" @@ -28,14 +28,14 @@ limitations under the License. 
#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/determinant_op.h b/tensorflow/core/kernels/linalg/determinant_op.h similarity index 90% rename from tensorflow/core/kernels/determinant_op.h rename to tensorflow/core/kernels/linalg/determinant_op.h index eefdfe0ae40..6ace1bef44b 100644 --- a/tensorflow/core/kernels/determinant_op.h +++ b/tensorflow/core/kernels/linalg/determinant_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ -#define TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ #include "tensorflow/core/framework/tensor_types.h" @@ -44,4 +44,4 @@ struct LogDeterminantFromPivotedLUFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ diff --git a/tensorflow/core/kernels/determinant_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc similarity index 98% rename from tensorflow/core/kernels/determinant_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc index 9aa64b3a7da..f6ab327bce0 100644 --- a/tensorflow/core/kernels/determinant_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc @@ -21,8 +21,8 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/determinant_op.h" +#include "tensorflow/core/kernels/linalg/determinant_op.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_complex128.cc b/tensorflow/core/kernels/linalg/eig_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_complex128.cc rename to tensorflow/core/kernels/linalg/eig_op_complex128.cc index 988cc2f98d9..bd4b6fe36d0 100644 --- a/tensorflow/core/kernels/eig_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/eig_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_complex64.cc b/tensorflow/core/kernels/linalg/eig_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_complex64.cc rename to tensorflow/core/kernels/linalg/eig_op_complex64.cc index 6a3f7928715..b5b4a26ee85 100644 --- a/tensorflow/core/kernels/eig_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/eig_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_double.cc b/tensorflow/core/kernels/linalg/eig_op_double.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_double.cc rename to tensorflow/core/kernels/linalg/eig_op_double.cc index 2cd931cc135..c360637c84a 100644 --- a/tensorflow/core/kernels/eig_op_double.cc +++ b/tensorflow/core/kernels/linalg/eig_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_float.cc b/tensorflow/core/kernels/linalg/eig_op_float.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_float.cc rename to tensorflow/core/kernels/linalg/eig_op_float.cc index a06f76e935f..18f576fcc19 100644 --- a/tensorflow/core/kernels/eig_op_float.cc +++ b/tensorflow/core/kernels/linalg/eig_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_impl.h b/tensorflow/core/kernels/linalg/eig_op_impl.h similarity index 93% rename from tensorflow/core/kernels/eig_op_impl.h rename to tensorflow/core/kernels/linalg/eig_op_impl.h index 4ebb6bde08b..a7aff7c2a5d 100644 --- a/tensorflow/core/kernels/eig_op_impl.h +++ b/tensorflow/core/kernels/linalg/eig_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -95,4 +95,4 @@ class EigOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/einsum_op.h b/tensorflow/core/kernels/linalg/einsum_op.h similarity index 94% rename from tensorflow/core/kernels/einsum_op.h rename to tensorflow/core/kernels/linalg/einsum_op.h index 31d1109004c..f22f33c600a 100644 --- a/tensorflow/core/kernels/einsum_op.h +++ b/tensorflow/core/kernels/linalg/einsum_op.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ -#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/einsum_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/einsum_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc index 2935b7fd02a..5461e43e0ab 100644 --- a/tensorflow/core/kernels/einsum_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc @@ -17,7 +17,7 @@ limitations under the License. 
#define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/einsum_op.h" +#include "tensorflow/core/kernels/linalg/einsum_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl.h b/tensorflow/core/kernels/linalg/einsum_op_impl.h similarity index 99% rename from tensorflow/core/kernels/einsum_op_impl.h rename to tensorflow/core/kernels/linalg/einsum_op_impl.h index 312738442b8..b9b2d1f0eae 100644 --- a/tensorflow/core/kernels/einsum_op_impl.h +++ b/tensorflow/core/kernels/linalg/einsum_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -31,8 +31,8 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/batch_matmul_op_impl.h" -#include "tensorflow/core/kernels/einsum_op.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/linalg/einsum_op.h" #include "tensorflow/core/kernels/reduction_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" @@ -780,4 +780,4 @@ DECLARE_GPU_SPECS(complex128); } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_bfloat16.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc index 44508f86a5e..e2e13052df5 100644 --- a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_complex128.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_complex128.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc index 8473cbf545d..ff78d460acf 100644 --- a/tensorflow/core/kernels/einsum_op_impl_complex128.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_complex64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_complex64.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc index bd506a04f5f..cd3788846b2 100644 --- a/tensorflow/core/kernels/einsum_op_impl_complex64.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_double.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_double.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_double.cc index f994590779b..e0c093fa4a9 100644 --- a/tensorflow/core/kernels/einsum_op_impl_double.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_float.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_float.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_float.cc index 1875310b687..ad9135c991c 100644 --- a/tensorflow/core/kernels/einsum_op_impl_float.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_half.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_half.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_half.cc index 0486b133e62..72a9f6bec4f 100644 --- a/tensorflow/core/kernels/einsum_op_impl_half.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_int32.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_int32.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc index db5169498d9..7569c979c59 100644 --- a/tensorflow/core/kernels/einsum_op_impl_int32.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_int64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_int64.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc index 7f1a1eac411..6ee0ebc9637 100644 --- a/tensorflow/core/kernels/einsum_op_impl_int64.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eye_functor.h b/tensorflow/core/kernels/linalg/eye_functor.h similarity index 90% rename from tensorflow/core/kernels/eye_functor.h rename to tensorflow/core/kernels/linalg/eye_functor.h index 3799cfba9ae..c77372f089a 100644 --- a/tensorflow/core/kernels/eye_functor.h +++ b/tensorflow/core/kernels/linalg/eye_functor.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ -#define TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/eye_functor_gpu.cu.cc b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/eye_functor_gpu.cu.cc rename to tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc index 90df538dd2c..85865588f2c 100644 --- a/tensorflow/core/kernels/eye_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg_ops_common.cc b/tensorflow/core/kernels/linalg/linalg_ops_common.cc similarity index 99% rename from tensorflow/core/kernels/linalg_ops_common.cc rename to tensorflow/core/kernels/linalg/linalg_ops_common.cc index 56a941fbd1f..c8d33e435c7 100644 --- a/tensorflow/core/kernels/linalg_ops_common.cc +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.h b/tensorflow/core/kernels/linalg/linalg_ops_common.h new file mode 100644 index 00000000000..3ab37480c90 --- /dev/null +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.h @@ -0,0 +1,221 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_ + +// Classes to support linear algebra functionality, similar to the numpy.linalg +// module. Supports batch computation on several matrices at once, sharding the +// computations across different threads if necessary. +#include + +#include "third_party/eigen3/Eigen/Core" +#include "tensorflow/core/framework/kernel_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/work_sharder.h" + +namespace tensorflow { + +// Base class for linear algebra operators. +template +class LinearAlgebraOp : public OpKernel { + public: + explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override; + + protected: + using TensorShapes = gtl::InlinedVector; + // Returns the number of leading inputs that are to be treated as matrix + // inputs. By default this is all the inputs. Derived classes can override + // this to tell the base class to ignore one or more trailing inputs. + virtual int NumMatrixInputs(const OpKernelContext* context) const { + return context->num_inputs(); + } + + // Checks that the number of inputs and their shapes are as expected. + // Many ops take a single square input matrix, so we provide that as a default + // implementation for convenience.
+ virtual void ValidateInputMatrixShapes( + OpKernelContext* context, const TensorShapes& input_matrix_shapes) const { + ValidateSingleSquareMatrix(context, input_matrix_shapes); + } + + // Convenience validators for common cases: + // + // Validate op taking a single matrix A. + static void ValidateSingleMatrix(OpKernelContext* context, + const TensorShapes& input_matrix_shapes); + // Validate op taking a single square matrix A. + static void ValidateSingleSquareMatrix( + OpKernelContext* context, const TensorShapes& input_matrix_shapes); + // Validate op taking two matrices A and B that have the same number of rows. + static void ValidateSolver(OpKernelContext* context, + const TensorShapes& input_matrix_shapes); + // Validate op taking two matrices A and B that have the same number of rows + // and A is square. + static void ValidateSquareSolver(OpKernelContext* context, + const TensorShapes& input_matrix_shapes); + + // Returns the output shapes of each individual matrix operation. Output + // matrix shapes must be rank 0, 1, or 2. Scalar outputs are rank 0. + // + // The derived class may return a number of shapes (N) less than + // context->num_outputs() (M) to indicate that only a leading subset of + // the outputs will be populated. In this case, a dummy scalar tensor with + // value zero will be returned for the last M-N outputs. + // + // For many ops, the output dimensions are the same as the input dimensions, + // so we provide that as a default implementation for convenience. + virtual TensorShapes GetOutputMatrixShapes( + const TensorShapes& input_matrix_shapes) const { + return input_matrix_shapes; + } + + // Returns the cost per matrix operation. This is used to determine the + // number of threads to use for parallelizing calls to ComputeMatrix in + // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments + // in core/util/work_sharder.cc.
Many linear algebra ops take roughly max(m,n) + // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a + // default implementation for convenience. + virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const { + double m = static_cast(input_matrix_shapes[0].dim_size(0)); + double n = static_cast(input_matrix_shapes[0].dim_size(1)); + double cost = std::max(m, n) * std::min(m, n) * std::min(m, n); + return cost >= static_cast(kint64max) ? kint64max + : static_cast(cost); + } + + // Returns true if it is safe to forward (alias) input to output buffer + // and expect the kernel to perform the computation inplace. + virtual bool EnableInputForwarding() const { return true; } + + using InputMatrix = Eigen::Matrix; + using InputConstMatrixMap = Eigen::Map; + using InputMatrixMap = Eigen::Map; + using InputConstVectorMap = + Eigen::Map>; + using InputConstMatrixMaps = gtl::InlinedVector; + using InputMatrixMaps = gtl::InlinedVector; + using InputRealScalar = typename Eigen::NumTraits::Real; + + using OutputMatrix = Eigen::Matrix; + using OutputConstMatrixMap = Eigen::Map; + using OutputMatrixMap = Eigen::Map; + using OutputConstVectorMap = + Eigen::Map>; + using OutputConstMatrixMaps = gtl::InlinedVector; + using OutputMatrixMaps = gtl::InlinedVector; + using OutputRealScalar = typename Eigen::NumTraits::Real; + + // backward compatibility + using Scalar = OutputScalar; + using Matrix = + Eigen::Matrix; + using ConstMatrixMap = Eigen::Map; + using MatrixMap = Eigen::Map; + using ConstVectorMap = + Eigen::Map>; + using ConstMatrixMaps = gtl::InlinedVector; + using MatrixMaps = gtl::InlinedVector; + using RealScalar = typename Eigen::NumTraits::Real; + + // Performs a single matrix computation given input matrices, and + // stores the result in outputs. For batch operations, this will be called + // repeatedly for a single call to Compute() when multiple matrices exist in + // input Tensors with rank > 2. 
In this case the calls to ComputeMatrix are + // parallelized. The number of threads used is determined by a cost model from + // the value returned by GetCostPerUnit(). + virtual void ComputeMatrix(OpKernelContext* context, + const InputConstMatrixMaps& inputs, + OutputMatrixMaps* outputs) = 0; + + private: + using TensorInputs = gtl::InlinedVector; + using TensorOutputs = gtl::InlinedVector; + // This function maps 2-d slices (matrices) of the input and output tensors + // using Eigen::Map and calls ComputeMatrix implemented in terms of the + // Eigen::MatrixBase API by the derived class. + // + // The 'matrix_index' parameter specifies the index of the matrix to be used + // from each input tensor, and the index of the matrix to be written to each + // output tensor. The input matrices are in row major order, and located at + // the memory addresses + // inputs[i].flat().data() + + // matrix_index * input_matrix_shapes[i].num_elements() + // for i in 0...inputs.size()-1. + // The output matrices are in row major order, and located at the memory + // address + // outputs[i]->flat().data() + + // matrix_index * output_matrix_shapes[i].num_elements(). + // for i in 0...outputs.size()-1. + // + void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index, + const TensorInputs& inputs, + const TensorShapes& input_matrix_shapes, + const TensorOutputs& outputs, + const TensorShapes& output_matrix_shapes); + + void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs, + TensorShapes* input_matrix_shapes, + TensorShape* batch_shape); + + void PrepareOutputs(OpKernelContext* context, + const TensorShapes& input_matrix_shapes, + const TensorShape& batch_shape, TensorOutputs* outputs, + TensorShapes* output_matrix_shapes); +}; + +// Declare LinearAlgebraOp, which is explicitly instantiated in +// linalg_ops_common.cc for float, double, complex64, and complex128. 
+extern template class LinearAlgebraOp; +extern template class LinearAlgebraOp; +extern template class LinearAlgebraOp; +extern template class LinearAlgebraOp; + +} // namespace tensorflow + +#define INHERIT_LINALG_TYPEDEFS(Scalar) \ + typedef LinearAlgebraOp Base; \ + using RealScalar = typename Eigen::NumTraits::Real; \ + using Matrix = typename Base::Matrix; \ + using MatrixMap = typename Base::MatrixMap; \ + using MatrixMaps = typename Base::MatrixMaps; \ + using ConstMatrixMap = typename Base::ConstMatrixMap; \ + using ConstMatrixMaps = typename Base::ConstMatrixMaps; \ + using ConstVectorMap = typename Base::ConstVectorMap; \ + using TensorShapes = typename Base::TensorShapes; + +#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) \ + REGISTER_KERNEL_BUILDER( \ + Name(OpName).Device(DEVICE_CPU).TypeConstraint("T"), OpClass) + +#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar) \ + REGISTER_KERNEL_BUILDER( \ + Name(OpName).Device(DEVICE_GPU).TypeConstraint("T"), OpClass) + +// Deprecated, use one of the device-specific macros above. +#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \ + REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) + +#endif // TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_ diff --git a/tensorflow/core/kernels/lu_op.cc b/tensorflow/core/kernels/linalg/lu_op.cc similarity index 100% rename from tensorflow/core/kernels/lu_op.cc rename to tensorflow/core/kernels/linalg/lu_op.cc diff --git a/tensorflow/core/kernels/lu_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/lu_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc index 47b37ed7f7a..9d23a35057d 100644 --- a/tensorflow/core/kernels/lu_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc @@ -25,9 +25,9 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_band_part_op.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_band_part_op.cc rename to tensorflow/core/kernels/linalg/matrix_band_part_op.cc index 4dcce5a8f58..23619bacc33 100644 --- a/tensorflow/core/kernels/matrix_band_part_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc @@ -21,11 +21,12 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include #include #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/matrix_band_part_op.h b/tensorflow/core/kernels/linalg/matrix_band_part_op.h similarity index 86% rename from tensorflow/core/kernels/matrix_band_part_op.h rename to tensorflow/core/kernels/linalg/matrix_band_part_op.h index b04e36db8ed..2f68eba6dcd 100644 --- a/tensorflow/core/kernels/matrix_band_part_op.h +++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -34,4 +34,4 @@ struct MatrixBandPartFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ diff --git a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc index 9eb3e4f72a2..9c734b7fd6e 100644 --- a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc @@ -21,7 +21,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_diag_op.cc rename to tensorflow/core/kernels/linalg/matrix_diag_op.cc index 05d7e4e6f86..69cc8170793 100644 --- a/tensorflow/core/kernels/matrix_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_diag_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include #include diff --git a/tensorflow/core/kernels/matrix_diag_op.h b/tensorflow/core/kernels/linalg/matrix_diag_op.h similarity index 94% rename from tensorflow/core/kernels/matrix_diag_op.h rename to tensorflow/core/kernels/linalg/matrix_diag_op.h index 707fd9b6c14..5758ba664cc 100644 --- a/tensorflow/core/kernels/matrix_diag_op.h +++ b/tensorflow/core/kernels/linalg/matrix_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ // Generator definition for MatrixDiagOp, must be compilable by nvcc. @@ -69,4 +69,4 @@ struct MatrixDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc index 76271798d5f..6b52e70716d 100644 --- a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. 
#define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_exponential_op.cc b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_exponential_op.cc rename to tensorflow/core/kernels/linalg/matrix_exponential_op.cc index 01d4894438c..73407614955 100644 --- a/tensorflow/core/kernels/matrix_exponential_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc similarity index 98% rename from tensorflow/core/kernels/matrix_inverse_op.cc rename to tensorflow/core/kernels/linalg/matrix_inverse_op.cc index 52afdd15ba6..dc51776f2fe 100644 --- a/tensorflow/core/kernels/matrix_inverse_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc @@ -24,7 +24,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -32,9 +32,9 @@ limitations under the License. #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_logarithm_op.cc b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_logarithm_op.cc rename to tensorflow/core/kernels/linalg/matrix_logarithm_op.cc index 22ca094e243..79d5472f140 100644 --- a/tensorflow/core/kernels/matrix_logarithm_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_set_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_set_diag_op.cc rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.cc index bf98fd0d47d..df32228d0f2 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc @@ -21,7 +21,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -30,7 +30,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_set_diag_op.h b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h similarity index 89% rename from tensorflow/core/kernels/matrix_set_diag_op.h rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.h index 04877cd34ca..449a3607ede 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op.h +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -39,4 +39,4 @@ struct MatrixSetDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc index 4e32f8a52e8..0cdb457db03 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc @@ 
-18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc index 22274cc3daf..4e64eb42371 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc index c8421a3efba..719201f3f9e 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_double.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc index c7d03cb1052..614ecee4e23 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_float.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc index c98a84beded..809cff8148c 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h similarity index 96% rename from tensorflow/core/kernels/matrix_solve_ls_op_impl.h rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h index 00a05a87a3a..1c8101a05b4 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -24,7 +24,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -163,4 +163,4 @@ class MatrixSolveLsOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/matrix_solve_op.cc b/tensorflow/core/kernels/linalg/matrix_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_solve_op.cc rename to tensorflow/core/kernels/linalg/matrix_solve_op.cc index 3a75054f4ea..70f02bddf9b 100644 --- a/tensorflow/core/kernels/matrix_solve_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_op.cc @@ -25,7 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -33,8 +33,8 @@ limitations under the License. 
#if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_square_root_op.cc b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_square_root_op.cc rename to tensorflow/core/kernels/linalg/matrix_square_root_op.cc index fe3d3043c26..ce43e358350 100644 --- a/tensorflow/core/kernels/matrix_square_root_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc similarity index 92% rename from tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc index ae3702078a0..27f3e77e29c 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc @@ -14,7 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h similarity index 97% rename from tensorflow/core/kernels/matrix_triangular_solve_op_impl.h rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h index fb7e6f0f5ff..99249f792b6 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h @@ -15,8 +15,8 @@ limitations under the License. // See docs in ../ops/linalg_ops.cc. // -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ #include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/framework/kernel_def_builder.h" @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -38,9 +38,9 @@ limitations under the License. 
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #elif TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/rocm_solvers.h" +#include "tensorflow/core/util/rocm_solvers.h" #endif namespace tensorflow { @@ -434,4 +434,4 @@ struct LaunchBatchMatrixTriangularSolve { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc similarity index 93% rename from tensorflow/core/kernels/matrix_triangular_solve_op_real.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc index 0f92964dd72..71a62441dc4 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" #if GOOGLE_CUDA #include "third_party/gpus/cuda/include/cuda.h" diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc similarity index 100% rename from tensorflow/core/kernels/matrix_triangular_solve_op_test.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc diff --git a/tensorflow/core/kernels/qr_op_complex128.cc b/tensorflow/core/kernels/linalg/qr_op_complex128.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_complex128.cc rename to tensorflow/core/kernels/linalg/qr_op_complex128.cc index 8a3e3dc0a92..0c14c6d2818 100644 --- a/tensorflow/core/kernels/qr_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/qr_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_complex64.cc b/tensorflow/core/kernels/linalg/qr_op_complex64.cc similarity index 95% rename from tensorflow/core/kernels/qr_op_complex64.cc rename to tensorflow/core/kernels/linalg/qr_op_complex64.cc index 467fa6c2d6a..fc0227ef7f9 100644 --- a/tensorflow/core/kernels/qr_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/qr_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_double.cc b/tensorflow/core/kernels/linalg/qr_op_double.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_double.cc rename to tensorflow/core/kernels/linalg/qr_op_double.cc index 05537a0eaa3..ae00b3e7921 100644 --- a/tensorflow/core/kernels/qr_op_double.cc +++ b/tensorflow/core/kernels/linalg/qr_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_float.cc b/tensorflow/core/kernels/linalg/qr_op_float.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_float.cc rename to tensorflow/core/kernels/linalg/qr_op_float.cc index 6aebd981865..77b8eeb0286 100644 --- a/tensorflow/core/kernels/qr_op_float.cc +++ b/tensorflow/core/kernels/linalg/qr_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_impl.h b/tensorflow/core/kernels/linalg/qr_op_impl.h similarity index 96% rename from tensorflow/core/kernels/qr_op_impl.h rename to tensorflow/core/kernels/linalg/qr_op_impl.h index 535df9d160d..876594bc511 100644 --- a/tensorflow/core/kernels/qr_op_impl.h +++ b/tensorflow/core/kernels/linalg/qr_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -33,7 +33,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -41,11 +41,11 @@ limitations under the License. 
#if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" -#include "tensorflow/core/kernels/eye_functor.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { @@ -299,4 +299,4 @@ class QrOpGpu : public AsyncOpKernel { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/self_adjoint_eig_op.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc similarity index 98% rename from tensorflow/core/kernels/self_adjoint_eig_op.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc index cea5883db7b..ebf1955b8ff 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_op.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc index 4c7a391d56c..424c33a7ac1 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc index 0ec5ec24dd1..bdd20998e3c 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc similarity index 92% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc index 7f81bb69021..afc50500d40 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc similarity index 92% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc index bf30952d1e7..1f795777a2e 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc similarity index 99% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc index 3a84df07a9a..778c50ff408 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc @@ -26,12 +26,12 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/cast_op.h" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h similarity index 91% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h index b5274f8788b..56f2936a66e 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -89,4 +89,4 @@ class SelfAdjointEigV2Op : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/svd_op_complex128.cc b/tensorflow/core/kernels/linalg/svd_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_complex128.cc rename to tensorflow/core/kernels/linalg/svd_op_complex128.cc index a0f39418aca..36ac629e38a 100644 --- a/tensorflow/core/kernels/svd_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/svd_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_complex64.cc b/tensorflow/core/kernels/linalg/svd_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_complex64.cc rename to tensorflow/core/kernels/linalg/svd_op_complex64.cc index a8fd50c67d1..50d940b534a 100644 --- a/tensorflow/core/kernels/svd_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/svd_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_double.cc b/tensorflow/core/kernels/linalg/svd_op_double.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_double.cc rename to tensorflow/core/kernels/linalg/svd_op_double.cc index 539dae3a081..85bbe08d8c9 100644 --- a/tensorflow/core/kernels/svd_op_double.cc +++ b/tensorflow/core/kernels/linalg/svd_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_float.cc b/tensorflow/core/kernels/linalg/svd_op_float.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_float.cc rename to tensorflow/core/kernels/linalg/svd_op_float.cc index 03839aa49c3..961d131293b 100644 --- a/tensorflow/core/kernels/svd_op_float.cc +++ b/tensorflow/core/kernels/linalg/svd_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/svd_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc index 482fd057e4e..06d1efe6dd5 100644 --- a/tensorflow/core/kernels/svd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc @@ -36,14 +36,14 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/eye_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_impl.h b/tensorflow/core/kernels/linalg/svd_op_impl.h similarity index 95% rename from tensorflow/core/kernels/svd_op_impl.h rename to tensorflow/core/kernels/linalg/svd_op_impl.h index 675826a057c..c43aaaa4b7b 100644 --- a/tensorflow/core/kernels/svd_op_impl.h +++ b/tensorflow/core/kernels/linalg/svd_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -27,7 +27,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -118,4 +118,4 @@ class SvdOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc similarity index 98% rename from tensorflow/core/kernels/tridiagonal_matmul_op.cc rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc index 3ddf22012de..9d17c574148 100644 --- a/tensorflow/core/kernels/tridiagonal_matmul_op.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc index 1c82cc18e32..a65db40d822 100644 --- a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc @@ -22,11 +22,11 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/tridiagonal_solve_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/tridiagonal_solve_op.cc rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc index 88931ff3e66..8fe04125f9a 100644 --- a/tensorflow/core/kernels/tridiagonal_solve_op.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc @@ -19,7 +19,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc index 089fa8c040f..86514cfb033 100644 --- a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc @@ -23,11 +23,11 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/linalg_ops_common.h b/tensorflow/core/kernels/linalg_ops_common.h index 65c2fb90f0e..0aa69801f19 100644 --- a/tensorflow/core/kernels/linalg_ops_common.h +++ b/tensorflow/core/kernels/linalg_ops_common.h @@ -12,211 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #ifndef TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ #define TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ -// Classes to support linear algebra functionality, similar to the numpy.linalg -// module. Supports batch computation on several matrices at once, sharding the -// computations across different threads if necessary. -#include - -#include "third_party/eigen3/Eigen/Core" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/inlined_vector.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/work_sharder.h" - -namespace tensorflow { - -// Base class for linear algebra operators. -template -class LinearAlgebraOp : public OpKernel { - public: - explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override; - - protected: - using TensorShapes = gtl::InlinedVector; - // Returns the number of leading inputs that are to be treated as matrix - // inputs. By default this is all the inputs. Derived classes can override - // this to tell the base class to ignore one or more trailing inputs. - virtual int NumMatrixInputs(const OpKernelContext* context) const { - return context->num_inputs(); - } - - // Returns true if the number of inputs and their shapes are as expected. - // Many ops take a single square input matrix, so we provide that as a default - // implementation for convenience. 
- virtual void ValidateInputMatrixShapes( - OpKernelContext* context, const TensorShapes& input_matrix_shapes) const { - ValidateSingleSquareMatrix(context, input_matrix_shapes); - } - - // Convenience validators for common cases: - // - // Validate op taking a single matrix A. - static void ValidateSingleMatrix(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - // Validate op taking a single square matrix A. - static void ValidateSingleSquareMatrix( - OpKernelContext* context, const TensorShapes& input_matrix_shapes); - // Validate op taking two matrices A and B that have the same number of rows. - static void ValidateSolver(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - // Validate op taking two matrices A and B that have the same number of rows - // and A is square. - static void ValidateSquareSolver(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - - // Returns the output shapes of each individual matrix operation. Output - // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0. - // - // The derived class may return a number of shapes (N) less than - // context->num_outputs() (M) to indicate that a only leading subset of - // the outputs will be populated. In this case, a dummy scalar tensor with - // value zero will be return for the last M-N outputs. - // - // For many ops, the output dimensions are the same as the input dimensions, - // so we provide that as a default implementation for convenience. - virtual TensorShapes GetOutputMatrixShapes( - const TensorShapes& input_matrix_shapes) const { - return input_matrix_shapes; - } - - // Returns the cost per matrix operation. This is used to determine the - // number of threads to use for parallelizing calls to ComputeMatrix in - // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments - // in core/util/work_sharder.cc. 
Many linear algebra ops take roughly max(m,n) - // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a - // default implementation for convenience. - virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const { - double m = static_cast(input_matrix_shapes[0].dim_size(0)); - double n = static_cast(input_matrix_shapes[0].dim_size(1)); - double cost = std::max(m, n) * std::min(m, n) * std::min(m, n); - return cost >= static_cast(kint64max) ? kint64max - : static_cast(cost); - } - - // Returns true if it is safe to forward (alias) input to output buffer - // and expect the kernel to perform the computation inplace. - virtual bool EnableInputForwarding() const { return true; } - - using InputMatrix = Eigen::Matrix; - using InputConstMatrixMap = Eigen::Map; - using InputMatrixMap = Eigen::Map; - using InputConstVectorMap = - Eigen::Map>; - using InputConstMatrixMaps = gtl::InlinedVector; - using InputMatrixMaps = gtl::InlinedVector; - using InputRealScalar = typename Eigen::NumTraits::Real; - - using OutputMatrix = Eigen::Matrix; - using OutputConstMatrixMap = Eigen::Map; - using OutputMatrixMap = Eigen::Map; - using OutputConstVectorMap = - Eigen::Map>; - using OutputConstMatrixMaps = gtl::InlinedVector; - using OutputMatrixMaps = gtl::InlinedVector; - using OutputRealScalar = typename Eigen::NumTraits::Real; - - // backward compatibility - using Scalar = OutputScalar; - using Matrix = - Eigen::Matrix; - using ConstMatrixMap = Eigen::Map; - using MatrixMap = Eigen::Map; - using ConstVectorMap = - Eigen::Map>; - using ConstMatrixMaps = gtl::InlinedVector; - using MatrixMaps = gtl::InlinedVector; - using RealScalar = typename Eigen::NumTraits::Real; - - // Performs a single matrix computation given input matrices, and - // stores the result in outputs. For batch operations, this will be called - // repeatedly for a single call to Compute() when multiple matrices exist in - // input Tensors with rank > 2. 
In this case the calls to ComputeMatrix are - // parallelized. The number of threads used is determined by a cost model from - // the value returned by GetCostPerUnit(). - virtual void ComputeMatrix(OpKernelContext* context, - const InputConstMatrixMaps& inputs, - OutputMatrixMaps* outputs) = 0; - - private: - using TensorInputs = gtl::InlinedVector; - using TensorOutputs = gtl::InlinedVector; - // This function maps 2-d slices (matrices) of the input and output tensors - // using Eigen::Map and calls ComputeMatrix implemented in terms of the - // Eigen::MatrixBase API by the derived class. - // - // The 'matrix_index' parameter specifies the index of the matrix to be used - // from each input tensor, and the index of the matrix to be written to each - // output tensor. The input matrices are in row major order, and located at - // the memory addresses - // inputs[i].flat().data() + - // matrix_index * input_matrix_shapes[i].num_elements() - // for i in 0...inputs.size()-1. - // The output matrices are in row major order, and located at the memory - // address - // outputs[i]->flat().data() + - // matrix_index * output_matrix_shapes[i].num_elements(). - // for i in 0...outputs.size()-1. - // - void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index, - const TensorInputs& inputs, - const TensorShapes& input_matrix_shapes, - const TensorOutputs& outputs, - const TensorShapes& output_matrix_shapes); - - void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs, - TensorShapes* input_matrix_shapes, - TensorShape* batch_shape); - - void PrepareOutputs(OpKernelContext* context, - const TensorShapes& input_matrix_shapes, - const TensorShape& batch_shape, TensorOutputs* outputs, - TensorShapes* output_matrix_shapes); -}; - -// Declare LinearAlgebraOp, which is explicitly instantiated in -// linalg_ops_common.cc for float, double, complex64, and complex128. 
-extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; - -} // namespace tensorflow - -#define INHERIT_LINALG_TYPEDEFS(Scalar) \ - typedef LinearAlgebraOp Base; \ - using RealScalar = typename Eigen::NumTraits::Real; \ - using Matrix = typename Base::Matrix; \ - using MatrixMap = typename Base::MatrixMap; \ - using MatrixMaps = typename Base::MatrixMaps; \ - using ConstMatrixMap = typename Base::ConstMatrixMap; \ - using ConstMatrixMaps = typename Base::ConstMatrixMaps; \ - using ConstVectorMap = typename Base::ConstVectorMap; \ - using TensorShapes = typename Base::TensorShapes; - -#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) \ - REGISTER_KERNEL_BUILDER( \ - Name(OpName).Device(DEVICE_CPU).TypeConstraint("T"), OpClass) - -#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar) \ - REGISTER_KERNEL_BUILDER( \ - Name(OpName).Device(DEVICE_GPU).TypeConstraint("T"), OpClass) - -// Deprecated, use one of the device-specific macros above. -#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \ - REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) +// Temporary forwarding header. +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #endif // TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl.h b/tensorflow/core/kernels/segment_reduction_ops_impl.h index 6c3fad668ae..7cf15ef5b72 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl.h +++ b/tensorflow/core/kernels/segment_reduction_ops_impl.h @@ -45,13 +45,13 @@ limitations under the License. 
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; #elif TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/platform/rocm.h" +#include "tensorflow/core/util/cuda_solvers.h" using stream_executor::rocm::ScopedActivateExecutorContext; #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/BUILD b/tensorflow/core/kernels/sparse/BUILD index 1d281bc1d61..bfb6c4934bb 100644 --- a/tensorflow/core/kernels/sparse/BUILD +++ b/tensorflow/core/kernels/sparse/BUILD @@ -80,8 +80,8 @@ tf_kernel_library( "//tensorflow/core/kernels:transpose_functor", "//tensorflow/core/kernels:gpu_prim_hdrs", ] + if_cuda_or_rocm([ - "//tensorflow/core/kernels:cuda_solvers", - "//tensorflow/core/kernels:cuda_sparse", + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/util:cuda_sparse", ]), alwayslink = 1, ) diff --git a/tensorflow/core/kernels/sparse/add_op.cc b/tensorflow/core/kernels/sparse/add_op.cc index b6265a1412c..06fe1cd042e 100644 --- a/tensorflow/core/kernels/sparse/add_op.cc +++ b/tensorflow/core/kernels/sparse/add_op.cc @@ -32,8 +32,8 @@ limitations under the License. #include "tensorflow/core/kernels/fill_functor.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/conj_op.cc b/tensorflow/core/kernels/sparse/conj_op.cc index 7275262c1f0..147160fbe6c 100644 --- a/tensorflow/core/kernels/sparse/conj_op.cc +++ b/tensorflow/core/kernels/sparse/conj_op.cc @@ -32,8 +32,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc index 364c2c07bd8..2e5afbdcad7 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc @@ -34,8 +34,8 @@ limitations under the License. #include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc index 55ebfa4fc10..a81ccfa562e 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc @@ -32,8 +32,8 @@ limitations under the License. 
#include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc index 459bb219343..5c62a44f9ba 100644 --- a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc @@ -35,8 +35,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc index 1c014db3d0a..6b11e64307a 100644 --- a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc @@ -20,13 +20,13 @@ limitations under the License. 
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/kernels/gpu_device_array.h" #include "tensorflow/core/kernels/gpu_device_array_gpu.h" #include "tensorflow/core/kernels/gpu_prim.h" #include "tensorflow/core/kernels/sparse/kernels.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mat_mul_op.cc b/tensorflow/core/kernels/sparse/mat_mul_op.cc index 50fa0ec88ea..bf9de570fbf 100644 --- a/tensorflow/core/kernels/sparse/mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/mat_mul_op.cc @@ -37,8 +37,8 @@ limitations under the License. #include "tensorflow/core/platform/threadpool.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mul_op.cc b/tensorflow/core/kernels/sparse/mul_op.cc index 33c3756ce58..d08f1568db1 100644 --- a/tensorflow/core/kernels/sparse/mul_op.cc +++ b/tensorflow/core/kernels/sparse/mul_op.cc @@ -29,7 +29,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/nnz_op.cc b/tensorflow/core/kernels/sparse/nnz_op.cc index ebc48c3e9a4..d67620443f0 100644 --- a/tensorflow/core/kernels/sparse/nnz_op.cc +++ b/tensorflow/core/kernels/sparse/nnz_op.cc @@ -29,8 +29,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/softmax_op.cc b/tensorflow/core/kernels/sparse/softmax_op.cc index 25025bfe2a6..f1a5db8d0f0 100644 --- a/tensorflow/core/kernels/sparse/softmax_op.cc +++ b/tensorflow/core/kernels/sparse/softmax_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc index fb652e13d15..fecee9e4555 100644 --- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc @@ -36,8 +36,8 @@ limitations under the License. 
#include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc index 59540f63846..2eaf9bd5310 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc @@ -30,8 +30,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc index e1a4b4194d2..2548ceaa57c 100644 --- a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc @@ -33,8 +33,8 @@ limitations under the License. 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/transpose_op.cc b/tensorflow/core/kernels/sparse/transpose_op.cc index 3158eb5016d..08d37fa1692 100644 --- a/tensorflow/core/kernels/sparse/transpose_op.cc +++ b/tensorflow/core/kernels/sparse/transpose_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc index 598cb526d77..d504ec9b2ed 100644 --- a/tensorflow/core/kernels/where_op.cc +++ b/tensorflow/core/kernels/where_op.cc @@ -39,7 +39,7 @@ limitations under the License. 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #if GOOGLE_CUDA #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index bb2b9ff429e..dcb2787e309 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -14,6 +14,7 @@ load( "tf_copts", "tf_cuda_library", "tf_cuda_only_cc_test", + "tf_kernel_library", ) load("//tensorflow:tensorflow.bzl", "tf_version_info_genrule") load( @@ -24,6 +25,11 @@ load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", ) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", +) default_package_visibility = [ "//tensorflow/core:__subpackages__", @@ -567,6 +573,63 @@ cc_library( ], ) +tf_kernel_library( + name = "cuda_solvers", + srcs = ["cuda_solvers.cc"], + hdrs = ["cuda_solvers.h"], + # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, + # and //third_party/libf2c all contain various parts of BLAS, LAPACK, + # and f2c helper functions in global namespace. Tell the compiler to + # allow multiple definitions when linking this. 
+ linkopts = select({ + "//tensorflow:macos": [], + "//tensorflow:windows": [], + "//conditions:default": ["-Wl,-z,muldefs"], + }), + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/platform/default/build_config:cublas_plugin", + "//tensorflow/stream_executor/cuda:cublas_lib", + "//tensorflow/stream_executor/cuda:cusolver_lib", + ], +) + +tf_kernel_library( + name = "rocm_solvers", + srcs = ["rocm_solvers.cc"], + hdrs = ["rocm_solvers.h"], + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "//tensorflow/stream_executor/platform:dso_loader", + "//tensorflow/stream_executor/rocm:rocblas_plugin", + "//tensorflow/stream_executor/rocm:rocm_gpu_executor", + ] + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +) + +tf_kernel_library( + name = "cuda_sparse", + srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), + hdrs = ["cuda_sparse.h"], + deps = [ + ":cuda_solvers", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ] + if_cuda([ + "//tensorflow/stream_executor/cuda:cusparse_lib", + "@cub_archive//:cub", + ]) + if_rocm([ + "@local_config_rocm//rocm:hipsparse", + ]), +) + # Tests. 
tf_cc_test( diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/util/cuda_solvers.cc similarity index 99% rename from tensorflow/core/kernels/cuda_solvers.cc rename to tensorflow/core/util/cuda_solvers.cc index f41ce2a5d27..3e4d2a05ac6 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/util/cuda_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #ifdef GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #include #include diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/util/cuda_solvers.h similarity index 99% rename from tensorflow/core/kernels/cuda_solvers.h rename to tensorflow/core/util/cuda_solvers.h index eb1d5c8a200..79f45c9b0ea 100644 --- a/tensorflow/core/kernels/cuda_solvers.h +++ b/tensorflow/core/util/cuda_solvers.h @@ -14,8 +14,8 @@ limitations under the License. ============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_UTIL_CUDA_SOLVERS_H_ +#define TENSORFLOW_CORE_UTIL_CUDA_SOLVERS_H_ // This header declares the class CudaSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -435,7 +435,7 @@ class HostLapackInfo : public ScratchSpace { public: HostLapackInfo(OpKernelContext* context, int64 size, const std::string& debug_info) - : ScratchSpace(context, size, debug_info, /* on_host */ true){}; + : ScratchSpace(context, size, debug_info, /* on_host */ true) {} }; class DeviceLapackInfo : public ScratchSpace { @@ -489,4 +489,4 @@ inline DeviceLapackInfo CudaSolver::GetDeviceLapackInfo( #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ +#endif // TENSORFLOW_CORE_UTIL_CUDA_SOLVERS_H_ diff 
--git a/tensorflow/core/kernels/cuda_sparse.cc b/tensorflow/core/util/cuda_sparse.cc similarity index 99% rename from tensorflow/core/kernels/cuda_sparse.cc rename to tensorflow/core/util/cuda_sparse.cc index 141aae61571..47e018560e1 100644 --- a/tensorflow/core/kernels/cuda_sparse.cc +++ b/tensorflow/core/util/cuda_sparse.cc @@ -15,7 +15,7 @@ limitations under the License. #ifdef GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #include #include @@ -28,7 +28,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -38,6 +37,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" // TODO(rmlarsen,penporn): Investigate using newer kernels in CUDA 10.1+. diff --git a/tensorflow/core/kernels/cuda_sparse.h b/tensorflow/core/util/cuda_sparse.h similarity index 93% rename from tensorflow/core/kernels/cuda_sparse.h rename to tensorflow/core/util/cuda_sparse.h index 978bc9005ed..76580766d69 100644 --- a/tensorflow/core/kernels/cuda_sparse.h +++ b/tensorflow/core/util/cuda_sparse.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ -#define TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ +#ifndef TENSORFLOW_CORE_UTIL_CUDA_SPARSE_H_ +#define TENSORFLOW_CORE_UTIL_CUDA_SPARSE_H_ // This header declares the class GpuSparse, which contains wrappers of // cuSparse libraries for use in TensorFlow kernels. @@ -75,8 +75,7 @@ using gpuStream_t = hipStream_t; namespace tensorflow { -inline std::string ConvertGPUSparseErrorToString( - const gpusparseStatus_t status) { +inline string ConvertGPUSparseErrorToString(const gpusparseStatus_t status) { switch (status) { #define STRINGIZE(q) #q #define RETURN_IF_STATUS(err) \ @@ -206,49 +205,49 @@ class GpuSparse { // Solves tridiagonal system of equations. // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2 template - Status Gtsv2(int m, int n, const Scalar *dl, const Scalar *d, - const Scalar *du, Scalar *B, int ldb, void *pBuffer) const; + Status Gtsv2(int m, int n, const Scalar* dl, const Scalar* d, + const Scalar* du, Scalar* B, int ldb, void* pBuffer) const; // Computes the size of a temporary buffer used by Gtsv2. // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_bufferSize template - Status Gtsv2BufferSizeExt(int m, int n, const Scalar *dl, const Scalar *d, - const Scalar *du, const Scalar *B, int ldb, - size_t *bufferSizeInBytes) const; + Status Gtsv2BufferSizeExt(int m, int n, const Scalar* dl, const Scalar* d, + const Scalar* du, const Scalar* B, int ldb, + size_t* bufferSizeInBytes) const; // Solves tridiagonal system of equations without partial pivoting. 
// See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_nopivot template - Status Gtsv2NoPivot(int m, int n, const Scalar *dl, const Scalar *d, - const Scalar *du, Scalar *B, int ldb, - void *pBuffer) const; + Status Gtsv2NoPivot(int m, int n, const Scalar* dl, const Scalar* d, + const Scalar* du, Scalar* B, int ldb, + void* pBuffer) const; // Computes the size of a temporary buffer used by Gtsv2NoPivot. // See: // https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_nopivot_bufferSize template - Status Gtsv2NoPivotBufferSizeExt(int m, int n, const Scalar *dl, - const Scalar *d, const Scalar *du, - const Scalar *B, int ldb, - size_t *bufferSizeInBytes) const; + Status Gtsv2NoPivotBufferSizeExt(int m, int n, const Scalar* dl, + const Scalar* d, const Scalar* du, + const Scalar* B, int ldb, + size_t* bufferSizeInBytes) const; // Solves a batch of tridiagonal systems of equations. Doesn't support // multiple right-hand sides per each system. Doesn't do pivoting. // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2stridedbatch template - Status Gtsv2StridedBatch(int m, const Scalar *dl, const Scalar *d, - const Scalar *du, Scalar *x, int batchCount, - int batchStride, void *pBuffer) const; + Status Gtsv2StridedBatch(int m, const Scalar* dl, const Scalar* d, + const Scalar* du, Scalar* x, int batchCount, + int batchStride, void* pBuffer) const; // Computes the size of a temporary buffer used by Gtsv2StridedBatch. // See: // https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2stridedbatch_bufferSize template - Status Gtsv2StridedBatchBufferSizeExt(int m, const Scalar *dl, - const Scalar *d, const Scalar *du, - const Scalar *x, int batchCount, + Status Gtsv2StridedBatchBufferSizeExt(int m, const Scalar* dl, + const Scalar* d, const Scalar* du, + const Scalar* x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) const; + size_t* bufferSizeInBytes) const; // Compresses the indices of rows or columns. 
It can be interpreted as a // conversion from COO to CSR sparse storage format. See: @@ -449,7 +448,7 @@ class GpuSparse { private: bool initialized_; - OpKernelContext *context_; // not owned. + OpKernelContext* context_; // not owned. gpuStream_t gpu_stream_; gpusparseHandle_t* gpusparse_handle_; // not owned. @@ -585,4 +584,4 @@ class GpuSparseCsrSortingConversionInfo { #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ +#endif // TENSORFLOW_CORE_UTIL_CUDA_SPARSE_H_ diff --git a/tensorflow/core/kernels/rocm_solvers.cc b/tensorflow/core/util/rocm_solvers.cc similarity index 99% rename from tensorflow/core/kernels/rocm_solvers.cc rename to tensorflow/core/util/rocm_solvers.cc index 5faf718332e..13dadf602a7 100644 --- a/tensorflow/core/kernels/rocm_solvers.cc +++ b/tensorflow/core/util/rocm_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #if TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/rocm_solvers.h" +#include "tensorflow/core/util/rocm_solvers.h" #include #include diff --git a/tensorflow/core/kernels/rocm_solvers.h b/tensorflow/core/util/rocm_solvers.h similarity index 96% rename from tensorflow/core/kernels/rocm_solvers.h rename to tensorflow/core/util/rocm_solvers.h index 94d3c82a497..afc8b936d05 100644 --- a/tensorflow/core/kernels/rocm_solvers.h +++ b/tensorflow/core/util/rocm_solvers.h @@ -14,8 +14,8 @@ limitations under the License.
============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ // This header declares the class ROCmSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -158,4 +158,4 @@ class ScratchSpace { #endif // TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ diff --git a/tensorflow/core/kernels/rocm_sparse.cc b/tensorflow/core/util/rocm_sparse.cc similarity index 99% rename from tensorflow/core/kernels/rocm_sparse.cc rename to tensorflow/core/util/rocm_sparse.cc index 97488692bc1..cc7b56fdc01 100644 --- a/tensorflow/core/kernels/rocm_sparse.cc +++ b/tensorflow/core/util/rocm_sparse.cc @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -35,6 +33,8 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" namespace tensorflow { namespace {