Move clustering ops to core.

PiperOrigin-RevId: 228808275
2019-01-10 17:33:58 -08:00 · 2019-01-10 17:33:58 -08:00 · 578bd3a276
commit 578bd3a276
parent 3fc2b09b60
16 changed files with 221 additions and 157 deletions
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@ -28,7 +28,6 @@ tf_custom_op_py_library(
        "python/ops/wals.py",
    ],
    dso = [
-        ":python/ops/_clustering_ops.so",
        ":python/ops/_factorization_ops.so",
    ],
    kernels = [
@ -38,12 +37,12 @@ tf_custom_op_py_library(
    srcs_version = "PY2AND3",
    deps = [
        ":factorization_ops_test_utils_py",
-        ":gen_clustering_ops",
        ":gen_factorization_ops",
        "//tensorflow/contrib/framework:framework_py",
        "//tensorflow/contrib/util:util_py",
        "//tensorflow/python:array_ops",
        "//tensorflow/python:check_ops",
+        "//tensorflow/python:clustering_ops_gen",
        "//tensorflow/python:control_flow_ops",
        "//tensorflow/python:data_flow_ops",
        "//tensorflow/python:embedding_ops",
@ -77,17 +76,6 @@ py_library(
    ],
 )

-# Ops
-tf_custom_op_library(
-    name = "python/ops/_clustering_ops.so",
-    srcs = [
-        "ops/clustering_ops.cc",
-    ],
-    deps = [
-        "//tensorflow/contrib/factorization/kernels:clustering_ops",
-    ],
-)
-
 tf_custom_op_library(
    name = "python/ops/_factorization_ops.so",
    srcs = [
@ -100,26 +88,16 @@ tf_custom_op_library(
 )

 tf_gen_op_libs([
-    "clustering_ops",
    "factorization_ops",
 ])

 cc_library(
    name = "all_ops",
    deps = [
-        ":clustering_ops_op_lib",
        ":factorization_ops_op_lib",
    ],
 )

-tf_gen_op_wrapper_py(
-    name = "gen_clustering_ops",
-    out = "python/ops/gen_clustering_ops.py",
-    deps = [
-        ":clustering_ops_op_lib",
-    ],
-)
-
 tf_gen_op_wrapper_py(
    name = "gen_factorization_ops",
    out = "python/ops/gen_factorization_ops.py",
--- a/tensorflow/contrib/factorization/kernels/BUILD
+++ b/tensorflow/contrib/factorization/kernels/BUILD
@ -11,7 +11,6 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 cc_library(
    name = "all_kernels",
    deps = [
-        ":clustering_ops",
        ":masked_matmul_ops",
        ":wals_solver_ops",
        "@protobuf_archive//:protobuf_headers",
@ -29,17 +28,6 @@ cc_library(
    alwayslink = 1,
 )

-cc_library(
-    name = "clustering_ops",
-    srcs = ["clustering_ops.cc"],
-    deps = [
-        "//tensorflow/core:framework_headers_lib",
-        "//third_party/eigen3",
-        "@protobuf_archive//:protobuf_headers",
-    ],
-    alwayslink = 1,
-)
-
 cc_library(
    name = "masked_matmul_ops",
    srcs = ["masked_matmul_ops.cc"],
@ -51,19 +39,3 @@ cc_library(
    ],
    alwayslink = 1,
 )
-
-tf_cc_test(
-    name = "clustering_ops_test",
-    srcs = ["clustering_ops_test.cc"],
-    deps = [
-        ":clustering_ops",
-        "//tensorflow/contrib/factorization:clustering_ops_op_lib",
-        "//tensorflow/core:core_cpu",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:protos_all_cc",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ],
-)
--- a/tensorflow/contrib/factorization/ops/clustering_ops.cc
+++ b/tensorflow/contrib/factorization/ops/clustering_ops.cc
@ -1,91 +0,0 @@
-// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License"); you may not
-// use this file except in compliance with the License.  You may obtain a copy
-// of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
-// License for the specific language governing permissions and limitations under
-// the License.
-// ==============================================================================
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-
-namespace tensorflow {
-
-REGISTER_OP("KmeansPlusPlusInitialization")
-    .Input("points: float32")
-    .Input("num_to_sample: int64")
-    .Input("seed: int64")
-    .Input("num_retries_per_sample: int64")
-    .Output("samples: float32")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"(
-Selects num_to_sample rows of input using the KMeans++ criterion.
-
-Rows of points are assumed to be input points. One row is selected at random.
-Subsequent rows are sampled with probability proportional to the squared L2
-distance from the nearest row selected thus far till num_to_sample rows have
-been sampled.
-
-points: Matrix of shape (n, d). Rows are assumed to be input points.
-num_to_sample: Scalar. The number of rows to sample. This value must not be
-  larger than n.
-seed: Scalar. Seed for initializing the random number generator.
-num_retries_per_sample: Scalar. For each row that is sampled, this parameter
-  specifies the number of additional points to draw from the current
-  distribution before selecting the best. If a negative value is specified, a
-  heuristic is used to sample O(log(num_to_sample)) additional points.
-samples: Matrix of shape (num_to_sample, d). The sampled rows.
-)");
-
-REGISTER_OP("KMC2ChainInitialization")
-    .Input("distances: float32")
-    .Input("seed: int64")
-    .Output("index: int64")
-    .SetShapeFn(shape_inference::ScalarShape)
-    .Doc(R"(
-Returns the index of a data point that should be added to the seed set.
-
-Entries in distances are assumed to be squared distances of candidate points to
-the already sampled centers in the seed set. The op constructs one Markov chain
-of the k-MC^2 algorithm and returns the index of one candidate point to be added
-as an additional cluster center.
-
-distances: Vector with squared distances to the closest previously sampled
-  cluster center for each candidate point.
-seed: Scalar. Seed for initializing the random number generator.
-index: Scalar with the index of the sampled point.
-)");
-
-REGISTER_OP("NearestNeighbors")
-    .Input("points: float32")
-    .Input("centers: float32")
-    .Input("k: int64")
-    .Output("nearest_center_indices: int64")
-    .Output("nearest_center_distances: float32")
-    .SetShapeFn(shape_inference::UnknownShape)
-    .Doc(R"(
-Selects the k nearest centers for each point.
-
-Rows of points are assumed to be input points. Rows of centers are assumed to be
-the list of candidate centers. For each point, the k centers that have least L2
-distance to it are computed.
-
-points: Matrix of shape (n, d). Rows are assumed to be input points.
-centers: Matrix of shape (m, d). Rows are assumed to be centers.
-k: Scalar. Number of nearest centers to return for each point. If k is larger
-  than m, then only m centers are returned.
-nearest_center_indices: Matrix of shape (n, min(m, k)). Each row contains the
-  indices of the centers closest to the corresponding point, ordered by
-  increasing distance.
-nearest_center_distances: Matrix of shape (n, min(m, k)). Each row contains the
-  squared L2 distance to the corresponding center in nearest_center_indices.
-)");
-
-}  // namespace tensorflow
--- a/tensorflow/contrib/factorization/python/ops/clustering_ops.py
+++ b/tensorflow/contrib/factorization/python/ops/clustering_ops.py
@ -18,28 +18,23 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-from tensorflow.contrib.factorization.python.ops import gen_clustering_ops
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.factorization.python.ops.gen_clustering_ops import *
-# pylint: enable=wildcard-import
-from tensorflow.contrib.util import loader
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_clustering_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_impl
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops.embedding_ops import embedding_lookup
-from tensorflow.python.platform import resource_loader
-
-_clustering_ops = loader.load_op_library(
-    resource_loader.get_path_to_datafile('_clustering_ops.so'))
+# go/tf-wildcard-import
+# pylint: disable=wildcard-import
+from tensorflow.python.ops.gen_clustering_ops import *
+# pylint: enable=wildcard-import

 # Euclidean distance between vectors U and V is defined as \\(||U - V||_F\\)
 # which is the square root of the sum of the absolute squares of the elements
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@ -1074,6 +1074,7 @@ tf_gen_op_libs(
        "tensor_forest_ops",
        "candidate_sampling_ops",
        "checkpoint_ops",
+        "clustering_ops",
        "collective_ops",
        "control_flow_ops",
        "ctc_ops",
@ -1228,6 +1229,7 @@ cc_library(
        ":tensor_forest_ops_op_lib",
        ":candidate_sampling_ops_op_lib",
        ":checkpoint_ops_op_lib",
+        ":clustering_ops_op_lib",
        ":collective_ops_op_lib",
        ":control_flow_ops_op_lib",
        ":ctc_ops_op_lib",
@ -1382,6 +1384,7 @@ cc_library(
        "//tensorflow/core/kernels:tensor_forest_ops",
        "//tensorflow/core/kernels:candidate_sampler_ops",
        "//tensorflow/core/kernels:checkpoint_ops",
+        "//tensorflow/core/kernels:clustering_ops",
        "//tensorflow/core/kernels:collective_ops",
        "//tensorflow/core/kernels:control_flow_ops",
        "//tensorflow/core/kernels:ctc_ops",
--- a/tensorflow/core/api_def/base_api/api_def_KMC2ChainInitialization.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_KMC2ChainInitialization.pbtxt
@ -0,0 +1,30 @@
+op {
+  graph_op_name: "KMC2ChainInitialization"
+  visibility: HIDDEN
+  in_arg {
+    name: "distances"
+    description: <<END
+Vector with squared distances to the closest previously sampled cluster center
+for each candidate point.
+END
+  }
+  in_arg {
+    name: "seed"
+    description: <<END
+Scalar. Seed for initializing the random number generator.
+END
+  }
+  out_arg {
+    name: "index"
+    description: <<END
+Scalar with the index of the sampled point.
+END
+  }
+  summary: "Returns the index of a data point that should be added to the seed set."
+  description: <<END
+Entries in distances are assumed to be squared distances of candidate points to
+the already sampled centers in the seed set. The op constructs one Markov chain
+of the k-MC^2 algorithm and returns the index of one candidate point to be added
+as an additional cluster center.
+END
+}
--- a/tensorflow/core/api_def/base_api/api_def_KmeansPlusPlusInitialization.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_KmeansPlusPlusInitialization.pbtxt
@ -0,0 +1,44 @@
+op {
+  graph_op_name: "KmeansPlusPlusInitialization"
+  visibility: HIDDEN
+  in_arg {
+    name: "points"
+    description: <<END
+Matrix of shape (n, d). Rows are assumed to be input points.
+END
+  }
+  in_arg {
+    name: "num_to_sample"
+    description: <<END
+Scalar. The number of rows to sample. This value must not be larger than n.
+END
+  }
+  in_arg {
+    name: "seed"
+    description: <<END
+Scalar. Seed for initializing the random number generator.
+END
+  }
+  in_arg {
+    name: "num_retries_per_sample"
+    description: <<END
+Scalar. For each row that is sampled, this parameter
+specifies the number of additional points to draw from the current
+distribution before selecting the best. If a negative value is specified, a
+heuristic is used to sample O(log(num_to_sample)) additional points.
+END
+  }
+  out_arg {
+    name: "samples"
+    description: <<END
+Matrix of shape (num_to_sample, d). The sampled rows.
+END
+  }
+  summary: "Selects num_to_sample rows of input using the KMeans++ criterion."
+  description: <<END
+Rows of points are assumed to be input points. One row is selected at random.
+Subsequent rows are sampled with probability proportional to the squared L2
+distance from the nearest row selected thus far, until num_to_sample rows have
+been sampled.
+END
+}
--- a/tensorflow/core/api_def/base_api/api_def_NearestNeighbors.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_NearestNeighbors.pbtxt
@ -0,0 +1,43 @@
+op {
+  graph_op_name: "NearestNeighbors"
+  visibility: HIDDEN
+  in_arg {
+    name: "points"
+    description: <<END
+Matrix of shape (n, d). Rows are assumed to be input points.
+END
+  }
+  in_arg {
+    name: "centers"
+    description: <<END
+Matrix of shape (m, d). Rows are assumed to be centers.
+END
+  }
+  in_arg {
+    name: "k"
+    description: <<END
+Scalar. Number of nearest centers to return for each point. If k is larger than
+m, then only m centers are returned.
+END
+  }
+  out_arg {
+    name: "nearest_center_indices"
+    description: <<END
+Matrix of shape (n, min(m, k)). Each row contains the indices of the centers
+closest to the corresponding point, ordered by increasing distance.
+END
+  }
+  out_arg {
+    name: "nearest_center_distances"
+    description: <<END
+Matrix of shape (n, min(m, k)). Each row contains the squared L2 distance to the
+corresponding center in nearest_center_indices.
+END
+  }
+  summary: "Selects the k nearest centers for each point."
+  description: <<END
+Rows of points are assumed to be input points. Rows of centers are assumed to be
+the list of candidate centers. For each point, the k centers with the least L2
+distance to it are computed.
+END
+}
--- a/tensorflow/core/api_def/python_api/api_def_KMC2ChainInitialization.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_KMC2ChainInitialization.pbtxt
@ -0,0 +1,4 @@
+op {
+  graph_op_name: "KMC2ChainInitialization"
+  visibility: HIDDEN
+}
--- a/tensorflow/core/api_def/python_api/api_def_KmeansPlusPlusInitialization.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_KmeansPlusPlusInitialization.pbtxt
@ -0,0 +1,4 @@
+op {
+  graph_op_name: "KmeansPlusPlusInitialization"
+  visibility: HIDDEN
+}
--- a/tensorflow/core/api_def/python_api/api_def_NearestNeighbors.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_NearestNeighbors.pbtxt
@ -0,0 +1,4 @@
+op {
+  graph_op_name: "NearestNeighbors"
+  visibility: HIDDEN
+}
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@ -152,6 +152,33 @@ tf_kernel_library(
    ],
 )

+tf_kernel_library(
+    name = "clustering_ops",
+    prefix = "clustering_ops",
+    deps = [
+        "//tensorflow/core:clustering_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_headers_lib",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "clustering_ops_test",
+    srcs = ["clustering_ops_test.cc"],
+    deps = [
+        ":clustering_ops",
+        "//tensorflow/core:clustering_ops_op_lib",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_kernel_library(
    name = "collective_ops",
    prefix = "collective_ops",
--- a/tensorflow/contrib/factorization/kernels/clustering_ops.cc
+++ b/tensorflow/contrib/factorization/kernels/clustering_ops.cc
@ -392,7 +392,7 @@ class NearestNeighborsOp : public OpKernel {
      for (; start < limit; ++start) {
        const int64 start_row = num_points * start / num_units;
        const int64 limit_row = num_points * (start + 1) / num_units;
-        CHECK_LE(limit_row, num_points);
+        DCHECK_LE(limit_row, num_points);
        const int64 num_rows = limit_row - start_row;
        auto points_shard = points.middleRows(start_row, num_rows);
        const Eigen::VectorXf points_half_squared_norm =
@ -430,7 +430,7 @@ class NearestNeighborsOp : public OpKernel {
      const Eigen::Ref<const Eigen::VectorXf>& centers_half_squared_norm,
      const Eigen::Ref<MatrixXi64RowMajor>& nearest_center_indices,
      const Eigen::Ref<MatrixXfRowMajor>& nearest_center_distances) {
-    CHECK_LE(k, centers.rows());
+    DCHECK_LE(k, centers.rows());
    if (centers.rows() <= kNearestNeighborsCentersMaxBlockSize) {
      FindKNearestCentersOneBlock(k, points, points_half_squared_norm, centers,
                                  centers_half_squared_norm,
@ -451,7 +451,7 @@ class NearestNeighborsOp : public OpKernel {
      const Eigen::Ref<const Eigen::VectorXf>& centers_half_squared_norm,
      Eigen::Ref<MatrixXi64RowMajor> nearest_center_indices,
      Eigen::Ref<MatrixXfRowMajor> nearest_center_distances) {
-    CHECK_LE(k, centers.rows());
+    DCHECK_LE(k, centers.rows());
    const int64 num_points = points.rows();
    const MatrixXfRowMajor inner_product = points * centers.transpose();
    // Find nearest neighbors.
@ -500,8 +500,8 @@ class NearestNeighborsOp : public OpKernel {
      Eigen::Ref<MatrixXfRowMajor> nearest_center_distances) {
    const int64 num_points = points.rows();
    const int64 num_centers = centers.rows();
-    CHECK_LE(k, num_centers);
-    CHECK_GT(num_centers, kNearestNeighborsCentersMaxBlockSize);
+    DCHECK_LE(k, num_centers);
+    DCHECK_GT(num_centers, kNearestNeighborsCentersMaxBlockSize);
    // Store nearest neighbors with first block of centers directly into the
    // output matrices.
    int64 out_k = std::min(k, kNearestNeighborsCentersMaxBlockSize);
--- a/tensorflow/contrib/factorization/kernels/clustering_ops_test.cc
+++ b/tensorflow/contrib/factorization/kernels/clustering_ops_test.cc
--- a/tensorflow/core/ops/clustering_ops.cc
+++ b/tensorflow/core/ops/clustering_ops.cc
@ -0,0 +1,43 @@
+// Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License.  You may obtain a copy
+// of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+// License for the specific language governing permissions and limitations under
+// the License.
+// ==============================================================================
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+
+namespace tensorflow {
+
+REGISTER_OP("KmeansPlusPlusInitialization")
+    .Input("points: float32")
+    .Input("num_to_sample: int64")
+    .Input("seed: int64")
+    .Input("num_retries_per_sample: int64")
+    .Output("samples: float32")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+REGISTER_OP("KMC2ChainInitialization")
+    .Input("distances: float32")
+    .Input("seed: int64")
+    .Output("index: int64")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("NearestNeighbors")
+    .Input("points: float32")
+    .Input("centers: float32")
+    .Input("k: int64")
+    .Output("nearest_center_indices: int64")
+    .Output("nearest_center_distances: float32")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+}  // namespace tensorflow
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@ -1680,6 +1680,14 @@ tf_gen_op_wrapper_private_py(
    ],
 )

+tf_gen_op_wrapper_private_py(
+    name = "clustering_ops_gen",
+    visibility = ["//tensorflow:internal"],
+    deps = [
+        "//tensorflow/core:clustering_ops_op_lib",
+    ],
+)
+
 tf_gen_op_wrapper_private_py(
    name = "collective_ops_gen",
    visibility = ["//tensorflow:internal"],