Move clustering ops to core.

PiperOrigin-RevId: 228808275
This commit is contained in:
Zhenyu Tan 2019-01-10 17:33:58 -08:00 committed by TensorFlower Gardener
parent 3fc2b09b60
commit 578bd3a276
16 changed files with 221 additions and 157 deletions

View File

@ -28,7 +28,6 @@ tf_custom_op_py_library(
"python/ops/wals.py",
],
dso = [
":python/ops/_clustering_ops.so",
":python/ops/_factorization_ops.so",
],
kernels = [
@ -38,12 +37,12 @@ tf_custom_op_py_library(
srcs_version = "PY2AND3",
deps = [
":factorization_ops_test_utils_py",
":gen_clustering_ops",
":gen_factorization_ops",
"//tensorflow/contrib/framework:framework_py",
"//tensorflow/contrib/util:util_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:check_ops",
"//tensorflow/python:clustering_ops_gen",
"//tensorflow/python:control_flow_ops",
"//tensorflow/python:data_flow_ops",
"//tensorflow/python:embedding_ops",
@ -77,17 +76,6 @@ py_library(
],
)
# Ops
# Custom-op shared object for the clustering ops: built from
# ops/clustering_ops.cc and depending on the clustering kernels target.
tf_custom_op_library(
name = "python/ops/_clustering_ops.so",
srcs = [
"ops/clustering_ops.cc",
],
deps = [
"//tensorflow/contrib/factorization/kernels:clustering_ops",
],
)
tf_custom_op_library(
name = "python/ops/_factorization_ops.so",
srcs = [
@ -100,26 +88,16 @@ tf_custom_op_library(
)
# Declares the op libraries of this package, one per listed name.
# NOTE(review): presumably this is what creates the `<name>_op_lib` targets
# (e.g. :factorization_ops_op_lib) used elsewhere in this file -- confirm
# against the macro definition.
tf_gen_op_libs([
"clustering_ops",
"factorization_ops",
])
# Convenience target that aggregates the op libraries of this package so that
# dependents can pull them in through a single label.
cc_library(
name = "all_ops",
deps = [
":clustering_ops_op_lib",
":factorization_ops_op_lib",
],
)
# Generates the Python wrapper module python/ops/gen_clustering_ops.py from the
# clustering op library.
tf_gen_op_wrapper_py(
name = "gen_clustering_ops",
out = "python/ops/gen_clustering_ops.py",
deps = [
":clustering_ops_op_lib",
],
)
tf_gen_op_wrapper_py(
name = "gen_factorization_ops",
out = "python/ops/gen_factorization_ops.py",

View File

@ -11,7 +11,6 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test")
cc_library(
name = "all_kernels",
deps = [
":clustering_ops",
":masked_matmul_ops",
":wals_solver_ops",
"@protobuf_archive//:protobuf_headers",
@ -29,17 +28,6 @@ cc_library(
alwayslink = 1,
)
# Clustering kernels, compiled from clustering_ops.cc.
cc_library(
name = "clustering_ops",
srcs = ["clustering_ops.cc"],
deps = [
"//tensorflow/core:framework_headers_lib",
"//third_party/eigen3",
"@protobuf_archive//:protobuf_headers",
],
# alwayslink keeps every object file of this library in the final link even
# when no symbol is referenced directly.
# NOTE(review): presumably needed so the kernel registrations are not dropped
# by the linker -- confirm.
alwayslink = 1,
)
cc_library(
name = "masked_matmul_ops",
srcs = ["masked_matmul_ops.cc"],
@ -51,19 +39,3 @@ cc_library(
],
alwayslink = 1,
)
# C++ test for the clustering kernels (clustering_ops_test.cc). It depends on
# both the kernel target and the matching op library from the parent package.
tf_cc_test(
name = "clustering_ops_test",
srcs = ["clustering_ops_test.cc"],
deps = [
":clustering_ops",
"//tensorflow/contrib/factorization:clustering_ops_op_lib",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
],
)

View File

@ -1,91 +0,0 @@
// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
// ==============================================================================
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
namespace tensorflow {
// Registers the KmeansPlusPlusInitialization op: one float32 input (`points`),
// three int64 inputs (`num_to_sample`, `seed`, `num_retries_per_sample`) and a
// single float32 output (`samples`).
// NOTE(review): the shape function is shape_inference::UnknownShape, so
// presumably no static output shape is inferred even though the doc string
// describes `samples` as (num_to_sample, d) -- confirm.
REGISTER_OP("KmeansPlusPlusInitialization")
.Input("points: float32")
.Input("num_to_sample: int64")
.Input("seed: int64")
.Input("num_retries_per_sample: int64")
.Output("samples: float32")
.SetShapeFn(shape_inference::UnknownShape)
.Doc(R"(
Selects num_to_sample rows of input using the KMeans++ criterion.
Rows of points are assumed to be input points. One row is selected at random.
Subsequent rows are sampled with probability proportional to the squared L2
distance from the nearest row selected thus far till num_to_sample rows have
been sampled.
points: Matrix of shape (n, d). Rows are assumed to be input points.
num_to_sample: Scalar. The number of rows to sample. This value must not be
larger than n.
seed: Scalar. Seed for initializing the random number generator.
num_retries_per_sample: Scalar. For each row that is sampled, this parameter
specifies the number of additional points to draw from the current
distribution before selecting the best. If a negative value is specified, a
heuristic is used to sample O(log(num_to_sample)) additional points.
samples: Matrix of shape (num_to_sample, d). The sampled rows.
)");
// Registers the KMC2ChainInitialization op: a float32 `distances` input, an
// int64 `seed` input and a single int64 output (`index`).
// The shape function is shape_inference::ScalarShape, matching the doc string's
// description of `index` as a scalar.
REGISTER_OP("KMC2ChainInitialization")
.Input("distances: float32")
.Input("seed: int64")
.Output("index: int64")
.SetShapeFn(shape_inference::ScalarShape)
.Doc(R"(
Returns the index of a data point that should be added to the seed set.
Entries in distances are assumed to be squared distances of candidate points to
the already sampled centers in the seed set. The op constructs one Markov chain
of the k-MC^2 algorithm and returns the index of one candidate point to be added
as an additional cluster center.
distances: Vector with squared distances to the closest previously sampled
cluster center for each candidate point.
seed: Scalar. Seed for initializing the random number generator.
index: Scalar with the index of the sampled point.
)");
// Registers the NearestNeighbors op: float32 `points` and `centers` inputs, an
// int64 `k` input, and two outputs (int64 indices and float32 distances).
// NOTE(review): the shape function is shape_inference::UnknownShape, so
// presumably neither output gets a static shape even though the doc string
// describes both as (n, min(m, k)) -- confirm.
REGISTER_OP("NearestNeighbors")
.Input("points: float32")
.Input("centers: float32")
.Input("k: int64")
.Output("nearest_center_indices: int64")
.Output("nearest_center_distances: float32")
.SetShapeFn(shape_inference::UnknownShape)
.Doc(R"(
Selects the k nearest centers for each point.
Rows of points are assumed to be input points. Rows of centers are assumed to be
the list of candidate centers. For each point, the k centers that have least L2
distance to it are computed.
points: Matrix of shape (n, d). Rows are assumed to be input points.
centers: Matrix of shape (m, d). Rows are assumed to be centers.
k: Scalar. Number of nearest centers to return for each point. If k is larger
than m, then only m centers are returned.
nearest_center_indices: Matrix of shape (n, min(m, k)). Each row contains the
indices of the centers closest to the corresponding point, ordered by
increasing distance.
nearest_center_distances: Matrix of shape (n, min(m, k)). Each row contains the
squared L2 distance to the corresponding center in nearest_center_indices.
)");
} // namespace tensorflow

View File

@ -18,28 +18,23 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.factorization.python.ops import gen_clustering_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.contrib.factorization.python.ops.gen_clustering_ops import *
# pylint: enable=wildcard-import
from tensorflow.contrib.util import loader
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_clustering_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_impl
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops.embedding_ops import embedding_lookup
from tensorflow.python.platform import resource_loader
# Load the compiled clustering op library; the path to '_clustering_ops.so' is
# resolved through resource_loader.get_path_to_datafile.
_clustering_ops = loader.load_op_library(
resource_loader.get_path_to_datafile('_clustering_ops.so'))
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.ops.gen_clustering_ops import *
# pylint: enable=wildcard-import
# Euclidean distance between vectors U and V is defined as \\(||U - V||_F\\)
# which is the square root of the sum of the absolute squares of the elements

View File

@ -1074,6 +1074,7 @@ tf_gen_op_libs(
"tensor_forest_ops",
"candidate_sampling_ops",
"checkpoint_ops",
"clustering_ops",
"collective_ops",
"control_flow_ops",
"ctc_ops",
@ -1228,6 +1229,7 @@ cc_library(
":tensor_forest_ops_op_lib",
":candidate_sampling_ops_op_lib",
":checkpoint_ops_op_lib",
":clustering_ops_op_lib",
":collective_ops_op_lib",
":control_flow_ops_op_lib",
":ctc_ops_op_lib",
@ -1382,6 +1384,7 @@ cc_library(
"//tensorflow/core/kernels:tensor_forest_ops",
"//tensorflow/core/kernels:candidate_sampler_ops",
"//tensorflow/core/kernels:checkpoint_ops",
"//tensorflow/core/kernels:clustering_ops",
"//tensorflow/core/kernels:collective_ops",
"//tensorflow/core/kernels:control_flow_ops",
"//tensorflow/core/kernels:ctc_ops",

View File

@ -0,0 +1,30 @@
# API definition for the KMC2ChainInitialization op: HIDDEN visibility, plus the
# summary, description and per-argument docs (distances, seed -> index).
op {
graph_op_name: "KMC2ChainInitialization"
visibility: HIDDEN
in_arg {
name: "distances"
description: <<END
Vector with squared distances to the closest previously sampled cluster center
for each candidate point.
END
}
in_arg {
name: "seed"
description: <<END
Scalar. Seed for initializing the random number generator.
END
}
out_arg {
name: "index"
description: <<END
Scalar with the index of the sampled point.
END
}
summary: "Returns the index of a data point that should be added to the seed set."
description: <<END
Entries in distances are assumed to be squared distances of candidate points to
the already sampled centers in the seed set. The op constructs one Markov chain
of the k-MC^2 algorithm and returns the index of one candidate point to be added
as an additional cluster center.
END
}

View File

@ -0,0 +1,44 @@
# API definition for the KmeansPlusPlusInitialization op: HIDDEN visibility,
# plus the summary, description and per-argument docs
# (points, num_to_sample, seed, num_retries_per_sample -> samples).
op {
graph_op_name: "KmeansPlusPlusInitialization"
visibility: HIDDEN
in_arg {
name: "points"
description: <<END
Matrix of shape (n, d). Rows are assumed to be input points.
END
}
in_arg {
name: "num_to_sample"
description: <<END
Scalar. The number of rows to sample. This value must not be larger than n.
END
}
in_arg {
name: "seed"
description: <<END
Scalar. Seed for initializing the random number generator.
END
}
in_arg {
name: "num_retries_per_sample"
description: <<END
Scalar. For each row that is sampled, this parameter
specifies the number of additional points to draw from the current
distribution before selecting the best. If a negative value is specified, a
heuristic is used to sample O(log(num_to_sample)) additional points.
END
}
out_arg {
name: "samples"
description: <<END
Matrix of shape (num_to_sample, d). The sampled rows.
END
}
summary: "Selects num_to_sample rows of input using the KMeans++ criterion."
description: <<END
Rows of points are assumed to be input points. One row is selected at random.
Subsequent rows are sampled with probability proportional to the squared L2
distance from the nearest row selected thus far till num_to_sample rows have
been sampled.
END
}

View File

@ -0,0 +1,43 @@
op {
graph_op_name: "NearestNeighbors"
visibility: HIDDEN
in_arg {
name: "points"
description: <<END
Matrix of shape (n, d). Rows are assumed to be input points.
END
}
in_arg {
name: "centers"
description: <<END
Matrix of shape (m, d). Rows are assumed to be centers.
END
}
in_arg {
name: "k"
description: <<END
Scalar. Number of nearest centers to return for each point. If k is larger than
m, then only m centers are returned.
END
}
out_arg {
name: "nearest_center_indices"
description: <<END
Matrix of shape (n, min(m, k)). Each row contains the indices of the centers
closest to the corresponding point, ordered by increasing distance.
END
}
out_arg {
name: "nearest_center_distances"
description: <<END
Matrix of shape (n, min(m, k)). Each row contains the squared L2 distance to the
corresponding center in nearest_center_indices.
END
}
summary: "Selects the k nearest centers for each point."
description: <<END
Rows of points are assumed to be input points. Rows of centers are assumed to be
the list of candidate centers. For each point, the k centers that have least L2
distance to it are computed.
END
}

View File

@ -0,0 +1,4 @@
# API definition entry that only sets the visibility of KMC2ChainInitialization
# to HIDDEN; no other field is specified here.
op {
graph_op_name: "KMC2ChainInitialization"
visibility: HIDDEN
}

View File

@ -0,0 +1,4 @@
# API definition entry that only sets the visibility of
# KmeansPlusPlusInitialization to HIDDEN; no other field is specified here.
op {
graph_op_name: "KmeansPlusPlusInitialization"
visibility: HIDDEN
}

View File

@ -0,0 +1,4 @@
# API definition entry that only sets the visibility of NearestNeighbors to
# HIDDEN; no other field is specified here.
op {
graph_op_name: "NearestNeighbors"
visibility: HIDDEN
}

View File

@ -152,6 +152,33 @@ tf_kernel_library(
],
)
# Kernel library for the clustering ops; depends on the matching op library in
# //tensorflow/core.
# NOTE(review): no srcs are listed -- presumably the macro collects the sources
# from the `prefix` ("clustering_ops*") -- confirm against tf_kernel_library.
tf_kernel_library(
name = "clustering_ops",
prefix = "clustering_ops",
deps = [
"//tensorflow/core:clustering_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:framework_headers_lib",
"//tensorflow/core:lib",
],
)
# C++ test for the clustering kernels (clustering_ops_test.cc). It depends on
# both the kernel library above and the op library from //tensorflow/core.
tf_cc_test(
name = "clustering_ops_test",
srcs = ["clustering_ops_test.cc"],
deps = [
":clustering_ops",
"//tensorflow/core:clustering_ops_op_lib",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
],
)
tf_kernel_library(
name = "collective_ops",
prefix = "collective_ops",

View File

@ -392,7 +392,7 @@ class NearestNeighborsOp : public OpKernel {
for (; start < limit; ++start) {
const int64 start_row = num_points * start / num_units;
const int64 limit_row = num_points * (start + 1) / num_units;
CHECK_LE(limit_row, num_points);
DCHECK_LE(limit_row, num_points);
const int64 num_rows = limit_row - start_row;
auto points_shard = points.middleRows(start_row, num_rows);
const Eigen::VectorXf points_half_squared_norm =
@ -430,7 +430,7 @@ class NearestNeighborsOp : public OpKernel {
const Eigen::Ref<const Eigen::VectorXf>& centers_half_squared_norm,
const Eigen::Ref<MatrixXi64RowMajor>& nearest_center_indices,
const Eigen::Ref<MatrixXfRowMajor>& nearest_center_distances) {
CHECK_LE(k, centers.rows());
DCHECK_LE(k, centers.rows());
if (centers.rows() <= kNearestNeighborsCentersMaxBlockSize) {
FindKNearestCentersOneBlock(k, points, points_half_squared_norm, centers,
centers_half_squared_norm,
@ -451,7 +451,7 @@ class NearestNeighborsOp : public OpKernel {
const Eigen::Ref<const Eigen::VectorXf>& centers_half_squared_norm,
Eigen::Ref<MatrixXi64RowMajor> nearest_center_indices,
Eigen::Ref<MatrixXfRowMajor> nearest_center_distances) {
CHECK_LE(k, centers.rows());
DCHECK_LE(k, centers.rows());
const int64 num_points = points.rows();
const MatrixXfRowMajor inner_product = points * centers.transpose();
// Find nearest neighbors.
@ -500,8 +500,8 @@ class NearestNeighborsOp : public OpKernel {
Eigen::Ref<MatrixXfRowMajor> nearest_center_distances) {
const int64 num_points = points.rows();
const int64 num_centers = centers.rows();
CHECK_LE(k, num_centers);
CHECK_GT(num_centers, kNearestNeighborsCentersMaxBlockSize);
DCHECK_LE(k, num_centers);
DCHECK_GT(num_centers, kNearestNeighborsCentersMaxBlockSize);
// Store nearest neighbors with first block of centers directly into the
// output matrices.
int64 out_k = std::min(k, kNearestNeighborsCentersMaxBlockSize);

View File

@ -0,0 +1,43 @@
// Copyright 2018 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
// ==============================================================================
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
namespace tensorflow {
// Registers the KmeansPlusPlusInitialization op: one float32 input (`points`),
// three int64 inputs (`num_to_sample`, `seed`, `num_retries_per_sample`) and a
// single float32 output (`samples`).
// There is no .Doc() string here; the summary and argument descriptions are
// carried by the api_def entry whose graph_op_name matches this op.
// NOTE(review): shape_inference::UnknownShape presumably leaves the output
// shape unspecified at graph-construction time -- confirm.
REGISTER_OP("KmeansPlusPlusInitialization")
.Input("points: float32")
.Input("num_to_sample: int64")
.Input("seed: int64")
.Input("num_retries_per_sample: int64")
.Output("samples: float32")
.SetShapeFn(shape_inference::UnknownShape);
// Registers the KMC2ChainInitialization op: a float32 `distances` input, an
// int64 `seed` input and a single int64 output (`index`), declared scalar via
// shape_inference::ScalarShape.
// There is no .Doc() string here; the summary and argument descriptions are
// carried by the api_def entry whose graph_op_name matches this op.
REGISTER_OP("KMC2ChainInitialization")
.Input("distances: float32")
.Input("seed: int64")
.Output("index: int64")
.SetShapeFn(shape_inference::ScalarShape);
// Registers the NearestNeighbors op: float32 `points` and `centers` inputs, an
// int64 `k` input, and two outputs (int64 indices and float32 distances).
// There is no .Doc() string here; the summary and argument descriptions are
// carried by the api_def entry whose graph_op_name matches this op.
// NOTE(review): shape_inference::UnknownShape presumably leaves both output
// shapes unspecified at graph-construction time -- confirm.
REGISTER_OP("NearestNeighbors")
.Input("points: float32")
.Input("centers: float32")
.Input("k: int64")
.Output("nearest_center_indices: int64")
.Output("nearest_center_distances: float32")
.SetShapeFn(shape_inference::UnknownShape);
} // namespace tensorflow

View File

@ -1680,6 +1680,14 @@ tf_gen_op_wrapper_private_py(
],
)
# Python op wrappers for the clustering ops, generated from the core op library
# and visible only to //tensorflow:internal.
# NOTE(review): presumably this is the target behind the
# tensorflow.python.ops.gen_clustering_ops module -- confirm against the macro.
tf_gen_op_wrapper_private_py(
name = "clustering_ops_gen",
visibility = ["//tensorflow:internal"],
deps = [
"//tensorflow/core:clustering_ops_op_lib",
],
)
tf_gen_op_wrapper_private_py(
name = "collective_ops_gen",
visibility = ["//tensorflow:internal"],