Update tf.nn.embedding_lookup to honor partition_strategy and max_norm when ids is a RaggedTensor. (Prior to this change, these two parameters were silently ignored for ragged ids values.)

PiperOrigin-RevId: 293507108 Change-Id: Id5879023d81286cbe79d829d03a303cfca0e1df2
2020-02-05 19:46:37 -08:00 · 2020-02-05 19:46:37 -08:00 · 2d663113d9
commit 2d663113d9
parent 66779177f6
3 changed files with 52 additions and 3 deletions
--- a/tensorflow/python/kernel_tests/embedding_ops_test.py
+++ b/tensorflow/python/kernel_tests/embedding_ops_test.py
@ -40,6 +40,7 @@ from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 from tensorflow.python.util import compat
@ -644,6 +645,12 @@ class EmbeddingLookupTest(test.TestCase):
          # same results. Therefore, an exact comparison cannot be made.
          self.assertAllClose(simple, sharded)

+  def testRaggedMaxNorm(self):
+    embeddings = constant_op.constant([[2.0]])
+    ids = ragged_factory_ops.constant([[0, 0], [0]], dtype=dtypes.int32)
+    embedding = embedding_ops.embedding_lookup([embeddings], ids, max_norm=1.0)
+    self.assertAllEqual(embedding, [[[1.0], [1.0]], [[1.0]]])
+

 class EmbeddingLookupSparseTest(test.TestCase):

--- a/tensorflow/python/ops/embedding_ops.py
+++ b/tensorflow/python/ops/embedding_ops.py
@ -312,7 +312,10 @@ def embedding_lookup(
    ValueError: If `params` is empty.
  """
  if isinstance(ids, ragged_tensor.RaggedTensor):
-    return embedding_lookup_ragged(params, ids)
+    return embedding_lookup_ragged(params, ids,
+                                   partition_strategy=partition_strategy,
+                                   max_norm=max_norm,
+                                   name=name)

  return _embedding_lookup_and_transform(
      params=params,
@ -823,7 +826,11 @@ def safe_embedding_lookup_sparse(embedding_weights,
    return final_result


-def embedding_lookup_ragged(embedding_weights, ragged_ids, name=None):
+def embedding_lookup_ragged(embedding_weights,
+                            ragged_ids,
+                            partition_strategy="mod",
+                            max_norm=None,
+                            name=None):
  """Look up the ragged ids in a list of embedding tensors.

  Args:
@ -832,6 +839,9 @@ def embedding_lookup_ragged(embedding_weights, ragged_ids, name=None):
    ragged_ids: A 'RaggedTensor' with type 'int32' or 'int64' containing the ids
      to be looked up in 'embedding_weights' of shape [r0, ..rN]. Values must be
      in the range '[0, embedding_weights.shape[0]]'.
+    partition_strategy: A string specifying the partitioning strategy.
+    max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
+      than this value.
    name: A name for the operation (optional)

  Returns:
@ -853,7 +863,11 @@ def embedding_lookup_ragged(embedding_weights, ragged_ids, name=None):

  with ops.name_scope(name, "embedding_lookup_ragged") as name:
    looked_up_ragged = ragged_functional_ops.map_flat_values(
-        array_ops.gather, embedding_weights, ragged_ids)
+        embedding_lookup,
+        params=embedding_weights,
+        ids=ragged_ids,
+        partition_strategy=partition_strategy,
+        max_norm=max_norm)

    return looked_up_ragged

--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@ -1590,6 +1590,34 @@ class RaggedEmbeddingTest(test_lib.TestCase):
        ValueError, "The values contained by the inputs have type*"):
      nn.embedding_lookup_ragged(weights, ragged_ids)

+  def testMaxNormForEmbeddings(self):
+    weights = constant_op.constant([[0, 0, 0, 0], [1, 1, 1, 1],
+                                    [2, 2, 2, 2], [3, 3, 3, 3]],
+                                   dtype=dtypes.float32)
+    ragged_ids = ragged_factory_ops.constant([[1, 2, 3], [0], [1, 2]],
+                                             ragged_rank=1)
+
+    actual_embeddings = [
+        nn.embedding_lookup(weights, ragged_ids, max_norm=max_norm)
+        for max_norm in [1, 2, 5]]
+
+    expected_embeddings = (
+        # max_norm = 1
+        [[[.5, .5, .5, .5], [.5, .5, .5, .5], [.5, .5, .5, .5]],
+         [[0, 0, 0, 0]], [[.5, .5, .5, .5], [.5, .5, .5, .5]]],
+        # max_norm = 2
+        [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
+         [[0, 0, 0, 0]], [[1, 1, 1, 1], [1, 1, 1, 1]]],
+        # max_norm = 5
+        [[[1, 1, 1, 1], [2, 2, 2, 2], [2.5, 2.5, 2.5, 2.5]],
+         [[0, 0, 0, 0]], [[1, 1, 1, 1], [2, 2, 2, 2]]],
+        )
+
+    for expected, actual in zip(expected_embeddings, actual_embeddings):
+      self.assertAllClose(
+          ragged_factory_ops.constant(expected, dtype=float, ragged_rank=1),
+          actual)
+

 if __name__ == "__main__":
  test_lib.main()