Makes slot_creator support copying sharding from the primary variable.

When creating EMA variables, copy sharding from the primary variables.

PiperOrigin-RevId: 352102947
Change-Id: Ib29ae4dc1be7642924da8e386fb21d1a7e1a42e1
commit b754340bed (parent 186d9b18da)
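For context, here is a minimal sketch of the behavior this change enables, closely modeled on the test added later in this commit. The decay value, the variable contents, and the two-device mesh assignment are illustrative only, and graph mode is assumed (as in the test):

```python
import numpy as np

from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
from tensorflow.python.framework import ops
from tensorflow.python.ops import variables
from tensorflow.python.training import moving_averages

with ops.Graph().as_default():
  # Annotate the primary variable with an XLA sharding over a 2-device mesh.
  v = variables.Variable([10.0, 10.0], name="v")
  v = xla_sharding.mesh_split(v, np.array([0, 1]), [0], use_sharding_op=False)

  # With this change, the EMA slot variable created by apply() carries the
  # same sharding annotation as the primary variable.
  ema = moving_averages.ExponentialMovingAverage(0.25, name="foo_avg")
  ema.apply([v])
  avg = ema.average(v)
  assert (xla_sharding.get_tensor_sharding(v) ==
          xla_sharding.get_tensor_sharding(avg))
```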
@@ -228,7 +228,7 @@ def copy_sharding(from_tensor, to_tensor, use_sharding_op=False):
   Returns:
     A tensor with sharding annotation copied from `from_tensor`.
   """
-  sharding = get_op_sharding(from_tensor.op)
+  sharding = get_tensor_sharding(from_tensor)
   if sharding is None:
     return to_tensor
@@ -343,6 +343,25 @@ def get_op_sharding(op):
     return op.get_attr('_XlaSharding')
   except ValueError:
     return None
+  except AttributeError:
+    # AttributeError: 'DistributedVarOp' object has no attribute 'get_attr'.
+    return None
+
+
+def get_tensor_sharding(tensor):
+  """Returns sharding attribute of a Tensor.
+
+  Args:
+    tensor: a Tensor.
+
+  Returns:
+    The attribute representing XLA sharding on tensor's op.
+  """
+  try:
+    return get_op_sharding(tensor.op)
+  except AttributeError:
+    # AttributeError: Tensor.op is meaningless when eager execution is enabled.
+    return None
 
 
 def auto_to_manual_spmd_partition(tensor, manual_sharding):
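As a usage sketch for the helpers above (which belong to the xla_sharding module imported elsewhere in this commit): `get_tensor_sharding` reads the `_XlaSharding` attribute through the tensor's op and returns `None` instead of raising when the op or attribute is unavailable, which is what lets `copy_sharding` accept plain tensors, eager tensors, and distributed variables. The tensors below are placeholders chosen only for illustration:

```python
import numpy as np

from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops

with ops.Graph().as_default():
  src = array_ops.zeros([4, 2])
  dst = array_ops.ones([4, 2])

  # No annotation yet: the new helper returns None rather than raising.
  assert xla_sharding.get_tensor_sharding(dst) is None

  # Annotate the source, then mirror its sharding onto the destination.
  src = xla_sharding.mesh_split(
      src, np.array([0, 1]), [0], use_sharding_op=False)
  dst = xla_sharding.copy_sharding(src, dst, use_sharding_op=False)

  assert (xla_sharding.get_tensor_sharding(src) ==
          xla_sharding.get_tensor_sharding(dst))
```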
@@ -499,6 +499,7 @@ py_library(
     srcs = ["slot_creator.py"],
     srcs_version = "PY2AND3",
     deps = [
+        "//tensorflow/compiler/xla/experimental/xla_sharding",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:init_ops",
         "//tensorflow/python:resource_variable_ops",
@@ -457,7 +457,8 @@ class ExponentialMovingAverage(object):
               var,
               var.initialized_value(),
               self.name,
-              colocate_with_primary=True)
+              colocate_with_primary=True,
+              copy_xla_sharding=True)
           # NOTE(mrry): We only add `tf.Variable` objects to the
           # `MOVING_AVERAGE_VARIABLES` collection.
           ops.add_to_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
@@ -467,7 +468,8 @@ class ExponentialMovingAverage(object):
               self.name,
               colocate_with_primary=(var.op.type in [
                   "Variable", "VariableV2", "VarHandleOp"
-              ]))
+              ]),
+              copy_xla_sharding=True)
         if self._zero_debias:
           zero_debias_true.add(avg.ref())
       self._averages[var.ref()] = avg
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
+from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -491,6 +494,20 @@ class ExponentialMovingAverageTest(test.TestCase):
     self.assertEqual(len(vars_to_restore), 1)
     self.assertIn("v/foo_avg", vars_to_restore)
 
+  @test_util.deprecated_graph_mode_only
+  def testCopyXlaSharding(self):
+    ema = moving_averages.ExponentialMovingAverage(0.25, name="foo_avg")
+    v = variables.Variable(_Repeat(10.0, 2), name="v")
+    self.assertIsNone(xla_sharding.get_tensor_sharding(v))
+    v = xla_sharding.mesh_split(v, np.array([0, 1]), [0], use_sharding_op=False)
+    self.assertIsNotNone(xla_sharding.get_tensor_sharding(v))
+    self.evaluate(variables.global_variables_initializer())
+    ema.apply([v])
+    avg = ema.average(v)
+    self.assertEqual(
+        xla_sharding.get_tensor_sharding(v),
+        xla_sharding.get_tensor_sharding(avg))
+
 
 if __name__ == "__main__":
   test.main()
@@ -39,6 +39,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
 from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
@@ -47,7 +48,14 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 
 
-def _create_slot_var(primary, val, scope, validate_shape, shape, dtype):
+def _create_slot_var(primary,
+                     val,
+                     scope,
+                     validate_shape,
+                     shape,
+                     dtype,
+                     *,
+                     copy_xla_sharding=False):
   """Helper function for creating a slot variable."""
 
   # TODO(lukaszkaiser): Consider allowing partitioners to be set in the current
@@ -98,10 +106,19 @@ def _create_slot_var(primary, val, scope, validate_shape, shape, dtype):
             slice_info.full_shape[:n], slice_info.var_offset[:n],
             slice_info.var_shape[:n]))
   # pylint: enable=protected-access
+
+  # Copy XLA sharding attributes from primary.
+  if copy_xla_sharding:
+    slot = xla_sharding.copy_sharding(primary, slot, use_sharding_op=False)
   return slot
 
 
-def create_slot(primary, val, name, colocate_with_primary=True):
+def create_slot(primary,
+                val,
+                name,
+                colocate_with_primary=True,
+                *,
+                copy_xla_sharding=False):
   """Create a slot initialized to the given value.
 
   The type of the slot is determined by the given value.
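Two small notes on the change above: the sharding copy happens at the very end of `_create_slot_var`, after any partitioner handling, and `copy_xla_sharding` is keyword-only (the bare `*` in each signature), so existing positional call sites are unaffected. A minimal sketch of calling the updated `create_slot` directly, mirroring the test added at the end of this commit (variable contents and mesh assignment are illustrative; graph mode assumed):

```python
import numpy as np

from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
from tensorflow.python.framework import ops
from tensorflow.python.ops import variables
from tensorflow.python.training import slot_creator

with ops.Graph().as_default():
  primary = variables.Variable([1.0, 2.5], name="var")
  primary = xla_sharding.mesh_split(
      primary, np.array([0, 1]), [0], use_sharding_op=False)

  # The new flag must be passed by keyword; omitting it keeps old behavior.
  slot = slot_creator.create_slot(
      primary, primary.initialized_value(), "slot", copy_xla_sharding=True)

  assert (xla_sharding.get_tensor_sharding(primary) ==
          xla_sharding.get_tensor_sharding(slot))
```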
@@ -112,6 +129,8 @@ def create_slot(primary, val, name, colocate_with_primary=True):
     name: Name to use for the slot variable.
     colocate_with_primary: Boolean. If True the slot is located
       on the same device as `primary`.
+    copy_xla_sharding: Boolean. If True also copies XLA sharding
+      from primary.
 
   Returns:
     A `Variable` object.
@@ -130,13 +149,33 @@ def create_slot(primary, val, name, colocate_with_primary=True):
   if colocate_with_primary:
     distribution_strategy = distribution_strategy_context.get_strategy()
     with distribution_strategy.extended.colocate_vars_with(primary):
-      return _create_slot_var(primary, val, "", validate_shape, None, None)
+      return _create_slot_var(
+          primary,
+          val,
+          "",
+          validate_shape,
+          None,
+          None,
+          copy_xla_sharding=copy_xla_sharding)
   else:
-    return _create_slot_var(primary, val, "", validate_shape, None, None)
+    return _create_slot_var(
+        primary,
+        val,
+        "",
+        validate_shape,
+        None,
+        None,
+        copy_xla_sharding=copy_xla_sharding)
 
 
-def create_slot_with_initializer(primary, initializer, shape, dtype, name,
-                                 colocate_with_primary=True):
+def create_slot_with_initializer(primary,
+                                 initializer,
+                                 shape,
+                                 dtype,
+                                 name,
+                                 colocate_with_primary=True,
+                                 *,
+                                 copy_xla_sharding=False):
   """Creates a slot initialized using an `Initializer`.
 
   The type of the slot is determined by the given value.
@@ -149,6 +188,8 @@ def create_slot_with_initializer(primary, initializer, shape, dtype, name,
     name: Name to use for the slot variable.
     colocate_with_primary: Boolean. If True the slot is located
       on the same device as `primary`.
+    copy_xla_sharding: Boolean. If True also copies XLA sharding
+      from primary.
 
   Returns:
     A `Variable` object.
@@ -167,14 +208,31 @@ def create_slot_with_initializer(primary, initializer, shape, dtype, name,
   if colocate_with_primary:
     distribution_strategy = distribution_strategy_context.get_strategy()
     with distribution_strategy.extended.colocate_vars_with(primary):
-      return _create_slot_var(primary, initializer, "", validate_shape, shape,
-                              dtype)
+      return _create_slot_var(
+          primary,
+          initializer,
+          "",
+          validate_shape,
+          shape,
+          dtype,
+          copy_xla_sharding=copy_xla_sharding)
   else:
-    return _create_slot_var(primary, initializer, "", validate_shape, shape,
-                            dtype)
+    return _create_slot_var(
+        primary,
+        initializer,
+        "",
+        validate_shape,
+        shape,
+        dtype,
+        copy_xla_sharding=copy_xla_sharding)
 
 
-def create_zeros_slot(primary, name, dtype=None, colocate_with_primary=True):
+def create_zeros_slot(primary,
+                      name,
+                      dtype=None,
+                      colocate_with_primary=True,
+                      *,
+                      copy_xla_sharding=False):
   """Create a slot initialized to 0 with same shape as the primary object.
 
   Args:
@@ -183,6 +241,8 @@ def create_zeros_slot(primary, name, dtype=None, colocate_with_primary=True):
     dtype: Type of the slot variable. Defaults to the type of `primary`.
     colocate_with_primary: Boolean. If True the slot is located
       on the same device as `primary`.
+    copy_xla_sharding: Boolean. If True also copies XLA sharding
+      from primary.
 
   Returns:
     A `Variable` object.
@@ -193,13 +253,22 @@ def create_zeros_slot(primary, name, dtype=None, colocate_with_primary=True):
   if slot_shape.is_fully_defined():
     initializer = init_ops.zeros_initializer()
     return create_slot_with_initializer(
-        primary, initializer, slot_shape, dtype, name,
-        colocate_with_primary=colocate_with_primary)
+        primary,
+        initializer,
+        slot_shape,
+        dtype,
+        name,
+        colocate_with_primary=colocate_with_primary,
+        copy_xla_sharding=copy_xla_sharding)
   else:
     if isinstance(primary, variables.Variable):
       slot_shape = array_ops.shape(primary.initialized_value())
     else:
       slot_shape = array_ops.shape(primary)
     val = array_ops.zeros(slot_shape, dtype=dtype)
-    return create_slot(primary, val, name,
-                       colocate_with_primary=colocate_with_primary)
+    return create_slot(
+        primary,
+        val,
+        name,
+        colocate_with_primary=colocate_with_primary,
+        copy_xla_sharding=copy_xla_sharding)
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
+from tensorflow.compiler.xla.experimental.xla_sharding import xla_sharding
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -174,6 +177,31 @@ class SlotCreatorTest(test.TestCase):
       self.assertEqual(dtypes.float32, slot.dtype.base_dtype)
       self.assertAllEqual(1.0, slot)
 
+  def testCreateSlotFromVariableCopyXlaSharding(self):
+    # slot_creator is used only in optimizer V1.
+    with ops.Graph().as_default(), self.cached_session():
+      v = variables.Variable([1.0, 2.5], name="var")
+      v = xla_sharding.mesh_split(
+          v, np.array([0, 1]), [0], use_sharding_op=False)
+      slot = slot_creator.create_slot(
+          v, v.initialized_value(), name="slot", copy_xla_sharding=True)
+      self.assertEqual(
+          xla_sharding.get_tensor_sharding(v),
+          xla_sharding.get_tensor_sharding(slot))
+
+  def testCreateZerosSlotFromVariableCopyXlaSharding(self):
+    # slot_creator is used only in optimizer V1.
+    with ops.Graph().as_default(), self.cached_session():
+      v = variables.Variable([1.0, 2.5], name="var")
+      v = xla_sharding.mesh_split(
+          v, np.array([0, 1]), [0], use_sharding_op=False)
+      with ops.control_dependencies(None):
+        slot = slot_creator.create_zeros_slot(
+            v, name="slot", dtype=dtypes.float64, copy_xla_sharding=True)
+      self.assertEqual(
+          xla_sharding.get_tensor_sharding(v),
+          xla_sharding.get_tensor_sharding(slot))
+
 
 if __name__ == "__main__":
   test.main()