* Fixing checks for fully-known shapes.
* Adding pre_train_ops to ConstrainedMinimizationProblem. These are tf.Operations that should be executed before each training step.
* Updating ConstrainedOptimizers to have the train_ops they create depend on the pre_train_ops from the ConstrainedMinimizationProblem.

PiperOrigin-RevId: 208871419
commit d2875ea713
parent 75399bba46
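To orient readers of this change, here is a minimal, hypothetical sketch of the new mechanism (the ExampleProblem class, its losses, the import alias, and the optimizer arguments are illustrative assumptions, not code from this commit): a problem exposes pre_train_ops, and after this change the ConstrainedOptimizers run those ops before every training step they create.

# Hypothetical sketch: a toy problem whose pre_train_ops increment a counter
# before each training step created by a ConstrainedOptimizer.
import tensorflow as tf
from tensorflow.contrib import constrained_optimization as tfco


class ExampleProblem(tfco.ConstrainedMinimizationProblem):
  """Minimize (x - 2)^2 subject to x <= 1, with a pre-train counter."""

  def __init__(self):
    self._x = tf.Variable(0.0, name="x")
    self._pre_train_counter = tf.Variable(0, trainable=False, name="counter")

  @property
  def objective(self):
    return tf.square(self._x - 2.0)

  @property
  def constraints(self):
    # Constraints are expressed as "<= 0", so x <= 1 becomes x - 1 <= 0.
    return tf.reshape(self._x - 1.0, (1,))

  @property
  def pre_train_ops(self):
    # These ops now run (via tf.control_dependencies) before each train step.
    return [tf.assign_add(self._pre_train_counter, 1)]


problem = ExampleProblem()
optimizer = tfco.AdditiveExternalRegretOptimizer(
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))
train_op = optimizer.minimize_constrained(problem)

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  for _ in range(10):
    session.run(train_op)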
@@ -72,7 +72,8 @@ class ConstrainedMinimizationProblem(object):
    else:
      proxy_constraints_shape = self.proxy_constraints.get_shape()

      if (constraints_shape is None or proxy_constraints_shape is None or
      if (constraints_shape.ndims is None or
          proxy_constraints_shape.ndims is None or
          any([ii is None for ii in constraints_shape.as_list()]) or
          any([ii is None for ii in proxy_constraints_shape.as_list()])):
        raise ValueError(
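The hunk above tightens the fully-known-shape test: a static shape is fully known only when its ndims is set and no dimension is None. A standalone illustration of the same check (the helper name is hypothetical, not part of this commit):

# Illustrative helper, an assumption for exposition only: a static shape is
# fully known only when ndims is set and every dimension is known.
import tensorflow as tf

def _shape_is_fully_known(tensor):
  shape = tensor.get_shape()
  return (shape.ndims is not None and
          all(dim is not None for dim in shape.as_list()))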
@@ -121,3 +122,19 @@ class ConstrainedMinimizationProblem(object):
      A tensor of proxy constraint functions.
    """
    return None

  # This is a property, instead of an abstract property, since it doesn't need
  # to be overridden: if pre_train_ops returns None, then there are no ops to
  # run before train_op.
  @property
  def pre_train_ops(self):
    """Returns a list of `Operation`s to run before the train_op.

    When a `ConstrainedOptimizer` creates a train_op (in `minimize`,
    `minimize_unconstrained`, or `minimize_constrained`), it will include these
    ops before the main training step.

    Returns:
      A list of `Operation`s.
    """
    return None
@@ -55,20 +55,21 @@ class ConstrainedOptimizer(object):
    """Returns the `tf.train.Optimizer` used for optimization."""
    return self._optimizer

  def minimize_unconstrained(self,
                             minimization_problem,
                             global_step=None,
                             var_list=None,
                             gate_gradients=train_optimizer.Optimizer.GATE_OP,
                             aggregation_method=None,
                             colocate_gradients_with_ops=False,
                             name=None,
                             grad_loss=None):
    """Returns an `Op` for minimizing the unconstrained problem.
  @abc.abstractmethod
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Version of `minimize_constrained` to be overridden by subclasses.

    Unlike `minimize_constrained`, this function ignores the `constraints` (and
    `proxy_constraints`) portion of the minimization problem entirely, and only
    minimizes `objective`.
    Implementations of this method should ignore the `pre_train_ops` property of
    the `minimization_problem`. The public `minimize_constrained` method will
    take care of executing these before the returned train_op.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
@@ -83,19 +84,10 @@ class ConstrainedOptimizer(object):
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    return self.optimizer.minimize(
        minimization_problem.objective,
        global_step=global_step,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        name=name,
        grad_loss=grad_loss)
    pass

  @abc.abstractmethod
  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
@@ -105,7 +97,7 @@ class ConstrainedOptimizer(object):
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
    """Returns an `Operation` for minimizing the constrained problem.

    Unlike `minimize_unconstrained`, this function attempts to find a solution
    that minimizes the `objective` portion of the minimization problem while
@@ -124,9 +116,83 @@ class ConstrainedOptimizer(object):
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    pass

    def train_op_callback():
      return self._minimize_constrained(
          minimization_problem,
          global_step=global_step,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          name=name,
          grad_loss=grad_loss)

    # If we have pre_train_ops, use tf.control_dependencies() to ensure that
    # they execute before the train_op.
    pre_train_ops = minimization_problem.pre_train_ops
    if pre_train_ops:
      with ops.control_dependencies(pre_train_ops):
        train_op = train_op_callback()
    else:
      train_op = train_op_callback()

    return train_op

  def minimize_unconstrained(self,
                             minimization_problem,
                             global_step=None,
                             var_list=None,
                             gate_gradients=train_optimizer.Optimizer.GATE_OP,
                             aggregation_method=None,
                             colocate_gradients_with_ops=False,
                             name=None,
                             grad_loss=None):
    """Returns an `Operation` for minimizing the unconstrained problem.

    Unlike `minimize_constrained`, this function ignores the `constraints` (and
    `proxy_constraints`) portion of the minimization problem entirely, and only
    minimizes `objective`.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      `Operation`, the train_op.
    """

    def train_op_callback():
      return self.optimizer.minimize(
          minimization_problem.objective,
          global_step=global_step,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          name=name,
          grad_loss=grad_loss)

    # If we have pre_train_ops, use tf.control_dependencies() to ensure that
    # they execute before the train_op.
    pre_train_ops = minimization_problem.pre_train_ops
    if pre_train_ops:
      with ops.control_dependencies(pre_train_ops):
        train_op = train_op_callback()
    else:
      train_op = train_op_callback()

    return train_op

  def minimize(self,
               minimization_problem,
@@ -138,7 +204,7 @@ class ConstrainedOptimizer(object):
               colocate_gradients_with_ops=False,
               name=None,
               grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
    """Returns an `Operation` for minimizing the constrained problem.

    This method combines the functionality of `minimize_unconstrained` and
    `minimize_constrained`. If global_step < unconstrained_steps, it will
@@ -164,14 +230,14 @@ class ConstrainedOptimizer(object):
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.

    Raises:
      ValueError: If unconstrained_steps is provided, but global_step is not.
    """

    def unconstrained_fn():
      """Returns an `Op` for minimizing the unconstrained problem."""
      """Returns an `Operation` for minimizing the unconstrained problem."""
      return self.minimize_unconstrained(
          minimization_problem=minimization_problem,
          global_step=global_step,
@@ -183,7 +249,7 @@ class ConstrainedOptimizer(object):
          grad_loss=grad_loss)

    def constrained_fn():
      """Returns an `Op` for minimizing the constrained problem."""
      """Returns an `Operation` for minimizing the constrained problem."""
      return self.minimize_constrained(
          minimization_problem=minimization_problem,
          global_step=global_step,
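As a usage illustration of the combined minimize() entry point described in the hunks above (a hedged sketch: the problem, learning rate, and step count are hypothetical, and keyword arguments are used because the full signature is abbreviated in this diff):

# Hypothetical continuation of the ExampleProblem sketch near the top of this
# page; argument values are illustrative assumptions.
import tensorflow as tf
from tensorflow.contrib import constrained_optimization as tfco

problem = ExampleProblem()
global_step = tf.train.create_global_step()
optimizer = tfco.AdditiveExternalRegretOptimizer(
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))

# Per the docstring above: while global_step < unconstrained_steps this
# performs unconstrained updates, and constrained updates afterwards.
train_op = optimizer.minimize(
    problem, unconstrained_steps=100, global_step=global_step)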
@@ -70,11 +70,13 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
    region w.r.t. the Euclidean norm.

  Raises:
    ValueError: if the `multipliers` tensor does not have a fully-known shape,
      or is not one-dimensional.
    ValueError: if the `multipliers` tensor is not floating-point, does not have
      a fully-known shape, or is not one-dimensional.
  """
  if not multipliers.dtype.is_floating:
    raise ValueError("multipliers must have a floating-point dtype")
  multipliers_shape = multipliers.get_shape()
  if multipliers_shape is None:
  if multipliers_shape.ndims is None:
    raise ValueError("multipliers must have known shape")
  if multipliers_shape.ndims != 1:
    raise ValueError(
@@ -101,12 +103,12 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
        (radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum(
            1.0, standard_ops.reduce_sum(inactive)))
    multipliers += scale * inactive
    new_inactive = standard_ops.to_float(multipliers > 0)
    new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype)
    multipliers *= new_inactive
    return (iteration, multipliers, new_inactive, inactive)

  iteration = standard_ops.constant(0)
  inactive = standard_ops.ones_like(multipliers)
  inactive = standard_ops.ones_like(multipliers, dtype=multipliers.dtype)

  # We actually want a do-while loop, so we explicitly call while_loop_body()
  # once before tf.while_loop().
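The comment above describes a do-while idiom: run the loop body once during graph construction, then hand the same body to tf.while_loop. A generic, standalone sketch of that pattern (not the projection code itself):

# Generic do-while sketch: the body runs at least once, then repeats while the
# condition holds. The toy condition and body are illustrative assumptions.
import tensorflow as tf

def while_loop_condition(iteration, total):
  return iteration < 5

def while_loop_body(iteration, total):
  return iteration + 1, total + 2.0

iteration = tf.constant(0)
total = tf.constant(0.0)
# Execute the body once unconditionally...
iteration, total = while_loop_body(iteration, total)
# ...then let tf.while_loop repeat it while the condition remains true.
iteration, total = tf.while_loop(
    while_loop_condition, while_loop_body, loop_vars=(iteration, total))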
@@ -189,16 +191,16 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
  def _projection_op(self, state, name=None):
    pass

  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
                           var_list=None,
                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
                           aggregation_method=None,
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    The `optimizer` constructor parameter will be used to update the model
    parameters, while the Lagrange multipliers will be updated using
@@ -216,8 +218,11 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
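For orientation, the hunks below build a Lagrangian-style loss for the parameter update while feeding the original constraints back to the multiplier update (this restates the code that follows, with $\theta$ the model parameters and $\lambda$ the Lagrange multipliers):

$$\mathcal{L}(\theta, \lambda) = \mathrm{objective}(\theta) + \sum_i \lambda_i\,\mathrm{proxy\_constraints}_i(\theta)$$

The multipliers are updated using the original $\mathrm{constraints}_i(\theta)$ rather than the proxies, which is why the code keeps both tensors and, after this commit, requires them to share a dtype.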
@@ -225,6 +230,14 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
@@ -241,8 +254,10 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):

    multipliers = self._lagrange_multipliers(state)
    loss = (
        objective + standard_ops.tensordot(multipliers, proxy_constraints, 1))
    multipliers_gradient = constraints
        objective + standard_ops.tensordot(
            standard_ops.cast(multipliers, proxy_constraints.dtype),
            proxy_constraints, 1))
    multipliers_gradient = standard_ops.cast(constraints, multipliers.dtype)

    update_ops = []
    if self.constraint_optimizer is None:
@@ -356,6 +371,8 @@ class AdditiveExternalRegretOptimizer(_ExternalRegretOptimizer):
    # For an AdditiveExternalRegretOptimizer, the internal state is simply a
    # tensor of Lagrange multipliers with shape (m,), where m is the number of
    # constraints.
    #
    # FUTURE WORK: make the dtype a parameter.
    return standard_ops.zeros((num_constraints,), dtype=dtypes.float32)

  def _lagrange_multipliers(self, state):

@@ -79,9 +79,11 @@ def _maximal_eigenvector_power_method(matrix,
    The maximal right-eigenvector of `matrix`.

  Raises:
    ValueError: If the epsilon or maximum_iterations parameters violate their
      bounds.
    ValueError: If the `matrix` tensor is not floating-point, or if the
      `epsilon` or `maximum_iterations` parameters violate their bounds.
  """
  if not matrix.dtype.is_floating:
    raise ValueError("matrix must have a floating-point dtype")
  if epsilon <= 0.0:
    raise ValueError("epsilon must be strictly positive")
  if maximum_iterations <= 0:
@@ -139,11 +141,13 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
    (i.e. the Frobenius norm).

  Raises:
    ValueError: if the `matrix` tensor does not have a fully-known shape, or is
      not two-dimensional and square.
    ValueError: if the `matrix` tensor is not floating-point, does not have a
      fully-known shape, or is not two-dimensional and square.
  """
  if not matrix.dtype.is_floating:
    raise ValueError("matrix must have a floating-point dtype")
  matrix_shape = matrix.get_shape()
  if matrix_shape is None:
  if matrix_shape.ndims is None:
    raise ValueError("matrix must have known shape")
  if matrix_shape.ndims != 2:
    raise ValueError(
@@ -172,12 +176,12 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
        matrix, axis=0, keepdims=True)) / standard_ops.maximum(
            1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
    matrix += scale * inactive
    new_inactive = standard_ops.to_float(matrix > 0)
    new_inactive = standard_ops.cast(matrix > 0, matrix.dtype)
    matrix *= new_inactive
    return (iteration, matrix, new_inactive, inactive)

  iteration = standard_ops.constant(0)
  inactive = standard_ops.ones_like(matrix)
  inactive = standard_ops.ones_like(matrix, dtype=matrix.dtype)

  # We actually want a do-while loop, so we explicitly call while_loop_body()
  # once before tf.while_loop().
@@ -218,7 +222,7 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
  """Base class representing a `_SwapRegretOptimizer`.

  This class contains most of the logic for performing constrained optimization,
  minimizing external regret for the constraints player. What it *doesn't* do is
  minimizing swap regret for the constraints player. What it *doesn't* do is
  keep track of the internal state (the stochastic matrix). Instead, the state
  is accessed via the _initial_state(), _stochastic_matrix(),
  _constraint_grad_and_var() and _projection_op() methods.
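The _SwapRegretOptimizer described above derives a distribution from its stochastic-matrix state, and the _maximal_eigenvector_power_method helper earlier in this file finds maximal eigenvectors by power iteration. A conceptual NumPy sketch of power iteration (illustrative only, with assumed tolerance and iteration defaults; not the TensorFlow implementation):

# Conceptual NumPy sketch of power iteration for a maximal right-eigenvector;
# the tolerance and iteration limit are illustrative assumptions.
import numpy as np

def maximal_right_eigenvector(matrix, epsilon=1e-6, maximum_iterations=1000):
  vector = np.ones(matrix.shape[1]) / matrix.shape[1]
  for _ in range(maximum_iterations):
    new_vector = matrix.dot(vector)
    new_vector /= np.linalg.norm(new_vector)
    if np.linalg.norm(new_vector - vector) < epsilon:
      return new_vector
    vector = new_vector
  return vector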
@@ -291,16 +295,16 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
  def _projection_op(self, state, name=None):
    pass

  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
                           var_list=None,
                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
                           aggregation_method=None,
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    The `optimizer` constructor parameter will be used to update the model
    parameters, while the constraint/objective weight matrix (the analogue of
@@ -320,8 +324,11 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
@@ -329,6 +336,14 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
@@ -344,15 +359,18 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
        name="swap_regret_optimizer_state")

    zero_and_constraints = standard_ops.concat(
        (standard_ops.zeros((1,)), constraints), axis=0)
        (standard_ops.zeros((1,), dtype=constraints.dtype), constraints),
        axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    distribution = self._distribution(state)
    loss = standard_ops.tensordot(distribution, objective_and_proxy_constraints,
                                  1)
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(zero_and_constraints, 1),
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
@@ -555,6 +573,7 @@ class MultiplicativeSwapRegretOptimizer(_SwapRegretOptimizer):
    log_initial_one = math.log(1.0 - (self._initial_multiplier_radius *
                                      (dimension - 1) / (dimension)))
    log_initial_zero = math.log(self._initial_multiplier_radius / dimension)
    # FUTURE WORK: make the dtype a parameter.
    return standard_ops.concat(
        (standard_ops.constant(
            log_initial_one, dtype=dtypes.float32, shape=(1, dimension)),