diff --git a/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py b/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py
index 70813fb2179..41258edd908 100644
--- a/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py
+++ b/tensorflow/contrib/constrained_optimization/python/constrained_minimization_problem.py
@@ -72,7 +72,8 @@ class ConstrainedMinimizationProblem(object):
     else:
       proxy_constraints_shape = self.proxy_constraints.get_shape()
 
-    if (constraints_shape is None or proxy_constraints_shape is None or
+    if (constraints_shape.ndims is None or
+        proxy_constraints_shape.ndims is None or
         any([ii is None for ii in constraints_shape.as_list()]) or
         any([ii is None for ii in proxy_constraints_shape.as_list()])):
       raise ValueError(
@@ -121,3 +122,19 @@ class ConstrainedMinimizationProblem(object):
       A tensor of proxy constraint functions.
     """
     return None
+
+  # This is a property, instead of an abstract property, since it doesn't need
+  # to be overridden: if pre_train_ops returns None, then there are no ops to
+  # run before the train_op.
+  @property
+  def pre_train_ops(self):
+    """Returns a list of `Operation`s to run before the train_op.
+
+    When a `ConstrainedOptimizer` creates a train_op (in `minimize`,
+    `minimize_unconstrained`, or `minimize_constrained`), it will include these
+    ops before the main training step.
+
+    Returns:
+      A list of `Operation`s, or `None` if there are no such ops.
+    """
+    return None
diff --git a/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py b/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py
index 80555453661..0b79bdf7c05 100644
--- a/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py
+++ b/tensorflow/contrib/constrained_optimization/python/constrained_optimizer.py
@@ -55,20 +55,21 @@ class ConstrainedOptimizer(object):
     """Returns the `tf.train.Optimizer` used for optimization."""
     return self._optimizer
 
-  def minimize_unconstrained(self,
-                             minimization_problem,
-                             global_step=None,
-                             var_list=None,
-                             gate_gradients=train_optimizer.Optimizer.GATE_OP,
-                             aggregation_method=None,
-                             colocate_gradients_with_ops=False,
-                             name=None,
-                             grad_loss=None):
-    """Returns an `Op` for minimizing the unconstrained problem.
+  @abc.abstractmethod
+  def _minimize_constrained(self,
+                            minimization_problem,
+                            global_step=None,
+                            var_list=None,
+                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
+                            aggregation_method=None,
+                            colocate_gradients_with_ops=False,
+                            name=None,
+                            grad_loss=None):
+    """Version of `minimize_constrained` to be overridden by subclasses.
 
-    Unlike `minimize_constrained`, this function ignores the `constraints` (and
-    `proxy_constraints`) portion of the minimization problem entirely, and only
-    minimizes `objective`.
+    Implementations of this method should ignore the `pre_train_ops` property of
+    the `minimization_problem`. The public `minimize_constrained` method will
+    take care of executing these before the returned train_op.
 
     Args:
       minimization_problem: ConstrainedMinimizationProblem, the problem to
@@ -83,19 +84,10 @@ class ConstrainedOptimizer(object):
       grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
 
     Returns:
-      TensorFlow Op.
+      `Operation`, the train_op.
""" - return self.optimizer.minimize( - minimization_problem.objective, - global_step=global_step, - var_list=var_list, - gate_gradients=gate_gradients, - aggregation_method=aggregation_method, - colocate_gradients_with_ops=colocate_gradients_with_ops, - name=name, - grad_loss=grad_loss) + pass - @abc.abstractmethod def minimize_constrained(self, minimization_problem, global_step=None, @@ -105,7 +97,7 @@ class ConstrainedOptimizer(object): colocate_gradients_with_ops=False, name=None, grad_loss=None): - """Returns an `Op` for minimizing the constrained problem. + """Returns an `Operation` for minimizing the constrained problem. Unlike `minimize_unconstrained`, this function attempts to find a solution that minimizes the `objective` portion of the minimization problem while @@ -124,9 +116,83 @@ class ConstrainedOptimizer(object): grad_loss: as in `tf.train.Optimizer`'s `minimize` method. Returns: - TensorFlow Op. + `Operation`, the train_op. """ - pass + + def train_op_callback(): + return self._minimize_constrained( + minimization_problem, + global_step=global_step, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + name=name, + grad_loss=grad_loss) + + # If we have pre_train_ops, use tf.control_dependencies() to ensure that + # they execute before the train_op. + pre_train_ops = minimization_problem.pre_train_ops + if pre_train_ops: + with ops.control_dependencies(pre_train_ops): + train_op = train_op_callback() + else: + train_op = train_op_callback() + + return train_op + + def minimize_unconstrained(self, + minimization_problem, + global_step=None, + var_list=None, + gate_gradients=train_optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Returns an `Operation` for minimizing the unconstrained problem. + + Unlike `minimize_constrained`, this function ignores the `constraints` (and + `proxy_constraints`) portion of the minimization problem entirely, and only + minimizes `objective`. + + Args: + minimization_problem: ConstrainedMinimizationProblem, the problem to + optimize. + global_step: as in `tf.train.Optimizer`'s `minimize` method. + var_list: as in `tf.train.Optimizer`'s `minimize` method. + gate_gradients: as in `tf.train.Optimizer`'s `minimize` method. + aggregation_method: as in `tf.train.Optimizer`'s `minimize` method. + colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize` + method. + name: as in `tf.train.Optimizer`'s `minimize` method. + grad_loss: as in `tf.train.Optimizer`'s `minimize` method. + + Returns: + `Operation`, the train_op. + """ + + def train_op_callback(): + return self.optimizer.minimize( + minimization_problem.objective, + global_step=global_step, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + name=name, + grad_loss=grad_loss) + + # If we have pre_train_ops, use tf.control_dependencies() to ensure that + # they execute before the train_op. 
+    pre_train_ops = minimization_problem.pre_train_ops
+    if pre_train_ops:
+      with ops.control_dependencies(pre_train_ops):
+        train_op = train_op_callback()
+    else:
+      train_op = train_op_callback()
+
+    return train_op
 
   def minimize(self,
                minimization_problem,
@@ -138,7 +204,7 @@ class ConstrainedOptimizer(object):
                colocate_gradients_with_ops=False,
                name=None,
                grad_loss=None):
-    """Returns an `Op` for minimizing the constrained problem.
+    """Returns an `Operation` for minimizing the constrained problem.
 
     This method combines the functionality of `minimize_unconstrained` and
     `minimize_constrained`. If global_step < unconstrained_steps, it will
@@ -164,14 +230,14 @@ class ConstrainedOptimizer(object):
       grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
 
     Returns:
-      TensorFlow Op.
+      `Operation`, the train_op.
 
     Raises:
       ValueError: If unconstrained_steps is provided, but global_step is not.
     """
 
     def unconstrained_fn():
-      """Returns an `Op` for minimizing the unconstrained problem."""
+      """Returns an `Operation` for minimizing the unconstrained problem."""
       return self.minimize_unconstrained(
           minimization_problem=minimization_problem,
           global_step=global_step,
@@ -183,7 +249,7 @@ class ConstrainedOptimizer(object):
           grad_loss=grad_loss)
 
     def constrained_fn():
-      """Returns an `Op` for minimizing the constrained problem."""
+      """Returns an `Operation` for minimizing the constrained problem."""
       return self.minimize_constrained(
           minimization_problem=minimization_problem,
           global_step=global_step,
diff --git a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py
index 01c6e4f08af..d1af15f7e42 100644
--- a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py
+++ b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py
@@ -70,11 +70,13 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
     region w.r.t. the Euclidean norm.
 
   Raises:
-    ValueError: if the `multipliers` tensor does not have a fully-known shape,
-      or is not one-dimensional.
+    ValueError: if the `multipliers` tensor is not floating-point, does not have
+      a fully-known shape, or is not one-dimensional.
   """
+  if not multipliers.dtype.is_floating:
+    raise ValueError("multipliers must have a floating-point dtype")
   multipliers_shape = multipliers.get_shape()
-  if multipliers_shape is None:
+  if multipliers_shape.ndims is None:
     raise ValueError("multipliers must have known shape")
   if multipliers_shape.ndims != 1:
     raise ValueError(
@@ -101,12 +103,12 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
         (radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum(
             1.0, standard_ops.reduce_sum(inactive)))
     multipliers += scale * inactive
-    new_inactive = standard_ops.to_float(multipliers > 0)
+    new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype)
    multipliers *= new_inactive
     return (iteration, multipliers, new_inactive, inactive)
 
   iteration = standard_ops.constant(0)
-  inactive = standard_ops.ones_like(multipliers)
+  inactive = standard_ops.ones_like(multipliers, dtype=multipliers.dtype)
 
   # We actually want a do-while loop, so we explicitly call while_loop_body()
   # once before tf.while_loop().
@@ -189,16 +191,16 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
   def _projection_op(self, state, name=None):
     pass
 
-  def minimize_constrained(self,
-                           minimization_problem,
-                           global_step=None,
-                           var_list=None,
-                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
-                           aggregation_method=None,
-                           colocate_gradients_with_ops=False,
-                           name=None,
-                           grad_loss=None):
-    """Returns an `Op` for minimizing the constrained problem.
+  def _minimize_constrained(self,
+                            minimization_problem,
+                            global_step=None,
+                            var_list=None,
+                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
+                            aggregation_method=None,
+                            colocate_gradients_with_ops=False,
+                            name=None,
+                            grad_loss=None):
+    """Returns an `Operation` for minimizing the constrained problem.
 
     The `optimizer` constructor parameter will be used to update the model
     parameters, while the Lagrange multipliers will be updated using
@@ -216,8 +218,11 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
       name: as in `tf.train.Optimizer`'s `minimize` method.
       grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
 
+    Raises:
+      ValueError: If the `minimization_problem` tensors have different dtypes.
+
     Returns:
-      TensorFlow Op.
+      `Operation`, the train_op.
     """
 
     objective = minimization_problem.objective
@@ -225,6 +230,14 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
     constraints = minimization_problem.constraints
     proxy_constraints = minimization_problem.proxy_constraints
     if proxy_constraints is None:
       proxy_constraints = constraints
+
+    # Make sure that the objective, constraints and proxy constraints all have
+    # the same dtype.
+    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
+        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
+      raise ValueError("objective, constraints and proxy_constraints must "
+                       "have the same dtype")
+
     # Flatten both constraints tensors to 1d.
     num_constraints = minimization_problem.num_constraints
     constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
@@ -241,8 +254,10 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
     multipliers = self._lagrange_multipliers(state)
 
     loss = (
-        objective + standard_ops.tensordot(multipliers, proxy_constraints, 1))
-    multipliers_gradient = constraints
+        objective + standard_ops.tensordot(
+            standard_ops.cast(multipliers, proxy_constraints.dtype),
+            proxy_constraints, 1))
+    multipliers_gradient = standard_ops.cast(constraints, multipliers.dtype)
 
     update_ops = []
     if self.constraint_optimizer is None:
@@ -356,6 +371,8 @@ class AdditiveExternalRegretOptimizer(_ExternalRegretOptimizer):
     # For an AdditiveExternalRegretOptimizer, the internal state is simply a
     # tensor of Lagrange multipliers with shape (m,), where m is the number of
     # constraints.
+    #
+    # FUTURE WORK: make the dtype a parameter.
     return standard_ops.zeros((num_constraints,), dtype=dtypes.float32)
 
   def _lagrange_multipliers(self, state):
diff --git a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py
index ff846b191a3..2c673d93471 100644
--- a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py
+++ b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py
@@ -79,9 +79,11 @@ def _maximal_eigenvector_power_method(matrix,
     The maximal right-eigenvector of `matrix`.
 
   Raises:
-    ValueError: If the epsilon or maximum_iterations parameters violate their
-      bounds.
+    ValueError: If the `matrix` tensor is not floating-point, or if the
+      `epsilon` or `maximum_iterations` parameters violate their bounds.
   """
+  if not matrix.dtype.is_floating:
+    raise ValueError("matrix must have a floating-point dtype")
   if epsilon <= 0.0:
     raise ValueError("epsilon must be strictly positive")
   if maximum_iterations <= 0:
     raise ValueError("maximum_iterations must be strictly positive")
@@ -139,11 +141,13 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
     (i.e. the Frobenius norm).
 
   Raises:
-    ValueError: if the `matrix` tensor does not have a fully-known shape, or is
-      not two-dimensional and square.
+    ValueError: if the `matrix` tensor is not floating-point, does not have a
+      fully-known shape, or is not two-dimensional and square.
   """
+  if not matrix.dtype.is_floating:
+    raise ValueError("matrix must have a floating-point dtype")
   matrix_shape = matrix.get_shape()
-  if matrix_shape is None:
+  if matrix_shape.ndims is None:
     raise ValueError("matrix must have known shape")
   if matrix_shape.ndims != 2:
     raise ValueError(
@@ -172,12 +176,12 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
         matrix, axis=0, keepdims=True)) / standard_ops.maximum(
             1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
     matrix += scale * inactive
-    new_inactive = standard_ops.to_float(matrix > 0)
+    new_inactive = standard_ops.cast(matrix > 0, matrix.dtype)
     matrix *= new_inactive
     return (iteration, matrix, new_inactive, inactive)
 
   iteration = standard_ops.constant(0)
-  inactive = standard_ops.ones_like(matrix)
+  inactive = standard_ops.ones_like(matrix, dtype=matrix.dtype)
 
   # We actually want a do-while loop, so we explicitly call while_loop_body()
   # once before tf.while_loop().
@@ -218,7 +222,7 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
   """Base class representing a `_SwapRegretOptimizer`.
 
   This class contains most of the logic for performing constrained optimization,
-  minimizing external regret for the constraints player. What it *doesn't* do is
+  minimizing swap regret for the constraints player. What it *doesn't* do is
   keep track of the internal state (the stochastic matrix). Instead, the state
   is accessed via the _initial_state(), _stochastic_matrix(),
   _constraint_grad_and_var() and _projection_op() methods.
@@ -291,16 +295,16 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
   def _projection_op(self, state, name=None):
     pass
 
-  def minimize_constrained(self,
-                           minimization_problem,
-                           global_step=None,
-                           var_list=None,
-                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
-                           aggregation_method=None,
-                           colocate_gradients_with_ops=False,
-                           name=None,
-                           grad_loss=None):
-    """Returns an `Op` for minimizing the constrained problem.
+  def _minimize_constrained(self,
+                            minimization_problem,
+                            global_step=None,
+                            var_list=None,
+                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
+                            aggregation_method=None,
+                            colocate_gradients_with_ops=False,
+                            name=None,
+                            grad_loss=None):
+    """Returns an `Operation` for minimizing the constrained problem.
 
     The `optimizer` constructor parameter will be used to update the model
     parameters, while the constraint/objective weight matrix (the analogue of
@@ -320,8 +324,11 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
       name: as in `tf.train.Optimizer`'s `minimize` method.
       grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
 
+    Raises:
+      ValueError: If the `minimization_problem` tensors have different dtypes.
+
     Returns:
-      TensorFlow Op.
+      `Operation`, the train_op.
""" objective = minimization_problem.objective @@ -329,6 +336,14 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer): proxy_constraints = minimization_problem.proxy_constraints if proxy_constraints is None: proxy_constraints = constraints + + # Make sure that the objective, constraints and proxy constraints all have + # the same dtype. + if (objective.dtype.base_dtype != constraints.dtype.base_dtype or + objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype): + raise ValueError("objective, constraints and proxy_constraints must " + "have the same dtype") + # Flatten both constraints tensors to 1d. num_constraints = minimization_problem.num_constraints constraints = standard_ops.reshape(constraints, shape=(num_constraints,)) @@ -344,15 +359,18 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer): name="swap_regret_optimizer_state") zero_and_constraints = standard_ops.concat( - (standard_ops.zeros((1,)), constraints), axis=0) + (standard_ops.zeros((1,), dtype=constraints.dtype), constraints), + axis=0) objective_and_proxy_constraints = standard_ops.concat( (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0) distribution = self._distribution(state) - loss = standard_ops.tensordot(distribution, objective_and_proxy_constraints, - 1) + loss = standard_ops.tensordot( + standard_ops.cast(distribution, objective_and_proxy_constraints.dtype), + objective_and_proxy_constraints, 1) matrix_gradient = standard_ops.matmul( - standard_ops.expand_dims(zero_and_constraints, 1), + standard_ops.expand_dims( + standard_ops.cast(zero_and_constraints, distribution.dtype), 1), standard_ops.expand_dims(distribution, 0)) update_ops = [] @@ -555,6 +573,7 @@ class MultiplicativeSwapRegretOptimizer(_SwapRegretOptimizer): log_initial_one = math.log(1.0 - (self._initial_multiplier_radius * (dimension - 1) / (dimension))) log_initial_zero = math.log(self._initial_multiplier_radius / dimension) + # FUTURE WORK: make the dtype a parameter. return standard_ops.concat( (standard_ops.constant( log_initial_one, dtype=dtypes.float32, shape=(1, dimension)),