* Fixing checks for fully-known shapes.
* Adding pre_train_ops to ConstrainedMinimizationProblem. These are tf.Operations that should be executed before each training step.
* Updating ConstrainedOptimizers to have the train_ops they create depend on the pre_train_ops from the ConstrainedMinimizationProblem.

PiperOrigin-RevId: 208871419
commit d2875ea713
parent 75399bba46
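To orient readers of this change, here is a minimal, hypothetical sketch of the new mechanism (the ExampleProblem class, its losses, the import alias, and the optimizer arguments are illustrative assumptions, not code from this commit): a problem exposes pre_train_ops, and after this change the ConstrainedOptimizers run those ops before every training step they create.

# Hypothetical sketch: a toy problem whose pre_train_ops increment a counter
# before each training step created by a ConstrainedOptimizer.
import tensorflow as tf
from tensorflow.contrib import constrained_optimization as tfco


class ExampleProblem(tfco.ConstrainedMinimizationProblem):
  """Minimize (x - 2)^2 subject to x <= 1, with a pre-train counter."""

  def __init__(self):
    self._x = tf.Variable(0.0, name="x")
    self._pre_train_counter = tf.Variable(0, trainable=False, name="counter")

  @property
  def objective(self):
    return tf.square(self._x - 2.0)

  @property
  def constraints(self):
    # Constraints are expressed as "<= 0", so x <= 1 becomes x - 1 <= 0.
    return tf.reshape(self._x - 1.0, (1,))

  @property
  def pre_train_ops(self):
    # These ops now run (via tf.control_dependencies) before each train step.
    return [tf.assign_add(self._pre_train_counter, 1)]


problem = ExampleProblem()
optimizer = tfco.AdditiveExternalRegretOptimizer(
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))
train_op = optimizer.minimize_constrained(problem)

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  for _ in range(10):
    session.run(train_op)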
@@ -72,7 +72,8 @@ class ConstrainedMinimizationProblem(object):
    else:
      proxy_constraints_shape = self.proxy_constraints.get_shape()

      if (constraints_shape is None or proxy_constraints_shape is None or
      if (constraints_shape.ndims is None or
          proxy_constraints_shape.ndims is None or
          any([ii is None for ii in constraints_shape.as_list()]) or
          any([ii is None for ii in proxy_constraints_shape.as_list()])):
        raise ValueError(
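The hunk above tightens the fully-known-shape test: a static shape is fully known only when its ndims is set and no dimension is None. A standalone illustration of the same check (the helper name is hypothetical, not part of this commit):

# Illustrative helper, an assumption for exposition only: a static shape is
# fully known only when ndims is set and every dimension is known.
import tensorflow as tf

def _shape_is_fully_known(tensor):
  shape = tensor.get_shape()
  return (shape.ndims is not None and
          all(dim is not None for dim in shape.as_list()))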
@@ -121,3 +122,19 @@ class ConstrainedMinimizationProblem(object):
      A tensor of proxy constraint functions.
    """
    return None

  # This is a property, instead of an abstract property, since it doesn't need
  # to be overridden: if pre_train_ops returns None, then there are no ops to
  # run before train_op.
  @property
  def pre_train_ops(self):
    """Returns a list of `Operation`s to run before the train_op.

    When a `ConstrainedOptimizer` creates a train_op (in `minimize`,
    `minimize_unconstrained`, or `minimize_constrained`), it will include these
    ops before the main training step.

    Returns:
      A list of `Operation`s.
    """
    return None
@@ -55,20 +55,21 @@ class ConstrainedOptimizer(object):
    """Returns the `tf.train.Optimizer` used for optimization."""
    return self._optimizer

  def minimize_unconstrained(self,
                             minimization_problem,
                             global_step=None,
                             var_list=None,
                             gate_gradients=train_optimizer.Optimizer.GATE_OP,
                             aggregation_method=None,
                             colocate_gradients_with_ops=False,
                             name=None,
                             grad_loss=None):
    """Returns an `Op` for minimizing the unconstrained problem.
  @abc.abstractmethod
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Version of `minimize_constrained` to be overridden by subclasses.

    Unlike `minimize_constrained`, this function ignores the `constraints` (and
    `proxy_constraints`) portion of the minimization problem entirely, and only
    minimizes `objective`.
    Implementations of this method should ignore the `pre_train_ops` property of
    the `minimization_problem`. The public `minimize_constrained` method will
    take care of executing these before the returned train_op.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
@@ -83,19 +84,10 @@ class ConstrainedOptimizer(object):
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    return self.optimizer.minimize(
        minimization_problem.objective,
        global_step=global_step,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        name=name,
        grad_loss=grad_loss)
    pass

  @abc.abstractmethod
  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
@@ -105,7 +97,7 @@ class ConstrainedOptimizer(object):
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
    """Returns an `Operation` for minimizing the constrained problem.

    Unlike `minimize_unconstrained`, this function attempts to find a solution
    that minimizes the `objective` portion of the minimization problem while
@@ -124,9 +116,83 @@ class ConstrainedOptimizer(object):
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    pass

    def train_op_callback():
      return self._minimize_constrained(
          minimization_problem,
          global_step=global_step,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          name=name,
          grad_loss=grad_loss)

    # If we have pre_train_ops, use tf.control_dependencies() to ensure that
    # they execute before the train_op.
    pre_train_ops = minimization_problem.pre_train_ops
    if pre_train_ops:
      with ops.control_dependencies(pre_train_ops):
        train_op = train_op_callback()
    else:
      train_op = train_op_callback()

    return train_op

  def minimize_unconstrained(self,
                             minimization_problem,
                             global_step=None,
                             var_list=None,
                             gate_gradients=train_optimizer.Optimizer.GATE_OP,
                             aggregation_method=None,
                             colocate_gradients_with_ops=False,
                             name=None,
                             grad_loss=None):
    """Returns an `Operation` for minimizing the unconstrained problem.

    Unlike `minimize_constrained`, this function ignores the `constraints` (and
    `proxy_constraints`) portion of the minimization problem entirely, and only
    minimizes `objective`.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      `Operation`, the train_op.
    """

    def train_op_callback():
      return self.optimizer.minimize(
          minimization_problem.objective,
          global_step=global_step,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          name=name,
          grad_loss=grad_loss)

    # If we have pre_train_ops, use tf.control_dependencies() to ensure that
    # they execute before the train_op.
    pre_train_ops = minimization_problem.pre_train_ops
    if pre_train_ops:
      with ops.control_dependencies(pre_train_ops):
        train_op = train_op_callback()
    else:
      train_op = train_op_callback()

    return train_op

  def minimize(self,
               minimization_problem,
@@ -138,7 +204,7 @@ class ConstrainedOptimizer(object):
               colocate_gradients_with_ops=False,
               name=None,
               grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
    """Returns an `Operation` for minimizing the constrained problem.

    This method combines the functionality of `minimize_unconstrained` and
    `minimize_constrained`. If global_step < unconstrained_steps, it will
@@ -164,14 +230,14 @@ class ConstrainedOptimizer(object):
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.

    Raises:
      ValueError: If unconstrained_steps is provided, but global_step is not.
    """

    def unconstrained_fn():
      """Returns an `Op` for minimizing the unconstrained problem."""
      """Returns an `Operation` for minimizing the unconstrained problem."""
      return self.minimize_unconstrained(
          minimization_problem=minimization_problem,
          global_step=global_step,
@@ -183,7 +249,7 @@ class ConstrainedOptimizer(object):
          grad_loss=grad_loss)

    def constrained_fn():
      """Returns an `Op` for minimizing the constrained problem."""
      """Returns an `Operation` for minimizing the constrained problem."""
      return self.minimize_constrained(
          minimization_problem=minimization_problem,
          global_step=global_step,
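As a usage illustration of the combined minimize() entry point described in the hunks above (a hedged sketch: the problem, learning rate, and step count are hypothetical, and keyword arguments are used because the full signature is abbreviated in this diff):

# Hypothetical continuation of the ExampleProblem sketch near the top of this
# page; argument values are illustrative assumptions.
import tensorflow as tf
from tensorflow.contrib import constrained_optimization as tfco

problem = ExampleProblem()
global_step = tf.train.create_global_step()
optimizer = tfco.AdditiveExternalRegretOptimizer(
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))

# Per the docstring above: while global_step < unconstrained_steps this
# performs unconstrained updates, and constrained updates afterwards.
train_op = optimizer.minimize(
    problem, unconstrained_steps=100, global_step=global_step)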
@@ -70,11 +70,13 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
    region w.r.t. the Euclidean norm.

  Raises:
    ValueError: if the `multipliers` tensor does not have a fully-known shape,
      or is not one-dimensional.
    ValueError: if the `multipliers` tensor is not floating-point, does not have
      a fully-known shape, or is not one-dimensional.
  """
  if not multipliers.dtype.is_floating:
    raise ValueError("multipliers must have a floating-point dtype")
  multipliers_shape = multipliers.get_shape()
  if multipliers_shape is None:
  if multipliers_shape.ndims is None:
    raise ValueError("multipliers must have known shape")
  if multipliers_shape.ndims != 1:
    raise ValueError(
@@ -101,12 +103,12 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
        (radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum(
            1.0, standard_ops.reduce_sum(inactive)))
    multipliers += scale * inactive
    new_inactive = standard_ops.to_float(multipliers > 0)
    new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype)
    multipliers *= new_inactive
    return (iteration, multipliers, new_inactive, inactive)

  iteration = standard_ops.constant(0)
  inactive = standard_ops.ones_like(multipliers)
  inactive = standard_ops.ones_like(multipliers, dtype=multipliers.dtype)

  # We actually want a do-while loop, so we explicitly call while_loop_body()
  # once before tf.while_loop().
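The comment above describes a do-while idiom: run the loop body once during graph construction, then hand the same body to tf.while_loop. A generic, standalone sketch of that pattern (not the projection code itself):

# Generic do-while sketch: the body runs at least once, then repeats while the
# condition holds. The toy condition and body are illustrative assumptions.
import tensorflow as tf

def while_loop_condition(iteration, total):
  return iteration < 5

def while_loop_body(iteration, total):
  return iteration + 1, total + 2.0

iteration = tf.constant(0)
total = tf.constant(0.0)
# Execute the body once unconditionally...
iteration, total = while_loop_body(iteration, total)
# ...then let tf.while_loop repeat it while the condition remains true.
iteration, total = tf.while_loop(
    while_loop_condition, while_loop_body, loop_vars=(iteration, total))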
@@ -189,16 +191,16 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
  def _projection_op(self, state, name=None):
    pass

  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
                           var_list=None,
                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
                           aggregation_method=None,
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    The `optimizer` constructor parameter will be used to update the model
    parameters, while the Lagrange multipliers will be updated using
@@ -216,8 +218,11 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
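For orientation, the hunks below build a Lagrangian-style loss for the parameter update while feeding the original constraints back to the multiplier update (this restates the code that follows, with $\theta$ the model parameters and $\lambda$ the Lagrange multipliers):

$$\mathcal{L}(\theta, \lambda) = \mathrm{objective}(\theta) + \sum_i \lambda_i\,\mathrm{proxy\_constraints}_i(\theta)$$

The multipliers are updated using the original $\mathrm{constraints}_i(\theta)$ rather than the proxies, which is why the code keeps both tensors and, after this commit, requires them to share a dtype.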
@@ -225,6 +230,14 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
@@ -241,8 +254,10 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):

    multipliers = self._lagrange_multipliers(state)
    loss = (
        objective + standard_ops.tensordot(multipliers, proxy_constraints, 1))
    multipliers_gradient = constraints
        objective + standard_ops.tensordot(
            standard_ops.cast(multipliers, proxy_constraints.dtype),
            proxy_constraints, 1))
    multipliers_gradient = standard_ops.cast(constraints, multipliers.dtype)

    update_ops = []
    if self.constraint_optimizer is None:
@@ -356,6 +371,8 @@ class AdditiveExternalRegretOptimizer(_ExternalRegretOptimizer):
    # For an AdditiveExternalRegretOptimizer, the internal state is simply a
    # tensor of Lagrange multipliers with shape (m,), where m is the number of
    # constraints.
    #
    # FUTURE WORK: make the dtype a parameter.
    return standard_ops.zeros((num_constraints,), dtype=dtypes.float32)

  def _lagrange_multipliers(self, state):

@@ -79,9 +79,11 @@ def _maximal_eigenvector_power_method(matrix,
    The maximal right-eigenvector of `matrix`.

  Raises:
    ValueError: If the epsilon or maximum_iterations parameters violate their
      bounds.
    ValueError: If the `matrix` tensor is not floating-point, or if the
      `epsilon` or `maximum_iterations` parameters violate their bounds.
  """
  if not matrix.dtype.is_floating:
    raise ValueError("matrix must have a floating-point dtype")
  if epsilon <= 0.0:
    raise ValueError("epsilon must be strictly positive")
  if maximum_iterations <= 0:
@@ -139,11 +141,13 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
    (i.e. the Frobenius norm).

  Raises:
    ValueError: if the `matrix` tensor does not have a fully-known shape, or is
      not two-dimensional and square.
    ValueError: if the `matrix` tensor is not floating-point, does not have a
      fully-known shape, or is not two-dimensional and square.
  """
  if not matrix.dtype.is_floating:
    raise ValueError("matrix must have a floating-point dtype")
  matrix_shape = matrix.get_shape()
  if matrix_shape is None:
  if matrix_shape.ndims is None:
    raise ValueError("matrix must have known shape")
  if matrix_shape.ndims != 2:
    raise ValueError(
@@ -172,12 +176,12 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
        matrix, axis=0, keepdims=True)) / standard_ops.maximum(
            1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
    matrix += scale * inactive
    new_inactive = standard_ops.to_float(matrix > 0)
    new_inactive = standard_ops.cast(matrix > 0, matrix.dtype)
    matrix *= new_inactive
    return (iteration, matrix, new_inactive, inactive)

  iteration = standard_ops.constant(0)
  inactive = standard_ops.ones_like(matrix)
  inactive = standard_ops.ones_like(matrix, dtype=matrix.dtype)

  # We actually want a do-while loop, so we explicitly call while_loop_body()
  # once before tf.while_loop().
@@ -218,7 +222,7 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
  """Base class representing a `_SwapRegretOptimizer`.

  This class contains most of the logic for performing constrained optimization,
  minimizing external regret for the constraints player. What it *doesn't* do is
  minimizing swap regret for the constraints player. What it *doesn't* do is
  keep track of the internal state (the stochastic matrix). Instead, the state
  is accessed via the _initial_state(), _stochastic_matrix(),
  _constraint_grad_and_var() and _projection_op() methods.
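The _SwapRegretOptimizer described above derives a distribution from its stochastic-matrix state, and the _maximal_eigenvector_power_method helper earlier in this file finds maximal eigenvectors by power iteration. A conceptual NumPy sketch of power iteration (illustrative only, with assumed tolerance and iteration defaults; not the TensorFlow implementation):

# Conceptual NumPy sketch of power iteration for a maximal right-eigenvector;
# the tolerance and iteration limit are illustrative assumptions.
import numpy as np

def maximal_right_eigenvector(matrix, epsilon=1e-6, maximum_iterations=1000):
  vector = np.ones(matrix.shape[1]) / matrix.shape[1]
  for _ in range(maximum_iterations):
    new_vector = matrix.dot(vector)
    new_vector /= np.linalg.norm(new_vector)
    if np.linalg.norm(new_vector - vector) < epsilon:
      return new_vector
    vector = new_vector
  return vector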
@@ -291,16 +295,16 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
  def _projection_op(self, state, name=None):
    pass

  def minimize_constrained(self,
                           minimization_problem,
                           global_step=None,
                           var_list=None,
                           gate_gradients=train_optimizer.Optimizer.GATE_OP,
                           aggregation_method=None,
                           colocate_gradients_with_ops=False,
                           name=None,
                           grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    The `optimizer` constructor parameter will be used to update the model
    parameters, while the constraint/objective weight matrix (the analogue of
@@ -320,8 +324,11 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      TensorFlow Op.
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
@@ -329,6 +336,14 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
@@ -344,15 +359,18 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
        name="swap_regret_optimizer_state")

    zero_and_constraints = standard_ops.concat(
        (standard_ops.zeros((1,)), constraints), axis=0)
        (standard_ops.zeros((1,), dtype=constraints.dtype), constraints),
        axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    distribution = self._distribution(state)
    loss = standard_ops.tensordot(distribution, objective_and_proxy_constraints,
                                  1)
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(zero_and_constraints, 1),
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
@@ -555,6 +573,7 @@ class MultiplicativeSwapRegretOptimizer(_SwapRegretOptimizer):
    log_initial_one = math.log(1.0 - (self._initial_multiplier_radius *
                                      (dimension - 1) / (dimension)))
    log_initial_zero = math.log(self._initial_multiplier_radius / dimension)
    # FUTURE WORK: make the dtype a parameter.
    return standard_ops.concat(
        (standard_ops.constant(
            log_initial_one, dtype=dtypes.float32, shape=(1, dimension)),