* Fixing checks for fully-known shapes (see the shape-check sketch below).

* Adding pre_train_ops to ConstrainedMinimizationProblem. These are tf.Operations that should be executed before each training step.

* Updating ConstrainedOptimizers to have the train_ops they create depend on the pre_train_ops from the ConstrainedMinimizationProblem.

PiperOrigin-RevId: 208871419
Andrew Cotter 2018-08-15 13:21:54 -07:00 committed by TensorFlower Gardener
parent 75399bba46
commit d2875ea713
4 changed files with 192 additions and 73 deletions
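
A minimal sketch of the issue behind the first bullet, assuming the TF 1.x graph-mode API: `Tensor.get_shape()` always returns a `TensorShape` and never `None`, so the old `is None` comparison could not detect an under-specified shape. An unknown rank shows up as `ndims is None`, and unknown dimensions as `None` entries in `as_list()`, which is what the updated check looks for.

import tensorflow as tf  # TF 1.x graph mode assumed

x = tf.placeholder(tf.float32)                   # rank and dimensions unknown
y = tf.placeholder(tf.float32, shape=(None, 3))  # rank known, first dimension unknown

print(x.get_shape() is None)         # False: get_shape() returns a TensorShape, never None
print(x.get_shape().ndims is None)   # True: unknown rank, caught by the new check
print(any(d is None for d in y.get_shape().as_list()))  # True: unknown dimension, also caught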

View File

@@ -72,7 +72,8 @@ class ConstrainedMinimizationProblem(object):
else:
proxy_constraints_shape = self.proxy_constraints.get_shape()
if (constraints_shape is None or proxy_constraints_shape is None or
if (constraints_shape.ndims is None or
proxy_constraints_shape.ndims is None or
any([ii is None for ii in constraints_shape.as_list()]) or
any([ii is None for ii in proxy_constraints_shape.as_list()])):
raise ValueError(
@@ -121,3 +122,19 @@ class ConstrainedMinimizationProblem(object):
A tensor of proxy constraint functions.
"""
return None
# This is a property, instead of an abstract property, since it doesn't need
# to be overridden: if pre_train_ops returns None, then there are no ops to
# run before train_op.
@property
def pre_train_ops(self):
"""Returns a list of `Operation`s to run before the train_op.
When a `ConstrainedOptimizer` creates a train_op (in `minimize`,
`minimize_unconstrained`, or `minimize_constrained`), it will include these
ops before the main training step.
Returns:
A list of `Operation`s.
"""
return None
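
As a rough sketch of how the new property is meant to be overridden (the problem class, its constructor arguments, and the update op below are hypothetical, and the import assumes the symbol is exported from the contrib package):

from tensorflow.contrib import constrained_optimization as tfco

class MyProblem(tfco.ConstrainedMinimizationProblem):
  """Hypothetical problem that refreshes a cached statistic before each step."""

  def __init__(self, objective, constraints, statistic_update_op):
    self._objective = objective
    self._constraints = constraints
    self._statistic_update_op = statistic_update_op  # e.g. a tf.assign op

  @property
  def objective(self):
    return self._objective

  @property
  def constraints(self):
    return self._constraints

  @property
  def pre_train_ops(self):
    # Run before every train_op that a ConstrainedOptimizer creates.
    return [self._statistic_update_op]

A `ConstrainedOptimizer` minimizing this problem will wrap its train_op in a control dependency on the returned ops, as the `ConstrainedOptimizer` changes in the next file show.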

View File

@@ -55,20 +55,21 @@ class ConstrainedOptimizer(object):
"""Returns the `tf.train.Optimizer` used for optimization."""
return self._optimizer
def minimize_unconstrained(self,
minimization_problem,
global_step=None,
var_list=None,
gate_gradients=train_optimizer.Optimizer.GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Op` for minimizing the unconstrained problem.
@abc.abstractmethod
def _minimize_constrained(self,
minimization_problem,
global_step=None,
var_list=None,
gate_gradients=train_optimizer.Optimizer.GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Version of `minimize_constrained` to be overridden by subclasses.
Unlike `minimize_constrained`, this function ignores the `constraints` (and
`proxy_constraints`) portion of the minimization problem entirely, and only
minimizes `objective`.
Implementations of this method should ignore the `pre_train_ops` property of
the `minimization_problem`. The public `minimize_constrained` method will
take care of executing these before the returned train_op.
Args:
minimization_problem: ConstrainedMinimizationProblem, the problem to
@@ -83,19 +84,10 @@ class ConstrainedOptimizer(object):
grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
Returns:
TensorFlow Op.
`Operation`, the train_op.
"""
return self.optimizer.minimize(
minimization_problem.objective,
global_step=global_step,
var_list=var_list,
gate_gradients=gate_gradients,
aggregation_method=aggregation_method,
colocate_gradients_with_ops=colocate_gradients_with_ops,
name=name,
grad_loss=grad_loss)
pass
@abc.abstractmethod
def minimize_constrained(self,
minimization_problem,
global_step=None,
@@ -105,7 +97,7 @@ class ConstrainedOptimizer(object):
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Op` for minimizing the constrained problem.
"""Returns an `Operation` for minimizing the constrained problem.
Unlike `minimize_unconstrained`, this function attempts to find a solution
that minimizes the `objective` portion of the minimization problem while
@@ -124,9 +116,83 @@ class ConstrainedOptimizer(object):
grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
Returns:
TensorFlow Op.
`Operation`, the train_op.
"""
pass
def train_op_callback():
return self._minimize_constrained(
minimization_problem,
global_step=global_step,
var_list=var_list,
gate_gradients=gate_gradients,
aggregation_method=aggregation_method,
colocate_gradients_with_ops=colocate_gradients_with_ops,
name=name,
grad_loss=grad_loss)
# If we have pre_train_ops, use tf.control_dependencies() to ensure that
# they execute before the train_op.
pre_train_ops = minimization_problem.pre_train_ops
if pre_train_ops:
with ops.control_dependencies(pre_train_ops):
train_op = train_op_callback()
else:
train_op = train_op_callback()
return train_op
def minimize_unconstrained(self,
minimization_problem,
global_step=None,
var_list=None,
gate_gradients=train_optimizer.Optimizer.GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Operation` for minimizing the unconstrained problem.
Unlike `minimize_constrained`, this function ignores the `constraints` (and
`proxy_constraints`) portion of the minimization problem entirely, and only
minimizes `objective`.
Args:
minimization_problem: ConstrainedMinimizationProblem, the problem to
optimize.
global_step: as in `tf.train.Optimizer`'s `minimize` method.
var_list: as in `tf.train.Optimizer`'s `minimize` method.
gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
method.
name: as in `tf.train.Optimizer`'s `minimize` method.
grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
Returns:
`Operation`, the train_op.
"""
def train_op_callback():
return self.optimizer.minimize(
minimization_problem.objective,
global_step=global_step,
var_list=var_list,
gate_gradients=gate_gradients,
aggregation_method=aggregation_method,
colocate_gradients_with_ops=colocate_gradients_with_ops,
name=name,
grad_loss=grad_loss)
# If we have pre_train_ops, use tf.control_dependencies() to ensure that
# they execute before the train_op.
pre_train_ops = minimization_problem.pre_train_ops
if pre_train_ops:
with ops.control_dependencies(pre_train_ops):
train_op = train_op_callback()
else:
train_op = train_op_callback()
return train_op
def minimize(self,
minimization_problem,
@@ -138,7 +204,7 @@ class ConstrainedOptimizer(object):
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Op` for minimizing the constrained problem.
"""Returns an `Operation` for minimizing the constrained problem.
This method combines the functionality of `minimize_unconstrained` and
`minimize_constrained`. If global_step < unconstrained_steps, it will
@@ -164,14 +230,14 @@ class ConstrainedOptimizer(object):
grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
Returns:
TensorFlow Op.
`Operation`, the train_op.
Raises:
ValueError: If unconstrained_steps is provided, but global_step is not.
"""
def unconstrained_fn():
"""Returns an `Op` for minimizing the unconstrained problem."""
"""Returns an `Operation` for minimizing the unconstrained problem."""
return self.minimize_unconstrained(
minimization_problem=minimization_problem,
global_step=global_step,
@@ -183,7 +249,7 @@ class ConstrainedOptimizer(object):
grad_loss=grad_loss)
def constrained_fn():
"""Returns an `Op` for minimizing the constrained problem."""
"""Returns an `Operation` for minimizing the constrained problem."""
return self.minimize_constrained(
minimization_problem=minimization_problem,
global_step=global_step,
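
Both wrappers above rely on tf.control_dependencies to guarantee that the pre_train_ops run before the training step; a small self-contained check of that ordering, assuming TF 1.x graph mode and purely illustrative op names:

import tensorflow as tf

counter = tf.Variable(0, name="counter")
pre_train_op = tf.assign_add(counter, 1)      # stand-in for a pre_train_op

with tf.control_dependencies([pre_train_op]):
  train_op = tf.no_op(name="train_op")        # stand-in for the real training step

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(train_op)
  print(sess.run(counter))                    # 1: pre_train_op ran before train_op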

View File

@@ -70,11 +70,13 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
region w.r.t. the Euclidean norm.
Raises:
ValueError: if the `multipliers` tensor does not have a fully-known shape,
or is not one-dimensional.
ValueError: if the `multipliers` tensor is not floating-point, does not have
a fully-known shape, or is not one-dimensional.
"""
if not multipliers.dtype.is_floating:
raise ValueError("multipliers must have a floating-point dtype")
multipliers_shape = multipliers.get_shape()
if multipliers_shape is None:
if multipliers_shape.ndims is None:
raise ValueError("multipliers must have known shape")
if multipliers_shape.ndims != 1:
raise ValueError(
@@ -101,12 +103,12 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
(radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum(
1.0, standard_ops.reduce_sum(inactive)))
multipliers += scale * inactive
new_inactive = standard_ops.to_float(multipliers > 0)
new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype)
multipliers *= new_inactive
return (iteration, multipliers, new_inactive, inactive)
iteration = standard_ops.constant(0)
inactive = standard_ops.ones_like(multipliers)
inactive = standard_ops.ones_like(multipliers, dtype=multipliers.dtype)
# We actually want a do-while loop, so we explicitly call while_loop_body()
# once before tf.while_loop().
@@ -189,16 +191,16 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
def _projection_op(self, state, name=None):
pass
def minimize_constrained(self,
minimization_problem,
global_step=None,
var_list=None,
gate_gradients=train_optimizer.Optimizer.GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Op` for minimizing the constrained problem.
def _minimize_constrained(self,
minimization_problem,
global_step=None,
var_list=None,
gate_gradients=train_optimizer.Optimizer.GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Operation` for minimizing the constrained problem.
The `optimizer` constructor parameter will be used to update the model
parameters, while the Lagrange multipliers will be updated using
@@ -216,8 +218,11 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
name: as in `tf.train.Optimizer`'s `minimize` method.
grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
Raises:
ValueError: If the minimization_problem tensors have different dtypes.
Returns:
TensorFlow Op.
`Operation`, the train_op.
"""
objective = minimization_problem.objective
@@ -225,6 +230,14 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
proxy_constraints = minimization_problem.proxy_constraints
if proxy_constraints is None:
proxy_constraints = constraints
# Make sure that the objective, constraints and proxy constraints all have
# the same dtype.
if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
raise ValueError("objective, constraints and proxy_constraints must "
"have the same dtype")
# Flatten both constraints tensors to 1d.
num_constraints = minimization_problem.num_constraints
constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
@@ -241,8 +254,10 @@ class _ExternalRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
multipliers = self._lagrange_multipliers(state)
loss = (
objective + standard_ops.tensordot(multipliers, proxy_constraints, 1))
multipliers_gradient = constraints
objective + standard_ops.tensordot(
standard_ops.cast(multipliers, proxy_constraints.dtype),
proxy_constraints, 1))
multipliers_gradient = standard_ops.cast(constraints, multipliers.dtype)
update_ops = []
if self.constraint_optimizer is None:
@@ -356,6 +371,8 @@ class AdditiveExternalRegretOptimizer(_ExternalRegretOptimizer):
# For an AdditiveExternalRegretOptimizer, the internal state is simply a
# tensor of Lagrange multipliers with shape (m,), where m is the number of
# constraints.
#
# FUTURE WORK: make the dtype a parameter.
return standard_ops.zeros((num_constraints,), dtype=dtypes.float32)
def _lagrange_multipliers(self, state):
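
A recurring edit in this file (and the next one) replaces to_float with a dtype-preserving cast; a short sketch of the difference, assuming the TF 1.x API:

import tensorflow as tf

multipliers = tf.constant([0.2, -0.1, 0.5], dtype=tf.float64)

old_mask = tf.to_float(multipliers > 0)                  # always float32
new_mask = tf.cast(multipliers > 0, multipliers.dtype)   # float64, matching multipliers

# multipliers * old_mask fails with a dtype mismatch for float64 inputs,
# while multipliers * new_mask works for any floating-point dtype.
projected = multipliers * new_mask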

View File

@@ -79,9 +79,11 @@ def _maximal_eigenvector_power_method(matrix,
The maximal right-eigenvector of `matrix`.
Raises:
ValueError: If the epsilon or maximum_iterations parameters violate their
bounds.
ValueError: If the `matrix` tensor is not floating-point, or if the
`epsilon` or `maximum_iterations` parameters violate their bounds.
"""
if not matrix.dtype.is_floating:
raise ValueError("multipliers must have a floating-point dtype")
if epsilon <= 0.0:
raise ValueError("epsilon must be strictly positive")
if maximum_iterations <= 0:
@@ -139,11 +141,13 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
(i.e. the Frobenius norm).
Raises:
ValueError: if the `matrix` tensor does not have a fully-known shape, or is
not two-dimensional and square.
ValueError: if the `matrix` tensor is not floating-point, does not have a
fully-known shape, or is not two-dimensional and square.
"""
if not matrix.dtype.is_floating:
raise ValueError("multipliers must have a floating-point dtype")
matrix_shape = matrix.get_shape()
if matrix_shape is None:
if matrix_shape.ndims is None:
raise ValueError("matrix must have known shape")
if matrix_shape.ndims != 2:
raise ValueError(
@@ -172,12 +176,12 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
matrix, axis=0, keepdims=True)) / standard_ops.maximum(
1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
matrix += scale * inactive
new_inactive = standard_ops.to_float(matrix > 0)
new_inactive = standard_ops.cast(matrix > 0, matrix.dtype)
matrix *= new_inactive
return (iteration, matrix, new_inactive, inactive)
iteration = standard_ops.constant(0)
inactive = standard_ops.ones_like(matrix)
inactive = standard_ops.ones_like(matrix, dtype=matrix.dtype)
# We actually want a do-while loop, so we explicitly call while_loop_body()
# once before tf.while_loop().
@@ -218,7 +222,7 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
"""Base class representing a `_SwapRegretOptimizer`.
This class contains most of the logic for performing constrained optimization,
minimizing external regret for the constraints player. What it *doesn't* do is
minimizing swap regret for the constraints player. What it *doesn't* do is
keep track of the internal state (the stochastic matrix). Instead, the state
is accessed via the _initial_state(), _stochastic_matrix(),
_constraint_grad_and_var() and _projection_op() methods.
@@ -291,16 +295,16 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
def _projection_op(self, state, name=None):
pass
def minimize_constrained(self,
minimization_problem,
global_step=None,
var_list=None,
gate_gradients=train_optimizer.Optimizer.GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Op` for minimizing the constrained problem.
def _minimize_constrained(self,
minimization_problem,
global_step=None,
var_list=None,
gate_gradients=train_optimizer.Optimizer.GATE_OP,
aggregation_method=None,
colocate_gradients_with_ops=False,
name=None,
grad_loss=None):
"""Returns an `Operation` for minimizing the constrained problem.
The `optimizer` constructor parameter will be used to update the model
parameters, while the constraint/objective weight matrix (the analogue of
@@ -320,8 +324,11 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
name: as in `tf.train.Optimizer`'s `minimize` method.
grad_loss: as in `tf.train.Optimizer`'s `minimize` method.
Raises:
ValueError: If the minimization_problem tensors have different dtypes.
Returns:
TensorFlow Op.
`Operation`, the train_op.
"""
objective = minimization_problem.objective
@@ -329,6 +336,14 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
proxy_constraints = minimization_problem.proxy_constraints
if proxy_constraints is None:
proxy_constraints = constraints
# Make sure that the objective, constraints and proxy constraints all have
# the same dtype.
if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
raise ValueError("objective, constraints and proxy_constraints must "
"have the same dtype")
# Flatten both constraints tensors to 1d.
num_constraints = minimization_problem.num_constraints
constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
@@ -344,15 +359,18 @@ class _SwapRegretOptimizer(constrained_optimizer.ConstrainedOptimizer):
name="swap_regret_optimizer_state")
zero_and_constraints = standard_ops.concat(
(standard_ops.zeros((1,)), constraints), axis=0)
(standard_ops.zeros((1,), dtype=constraints.dtype), constraints),
axis=0)
objective_and_proxy_constraints = standard_ops.concat(
(standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)
distribution = self._distribution(state)
loss = standard_ops.tensordot(distribution, objective_and_proxy_constraints,
1)
loss = standard_ops.tensordot(
standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
objective_and_proxy_constraints, 1)
matrix_gradient = standard_ops.matmul(
standard_ops.expand_dims(zero_and_constraints, 1),
standard_ops.expand_dims(
standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
standard_ops.expand_dims(distribution, 0))
update_ops = []
@@ -555,6 +573,7 @@ class MultiplicativeSwapRegretOptimizer(_SwapRegretOptimizer):
log_initial_one = math.log(1.0 - (self._initial_multiplier_radius *
(dimension - 1) / (dimension)))
log_initial_zero = math.log(self._initial_multiplier_radius / dimension)
# FUTURE WORK: make the dtype a parameter.
return standard_ops.concat(
(standard_ops.constant(
log_initial_one, dtype=dtypes.float32, shape=(1, dimension)),
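
A quick numeric sanity check of the initial state constructed above, with illustrative values: exponentiating the returned matrix yields one log_initial_one entry and (dimension - 1) log_initial_zero entries per column, so every column sums to one.

import math

initial_multiplier_radius = 0.01  # illustrative value
dimension = 3                     # num_constraints + 1

log_initial_one = math.log(1.0 - (initial_multiplier_radius *
                                  (dimension - 1) / dimension))
log_initial_zero = math.log(initial_multiplier_radius / dimension)

column_sum = math.exp(log_initial_one) + (dimension - 1) * math.exp(log_initial_zero)
print(column_sum)  # ~1.0, up to float rounding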