313 lines
8.8 KiB
Python
313 lines
8.8 KiB
Python
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""Various classes representing distributed values for PS."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import weakref
|
|
|
|
from tensorflow.python.distribute import distribute_lib
|
|
from tensorflow.python.distribute import distribution_strategy_context as ds_context
|
|
from tensorflow.python.distribute import values
|
|
from tensorflow.python.distribute import values_util
|
|
from tensorflow.python.framework import ops
|
|
from tensorflow.python.ops import variable_scope as vs
|
|
from tensorflow.python.ops import variables as variables_lib
|
|
from tensorflow.python.training.tracking import base as trackable
|
|
from tensorflow.python.types import core
|
|
|
|
|
|
# Variable used in PSStrategy TF 1 and CentralStorageStrategy.
|
|
class AggregatingVariable(variables_lib.Variable, core.Tensor):
  """A wrapper around a variable that aggregates updates across replicas.

  The wrapper stores a single underlying variable and forwards reads,
  operators and otherwise-unknown attribute lookups to it. The assign methods
  (`assign`, `assign_add`, `assign_sub`) go through `_assign_func`, which
  picks how to apply the update depending on the current distribution context.
  """

  def __init__(self, strategy, v, aggregation):
    """Wraps `v` and records the strategy and aggregation mode.

    Args:
      strategy: The distribution strategy; it is entered (or asserted) around
        every assign call and returned by `distribute_strategy`.
      v: The variable to wrap. It receives a weak back-reference to this
        wrapper in its `_aggregating_container` attribute.
      aggregation: The aggregation mode; compared against
        `vs.VariableAggregation.NONE` when assigning in replica context.
    """
    self._distribute_strategy = strategy
    self._v = v
    # NOTE: We don't use "_distributed_container" here because we don't want
    # to trigger that code path in regroup().
    v._aggregating_container = weakref.ref(self)  # pylint: disable=protected-access
    self._aggregation = aggregation

  def get(self):
    """Returns the wrapped variable."""
    return self._v

  @property
  def distribute_strategy(self):
    """The strategy passed to the constructor."""
    return self._distribute_strategy

  def __getattr__(self, name):
    """Forwards lookups of attributes not found on the wrapper to `_v`.

    Python only calls this method after normal attribute lookup has failed.

    Args:
      name: Name of the attribute being looked up.

    Returns:
      The attribute `name` of the wrapped variable.

    Raises:
      AttributeError: If `name` is `_v` itself (the wrapper has no wrapped
        variable yet), or if the wrapped variable has no such attribute.
    """
    # If `_v` is what is missing, evaluating `self._v` below would re-enter
    # this method without end. That happens on instances created without
    # running `__init__` (e.g. the empty object `copy.copy` builds before
    # restoring state and probes with `hasattr`). Fail with the exception
    # type that `hasattr`/`getattr(..., default)` know how to handle.
    if name == "_v":
      raise AttributeError(
          "'{}' object has no attribute '_v'".format(type(self).__name__))
    return getattr(self._v, name)

  def _assign_func(self, *args, **kwargs):
    """Applies the assign-like callable `f` in the current context.

    Args:
      *args: Positional arguments forwarded to `f` after the variable.
      **kwargs: Must contain `f`, a callable invoked as
        `f(variable, *args, **kwargs)`. All other entries are forwarded.

    Returns:
      In an update context, the result of calling `f` on the wrapped variable.
      In cross-replica context, the result of `strategy.extended.update`.
      In replica context, the result of `replica_context.merge_call`.

    Raises:
      KeyError: If `f` is not supplied in `kwargs`.
      ValueError: If called in replica context while the aggregation mode is
        `vs.VariableAggregation.NONE`.
    """
    with ds_context.enter_or_assert_strategy(self._distribute_strategy):
      f = kwargs.pop("f")
      if ds_context.in_cross_replica_context():
        if distribute_lib.get_update_replica_id() is not None:
          # We are calling an assign function in an update context.
          return f(self._v, *args, **kwargs)

        # We are calling an assign function in cross replica context, wrap it in
        # an update call.
        return self._distribute_strategy.extended.update(
            self, f, args=args, kwargs=kwargs)
      else:
        replica_context = ds_context.get_replica_context()
        assert replica_context
        # We are calling an assign function in replica context.
        # The value we want to assign/add/sub is first passed through
        # `values_util.apply_aggregation` (see `merge_fn` below), and the
        # assign function is then called with the result inside an update.
        if self._aggregation == vs.VariableAggregation.NONE:
          raise ValueError(
              values_util.aggregation_error_msg.format(
                  variable_type="AggregatingVariable"))

        def merge_fn(strategy,
                     value,
                     use_locking=False,
                     name=None,
                     read_value=True):
          """Aggregates `value` and applies `f` through a strategy update."""
          v = values_util.apply_aggregation(strategy, value, self._aggregation,
                                            self)
          # A per-replica name is collapsed to its first component.
          if name and isinstance(name, values.PerReplica):
            name = name.values[0]
          return strategy.extended.update(
              self,
              f,
              args=(v,),
              kwargs={
                  "use_locking": use_locking,
                  "name": name,
                  "read_value": read_value
              })
        return replica_context.merge_call(merge_fn, args=args, kwargs=kwargs)

  def assign_sub(self, *args, **kwargs):
    """Calls `assign_sub` on the variable via `_assign_func`."""
    assign_sub_fn = lambda var, *a, **kw: var.assign_sub(*a, **kw)
    return self._assign_func(f=assign_sub_fn, *args, **kwargs)

  def assign_add(self, *args, **kwargs):
    """Calls `assign_add` on the variable via `_assign_func`."""
    assign_add_fn = lambda var, *a, **kw: var.assign_add(*a, **kw)
    return self._assign_func(f=assign_add_fn, *args, **kwargs)

  def assign(self, *args, **kwargs):
    """Calls `assign` on the variable via `_assign_func`."""
    assign_fn = lambda var, *a, **kw: var.assign(*a, **kw)
    return self._assign_func(f=assign_fn, *args, **kwargs)

  # The members below simply expose the corresponding member of the wrapped
  # variable, except `aggregation`, which is stored on the wrapper itself.

  @property
  def initializer(self):
    """The wrapped variable's `initializer`."""
    return self._v.initializer

  def initialized_value(self):
    """Returns the wrapped variable's `initialized_value()`."""
    return self._v.initialized_value()

  @property
  def initial_value(self):
    """The wrapped variable's `initial_value`."""
    return self._v.initial_value

  @property
  def op(self):
    """The wrapped variable's `op`."""
    return self._v.op

  def read_value(self):
    """Returns the wrapped variable's `read_value()`."""
    return self._v.read_value()

  def eval(self, session=None):
    """Returns the wrapped variable's `eval(session)`."""
    return self._v.eval(session)

  @property
  def graph(self):
    """The wrapped variable's `graph`."""
    return self._v.graph

  @property
  def device(self):
    """The wrapped variable's `device`."""
    return self._v.device

  @property
  def shape(self):
    """The wrapped variable's `shape`."""
    return self._v.shape

  @property
  def aggregation(self):
    """The aggregation mode passed to the constructor."""
    return self._aggregation

  @property
  def synchronization(self):
    """The wrapped variable's `synchronization`."""
    return self._v.synchronization

  @property
  def name(self):
    """The wrapped variable's `name`."""
    return self._v.name

  @property
  def trainable(self):
    """The wrapped variable's `trainable`."""
    return self._v.trainable

  @property
  def dtype(self):
    """The wrapped variable's `dtype`."""
    return self._v.dtype

  # TODO(josh11b): Test saving & restoring.
  def _gather_saveables_for_checkpoint(self):
    """Maps the variable-value checkpoint key to the wrapped variable."""
    return {trackable.VARIABLE_VALUE_KEY: self._v}

  def _map_resources(self):
    """For implementing `Trackable`."""
    # By delegating this method to the wrapped variable, SavedModel with
    # AggregatingVariable are identical to SavedModel with normal variables.
    obj_map, resource_map = self._v._map_resources()  # pylint:disable=protected-access
    # The wrapper maps to the same object as the variable it wraps.
    obj_map[self] = obj_map[self._v]
    return obj_map, resource_map

  # Operator overloads: each one applies the same operator to the wrapped
  # variable and returns whatever that produces.
  # pylint: disable=multiple-statements
  def __add__(self, o):
    return self._v + o

  def __radd__(self, o):
    return o + self._v

  def __sub__(self, o):
    return self._v - o

  def __rsub__(self, o):
    return o - self._v

  def __mul__(self, o):
    return self._v * o

  def __rmul__(self, o):
    return o * self._v

  def __truediv__(self, o):
    return self._v / o

  def __rtruediv__(self, o):
    return o / self._v

  def __floordiv__(self, o):
    return self._v // o

  def __rfloordiv__(self, o):
    return o // self._v

  def __mod__(self, o):
    return self._v % o

  def __rmod__(self, o):
    return o % self._v

  def __lt__(self, o):
    return self._v < o

  def __le__(self, o):
    return self._v <= o

  def __gt__(self, o):
    return self._v > o

  def __ge__(self, o):
    return self._v >= o

  def __and__(self, o):
    return self._v & o

  def __rand__(self, o):
    return o & self._v

  def __or__(self, o):
    return self._v | o

  def __ror__(self, o):
    return o | self._v

  def __xor__(self, o):
    return self._v ^ o

  def __rxor__(self, o):
    return o ^ self._v

  def __getitem__(self, o):
    return self._v[o]

  def __pow__(self, o, modulo=None):
    return pow(self._v, o, modulo)

  def __rpow__(self, o):
    return pow(o, self._v)

  def __invert__(self):
    return ~self._v

  def __neg__(self):
    return -self._v

  def __abs__(self):
    return abs(self._v)

  # The four operators below call the wrapped variable's dunder method
  # directly, so a variable that lacks the method yields `NotImplemented`
  # instead of an AttributeError escaping from the operator.
  def __div__(self, o):
    try:
      return self._v.__div__(o)
    except AttributeError:
      # See https://docs.python.org/3/library/constants.html#NotImplemented
      return NotImplemented

  def __rdiv__(self, o):
    try:
      return self._v.__rdiv__(o)
    except AttributeError:
      # See https://docs.python.org/3/library/constants.html#NotImplemented
      return NotImplemented

  def __matmul__(self, o):
    try:
      return self._v.__matmul__(o)
    except AttributeError:
      # See https://docs.python.org/3/library/constants.html#NotImplemented
      return NotImplemented

  def __rmatmul__(self, o):
    try:
      return self._v.__rmatmul__(o)
    except AttributeError:
      # See https://docs.python.org/3/library/constants.html#NotImplemented
      return NotImplemented

  def __str__(self):
    return str(self._v)

  def __repr__(self):
    return repr(self._v)

  def _should_act_as_resource_variable(self):
    """Pass resource_variable_ops.is_resource_variable check."""
    pass

  def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False):
    """Converts the wrapped variable with `ops.convert_to_tensor`.

    Args:
      dtype: Forwarded to `ops.convert_to_tensor`.
      name: Forwarded to `ops.convert_to_tensor`.
      as_ref: Forwarded to `ops.convert_to_tensor`.

    Returns:
      The result of `ops.convert_to_tensor` on the wrapped variable.
    """
    return ops.convert_to_tensor(self.get(), dtype=dtype, name=name,
                                 as_ref=as_ref)
|
|
|
|
|
|
# Register a conversion function which reads the value of the variable,
|
|
# allowing instances of the class to be used as tensors.
|
|
def _tensor_conversion_aggregate(var, dtype=None, name=None, as_ref=False):
  """Converts `var` to a tensor through its `_dense_var_to_tensor` method.

  Args:
    var: Object providing `_dense_var_to_tensor(dtype, name, as_ref)`.
    dtype: Passed through as the first positional argument.
    name: Passed through as the second positional argument.
    as_ref: Passed through as the third positional argument.

  Returns:
    Whatever `var._dense_var_to_tensor` returns.
  """
  to_tensor = var._dense_var_to_tensor  # pylint: disable=protected-access
  return to_tensor(dtype, name, as_ref)
|
|
|
|
|
|
# Route tensor conversion of `AggregatingVariable` instances through
# `_tensor_conversion_aggregate`.
ops.register_tensor_conversion_function(
    base_type=AggregatingVariable,
    conversion_func=_tensor_conversion_aggregate)
|