Resurrects autograd-free eager gradients.
PiperOrigin-RevId: 168448557
This commit is contained in:
parent 8f37f30027
commit 655f26fc70
@ -637,6 +637,7 @@ py_library(
|
||||
"//tensorflow/core:protos_all_py",
|
||||
"//tensorflow/python/eager:context",
|
||||
"//tensorflow/python/eager:core",
|
||||
"//tensorflow/python/eager:tape",
|
||||
"@six_archive//:six",
|
||||
],
|
||||
)
|
||||
@ -1800,7 +1801,6 @@ py_library(
|
||||
"//tensorflow/python/eager:context",
|
||||
"//tensorflow/python/eager:custom_gradient",
|
||||
"//tensorflow/python/eager:tape",
|
||||
"//tensorflow/python/eager:tensor_node",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -82,7 +82,6 @@ py_library(
|
||||
visibility = ["//tensorflow:internal"],
|
||||
deps = [
|
||||
"//tensorflow/python:dtypes",
|
||||
"//tensorflow/python:framework_ops",
|
||||
"//tensorflow/python:util",
|
||||
],
|
||||
)
|
||||
@ -307,25 +306,6 @@ py_library(
|
||||
],
|
||||
)
|
||||
|
||||
py_library(
|
||||
name = "tensor_node",
|
||||
srcs = ["tensor_node.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
visibility = ["//tensorflow:internal"],
|
||||
deps = [
|
||||
":context",
|
||||
":custom_gradient",
|
||||
":tape",
|
||||
":tensor",
|
||||
"//tensorflow/python:array_ops",
|
||||
"//tensorflow/python:common_shapes",
|
||||
"//tensorflow/python:dtypes",
|
||||
"//tensorflow/python:framework_ops",
|
||||
"//tensorflow/python:math_ops",
|
||||
"//tensorflow/python:tensor_shape",
|
||||
],
|
||||
)
|
||||
|
||||
py_library(
|
||||
name = "backprop",
|
||||
srcs = ["backprop.py"],
|
||||
@ -344,7 +324,6 @@ py_library(
|
||||
"//tensorflow/python/eager:execute",
|
||||
"//tensorflow/python/eager:tape",
|
||||
"//tensorflow/python/eager:tensor",
|
||||
"//tensorflow/python/eager:tensor_node",
|
||||
"@six_archive//:six",
|
||||
],
|
||||
)
|
||||
@ -386,18 +365,6 @@ py_test(
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "tensor_node_test",
|
||||
srcs = ["tensor_node_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":tensor",
|
||||
":tensor_node",
|
||||
":test",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "ops_test",
|
||||
srcs = ["ops_test.py"],
|
||||
|
@ -21,30 +21,265 @@ from __future__ import print_function
|
||||
import collections
|
||||
import threading
|
||||
|
||||
from autograd import container_types
|
||||
from autograd import convenience_wrappers
|
||||
from autograd import core as ag_core
|
||||
|
||||
import six
|
||||
|
||||
from tensorflow.python import pywrap_tensorflow
|
||||
from tensorflow.python.eager import context
|
||||
from tensorflow.python.eager import execute
|
||||
from tensorflow.python.eager import tape
|
||||
from tensorflow.python.eager import tensor
|
||||
# Imports TensorNode to enable autograd tracing of TF ops. We don't need to use
|
||||
# any symbols here but import the file just to get the right registrations to
|
||||
# happen.
|
||||
from tensorflow.python.eager import tensor_node # pylint: disable=unused-import
|
||||
from tensorflow.python.framework import constant_op
|
||||
from tensorflow.python.framework import dtypes
|
||||
from tensorflow.python.framework import errors
|
||||
from tensorflow.python.framework import ops as tf_ops
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.framework import tensor_shape
|
||||
from tensorflow.python.ops import array_ops
|
||||
from tensorflow.python.ops import math_ops
|
||||
from tensorflow.python.util import tf_contextlib
|
||||
from tensorflow.python.util import tf_inspect
|
||||
|
||||
|
||||
# Terminology:
#
#  - op: a possibly composite operation, which has an entry in the tape
#  - target: dy in dy/dx
#  - source: dx in dy/dx
#  - tensor: one of the many inputs or outputs of an operation
#
# The gradient algorithm below works as follows:
#
# First we filter the tape to just the subset of operations we want to
# differentiate. In the process of doing so we count how many times each Tensor
# is used as an input to an op (so we know when we're done computing gradients
# for that Tensor). We also count, for each tape entry, how many of its output
# Tensors need gradients to be computed (Tensors which are not used do not need
# any gradients to be computed).
#
# Next, we initialize a backprop stack with the set of tape entries for which
# all output gradients are already available. This is usually a subset of the
# entries producing targets (not all of them, since a target which is also used
# as an input to another entry in the tape does not have its full gradient
# available initially).
#
# Then we repeatedly pop an entry from the stack, run its backprop, and update
# the gradients of its inputs. Once we have computed all gradients for a single
# input we can mark this input as done, and this can trigger adding an entry to
# the stack if all outputs of that entry are now done.
#
# When the stack is empty we have gradients for all tensors we're interested in.

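# As a small worked example (illustrative only, not part of this module):
# suppose the tape recorded z = x * x followed by y = z + x, and x is the only
# source. Filtering keeps both entries; z is used once and x is used by both
# entries. Only the entry for y has no outputs feeding later entries, so the
# stack starts with it; running its backward function yields gradients for z
# and x, which completes z and pushes the entry for z = x * x. Popping that
# entry produces the remaining contribution to x, and aggregating both
# contributions gives dy/dx = 2 * x + 1.
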
def _prepare_backprop(target, tensor_to_op, op_to_entry, id_sources):
|
||||
"""Filters the tape to only include relevant entries and counts tensor usages.
|
||||
|
||||
Args:
  target: the target to optimize.
  tensor_to_op: Map from tensor id to key in op_to_entry that produced it.
  op_to_entry: Map from op id to a tape.TapeEntry object.
  id_sources: the ids of the tensors with respect to which the gradient is
    being taken.

Returns:
  usage counts (how many downstream entries use each tensor as an input),
  op_to_entry_map: entry map (a filtered tape, with only the relevant entries),
  missing: map from op id to how many of its output gradients still need to be
    computed before its backward function can be run.
"""
|
||||
if isinstance(target, (ops.Tensor)):
|
||||
tensor_stack = [ops.tensor_id(target)]
|
||||
else:
|
||||
tensor_stack = list([ops.tensor_id(x) for x in target])
|
||||
tensor_usage_counts = {}
|
||||
o_to_e = {} # Copy of just the bits we need from op_to_entry
|
||||
while tensor_stack:
|
||||
t = tensor_stack.pop()
|
||||
op = tensor_to_op[t]
|
||||
# op is None if the tensor is a source (i.e. was watched directly)
|
||||
if op is None or op in o_to_e:
|
||||
continue
|
||||
op_trace = op_to_entry[op]
|
||||
o_to_e[op] = op_trace
|
||||
for i in op_trace.inputs:
|
||||
it = ops.tensor_id(i)
|
||||
if it in tensor_usage_counts:
|
||||
tensor_usage_counts[it] += 1
|
||||
else:
|
||||
tensor_usage_counts[it] = 1
|
||||
if it not in id_sources and it in tensor_to_op:
|
||||
tensor_stack.append(it)
|
||||
op_missing_tensor_counts = collections.defaultdict(int)
|
||||
for t in tensor_usage_counts:
|
||||
if t in tensor_to_op and tensor_to_op[t] is not None:
|
||||
op_missing_tensor_counts[tensor_to_op[t]] += 1
|
||||
return tensor_usage_counts, o_to_e, op_missing_tensor_counts
|
||||
|
||||
|
||||
def _initialize_backprop_stack(op_to_entry, op_missing_tensor):
|
||||
"""Returns the set of tape entries which are available for backprop."""
|
||||
ready_ops = []
|
||||
for op in op_to_entry:
|
||||
if op not in op_missing_tensor:
|
||||
ready_ops.append(op)
|
||||
return ready_ops
|
||||
|
||||
|
||||
def _initial_gradients(target, output_gradients, tensor_usage_counts):
|
||||
"""Computes the initial gradients for each Tensor."""
|
||||
# Initialize the backprop stack
|
||||
gradients = collections.defaultdict(list)
|
||||
if isinstance(target, ops.Tensor):
|
||||
if output_gradients is not None:
|
||||
output_gradient = output_gradients
|
||||
else:
|
||||
output_gradient = array_ops.ones_like(target)
|
||||
gradients[ops.tensor_id(target)].append(output_gradient)
|
||||
else:
|
||||
for i, t in enumerate(target):
|
||||
if ops.tensor_id(t) in tensor_usage_counts:
|
||||
# Can't provide a gradient of something we're trying to differentiate
|
||||
assert output_gradients is None or output_gradients[i] is None
|
||||
else:
|
||||
if output_gradients is None or output_gradients[i] is None:
|
||||
out_grad = ops.ones_like(t)
|
||||
else:
|
||||
out_grad = output_gradients[i]
|
||||
gradients[ops.tensor_id(t)].append(out_grad)
|
||||
return gradients
|
||||
|
||||
|
||||
@tf_contextlib.contextmanager
|
||||
def _no_op():
|
||||
yield
|
||||
|
||||
|
||||
def _aggregate_grads(gradients):
|
||||
"""Aggregate gradients from multiple sources.
|
||||
|
||||
Args:
|
||||
gradients: A list of 'Tensor' or 'IndexedSlices' gradients.
|
||||
|
||||
Returns:
|
||||
If 'gradients' only has 'Tensor', returns an aggregated 'Tensor'.
|
||||
Otherwise returns an aggregated 'IndexedSlices'.
|
||||
"""
|
||||
assert gradients, "No gradients to aggregate"
|
||||
|
||||
if len(gradients) == 1:
|
||||
return gradients[0]
|
||||
if all([isinstance(g, ops.Tensor) for g in gradients]):
|
||||
return math_ops.add_n(gradients)
|
||||
else:
|
||||
assert all([isinstance(g, (ops.Tensor, ops.IndexedSlices))
|
||||
for g in gradients])
|
||||
indexed_slices_list = []
|
||||
for grad in gradients:
|
||||
# TODO(xpan): Support nested IndexedSlices and core IndexedSlices
|
||||
if isinstance(grad, ops.Tensor):
|
||||
indexed_slices = ops.IndexedSlices(
|
||||
grad,
|
||||
constant_op.constant(range(grad.shape[0])),
|
||||
constant_op.constant(grad.shape.as_list()))
|
||||
indexed_slices_list.append(indexed_slices)
|
||||
else:
|
||||
indexed_slices_list.append(grad)
|
||||
|
||||
# Dense shapes from all gradients should be the same.
|
||||
dense_shape = indexed_slices_list[0].dense_shape
|
||||
# For simplicity now, always cast to int64.
|
||||
indices = array_ops.concat([math_ops.cast(x.indices, dtypes.int64)
|
||||
for x in indexed_slices_list], 0)
|
||||
values = array_ops.concat([x.values for x in indexed_slices_list], 0)
|
||||
return ops.IndexedSlices(values, indices, dense_shape)
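# For example (an illustrative note, not code): aggregating a mixed list
# [dense_grad, IndexedSlices(values, indices, dense_shape)] first rewrites the
# dense gradient as an IndexedSlices covering rows 0..n-1, then concatenates
# the values and int64-cast indices of all slices into a single IndexedSlices;
# duplicate indices are summed later, when the result is densified.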
|
||||
|
||||
|
||||
def imperative_grad(
    target,
    sources,
    output_gradients=None):
  """Computes gradients from the imperatively defined tape on top of the stack.

  Works by filtering the tape, computing how many downstream usages there are
  of each tensor and entry, and repeatedly applying backward functions until we
  have gradients for all sources.

  Args:
    target: either a Tensor or a list of Tensors to be differentiated.
    sources: list of Tensors for which we want gradients.
    output_gradients: if not None, a list of gradients, one for each element of
      target, or None if we are to use the target's computed downstream
      gradient.

  Returns:
    the gradient with respect to each of the sources.

  Raises:
    RuntimeError: if something goes wrong.
    ValueError: if there is no sequence of differentiable operations connecting
      a source and any target Tensor. This can happen either if the target is
      not computed based on the source, if the tracing was set up incorrectly,
      or if only non-differentiable functions of the source were used in the
      computation of target.
  """
|
||||
if not tape._tape_stack.stack: # pylint: disable=protected-access
|
||||
raise RuntimeError("Computing a gradient with no tape present")
|
||||
bp_tape = tape.pop_tape()
|
||||
tensor_to_op, op_to_entry, output_to_shape_dtype = bp_tape.export()
|
||||
# This overwrites the op_to_entry variable, which will release all memory used
|
||||
# to keep traces that are irrelevant to the gradient computation we're doing
|
||||
# here.
|
||||
id_sources = [ops.tensor_id(t) for t in sources]
|
||||
tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop(
|
||||
target, tensor_to_op, op_to_entry, id_sources)
|
||||
ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor)
|
||||
gradients = _initial_gradients(target, output_gradients,
|
||||
tensor_usage_counts)
|
||||
# Now exhaust the backprop stack
|
||||
while ready_ops:
|
||||
op = ready_ops.pop()
|
||||
op_trace = op_to_entry.pop(op)
|
||||
out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids]
|
||||
for i in range(len(out_gradients)):
|
||||
if out_gradients[i] is None:
|
||||
# TODO(apassos) this should be in the right device
|
||||
out_gradients[i] = array_ops.zeros(
|
||||
*output_to_shape_dtype[op_trace.output_ids[i]])
|
||||
else:
|
||||
out_gradients[i] = _aggregate_grads(out_gradients[i])
|
||||
|
||||
in_gradients = op_trace.backward_function(
|
||||
*(out_gradients + op_trace.side_outputs))
|
||||
in_gradients = ([in_gradients]
|
||||
if isinstance(in_gradients, (ops.Tensor,
|
||||
ops.IndexedSlices,
|
||||
type(None)))
|
||||
else in_gradients)
|
||||
for i, t in enumerate(op_trace.inputs):
|
||||
if in_gradients[i] is not None:
|
||||
gradients[ops.tensor_id(t)].append(in_gradients[i])
|
||||
if tensor_usage_counts.get(ops.tensor_id(t), 0) > 0:
|
||||
tensor_usage_counts[ops.tensor_id(t)] -= 1
|
||||
if ops.tensor_id(t) in tensor_to_op and tensor_usage_counts[
|
||||
ops.tensor_id(t)] == 0 and ops.tensor_id(t) not in id_sources:
|
||||
in_op = tensor_to_op[ops.tensor_id(t)]
|
||||
if in_op is None:
|
||||
continue
|
||||
if op_missing_tensor.get(in_op, 0) > 0:
|
||||
op_missing_tensor[in_op] -= 1
|
||||
if op_missing_tensor.get(in_op, 0) == 0:
|
||||
ready_ops.append(in_op)
|
||||
result = []
|
||||
for i, s in enumerate(sources):
|
||||
g = gradients.get(ops.tensor_id(s), None)
|
||||
if g is None:
|
||||
# TODO(apassos): figure out a way to summarize why sources and targets are
|
||||
# not connected.
|
||||
raise ValueError("There is no sequence of operations connecting source "
|
||||
"tensor %s (%s) to any of the target Tensors. This is "
|
||||
"commonly caused by the tape not recording all "
|
||||
"operations in the forward pass or if by mistake a "
|
||||
"source was only used in non-differentiable operations."
|
||||
% (i, s))
|
||||
result.append(_aggregate_grads(g))
|
||||
return result
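# A minimal usage sketch of imperative_grad (illustrative; `x` stands for an
# eager Tensor and `f` for a function built from differentiable TF ops). This
# mirrors what val_and_grad_function below does internally:
#
#   tape.push_new_tape()               # start recording on a fresh tape
#   tape.watch(x)                      # mark x as a source
#   y = f(x)                           # ops on watched tensors are recorded
#   dy_dx, = imperative_grad(y, [x])   # pops the tape and runs the backprop loop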
|
||||
|
||||
|
||||
def op_attr_type(op_type, attr_name):
|
||||
with errors.raise_exception_on_not_ok_status() as status:
|
||||
h = context.context()._handle # pylint: disable=protected-access
|
||||
@ -82,26 +317,23 @@ class _MockOp(object):
|
||||
raise KeyError(attr)
|
||||
|
||||
|
||||
def _magic_gradient_function(op_name, attr_tuple, num_inputs, num_outputs,
|
||||
*tensors):
|
||||
def _magic_gradient_function(op_name, attr_tuple, num_inputs,
|
||||
inputs, outputs, out_grads):
|
||||
"""Calls the gradient function of the op.
|
||||
|
||||
Args:
|
||||
op_name: the name of the op to be differentiated.
|
||||
attr_tuple: the attrs, as a tuple.
|
||||
num_inputs: the number of inputs to the op.
|
||||
num_outputs: the number of outputs of the op.
|
||||
*tensors: a list of tensors, composed of, in order, the inputs, the outputs,
|
||||
and the gradients with respect to the outputs.
|
||||
inputs: inputs to the original operation.
|
||||
outputs: outputs of the original operation.
|
||||
out_grads: gradients of the operation wrt its outputs.
|
||||
|
||||
Returns:
|
||||
The gradients with respect to the inputs of the function, as a list.
|
||||
"""
|
||||
inputs = tensors[:num_inputs]
|
||||
outputs = tensors[num_inputs:num_inputs + num_outputs]
|
||||
out_grads = tensors[num_inputs + num_outputs:]
|
||||
mock_op = _MockOp(attr_tuple, inputs, outputs, op_name)
|
||||
grad_fn = tf_ops._gradient_registry.lookup(op_name) # pylint: disable=protected-access
|
||||
grad_fn = ops._gradient_registry.lookup(op_name) # pylint: disable=protected-access
|
||||
if grad_fn is None:
|
||||
return [None] * num_inputs
|
||||
out_grads = [
|
||||
@ -136,31 +368,23 @@ def _record_gradient(op_name, inputs, attrs, results, name):
|
||||
Raises:
|
||||
An exception on error.
|
||||
"""
|
||||
if not any(ag_core.isnode(x) for x in inputs):
|
||||
return results
|
||||
num_outputs = len(results)
|
||||
if num_outputs == 0:
|
||||
return results
|
||||
if attrs is not None:
|
||||
attrs = tuple(tuple(x) if isinstance(x, list) else x for x in attrs)
|
||||
|
||||
# It is imperative we make a copy of results here as otherwise we create a
|
||||
# dependency cycle in the captured function and this can delay garbage
|
||||
# collecting of the tensors arbitrarily.
|
||||
results_size = len(results) if isinstance(results, (list, tuple)) else 1
|
||||
attrs = attrs
|
||||
|
||||
def grad_fn(*orig_outputs):
|
||||
"""Generated gradient function."""
|
||||
tensors = inputs + list(orig_outputs)
|
||||
tensors = container_types.make_sequence(tape.EagerList, *tensors)
|
||||
result = _magic_gradient_function(op_name, attrs, len(inputs),
|
||||
num_outputs, *(tensors))
|
||||
inputs, results, orig_outputs)
|
||||
if _tracing:
|
||||
print("Gradient for", (name if name else op_name), "inputs", inputs,
|
||||
"output_grads", orig_outputs[results_size:], "gradients", result)
|
||||
"output_grads", orig_outputs, "gradients", result)
|
||||
return result
|
||||
|
||||
results = tape.record_operation(results, inputs, [], grad_fn)
|
||||
inputs = [ops.convert_to_tensor(x) for x in inputs]
|
||||
tape.record_operation(results, inputs, [], grad_fn)
|
||||
if _tracing:
|
||||
print("Computed op", (name if name else op_name), "inputs", inputs,
|
||||
"outputs", results)
|
||||
@ -170,27 +394,6 @@ def _record_gradient(op_name, inputs, attrs, results, name):
|
||||
execute.record_gradient = _record_gradient
|
||||
|
||||
|
||||
def _aggregate_grads(gradients):
|
||||
"""Aggregate gradients of the same tensor."""
|
||||
grad_lists = collections.OrderedDict()
|
||||
for g, v in gradients:
|
||||
if g is None:
|
||||
continue
|
||||
if id(v) not in grad_lists:
|
||||
grad_lists[id(v)] = [(g, v)]
|
||||
else:
|
||||
grad_lists[id(v)].append((g, v))
|
||||
|
||||
ret = []
|
||||
for _, g_list in six.iteritems(grad_lists):
|
||||
if len(g_list) == 1:
|
||||
ret.append(g_list[0])
|
||||
else:
|
||||
# TODO(xpan): Aggregate IndexedSlices.
|
||||
ret.append((math_ops.add_n(list(zip(*g_list))[0]), g_list[1][1]))
|
||||
return ret
|
||||
|
||||
|
||||
def implicit_val_and_grad(f):
|
||||
"""Returns a function which differentiates f with respect to variables.
|
||||
|
||||
@ -224,23 +427,14 @@ def implicit_val_and_grad(f):
|
||||
Its second element is list of (gradient, variable) pairs.
|
||||
"""
|
||||
|
||||
def grad_fn(*args, **kwds):
|
||||
def grad_fn(*args):
|
||||
"""Computes the gradient of the wrapped function."""
|
||||
tape.push_new_tape()
|
||||
end_node = f(*args)
|
||||
start_node = tape.pop_tape()
|
||||
ag_core.active_progenitors.remove(start_node)
|
||||
if not ag_core.isnode(end_node):
|
||||
raise ValueError(
|
||||
"Target not part of a computation being traced. %s." % end_node)
|
||||
if start_node not in end_node.progenitors:
|
||||
raise ValueError("Target not derived from source. %s %s." %
|
||||
(end_node.progenitors, repr(start_node)))
|
||||
output_gradients = kwds.get("output_gradients", None)
|
||||
if output_gradients is None:
|
||||
output_gradients = array_ops.ones_like(end_node.value)
|
||||
grad = ag_core.backward_pass(output_gradients, end_node, start_node)
|
||||
return end_node.value, _aggregate_grads(grad.gradients)
|
||||
variables = tape.top_tape_watched_variables()
|
||||
sources = [x.handle for x in variables]
|
||||
grad = imperative_grad(end_node, sources)
|
||||
return end_node, list(zip(grad, variables))
|
||||
|
||||
return grad_fn
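# Hypothetical usage (illustrative): if `loss` closes over a ResourceVariable
# `v`, then
#
#   value, grads_and_vars = implicit_val_and_grad(loss)()
#
# returns the loss value and a list of (gradient, variable) pairs such as
# [(dloss_dv, v)], as exercised by testImplicitGradWithResourceVariable in
# backprop_test.py below.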
|
||||
|
||||
@ -295,24 +489,25 @@ def gradients_function(f, params=None):
|
||||
differentiates with respect to all parameters.
|
||||
|
||||
Returns:
|
||||
function which, when called, returns the gradient of f with
|
||||
respect to all of `params`.
|
||||
function which, when called, returns the value of f and the gradient
|
||||
of f with respect to all of `params`. The function takes an extra optional
|
||||
keyword argument "dy". Setting it allows computation of vector jacobian
|
||||
products for vectors other than the vector of ones.
|
||||
|
||||
Raises:
|
||||
ValueError: if the params are not all strings or all integers.
|
||||
"""
|
||||
parameter_positions = _get_arg_spec(f, params)
|
||||
|
||||
def decorated(*args, **kwargs):
|
||||
tensors = convenience_wrappers.multigrad(f, parameter_positions)(*args,
|
||||
**kwargs)
|
||||
return [t.tensor() if isinstance(t, tensor.LazyZero)
|
||||
else t for t in tensors]
|
||||
def decorated(*args, **kwds):
|
||||
"""Computes the gradient of the decorated function."""
|
||||
|
||||
_, grad = val_and_grad_function(f, params)(*args, **kwds)
|
||||
return grad
|
||||
|
||||
return decorated
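# Hypothetical usage of gradients_function (values are illustrative):
#
#   def f(x, y):
#     return x * y
#
#   df_dx, df_dy = gradients_function(f)(tensor.Tensor(2.0), tensor.Tensor(3.0))
#   # df_dx == 3.0 and df_dy == 2.0; gradients_function(f, [0]) would return
#   # only the gradient with respect to x.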
|
||||
|
||||
|
||||
def val_and_grad_function(f, params=None):
|
||||
def val_and_grad_function(f, params):
|
||||
"""Returns a function that computes f and is derivative w.r.t. params.
|
||||
|
||||
Args:
|
||||
@ -321,11 +516,30 @@ def val_and_grad_function(f, params=None):
|
||||
parameters with respect to which we'll differentiate. Passing None
|
||||
differentiates with respect to all parameters.
|
||||
|
||||
Returns:
|
||||
function which, when called, returns the value of f and the
|
||||
gradient of f with respect to all of `params`.
|
||||
Returns: function which, when called, returns the value of f and the gradient
|
||||
of f with respect to all of `params`. The function takes an extra optional
|
||||
keyword argument "dy". Setting it allows computation of vector jacobian
|
||||
products for vectors other than the vector of ones.
|
||||
|
||||
Raises:
|
||||
ValueError: if the params are not all strings or all integers.
|
||||
"""
|
||||
return convenience_wrappers.value_and_multigrad(f, _get_arg_spec(f, params))
|
||||
parameter_positions = _get_arg_spec(f, params)
|
||||
|
||||
def decorated(*args, **kwds):
|
||||
"""Computes the value and gradient of the decorated function."""
|
||||
dy = kwds.pop("dy", None)
|
||||
assert not kwds, "The gradient function can't take keyword arguments."
|
||||
tape.push_new_tape()
|
||||
sources = []
|
||||
args = list(args)
|
||||
for i in parameter_positions:
|
||||
sources.append(args[i])
|
||||
tape.watch(args[i])
|
||||
result = f(*args)
|
||||
return result, imperative_grad(
|
||||
result,
|
||||
sources,
|
||||
output_gradients=dy)
|
||||
|
||||
return decorated
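# A hedged usage sketch (the lambda and values below are placeholders):
#
#   val_and_grad = val_and_grad_function(lambda x: x * x, params=[0])
#   value, (dx,) = val_and_grad(tensor.Tensor(3.0))
#   # Passing dy computes a vector-Jacobian product against that cotangent:
#   value, (dx,) = val_and_grad(tensor.Tensor(3.0), dy=tensor.Tensor(2.0))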
|
||||
|
@ -23,10 +23,10 @@ from tensorflow.python.eager import backprop
|
||||
from tensorflow.python.eager import context
|
||||
from tensorflow.python.eager import tape
|
||||
from tensorflow.python.eager import tensor
|
||||
from tensorflow.python.eager import tensor_node
|
||||
from tensorflow.python.eager import test
|
||||
from tensorflow.python.framework import constant_op
|
||||
from tensorflow.python.framework import dtypes
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.framework import tensor_shape
|
||||
from tensorflow.python.ops import array_ops
|
||||
from tensorflow.python.ops import embedding_ops
|
||||
@ -58,6 +58,7 @@ class BackpropTest(test.TestCase):
|
||||
var_np = np.random.rand(4, 2).astype(np.float32)
|
||||
var = tensor.Tensor(var_np)
|
||||
grad = backprop.gradients_function(fn, [0])(var)[0]
|
||||
grad = ops.convert_to_tensor(grad).numpy()
|
||||
|
||||
with context.graph_mode(), self.test_session():
|
||||
tf_var = array_ops.constant(var_np, dtypes.float32)
|
||||
@ -74,11 +75,7 @@ class BackpropTest(test.TestCase):
|
||||
tf_dense_grad = math_ops.unsorted_segment_sum(
|
||||
tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])
|
||||
|
||||
self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
|
||||
|
||||
def testTensoVspaceNoneMutAdd(self):
|
||||
t = tensor.Tensor(1.0)
|
||||
self.assertEqual(tensor_node.TensorVSpace(t).mut_add(t, None).numpy(), 1.0)
|
||||
self.assertAllClose(grad, tf_dense_grad.eval())
|
||||
|
||||
def testImplicitGradWithResourceVariable(self):
|
||||
x = resource_variable_ops.ResourceVariable(
|
||||
@ -94,6 +91,16 @@ class BackpropTest(test.TestCase):
|
||||
self.assertEqual(grads_and_vars[0][0].numpy(), 1.0)
|
||||
self.assertEqual(id(grads_and_vars[0][1]), id(x))
|
||||
|
||||
def testDy(self):
|
||||
|
||||
def f(x):
|
||||
return x
|
||||
|
||||
grad_fn = backprop.gradients_function(f)
|
||||
self.assertAllEqual(grad_fn(constant_op.constant(1.0),
|
||||
dy=constant_op.constant(2.0))[0].numpy(),
|
||||
2.0)
|
||||
|
||||
def testImplicitGradOverEmbeddingLookup(self):
|
||||
batch_size = 8
|
||||
embedding_size = 512
|
||||
|
@ -18,22 +18,11 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from autograd import core as ag_core
|
||||
|
||||
from tensorflow.python.eager import tape
|
||||
from tensorflow.python.eager import tensor as _tensor
|
||||
from tensorflow.python.framework import ops as tf_ops
|
||||
from tensorflow.python.util import nest
|
||||
|
||||
|
||||
def _watch_value_from_tape(tensor):
|
||||
for t in tape._tape_stack.stack: # pylint: disable=protected-access
|
||||
w = t.value.tensors.get(tf_ops.tensor_id(tensor), None)
|
||||
if w is not None:
|
||||
return w
|
||||
return tensor
|
||||
|
||||
|
||||
def custom_gradient(f):
|
||||
"""Decorator to define a function with a custom gradient.
|
||||
|
||||
@ -52,27 +41,23 @@ def custom_gradient(f):
|
||||
|
||||
def decorated(*args, **kwargs):
|
||||
"""Decorated function with custom gradient."""
|
||||
input_tensors = [_watch_value_from_tape(x) for x in args
|
||||
if isinstance(x, (_tensor.Tensor, tf_ops.Tensor))
|
||||
or ag_core.isnode(x)]
|
||||
input_tensors = [x for x in args
|
||||
if isinstance(x, tf_ops.Tensor)]
|
||||
result, grad_fn = f(*args, **kwargs)
|
||||
result_size = len(result) if isinstance(result, (list, tuple)) else 1
|
||||
|
||||
# TODO(apassos): naive uses of custom_gradient will not get the correct
|
||||
# second derivative this way if they capture any output tensors. Change the
|
||||
# signature of custom_gradient.
|
||||
def actual_grad_fn(*outputs):
|
||||
outputs = outputs[result_size:]
|
||||
return grad_fn(*outputs)
|
||||
|
||||
flat_result = nest.flatten(result)
|
||||
flat_result = [ag_core.getval(x) for x in flat_result]
|
||||
flat_result = tape.record_operation(
|
||||
tape.record_operation(
|
||||
flat_result,
|
||||
input_tensors,
|
||||
[],
|
||||
actual_grad_fn)
|
||||
flat_result = list(flat_result)
|
||||
return nest.pack_sequence_as(structure=result, flat_sequence=flat_result)
|
||||
return result
|
||||
|
||||
return decorated
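# A hypothetical example of the decorator (the name log1pexp and the math are
# illustrative, not part of this module):
#
#   @custom_gradient
#   def log1pexp(x):
#     e = math_ops.exp(x)
#     def grad(dy):
#       return dy * (1 - 1 / (1 + e))
#     return math_ops.log(1 + e), grad
#
# The decorated function returns a pair (result, grad_fn); grad_fn maps
# gradients with respect to result into gradients with respect to the inputs.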
|
||||
|
@ -18,7 +18,6 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from autograd import core as ag_core
|
||||
import six
|
||||
|
||||
from google.protobuf import text_format
|
||||
@ -57,7 +56,6 @@ def execute(op_name, num_outputs, inputs, attrs=None, name=None):
|
||||
"""
|
||||
ctx = context.get_default_context()
|
||||
# TODO(apassos) move this to convert_to_tensor
|
||||
inputs = [ag_core.getval(x) for x in inputs]
|
||||
# pylint: disable=protected-access
|
||||
input_handles = [c._handle for c in inputs]
|
||||
device_name = ctx.device_name
|
||||
@ -184,7 +182,7 @@ def args_to_matching_eager(l, default_dtype=None):
|
||||
# Is some input already a Tensor with a dtype?
|
||||
dtype = None
|
||||
for t in l:
|
||||
if isinstance(ag_core.getval(t), tensor.Tensor):
|
||||
if isinstance(t, tensor.Tensor):
|
||||
dtype = t.dtype
|
||||
break
|
||||
|
||||
@ -203,7 +201,7 @@ def args_to_matching_eager(l, default_dtype=None):
|
||||
|
||||
|
||||
def convert_to_mixed_eager_tensors(values):
|
||||
v = [t if isinstance(ag_core.getval(t), tensor.Tensor) else tensor.Tensor(t)
|
||||
v = [t if isinstance(t, tensor.Tensor) else tensor.Tensor(t)
|
||||
for t in values]
|
||||
types = [t.dtype for t in v]
|
||||
return types, v
|
||||
@ -228,7 +226,7 @@ def args_to_mixed_eager_tensors(lists):
|
||||
dtype = None
|
||||
# If any list has a Tensor, use that dtype
|
||||
for l in lists:
|
||||
if isinstance(ag_core.getval(l[i]), tensor.Tensor):
|
||||
if isinstance(l[i], tensor.Tensor):
|
||||
dtype = l[i].dtype
|
||||
break
|
||||
if dtype is None:
|
||||
|
@ -23,7 +23,6 @@ import collections
|
||||
import contextlib
|
||||
import threading
|
||||
|
||||
from autograd import core as ag_core
|
||||
import numpy as np
|
||||
|
||||
from tensorflow.python.eager import context
|
||||
@ -88,6 +87,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False):
|
||||
tensor_map[ops.tensor_id(value)] = (value, captured_value)
|
||||
else:
|
||||
captured_value = captured_value[1]
|
||||
tape.record_operation([captured_value], [value], [], lambda x: x)
|
||||
return captured_value
|
||||
|
||||
|
||||
@ -193,11 +193,8 @@ class _GraphModeFunction(object):
|
||||
self._num_outputs = len(fdef.signature.output_arg)
|
||||
self._ops = operations
|
||||
self._func_outputs = func_outputs
|
||||
if (isinstance(func_outputs, (ops.Tensor, type(None))) or
|
||||
ag_core.isnode(func_outputs)):
|
||||
self._returns = [func_outputs]
|
||||
else:
|
||||
self._returns = list(func_outputs)
|
||||
self._returns = [func_outputs] if isinstance(
|
||||
func_outputs, (ops.Tensor, type(None))) else list(func_outputs)
|
||||
self._returns_to_fedf_outputs = func_outputs_to_fdef_outputs
|
||||
self._output_shapes = output_shapes
|
||||
|
||||
@ -208,7 +205,7 @@ class _GraphModeFunction(object):
|
||||
c = _CapturingContext()
|
||||
with c:
|
||||
filtered_outputs = [
|
||||
ag_core.getval(x) for x in self._returns if x is not None
|
||||
x for x in self._returns if x is not None
|
||||
]
|
||||
self._out_grad_placeholders = [
|
||||
graph_placeholder(x.dtype, x.shape) for x in filtered_outputs
|
||||
@ -242,16 +239,19 @@ class _GraphModeFunction(object):
|
||||
if context.in_graph_mode():
|
||||
g = ops.get_default_graph()
|
||||
g._add_function(self._forward_fdef) # pylint: disable=protected-access
|
||||
unwrapped_args = [ag_core.getval(x) for x in all_args]
|
||||
def make_tensor(x):
|
||||
if isinstance(x, ops.Tensor):
|
||||
return x
|
||||
return ops.convert_to_tensor(x)
|
||||
op = g.create_op(
|
||||
signature.name, [ops.convert_to_tensor(x) for x in unwrapped_args],
|
||||
signature.name, [make_tensor(x) for x in all_args],
|
||||
[dtypes.DType(x.type) for x in signature.output_arg],
|
||||
op_def=signature,
|
||||
name="FunctionCall",
|
||||
compute_shapes=False)
|
||||
outputs = op.outputs
|
||||
outputs = [outputs] if isinstance(
|
||||
outputs, (tensor.Tensor, ops.Tensor, type(None))) else list(outputs)
|
||||
outputs, (ops.Tensor, type(None))) else list(outputs)
|
||||
for i, s in enumerate(self._output_shapes):
|
||||
outputs[i].set_shape(s)
|
||||
else:
|
||||
@ -261,25 +261,12 @@ class _GraphModeFunction(object):
|
||||
inputs=all_args)
|
||||
real_outputs = outputs[:len(self._returns)]
|
||||
side_outputs = outputs[len(self._returns):]
|
||||
watched_extra_inputs = []
|
||||
for t in self._extra_inputs:
|
||||
tid = ops.tensor_id(t)
|
||||
for t in tape._tape_stack.stack: # pylint: disable=protected-access
|
||||
w = t.value.tensors.get(tid, None)
|
||||
if w is not None:
|
||||
watched_extra_inputs.append(w)
|
||||
break
|
||||
else: # Note: for-else here done on purpose
|
||||
watched_extra_inputs.append(t)
|
||||
|
||||
def backward_function_wrapper(*outputs):
|
||||
outputs = outputs[len(real_outputs):]
|
||||
return self._backward_function(*outputs)
|
||||
real_outputs = tape.record_operation(
|
||||
tape.record_operation(
|
||||
real_outputs,
|
||||
(args + watched_extra_inputs),
|
||||
(args + self._extra_inputs),
|
||||
side_outputs,
|
||||
backward_function_wrapper)
|
||||
self._backward_function)
|
||||
|
||||
return self._build_call_outputs(self._returns, real_outputs)
|
||||
|
||||
@ -288,10 +275,10 @@ class _GraphModeFunction(object):
|
||||
tensor_inputs = [
|
||||
x for x in nest.flatten(args)
|
||||
if isinstance(x, (tensor.Tensor, ops.Tensor,
|
||||
tensor.LazyZero)) or ag_core.isnode(x)
|
||||
tensor.LazyZero))
|
||||
]
|
||||
if tape.should_record(tensor_inputs) or any(
|
||||
tape.any_tape_has(t) for t in self._extra_inputs):
|
||||
if tape.should_record(tensor_inputs) or tape.should_record(
|
||||
self._extra_inputs):
|
||||
if not self._has_backprop:
|
||||
self._compute_backprop()
|
||||
return self._backprop_call(tensor_inputs)
|
||||
@ -334,12 +321,12 @@ class _GraphModeFunction(object):
|
||||
"""
|
||||
if self._func_outputs is None:
|
||||
return None
|
||||
if isinstance(ag_core.getval(self._func_outputs), ops.Tensor):
|
||||
if isinstance(self._func_outputs, ops.Tensor):
|
||||
return result[0]
|
||||
|
||||
outputs = []
|
||||
for o in func_outputs:
|
||||
vo = ag_core.getval(o)
|
||||
vo = o
|
||||
if isinstance(vo, ops.Tensor):
|
||||
outputs.append(result[self._returns_to_fedf_outputs[id(vo)]])
|
||||
elif type(vo) in (tuple, list):
|
||||
@ -354,7 +341,6 @@ def _get_defun_inputs(args):
|
||||
"""Maps the inputs args to graph inputs."""
|
||||
ret = []
|
||||
for a in args:
|
||||
a = ag_core.getval(a)
|
||||
if isinstance(a, (tensor.LazyZero, ops.Tensor, tensor.Tensor)):
|
||||
ret.append(graph_placeholder(a.dtype, a.shape))
|
||||
elif type(a) in (tuple, list):
|
||||
@ -395,7 +381,7 @@ def _defun_internal(name, func, args, kwds):
|
||||
]
|
||||
all_inputs = flat_inputs + list(extra_placeholders)
|
||||
|
||||
func_def_outputs = [ag_core.getval(x) for x in outputs_list if x is not None]
|
||||
func_def_outputs = [x for x in outputs_list if x is not None]
|
||||
inference_function_def = graph_to_function_def.graph_to_function_def(
|
||||
tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs)
|
||||
# Register any other functions defined in the graph
|
||||
@ -421,7 +407,6 @@ _ZeroDtype = collections.namedtuple("_ZeroDtype", ["dtype", "shape"])
|
||||
|
||||
def _cache_key(x):
|
||||
"""Cache key for tfe functions."""
|
||||
x = ag_core.getval(x)
|
||||
if isinstance(x, tensor.Tensor):
|
||||
return _TensorDtype(x.dtype, x._shape_tuple()) # pylint: disable=protected-access
|
||||
if isinstance(x, tensor.LazyZero):
|
||||
|
@ -18,96 +18,114 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import threading
|
||||
|
||||
from autograd import container_types
|
||||
from autograd import core as ag_core
|
||||
|
||||
from tensorflow.python.framework import dtypes
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.util import nest
|
||||
from tensorflow.python.util import tf_contextlib
|
||||
|
||||
|
||||
class ImplicitTape(object):
|
||||
"""Global object which can watch tensors and wrap them with autograd."""
|
||||
def tid(tensor):
|
||||
return tensor._id # pylint: disable=protected-access
|
||||
|
||||
|
||||
class TapeEntry(
|
||||
collections.namedtuple("TapeEntry", [
|
||||
"output_ids", "inputs", "side_outputs", "backward_function"
|
||||
])):
|
||||
"""Entry in the gradient tape.
|
||||
|
||||
Represents the execution of one op or function, with instructions for doing
|
||||
its backward pass and useful information for it.
|
||||
|
||||
Args:
|
||||
output_ids: tensor_id(t) for each output tensor T
|
||||
inputs: input tensors
|
||||
side_outputs: optional tensors which need to be provided to the backward
|
||||
function.
|
||||
backward_function: function to be called with the downstream gradients and
|
||||
side outputs as arguments which computes the backward pass.
|
||||
"""
|
||||
|
||||
|
||||
def _tensor_shape(t):
|
||||
return t._shape_tuple() # pylint: disable=protected-access
|
||||
|
||||
|
||||
class Tape(object):
|
||||
"""Represents a gradient propagation trace."""
|
||||
|
||||
def __init__(self):
|
||||
self.tensors = {}
|
||||
self.variables = {}
|
||||
self.gradients = []
|
||||
# _tensor_tape maps from tensor IDs to their operation IDs
|
||||
self._tensor_tape = {}
|
||||
# maps output tensor IDs to their shapes and dtypes
|
||||
self._shape_dtype = {}
|
||||
# maps from operation ID to TapeEntry
|
||||
self._op_tape = {}
|
||||
# next operation ID
|
||||
self._next_op_id = 0
|
||||
# List of directly watched tensors
|
||||
self._watched = []
|
||||
# Set of directly watched variables
|
||||
self._watched_variables = set()
|
||||
|
||||
def __eq__(self, other):
|
||||
return self is other
|
||||
def should_record(self, tensors):
|
||||
"""Returns true if any tensor should be recorded.
|
||||
|
||||
def __hash__(self):
|
||||
return id(self)
|
||||
Args:
|
||||
tensors: some tensors.
|
||||
|
||||
Returns:
|
||||
True if any of the tensors is in the tape.
|
||||
"""
|
||||
return any(x._id in self._tensor_tape for x in tensors) # pylint: disable=protected-access
|
||||
|
||||
@ag_core.primitive
|
||||
def _watch_with_tape_internal(_, tensor):
|
||||
"""Primitive to wrap a tensor around an ImplicitTape progenitor."""
|
||||
return tensor
|
||||
def watch(self, tensor):
|
||||
"""Adds a tensor to the tape."""
|
||||
if tid(tensor) not in self._tensor_tape:
|
||||
self._tensor_tape[tid(tensor)] = None
|
||||
self._watched.append(tensor)
|
||||
|
||||
def watch_variable(self, v):
|
||||
self._watched_variables.add(v)
|
||||
self.watch(v.handle)
|
||||
|
||||
def _watch_with_tape(tape, resource_variable):
|
||||
"""Wraps a watched Tensor and keeps track of it in the implicit tape."""
|
||||
tensor = resource_variable.handle
|
||||
w = _watch_with_tape_internal(tape, tensor)
|
||||
if ag_core.isnode(tape):
|
||||
tape.value.variables[ops.tensor_id(tensor)] = resource_variable
|
||||
tape.value.tensors[ops.tensor_id(tensor)] = w
|
||||
def record_operation(self, output_tensors, input_tensors, side_outputs,
|
||||
backward_function):
|
||||
"""Records an operation in the tape."""
|
||||
if not self.should_record(input_tensors):
|
||||
return output_tensors
|
||||
for t in output_tensors:
|
||||
self._tensor_tape[tid(t)] = self._next_op_id
|
||||
self._shape_dtype[tid(t)] = (_tensor_shape(t), t.dtype)
|
||||
|
||||
self._op_tape[self._next_op_id] = TapeEntry(
|
||||
[tid(t) for t in output_tensors],
|
||||
input_tensors,
|
||||
side_outputs,
|
||||
backward_function)
|
||||
self._next_op_id += 1
|
||||
|
||||
def _watch_with_tape_vjp(g, ans, vs, gvs, tape, tensor):
|
||||
"""Gradient for _watch_with_tape_internal."""
|
||||
del ans, gvs
|
||||
def delete_trace(self, tensor):
|
||||
"""Deletes any trace we have for this tensor."""
|
||||
if tid(tensor) in self._tensor_tape:
|
||||
op = self._tensor_tape[tid(tensor)]
|
||||
del self._tensor_tape[tid(tensor)]
|
||||
if op in self._op_tape:
|
||||
if not any(
|
||||
x in self._tensor_tape for x in self._op_tape[op].output_ids):
|
||||
del self._op_tape[op]
|
||||
|
||||
def mut_add(implicit_tape):
|
||||
resource_variable = tape.value.variables[ops.tensor_id(tensor)]
|
||||
implicit_tape.gradients.append((g, resource_variable))
|
||||
return implicit_tape
|
||||
def export(self):
|
||||
"""Exports the internal state of this tape.
|
||||
|
||||
return ag_core.SparseObject(vs, mut_add)
|
||||
|
||||
_watch_with_tape_internal.defvjp(_watch_with_tape_vjp, argnum=0)
|
||||
_watch_with_tape_internal.defvjp(
|
||||
lambda g, ans, vs, gvs, tape, tensor: g,
|
||||
argnum=1)
|
||||
|
||||
|
||||
class ImplicitTapeVSpace(ag_core.VSpace):
|
||||
"""VSpace needed to have ImplicitTape be a valid progenitor."""
|
||||
|
||||
def zeros(self):
|
||||
return ImplicitTape()
|
||||
|
||||
|
||||
class ImplicitTapeNode(ag_core.Node):
|
||||
"""Node to wrap ImplicitTape in."""
|
||||
|
||||
def __eq__(self, other):
|
||||
return self is other
|
||||
|
||||
def __hash__(self):
|
||||
return id(self)
|
||||
|
||||
ag_core.register_node(ImplicitTapeNode, ImplicitTape)
|
||||
ag_core.register_vspace(ImplicitTapeVSpace, ImplicitTape)
|
||||
|
||||
|
||||
# TODO(apassos) try to not do this.
|
||||
class NoneVSpace(ag_core.VSpace):
|
||||
"""VSpace for python None."""
|
||||
|
||||
def __init__(self, _):
|
||||
self.size = 0
|
||||
|
||||
def zeros(self):
|
||||
return 0
|
||||
|
||||
|
||||
ag_core.register_vspace(NoneVSpace, type(None))
|
||||
Returns:
|
||||
tensor_tape: a map from tensor_id(tensor) to <identifier for op>
|
||||
responsible for generating that tensor.
|
||||
op_tape: a map from <identifier for op> to TapeEntry for that op.
|
||||
output_to_shape_dtype: a map from tensor_id(tensor) to its shape and
|
||||
dtype, for tensors which are outputs
|
||||
"""
|
||||
return self._tensor_tape, self._op_tape, self._shape_dtype
|
||||
|
||||
|
||||
class _TapeStack(threading.local):
|
||||
@ -134,19 +152,33 @@ _tape_stack = _TapeStack()
|
||||
|
||||
def push_new_tape():
|
||||
"""Pushes a new tape onto the tape stack."""
|
||||
progenitor = ag_core.new_progenitor(ImplicitTape())
|
||||
_tape_stack.stack.append(progenitor)
|
||||
ag_core.active_progenitors.add(progenitor)
|
||||
_tape_stack.stack.append(Tape())
|
||||
|
||||
|
||||
def watch_variable(resource_variable):
|
||||
"""Marks this ResourceVariable to be watched by all tapes in the stack.
|
||||
def watch(tensor):
|
||||
"""Marks this tensor to be watched by all tapes in the stack.
|
||||
|
||||
Args:
|
||||
resource_variable: A ResourceVariable to be watched.
|
||||
tensor: tensor to be watched.
|
||||
|
||||
Returns:
|
||||
The tensor, potentially wrapped by all tapes in the stack.
|
||||
"""
|
||||
for t in _tape_stack.stack:
|
||||
_watch_with_tape(t, resource_variable)
|
||||
t.watch(tensor)
|
||||
|
||||
|
||||
def watch_variable(variable):
|
||||
"""Marks this variable to be watched by all tapes in the stack.
|
||||
|
||||
Args:
|
||||
variable: variable to be watched.
|
||||
|
||||
Returns:
|
||||
The tensor, potentially wrapped by all tapes in the stack.
|
||||
"""
|
||||
for t in _tape_stack.stack:
|
||||
t.watch_variable(variable)
|
||||
|
||||
|
||||
def pop_tape():
|
||||
@ -156,85 +188,34 @@ def pop_tape():
|
||||
return None
|
||||
|
||||
|
||||
def any_tape_has(tensor):
|
||||
for t in _tape_stack.stack:
|
||||
if ops.tensor_id(tensor) in t.value.tensors:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def should_record(tensors):
|
||||
"""Returns true if any tape in the stack watches any of these tensors."""
|
||||
return any(ag_core.isnode(x) for x in tensors)
|
||||
"""Returns true if any tape in the stach watches any of these tensors."""
|
||||
if not _tape_stack.stack:
|
||||
return False
|
||||
return any(x.should_record(tensors) for x in _tape_stack.stack)
|
||||
|
||||
|
||||
class _EagerSequenceNode(container_types.SequenceNode):
|
||||
"""Eager version of SequenceNode, to live in EagerSequenceVSpace."""
|
||||
pass
|
||||
def record_operation(output_tensors, input_tensors, side_outputs,
|
||||
backward_function):
|
||||
"""Records the operation on all tapes in the stack."""
|
||||
for t in _tape_stack.stack:
|
||||
t.record_operation(output_tensors,
|
||||
input_tensors,
|
||||
side_outputs,
|
||||
backward_function)
|
||||
|
||||
|
||||
class _EagerSequenceVSpace(container_types.SequenceVSpace):
|
||||
"""Changes equality on SequenceVSpace to conform to tfe requirements."""
|
||||
|
||||
def __init__(self, value):
|
||||
self.shape = [ag_core.vspace(x) for x in value]
|
||||
self.size = sum(s.size for s in self.shape)
|
||||
self.sequence_type = type(value)
|
||||
|
||||
def __eq__(self, other):
|
||||
if type(self) != type(other): # pylint: disable=unidiomatic-typecheck
|
||||
return False
|
||||
if len(self.shape) != len(other.shape):
|
||||
# TODO(apassos) function gradients sometimes return gradients for side
|
||||
# inputs which breaks this assertion. Understand how to fix it.
|
||||
return True
|
||||
for ss, os in zip(self.shape, other.shape):
|
||||
if ss != os:
|
||||
if isinstance(ss, NoneVSpace) or isinstance(os, NoneVSpace):
|
||||
continue
|
||||
if ss.dtype == dtypes.resource or os.dtype == dtypes.resource:
|
||||
continue
|
||||
return False
|
||||
return True
|
||||
def delete_trace(tensor):
|
||||
"""Deletes traces for this Tensor from all tapes in the stack."""
|
||||
for t in _tape_stack.stack:
|
||||
t.delete_trace(tensor)
|
||||
|
||||
|
||||
class EagerList(list):
|
||||
"""Type used to bypass SequenceVSpace.
|
||||
|
||||
SequenceVSpace has a very strict equality check which does not match
|
||||
tensorflow semantics.
|
||||
"""
|
||||
|
||||
def __init__(self, value):
|
||||
super(EagerList, self).__init__(value)
|
||||
for v in value:
|
||||
assert not ag_core.isnode(v)
|
||||
|
||||
ag_core.register_vspace(_EagerSequenceVSpace, EagerList)
|
||||
ag_core.register_node(_EagerSequenceNode, EagerList)
|
||||
def top_tape_watched_tensors():
|
||||
t = _tape_stack.stack[-1]
|
||||
return t._watched # pylint: disable=protected-access
|
||||
|
||||
|
||||
@ag_core.primitive
|
||||
def _record_operation(output_tensors, input_tensors, side_outputs,
|
||||
backward_function):
|
||||
del input_tensors, side_outputs, backward_function
|
||||
return EagerList(output_tensors)
|
||||
|
||||
|
||||
def record_operation(o, i, s, b):
|
||||
"""Primitive to trigger autograd tracing on outputs from inputs."""
|
||||
inputs = container_types.make_sequence(EagerList, *i)
|
||||
return _record_operation(o, inputs, s, b)
|
||||
|
||||
|
||||
def _record_operation_vjp(g, ans, vs, gvs, output_tensors, input_tensors,
|
||||
side_outputs, backward_function):
|
||||
"""Gradient for _record_operation."""
|
||||
del vs, gvs, input_tensors, output_tensors
|
||||
backward_args = tuple(g) + tuple(side_outputs)
|
||||
backward_args = container_types.make_sequence(
|
||||
EagerList, *(tuple(ans) + backward_args))
|
||||
tensors = nest.flatten(backward_function(*backward_args))
|
||||
return container_types.make_sequence(EagerList, *tensors)
|
||||
|
||||
_record_operation.defvjp(_record_operation_vjp, argnum=1)
|
||||
def top_tape_watched_variables():
|
||||
t = _tape_stack.stack[-1]
|
||||
return t._watched_variables # pylint: disable=protected-access
|
||||
|
@ -1,301 +0,0 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""TensorNode for autograd tracing of computations with Tensors."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from autograd import core as ag_core
|
||||
|
||||
from tensorflow.python.eager import context
|
||||
from tensorflow.python.eager import custom_gradient
|
||||
from tensorflow.python.eager import tensor
|
||||
from tensorflow.python.framework import common_shapes
|
||||
from tensorflow.python.framework import dtypes
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.framework import tensor_shape
|
||||
from tensorflow.python.ops import array_ops
|
||||
from tensorflow.python.ops import math_ops
|
||||
|
||||
|
||||
@ag_core.primitive
|
||||
def _tensor_numpy(t):
|
||||
return t.numpy()
|
||||
|
||||
|
||||
@ag_core.primitive
|
||||
def _as_gpu_tensor(t, index=0):
|
||||
return t.as_gpu_tensor(gpu_index=index)
|
||||
|
||||
|
||||
_as_gpu_tensor.defvjp(
|
||||
lambda g, ans, vs, gvs, t, index: g.as_cpu_tensor(), argnum=0)
|
||||
|
||||
|
||||
@custom_gradient.custom_gradient
|
||||
def _tensor_copy(t, ctx=None, device_name=None):
|
||||
|
||||
def grad(dresult):
|
||||
return dresult._copy(device_name=t.device) # pylint: disable=protected-access
|
||||
|
||||
return t.value._copy(ctx=ctx, device_name=device_name), grad # pylint: disable=protected-access
|
||||
|
||||
|
||||
@ag_core.primitive
|
||||
def _as_cpu_tensor(t):
|
||||
return t.as_cpu_tensor()
|
||||
|
||||
|
||||
_as_cpu_tensor.defvjp(lambda g, ans, vs, gvs, t: g.as_gpu_tensor(), argnum=0)
|
||||
|
||||
|
||||
# TODO(apassos,ashankar): The operator overrides here need to be kept in sync
|
||||
# with the overrides for ops.Tensor and ops.EagerTensor.
|
||||
#
|
||||
# Note that we cannot use self.value.__op__() because that would result
|
||||
# in an ops.EagerTensor instead of a TensorNode being returned.
|
||||
#
|
||||
# We need to figure out a way to ensure that the two are in sync.
|
||||
class TensorNode(ag_core.Node):
|
||||
"""A TensorFlow Tensor."""
|
||||
|
||||
__slots__ = []
|
||||
|
||||
def __getitem__(self, idx):
|
||||
return array_ops._SliceHelper(self, idx) # pylint: disable=protected-access
|
||||
|
||||
shape = property(lambda self: self.value.shape)
|
||||
dtype = property(lambda self: self.value.dtype)
|
||||
device = property(lambda self: self.value.device)
|
||||
|
||||
def get_shape(self):
|
||||
return self.shape
|
||||
|
||||
def numpy(self):
|
||||
return _tensor_numpy(self)
|
||||
|
||||
def _shape_tuple(self):
|
||||
return self.value._shape_tuple # pylint: disable=protected-access
|
||||
|
||||
def as_cpu_tensor(self):
|
||||
return _as_cpu_tensor(self)
|
||||
|
||||
def as_gpu_tensor(self, gpu_index=0):
|
||||
return _as_gpu_tensor(self, gpu_index)
|
||||
|
||||
def _copy(self, ctx=None, device_name=None):
|
||||
return _tensor_copy(self, ctx, device_name)
|
||||
|
||||
def __neg__(self):
|
||||
return math_ops.negative(self)
|
||||
|
||||
def __abs__(self):
|
||||
return math_ops.abs(self) # pylint: disable=protected-access
|
||||
|
||||
def __invert__(self):
|
||||
# ops.Tensor used math_ops.logical_not as of August 2017.
|
||||
# Now that bitwise_ops.invert exists, it might make sense
|
||||
# for both ops.Tensor and TensorNode to use that if the
|
||||
# type is compatible.
|
||||
return math_ops.logical_not(self)
|
||||
|
||||
def __hash__(self):
|
||||
return id(self)
|
||||
|
||||
def __add__(self, other):
|
||||
if isinstance(self.value, tensor.LazyZero):
|
||||
return other
|
||||
if isinstance(other, tensor.LazyZero):
|
||||
return self
|
||||
return math_ops.add(self, other)
|
||||
|
||||
def __radd__(self, other):
|
||||
if isinstance(self.value, tensor.LazyZero):
|
||||
return other
|
||||
if isinstance(ag_core.getval(other), tensor.LazyZero):
|
||||
return self
|
||||
return math_ops.add(other, self)
|
||||
|
||||
def __sub__(self, other):
|
||||
return math_ops.subtract(self, other)
|
||||
|
||||
def __rsub__(self, other):
|
||||
return math_ops.subtract(other, self)
|
||||
|
||||
def __mul__(self, other):
|
||||
return math_ops.multiply(self, other)
|
||||
|
||||
def __rmul__(self, other):
|
||||
return math_ops.multiply(other, self)
|
||||
|
||||
def __mod__(self, other):
|
||||
return math_ops.floormod(self, other)
|
||||
|
||||
def __rmod__(self, other):
|
||||
return math_ops.floormod(other, self)
|
||||
|
||||
def __pow__(self, other):
|
||||
return math_ops.pow(self, other)
|
||||
|
||||
def __rpow__(self, other):
|
||||
return math_ops.pow(other, self)
|
||||
|
||||
def __div__(self, other):
|
||||
return math_ops._div_python2(self, other) # pylint: disable=protected-access
|
||||
|
||||
def __rdiv__(self, other):
|
||||
return math_ops._div_python2(other, self) # pylint: disable=protected-access
|
||||
|
||||
def __truediv__(self, other):
|
||||
return math_ops._truediv_python3(self, other) # pylint: disable=protected-access
|
||||
|
||||
def __rtruediv__(self, other):
|
||||
return math_ops._truediv_python3(other, self) # pylint: disable=protected-access
|
||||
|
||||
def __floordiv__(self, other):
|
||||
return math_ops.floordiv(self, other)
|
||||
|
||||
def __rfloordiv__(self, other):
|
||||
return math_ops.floordiv(other, self)
|
||||
|
||||
def __eq__(self, other):
|
||||
# math_ops.equal raises an error if shapes are not compatible, so check that
|
||||
# explicitly first.
|
||||
if common_shapes.is_broadcast_compatible(
|
||||
self.shape, ops.convert_to_tensor(other).shape):
|
||||
return math_ops.equal(self, other)
|
||||
return False
|
||||
|
||||
def __gt__(self, other):
|
||||
return math_ops.greater(self, other)
|
||||
|
||||
def __ge__(self, other):
|
||||
return math_ops.greater_equal(self, other)
|
||||
|
||||
def __lt__(self, other):
|
||||
return math_ops.less(self, other)
|
||||
|
||||
def __le__(self, other):
|
||||
return math_ops.less_equal(self, other)
|
||||
|
||||
|
||||
ag_core.register_node(TensorNode, tensor.Tensor)
|
||||
ag_core.register_node(TensorNode, ops.Tensor)
|
||||
|
||||
|
||||
def _zeros(shape, dtype):
|
||||
with context.device("cpu:0"):
|
||||
shape = tensor.Tensor(shape, dtype=dtypes.int32)
|
||||
return array_ops.fill(shape, tensor.Tensor(0, dtype=dtype))
|
||||
|
||||
|
||||
def _ones(shape, dtype):
|
||||
return array_ops.fill(
|
||||
tensor.Tensor(shape, dtype=dtypes.int32), tensor.Tensor(1, dtype=dtype))
|
||||
|
||||
|
||||
def _lazy_zero_tensor(zero):
|
||||
return _zeros(zero.shape, zero.dtype)
|
||||
|
||||
|
||||
tensor.LazyZero.tensor = _lazy_zero_tensor
|
||||
|
||||
|
||||
def _lazy_zero_to_tensor(lazy_zero, dtype=None, name=None, as_ref=False):
|
||||
del as_ref, name, dtype
|
||||
return _zeros(lazy_zero.shape, lazy_zero.dtype)
|
||||
|
||||
|
||||
ops.register_tensor_conversion_function(tensor.LazyZero, _lazy_zero_to_tensor)
|
||||
|
||||
|
||||
def _indexed_slices_to_tensor(value):
|
||||
"""Converts an IndexedSlices object `value` to a Tensor.
|
||||
|
||||
Args:
|
||||
value: An ops.IndexedSlices object.
|
||||
|
||||
Returns:
|
||||
A dense Tensor representing the values in the given IndexedSlices.
|
||||
|
||||
Raises:
|
||||
ValueError: If the IndexedSlices does not have the same dtype.
|
||||
"""
|
||||
if value.dense_shape is None:
|
||||
raise ValueError(
|
||||
"Tensor conversion requested for IndexedSlices without dense_shape: %s"
|
||||
% str(value))
|
||||
return math_ops.unsorted_segment_sum(value.values, value.indices,
|
||||
value.dense_shape[0])
|
||||
|
||||
|
||||
class TensorVSpace(ag_core.VSpace):
|
||||
"""VSpace for tf/tfe Tensors in autograd."""
|
||||
|
||||
def __init__(self, value):
|
||||
self.size = 1
|
||||
if isinstance(value, ops.IndexedSlices):
|
||||
self.shape = tensor_shape.TensorShape(value.dense_shape.numpy())
|
||||
self.dtype = value.values.dtype
|
||||
else:
|
||||
self.shape = value._shape_tuple() # pylint: disable=protected-access
|
||||
self.dtype = value.dtype
|
||||
# TODO(apassos) put gradients on the same device as ops.
|
||||
|
||||
def __eq__(self, other):
|
||||
# TODO(apassos) consider revisiting this if not performance sensitive.
|
||||
return True
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
def zeros(self):
|
||||
return tensor.LazyZero(self.shape, self.dtype)
|
||||
|
||||
def ones(self):
|
||||
return _ones(self.shape, self.dtype)
|
||||
|
||||
def standard_basis(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def flatten(self, value):
|
||||
return array_ops.reshape(value, tensor.Tensor(-1))
|
||||
|
||||
def unflatten(self, value):
|
||||
return array_ops.reshape(value, tensor.Tensor(self.shape))
|
||||
|
||||
def mut_add(self, x, y):
|
||||
"""Add wrapper safe for IndexedSlices and LazyZero."""
|
||||
if isinstance(ag_core.getval(x), tensor.LazyZero):
|
||||
return y
|
||||
if isinstance(ag_core.getval(y), tensor.LazyZero):
|
||||
return x
|
||||
if isinstance(x, ops.IndexedSlices):
|
||||
x = _indexed_slices_to_tensor(x)
|
||||
if isinstance(y, ops.IndexedSlices):
|
||||
y = _indexed_slices_to_tensor(y)
|
||||
if x is None:
|
||||
return y
|
||||
if y is None:
|
||||
return x
|
||||
return math_ops.add(x, y)
|
||||
|
||||
|
||||
ag_core.register_vspace(TensorVSpace, tensor.Tensor)
|
||||
ag_core.register_vspace(TensorVSpace, ops.Tensor)
|
||||
ag_core.register_vspace(TensorVSpace, ops.IndexedSlices)
|
||||
ag_core.register_vspace(TensorVSpace, tensor.LazyZero)
|
||||
ag_core.register_node(TensorNode, tensor.LazyZero)
@ -1,84 +0,0 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.eager import tensor
from tensorflow.python.eager import tensor_node
from tensorflow.python.eager import test
from tensorflow.python.framework import test_util


def public_or_operator(n):
  if not n.startswith("_"):
    return True
  return n.startswith("__") and n.endswith("__")


class TensorNodeTest(test_util.TensorFlowTestCase):

  # TensorNode must implement autograd core's Node interface, which
  # it does via inheritance. It also needs to be duck-typeable as
  # a tensorflow.python.framework.ops.EagerTensor.
  #
  # This is a "test" to help ensure interface compatibility.
  def testCanBeATensor(self):
    # TODO(ashankar,apassos): This list of "exceptions" - list of
    # Tensor methods not implemented by TensorNode needs to be
    # trimmed.
    exceptions = set([
        "OVERLOADABLE_OPERATORS",
        "__and__",
        "__del__",
        "__dict__",
        "__iter__",
        "__len__",
        "__matmul__",
        "__or__",
        "__rand__",
        "__rmatmul__",
        "__ror__",
        "__rxor__",
        "__weakref__",
        "__xor__",
        # BEGIN: Methods of Tensor that EagerTensor raises exceptions on.
        # But perhaps TensorNode should defer to "self.value.<method>" for
        # them?
        "consumers",
        "eval",
        "graph",
        "name",
        "op",
        "set_shape",
        "value_index",
        # END: Methods of Tensor that EagerTensor raises exceptions on.
    ])

    tensor_dir = dir(tensor.Tensor)
    tensor_dir = filter(public_or_operator, tensor_dir)
    tensor_dir = set(tensor_dir).difference(exceptions)

    tensor_node_dir = set(dir(tensor_node.TensorNode))

    missing = tensor_dir.difference(tensor_node_dir)
    self.assertEqual(
        0,
        len(missing),
        msg="Methods/properties missing in TensorNode: {}".format(missing))


if __name__ == "__main__":
  test.main()
@ -41,7 +41,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from autograd import core as ag_core
import numpy as np

from tensorflow.core.framework import attr_value_pb2
@ -76,7 +75,7 @@ def _eager_fill(dims, value):


def convert_to_eager_tensor(t, dtype=None):
  """Converts the given `value` to an `EagerTensor`."""
  if isinstance(ag_core.getval(t), ops.EagerTensor):
  if isinstance(t, ops.EagerTensor):
    if dtype is not None and t.dtype != dtype:
      raise TypeError("Expected tensor with type %r not %r" % (dtype, t.dtype))
    return t
@ -19,7 +19,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from autograd import core as ag_core
import six

from tensorflow.core.framework import attr_value_pb2
@ -503,7 +502,6 @@ class OpDefLibrary(object):
          default_dtype = default_type_attr_map[input_arg.type_attr]

        try:
          values = ag_core.getval(values)
          values = ops.internal_convert_to_tensor(
              values,
              name=input_arg.name,
@ -784,7 +782,6 @@ class OpDefLibrary(object):
                 if arg.is_ref]
    with _MaybeColocateWith(must_colocate_inputs):
      # Add Op to graph
      inputs = [ag_core.getval(x) for x in inputs]
      op = g.create_op(op_type_name, inputs, output_types, name=scope,
                       input_types=input_types, attrs=attr_protos,
                       op_def=op_def)
@ -25,7 +25,6 @@ import re
import sys
import threading

from autograd import core as ag_core
import numpy as np

import six
@ -38,6 +37,7 @@ from tensorflow.core.framework import versions_pb2
from tensorflow.python import pywrap_tensorflow as c_api
from tensorflow.python.eager import context
from tensorflow.python.eager import core
from tensorflow.python.eager import tape
from tensorflow.python.framework import c_api_util
from tensorflow.python.framework import device as pydev
from tensorflow.python.framework import dtypes
@ -70,10 +70,9 @@ from tensorflow.python.util import tf_contextlib
_USE_C_API = False


def tensor_id(t):
def tensor_id(tensor):
  """Returns a unique identifier for this Tensor."""
  t = ag_core.getval(t)
  return t._id  # pylint: disable=protected-access
  return tensor._id  # pylint: disable=protected-access


def _in_gpu_device():
@ -703,6 +702,7 @@ class EagerTensor(Tensor):

  def __del__(self):
    try:
      tape.delete_trace(self)
      if c_api is not None and c_api.TFE_DeleteTensorHandle is not None:
        c_api.TFE_DeleteTensorHandle(self._handle)
      if core.active_trace() is not None:
@ -727,7 +727,7 @@ class EagerTensor(Tensor):
                          self.dtype.name)

  def __repr__(self):
    return "<tf.Tensor: id=%s, shape=%s, dtype=%s, numpy=%s)>" % (
    return "<tf.Tensor: id=%s, shape=%s, dtype=%s, numpy=%s>" % (
        self._id, self.shape, self.dtype.name, self._numpy_text(is_repr=True))

  @staticmethod
@ -770,6 +770,16 @@ class EagerTensor(Tensor):
        tensor_id(new_tensor),
        new_tensor.device,
        new_tensor.shape.num_elements())

    # Record the copy on tape and define backprop copy as well.
    if not context.in_graph_mode():
      self_device = self.device
      def grad_fun(dresult):
        with errors.raise_exception_on_not_ok_status() as status:
          grad_h = c_api.TFE_TensorHandleCopyToDevice(
              dresult._handle, ctx._handle, self_device, status)
        return _tensor_from_handle(grad_h)
      tape.record_operation([new_tensor], [self], [], grad_fun)
    return new_tensor
  # pylint: enable=protected-access
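A minimal sketch of the idea behind the tape entry added above: the cross-device copy is recorded as an op whose backward function moves the incoming gradient back to the source device. Names like `Tape`, `record`, and `copy_to` are illustrative stand-ins, not TF APIs.

```python
class Tape(object):
  """Toy tape: remembers, for each recorded op, how to push gradients back."""

  def __init__(self):
    self.entries = []  # list of (outputs, inputs, backward_fn)

  def record(self, outputs, inputs, backward_fn):
    self.entries.append((outputs, inputs, backward_fn))


def copy_to(value, device):
  """Stand-in for a cross-device tensor copy."""
  return {"value": value, "device": device}


tape = Tape()
x = {"value": 3.0, "device": "CPU:0"}
y = copy_to(x["value"], "GPU:0")
# The gradient of a device copy is a copy back to the source device.
tape.record([y], [x], lambda dresult: copy_to(dresult, x["device"]))
print(tape.entries[0][2](0.5))  # {'value': 0.5, 'device': 'CPU:0'}
```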
@ -1033,26 +1043,21 @@ def internal_convert_to_tensor(value,
    RuntimeError: If a registered conversion function returns an invalid value.

  """
  # Note we check the type of the object unwrapped from an autograd node, if
  # tracing gradients, to ensure the same behavior happens with and without
  # tracing.
  unwrapped = ag_core.getval(value)

  if context.in_eager_mode():
    # Fast path for EagerTensors that don't need any conversion.
    if isinstance(unwrapped, EagerTensor):
    if isinstance(value, EagerTensor):
      # Note that we don't check that value's dtype matches the dtype
      # argument. We expect that the C runtime will do that checking
      # when we execute the kernel.
      return value
    values = nest.flatten(value)
    if (len(values) > 1 and
        any(isinstance(ag_core.getval(v), EagerTensor) for v in values)):
        any(isinstance(v, EagerTensor) for v in values)):
      raise TypeError("Cannot convert to an eager tensor.")

  if dtype is not None:
    dtype = dtypes.as_dtype(dtype)
  unwrapped_type = type(unwrapped)
  unwrapped_type = type(value)
  conversion_func_list = _tensor_conversion_func_cache.get(unwrapped_type, None)
  if conversion_func_list is None:
    with _tensor_conversion_func_lock:
@ -1060,7 +1065,7 @@ def internal_convert_to_tensor(value,
      for _, funcs_at_priority in sorted(
          _tensor_conversion_func_registry.items()):
        for base_type, conversion_func in funcs_at_priority:
          if isinstance(unwrapped, base_type):
          if isinstance(value, base_type):
            conversion_func_list.append((base_type, conversion_func))
      _tensor_conversion_func_cache[unwrapped_type] = conversion_func_list

@ -1090,7 +1095,7 @@ def internal_convert_to_tensor(value,
    if ret is NotImplemented:
      continue

    if not isinstance(ag_core.getval(ret), Tensor):
    if not isinstance(ret, Tensor):
      raise RuntimeError(
          "%sConversion function %r for type %s returned non-Tensor: %r" %
          (_error_prefix(name), conversion_func, base_type, ret))
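The registry walk above (sorted by priority, filtered by `isinstance`, then cached per concrete type) follows the same pattern with or without autograd wrapping. A small self-contained sketch of such a registry (function and variable names here are illustrative, not the TF internals):

```python
_registry = {}   # priority -> list of (base_type, conversion_func)
_cache = {}      # concrete type -> list of (base_type, conversion_func)

def register_conversion(base_type, func, priority=100):
  _registry.setdefault(priority, []).append((base_type, func))

def convert(value):
  funcs = _cache.get(type(value))
  if funcs is None:
    # Lower priority numbers win; only keep functions whose base type matches.
    funcs = [(b, f) for _, fs in sorted(_registry.items()) for (b, f) in fs
             if isinstance(value, b)]
    _cache[type(value)] = funcs
  for _, func in funcs:
    ret = func(value)
    if ret is not NotImplemented:
      return ret
  raise TypeError("no conversion registered for %r" % type(value))

register_conversion(list, lambda v: tuple(v))
print(convert([1, 2, 3]))  # (1, 2, 3)
```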
@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from autograd import core as ag_core
import numpy as np
import six

@ -607,7 +606,7 @@ def ShapeEquals(tensor_proto, shape):

def _ConstantValue(tensor, partial):
  # TODO(touts): Support Variables?
  if not isinstance(ag_core.getval(tensor), ops.Tensor):
  if not isinstance(tensor, ops.Tensor):
    raise TypeError("tensor is not a Tensor")
  if tensor.op.type == "Const":
    return MakeNdarray(tensor.op.get_attr("value"))
@ -737,7 +736,7 @@ def constant_value(tensor, partial=False):  # pylint: disable=invalid-name
  Raises:
    TypeError: if tensor is not an ops.Tensor.
  """
  if isinstance(ag_core.getval(tensor), ops.EagerTensor):
  if isinstance(tensor, ops.EagerTensor):
    return tensor.numpy()
  ret = _ConstantValue(tensor, partial)
  if ret is not None:
@ -291,10 +291,11 @@ def shape_internal(input, name=None, optimize=True, out_type=dtypes.int32):
                                  sparse_tensor.SparseTensorValue)):
      return gen_math_ops.cast(input.dense_shape, out_type)
    else:
      input_tensor = ops.convert_to_tensor(input)
      input_shape = input_tensor.get_shape()
      if optimize and input_shape.is_fully_defined():
        return constant(input_shape.as_list(), out_type, name=name)
      if context.in_graph_mode():
        input_tensor = ops.convert_to_tensor(input)
        input_shape = input_tensor.get_shape()
        if optimize and input_shape.is_fully_defined():
          return constant(input_shape.as_list(), out_type, name=name)
      return gen_array_ops.shape(input, name=name, out_type=out_type)
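The change above only constant-folds the shape when building a graph and the static shape is fully known; in eager mode the shape is always read from the concrete tensor. A toy illustration of the same decision, with NumPy arrays standing in for tensors and `static_shape=None` standing in for an unknown graph-time shape:

```python
import numpy as np

def shape_of(value, static_shape=None, graph_mode=False):
  """Returns a known shape without touching the value when possible."""
  if graph_mode and static_shape is not None and None not in static_shape:
    return list(static_shape)          # fold to a constant at graph-build time
  return list(np.shape(value))         # otherwise ask the runtime value

print(shape_of(np.zeros((2, 3)), static_shape=(2, 3), graph_mode=True))  # [2, 3]
print(shape_of(np.zeros((2, 3)), graph_mode=False))                      # [2, 3]
```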
@ -1428,7 +1429,9 @@ def zeros(shape, dtype=dtypes.float32, name=None):
      zero = ""
    else:
      zero = 0
    if context.in_eager_mode():
    # Checking for boolean dtype to prevent attempting to run fill on the GPU
    # which does not have a boolean kernel registered.
    if context.in_eager_mode() and dtype != dtypes.bool:
      return fill(shape, constant(zero, dtype=dtype), name=name)
    try:
      shape = tensor_shape.as_shape(shape)
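The comment added above records why the eager fast path skips `fill` for booleans: there is no boolean fill kernel on the GPU, so boolean zeros fall through to the generic path. A hedged sketch of that dispatch in plain Python (the two branches are stand-ins for `fill` and the generic constant path):

```python
import numpy as np

def zeros(shape, dtype=np.float32, eager=True):
  # Fast path: fill with a scalar zero, but not for bool (no GPU bool fill kernel).
  if eager and dtype != np.bool_:
    return np.full(shape, 0, dtype=dtype)      # stand-in for fill(...)
  return np.zeros(shape, dtype=dtype)          # stand-in for the generic path

print(zeros((2, 2)))
print(zeros((2, 2), dtype=np.bool_))
```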
@ -144,7 +144,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from autograd import core as ag_core
import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin

@ -1113,12 +1112,11 @@ floormod = gen_math_ops._floor_mod

def _mul_dispatch(x, y, name=None):
  """Dispatches cwise mul for "Dense*Dense" and "Dense*Sparse"."""
  is_tensor_y = isinstance(ag_core.getval(y), ops.Tensor)
  is_tensor_y = isinstance(y, ops.Tensor)
  if is_tensor_y:
    return gen_math_ops._mul(x, y, name=name)
  else:
    assert isinstance(ag_core.getval(y),
                      sparse_tensor.SparseTensor)  # Case: Dense * Sparse.
    assert isinstance(y, sparse_tensor.SparseTensor)  # Case: Dense * Sparse.
    new_vals = gen_sparse_ops.sparse_dense_cwise_mul(y.indices, y.values,
                                                     y.dense_shape, x, name)
    return sparse_tensor.SparseTensor(y.indices, new_vals, y.dense_shape)
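`_mul_dispatch` keeps the sparse operand sparse: only the stored values are multiplied by the matching dense entries, and the result reuses the same indices. A small NumPy sketch of the dense-times-sparse branch (COO-style `indices`/`values`/`dense_shape`, illustration only):

```python
import numpy as np

def dense_times_sparse(dense, indices, values, dense_shape):
  """Elementwise multiply; the result stays sparse with the same indices."""
  rows, cols = indices[:, 0], indices[:, 1]
  new_values = values * dense[rows, cols]
  return indices, new_values, dense_shape

dense = np.arange(1., 7.).reshape(2, 3)
indices = np.array([[0, 1], [1, 2]])
values = np.array([10., 20.])
print(dense_times_sparse(dense, indices, values, (2, 3))[1])  # [ 20. 120.]
```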
@ -19,14 +19,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from autograd import core as ag_core

from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import variable_pb2
from tensorflow.python.eager import context
from tensorflow.python.eager import custom_gradient
from tensorflow.python.eager import tape
from tensorflow.python.eager import tensor_node
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
@ -506,10 +502,8 @@ class ResourceVariable(variables.Variable):
  def _read_variable_op(self):
    if hasattr(self, "_trainable") and self._trainable:
      tape.watch_variable(self)
      return read_variable_op(self._handle, dtype=self._dtype)
    else:
      return gen_resource_variable_ops.read_variable_op(self._handle,
                                                        self._dtype)
    return gen_resource_variable_ops.read_variable_op(self._handle,
                                                      self._dtype)

  def read_value(self):
    """Constructs an op which reads the value of this variable.
@ -541,7 +535,7 @@ class ResourceVariable(variables.Variable):
    with ops.name_scope("Gather" if name is None else name) as name:
      if self._trainable:
        tape.watch_variable(self)
      value = resource_gather(
      value = gen_resource_variable_ops.resource_gather(
          self._handle, indices, dtype=self._dtype, name=name)
      return array_ops.identity(value)

@ -614,13 +608,7 @@ class ResourceVariable(variables.Variable):
    def _run_op(a, *args):
      # pylint: disable=protected-access
      value = a._AsTensor()
      if ag_core.isnode(value):
        # This avoids autograd trying to wrap a ResourceVariable.
        value = ops.convert_to_tensor(value)
        args = [ops.convert_to_tensor(x) for x in args]
        return getattr(tensor_node.TensorNode, operator)(value, *args)
      else:
        return getattr(ops.Tensor, operator)(value, *args)
      return getattr(ops.Tensor, operator)(value, *args)

    # Propagate __doc__ to wrapper
    try:
@ -693,33 +681,6 @@ class ResourceVariable(variables.Variable):
    return self.value()


@custom_gradient.custom_gradient
def read_variable_op(handle, dtype):
  """Reads the value of a variable.

  The tensor returned by this operation is immutable.

  The value returned by this operation is guaranteed to be influenced by all the
  writes on which this operation depends directly or indirectly, and to not be
  influenced by any of the writes which depend directly or indirectly on this
  operation.

  Args:
    handle: A `Tensor` of type `resource`.
      handle to the resource in which to store the variable.
    dtype: A `tf.DType`. the dtype of the value.

  Returns:
    A `Tensor` of type `dtype`.
  """
  result = gen_resource_variable_ops.read_variable_op(handle, dtype)

  def grad(dresult):
    return dresult

  return result, grad


def _dense_var_to_tensor(var, dtype=None, name=None, as_ref=False):
  return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref)  # pylint: disable=protected-access
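The deleted `read_variable_op` wrapper existed only to give the read an explicit gradient: reading a variable is the identity with respect to its value, so the backward function passes `dresult` straight through. A minimal sketch of that custom-gradient pattern (the decorator here is a toy, not `tf.custom_gradient`):

```python
def custom_gradient(f):
  """Toy decorator: f returns (result, grad_fn); a real tape would record grad_fn."""
  def wrapper(*args, **kwargs):
    result, grad_fn = f(*args, **kwargs)
    wrapper.last_grad_fn = grad_fn  # stored so the example can inspect it
    return result
  return wrapper

@custom_gradient
def read_value(value):
  # Reading a variable is the identity, so its gradient is the identity too.
  return value, lambda dresult: dresult

print(read_value(3.0))               # 3.0
print(read_value.last_grad_fn(1.0))  # 1.0
```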
@ -744,51 +705,6 @@ def _ReadGrad(_, grad):
  return grad


# TODO(apassos) do not use custom_gradient here by making other entry points
# than custom_gradient also aware of how to deal with variables implicitly
# watched in the tape (i.e. the call to _watch_value in custom_gradient)
@custom_gradient.custom_gradient
def resource_gather(resource, indices, dtype, validate_indices=True, name=None):
  """Gather slices from the variable pointed to by `resource`.

  `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
  Produces an output tensor with shape `indices.shape + params.shape[1:]` where:

  ```python
      # Scalar indices
      output[:, ..., :] = params[indices, :, ... :]

      # Vector indices
      output[i, :, ..., :] = params[indices[i], :, ... :]

      # Higher rank indices
      output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]
  ```

  Args:
    resource: A `Tensor` of type `resource`.
      handle to the resource in which to store the variable.
    indices: an integer `Tensor` containing the indices to be gathered.
    dtype: A `tf.DType`. the dtype of the value.
    validate_indices: optional `bool`. If false will not validate that the
      indices fit in the variable.
    name: The optional name for the operation to be added.

  Returns:
    A `Tensor` of type `dtype`.
  """
  result = gen_resource_variable_ops.resource_gather(
      resource, indices, dtype, validate_indices=validate_indices, name=name)

  def grad(dresult):
    return ops.IndexedSlices(
        dresult,
        indices,
        dense_shape=gen_resource_variable_ops.variable_shape(resource))

  return result, grad


@ops.RegisterGradient("ResourceGather")
def _GatherGrad(op, grad):
  """Gradient for gather op."""
@ -797,7 +713,11 @@ def _GatherGrad(op, grad):
  # TODO(apassos): more robust way of getting the shape.
  # TODO(apassos): implement this for EAGER mode.
  if context.in_eager_mode():
    raise NotImplementedError("_GatherGrad not implemented for EAGER mode")
  dense_shape = gen_resource_variable_ops.variable_shape(op.inputs[0])
  return (ops.IndexedSlices(grad,
                            op.inputs[1],
                            dense_shape=dense_shape),
          None)
  handle = op.inputs[0]
  while handle.op.type != "VarHandleOp":
    handle = handle.op.inputs[0]
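The gradient of a gather is sparse: each gathered row's incoming gradient is routed back to the row of the variable it came from, which is exactly what `IndexedSlices(grad, indices, dense_shape)` above encodes. A NumPy check of that vector-Jacobian product (illustration only):

```python
import numpy as np

params = np.arange(12.).reshape(4, 3)
indices = np.array([1, 3, 1])          # note the repeated row
out_grad = np.ones((3, 3))             # upstream gradient for gather(params, indices)

# Densified equivalent of IndexedSlices(out_grad, indices, dense_shape=params.shape).
param_grad = np.zeros_like(params)
np.add.at(param_grad, indices, out_grad)
print(param_grad[1])  # [2. 2. 2.] -- row 1 was gathered twice
```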