Initial move of tf.contrib.distributions to core:

* Move distribution_utils
* Move base distribution classes
* Move Bijector class
* Move a unit test just to establish the directory structure.
Change: 154426175
Eugene Brevdo 2017-04-27 07:38:14 -08:00 committed by TensorFlower Gardener
parent 15a275969a
commit 0db6371fd8
65 changed files with 907 additions and 776 deletions
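The bulk of the 65 files below follow one mechanical pattern: imports from the old `tensorflow.contrib.distributions.python.ops` location are replaced by imports from the new core package `tensorflow.python.ops.distributions`. A representative before/after pair, lifted from the hunks that follow:

```python
# Old (contrib) location, removed by this commit:
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util

# New (core) location, added by this commit:
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
```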

View File

@@ -307,6 +307,8 @@ filegroup(
"//tensorflow/python/debug:all_files",
"//tensorflow/python/estimator:all_files",
"//tensorflow/python/kernel_tests:all_files",
"//tensorflow/python/kernel_tests/distributions:all_files",
"//tensorflow/python/ops/distributions:all_files",
"//tensorflow/python/saved_model:all_files",
"//tensorflow/python/tools:all_files",
"//tensorflow/tensorboard:all_files",

View File

@@ -48,9 +48,9 @@ import threading
import six
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators as sge
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions import distribution
STOCHASTIC_TENSOR_COLLECTION = "_stochastic_tensor_collection_"

View File

@@ -28,10 +28,10 @@ from __future__ import print_function
from tensorflow.contrib.bayesflow.python.ops import stochastic_graph_impl as sg
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor_impl as st
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.platform import tf_logging as logging
VI_PRIORS = "__vi_priors__"

View File

@@ -206,11 +206,13 @@ add_python_module("tensorflow/python/estimator/inputs/queues")
add_python_module("tensorflow/python/framework")
add_python_module("tensorflow/python/grappler")
add_python_module("tensorflow/python/kernel_tests")
add_python_module("tensorflow/python/kernel_tests/distributions")
add_python_module("tensorflow/python/layers")
add_python_module("tensorflow/python/lib")
add_python_module("tensorflow/python/lib/core")
add_python_module("tensorflow/python/lib/io")
add_python_module("tensorflow/python/ops")
add_python_module("tensorflow/python/ops/distributions")
add_python_module("tensorflow/python/ops/losses")
add_python_module("tensorflow/python/platform")
add_python_module("tensorflow/python/platform/default")

View File

@@ -29,6 +29,7 @@ py_library(
"//tensorflow/python:nn_ops",
"//tensorflow/python:random_ops",
"//tensorflow/python:special_math_ops",
"//tensorflow/python/ops/distributions",
"//third_party/py/numpy",
"@six_archive//:six",
],
@@ -54,6 +55,7 @@ py_library(
"//tensorflow/python:nn_ops",
"//tensorflow/python:random_ops",
"//tensorflow/python:special_math_ops",
"//tensorflow/python/ops/distributions",
"//third_party/py/numpy",
"@six_archive//:six",
],
@@ -813,25 +815,6 @@ filegroup(
# === Bijector Tests ==========================================================
cuda_py_test(
name = "bijector_test",
size = "small",
srcs = ["python/kernel_tests/bijectors/bijector_test.py"],
additional_deps = [
":bijectors_py",
":distributions_py",
"//third_party/py/numpy",
"@six_archive//:six",
"//tensorflow/contrib/linalg:linalg_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
],
)
cuda_py_test(
name = "conditional_bijector_test",
size = "small",

View File

@@ -96,12 +96,10 @@ from tensorflow.contrib.distributions.python.ops.beta import *
from tensorflow.contrib.distributions.python.ops.binomial import *
from tensorflow.contrib.distributions.python.ops.categorical import *
from tensorflow.contrib.distributions.python.ops.chi2 import *
from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
from tensorflow.contrib.distributions.python.ops.deterministic import *
from tensorflow.contrib.distributions.python.ops.dirichlet import *
from tensorflow.contrib.distributions.python.ops.dirichlet_multinomial import *
from tensorflow.contrib.distributions.python.ops.distribution import *
from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform
from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse
from tensorflow.contrib.distributions.python.ops.exponential import *
@@ -129,6 +127,8 @@ from tensorflow.contrib.distributions.python.ops.student_t import *
from tensorflow.contrib.distributions.python.ops.transformed_distribution import *
from tensorflow.contrib.distributions.python.ops.uniform import *
from tensorflow.contrib.distributions.python.ops.wishart import *
from tensorflow.python.ops.distributions.conditional_distribution import *
from tensorflow.python.ops.distributions.distribution import *
# pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member

View File

@@ -20,10 +20,10 @@ from __future__ import print_function
import numpy as np
from scipy import stats
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import logistic
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.platform import test

View File

@@ -19,16 +19,14 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib import distributions
from tensorflow.contrib.distributions.python.ops import operator_pd_cholesky
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import util as distribution_util
from tensorflow.python.platform import test
distributions = distributions_lib
def softplus(x):
return np.log(1 + np.exp(x))

View File

@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -30,6 +28,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
class Bernoulli(distribution.Distribution):

View File

@@ -20,8 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
@@ -33,6 +31,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -39,7 +39,6 @@ from __future__ import print_function
from tensorflow.contrib.distributions.python.ops.bijectors.affine import *
from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import *
from tensorflow.contrib.distributions.python.ops.bijectors.bijector import *
from tensorflow.contrib.distributions.python.ops.bijectors.chain import *
from tensorflow.contrib.distributions.python.ops.bijectors.cholesky_outer_product import *
from tensorflow.contrib.distributions.python.ops.bijectors.conditional_bijector import *
@@ -52,6 +51,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid import *
from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_centered import *
from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import *
from tensorflow.contrib.distributions.python.ops.bijectors.softplus import *
from tensorflow.python.ops.distributions.bijector import *
# pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member

View File

@@ -22,7 +22,6 @@ from tensorflow.contrib.distributions.python.ops import operator_pd_cholesky
from tensorflow.contrib.distributions.python.ops import operator_pd_diag
from tensorflow.contrib.distributions.python.ops import operator_pd_identity
from tensorflow.contrib.distributions.python.ops import operator_pd_vdvt_update
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -32,6 +31,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
from tensorflow.contrib.linalg.python.ops import linear_operator
from tensorflow.python.framework import constant_op
@@ -27,6 +26,7 @@ from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -20,8 +20,8 @@ from __future__ import print_function
import itertools
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.framework import constant_op
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -20,8 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
@@ -29,6 +27,8 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -18,8 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.ops.distributions import bijector
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = ["ConditionalBijector"]

View File

@@ -18,8 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.framework import constant_op
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -18,7 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -18,7 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops.bijectors import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector as bijector_lib
__all__ = [
"Invert",

View File

@@ -18,12 +18,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -18,9 +18,9 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -20,7 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
@@ -30,6 +29,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [

View File

@@ -18,10 +18,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops.bijectors import bijector
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import bijector
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -17,8 +17,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -27,6 +25,8 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
_binomial_sample_note = """

View File

@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
@@ -29,6 +27,8 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
class Categorical(distribution.Distribution):

View File

@@ -17,10 +17,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import conditional_distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import transformed_distribution
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import conditional_distribution
from tensorflow.python.ops.distributions import util as distribution_util
# pylint: disable=protected-access

View File

@@ -22,7 +22,6 @@ import abc
import six
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -32,6 +31,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import distribution
__all__ = [
"Deterministic",

View File

@@ -20,8 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
@@ -29,6 +27,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import special_math_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
@@ -28,6 +26,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import special_math_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -18,619 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import hashlib
import math
import numpy as np
from tensorflow.contrib import linalg
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
def assert_close(
x, y, data=None, summarize=None, message=None, name="assert_close"):
"""Assert that that x and y are within machine epsilon of each other.
Args:
x: Floating-point `Tensor`
y: Floating-point `Tensor`
data: The tensors to print out if the condition is `False`. Defaults to
error message and first few entries of `x` and `y`.
summarize: Print this many entries of each tensor.
message: A string to prefix to the default message.
name: A name for this operation (optional).
Returns:
Op raising `InvalidArgumentError` if |x - y| > machine epsilon.
"""
message = message or ""
x = ops.convert_to_tensor(x, name="x")
y = ops.convert_to_tensor(y, name="y")
if data is None:
data = [
message,
"Condition x ~= y did not hold element-wise: x = ", x.name, x, "y = ",
y.name, y
]
if x.dtype.is_integer:
return check_ops.assert_equal(
x, y, data=data, summarize=summarize, message=message, name=name)
with ops.name_scope(name, "assert_close", [x, y, data]):
tol = np.finfo(x.dtype.as_numpy_dtype).eps
condition = math_ops.reduce_all(math_ops.less_equal(math_ops.abs(x-y), tol))
return control_flow_ops.Assert(
condition, data, summarize=summarize)
def assert_integer_form(
x, data=None, summarize=None, message=None, name="assert_integer_form"):
"""Assert that x has integer components (or floats equal to integers).
Args:
x: Floating-point `Tensor`
data: The tensors to print out if the condition is `False`. Defaults to
the error message and first few entries of `x`.
summarize: Print this many entries of each tensor.
message: A string to prefix to the default message.
name: A name for this operation (optional).
Returns:
Op raising `InvalidArgumentError` if round(x) != x.
"""
message = message or "x has non-integer components"
x = ops.convert_to_tensor(x, name="x")
casted_x = math_ops.to_int64(x)
return check_ops.assert_equal(
x, math_ops.cast(math_ops.round(casted_x), x.dtype),
data=data, summarize=summarize, message=message, name=name)
def assert_symmetric(matrix):
matrix_t = array_ops.matrix_transpose(matrix)
return control_flow_ops.with_dependencies(
[check_ops.assert_equal(matrix, matrix_t)], matrix)
def embed_check_nonnegative_discrete(x, check_integer=True):
"""Assert x is a non-negative tensor, and optionally of integers."""
assertions = [check_ops.assert_non_negative(
x, message="x must be non-negative.")]
if check_integer:
assertions += [assert_integer_form(
x, message="x cannot contain fractional components.")]
return control_flow_ops.with_dependencies(assertions, x)
def same_dynamic_shape(a, b):
"""Returns whether a and b have the same dynamic shape.
Args:
a: `Tensor`
b: `Tensor`
Returns:
`bool` `Tensor` representing if both tensors have the same shape.
"""
a = ops.convert_to_tensor(a, name="a")
b = ops.convert_to_tensor(b, name="b")
# Here we can't just do math_ops.equal(a.shape, b.shape), since
# static shape inference may break the equality comparison between
# shape(a) and shape(b) in math_ops.equal.
def all_shapes_equal():
return math_ops.reduce_all(math_ops.equal(
array_ops.concat([array_ops.shape(a), array_ops.shape(b)], 0),
array_ops.concat([array_ops.shape(b), array_ops.shape(a)], 0)))
# One of the shapes isn't fully defined, so we need to use the dynamic
# shape.
return control_flow_ops.cond(
math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
all_shapes_equal,
lambda: constant_op.constant(False))
def get_logits_and_probs(logits=None,
probs=None,
multidimensional=False,
validate_args=False,
name="get_logits_and_probs"):
"""Converts logit to probabilities (or vice-versa), and returns both.
Args:
logits: Floating-point `Tensor` representing log-odds.
probs: Floating-point `Tensor` representing probabilities.
multidimensional: Python `bool`, default `False`.
If `True`, the last dimension of `logits` or `probs`, a
`[N1, N2, ..., k]`-shaped tensor, indexes the logit or probability of
each of `k` classes.
validate_args: Python `bool`, default `False`. When `True`, either assert
`0 <= probs <= 1` (if not `multidimensional`) or that the last dimension
of `probs` sums to one.
name: A name for this operation (optional).
Returns:
logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0` or
`1`, then the corresponding entry in the returned logit will be `-Inf` and
`Inf` respectively.
Raises:
ValueError: if neither `probs` nor `logits` were passed in, or both were.
"""
with ops.name_scope(name, values=[probs, logits]):
if (probs is None) == (logits is None):
raise ValueError("Must pass probs or logits, but not both.")
if probs is None:
logits = ops.convert_to_tensor(logits, name="logits")
if multidimensional:
return logits, nn.softmax(logits, name="probs")
return logits, math_ops.sigmoid(logits, name="probs")
probs = ops.convert_to_tensor(probs, name="probs")
if validate_args:
with ops.name_scope("validate_probs"):
one = constant_op.constant(1., probs.dtype)
dependencies = [check_ops.assert_non_negative(probs)]
if multidimensional:
dependencies += [assert_close(math_ops.reduce_sum(probs, -1), one,
message="probs does not sum to 1.")]
else:
dependencies += [check_ops.assert_less_equal(
probs, one, message="probs has components greater than 1.")]
probs = control_flow_ops.with_dependencies(dependencies, probs)
with ops.name_scope("logits"):
if multidimensional:
# Here we don't compute the multidimensional case, in a manner
# consistent with respect to the unidimensional case. We do so
# following the TF convention. Typically, you might expect to see
# logits = log(probs) - log(probs[pivot]). A side-effect of
# being consistent with the TF approach is that the unidimensional case
# implicitly handles the second dimension but the multidimensional case
# explicitly keeps the pivot dimension.
return math_ops.log(probs), probs
return math_ops.log(probs) - math_ops.log1p(-1. * probs), probs
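As a sanity check on the binary branch of `get_logits_and_probs` above, a minimal NumPy sketch (hypothetical, not part of this commit) showing that `log(p) - log1p(-p)` and the sigmoid are inverses:

```python
import numpy as np

p = np.array([0.1, 0.5, 0.9])
logits = np.log(p) - np.log1p(-p)    # log-odds, as computed above
probs = 1. / (1. + np.exp(-logits))  # sigmoid recovers the probabilities
assert np.allclose(probs, p)
```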
def log_combinations(n, counts, name="log_combinations"):
"""Multinomial coefficient.
Given `n` and `counts`, where `counts` has last dimension `k`, we compute
the multinomial coefficient as:
```n! / prod_i n_i!```
where `i` runs over all `k` classes.
Args:
n: Floating-point `Tensor` broadcastable with `counts`. This represents `n`
outcomes.
counts: Floating-point `Tensor` broadcastable with `n`. This represents
counts in `k` classes, where `k` is the last dimension of the tensor.
name: A name for this operation (optional).
Returns:
`Tensor` representing the multinomial coefficient between `n` and `counts`.
"""
# First a bit about the number of ways counts could have come in:
# E.g. if counts = [1, 2], then this is 3 choose 2.
# In general, this is (sum counts)! / prod(counts!)
# The sum should be along the last dimension of counts. This is the
# "distribution" dimension. Here n a priori represents the sum of counts.
with ops.name_scope(name, values=[n, counts]):
n = ops.convert_to_tensor(n, name="n")
counts = ops.convert_to_tensor(counts, name="counts")
total_permutations = math_ops.lgamma(n + 1)
counts_factorial = math_ops.lgamma(counts + 1)
redundant_permutations = math_ops.reduce_sum(counts_factorial, axis=[-1])
return total_permutations - redundant_permutations
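A quick worked check of `log_combinations` (a hedged NumPy/SciPy sketch, not part of the diff): for `counts = [1, 2]`, so `n = 3`, the multinomial coefficient is `3! / (1! * 2!) = 3`.

```python
import numpy as np
from scipy.special import gammaln  # log-gamma, the NumPy analogue of lgamma

n, counts = 3., np.array([1., 2.])
log_coef = gammaln(n + 1.) - np.sum(gammaln(counts + 1.))
assert np.isclose(np.exp(log_coef), 3.)  # 3! / (1! * 2!) = 3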
def matrix_diag_transform(matrix, transform=None, name=None):
"""Transform diagonal of [batch-]matrix, leave rest of matrix unchanged.
Create a trainable covariance defined by a Cholesky factor:
```python
# Transform network layer into 2 x 2 array.
matrix_values = tf.contrib.layers.fully_connected(activations, 4)
matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
# Make the diagonal positive. If the upper triangle was zero, this would be a
# valid Cholesky factor.
chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)
# OperatorPDCholesky ignores the upper triangle.
operator = OperatorPDCholesky(chol)
```
Example of heteroskedastic 2-D linear regression.
```python
# Get a trainable Cholesky factor.
matrix_values = tf.contrib.layers.fully_connected(activations, 4)
matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)
# Get a trainable mean.
mu = tf.contrib.layers.fully_connected(activations, 2)
# This is a fully trainable multivariate normal!
dist = tf.contrib.distributions.MVNCholesky(mu, chol)
# Standard log loss. Minimizing this will "train" mu and chol, and then dist
# will be a distribution predicting labels as multivariate Gaussians.
loss = -1 * tf.reduce_mean(dist.log_prob(labels))
```
Args:
matrix: Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
equal.
transform: Element-wise function mapping `Tensors` to `Tensors`. To
be applied to the diagonal of `matrix`. If `None`, `matrix` is returned
unchanged. Defaults to `None`.
name: A name to give created ops.
Defaults to "matrix_diag_transform".
Returns:
A `Tensor` with same shape and `dtype` as `matrix`.
"""
with ops.name_scope(name, "matrix_diag_transform", [matrix]):
matrix = ops.convert_to_tensor(matrix, name="matrix")
if transform is None:
return matrix
# Replace the diag with transformed diag.
diag = array_ops.matrix_diag_part(matrix)
transformed_diag = transform(diag)
transformed_mat = array_ops.matrix_set_diag(matrix, transformed_diag)
return transformed_mat
def rotate_transpose(x, shift, name="rotate_transpose"):
"""Circularly moves dims left or right.
Effectively identical to:
```python
numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift))
```
When the shape or shift cannot be determined statically, additional
graph-runtime checks are performed. These checks entail moving data from
GPU to CPU.
Example:
```python
x = ... # Tensor of shape [1, 2, 3, 4].
rotate_transpose(x, -1) # result shape: [2, 3, 4, 1]
rotate_transpose(x, -2) # result shape: [3, 4, 1, 2]
rotate_transpose(x, 1) # result shape: [4, 1, 2, 3]
rotate_transpose(x, 2) # result shape: [3, 4, 1, 2]
rotate_transpose(x, 7) == rotate_transpose(x, 3)
rotate_transpose(x, -7) == rotate_transpose(x, -3)
```
Args:
x: `Tensor`.
shift: `Tensor`. Number of dimensions to transpose left (shift<0) or
transpose right (shift>0).
name: Python `str`. The name to give this op.
Returns:
rotated_x: Input `Tensor` with dimensions circularly rotated by shift.
Raises:
TypeError: if shift is not integer type.
"""
with ops.name_scope(name, values=[x, shift]):
x = ops.convert_to_tensor(x, name="x")
shift = ops.convert_to_tensor(shift, name="shift")
# We do not assign back to preserve constant-ness.
check_ops.assert_integer(shift)
shift_value_static = tensor_util.constant_value(shift)
ndims = x.get_shape().ndims
if ndims is not None and shift_value_static is not None:
if ndims < 2: return x
shift_value_static = np.sign(shift_value_static) * (
abs(shift_value_static) % ndims)
if shift_value_static == 0: return x
perm = np.roll(np.arange(ndims), shift_value_static)
return array_ops.transpose(x, perm=perm)
else:
# Consider if we always had a positive shift, and some specified
# direction.
# When shifting left we want the new array:
# last(x, n-shift) + first(x, shift)
# and if shifting right then we want:
# last(x, shift) + first(x, n-shift)
# Observe that last(a) == slice(a, n) and first(a) == slice(0, a).
# Also, we can encode direction and shift as one: direction * shift.
# Combining these facts, we have:
# a = cond(shift<0, -shift, n-shift)
# last(x, n-a) + first(x, a) == x[a:n] + x[0:a]
# Finally, we transform shift by modulo length so it can be specified
# independently from the array upon which it operates (like python).
ndims = array_ops.rank(x)
shift = array_ops.where(math_ops.less(shift, 0),
math_ops.mod(-shift, ndims),
ndims - math_ops.mod(shift, ndims))
first = math_ops.range(0, shift)
last = math_ops.range(shift, ndims)
perm = array_ops.concat([last, first], 0)
return array_ops.transpose(x, perm=perm)
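The NumPy equivalence stated in the `rotate_transpose` docstring can be checked directly; a small sketch with an assumed example shape:

```python
import numpy as np

x = np.zeros([1, 2, 3, 4])
for shift, expected in [(-1, (2, 3, 4, 1)), (2, (3, 4, 1, 2))]:
    perm = np.roll(np.arange(x.ndim), shift)  # the documented numpy formula
    assert np.transpose(x, perm).shape == expected
```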
def pick_vector(cond,
true_vector,
false_vector,
name="pick_vector"):
"""Picks possibly different length row `Tensor`s based on condition.
Value `Tensor`s should have exactly one dimension.
If `cond` is a python Boolean or `tf.constant` then either `true_vector` or
`false_vector` is immediately returned. I.e., no graph nodes are created and
no validation happens.
Args:
cond: `Tensor`. Must have `dtype=tf.bool` and be scalar.
true_vector: `Tensor` of one dimension. Returned when cond is `True`.
false_vector: `Tensor` of one dimension. Returned when cond is `False`.
name: Python `str`. The name to give this op.
Example:
```python
pick_vector(tf.less(0, 5), tf.range(10, 12), tf.range(15, 18))
# result is tensor: [10, 11].
pick_vector(tf.less(5, 0), tf.range(10, 12), tf.range(15, 18))
# result is tensor: [15, 16, 17].
```
Returns:
true_or_false_vector: `Tensor`.
Raises:
TypeError: if `cond.dtype != tf.bool`
TypeError: if `cond` is not a constant and
`true_vector.dtype != false_vector.dtype`
"""
with ops.name_scope(name, values=(cond, true_vector, false_vector)):
cond = ops.convert_to_tensor(cond, name="cond")
if cond.dtype != dtypes.bool:
raise TypeError("%s.dtype=%s which is not %s" %
(cond.name, cond.dtype, dtypes.bool))
cond_value_static = tensor_util.constant_value(cond)
if cond_value_static is not None:
return true_vector if cond_value_static else false_vector
true_vector = ops.convert_to_tensor(true_vector, name="true_vector")
false_vector = ops.convert_to_tensor(false_vector, name="false_vector")
if true_vector.dtype != false_vector.dtype:
raise TypeError(
"%s.dtype=%s does not match %s.dtype=%s"
% (true_vector.name, true_vector.dtype,
false_vector.name, false_vector.dtype))
n = array_ops.shape(true_vector)[0]
return array_ops.slice(
array_ops.concat([true_vector, false_vector], 0),
[array_ops.where(cond, 0, n)], [array_ops.where(cond, n, -1)])
def gen_new_seed(seed, salt):
"""Generate a new seed, from the given seed and salt."""
if seed is None:
return None
string = (str(seed) + salt).encode("utf-8")
return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF
def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"):
"""Creates a (batch of) lower triangular matrix from a vector of inputs.
If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
`n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.
Although the non-batch complexity is O(n**2), large constants and sub-optimal
vectorization mean this function is roughly 5x slower than simply zeroing
out the upper triangle, i.e., `tf.matrix_band_part(X, -1, 0)`. This
function becomes competitive only when several matmul/cholesky/etc. ops can
be elided in constructing the input. Example: wiring a fully connected layer
as a covariance matrix; this function reduces the final layer by 2x and
possibly reduces the network architecture complexity considerably. In most
cases it is better to simply build a full matrix and zero out the upper
triangular elements, e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather
than directly constructing a lower triangular matrix.
Example:
```python
fill_lower_triangular([1, 2, 3, 4, 5, 6])
# Returns: [[1, 0, 0],
# [2, 3, 0],
# [4, 5, 6]]
```
For comparison, a pure numpy version of this function can be found in
`distribution_util_test.py`, function `_fill_lower_triangular`.
Args:
x: `Tensor` representing lower triangular elements.
validate_args: Python `bool`, default `False`. Whether to ensure the shape
of `x` can be mapped to a lower triangular matrix (controls non-static
checks only).
name: Python `str`. The name to give this op.
Returns:
tril: `Tensor` with lower triangular elements filled from `x`.
Raises:
ValueError: if `x` has a static shape which cannot be mapped to a
lower triangular matrix.
"""
# TODO(jvdillon): Replace this code with dedicated op when it exists.
with ops.name_scope(name, values=[x]):
x = ops.convert_to_tensor(x, name="x")
if (x.get_shape().ndims is not None and
x.get_shape()[-1].value is not None):
d = x.get_shape()[-1].value
# d = n(n+1)/2 implies n is:
n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
d_inferred = n * (n + 1) / 2
if d != d_inferred:
raise ValueError("Input cannot be mapped to a lower triangular; "
"n*(n+1)/2 = %d != %d" % (d_inferred, d))
final_shape = x.get_shape()[:-1].concatenate(
tensor_shape.TensorShape([n, n]))
else:
d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32)
# d = n(n+1)/2 implies n is:
n = math_ops.cast(0.5 * (math_ops.sqrt(1. + 8. * d) - 1.),
dtype=dtypes.int32)
if validate_args:
is_valid_input_shape = check_ops.assert_equal(
n * (n + 1) / 2, d,
message="Input cannot be mapped to a lower triangular.")
n = control_flow_ops.with_dependencies([is_valid_input_shape], n)
final_shape = x.get_shape()[:-1].concatenate(
tensor_shape.TensorShape([None, None]))
def tril_ids(n):
"""Internal helper to create vector of linear indices into y."""
# Build the ids statically; 512 was chosen because it implies 1MiB.
if not tensor_util.is_tensor(n) and n <= 512:
ids = np.arange(n**2, dtype=np.int32)
rows = (ids / n).astype(np.int32) # Implicit floor.
# We need to stop incrementing the index when we encounter
# upper-triangular elements. The idea here is to compute the
# lower-right number of zeros then by "symmetry" subtract this from the
# total number of zeros, n(n-1)/2.
# Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
# We could also zero out when (rows < cols) == (rows < ids-n*rows).
# mask = (ids <= (n + 1) * rows).astype(np.int32)
else:
ids = math_ops.range(n**2)
rows = math_ops.cast(ids / n, dtype=dtypes.int32)
offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
dtype=dtypes.int32)
return ids - offset
# Special-case non-batch case.
if x.get_shape().ndims == 1:
y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
y = array_ops.matrix_band_part(y, -1, 0)
y.set_shape(y.get_shape().merge_with(final_shape))
return y
# Make ids for each batch dim.
if (x.get_shape().ndims is not None and
x.get_shape()[:-1].is_fully_defined()):
batch_shape = np.asarray(x.get_shape()[:-1].as_list(), dtype=np.int32)
m = np.prod(batch_shape).astype(np.int32)
else:
batch_shape = array_ops.shape(x)[:-1]
m = math_ops.reduce_prod(array_ops.shape(x)[:-1])
batch_ids = math_ops.range(m)
# Assemble the tril_ids into batch,tril_id pairs.
idx = array_ops.stack([
array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
])
idx = array_ops.transpose(idx, [1, 2, 0])
# Gather up, reshape, and return.
y = array_ops.reshape(x, [-1, d])
y = array_ops.gather_nd(y, idx)
y = array_ops.reshape(y, array_ops.concat([batch_shape, [n, n]], 0))
y = array_ops.matrix_band_part(y, -1, 0)
y.set_shape(y.get_shape().merge_with(final_shape))
return y
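The docstring above points at a pure-NumPy reference implementation in `distribution_util_test.py`; the following stand-alone sketch of the same vector-to-lower-triangular mapping is an approximation written here, not the test's actual `_fill_lower_triangular`:

```python
import math
import numpy as np

def fill_lower_triangular_np(x):
    d = x.shape[-1]
    n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))  # solve d = n(n+1)/2 for n
    if n * (n + 1) // 2 != d:
        raise ValueError("input length is not a triangular number")
    rows, cols = np.tril_indices(n)
    y = np.zeros(x.shape[:-1] + (n, n), dtype=x.dtype)
    y[..., rows, cols] = x  # scatter the vector into the lower triangle, row by row
    return y

print(fill_lower_triangular_np(np.array([1, 2, 3, 4, 5, 6])))
# [[1 0 0]
#  [2 3 0]
#  [4 5 6]]
```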
# TODO(jvdillon): Merge this test back into:
# tensorflow/python/ops/softplus_op_test.py
# once TF core is accepting new ops.
def softplus_inverse(x, name=None):
"""Computes the inverse softplus, i.e., x = softplus_inverse(softplus(x)).
Mathematically this op is equivalent to:
```none
softplus_inverse = log(exp(x) - 1.)
```
Args:
x: `Tensor`. Non-negative (not enforced), floating-point.
name: A name for the operation (optional).
Returns:
`Tensor`. Has the same type/shape as input `x`.
"""
with ops.name_scope(name, "softplus_inverse", values=[x]):
x = ops.convert_to_tensor(x, name="x")
# We begin by deriving a more numerically stable softplus_inverse:
# x = softplus(y) = Log[1 + exp{y}], (which means x > 0).
# ==> exp{x} = 1 + exp{y} (1)
# ==> y = Log[exp{x} - 1] (2)
# = Log[(exp{x} - 1) / exp{x}] + Log[exp{x}]
# = Log[(1 - exp{-x}) / 1] + Log[exp{x}]
# = Log[1 - exp{-x}] + x (3)
# (2) is the "obvious" inverse, but (3) is more stable than (2) for large x.
# For small x (e.g. x = 1e-10), (3) will become -inf since 1 - exp{-x} will
# be zero. To fix this, we use 1 - exp{-x} approx x for small x > 0.
#
# In addition to the numerically stable derivation above, we clamp
# small/large values to be congruent with the logic in:
# tensorflow/core/kernels/softplus_op.h
#
# Finally, we set the input to one whenever the input is too large or too
# small. This ensures that no unchosen codepath is +/- inf. This is
# necessary to ensure the gradient doesn't get NaNs. Recall that the
# gradient of `where` behaves like `pred*pred_true + (1-pred)*pred_false`
# thus an `inf` in an unselected path results in `0*inf=nan`. We are careful
# to overwrite `x` with ones only when we will never actually use this
# value. Note that we use ones and not zeros since `log(expm1(0.)) = -inf`.
threshold = np.log(np.finfo(x.dtype.as_numpy_dtype).eps) + 2.
is_too_small = math_ops.less(x, np.exp(threshold))
is_too_large = math_ops.greater(x, -threshold)
too_small_value = math_ops.log(x)
too_large_value = x
# This `where` will ultimately be a NOP because we won't select this
# codepath whenever we used the surrogate `ones_like`.
x = array_ops.where(math_ops.logical_or(is_too_small, is_too_large),
array_ops.ones_like(x), x)
y = x + math_ops.log(-math_ops.expm1(-x)) # == log(expm1(x))
return array_ops.where(is_too_small, too_small_value,
array_ops.where(is_too_large, too_large_value, y))
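A short NumPy check of the headline identity `softplus_inverse(softplus(x)) == x`, using form (3) from the derivation above (a hedged sketch, not part of the commit):

```python
import numpy as np

x = np.array([0.5, 1., 10.])
sp = np.log1p(np.exp(x))                 # softplus(x) = log(1 + exp(x))
recovered = sp + np.log(-np.expm1(-sp))  # form (3): x + log(1 - exp(-x)) == log(expm1(x))
assert np.allclose(recovered, x)
```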
# TODO(b/35290280): Add unit-tests.
def dimension_size(x, axis):
"""Returns the size of a specific dimension."""
# Since tf.gather isn't "constant-in, constant-out", we must first check the
# static shape or fall back to the dynamic shape.
num_rows = (None if x.get_shape().ndims is None
else x.get_shape()[axis].value)
if num_rows is not None:
return num_rows
return array_ops.shape(x)[axis]
from tensorflow.python.ops.distributions import util
from tensorflow.python.ops.distributions.util import * # pylint: disable=wildcard-import
# TODO(b/35290280): Add unit-tests.
@@ -678,7 +73,7 @@ def make_diag_scale(loc, scale_diag, scale_identity_multiplier,
raise ValueError(
"Cannot infer `event_shape` unless `loc` is specified.")
num_rows = dimension_size(loc, -1)
num_rows = util.dimension_size(loc, -1)
if scale_identity_multiplier is None:
return linalg.LinearOperatorIdentity(
@@ -695,64 +90,3 @@ def make_diag_scale(loc, scale_diag, scale_identity_multiplier,
is_self_adjoint=True,
is_positive_definite=assert_positive,
assert_proper_shapes=validate_args)
class AppendDocstring(object):
"""Helper class to promote private subclass docstring to public counterpart.
Example:
```python
class TransformedDistribution(Distribution):
@distribution_util.AppendDocstring(
additional_note="A special note!",
kwargs_dict={"foo": "An extra arg."})
def _prob(self, y, foo=None):
pass
```
In this case, the `AppendDocstring` decorator appends the `additional_note` to
the docstring of `prob` (not `_prob`) and adds a new `kwargs`
section with each dictionary item as a bullet-point.
For a more detailed example, see `TransformedDistribution`.
"""
def __init__(self, additional_note="", kwargs_dict=None):
"""Initializes the AppendDocstring object.
Args:
additional_note: Python string added as additional docstring to public
version of function.
kwargs_dict: Python string/string dictionary representing
specific kwargs expanded from the **kwargs input.
Raises:
ValueError: if kwargs_dict.key contains whitespace.
ValueError: if kwargs_dict.value contains newlines.
"""
self._additional_note = additional_note
if kwargs_dict:
bullets = []
for key in sorted(kwargs_dict.keys()):
value = kwargs_dict[key]
if any(x.isspace() for x in key):
raise ValueError(
"Parameter name \"%s\" contains whitespace." % key)
value = value.lstrip()
if "\n" in value:
raise ValueError(
"Parameter description for \"%s\" contains newlines." % key)
bullets.append("* `%s`: %s" % (key, value))
self._additional_note += ("\n\n##### `kwargs`:\n\n" +
"\n".join(bullets))
def __call__(self, fn):
@functools.wraps(fn)
def _fn(*args, **kwargs):
return fn(*args, **kwargs)
if _fn.__doc__ is None:
_fn.__doc__ = self._additional_note
else:
_fn.__doc__ += "\n%s" % self._additional_note
return _fn

View File

@@ -20,8 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
@@ -33,6 +31,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -19,8 +19,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -31,6 +29,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
class Geometric(distribution.Distribution):

View File

@@ -20,7 +20,6 @@ from __future__ import print_function
import math
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -29,6 +28,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
class _Gumbel(distribution.Distribution):

View File

@@ -20,8 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -32,6 +30,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -22,7 +22,6 @@ import math
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import special_math
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
@@ -33,6 +32,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
__all__ = [

View File

@@ -21,7 +21,6 @@ from __future__ import print_function
import math
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -31,6 +30,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
class Logistic(distribution.Distribution):

View File

@@ -21,8 +21,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import categorical
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
@@ -31,6 +29,8 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import data_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
class Mixture(distribution.Distribution):

View File

@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
@@ -27,6 +25,8 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -20,7 +20,6 @@ from __future__ import print_function
from tensorflow.contrib import linalg
from tensorflow.contrib.distributions.python.ops import bijectors
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.contrib.distributions.python.ops import normal
from tensorflow.contrib.distributions.python.ops import transformed_distribution
@@ -30,6 +29,7 @@ from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -19,13 +19,13 @@ from __future__ import division
from __future__ import print_function
from tensorflow.contrib import linalg
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import mvn_linear_operator as mvn_linop
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
@@ -27,6 +25,8 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
class NegativeBinomial(distribution.Distribution):

View File

@@ -20,7 +20,6 @@ from __future__ import print_function
import math
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.contrib.distributions.python.ops import special_math
from tensorflow.python.framework import constant_op
@@ -32,6 +31,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
__all__ = [

View File

@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -29,6 +27,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
class OneHotCategorical(distribution.Distribution):

View File

@@ -18,10 +18,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import operator_pd_cholesky
from tensorflow.python.framework import ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@@ -18,8 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -28,6 +26,8 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [
"Poisson",

View File

@@ -20,13 +20,13 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution as distributions
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import distribution as distributions
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = ["QuantizedDistribution"]

View File

@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import logistic
from tensorflow.contrib.distributions.python.ops import transformed_distribution
# Bijectors must be directly imported because `remove_undocumented` prevents
@@ -28,6 +27,7 @@ from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops.distributions import util as distribution_util
class RelaxedBernoulli(transformed_distribution.TransformedDistribution):

View File

@@ -20,8 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import bijectors
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import transformed_distribution
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@@ -31,6 +29,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
class ExpRelaxedOneHotCategorical(distribution.Distribution):

View File

@@ -19,7 +19,6 @@ from __future__ import print_function
import contextlib
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
@@ -27,6 +26,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import util as distribution_util
class _DistributionShape(object):

View File

@@ -20,8 +20,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@ -33,6 +31,8 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import special_math_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@ -19,8 +19,6 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution as distribution_lib
from tensorflow.contrib.distributions.python.ops import distribution_util
# Bijectors must be directly imported because `remove_undocumented` prevents
# individual file imports.
from tensorflow.contrib.distributions.python.ops.bijectors.identity import Identity
@ -33,6 +31,8 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import distribution as distribution_lib
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [
"TransformedDistribution",

View File

@ -20,7 +20,6 @@ from __future__ import print_function
import math
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@ -29,6 +28,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
class Uniform(distribution.Distribution):

View File

@ -19,7 +19,6 @@ from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import bijectors
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import student_t
from tensorflow.contrib.distributions.python.ops import transformed_distribution
from tensorflow.python.framework import constant_op
@ -27,6 +26,7 @@ from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions import util as distribution_util
# TODO(jvdillon): Add unittests for this once we know where we will put this code

View File

@ -21,8 +21,6 @@ from __future__ import print_function
import math
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import operator_pd_cholesky
from tensorflow.contrib.distributions.python.ops import operator_pd_full
from tensorflow.python.framework import constant_op
@ -35,6 +33,8 @@ from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops.distributions import distribution
from tensorflow.python.ops.distributions import util as distribution_util
__all__ = [

View File

@ -81,6 +81,7 @@ py_library(
"//third_party/py/numpy",
"//tensorflow/python/estimator:estimator_py",
"//tensorflow/python/ops/losses",
"//tensorflow/python/ops/distributions",
"//tensorflow/python/saved_model",
] + if_not_windows([
"//tensorflow/contrib:contrib_py",

View File

@ -0,0 +1,42 @@
# Tests of TensorFlow kernels written using the Python API.
package(
default_visibility = ["//tensorflow:internal"],
features = [
"-layering_check",
"-parse_headers",
],
)
licenses(["notice"]) # Apache 2.0
load("//tensorflow:tensorflow.bzl", "cuda_py_test")
cuda_py_test(
name = "bijector_test",
size = "small",
srcs = ["bijector_test.py"],
additional_deps = [
"//tensorflow/python/ops/distributions",
"//third_party/py/numpy",
"@six_archive//:six",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
],
)
filegroup(
name = "all_files",
srcs = glob(
["**/*"],
exclude = [
"**/METADATA",
"**/OWNERS",
],
),
visibility = ["//tensorflow:__subpackages__"],
)

View File

@ -0,0 +1,18 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Kernel tests for tf.distributions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

View File

@ -22,9 +22,9 @@ import abc
import six
from tensorflow.contrib.distributions.python.ops.bijectors.bijector import Bijector
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
from tensorflow.python.platform import test
@ -36,10 +36,10 @@ class BaseBijectorTest(test.TestCase):
with self.assertRaisesRegexp(TypeError,
("Can't instantiate abstract class Bijector "
"with abstract methods __init__")):
Bijector()
bijector.Bijector() # pylint: disable=abstract-class-instantiated
def testDefaults(self):
class _BareBonesBijector(Bijector):
class _BareBonesBijector(bijector.Bijector):
"""Minimal specification of a `Bijector`."""
def __init__(self):
@ -80,7 +80,7 @@ class IntentionallyMissingError(Exception):
pass
class BrokenBijector(Bijector):
class BrokenBijector(bijector.Bijector):
"""Forward and inverse are not inverses of each other."""
def __init__(self, forward_missing=False, inverse_missing=False):

View File

@ -0,0 +1,41 @@
package(
default_visibility = [
"//tensorflow:internal",
],
features = [
"-layering_check",
"-parse_headers",
],
)
load("//tensorflow:tensorflow.bzl", "py_test")
licenses(["notice"]) # Apache 2.0
py_library(
name = "distributions",
srcs = glob(["*.py"]),
srcs_version = "PY2AND3",
deps = [
"//tensorflow/python:array_ops",
"//tensorflow/python:control_flow_ops",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:math_ops",
"//tensorflow/python:nn",
"//tensorflow/python:nn_ops",
"//tensorflow/python:platform",
"//tensorflow/python:util",
],
)
filegroup(
name = "all_files",
srcs = glob(
["**/*"],
exclude = [
"**/METADATA",
"**/OWNERS",
],
),
visibility = ["//tensorflow:__subpackages__"],
)

View File

@ -0,0 +1,18 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Core module for TensorFlow distribution objects and helpers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

View File

@ -20,7 +20,7 @@ from __future__ import print_function
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.contrib.distributions.python.ops.bijectors.bijector_impl import *
from tensorflow.python.ops.distributions.bijector_impl import *
# pylint: enable=wildcard-import
from tensorflow.python.util.all_util import remove_undocumented

View File

@ -18,8 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.ops.distributions import distribution
class ConditionalDistribution(distribution.Distribution):

View File

@ -25,13 +25,13 @@ import types
import numpy as np
import six
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import util
from tensorflow.python.util import tf_inspect
@ -241,7 +241,7 @@ class Distribution(_BaseDistribution):
docstrings for their method specializations. For example:
```python
@distribution_util.AppendDocstring("Some other details.")
@util.AppendDocstring("Some other details.")
def _log_prob(self, value):
...
```
@ -1033,10 +1033,9 @@ class Distribution(_BaseDistribution):
if ndims is None:
# Maybe expand_dims.
ndims = array_ops.rank(x)
expanded_shape = distribution_util.pick_vector(
expanded_shape = util.pick_vector(
math_ops.equal(ndims, 0),
np.array([1], dtype=np.int32),
array_ops.shape(x))
np.array([1], dtype=np.int32), array_ops.shape(x))
x = array_ops.reshape(x, expanded_shape)
elif ndims == 0:
# Definitely expand_dims.

View File

@ -0,0 +1,693 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for probability distributions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import hashlib
import math
import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
def assert_close(
x, y, data=None, summarize=None, message=None, name="assert_close"):
"""Assert that that x and y are within machine epsilon of each other.
Args:
x: Floating-point `Tensor`
y: Floating-point `Tensor`
data: The tensors to print out if the condition is `False`. Defaults to
error message and first few entries of `x` and `y`.
summarize: Print this many entries of each tensor.
message: A string to prefix to the default message.
name: A name for this operation (optional).
Returns:
Op raising `InvalidArgumentError` if |x - y| > machine epsilon.
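Example (an illustrative sketch; the tolerance shown assumes `float32` inputs):

```python
x = tf.constant([1., 2.])
y = x + 1e-8  # Within float32 machine epsilon of `x`.
with tf.control_dependencies([assert_close(x, y)]):
  x = tf.identity(x)  # Evaluates only if the assertion holds.
```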
"""
message = message or ""
x = ops.convert_to_tensor(x, name="x")
y = ops.convert_to_tensor(y, name="y")
if data is None:
data = [
message,
"Condition x ~= y did not hold element-wise: x = ", x.name, x, "y = ",
y.name, y
]
if x.dtype.is_integer:
return check_ops.assert_equal(
x, y, data=data, summarize=summarize, message=message, name=name)
with ops.name_scope(name, "assert_close", [x, y, data]):
tol = np.finfo(x.dtype.as_numpy_dtype).eps
condition = math_ops.reduce_all(
math_ops.less_equal(math_ops.abs(x - y), tol))
return control_flow_ops.Assert(
condition, data, summarize=summarize)
def assert_integer_form(
x, data=None, summarize=None, message=None, name="assert_integer_form"):
"""Assert that x has integer components (or floats equal to integers).
Args:
x: Floating-point `Tensor`
data: The tensors to print out if the condition is `False`. Defaults to
error message and first few entries of `x`.
summarize: Print this many entries of each tensor.
message: A string to prefix to the default message.
name: A name for this operation (optional).
Returns:
Op raising `InvalidArgumentError` if round(x) != x.
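Example (an illustrative sketch):

```python
ok = assert_integer_form(tf.constant([1., 2.]))    # Passes when run.
bad = assert_integer_form(tf.constant([1.5, 2.]))  # Raises when run.
```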
"""
message = message or "x has non-integer components"
x = ops.convert_to_tensor(x, name="x")
casted_x = math_ops.to_int64(x)
return check_ops.assert_equal(
x, math_ops.cast(casted_x, x.dtype),
data=data, summarize=summarize, message=message, name=name)
def assert_symmetric(matrix):
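"""Returns `matrix`, gated by an assertion that it equals its transpose."""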
matrix_t = array_ops.matrix_transpose(matrix)
return control_flow_ops.with_dependencies(
[check_ops.assert_equal(matrix, matrix_t)], matrix)
def embed_check_nonnegative_discrete(x, check_integer=True):
"""Assert x is a non-negative tensor, and optionally of integers."""
assertions = [check_ops.assert_non_negative(
x, message="x must be non-negative.")]
if check_integer:
assertions += [assert_integer_form(
x, message="x cannot contain fractional components.")]
return control_flow_ops.with_dependencies(assertions, x)
def same_dynamic_shape(a, b):
"""Returns whether a and b have the same dynamic shape.
Args:
a: `Tensor`
b: `Tensor`
Returns:
`bool` `Tensor` representing if both tensors have the same shape.
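Example (an illustrative sketch; results shown as evaluated Python values):

```python
same_dynamic_shape(tf.ones([2, 3]), tf.zeros([2, 3]))  # ==> True
same_dynamic_shape(tf.ones([2, 3]), tf.zeros([3, 2]))  # ==> False
same_dynamic_shape(tf.ones([2]), tf.zeros([2, 3]))     # ==> False
```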
"""
a = ops.convert_to_tensor(a, name="a")
b = ops.convert_to_tensor(b, name="b")
# Here we can't just do math_ops.equal(a.shape, b.shape), since
# static shape inference may break the equality comparison between
# shape(a) and shape(b) in math_ops.equal.
def all_shapes_equal():
return math_ops.reduce_all(math_ops.equal(
array_ops.concat([array_ops.shape(a), array_ops.shape(b)], 0),
array_ops.concat([array_ops.shape(b), array_ops.shape(a)], 0)))
# One of the shapes isn't fully defined, so we need to use the dynamic
# shape.
return control_flow_ops.cond(
math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
all_shapes_equal,
lambda: constant_op.constant(False))
def get_logits_and_probs(logits=None,
probs=None,
multidimensional=False,
validate_args=False,
name="get_logits_and_probs"):
"""Converts logit to probabilities (or vice-versa), and returns both.
Args:
logits: Floating-point `Tensor` representing log-odds.
probs: Floating-point `Tensor` representing probabilities.
multidimensional: Python `bool`, default `False`.
If `True`, treat the last dimension of `logits` or `probs`, a
`[N1, N2, ..., k]`-shaped tensor, as indexing the logits or
probabilities of `k` classes.
validate_args: Python `bool`, default `False`. When `True`, either assert
`0 <= probs <= 1` (if not `multidimensional`) or that the last dimension
of `probs` sums to one.
name: A name for this operation (optional).
Returns:
logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0` or
`1`, then the corresponding entry in the returned logit will be `-Inf` and
`Inf` respectively.
Raises:
ValueError: if neither `probs` nor `logits` were passed in, or both were.
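Example (an illustrative sketch; values rounded):

```python
# From logits:
logits, probs = get_logits_and_probs(logits=tf.constant([0., 1.]))
# probs ==> sigmoid(logits) ~= [0.5, 0.731]

# From probs:
logits, probs = get_logits_and_probs(probs=tf.constant([0.25, 0.75]))
# logits ==> log(probs / (1 - probs)) ~= [-1.099, 1.099]
```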
"""
with ops.name_scope(name, values=[probs, logits]):
if (probs is None) == (logits is None):
raise ValueError("Must pass probs or logits, but not both.")
if probs is None:
logits = ops.convert_to_tensor(logits, name="logits")
if multidimensional:
return logits, nn.softmax(logits, name="probs")
return logits, math_ops.sigmoid(logits, name="probs")
probs = ops.convert_to_tensor(probs, name="probs")
if validate_args:
with ops.name_scope("validate_probs"):
one = constant_op.constant(1., probs.dtype)
dependencies = [check_ops.assert_non_negative(probs)]
if multidimensional:
dependencies += [assert_close(math_ops.reduce_sum(probs, -1), one,
message="probs does not sum to 1.")]
else:
dependencies += [check_ops.assert_less_equal(
probs, one, message="probs has components greater than 1.")]
probs = control_flow_ops.with_dependencies(dependencies, probs)
with ops.name_scope("logits"):
if multidimensional:
# Here we don't compute the multidimensional case in a manner
# consistent with the unidimensional case. We do so
# following the TF convention. Typically, you might expect to see
# logits = log(probs) - log(probs[pivot]). A side-effect of
# being consistent with the TF approach is that the unidimensional case
# implicitly handles the second dimension but the multidimensional case
# explicitly keeps the pivot dimension.
return math_ops.log(probs), probs
return math_ops.log(probs) - math_ops.log1p(-1. * probs), probs
def log_combinations(n, counts, name="log_combinations"):
"""Multinomial coefficient.
Given `n` and `counts`, where `counts` has last dimension `k`, we compute
the multinomial coefficient as:
```n! / sum_i n_i!```
where `i` runs over all `k` classes.
Args:
n: Floating-point `Tensor` broadcastable with `counts`. This represents `n`
outcomes.
counts: Floating-point `Tensor` broadcastable with `n`. This represents
counts in `k` classes, where `k` is the last dimension of the tensor.
name: A name for this operation (optional).
Returns:
`Tensor` representing the log of the multinomial coefficient between `n` and `counts`.
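Example (an illustrative sketch):

```python
# n = 3 outcomes in k = 2 classes with counts [1, 2]:
# log(3! / (1! * 2!)) = log(3) ~= 1.0986
log_combinations(3., tf.constant([1., 2.]))
```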
"""
# First a bit about the number of ways counts could have come in:
# E.g. if counts = [1, 2], then this is 3 choose 2.
# In general, this is (sum counts)! / sum(counts!)
# The sum should be along the last dimension of counts. This is the
# "distribution" dimension. Here n a priori represents the sum of counts.
with ops.name_scope(name, values=[n, counts]):
n = ops.convert_to_tensor(n, name="n")
counts = ops.convert_to_tensor(counts, name="counts")
total_permutations = math_ops.lgamma(n + 1)
counts_factorial = math_ops.lgamma(counts + 1)
redundant_permutations = math_ops.reduce_sum(counts_factorial, axis=[-1])
return total_permutations - redundant_permutations
def matrix_diag_transform(matrix, transform=None, name=None):
"""Transform diagonal of [batch-]matrix, leave rest of matrix unchanged.
Create a trainable covariance defined by a Cholesky factor:
```python
# Transform network layer into 2 x 2 array.
matrix_values = tf.contrib.layers.fully_connected(activations, 4)
matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
# Make the diagonal positive. If the upper triangle was zero, this would be a
# valid Cholesky factor.
chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)
# OperatorPDCholesky ignores the upper triangle.
operator = OperatorPDCholesky(chol)
```
Example of heteroskedastic 2-D linear regression.
```python
# Get a trainable Cholesky factor.
matrix_values = tf.contrib.layers.fully_connected(activations, 4)
matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)
# Get a trainable mean.
mu = tf.contrib.layers.fully_connected(activations, 2)
# This is a fully trainable multivariate normal!
dist = tf.contrib.distributions.MVNCholesky(mu, chol)
# Standard log loss. Minimizing this will "train" mu and chol, and then dist
# will be a distribution predicting labels as multivariate Gaussians.
loss = -1 * tf.reduce_mean(dist.log_prob(labels))
```
Args:
matrix: Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
equal.
transform: Element-wise function mapping `Tensors` to `Tensors`. To
be applied to the diagonal of `matrix`. If `None`, `matrix` is returned
unchanged. Defaults to `None`.
name: A name to give created ops.
Defaults to "matrix_diag_transform".
Returns:
A `Tensor` with same shape and `dtype` as `matrix`.
"""
with ops.name_scope(name, "matrix_diag_transform", [matrix]):
matrix = ops.convert_to_tensor(matrix, name="matrix")
if transform is None:
return matrix
# Replace the diag with transformed diag.
diag = array_ops.matrix_diag_part(matrix)
transformed_diag = transform(diag)
transformed_mat = array_ops.matrix_set_diag(matrix, transformed_diag)
return transformed_mat
def rotate_transpose(x, shift, name="rotate_transpose"):
"""Circularly moves dims left or right.
Effectively identical to:
```python
numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift))
```
When `shift` is not statically known, additional graph-runtime checks are
performed; these checks entail moving data from GPU to CPU.
Example:
```python
x = ... # Tensor of shape [1, 2, 3, 4].
rotate_transpose(x, -1) # result shape: [2, 3, 4, 1]
rotate_transpose(x, -2) # result shape: [3, 4, 1, 2]
rotate_transpose(x, 1) # result shape: [4, 1, 2, 3]
rotate_transpose(x, 2) # result shape: [3, 4, 1, 2]
rotate_transpose(x, 7) == rotate_transpose(x, 3)
rotate_transpose(x, -7) == rotate_transpose(x, -3)
```
Args:
x: `Tensor`.
shift: `Tensor`. Number of dimensions to transpose left (shift<0) or
transpose right (shift>0).
name: Python `str`. The name to give this op.
Returns:
rotated_x: Input `Tensor` with dimensions circularly rotated by shift.
Raises:
TypeError: if shift is not integer type.
"""
with ops.name_scope(name, values=[x, shift]):
x = ops.convert_to_tensor(x, name="x")
shift = ops.convert_to_tensor(shift, name="shift")
# We do not assign back to preserve constant-ness.
check_ops.assert_integer(shift)
shift_value_static = tensor_util.constant_value(shift)
ndims = x.get_shape().ndims
if ndims is not None and shift_value_static is not None:
if ndims < 2: return x
shift_value_static = np.sign(shift_value_static) * (
abs(shift_value_static) % ndims)
if shift_value_static == 0: return x
perm = np.roll(np.arange(ndims), shift_value_static)
return array_ops.transpose(x, perm=perm)
else:
# Consider if we always had a positive shift, and some specified
# direction.
# When shifting left we want the new array:
# last(x, n-shift) + first(x, shift)
# and if shifting right then we want:
# last(x, shift) + first(x, n-shift)
# Observe that last(a) == slice(a, n) and first(a) == slice(0, a).
# Also, we can encode direction and shift as one: direction * shift.
# Combining these facts, we have:
# a = cond(shift<0, -shift, n-shift)
# last(x, n-a) + first(x, a) == x[a:n] + x[0:a]
# Finally, we transform shift by modulo length so it can be specified
# independently from the array upon which it operates (like python).
ndims = array_ops.rank(x)
shift = array_ops.where(math_ops.less(shift, 0),
math_ops.mod(-shift, ndims),
ndims - math_ops.mod(shift, ndims))
first = math_ops.range(0, shift)
last = math_ops.range(shift, ndims)
perm = array_ops.concat([last, first], 0)
return array_ops.transpose(x, perm=perm)
def pick_vector(cond,
true_vector,
false_vector,
name="pick_vector"):
"""Picks possibly different length row `Tensor`s based on condition.
Value `Tensor`s should have exactly one dimension.
If `cond` is a python Boolean or `tf.constant` then either `true_vector` or
`false_vector` is immediately returned. I.e., no graph nodes are created and
no validation happens.
Args:
cond: `Tensor`. Must have `dtype=tf.bool` and be scalar.
true_vector: `Tensor` of one dimension. Returned when cond is `True`.
false_vector: `Tensor` of one dimension. Returned when cond is `False`.
name: Python `str`. The name to give this op.
Example:
```python
pick_vector(tf.less(0, 5), tf.range(10, 12), tf.range(15, 18))
# result is tensor: [10, 11].
pick_vector(tf.less(5, 0), tf.range(10, 12), tf.range(15, 18))
# result is tensor: [15, 16, 17].
```
Returns:
true_or_false_vector: `Tensor`.
Raises:
TypeError: if `cond.dtype != tf.bool`
TypeError: if `cond` is not a constant and
`true_vector.dtype != false_vector.dtype`
"""
with ops.name_scope(name, values=(cond, true_vector, false_vector)):
cond = ops.convert_to_tensor(cond, name="cond")
if cond.dtype != dtypes.bool:
raise TypeError("%s.dtype=%s which is not %s" %
(cond.name, cond.dtype, dtypes.bool))
cond_value_static = tensor_util.constant_value(cond)
if cond_value_static is not None:
return true_vector if cond_value_static else false_vector
true_vector = ops.convert_to_tensor(true_vector, name="true_vector")
false_vector = ops.convert_to_tensor(false_vector, name="false_vector")
if true_vector.dtype != false_vector.dtype:
raise TypeError(
"%s.dtype=%s does not match %s.dtype=%s"
% (true_vector.name, true_vector.dtype,
false_vector.name, false_vector.dtype))
n = array_ops.shape(true_vector)[0]
return array_ops.slice(
array_ops.concat([true_vector, false_vector], 0),
[array_ops.where(cond, 0, n)], [array_ops.where(cond, n, -1)])
def gen_new_seed(seed, salt):
"""Generate a new seed, from the given seed and salt."""
if seed is None:
return None
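# Mix `seed` with `salt` via MD5; the first 8 hex digits give 32 bits, and
# masking with 0x7FFFFFFF keeps 31 bits, a valid non-negative int32 seed.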
string = (str(seed) + salt).encode("utf-8")
return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF
def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"):
"""Creates a (batch of) lower triangular matrix from a vector of inputs.
If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
`n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.
Although the non-batch complexity is O(n**2), large constants and sub-optimal
vectorization mean this function is about 5x slower than zeroing out the
upper triangle, i.e., `tf.matrix_band_part(X, -1, 0)`. This function becomes
competitive only when several matmul/cholesky/etc ops can be elided in
constructing the input. Example: wiring a fully connected layer as a
covariance matrix; this function reduces the final layer by 2x and possibly
reduces the network architecture complexity considerably. In most cases it is
better to simply build a full matrix and zero out the upper triangular
elements, e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than
directly construct a lower triangular matrix.
Example:
```python
fill_lower_triangular([1, 2, 3, 4, 5, 6])
# Returns: [[1, 0, 0],
# [2, 3, 0],
# [4, 5, 6]]
```
For comparison, a pure numpy version of this function can be found in
`distribution_util_test.py`, function `_fill_lower_triangular`.
Args:
x: `Tensor` representing lower triangular elements.
validate_args: Python `bool`, default `False`. Whether to ensure the shape
of `x` can be mapped to a lower triangular matrix (controls non-static
checks only).
name: Python `str`. The name to give this op.
Returns:
tril: `Tensor` with lower triangular elements filled from `x`.
Raises:
ValueError: if `x` has a static shape that cannot be mapped to a
lower triangular matrix.
"""
# TODO(jvdillon): Replace this code with dedicated op when it exists.
with ops.name_scope(name, values=[x]):
x = ops.convert_to_tensor(x, name="x")
if (x.get_shape().ndims is not None and
x.get_shape()[-1].value is not None):
d = x.get_shape()[-1].value
# d = n(n+1)/2 implies n is:
n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
d_inferred = n * (n + 1) / 2
if d != d_inferred:
raise ValueError("Input cannot be mapped to a lower triangular; "
"n*(n+1)/2 = %d != %d" % (d_inferred, d))
final_shape = x.get_shape()[:-1].concatenate(
tensor_shape.TensorShape([n, n]))
else:
d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32)
# d = n(n+1)/2 implies n is:
n = math_ops.cast(0.5 * (math_ops.sqrt(1. + 8. * d) - 1.),
dtype=dtypes.int32)
if validate_args:
is_valid_input_shape = check_ops.assert_equal(
n * (n + 1) / 2, d,
message="Input cannot be mapped to a lower triangular.")
n = control_flow_ops.with_dependencies([is_valid_input_shape], n)
final_shape = x.get_shape()[:-1].concatenate(
tensor_shape.TensorShape([None, None]))
def tril_ids(n):
"""Internal helper to create vector of linear indices into y."""
# Build the ids statically; we chose 512 because it implies at most 1MiB.
if not tensor_util.is_tensor(n) and n <= 512:
ids = np.arange(n**2, dtype=np.int32)
rows = (ids / n).astype(np.int32) # Implicit floor.
# We need to stop incrementing the index when we encounter
# upper-triangular elements. The idea here is to compute the
# lower-right number of zeros then by "symmetry" subtract this from the
# total number of zeros, n(n-1)/2.
# Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
# We could also zero out when (rows < cols) == (rows < ids-n*rows).
# mask = (ids <= (n + 1) * rows).astype(np.int32)
else:
ids = math_ops.range(n**2)
rows = math_ops.cast(ids / n, dtype=dtypes.int32)
offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
dtype=dtypes.int32)
return ids - offset
# Special-case non-batch case.
if x.get_shape().ndims == 1:
y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
y = array_ops.matrix_band_part(y, -1, 0)
y.set_shape(y.get_shape().merge_with(final_shape))
return y
# Make ids for each batch dim.
if (x.get_shape().ndims is not None and
x.get_shape()[:-1].is_fully_defined()):
batch_shape = np.asarray(x.get_shape()[:-1].as_list(), dtype=np.int32)
m = np.prod(batch_shape).astype(np.int32)
else:
batch_shape = array_ops.shape(x)[:-1]
m = math_ops.reduce_prod(array_ops.shape(x)[:-1])
batch_ids = math_ops.range(m)
# Assemble the tril_ids into batch,tril_id pairs.
idx = array_ops.stack([
array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
])
idx = array_ops.transpose(idx, [1, 2, 0])
# Gather up, reshape, and return.
y = array_ops.reshape(x, [-1, d])
y = array_ops.gather_nd(y, idx)
y = array_ops.reshape(y, array_ops.concat([batch_shape, [n, n]], 0))
y = array_ops.matrix_band_part(y, -1, 0)
y.set_shape(y.get_shape().merge_with(final_shape))
return y
# TODO(jvdillon): Merge this test back into:
# tensorflow/python/ops/softplus_op_test.py
# once TF core is accepting new ops.
def softplus_inverse(x, name=None):
"""Computes the inverse softplus, i.e., x = softplus_inverse(softplus(x)).
Mathematically this op is equivalent to:
```none
softplus_inverse = log(exp(x) - 1.)
```
Args:
x: `Tensor`. Non-negative (not enforced), floating-point.
name: A name for the operation (optional).
Returns:
`Tensor`. Has the same type/shape as input `x`.
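Example (an illustrative sketch; equality holds up to floating-point error):

```python
x = tf.constant([0.1, 1., 10.])
y = softplus_inverse(tf.nn.softplus(x))
# y ~= x
```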
"""
with ops.name_scope(name, "softplus_inverse", values=[x]):
x = ops.convert_to_tensor(x, name="x")
# We begin by deriving a more numerically stable softplus_inverse:
# x = softplus(y) = Log[1 + exp{y}], (which means x > 0).
# ==> exp{x} = 1 + exp{y} (1)
# ==> y = Log[exp{x} - 1] (2)
# = Log[(exp{x} - 1) / exp{x}] + Log[exp{x}]
# = Log[(1 - exp{-x}) / 1] + Log[exp{x}]
# = Log[1 - exp{-x}] + x (3)
# (2) is the "obvious" inverse, but (3) is more stable than (2) for large x.
# For small x (e.g. x = 1e-10), (3) will become -inf since 1 - exp{-x} will
# be zero. To fix this, we use 1 - exp{-x} approx x for small x > 0.
#
# In addition to the numerically stable derivation above, we clamp
# small/large values to be congruent with the logic in:
# tensorflow/core/kernels/softplus_op.h
#
# Finally, we set the input to one whenever the input is too large or too
# small. This ensures that no unchosen codepath is +/- inf. This is
# necessary to ensure the gradient doesn't get NaNs. Recall that the
# gradient of `where` behaves like `pred*pred_true + (1-pred)*pred_false`
# thus an `inf` in an unselected path results in `0*inf=nan`. We are careful
# to overwrite `x` with ones only when we will never actually use this
# value. Note that we use ones and not zeros since `log(expm1(0.)) = -inf`.
threshold = np.log(np.finfo(x.dtype.as_numpy_dtype).eps) + 2.
is_too_small = math_ops.less(x, np.exp(threshold))
is_too_large = math_ops.greater(x, -threshold)
too_small_value = math_ops.log(x)
too_large_value = x
# This `where` will ultimately be a NOP because we won't select this
# codepath wherever we used the surrogate `ones_like`.
x = array_ops.where(math_ops.logical_or(is_too_small, is_too_large),
array_ops.ones_like(x), x)
y = x + math_ops.log(-math_ops.expm1(-x)) # == log(expm1(x))
return array_ops.where(is_too_small, too_small_value,
array_ops.where(is_too_large, too_large_value, y))
# TODO(b/35290280): Add unit-tests.
def dimension_size(x, axis):
"""Returns the size of a specific dimension."""
# Since tf.gather isn't "constant-in, constant-out", we must first check the
# static shape or fallback to dynamic shape.
num_rows = (None if x.get_shape().ndims is None
else x.get_shape()[axis].value)
if num_rows is not None:
return num_rows
return array_ops.shape(x)[axis]
class AppendDocstring(object):
"""Helper class to promote private subclass docstring to public counterpart.
Example:
```python
class TransformedDistribution(Distribution):
@distribution_util.AppendDocstring(
additional_note="A special note!",
kwargs_dict={"foo": "An extra arg."})
def _prob(self, y, foo=None):
pass
```
In this case, the `AppendDocstring` decorator appends the `additional_note` to
the docstring of `prob` (not `_prob`) and adds a new `kwargs`
section with each dictionary item as a bullet-point.
For a more detailed example, see `TransformedDistribution`.
"""
def __init__(self, additional_note="", kwargs_dict=None):
"""Initializes the AppendDocstring object.
Args:
additional_note: Python string added as additional docstring to public
version of function.
kwargs_dict: Python string/string dictionary representing
specific kwargs expanded from the **kwargs input.
Raises:
ValueError: if kwargs_dict.key contains whitespace.
ValueError: if kwargs_dict.value contains newlines.
"""
self._additional_note = additional_note
if kwargs_dict:
bullets = []
for key in sorted(kwargs_dict.keys()):
value = kwargs_dict[key]
if any(x.isspace() for x in key):
raise ValueError(
"Parameter name \"%s\" contains whitespace." % key)
value = value.lstrip()
if "\n" in value:
raise ValueError(
"Parameter description for \"%s\" contains newlines." % key)
bullets.append("* `%s`: %s" % (key, value))
self._additional_note += ("\n\n##### `kwargs`:\n\n" +
"\n".join(bullets))
def __call__(self, fn):
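# Wrap `fn` so the appended note lives on a new function object's
# docstring; the decorated private method itself is left unmodified.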
@functools.wraps(fn)
def _fn(*args, **kwargs):
return fn(*args, **kwargs)
if _fn.__doc__ is None:
_fn.__doc__ = self._additional_note
else:
_fn.__doc__ += "\n%s" % self._additional_note
return _fn