diff --git a/eigen.BUILD b/eigen.BUILD
index a657493380b..16dd4f84228 100644
--- a/eigen.BUILD
+++ b/eigen.BUILD
@@ -1,6 +1,6 @@
 package(default_visibility = ["//visibility:public"])
 
-archive_dir = "eigen-eigen-a5e9085a94e8"
+archive_dir = "eigen-eigen-f3a13643ac1f"
 
 cc_library(
     name = "eigen",
diff --git a/tensorflow/contrib/cmake/external/eigen.cmake b/tensorflow/contrib/cmake/external/eigen.cmake
index 42fa7686632..c1929a10f32 100644
--- a/tensorflow/contrib/cmake/external/eigen.cmake
+++ b/tensorflow/contrib/cmake/external/eigen.cmake
@@ -7,7 +7,7 @@
 
 include (ExternalProject)
 
-set(eigen_archive_hash "a5e9085a94e8")
+set(eigen_archive_hash "f3a13643ac1f")
 
 set(eigen_INCLUDE_DIRS
     ${CMAKE_CURRENT_BINARY_DIR}
@@ -16,7 +16,7 @@ set(eigen_INCLUDE_DIRS
     ${tensorflow_source_dir}/third_party/eigen3
 )
 set(eigen_URL https://bitbucket.org/eigen/eigen/get/${eigen_archive_hash}.tar.gz)
-set(eigen_HASH SHA256=967126237829c7c87abb6cd0e13a5a235b0377d51575522c390b9486aed13e71)
+set(eigen_HASH SHA256=a9266e60366cddb371a23d86b11a297eee86372a89ef4b38a3509012f9cc37ec)
 set(eigen_BUILD ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen)
 set(eigen_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/eigen/install)
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 451a34320e0..44263dc8aed 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -55,9 +55,9 @@ cuda_py_tests(
 )
 
 cuda_py_tests(
-    name = "gaussian_test",
+    name = "normal_test",
     size = "small",
-    srcs = ["python/kernel_tests/gaussian_test.py"],
+    srcs = ["python/kernel_tests/normal_test.py"],
     additional_deps = [
         ":distributions_py",
         "//tensorflow/python:framework_test_lib",
@@ -98,9 +98,9 @@ cuda_py_tests(
 )
 
 cuda_py_tests(
-    name = "gaussian_conjugate_posteriors_test",
+    name = "normal_conjugate_posteriors_test",
     size = "small",
-    srcs = ["python/kernel_tests/gaussian_conjugate_posteriors_test.py"],
+    srcs = ["python/kernel_tests/normal_conjugate_posteriors_test.py"],
     additional_deps = [
         ":distributions_py",
         "//tensorflow/python:platform_test",
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 2c8a0343b28..7fa8c0fb0c6 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -30,7 +30,7 @@ initialized with parameters that define the distributions.
 @@Chi2
 @@Exponential
 @@Gamma
-@@Gaussian
+@@Normal
 @@StudentT
 @@Uniform
 
@@ -44,10 +44,10 @@ initialized with parameters that define the distributions.
 Functions that transform conjugate prior/likelihood pairs to distributions
 representing the posterior or posterior predictive.
 
-### Gaussian likelihood with conjugate prior.
+### Normal likelihood with conjugate prior.
 
-@@gaussian_conjugates_known_sigma_posterior
-@@gaussian_congugates_known_sigma_predictive
+@@normal_conjugates_known_sigma_posterior
+@@normal_congugates_known_sigma_predictive
 """
 from __future__ import absolute_import
 from __future__ import division
@@ -60,8 +60,8 @@ from tensorflow.contrib.distributions.python.ops.dirichlet_multinomial import *
 from tensorflow.contrib.distributions.python.ops.distribution import *
 from tensorflow.contrib.distributions.python.ops.exponential import *
 from tensorflow.contrib.distributions.python.ops.gamma import *
-from tensorflow.contrib.distributions.python.ops.gaussian import *
-from tensorflow.contrib.distributions.python.ops.gaussian_conjugate_posteriors import *
 from tensorflow.contrib.distributions.python.ops.mvn import *
+from tensorflow.contrib.distributions.python.ops.normal import *
+from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import *
 from tensorflow.contrib.distributions.python.ops.student_t import *
 from tensorflow.contrib.distributions.python.ops.uniform import *
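The rename is purely cosmetic: `Normal` is the same class `Gaussian` was, and note that the pre-existing `congugates` misspelling in the predictive helper's identifier is carried over unchanged. A minimal before/after sketch for callers, assuming a `tf.contrib` build from this era:

```python
import tensorflow as tf

# Before this change:
#   dist = tf.contrib.distributions.Gaussian(mu=0., sigma=3.)
# After this change, the same distribution is constructed as:
dist = tf.contrib.distributions.Normal(mu=0., sigma=3.)

with tf.Session():
  # Evaluate the cdf at 1, returning a scalar, exactly as before.
  print(dist.cdf(1.).eval())
```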
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py b/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py
index 5e3fed1ed80..6fd03e90bf6 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py
@@ -105,10 +105,9 @@ class ExponentialTest(tf.test.TestCase):
 
       exponential = tf.contrib.distributions.Exponential(lam=lam)
 
-      n_v = 100000
-      n = tf.constant(n_v)
+      n = 100000
       samples = exponential.sample(n, seed=138)
-      self.assertEqual(samples.get_shape(), (n_v, batch_size, 2))
+      self.assertEqual(samples.get_shape(), (n, batch_size, 2))
 
       sample_values = samples.eval()
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_conjugate_posteriors_test.py b/tensorflow/contrib/distributions/python/kernel_tests/normal_conjugate_posteriors_test.py
similarity index 74%
rename from tensorflow/contrib/distributions/python/kernel_tests/gaussian_conjugate_posteriors_test.py
rename to tensorflow/contrib/distributions/python/kernel_tests/normal_conjugate_posteriors_test.py
index c3a2464b5bd..1d03396bf68 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_conjugate_posteriors_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/normal_conjugate_posteriors_test.py
@@ -25,9 +25,9 @@ import tensorflow as tf
 distributions = tf.contrib.distributions
 
 
-class GaussianTest(tf.test.TestCase):
+class NormalTest(tf.test.TestCase):
 
-  def testGaussianConjugateKnownSigmaPosterior(self):
+  def testNormalConjugateKnownSigmaPosterior(self):
     with tf.Session():
       mu0 = tf.constant([3.0])
       sigma0 = tf.constant([math.sqrt(10.0)])
@@ -35,16 +35,16 @@ class GaussianTest(tf.test.TestCase):
       x = tf.constant([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0])
       s = tf.reduce_sum(x)
       n = tf.size(x)
-      prior = distributions.Gaussian(mu=mu0, sigma=sigma0)
-      posterior = distributions.gaussian_conjugates_known_sigma_posterior(
+      prior = distributions.Normal(mu=mu0, sigma=sigma0)
+      posterior = distributions.normal_conjugates_known_sigma_posterior(
           prior=prior, sigma=sigma, s=s, n=n)
 
       # Smoke test
-      self.assertTrue(isinstance(posterior, distributions.Gaussian))
+      self.assertTrue(isinstance(posterior, distributions.Normal))
       posterior_log_pdf = posterior.log_pdf(x).eval()
       self.assertEqual(posterior_log_pdf.shape, (6,))
 
-  def testGaussianConjugateKnownSigmaPosteriorND(self):
+  def testNormalConjugateKnownSigmaPosteriorND(self):
     with tf.Session():
       batch_size = 6
       mu0 = tf.constant([[3.0, -3.0]] * batch_size)
@@ -54,16 +54,16 @@ class GaussianTest(tf.test.TestCase):
           tf.constant([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=tf.float32))
       s = tf.reduce_sum(x)
       n = tf.size(x)
-      prior = distributions.Gaussian(mu=mu0, sigma=sigma0)
-      posterior = distributions.gaussian_conjugates_known_sigma_posterior(
+      prior = distributions.Normal(mu=mu0, sigma=sigma0)
+      posterior = distributions.normal_conjugates_known_sigma_posterior(
          prior=prior, sigma=sigma, s=s, n=n)
 
       # Smoke test
-      self.assertTrue(isinstance(posterior, distributions.Gaussian))
+      self.assertTrue(isinstance(posterior, distributions.Normal))
       posterior_log_pdf = posterior.log_pdf(x).eval()
       self.assertEqual(posterior_log_pdf.shape, (6, 2))
 
-  def testGaussianConjugateKnownSigmaNDPosteriorND(self):
+  def testNormalConjugateKnownSigmaNDPosteriorND(self):
     with tf.Session():
       batch_size = 6
       mu0 = tf.constant([[3.0, -3.0]] * batch_size)
@@ -75,19 +75,19 @@ class GaussianTest(tf.test.TestCase):
       s = tf.reduce_sum(x, reduction_indices=[1])
       x = tf.transpose(x)  # Reshape to shape (6, 2)
       n = tf.constant([6] * 2)
-      prior = distributions.Gaussian(mu=mu0, sigma=sigma0)
-      posterior = distributions.gaussian_conjugates_known_sigma_posterior(
+      prior = distributions.Normal(mu=mu0, sigma=sigma0)
+      posterior = distributions.normal_conjugates_known_sigma_posterior(
           prior=prior, sigma=sigma, s=s, n=n)
 
       # Smoke test
-      self.assertTrue(isinstance(posterior, distributions.Gaussian))
+      self.assertTrue(isinstance(posterior, distributions.Normal))
 
       # Calculate log_pdf under the 2 models
       posterior_log_pdf = posterior.log_pdf(x)
       self.assertEqual(posterior_log_pdf.get_shape(), (6, 2))
       self.assertEqual(posterior_log_pdf.eval().shape, (6, 2))
 
-  def testGaussianConjugateKnownSigmaPredictive(self):
+  def testNormalConjugateKnownSigmaPredictive(self):
     with tf.Session():
       batch_size = 6
       mu0 = tf.constant([3.0] * batch_size)
@@ -96,12 +96,12 @@ class GaussianTest(tf.test.TestCase):
       x = tf.constant([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0])
       s = tf.reduce_sum(x)
       n = tf.size(x)
-      prior = distributions.Gaussian(mu=mu0, sigma=sigma0)
-      predictive = distributions.gaussian_congugates_known_sigma_predictive(
+      prior = distributions.Normal(mu=mu0, sigma=sigma0)
+      predictive = distributions.normal_congugates_known_sigma_predictive(
          prior=prior, sigma=sigma, s=s, n=n)
 
       # Smoke test
-      self.assertTrue(isinstance(predictive, distributions.Gaussian))
+      self.assertTrue(isinstance(predictive, distributions.Normal))
       predictive_log_pdf = predictive.log_pdf(x).eval()
       self.assertEqual(predictive_log_pdf.shape, (6,))
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_test.py b/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py
similarity index 79%
rename from tensorflow/contrib/distributions/python/kernel_tests/gaussian_test.py
rename to tensorflow/contrib/distributions/python/kernel_tests/normal_test.py
index f0a82df901c..0e9f8a40cca 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py
@@ -24,9 +24,9 @@ import numpy as np
 import tensorflow as tf
 
 
-class GaussianTest(tf.test.TestCase):
+class NormalTest(tf.test.TestCase):
 
-  def testGaussianLogPDF(self):
+  def testNormalLogPDF(self):
     with tf.Session():
       batch_size = 6
       mu = tf.constant([3.0] * batch_size)
@@ -34,18 +34,18 @@ class GaussianTest(tf.test.TestCase):
       mu_v = 3.0
       sigma_v = np.sqrt(10.0)
       x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
-      gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma)
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
 
       expected_log_pdf = np.log(
           1 / np.sqrt(2 * np.pi) / sigma_v
           * np.exp(-1.0 / (2 * sigma_v**2) * (x - mu_v)**2))
 
-      log_pdf = gaussian.log_pdf(x)
+      log_pdf = normal.log_pdf(x)
       self.assertAllClose(expected_log_pdf, log_pdf.eval())
 
-      pdf = gaussian.pdf(x)
+      pdf = normal.pdf(x)
       self.assertAllClose(np.exp(expected_log_pdf), pdf.eval())
 
-  def testGaussianLogPDFMultidimensional(self):
+  def testNormalLogPDFMultidimensional(self):
     with tf.Session():
       batch_size = 6
       mu = tf.constant([[3.0, -3.0]] * batch_size)
@@ -53,22 +53,22 @@ class GaussianTest(tf.test.TestCase):
       mu_v = np.array([3.0, -3.0])
       sigma_v = np.array([np.sqrt(10.0), np.sqrt(15.0)])
       x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
-      gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma)
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
 
       expected_log_pdf = np.log(
           1 / np.sqrt(2 * np.pi) / sigma_v
           * np.exp(-1.0 / (2 * sigma_v**2) * (x - mu_v)**2))
 
-      log_pdf = gaussian.log_pdf(x)
+      log_pdf = normal.log_pdf(x)
       log_pdf_values = log_pdf.eval()
       self.assertEqual(log_pdf.get_shape(), (6, 2))
       self.assertAllClose(expected_log_pdf, log_pdf_values)
 
-      pdf = gaussian.pdf(x)
+      pdf = normal.pdf(x)
       pdf_values = pdf.eval()
       self.assertEqual(pdf.get_shape(), (6, 2))
       self.assertAllClose(np.exp(expected_log_pdf), pdf_values)
 
-  def testGaussianCDF(self):
+  def testNormalCDF(self):
     with tf.Session():
       batch_size = 6
       mu = tf.constant([3.0] * batch_size)
@@ -77,40 +77,40 @@ class GaussianTest(tf.test.TestCase):
       sigma_v = np.sqrt(10.0)
       x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
 
-      gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma)
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
       erf_fn = np.vectorize(math.erf)
 
       # From Wikipedia
       expected_cdf = 0.5 * (1.0 + erf_fn((x - mu_v)/(sigma_v*np.sqrt(2))))
 
-      cdf = gaussian.cdf(x)
+      cdf = normal.cdf(x)
       self.assertAllClose(expected_cdf, cdf.eval())
 
-  def testGaussianEntropy(self):
+  def testNormalEntropy(self):
     with tf.Session():
       mu_v = np.array([1.0, 1.0, 1.0])
       sigma_v = np.array([[1.0, 2.0, 3.0]]).T
-      gaussian = tf.contrib.distributions.Gaussian(mu=mu_v, sigma=sigma_v)
+      normal = tf.contrib.distributions.Normal(mu=mu_v, sigma=sigma_v)
 
       sigma_broadcast = mu_v * sigma_v
       expected_entropy = 0.5 * np.log(2*np.pi*np.exp(1)*sigma_broadcast**2)
-      self.assertAllClose(expected_entropy, gaussian.entropy().eval())
+      self.assertAllClose(expected_entropy, normal.entropy().eval())
 
-  def testGaussianSample(self):
+  def testNormalSample(self):
     with tf.Session():
       mu = tf.constant(3.0)
       sigma = tf.constant(math.sqrt(10.0))
       mu_v = 3.0
       sigma_v = np.sqrt(10.0)
       n = tf.constant(100000)
-      gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma)
-      samples = gaussian.sample(n, seed=137)
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
+      samples = normal.sample(n, seed=137)
       sample_values = samples.eval()
       self.assertEqual(sample_values.shape, (100000,))
       self.assertAllClose(sample_values.mean(), mu_v, atol=1e-2)
       self.assertAllClose(sample_values.std(), sigma_v, atol=1e-1)
 
-  def testGaussianSampleMultiDimensional(self):
+  def testNormalSampleMultiDimensional(self):
     with tf.Session():
       batch_size = 2
       mu = tf.constant([[3.0, -3.0]] * batch_size)
@@ -118,8 +118,8 @@ class GaussianTest(tf.test.TestCase):
       mu_v = [3.0, -3.0]
       sigma_v = [np.sqrt(10.0), np.sqrt(15.0)]
       n = tf.constant(100000)
-      gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma)
-      samples = gaussian.sample(n, seed=137)
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
+      samples = normal.sample(n, seed=137)
       sample_values = samples.eval()
       self.assertEqual(samples.get_shape(), (100000, batch_size, 2))
       self.assertAllClose(sample_values[:, 0, 0].mean(), mu_v[0], atol=1e-2)
@@ -129,13 +129,13 @@ class GaussianTest(tf.test.TestCase):
 
   def testNegativeSigmaFails(self):
     with tf.Session():
-      gaussian = tf.contrib.distributions.Gaussian(
+      normal = tf.contrib.distributions.Normal(
           mu=[1.], sigma=[-5.], name='G')
       with self.assertRaisesOpError(
           r'should contain only positive values'):
-        gaussian.mean.eval()
+        normal.mean.eval()
 
 if __name__ == '__main__':
   tf.test.main()
diff --git a/tensorflow/contrib/distributions/python/ops/exponential.py b/tensorflow/contrib/distributions/python/ops/exponential.py
index b80632fc496..4a93c210b91 100644
--- a/tensorflow/contrib/distributions/python/ops/exponential.py
+++ b/tensorflow/contrib/distributions/python/ops/exponential.py
@@ -70,6 +70,7 @@ class Exponential(gamma.Gamma):
     """
     broadcast_shape = self._lam.get_shape()
     with ops.op_scope([self.lam, n], name, "ExponentialSample"):
+      n = ops.convert_to_tensor(n, name="n")
      shape = array_ops.concat(
           0, [array_ops.pack([n]), array_ops.shape(self._lam)])
       sampled = random_ops.random_uniform(
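The one-line `convert_to_tensor` fix above is what lets the exponential test pass a plain Python int for `n` while keeping the static sample shape. A small sketch of the calling pattern this enables, with hypothetical rate values:

```python
import tensorflow as tf

exponential = tf.contrib.distributions.Exponential(lam=[1.0, 2.0])

# `n` may now be a plain Python int; sample() converts it to a tensor
# internally, so the static shape is still inferred as (100000, 2).
samples = exponential.sample(100000, seed=138)
print(samples.get_shape())
```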
diff --git a/tensorflow/contrib/distributions/python/ops/gaussian.py b/tensorflow/contrib/distributions/python/ops/normal.py
similarity index 82%
rename from tensorflow/contrib/distributions/python/ops/gaussian.py
rename to tensorflow/contrib/distributions/python/ops/normal.py
index 8e2049444af..dc08a0e1dec 100644
--- a/tensorflow/contrib/distributions/python/ops/gaussian.py
+++ b/tensorflow/contrib/distributions/python/ops/normal.py
@@ -38,8 +38,8 @@ def _assert_all_positive(x):
       ["Tensor %s should contain only positive values: " % x.name, x])
 
 
-class Gaussian(object):
-  """The scalar Gaussian distribution with mean and stddev parameters mu, sigma.
+class Normal(object):
+  """The scalar Normal distribution with mean and stddev parameters mu, sigma.
 
   #### Mathematical details
 
@@ -52,15 +52,15 @@ class Gaussian(object):
   Examples of initialization of one or a batch of distributions.
 
   ```python
-  # Define a single scalar Gaussian distribution.
-  dist = tf.contrib.distributions.Gaussian(mu=0, sigma=3)
+  # Define a single scalar Normal distribution.
+  dist = tf.contrib.distributions.Normal(mu=0, sigma=3)
 
   # Evaluate the cdf at 1, returning a scalar.
   dist.cdf(1)
 
-  # Define a batch of two scalar valued Gaussians.
+  # Define a batch of two scalar valued Normals.
   # The first has mean 1 and standard deviation 11, the second 2 and 22.
-  dist = tf.contrib.distributions.Gaussian(mu=[1, 2.], sigma=[11, 22.])
+  dist = tf.contrib.distributions.Normal(mu=[1, 2.], sigma=[11, 22.])
 
   # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
   # returning a length two tensor.
@@ -73,9 +73,9 @@ class Gaussian(object):
   Arguments are broadcast when possible.
 
   ```python
-  # Define a batch of two scalar valued Gaussians.
+  # Define a batch of two scalar valued Normals.
   # Both have mean 1, but different standard deviations.
-  dist = tf.contrib.distributions.Gaussian(mu=1, sigma=[11, 22.])
+  dist = tf.contrib.distributions.Normal(mu=1, sigma=[11, 22.])
 
   # Evaluate the pdf of both distributions on the same point, 3.0,
   # returning a length 2 tensor.
@@ -85,7 +85,7 @@ class Gaussian(object):
   """
 
   def __init__(self, mu, sigma, name=None):
-    """Construct Gaussian distributions with mean and stddev `mu` and `sigma`.
+    """Construct Normal distributions with mean and stddev `mu` and `sigma`.
 
     The parameters `mu` and `sigma` must be shaped in a way that supports
     broadcasting (e.g. `mu + sigma` is a valid operation).
@@ -99,7 +99,7 @@ class Gaussian(object):
     Raises:
       TypeError: if mu and sigma are different dtypes.
     """
-    with ops.op_scope([mu, sigma], name, "Gaussian"):
+    with ops.op_scope([mu, sigma], name, "Normal"):
       mu = ops.convert_to_tensor(mu)
       sigma = ops.convert_to_tensor(sigma)
       with ops.control_dependencies([_assert_all_positive(sigma)]):
@@ -125,7 +125,7 @@ class Gaussian(object):
     return self._mu * array_ops.ones_like(self._sigma)
 
   def log_pdf(self, x, name=None):
-    """Log pdf of observations in `x` under these Gaussian distribution(s).
+    """Log pdf of observations in `x` under these Normal distribution(s).
 
     Args:
       x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`.
@@ -134,7 +134,7 @@ class Gaussian(object):
     Returns:
       log_pdf: tensor of dtype `dtype`, the log-PDFs of `x`.
     """
-    with ops.op_scope([self._mu, self._sigma, x], name, "GaussianLogPdf"):
+    with ops.op_scope([self._mu, self._sigma, x], name, "NormalLogPdf"):
       x = ops.convert_to_tensor(x)
       if x.dtype != self.dtype:
         raise TypeError("Input x dtype does not match dtype: %s vs. %s"
@@ -144,7 +144,7 @@ class Gaussian(object):
           -0.5*math_ops.square((x - self._mu) / self._sigma))
 
   def cdf(self, x, name=None):
-    """CDF of observations in `x` under these Gaussian distribution(s).
+    """CDF of observations in `x` under these Normal distribution(s).
 
     Args:
       x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`.
@@ -153,7 +153,7 @@ class Gaussian(object):
     Returns:
       cdf: tensor of dtype `dtype`, the CDFs of `x`.
     """
-    with ops.op_scope([self._mu, self._sigma, x], name, "GaussianCdf"):
+    with ops.op_scope([self._mu, self._sigma, x], name, "NormalCdf"):
       x = ops.convert_to_tensor(x)
       if x.dtype != self.dtype:
         raise TypeError("Input x dtype does not match dtype: %s vs. %s"
@@ -162,7 +162,7 @@ class Gaussian(object):
           1.0/(math.sqrt(2.0) * self._sigma)*(x - self._mu)))
 
   def log_cdf(self, x, name=None):
-    """Log CDF of observations `x` under these Gaussian distribution(s).
+    """Log CDF of observations `x` under these Normal distribution(s).
 
     Args:
       x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`.
@@ -171,11 +171,11 @@ class Gaussian(object):
     Returns:
       log_cdf: tensor of dtype `dtype`, the log-CDFs of `x`.
     """
-    with ops.op_scope([self._mu, self._sigma, x], name, "GaussianLogCdf"):
+    with ops.op_scope([self._mu, self._sigma, x], name, "NormalLogCdf"):
       return math_ops.log(self.cdf(x))
 
   def pdf(self, x, name=None):
-    """The PDF of observations in `x` under these Gaussian distribution(s).
+    """The PDF of observations in `x` under these Normal distribution(s).
 
     Args:
       x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`.
@@ -184,11 +184,11 @@ class Gaussian(object):
     Returns:
       pdf: tensor of dtype `dtype`, the pdf values of `x`.
     """
-    with ops.op_scope([self._mu, self._sigma, x], name, "GaussianPdf"):
+    with ops.op_scope([self._mu, self._sigma, x], name, "NormalPdf"):
       return math_ops.exp(self.log_pdf(x))
 
   def entropy(self, name=None):
-    """The entropy of Gaussian distribution(s).
+    """The entropy of Normal distribution(s).
 
     Args:
       name: The name to give this op.
@@ -196,7 +196,7 @@ class Gaussian(object):
     Returns:
       entropy: tensor of dtype `dtype`, the entropy.
     """
-    with ops.op_scope([self._mu, self._sigma], name, "GaussianEntropy"):
+    with ops.op_scope([self._mu, self._sigma], name, "NormalEntropy"):
       two_pi_e1 = constant_op.constant(
           2 * math.pi * math.exp(1), dtype=self.dtype)
       # Use broadcasting rules to calculate the full broadcast sigma.
@@ -204,7 +204,7 @@ class Gaussian(object):
       return 0.5 * math_ops.log(two_pi_e1 * math_ops.square(sigma))
 
   def sample(self, n, seed=None, name=None):
-    """Sample `n` observations from the Gaussian Distributions.
+    """Sample `n` observations from the Normal Distributions.
 
     Args:
       n: `Scalar`, type int32, the number of observations to sample.
@@ -215,7 +215,7 @@ class Gaussian(object):
       samples: `[n, ...]`, a `Tensor` of `n` samples for each
         of the distributions determined by broadcasting the hyperparameters.
     """
-    with ops.op_scope([self._mu, self._sigma, n], name, "GaussianSample"):
+    with ops.op_scope([self._mu, self._sigma, n], name, "NormalSample"):
       broadcast_shape = (self._mu + self._sigma).get_shape()
       n = ops.convert_to_tensor(n)
       shape = array_ops.concat(
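As a sanity check on the `NormalEntropy` scope above, the closed form used in `entropy()` is the standard differential entropy of a normal distribution; in the code's notation, where `two_pi_e1 = 2πe`:

```latex
H\left(\mathcal{N}(\mu, \sigma^2)\right)
  = -\int_{-\infty}^{\infty} p(x)\,\log p(x)\,dx
  = \frac{1}{2}\,\log\!\left(2\pi e\,\sigma^2\right)
```

which is exactly `0.5 * math_ops.log(two_pi_e1 * math_ops.square(sigma))`, broadcast over the full `mu + sigma` shape.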
diff --git a/tensorflow/contrib/distributions/python/ops/gaussian_conjugate_posteriors.py b/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py
similarity index 73%
rename from tensorflow/contrib/distributions/python/ops/gaussian_conjugate_posteriors.py
rename to tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py
index c0089964152..45ddd3ada36 100644
--- a/tensorflow/contrib/distributions/python/ops/gaussian_conjugate_posteriors.py
+++ b/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py
@@ -12,32 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""The Gaussian distribution: conjugate posterior closed form calculations."""
+"""The Normal distribution: conjugate posterior closed form calculations."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.distributions.python.ops.gaussian import Gaussian  # pylint: disable=line-too-long
+from tensorflow.contrib.distributions.python.ops.normal import Normal  # pylint: disable=line-too-long
 
 from tensorflow.python.ops import math_ops
 
 
-def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n):
-  """Posterior Gaussian distribution with conjugate prior on the mean.
+def normal_conjugates_known_sigma_posterior(prior, sigma, s, n):
+  """Posterior Normal distribution with conjugate prior on the mean.
 
   This model assumes that `n` observations (with sum `s`) come from a
-  Gaussian with unknown mean `mu` (described by the Gaussian `prior`)
+  Normal with unknown mean `mu` (described by the Normal `prior`)
   and known variance `sigma^2`.  The "known sigma posterior" is
   the distribution of the unknown `mu`.
 
-  Accepts a prior Gaussian distribution object, having parameters
+  Accepts a prior Normal distribution object, having parameters
   `mu0` and `sigma0`, as well as known `sigma` values of the predictive
-  distribution(s) (also assumed Gaussian),
+  distribution(s) (also assumed Normal),
   and statistical estimates `s` (the sum(s) of the observations) and
   `n` (the number(s) of observations).
 
-  Returns a posterior (also Gaussian) distribution object, with parameters
+  Returns a posterior (also Normal) distribution object, with parameters
   `(mu', sigma'^2)`, where:
 
   ```
@@ -50,7 +50,7 @@ def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n):
   will broadcast in the case of multidimensional sets of parameters.
 
   Args:
-    prior: `Gaussian` object of type `dtype`:
+    prior: `Normal` object of type `dtype`:
       the prior distribution having parameters `(mu0, sigma0)`.
     sigma: tensor of type `dtype`, taking values `sigma > 0`.
       The known stddev parameter(s).
@@ -58,15 +58,15 @@ def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n):
     n: Tensor of type `int`.  The number(s) of observations.
 
   Returns:
-    A new Gaussian posterior distribution object for the unknown observation
+    A new Normal posterior distribution object for the unknown observation
     mean `mu`.
 
   Raises:
     TypeError: if dtype of `s` does not match `dtype`, or `prior` is not a
-      Gaussian object.
+      Normal object.
  """
-  if not isinstance(prior, Gaussian):
-    raise TypeError("Expected prior to be an instance of type Gaussian")
+  if not isinstance(prior, Normal):
+    raise TypeError("Expected prior to be an instance of type Normal")
 
   if s.dtype != prior.dtype:
     raise TypeError(
@@ -77,27 +77,27 @@ def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n):
   sigma0_2 = math_ops.square(prior.sigma)
   sigma_2 = math_ops.square(sigma)
   sigmap_2 = 1.0/(1/sigma0_2 + n/sigma_2)
-  return Gaussian(
+  return Normal(
       mu=(prior.mu/sigma0_2 + s/sigma_2) * sigmap_2,
       sigma=math_ops.sqrt(sigmap_2))
 
 
-def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n):
-  """Posterior predictive Gaussian distribution w. conjugate prior on the mean.
+def normal_congugates_known_sigma_predictive(prior, sigma, s, n):
+  """Posterior predictive Normal distribution w. conjugate prior on the mean.
 
   This model assumes that `n` observations (with sum `s`) come from a
-  Gaussian with unknown mean `mu` (described by the Gaussian `prior`)
+  Normal with unknown mean `mu` (described by the Normal `prior`)
   and known variance `sigma^2`.  The "known sigma predictive"
   is the distribution of new observations, conditioned on the existing
   observations and our prior.
 
-  Accepts a prior Gaussian distribution object, having parameters
+  Accepts a prior Normal distribution object, having parameters
   `mu0` and `sigma0`, as well as known `sigma` values of the predictive
-  distribution(s) (also assumed Gaussian),
+  distribution(s) (also assumed Normal),
   and statistical estimates `s` (the sum(s) of the observations) and
   `n` (the number(s) of observations).
 
-  Calculates the Gaussian distribution(s) `p(x | sigma^2)`:
+  Calculates the Normal distribution(s) `p(x | sigma^2)`:
 
   ```
   p(x | sigma^2) = int N(x | mu, sigma^2) N(mu | prior.mu, prior.sigma^2) dmu
@@ -117,7 +117,7 @@ def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n):
   will broadcast in the case of multidimensional sets of parameters.
 
   Args:
-    prior: `Gaussian` object of type `dtype`:
+    prior: `Normal` object of type `dtype`:
       the prior distribution having parameters `(mu0, sigma0)`.
     sigma: tensor of type `dtype`, taking values `sigma > 0`.
       The known stddev parameter(s).
@@ -125,14 +125,14 @@ def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n):
     n: Tensor of type `int`.  The number(s) of observations.
 
   Returns:
-    A new Gaussian predictive distribution object.
+    A new Normal predictive distribution object.
 
   Raises:
     TypeError: if dtype of `s` does not match `dtype`, or `prior` is not a
-      Gaussian object.
+      Normal object.
   """
-  if not isinstance(prior, Gaussian):
-    raise TypeError("Expected prior to be an instance of type Gaussian")
+  if not isinstance(prior, Normal):
+    raise TypeError("Expected prior to be an instance of type Normal")
 
   if s.dtype != prior.dtype:
     raise TypeError(
@@ -143,6 +143,6 @@ def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n):
   sigma0_2 = math_ops.square(prior.sigma)
   sigma_2 = math_ops.square(sigma)
   sigmap_2 = 1.0/(1/sigma0_2 + n/sigma_2)
-  return Gaussian(
+  return Normal(
       mu=(prior.mu/sigma0_2 + s/sigma_2) * sigmap_2,
       sigma=math_ops.sqrt(sigmap_2 + sigma_2))
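Both helpers above implement the same normal–normal conjugacy update their docstrings describe; written out, `sigmap_2` and the returned `mu` compute

```latex
\sigma'^2 = \left(\frac{1}{\sigma_0^2} + \frac{n}{\sigma^2}\right)^{-1},
\qquad
\mu' = \sigma'^2\left(\frac{\mu_0}{\sigma_0^2} + \frac{s}{\sigma^2}\right)
```

with the posterior returning stddev `sqrt(sigmap_2)` and the predictive returning `sqrt(sigmap_2 + sigma_2)`, since a new observation adds the known noise variance on top of the posterior uncertainty over `mu`.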
diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD
index 75d58ccf23b..268d7bea369 100644
--- a/tensorflow/contrib/ffmpeg/BUILD
+++ b/tensorflow/contrib/ffmpeg/BUILD
@@ -17,6 +17,8 @@ filegroup(
     srcs = glob(["testdata/*"]),
 )
 
+exports_files(["ffmpeg_lib.h"])
+
 cc_library(
     name = "decode_audio_op_cc",
     srcs = ["decode_audio_op.cc"],
diff --git a/tensorflow/contrib/ffmpeg/decode_audio_op.cc b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
index b38b9957a84..a2ecc7f287e 100644
--- a/tensorflow/contrib/ffmpeg/decode_audio_op.cc
+++ b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
@@ -18,7 +18,7 @@
 #include
 #include
 
-#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h"
+#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/io/path.h"
diff --git a/tensorflow/contrib/ffmpeg/default/BUILD b/tensorflow/contrib/ffmpeg/default/BUILD
index f8566df6730..e1b7bb61924 100644
--- a/tensorflow/contrib/ffmpeg/default/BUILD
+++ b/tensorflow/contrib/ffmpeg/default/BUILD
@@ -11,7 +11,10 @@ package(default_visibility = ["//tensorflow:__subpackages__"])
 cc_library(
     name = "ffmpeg_lib",
     srcs = ["ffmpeg_lib.cc"],
-    hdrs = ["ffmpeg_lib.h"],
+    hdrs = [
+        # Header is shared between implementations.
+        "//tensorflow/contrib/ffmpeg:ffmpeg_lib.h",
+    ],
     deps = [
         "//google/protobuf",
         "//tensorflow/core:framework_headers_lib",
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index 629072ed7e1..8a7b6840f67 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -13,7 +13,7 @@
 // limitations under the License.
 // =============================================================================
 
-#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h"
+#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 
 #include
 #include
 
@@ -212,9 +212,9 @@ Status ReadAudioFile(const string& filename,
   }
 }
 
-Status CreateAudioFile(const string& audio_format_id, int32 samples_per_second,
-                       int32 channel_count, const std::vector<float>& samples,
-                       string* output_data) {
+Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
+                       int32 samples_per_second, int32 channel_count,
+                       const std::vector<float>& samples, string* output_data) {
   if (audio_format_id != "wav") {
     return Status(error::Code::INVALID_ARGUMENT,
                   "CreateAudioFile only supports the 'wav' audio format.");
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc
index 9001341e641..ec0b19f961a 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc
@@ -13,7 +13,7 @@
 // limitations under the License.
 // =============================================================================
 
-#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h"
+#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 
 #include
 #include
 
@@ -91,7 +91,7 @@ TEST(FfmpegLibTest, TestRoundTripGeneratedWav) {
     sine_wave.push_back(std::sin(6.28 * 440.0 * i / 20000.0));
   }
   string content;
-  ASSERT_TRUE(CreateAudioFile("wav", 20000, 1, sine_wave, &content).ok());
+  ASSERT_TRUE(CreateAudioFile("wav", 0, 20000, 1, sine_wave, &content).ok());
   string temp_filename = GetTempFilename("wav");
   ASSERT_TRUE(WriteStringToFile(Env::Default(), temp_filename, content).ok());
   std::vector<float> roundtrip_data;
@@ -122,7 +122,7 @@ TEST(FfmpegLibTest, TestRoundTripWav) {
 
   string written_audio;
   ASSERT_TRUE(
-      CreateAudioFile("wav", 10000, 1, output_samples, &written_audio).ok());
+      CreateAudioFile("wav", 0, 10000, 1, output_samples, &written_audio).ok());
   EXPECT_EQ(original_audio, written_audio);
 }
diff --git a/tensorflow/contrib/ffmpeg/encode_audio_op.cc b/tensorflow/contrib/ffmpeg/encode_audio_op.cc
index 0997c0458db..46fcbc75d74 100644
--- a/tensorflow/contrib/ffmpeg/encode_audio_op.cc
+++ b/tensorflow/contrib/ffmpeg/encode_audio_op.cc
@@ -15,7 +15,7 @@
 
 #include
 
-#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h"
+#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
@@ -35,6 +35,8 @@ class EncodeAudioOp : public OpKernel {
         context, context->GetAttr("samples_per_second", &samples_per_second_));
     OP_REQUIRES(context, samples_per_second_ > 0,
                 errors::InvalidArgument("samples_per_second must be > 0."));
+    OP_REQUIRES_OK(
+        context, context->GetAttr("bits_per_second", &bits_per_second_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -61,9 +63,9 @@ class EncodeAudioOp : public OpKernel {
     }
     const int32 channel_count = contents.dim_size(1);
     string encoded_audio;
-    OP_REQUIRES_OK(context,
-                   CreateAudioFile(file_format_, samples_per_second_,
-                                   channel_count, samples, &encoded_audio));
+    OP_REQUIRES_OK(context, CreateAudioFile(file_format_, bits_per_second_,
+                                            samples_per_second_, channel_count,
+                                            samples, &encoded_audio));
 
     // Copy the encoded audio file to the output tensor.
     Tensor* output = nullptr;
@@ -75,6 +77,7 @@ class EncodeAudioOp : public OpKernel {
  private:
   string file_format_;
   int32 samples_per_second_;
+  int32 bits_per_second_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("EncodeAudio").Device(DEVICE_CPU), EncodeAudioOp);
@@ -84,6 +87,7 @@ REGISTER_OP("EncodeAudio")
     .Output("contents: string")
     .Attr("file_format: string")
     .Attr("samples_per_second: int")
+    .Attr("bits_per_second: int = 192000")
    .Doc(R"doc(
 Processes a `Tensor` containing sampled audio with the number of channels
 and length of the audio specified by the dimensions of the `Tensor`. The
@@ -100,6 +104,8 @@ sampled_audio: A rank 2 tensor containing all tracks of the audio. Dimension 0
 contents: The binary audio file contents.
 file_format: A string describing the audio file format. This must be "wav".
 samples_per_second: The number of samples per second that the audio should have.
+bits_per_second: The approximate bitrate of the encoded audio file. This is
+  ignored by the "wav" file format.
 )doc");
 
 }  // namespace ffmpeg
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
similarity index 83%
rename from tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h
rename to tensorflow/contrib/ffmpeg/ffmpeg_lib.h
index d7b8f957de5..46b42c14334 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
@@ -13,10 +13,11 @@
 // limitations under the License.
 // =============================================================================
 
-#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_DEFAULT_FFMPEG_LIB_H_
-#define THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_DEFAULT_FFMPEG_LIB_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_FFMPEG_LIB_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_FFMPEG_LIB_H_
 
 #include
+#include
 
 #include "tensorflow/core/lib/core/status.h"
 
@@ -40,9 +41,9 @@ Status ReadAudioFile(const string& filename,
 // contain a separate sample for each channel. Frames are ordered by time.
 // Currently, the implementation only supports wav files, and ffmpeg is not used
 // to create them.
-Status CreateAudioFile(const string& audio_format_id, int32 samples_per_second,
-                       int32 channel_count, const std::vector<float>& samples,
-                       string* output_data);
+Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
+                       int32 samples_per_second, int32 channel_count,
+                       const std::vector<float>& samples, string* output_data);
 
 }  // namespace ffmpeg
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 261103a746f..de447847f21 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -39,6 +39,7 @@ from tensorflow.python.training import moving_averages
 # TODO(b/28426988): Remove legacy_* when all uses have migrated to new API.
 __all__ = ['bias_add',
            'batch_norm',
+           'conv2d',
            'convolution2d',
            'fully_connected',
            'linear',
@@ -113,7 +114,7 @@ def batch_norm(inputs,
                scale=False,
                epsilon=0.001,
                activation_fn=None,
-               updates_collection=None,
+               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
                reuse=None,
                variables_collections=None,
@@ -138,8 +139,9 @@ def batch_norm(inputs,
       disabled since the scaling can be done by the next layer.
     epsilon: small float added to variance to avoid dividing by zero.
     activation_fn: Optional activation function.
-    updates_collection: collection to collect the update ops for computation. If
-      None a control dependency would be added to make sure they are computed.
+    updates_collections: collections to collect the update ops for computation.
+      If None, a control dependency would be added to make sure the updates are
+      computed.
     is_training: whether or not the layer is in training mode. In training mode
       it would accumulate the statistics of the moments into `moving_mean` and
       `moving_variance` using an exponential moving average with the given
@@ -207,7 +209,7 @@ def batch_norm(inputs,
                                                          moving_mean, mean, decay)
     update_moving_variance = moving_averages.assign_moving_average(
         moving_variance, variance, decay)
-    if updates_collection is None:
+    if updates_collections is None:
       # Make sure the updates are computed here.
       with ops.control_dependencies([update_moving_mean,
                                      update_moving_variance]):
@@ -215,8 +217,8 @@ def batch_norm(inputs,
         outputs = nn.batch_normalization(
             inputs, mean, variance, beta, gamma, epsilon)
     else:
       # Collect the updates to be computed later.
-      ops.add_to_collection(updates_collection, update_moving_mean)
-      ops.add_to_collection(updates_collection, update_moving_variance)
+      ops.add_to_collections(updates_collections, update_moving_mean)
+      ops.add_to_collections(updates_collections, update_moving_variance)
       outputs = nn.batch_normalization(
           inputs, mean, variance, beta, gamma, epsilon)
   else:
@@ -504,22 +506,6 @@ def legacy_fully_connected(x,
   Raises:
     ValueError: if x has rank less than 2 or if its last dimension is not set.
   """
-  # pylint: enable=anomalous-backslash-in-string
-# TODO(ptucker) redirect to fully_connected
-#   _ = trainable
-#   variables_collections = {'weights': weight_collections,
-#                            'biases': bias_collections}
-#   outputs = fully_connected(inputs=x,
-#                             num_outputs=num_output_units,
-#                             activation_fn=activation_fn,
-#                             weights_initializer=weight_init,
-#                             weights_regularizer=weight_regularizer,
-#                             biases_initializer=bias_init,
-#                             biases_regularizer=bias_regularizer,
-#                             variables_collections=variables_collections,
-#                             scope=name)
-#   ops.add_to_collections(output_collections, outputs)
-#   return outputs
   with variable_scope.variable_op_scope([x], name, 'fully_connected'):
     dims = x.get_shape().dims
     if dims is None:
@@ -645,24 +631,6 @@ def legacy_convolution2d(x,
   Raises:
     ValueError: If `kernel_size` or `stride` are not length 2.
   """
-# TODO(ptucker) redirect to convolution2d
-#   _ = trainable
-#   variables_collections = {'weights': weight_collections,
-#                            'biases': bias_collections}
-#   outputs = convolution2d(inputs=x,
-#                           num_outputs=num_output_channels,
-#                           kernel_size=kernel_size,
-#                           stride=stride,
-#                           padding=padding,
-#                           activation_fn=activation_fn,
-#                           weights_initializer=weight_init,
-#                           weights_regularizer=weight_regularizer,
-#                           biases_initializer=bias_init,
-#                           biases_regularizer=bias_regularizer,
-#                           variables_collections=variables_collections,
-#                           scope=name)
-#   ops.add_to_collections(output_collections, outputs)
-#   return outputs
   with variable_scope.variable_op_scope([x], name, 'convolution2d'):
     num_input_channels = x.get_shape().dims[3].value
@@ -714,3 +682,6 @@
 linear = legacy_linear
 relu = legacy_relu
 relu6 = legacy_relu6
+
+# Simple alias for convolution2d.
+conv2d = convolution2d
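The new `updates_collections` default changes how training loops consume batch_norm's moving-average updates. A sketch of the two resulting usage patterns, mirroring the updated tests that follow:

```python
import tensorflow as tf

images = tf.random_uniform((5, 3, 3, 3), seed=1)

# Default: updates are parked in tf.GraphKeys.UPDATE_OPS and must be run
# as part of the training step, e.g. via a control dependency.
output = tf.contrib.layers.batch_norm(images, decay=0.1)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  output = tf.identity(output)

# Old behavior (moving averages updated on every forward pass): pass None.
forced = tf.contrib.layers.batch_norm(images, decay=0.1,
                                      updates_collections=None)
```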
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index de073e573eb..0c3be3c98f7 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -430,8 +430,8 @@ class BatchNormTest(tf.test.TestCase):
     height, width = 3, 3
     with self.test_session():
       images = tf.random_uniform((5, height, width, 3), seed=1)
-      tf.contrib.layers.batch_norm(images, updates_collection='update_ops')
-      update_layers = tf.get_collection('update_ops')
+      tf.contrib.layers.batch_norm(images, updates_collections='my_update_ops')
+      update_layers = tf.get_collection('my_update_ops')
       update_moving_mean = update_layers[0]
       update_moving_variance = update_layers[1]
       self.assertEquals(update_moving_mean.op.name,
@@ -460,7 +460,7 @@ class BatchNormTest(tf.test.TestCase):
     with self.test_session():
       images = tf.random_uniform((5, height, width, 3), seed=1)
       with tf.contrib.framework.arg_scope([tf.contrib.layers.batch_norm],
-                                          updates_collection='update_ops'):
+                                          updates_collections='update_ops'):
         tf.contrib.layers.batch_norm(images, scope='bn')
         self.assertEquals(len(tf.get_collection('update_ops')), 2)
         tf.contrib.layers.batch_norm(images, scope='bn', reuse=True)
@@ -479,7 +479,7 @@ class BatchNormTest(tf.test.TestCase):
       self.assertEquals(len(moving_variance), 1)
       self.assertEquals(moving_variance[0].op.name, 'BatchNorm/moving_variance')
 
-  def testUpdateMovingVars(self):
+  def testForceUpdateMovingVars(self):
     height, width = 3, 3
     with self.test_session() as sess:
       image_shape = (10, height, width, 3)
@@ -487,7 +487,8 @@ class BatchNormTest(tf.test.TestCase):
       expected_mean = np.mean(image_values, axis=(0, 1, 2))
       expected_var = np.var(image_values, axis=(0, 1, 2))
       images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
-      output = tf.contrib.layers.batch_norm(images, decay=0.1)
+      output = tf.contrib.layers.batch_norm(images, decay=0.1,
+                                            updates_collections=None)
       # Initialize all variables
       sess.run(tf.initialize_all_variables())
       moving_mean = tf.contrib.framework.get_variables(
@@ -515,9 +516,8 @@ class BatchNormTest(tf.test.TestCase):
       expected_mean = np.mean(image_values, axis=(0, 1, 2))
       expected_var = np.var(image_values, axis=(0, 1, 2))
       images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
-      output = tf.contrib.layers.batch_norm(images, decay=0.1,
-                                            updates_collection='update_ops')
-      update_ops = tf.get_collection('update_ops')
+      output = tf.contrib.layers.batch_norm(images, decay=0.1)
+      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
       with tf.control_dependencies(update_ops):
         barrier = tf.no_op(name='barrier')
       output = control_flow_ops.with_dependencies([barrier], output)
@@ -550,10 +550,9 @@ class BatchNormTest(tf.test.TestCase):
       images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
       output = tf.contrib.layers.batch_norm(images,
                                             decay=0.1,
-                                            is_training=False,
-                                            updates_collection='update_ops')
-      update_layers = tf.get_collection('update_ops')
-      self.assertEquals(update_layers, [])
+                                            is_training=False)
+      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+      self.assertEquals(update_ops, [])
       # Initialize all variables
       sess.run(tf.initialize_all_variables())
       moving_mean = tf.contrib.framework.get_variables(
@@ -587,10 +586,9 @@ class BatchNormTest(tf.test.TestCase):
       images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
       output = tf.contrib.layers.batch_norm(images,
                                             decay=0.1,
-                                            is_training=False,
-                                            updates_collection='update_ops')
-      update_layers = tf.get_collection('update_ops')
-      self.assertEquals(update_layers, [])
+                                            is_training=False)
+      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+      self.assertEquals(update_ops, [])
       # Initialize all variables
       sess.run(tf.initialize_all_variables())
       moving_mean = tf.contrib.framework.get_variables(
diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py
index 8de7797e6b7..1d72243f992 100644
--- a/tensorflow/contrib/learn/python/learn/__init__.py
+++ b/tensorflow/contrib/learn/python/learn/__init__.py
@@ -1,5 +1,4 @@
-"""Main Scikit Flow module."""
-#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,6 +12,8 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
+"""High level API for learning with TensorFlow."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/tensorflow/contrib/learn/python/learn/datasets/base.py b/tensorflow/contrib/learn/python/learn/datasets/base.py
index 7f78b2dced9..9c29b9eeb11 100644
--- a/tensorflow/contrib/learn/python/learn/datasets/base.py
+++ b/tensorflow/contrib/learn/python/learn/datasets/base.py
@@ -1,5 +1,4 @@
-"""Base utilities for loading datasets."""
-#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -13,6 +12,8 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
+"""Base utilities for loading datasets."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py
index e714c15f2e0..1b0d0aef6f5 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py
@@ -1,5 +1,4 @@
-"""Scikit Flow Estimators."""
-#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,12 +11,16 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+
+"""Estimators."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.learn.python.learn.estimators.autoencoder import TensorFlowDNNAutoencoder
-from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowEstimator, TensorFlowBaseTransformer
+from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowBaseTransformer
+from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowEstimator
 from tensorflow.contrib.learn.python.learn.estimators.dnn import DNNClassifier
 from tensorflow.contrib.learn.python.learn.estimators.dnn import DNNRegressor
 from tensorflow.contrib.learn.python.learn.estimators.dnn import TensorFlowDNNClassifier
diff --git a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py
index dcd1d81056b..5032ea966d4 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py
@@ -1,5 +1,4 @@
-"""sklearn cross-support."""
-#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,6 +11,9 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+
+"""sklearn cross-support."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -20,6 +22,8 @@ import collections
 import os
 
 import numpy as np
+import six
+
 
 def _pprint(d):
   return ', '.join(['%s=%s' % (key, str(value)) for key, value in d.items()])
@@ -102,6 +106,7 @@ class _BaseEstimator(object):
                        _pprint(self.get_params(deep=False)),)
 
 
+# pylint: disable=old-style-class
 class _ClassifierMixin():
   """Mixin class for all classifiers."""
   pass
@@ -111,8 +116,10 @@ class _RegressorMixin():
   """Mixin class for all regression estimators."""
   pass
 
+
 class _TransformerMixin():
-  """Mixin class for all transformer estimators."""
+  """Mixin class for all transformer estimators."""
+
 
 class _NotFittedError(ValueError, AttributeError):
   """Exception class to raise if estimator is used before fitting.
@@ -134,6 +141,8 @@ class _NotFittedError(ValueError, AttributeError):
   https://github.com/scikit-learn/scikit-learn/master/sklearn/exceptions.py
   """
 
+# pylint: enable=old-style-class
+
 
 def _accuracy_score(y_true, y_pred):
   score = y_true == y_pred
@@ -149,8 +158,7 @@ def _mean_squared_error(y_true, y_pred):
 
 
 def _train_test_split(*args, **options):
-  n_array = len(args)
-
+  # pylint: disable=missing-docstring
   test_size = options.pop('test_size', None)
   train_size = options.pop('train_size', None)
   random_state = options.pop('random_state', None)
@@ -159,7 +167,7 @@ def _train_test_split(*args, **options):
     train_size = 0.75
   elif train_size is None:
     train_size = 1 - test_size
-  train_size = train_size * args[0].shape[0]
+  train_size *= args[0].shape[0]
 
   np.random.seed(random_state)
   indices = np.random.permutation(args[0].shape[0])
@@ -173,6 +181,7 @@
 # If "TENSORFLOW_SKLEARN" flag is defined then try to import from sklearn.
 TRY_IMPORT_SKLEARN = os.environ.get('TENSORFLOW_SKLEARN', False)
 if TRY_IMPORT_SKLEARN:
+  # pylint: disable=g-import-not-at-top,g-multiple-import,unused-import
  from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin
   from sklearn.metrics import accuracy_score, log_loss, mean_squared_error
   from sklearn.cross_validation import train_test_split
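For reference, the split logic after the `_train_test_split` fix reduces to the following plain-NumPy sketch (hypothetical data; `train_size` defaults to 0.75 when neither size option is given):

```python
import numpy as np

X = np.arange(20).reshape(10, 2)
y = np.arange(10)

train_size = 0.75            # default when neither option is given
train_size *= X.shape[0]     # the fixed in-place scaling: 7.5 of 10 rows

np.random.seed(42)
indices = np.random.permutation(X.shape[0])
train_idx, test_idx = indices[:int(train_size)], indices[int(train_size):]
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]
```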
diff --git a/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py b/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py
index 690bac8f196..a3f41697680 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py
@@ -1,5 +1,4 @@
-"""Deep Autoencoder estimators."""
-#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,105 +11,115 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+
+"""Deep Autoencoder estimators."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.ops import nn
-from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowBaseTransformer
+import numpy as np
+
 from tensorflow.contrib.learn.python.learn import models
+from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowBaseTransformer
+from tensorflow.python.ops import nn
 
 
 class TensorFlowDNNAutoencoder(TensorFlowBaseTransformer):
-    """TensorFlow Autoencoder Regressor model.
+  """TensorFlow Autoencoder Regressor model.
 
-    Parameters:
-        hidden_units: List of hidden units per layer.
-        batch_size: Mini batch size.
-        activation: activation function used to map inner latent layer onto
-                    reconstruction layer.
-        add_noise: a function that adds noise to tensor_in,
-               e.g. def add_noise(x):
-                        return(x + np.random.normal(0, 0.1, (len(x), len(x[0]))))
-        steps: Number of steps to run over data.
-        optimizer: Optimizer name (or class), for example "SGD", "Adam",
-                   "Adagrad".
-        learning_rate: If this is constant float value, no decay function is used.
-            Instead, a customized decay function can be passed that accepts
-            global_step as parameter and returns a Tensor.
-            e.g. exponential decay function:
-            def exp_decay(global_step):
-                return tf.train.exponential_decay(
-                    learning_rate=0.1, global_step,
-                    decay_steps=2, decay_rate=0.001)
-        continue_training: when continue_training is True, once initialized
-            model will be continuely trained on every call of fit.
-        config: RunConfig object that controls the configurations of the session,
-            e.g. num_cores, gpu_memory_fraction, etc.
-        verbose: Controls the verbosity, possible values:
-                 0: the algorithm and debug information is muted.
-                 1: trainer prints the progress.
-                 2: log device placement is printed.
-        dropout: When not None, the probability we will drop out a given
-                 coordinate.
-    """
-    def __init__(self, hidden_units, n_classes=0, batch_size=32,
-                 steps=200, optimizer="Adagrad", learning_rate=0.1,
-                 clip_gradients=5.0, activation=nn.relu, add_noise=None,
-                 continue_training=False, config=None,
-                 verbose=1, dropout=None):
-        self.hidden_units = hidden_units
-        self.dropout = dropout
-        self.activation = activation
-        self.add_noise = add_noise
-        super(TensorFlowDNNAutoencoder, self).__init__(
-            model_fn=self._model_fn,
-            n_classes=n_classes,
-            batch_size=batch_size, steps=steps, optimizer=optimizer,
-            learning_rate=learning_rate, clip_gradients=clip_gradients,
-            continue_training=continue_training,
-            config=config, verbose=verbose)
+  Parameters:
+    hidden_units: List of hidden units per layer.
+    batch_size: Mini batch size.
+    activation: activation function used to map inner latent layer onto
+      reconstruction layer.
+    add_noise: a function that adds noise to tensor_in,
+      e.g. def add_noise(x):
+        return(x + np.random.normal(0, 0.1, (len(x), len(x[0]))))
+    steps: Number of steps to run over data.
+    optimizer: Optimizer name (or class), for example "SGD", "Adam",
+      "Adagrad".
+    learning_rate: If this is constant float value, no decay function is used.
+      Instead, a customized decay function can be passed that accepts
+      global_step as parameter and returns a Tensor.
+      e.g. exponential decay function:
+      def exp_decay(global_step):
+        return tf.train.exponential_decay(
+            learning_rate=0.1, global_step,
+            decay_steps=2, decay_rate=0.001)
+    continue_training: when continue_training is True, once initialized
+      model will be continuely trained on every call of fit.
+    config: RunConfig object that controls the configurations of the session,
+      e.g. num_cores, gpu_memory_fraction, etc.
+    verbose: Controls the verbosity, possible values:
+      0: the algorithm and debug information is muted.
+      1: trainer prints the progress.
+      2: log device placement is printed.
+    dropout: When not None, the probability we will drop out a given
+      coordinate.
+  """
 
-    def _model_fn(self, X, y):
-        encoder, decoder, autoencoder_estimator = models.get_autoencoder_model(
-            self.hidden_units,
-            models.linear_regression,
-            activation=self.activation,
-            add_noise=self.add_noise,
-            dropout=self.dropout)(X)
-        self.encoder = encoder
-        self.decoder = decoder
-        return autoencoder_estimator
+  def __init__(self, hidden_units, n_classes=0, batch_size=32,
+               steps=200, optimizer="Adagrad", learning_rate=0.1,
+               clip_gradients=5.0, activation=nn.relu, add_noise=None,
+               continue_training=False, config=None,
+               verbose=1, dropout=None):
+    self.hidden_units = hidden_units
+    self.dropout = dropout
+    self.activation = activation
+    self.add_noise = add_noise
+    super(TensorFlowDNNAutoencoder, self).__init__(
+        model_fn=self._model_fn,
+        n_classes=n_classes,
+        batch_size=batch_size, steps=steps, optimizer=optimizer,
+        learning_rate=learning_rate, clip_gradients=clip_gradients,
+        continue_training=continue_training,
+        config=config, verbose=verbose)
 
-    def generate(self, hidden=None):
-        """Generate new data using trained construction layer"""
-        if hidden is None:
-            last_layer = len(self.hidden_units) - 1
-            bias = self.get_tensor_value('encoder/dnn/layer%d/Linear/Bias:0' % last_layer)
-            import numpy as np
-            hidden = np.random.normal(size=bias.shape)
-            hidden = np.reshape(hidden, (1, len(hidden)))
-        return self._session.run(self.decoder, feed_dict={self.encoder: hidden})
+  def _model_fn(self, X, y):
+    encoder, decoder, autoencoder_estimator = models.get_autoencoder_model(
+        self.hidden_units,
+        models.linear_regression,
+        activation=self.activation,
+        add_noise=self.add_noise,
+        dropout=self.dropout)(X)
+    self.encoder = encoder
+    self.decoder = decoder
+    return autoencoder_estimator
 
-    @property
-    def weights_(self):
-        """Returns weights of the autoencoder's weight layers."""
-        weights = []
-        for layer in range(len(self.hidden_units)):
-            weights.append(self.get_tensor_value('encoder/dnn/layer%d/Linear/Matrix:0' % layer))
-        for layer in range(len(self.hidden_units)):
-            weights.append(self.get_tensor_value('decoder/dnn/layer%d/Linear/Matrix:0' % layer))
-        weights.append(self.get_tensor_value('linear_regression/weights:0'))
-        return weights
+  def generate(self, hidden=None):
+    """Generate new data using trained construction layer."""
+    if hidden is None:
+      last_layer = len(self.hidden_units) - 1
+      bias = self.get_tensor_value(
+          "encoder/dnn/layer%d/Linear/Bias:0" % last_layer)
+      hidden = np.random.normal(size=bias.shape)
+      hidden = np.reshape(hidden, (1, len(hidden)))
+    return self._session.run(self.decoder, feed_dict={self.encoder: hidden})
 
-    @property
-    def bias_(self):
-        """Returns bias of the autoencoder's bias layers."""
-        biases = []
-        for layer in range(len(self.hidden_units)):
-            biases.append(self.get_tensor_value('encoder/dnn/layer%d/Linear/Bias:0' % layer))
-        for layer in range(len(self.hidden_units)):
-            biases.append(self.get_tensor_value('decoder/dnn/layer%d/Linear/Bias:0' % layer))
-        biases.append(self.get_tensor_value('linear_regression/bias:0'))
-        return biases
+  @property
+  def weights_(self):
+    """Returns weights of the autoencoder's weight layers."""
+    weights = []
+    for layer in range(len(self.hidden_units)):
+      weights.append(self.get_tensor_value(
+          "encoder/dnn/layer%d/Linear/Matrix:0" % layer))
+    for layer in range(len(self.hidden_units)):
+      weights.append(self.get_tensor_value(
+          "decoder/dnn/layer%d/Linear/Matrix:0" % layer))
+    weights.append(self.get_tensor_value("linear_regression/weights:0"))
+    return weights
 
+  @property
+  def bias_(self):
+    """Returns bias of the autoencoder's bias layers."""
+    biases = []
+    for layer in range(len(self.hidden_units)):
+      biases.append(self.get_tensor_value(
+          "encoder/dnn/layer%d/Linear/Bias:0" % layer))
+    for layer in range(len(self.hidden_units)):
+      biases.append(self.get_tensor_value(
+          "decoder/dnn/layer%d/Linear/Bias:0" % layer))
+    biases.append(self.get_tensor_value("linear_regression/bias:0"))
+    return biases
diff --git a/tensorflow/contrib/learn/python/learn/estimators/base.py b/tensorflow/contrib/learn/python/learn/estimators/base.py
index 39131f059b0..ab00ae76f78 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/base.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/base.py
@@ -1,5 +1,4 @@
-"""Base estimator class."""
-#  Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -12,18 +11,17 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+
+"""Base estimator class."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import datetime
 import json
 import os
-import shutil
 from six import string_types
 
-import numpy as np
-
 from google.protobuf import text_format
 
 from tensorflow.python.platform import gfile
+ +"""Deep Neural Network estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 0fce7d140f1..1f476e13937 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -563,9 +563,13 @@ class Estimator(BaseEstimator): input_fn=input_fn, batch_size=batch_size) if self._classification: - for key in predictions: - cur_axis = (len(predictions[key].shape) - 1) if axis is None else axis - predictions[key] = np.argmax(predictions[key], axis=cur_axis) + if isinstance(predictions, dict): + for key in predictions: + cur_axis = (len(predictions[key].shape) - 1) if axis is None else axis + predictions[key] = np.argmax(predictions[key], axis=cur_axis) + else: + cur_axis = (len(predictions.shape) - 1) if axis is None else axis + predictions = np.argmax(predictions, axis=cur_axis) return predictions def predict_proba(self, x=None, input_fn=None, batch_size=None): diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py index 40a455c6bf1..b45cf8af168 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py @@ -36,6 +36,17 @@ def boston_input_fn(): return features, target +def iris_input_fn(): + iris = tf.contrib.learn.datasets.load_iris() + features = tf.cast( + tf.reshape( + tf.constant(iris.data), [-1, 4]), tf.float32) + target = tf.cast( + tf.reshape( + tf.constant(iris.target), [-1, 1]), tf.int32) + return features, target + + def boston_eval_fn(): boston = tf.contrib.learn.datasets.load_boston() n_examples = len(boston.target) @@ -52,6 +63,10 @@ def linear_model_fn(features, target, unused_mode): return tf.contrib.learn.models.linear_regression_zero_init(features, target) +def logistic_model_fn(features, target, unused_mode): + return tf.contrib.learn.models.logistic_regression_zero_init(features, target) + + class CheckCallsMonitor(tf.contrib.learn.monitors.BaseMonitor): def __init__(self): @@ -84,6 +99,15 @@ class EstimatorTest(tf.test.TestCase): other_score = mean_squared_error(predictions, boston.target) self.assertAllClose(other_score, scores['mean_squared_error']) + def testIrisAll(self): + iris = tf.contrib.learn.datasets.load_iris() + est = tf.contrib.learn.Estimator(model_fn=logistic_model_fn, + classification=True) + est.train(input_fn=iris_input_fn, steps=100) + _ = est.evaluate(input_fn=iris_input_fn, steps=1) + predictions = est.predict(x=iris.data) + self.assertEqual(predictions.shape[0], iris.target.shape[0]) + def testTrainInputFn(self): est = tf.contrib.learn.Estimator(model_fn=linear_model_fn, classification=False) diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index d58ab35f5ee..ef73c44013a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -1,5 +1,4 @@ -"""Linear Estimators.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Linear Estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/estimators/rnn.py b/tensorflow/contrib/learn/python/learn/estimators/rnn.py index b703f607657..719a19a5bc8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/rnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/rnn.py @@ -1,5 +1,4 @@ -"""Recurrent Neural Network estimators.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Recurrent Neural Network estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/io/data_feeder.py b/tensorflow/contrib/learn/python/learn/io/data_feeder.py index 04bbd997482..b3ed3bc7d92 100644 --- a/tensorflow/contrib/learn/python/learn/io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/io/data_feeder.py @@ -1,6 +1,4 @@ -"""Implementations of different data feeders to provide data for TF trainer.""" - -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Implementations of different data feeders to provide data for TF trainer.""" + # TODO(ipolosukhin): Replace this module with feed-dict queue runners & queues. from __future__ import absolute_import diff --git a/tensorflow/contrib/learn/python/learn/models.py b/tensorflow/contrib/learn/python/learn/models.py index 8cabd390fc7..dddd152f368 100644 --- a/tensorflow/contrib/learn/python/learn/models.py +++ b/tensorflow/contrib/learn/python/learn/models.py @@ -1,5 +1,4 @@ -"""Various high level TF models.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +"""Various high level TF models.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.learn.python.learn.ops import autoencoder_ops from tensorflow.contrib.learn.python.learn.ops import dnn_ops from tensorflow.contrib.learn.python.learn.ops import losses_ops -from tensorflow.contrib.learn.python.learn.ops import autoencoder_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops as array_ops_ @@ -29,8 +31,7 @@ from tensorflow.python.ops import variable_scope as vs def linear_regression_zero_init(X, y): - """Creates a linear regression TensorFlow subgraph, in which weights and - bias terms are initialized to exactly zero. + """Linear regression subgraph with zero-value initial weights and bias. Args: X: tensor or placeholder for input features. @@ -43,8 +44,7 @@ def linear_regression_zero_init(X, y): def logistic_regression_zero_init(X, y): - """Creates a logistic regression TensorFlow subgraph, in which weights and - bias terms are initialized to exactly zero. + """Logistic regression subgraph with zero-value initial weights and bias. Args: X: tensor or placeholder for input features. @@ -85,7 +85,7 @@ def linear_regression(X, y, init_mean=None, init_stddev=1.0): else: output_shape = y_shape[1] # Set up the requested initialization. - if (init_mean is None): + if init_mean is None: weights = vs.get_variable('weights', [X.get_shape()[1], output_shape]) bias = vs.get_variable('bias', [output_shape]) else: @@ -134,7 +134,7 @@ def logistic_regression(X, logging_ops.histogram_summary('logistic_regression.X', X) logging_ops.histogram_summary('logistic_regression.y', y) # Set up the requested initialization. - if (init_mean is None): + if init_mean is None: weights = vs.get_variable('weights', [X.get_shape()[1], y.get_shape()[-1]]) bias = vs.get_variable('bias', [y.get_shape()[-1]]) @@ -188,35 +188,37 @@ def get_dnn_model(hidden_units, target_predictor_fn, dropout=None): return dnn_estimator + def get_autoencoder_model(hidden_units, target_predictor_fn, activation, add_noise=None, dropout=None): - """Returns a function that creates a Autoencoder TensorFlow subgraph with given - params. + """Returns a function that creates a Autoencoder TensorFlow subgraph. - Args: - hidden_units: List of values of hidden units for layers. - target_predictor_fn: Function that will predict target from input - features. This can be logistic regression, - linear regression or any other model, - that takes X, y and returns predictions and loss tensors. - activation: activation function used to map inner latent layer onto - reconstruction layer. - add_noise: a function that adds noise to tensor_in, - e.g. def add_noise(x): - return(x + np.random.normal(0, 0.1, (len(x), len(x[0])))) - dropout: When not none, causes dropout regularization to be used, - with the specified probability of removing a given coordinate. + Args: + hidden_units: List of values of hidden units for layers. + target_predictor_fn: Function that will predict target from input + features. This can be logistic regression, + linear regression or any other model, + that takes X, y and returns predictions and loss + tensors. + activation: activation function used to map inner latent layer onto + reconstruction layer. + add_noise: a function that adds noise to tensor_in, + e.g. 
def add_noise(x): + return(x + np.random.normal(0, 0.1, (len(x), len(x[0])))) + dropout: When not none, causes dropout regularization to be used, + with the specified probability of removing a given coordinate. + + Returns: + A function that creates the subgraph. + """ + def dnn_autoencoder_estimator(X): + """Autoencoder estimator with target predictor function on top.""" + encoder, decoder = autoencoder_ops.dnn_autoencoder( + X, hidden_units, activation, + add_noise=add_noise, dropout=dropout) + return encoder, decoder, target_predictor_fn(X, decoder) + return dnn_autoencoder_estimator - Returns: - A function that creates the subgraph. - """ - def dnn_autoencoder_estimator(X): - """Autoencoder estimator with target predictor function on top.""" - encoder, decoder = autoencoder_ops.dnn_autoencoder( - X, hidden_units, activation, - add_noise=add_noise, dropout=dropout) - return encoder, decoder, target_predictor_fn(X, decoder) - return dnn_autoencoder_estimator ## This will be in Tensorflow 0.7. ## TODO(ilblackdragon): Clean this up when it's released diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 861db1758f5..79c629d9491 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -1,5 +1,4 @@ -"""Monitors to track model training, report on progress and request early stopping""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Monitors to track training, report progress and request early stopping.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc index c03cb27df50..c911290f28b 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc @@ -81,6 +81,9 @@ class GPUAllocatorRetryTest : public ::testing::Test { return; } } + // Failures are more likely to occur if each consumer + // delays for a while before returning the memory. + Env::Default()->SleepForMicroseconds(500); ++consumer_count_[i]; for (int j = 0; j < cap_needed; ++j) { alloc_->DeallocateRaw(ptr); @@ -141,9 +144,10 @@ TEST_F(GPUAllocatorRetryTest, RetrySuccess) { EXPECT_GT(consumer_count_[2], 0); } -/* Disabled due to flakiness. b/24738751 // Verifies OutOfMemory failure when memory is slightly overcommitted -// and retry is not allowed. +// and retry is not allowed. Note that this test will fail, i.e. no +// memory alloc failure will be detected, if it is run in a context that +// does not permit real multi-threaded execution. TEST_F(GPUAllocatorRetryTest, NoRetryFail) { // Support up to 2 allocations simultaneously, waits up to 0 msec for // a chance to alloc. @@ -162,7 +166,6 @@ TEST_F(GPUAllocatorRetryTest, NoRetryFail) { EXPECT_TRUE(has_failed_); } } -*/ // Verifies OutOfMemory failure when retry is allowed but memory capacity // is too low even for retry. 
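
For orientation, the autoencoder pieces above (TensorFlowDNNAutoencoder and
get_autoencoder_model) compose an encoder, a mirrored decoder, and a
linear-regression reconstruction head. A minimal usage sketch, assuming the
class is exported under tensorflow.contrib.learn at this revision and that
fit() accepts the unlabeled feature matrix directly; the data shape and the
noise function are illustrative only:

    import numpy as np
    from tensorflow.contrib import learn

    X = np.random.rand(1000, 64).astype('float32')  # hypothetical unlabeled data

    def add_noise(x):
      # Gaussian corruption, in the form suggested by the docstring above.
      return x + np.random.normal(0, 0.1, (len(x), len(x[0])))

    # hidden_units=[32, 8] gives a 64 -> 32 -> 8 encoder and a mirrored
    # decoder; the predictor on top reconstructs the input.
    autoencoder = learn.TensorFlowDNNAutoencoder(
        hidden_units=[32, 8], add_noise=add_noise, dropout=0.5, steps=500)
    autoencoder.fit(X)

    # generate() draws a latent vector (shaped like the last encoder bias)
    # and runs it through the decoder, per the implementation above.
    sample = autoencoder.generate()
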
diff --git a/tensorflow/core/graph/dot.cc b/tensorflow/core/graph/dot.cc
index 799bbe71475..a546b84ee13 100644
--- a/tensorflow/core/graph/dot.cc
+++ b/tensorflow/core/graph/dot.cc
@@ -32,7 +32,7 @@ static string GraphNodeName(const DotOptions& opts, const Node* n) {
   return strings::StrCat("N", n->id());
 }
 
-bool ShoulDisplayOpType(const Node* n) {
+bool ShouldDisplayOpType(const Node* n) {
   if (n->type_string() == "NoOp") {
     return false;
   }
@@ -125,7 +125,7 @@ string DotGraph(const Graph& g, const DotOptions& opts) {
       continue;
     }
     string label = src->name();
-    if (ShoulDisplayOpType(src)) {
+    if (ShouldDisplayOpType(src)) {
       // Append the op type if it is not directly deducible from the op name.
       strings::StrAppend(&label, "\\n(", src->type_string(), ")");
     }
@@ -137,7 +137,14 @@ string DotGraph(const Graph& g, const DotOptions& opts) {
       shape = "oval";
     } else {
       const string& d = src->assigned_device_name();
-      const int dindex = (!d.empty()) ? device_index[d] : -1;
+
+      int dindex;
+      if (opts.node_color) {
+        dindex = opts.node_color(src);
+      } else {
+        dindex = (!d.empty()) ? device_index[d] : -1;
+      }
+
       if (dindex >= 0) {
         color = ColorFor(dindex);
       }
diff --git a/tensorflow/core/graph/dot.h b/tensorflow/core/graph/dot.h
index 79a538978a8..96e48773a9a 100644
--- a/tensorflow/core/graph/dot.h
+++ b/tensorflow/core/graph/dot.h
@@ -48,6 +48,11 @@ struct DotOptions {
   // A function that returns the "cost" of the edge. The dot display
   // makes a edge thickness proportional to its cost.
   std::function<double(const Edge*)> edge_cost;
+
+  // A function that returns a color number to apply to each node. < 0 means
+  // no color. A color will be assigned to each color number from a palette;
+  // adjacent color numbers will receive different colors.
+  std::function<int(const Node*)> node_color;
 };
 
 // Return a string that contains a graphviz specification of the graph.
diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc index 36cd60a9da0..9af6dfb8d5a 100644 --- a/tensorflow/core/kernels/concat_op.cc +++ b/tensorflow/core/kernels/concat_op.cc @@ -76,7 +76,7 @@ class ConcatOp : public OpKernel { for (int d = 0; d < concat_dim; ++d) { inputs_flat_dim0 *= input_shape.dim_size(d); } - int output_concat_dim = 0; + int64 output_concat_dim = 0; const bool input_is_scalar = IsLegacyScalar(input_shape); for (int i = 0; i < N; ++i) { const auto in = values[i]; diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h index a99bb6a092d..774436bacd8 100644 --- a/tensorflow/core/kernels/eigen_spatial_convolutions.h +++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h @@ -61,6 +61,7 @@ class TensorContractionInputMapper< typedef SubMapper LinearMapper; typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC TensorContractionInputMapper( const TensorEvaluator< const TensorReshapingOp< @@ -77,7 +78,7 @@ class TensorContractionInputMapper< m_patch_cols = tensor.impl().dimensions()[2]; m_num_patches = tensor.impl().dimensions()[3]; } else { - static const int NumDims = tensor.impl().dimensions().size(); + const int NumDims = tensor.impl().dimensions().size(); patch_depth = tensor.impl().dimensions()[NumDims - 1]; patch_rows = tensor.impl().dimensions()[NumDims - 2]; m_patch_cols = tensor.impl().dimensions()[NumDims - 3]; @@ -99,7 +100,7 @@ class TensorContractionInputMapper< m_inputRows = tensor.impl().impl().dimensions()[1]; m_inputCols = tensor.impl().impl().dimensions()[2]; } else { - static const int NumDims = tensor.impl().impl().dimensions().size(); + const int NumDims = tensor.impl().impl().dimensions().size(); m_inputRows = tensor.impl().impl().dimensions()[NumDims - 2]; m_inputCols = tensor.impl().impl().dimensions()[NumDims - 3]; } @@ -121,6 +122,7 @@ class TensorContractionInputMapper< m_fastDimZero = internal::TensorIntDivisor(patch_depth); } + EIGEN_DEVICE_FUNC TensorContractionInputMapper(const TensorContractionInputMapper& base_mapper) : m_impl(base_mapper.m_impl) { m_patch_cols = base_mapper.m_patch_cols; @@ -650,8 +652,10 @@ struct gemm_pack_rhs< SubMapper; typedef SubMapper DataMapper; + EIGEN_DEVICE_FUNC static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; } + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, Index depth, Index cols, Index stride = 0, Index offset = 0) const { @@ -822,8 +826,10 @@ struct gemm_pack_rhs< SubMapper; typedef SubMapper DataMapper; + EIGEN_DEVICE_FUNC static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; } + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, Index depth, Index cols, Index stride = 0, Index offset = 0) const { @@ -898,36 +904,40 @@ struct gemm_pack_rhs< * */ template -EIGEN_ALWAYS_INLINE static const typename internal::conditional< - internal::traits::Layout == ColMajor, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const Kernel>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorImagePatchOp > > >, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorImagePatchOp >, - 
const TensorReshapingOp< - const DSizes::Index, 2>, - const Kernel> > > >::type -SpatialConvolution(const Input& input, const Kernel& kernel, - const DenseIndex row_stride = 1, - const DenseIndex col_stride = 1, - const PaddingType padding_type = PADDING_SAME, - const DenseIndex row_in_stride = 1, - const DenseIndex col_in_stride = 1) { +EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits::Layout == ColMajor, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp > > >, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel> > > >::type + SpatialConvolution(const Input& input, const Kernel& kernel, + const DenseIndex row_stride = 1, + const DenseIndex col_stride = 1, + const PaddingType padding_type = PADDING_SAME, + const DenseIndex row_in_stride = 1, + const DenseIndex col_in_stride = 1) { typedef typename internal::traits::Index TensorIndex; TensorRef::Scalar, internal::traits::NumDimensions, @@ -941,9 +951,9 @@ SpatialConvolution(const Input& input, const Kernel& kernel, EIGEN_STATIC_ASSERT( internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - static const bool isColMajor = (internal::traits::Layout == ColMajor); + const bool isColMajor = (internal::traits::Layout == ColMajor); - static const int NumDims = internal::traits::NumDimensions; + const int NumDims = internal::traits::NumDimensions; // Number of filters to apply. 
This is the same as the output depth of the
  // result
diff --git a/tensorflow/core/kernels/random_shuffle_op.cc b/tensorflow/core/kernels/random_shuffle_op.cc
index d87883eae83..c81929de8d4 100644
--- a/tensorflow/core/kernels/random_shuffle_op.cc
+++ b/tensorflow/core/kernels/random_shuffle_op.cc
@@ -46,6 +46,19 @@ static inline void RandomShuffle(Iter first, Iter last, Random& uniform) {
   }
 }
 
+template <typename IntT, typename InT, typename OutT, typename Random>
+static void IndexedShuffle(const int64 size, const InT& input_mat,
+                           OutT output_mat, Random& uniform) {
+  std::vector<IntT> permutation(size);
+  for (IntT i = 0; i < size; i++) {
+    permutation[i] = i;
+  }
+  RandomShuffle(permutation.begin(), permutation.end(), uniform);
+  for (IntT i = 0; i < size; i++) {
+    output_mat.template chip<0>(i) = input_mat.template chip<0>(permutation[i]);
+  }
+}
+
 template <typename T>
 class RandomShuffleOp : public OpKernel {
  public:
@@ -79,14 +92,10 @@ class RandomShuffleOp : public OpKernel {
                      context->allocate_output(0, input.shape(), &output));
       const auto input_mat = input.flat_outer_dims<T>();
       auto output_mat = output->flat_outer_dims<T>();
-      std::vector<int> permutation(size);
-      for (int i = 0; i < size; i++) {
-        permutation[i] = i;
-      }
-      RandomShuffle(permutation.begin(), permutation.end(), uniform);
-      for (int i = 0; i < size; i++) {
-        output_mat.template chip<0>(i) =
-            input_mat.template chip<0>(permutation[i]);
+      if (size < kint32max) {
+        IndexedShuffle<int32>(size, input_mat, output_mat, uniform);
+      } else {
+        IndexedShuffle<int64>(size, input_mat, output_mat, uniform);
       }
     }
   }
diff --git a/tensorflow/core/kernels/sparse_reduce_sum_op.cc b/tensorflow/core/kernels/sparse_reduce_sum_op.cc
index 02b64c48479..20233b120d2 100644
--- a/tensorflow/core/kernels/sparse_reduce_sum_op.cc
+++ b/tensorflow/core/kernels/sparse_reduce_sum_op.cc
@@ -74,6 +74,14 @@ class SparseReduceSumOp : public OpKernel {
     std::vector<int32> axes(num_reduction_axes);
     std::copy_n(reduction_axes_t->flat<int32>().data(), num_reduction_axes,
                 axes.begin());
+    for (int i = 0; i < num_reduction_axes; ++i) {
+      int32 axis = axes[i];
+      OP_REQUIRES(
+          ctx, axis >= -ndims && axis < ndims,
+          errors::InvalidArgument("Invalid reduction dimension ", axis,
+                                  ", for input with ", ndims, " dimensions."));
+      axes[i] = (axes[i] + ndims) % ndims;
+    }
     std::sort(axes.begin(), axes.end());
 
     std::vector<int64> group_by_dims;
diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc
index c8f4f8d25b0..378733f59b8 100644
--- a/tensorflow/core/ops/sparse_ops.cc
+++ b/tensorflow/core/ops/sparse_ops.cc
@@ -430,7 +430,8 @@ Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless
 with length 1.
 
 If `reduction_axes` has no entries, all dimensions are reduced, and a tensor
-with a single element is returned.
+with a single element is returned. Additionally, the axes can be negative;
+they are interpreted according to the indexing rules in Python.
 
 input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a
   SparseTensor, possibly not in canonical ordering.
diff --git a/tensorflow/examples/skflow/boston.py b/tensorflow/examples/skflow/boston.py
index bf2066770c7..9d895bd8e38 100644
--- a/tensorflow/examples/skflow/boston.py
+++ b/tensorflow/examples/skflow/boston.py
@@ -1,4 +1,4 @@
-# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
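
The axis validation added to SparseReduceSumOp above wraps negative axes the
same way Python indexing does. A small self-contained sketch of that
normalization rule (plain Python; the helper name is hypothetical, but the
bounds check and error text mirror the kernel's):

    def normalize_reduction_axes(axes, ndims):
      # Each axis must satisfy -ndims <= axis < ndims, as enforced by the
      # OP_REQUIRES above; it is then wrapped into [0, ndims).
      normalized = []
      for axis in axes:
        if not -ndims <= axis < ndims:
          raise ValueError("Invalid reduction dimension %d, for input with %d "
                           "dimensions." % (axis, ndims))
        normalized.append((axis + ndims) % ndims)
      return sorted(normalized)

    print(normalize_reduction_axes([1, -2], ndims=2))  # [0, 1]
    print(normalize_reduction_axes([-1], ndims=2))     # [1]
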
diff --git a/tensorflow/examples/skflow/iris.py b/tensorflow/examples/skflow/iris.py index c6c566b10fd..ea44428d541 100644 --- a/tensorflow/examples/skflow/iris.py +++ b/tensorflow/examples/skflow/iris.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow/examples/skflow/iris_custom_decay_dnn.py b/tensorflow/examples/skflow/iris_custom_decay_dnn.py index f9c172725d9..b8b1a1dd140 100644 --- a/tensorflow/examples/skflow/iris_custom_decay_dnn.py +++ b/tensorflow/examples/skflow/iris_custom_decay_dnn.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow/examples/skflow/mnist.py b/tensorflow/examples/skflow/mnist.py index 082ecb2f839..d1288a31e98 100644 --- a/tensorflow/examples/skflow/mnist.py +++ b/tensorflow/examples/skflow/mnist.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow/examples/skflow/resnet.py b/tensorflow/examples/skflow/resnet.py index f1f39568d46..03a5d5e5191 100644 --- a/tensorflow/examples/skflow/resnet.py +++ b/tensorflow/examples/skflow/resnet.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,147 +12,155 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -This example builds deep residual network for mnist data. +"""This example builds deep residual network for mnist data. + Reference Paper: http://arxiv.org/pdf/1512.03385.pdf Note that this is still a work-in-progress. Feel free to submit a PR to make this better. """ + from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os from collections import namedtuple from math import sqrt +import os from sklearn import metrics import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data from tensorflow.contrib import learn +from tensorflow.examples.tutorials.mnist import input_data def res_net(x, y, activation=tf.nn.relu): - """Builds a residual network. Note that if the input tensor is 2D, it must be - square in order to be converted to a 4D tensor. + """Builds a residual network. - Borrowed structure from here: https://github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py + Note that if the input tensor is 2D, it must be square in order to be + converted to a 4D tensor. 
- Args: - x: Input of the network - y: Output of the network - activation: Activation function to apply after each convolution - """ + Borrowed structure from: + github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py - # Configurations for each bottleneck block - BottleneckBlock = namedtuple( - 'BottleneckBlock', ['num_layers', 'num_filters', 'bottleneck_size']) - blocks = [BottleneckBlock(3, 128, 32), - BottleneckBlock(3, 256, 64), - BottleneckBlock(3, 512, 128), - BottleneckBlock(3, 1024, 256)] + Args: + x: Input of the network + y: Output of the network + activation: Activation function to apply after each convolution - input_shape = x.get_shape().as_list() + Returns: + Predictions and loss tensors. + """ - # Reshape the input into the right shape if it's 2D tensor - if len(input_shape) == 2: - ndim = int(sqrt(input_shape[1])) - x = tf.reshape(x, [-1, ndim, ndim, 1]) + # Configurations for each bottleneck block. + BottleneckBlock = namedtuple( + 'BottleneckBlock', ['num_layers', 'num_filters', 'bottleneck_size']) + blocks = [BottleneckBlock(3, 128, 32), + BottleneckBlock(3, 256, 64), + BottleneckBlock(3, 512, 128), + BottleneckBlock(3, 1024, 256)] - # First convolution expands to 64 channels - with tf.variable_scope('conv_layer1'): - net = learn.ops.conv2d(x, 64, [7, 7], batch_norm=True, - activation=activation, bias=False) + input_shape = x.get_shape().as_list() - # Max pool - net = tf.nn.max_pool( - net, [1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') + # Reshape the input into the right shape if it's 2D tensor + if len(input_shape) == 2: + ndim = int(sqrt(input_shape[1])) + x = tf.reshape(x, [-1, ndim, ndim, 1]) - # First chain of resnets - with tf.variable_scope('conv_layer2'): - net = learn.ops.conv2d(net, blocks[0].num_filters, - [1, 1], [1, 1, 1, 1], - padding='VALID', bias=True) + # First convolution expands to 64 channels + with tf.variable_scope('conv_layer1'): + net = learn.ops.conv2d(x, 64, [7, 7], batch_norm=True, + activation=activation, bias=False) - # Create each bottleneck building block for each layer - for block_i, block in enumerate(blocks): - for layer_i in range(block.num_layers): + # Max pool + net = tf.nn.max_pool( + net, [1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') - name = 'block_%d/layer_%d' % (block_i, layer_i) + # First chain of resnets + with tf.variable_scope('conv_layer2'): + net = learn.ops.conv2d(net, blocks[0].num_filters, + [1, 1], [1, 1, 1, 1], + padding='VALID', bias=True) - # 1x1 convolution responsible for reducing dimension - with tf.variable_scope(name + '/conv_in'): - conv = learn.ops.conv2d(net, block.bottleneck_size, - [1, 1], [1, 1, 1, 1], - padding='VALID', - activation=activation, - batch_norm=True, - bias=False) + # Create each bottleneck building block for each layer + for block_i, block in enumerate(blocks): + for layer_i in range(block.num_layers): - with tf.variable_scope(name + '/conv_bottleneck'): - conv = learn.ops.conv2d(conv, block.bottleneck_size, - [3, 3], [1, 1, 1, 1], - padding='SAME', - activation=activation, - batch_norm=True, - bias=False) + name = 'block_%d/layer_%d' % (block_i, layer_i) - # 1x1 convolution responsible for restoring dimension - with tf.variable_scope(name + '/conv_out'): - conv = learn.ops.conv2d(conv, block.num_filters, - [1, 1], [1, 1, 1, 1], - padding='VALID', - activation=activation, - batch_norm=True, - bias=False) + # 1x1 convolution responsible for reducing dimension + with tf.variable_scope(name + '/conv_in'): + conv = learn.ops.conv2d(net, block.bottleneck_size, + 
[1, 1], [1, 1, 1, 1], + padding='VALID', + activation=activation, + batch_norm=True, + bias=False) - # shortcut connections that turn the network into its counterpart - # residual function (identity shortcut) - net = conv + net + with tf.variable_scope(name + '/conv_bottleneck'): + conv = learn.ops.conv2d(conv, block.bottleneck_size, + [3, 3], [1, 1, 1, 1], + padding='SAME', + activation=activation, + batch_norm=True, + bias=False) - try: - # upscale to the next block size - next_block = blocks[block_i + 1] - with tf.variable_scope('block_%d/conv_upscale' % block_i): - net = learn.ops.conv2d(net, next_block.num_filters, - [1, 1], [1, 1, 1, 1], - bias=False, - padding='SAME') - except IndexError: - pass + # 1x1 convolution responsible for restoring dimension + with tf.variable_scope(name + '/conv_out'): + conv = learn.ops.conv2d(conv, block.num_filters, + [1, 1], [1, 1, 1, 1], + padding='VALID', + activation=activation, + batch_norm=True, + bias=False) - net_shape = net.get_shape().as_list() - net = tf.nn.avg_pool(net, - ksize=[1, net_shape[1], net_shape[2], 1], - strides=[1, 1, 1, 1], padding='VALID') + # shortcut connections that turn the network into its counterpart + # residual function (identity shortcut) + net = conv + net - net_shape = net.get_shape().as_list() - net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]]) + try: + # upscale to the next block size + next_block = blocks[block_i + 1] + with tf.variable_scope('block_%d/conv_upscale' % block_i): + net = learn.ops.conv2d(net, next_block.num_filters, + [1, 1], [1, 1, 1, 1], + bias=False, + padding='SAME') + except IndexError: + pass - return learn.models.logistic_regression(net, y) + net_shape = net.get_shape().as_list() + net = tf.nn.avg_pool(net, + ksize=[1, net_shape[1], net_shape[2], 1], + strides=[1, 1, 1, 1], padding='VALID') + + net_shape = net.get_shape().as_list() + net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]]) + + return learn.models.logistic_regression(net, y) # Download and load MNIST data. mnist = input_data.read_data_sets('MNIST_data') # Restore model if graph is saved into a folder. -if os.path.exists("models/resnet/graph.pbtxt"): - classifier = learn.TensorFlowEstimator.restore("models/resnet/") +if os.path.exists('models/resnet/graph.pbtxt'): + classifier = learn.TensorFlowEstimator.restore('models/resnet/') else: - # Create a new resnet classifier. - classifier = learn.TensorFlowEstimator( - model_fn=res_net, n_classes=10, batch_size=100, steps=100, - learning_rate=0.001, continue_training=True) + # Create a new resnet classifier. + classifier = learn.TensorFlowEstimator( + model_fn=res_net, n_classes=10, batch_size=100, steps=100, + learning_rate=0.001, continue_training=True) while True: - # Train model and save summaries into logdir. - classifier.fit(mnist.train.images, mnist.train.labels, logdir="models/resnet/") + # Train model and save summaries into logdir. + classifier.fit( + mnist.train.images, mnist.train.labels, logdir='models/resnet/') - # Calculate accuracy. - score = metrics.accuracy_score( - mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64)) - print('Accuracy: {0:f}'.format(score)) + # Calculate accuracy. + score = metrics.accuracy_score( + mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64)) + print('Accuracy: {0:f}'.format(score)) - # Save model graph and checkpoints. - classifier.save("models/resnet/") + # Save model graph and checkpoints. 
+  classifier.save('models/resnet/')
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md
index 9a7476475ec..2f2f5111963 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md
@@ -74,7 +74,7 @@ example_names: ["input0", "input1"],
 features: {
     "kw": VarLenFeature(tf.string),
     "dank": VarLenFeature(tf.int64),
-    "gps": VarLenFeature(tf.float),
+    "gps": VarLenFeature(tf.float32),
 }
 ```
diff --git a/tensorflow/g3doc/api_docs/python/io_ops.md b/tensorflow/g3doc/api_docs/python/io_ops.md
index 127b461e4d2..61d01910524 100644
--- a/tensorflow/g3doc/api_docs/python/io_ops.md
+++ b/tensorflow/g3doc/api_docs/python/io_ops.md
@@ -1289,7 +1289,7 @@ example_names: ["input0", "input1"],
 features: {
     "kw": VarLenFeature(tf.string),
     "dank": VarLenFeature(tf.int64),
-    "gps": VarLenFeature(tf.float),
+    "gps": VarLenFeature(tf.float32),
 }
 ```
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 37178746933..9f28ad8b64e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1916,10 +1916,18 @@ class Graph(object):
 
   def __init__(self):
     """Creates a new, empty Graph."""
-    self._nodes_by_id = dict()
-    self._next_node_id = [dict()]
-    self._next_id_counter = 0
-    self._nodes_by_name = dict()
+    # Protects the core state that may be accessed by multiple readers.
+    # Only state that can be returned via public accessors (`as_graph_def()`,
+    # `get_operations()`, `as_graph_element()`, `get_collection()`, and
+    # `get_collection_ref()`) is protected by the lock. Thread-safety is
+    # provided on a best-effort basis to support buggy programs, and is not
+    # guaranteed by the public `tf.Graph` API.
+    # NOTE(mrry): This does not protect the various stacks. A warning will
+    # be reported if these are used from multiple threads.
+    self._lock = threading.Lock()
+    self._nodes_by_id = dict()  # GUARDED_BY(self._lock)
+    self._next_id_counter = 0  # GUARDED_BY(self._lock)
+    self._nodes_by_name = dict()  # GUARDED_BY(self._lock)
     # Current name stack: uniquified names
     self._name_stack = ""
     # Maps a name used in the graph to the next id to use for that name.
@@ -1987,15 +1995,15 @@ class Graph(object):
     self._check_not_finalized()
     if not isinstance(op, (Tensor, Operation)):
       raise TypeError("op must be a Tensor or Operation: %s" % op)
-
-    if op._id in self._nodes_by_id:
-      raise ValueError("cannot add an op with id %d as it already "
-                       "exists in the graph" % op._id)
-    if op.name in self._nodes_by_name:
-      raise ValueError("cannot add op with name %s as that name "
-                       "is already used" % op.name)
-    self._nodes_by_id[op._id] = op
-    self._nodes_by_name[op.name] = op
+    with self._lock:
+      if op._id in self._nodes_by_id:
+        raise ValueError("cannot add an op with id %d as it already "
+                         "exists in the graph" % op._id)
+      if op.name in self._nodes_by_name:
+        raise ValueError("cannot add op with name %s as that name "
+                         "is already used" % op.name)
+      self._nodes_by_id[op._id] = op
+      self._nodes_by_name[op.name] = op
 
   @property
   def version(self):
@@ -2081,31 +2089,32 @@ class Graph(object):
 
     Raises:
       ValueError: If the `graph_def` would be too large.
""" - graph = graph_pb2.GraphDef() - graph.versions.CopyFrom(self._graph_def_versions) - bytesize = 0 - for op_id in sorted(self._nodes_by_id): - op = self._nodes_by_id[op_id] - if from_version is None or op_id > from_version: - graph.node.extend([op.node_def]) - if op.outputs and add_shapes: - assert "_output_shapes" not in graph.node[-1].attr - graph.node[-1].attr["_output_shapes"].list.shape.extend([ - output.get_shape().as_proto() for output in op.outputs]) - bytesize += op.node_def.ByteSize() - if bytesize >= (1 << 31) or bytesize < 0: - raise ValueError("GraphDef cannot be larger than 2GB.") - if self._functions: - for f in self._functions.values(): - bytesize += f.ByteSize() - if bytesize >= (1 << 31) or bytesize < 0: - raise ValueError("GraphDef cannot be larger than 2GB.") - graph.library.function.extend(self._functions.values()) - for func in self._function_gradient: - grad_def = function_pb2.GradientDef() - grad_def.function_name = func - grad_def.gradient_func = self._function_gradient[func] - graph.library.gradient.extend([grad_def]) + with self._lock: + graph = graph_pb2.GraphDef() + graph.versions.CopyFrom(self._graph_def_versions) + bytesize = 0 + for op_id in sorted(self._nodes_by_id): + op = self._nodes_by_id[op_id] + if from_version is None or op_id > from_version: + graph.node.extend([op.node_def]) + if op.outputs and add_shapes: + assert "_output_shapes" not in graph.node[-1].attr + graph.node[-1].attr["_output_shapes"].list.shape.extend([ + output.get_shape().as_proto() for output in op.outputs]) + bytesize += op.node_def.ByteSize() + if bytesize >= (1 << 31) or bytesize < 0: + raise ValueError("GraphDef cannot be larger than 2GB.") + if self._functions: + for f in self._functions.values(): + bytesize += f.ByteSize() + if bytesize >= (1 << 31) or bytesize < 0: + raise ValueError("GraphDef cannot be larger than 2GB.") + graph.library.function.extend(self._functions.values()) + for func in self._function_gradient: + grad_def = function_pb2.GradientDef() + grad_def.function_name = func + grad_def.gradient_func = self._function_gradient[func] + graph.library.gradient.extend([grad_def]) return graph @@ -2298,7 +2307,11 @@ class Graph(object): example, an invalid string. KeyError: If `obj` is not an object in the graph. """ + with self._lock: + return self._as_graph_element_locked(obj, allow_tensor, allow_operation) + def _as_graph_element_locked(self, obj, allow_tensor, allow_operation): + """See `Graph.as_graph_element()` for details.""" # The vast majority of this function is figuring # out what an API user might be doing wrong, so # that we can give helpful error messages. @@ -2398,7 +2411,8 @@ class Graph(object): Returns: A list of Operations. """ - return list(self._nodes_by_id.values()) + with self._lock: + return list(self._nodes_by_id.values()) def get_operation_by_name(self, name): """Returns the `Operation` with the given `name`. @@ -2445,8 +2459,9 @@ class Graph(object): def _next_id(self): """Id for next Operation instance. Also increments the internal id.""" self._check_not_finalized() - self._next_id_counter += 1 - return self._next_id_counter + with self._lock: + self._next_id_counter += 1 + return self._next_id_counter @property def _last_id(self): @@ -2499,10 +2514,11 @@ class Graph(object): value: The value to add to the collection. 
""" self._check_not_finalized() - if name not in self._collections: - self._collections[name] = [value] - else: - self._collections[name].append(value) + with self._lock: + if name not in self._collections: + self._collections[name] = [value] + else: + self._collections[name].append(value) def add_to_collections(self, names, value): """Stores `value` in the collections given by `names`. @@ -2543,11 +2559,12 @@ class Graph(object): The list of values in the collection with the given `name`, or an empty list if no value has been added to that collection. """ - coll_list = self._collections.get(name, None) - if coll_list is None: - coll_list = [] - self._collections[name] = coll_list - return coll_list + with self._lock: + coll_list = self._collections.get(name, None) + if coll_list is None: + coll_list = [] + self._collections[name] = coll_list + return coll_list def get_collection(self, name, scope=None): """Returns a list of values in the collection with the given `name`. @@ -2571,22 +2588,24 @@ class Graph(object): list contains the values in the order under which they were collected. """ - coll_list = self._collections.get(name, None) - if coll_list is None: - return [] - if scope is None: - return list(coll_list) - else: - c = [] - regex = re.compile(scope) - for item in coll_list: - if hasattr(item, "name") and regex.match(item.name): - c.append(item) - return c + with self._lock: + coll_list = self._collections.get(name, None) + if coll_list is None: + return [] + if scope is None: + return list(coll_list) + else: + c = [] + regex = re.compile(scope) + for item in coll_list: + if hasattr(item, "name") and regex.match(item.name): + c.append(item) + return c def get_all_collection_keys(self): """Returns a list of collections used in this graph.""" - return [x for x in self._collections if isinstance(x, six.string_types)] + with self._lock: + return [x for x in self._collections if isinstance(x, six.string_types)] @contextlib.contextmanager def _original_op(self, op): diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 038799681cb..97452a791d0 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -412,6 +412,17 @@ class ConcatOpTest(tf.test.TestCase): self.assertEqual(n + 3, after - before) print("graph = ", [x.name for x in g.get_operations()]) + def testConcatLargeTensors(self): + # CPU-only test, because it fails on GPUs with <= 4GB memory. + with tf.device("/cpu:0"): + a = tf.ones([2**31 + 6], dtype=tf.int8) + b = tf.zeros([1024], dtype=tf.int8) + onezeros = tf.concat(0, [a, b]) + with self.test_session(use_gpu=False): + # TODO(dga): Add more depth to this test to validate correctness, + # not just non-crashingness, once other large tensor fixes have gone in. 
+      _ = onezeros.eval()
+
 
 class ConcatOffsetTest(tf.test.TestCase):
 
diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
index 595f3f41204..6c817a5da80 100644
--- a/tensorflow/python/kernel_tests/matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -158,14 +158,14 @@ class MatMulTest(tf.test.TestCase):
 
   def testComplex64Random(self):
     for _ in range(10):
-      n, k, m = np.random.randint(1, 100, size=3)
+      n, k, m = np.random.randint(1, 10, size=3)  # Smaller range than float
       x = self._randMatrix(n, k, np.complex64)
       y = self._randMatrix(k, m, np.complex64)
       self._testCpuMatmul(x, y)
 
   def testComplex128Random(self):
     for _ in range(10):
-      n, k, m = np.random.randint(1, 100, size=3)
+      n, k, m = np.random.randint(1, 10, size=3)  # Smaller range than float
       x = self._randMatrix(n, k, np.complex128)
       y = self._randMatrix(k, m, np.complex128)
       self._testCpuMatmul(x, y)
diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index 6b046883d4d..037d1f2c3eb 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -417,16 +417,27 @@ class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase):
 
 class SparseReduceSumTest(test_util.TensorFlowTestCase):
 
-  def _compare(self, sp_t, reduction_axes, keep_dims):
+  # [[1, ?, 1]
+  #  [?, 1, ?]]
+  # where ? is implicitly-zero.
+  ind = np.array([[0, 0], [0, 2], [1, 1]]).astype(np.int64)
+  vals = np.array([1, 1, 1]).astype(np.int32)
+  shape = np.array([2, 3]).astype(np.int64)
+
+  def _compare(self, sp_t, reduction_axes, ndims, keep_dims):
     densified = sparse_ops.sparse_tensor_to_dense(sp_t).eval()
 
     np_ans = densified
     if reduction_axes is None:
       np_ans = np.sum(np_ans, keepdims=keep_dims)
     else:
-      if isinstance(reduction_axes, list):
-        reduction_axes = sorted(reduction_axes)  # loop below depends on sorted
+      if not isinstance(reduction_axes, list):  # Single scalar.
+        reduction_axes = [reduction_axes]
       reduction_axes = np.array(reduction_axes).astype(np.int32)
+      # Handles negative axes.
+      reduction_axes = (reduction_axes + ndims) % ndims
+      # Loop below depends on sorted.
+      reduction_axes.sort()
       for ra in reduction_axes.ravel()[::-1]:
         np_ans = np.sum(np_ans, axis=ra, keepdims=keep_dims)
 
@@ -436,25 +447,21 @@ class SparseReduceSumTest(test_util.TensorFlowTestCase):
 
         self.assertAllClose(np_ans, out)
 
-  def _compare_all(self, sp_t, reduction_axes):
-    self._compare(sp_t, reduction_axes, False)
-    self._compare(sp_t, reduction_axes, True)
+  def _compare_all(self, sp_t, reduction_axes, ndims):
+    self._compare(sp_t, reduction_axes, ndims, False)
+    self._compare(sp_t, reduction_axes, ndims, True)
 
   def testSimpleAndRandomInputs(self):
-    # [[1, ?, 1]
-    #  [?, 1, ?]]
-    # where ? is implictly-zero.
- ind = np.array([[0, 0], [0, 2], [1, 1]]).astype(np.int64) - vals = np.array([1, 1, 1]).astype(np.int32) - shape = np.array([2, 3]).astype(np.int64) - sp_t = ops.SparseTensor(ind, vals, shape) + sp_t = ops.SparseTensor(self.ind, self.vals, self.shape) with self.test_session(use_gpu=False): - self._compare_all(sp_t, None) - self._compare_all(sp_t, 0) - self._compare_all(sp_t, [1]) - self._compare_all(sp_t, [0, 1]) - self._compare_all(sp_t, [1, 0]) + self._compare_all(sp_t, None, ndims=2) + self._compare_all(sp_t, 0, ndims=2) + self._compare_all(sp_t, [1], ndims=2) + self._compare_all(sp_t, [0, 1], ndims=2) + self._compare_all(sp_t, [1, 0], ndims=2) + self._compare_all(sp_t, [-1], ndims=2) + self._compare_all(sp_t, [1, -2], ndims=2) np.random.seed(1618) test_dims = [(1618, 1, 11, 7, 1), (1,), (1, 1, 1)] @@ -462,11 +469,19 @@ class SparseReduceSumTest(test_util.TensorFlowTestCase): for dims in test_dims: sp_t, unused_nnz = _sparsify(np.random.randn(*dims)) # reduce all using None - self._compare_all(sp_t, None) + self._compare_all(sp_t, None, ndims=len(dims)) # reduce random axes from 1D to N-D for d in range(1, len(dims) + 1): axes = np.random.choice(len(dims), size=d, replace=False).tolist() - self._compare_all(sp_t, axes) + self._compare_all(sp_t, axes, ndims=len(dims)) + + def testInvalidAxes(self): + sp_t = ops.SparseTensor(self.ind, self.vals, self.shape) + with self.test_session(use_gpu=False): + with self.assertRaisesOpError("Invalid reduction dimension -3"): + sparse_ops.sparse_reduce_sum(sp_t, -3).eval() + with self.assertRaisesOpError("Invalid reduction dimension 2"): + sparse_ops.sparse_reduce_sum(sp_t, 2).eval() def testGradient(self): np.random.seed(8161) @@ -483,6 +498,12 @@ class SparseReduceSumTest(test_util.TensorFlowTestCase): reduced.eval().shape) self.assertLess(err, 1e-3) + # Tests for negative axes. + reduced = sparse_ops.sparse_reduce_sum(sp_t, -1) + err = tf.test.compute_gradient_error(sp_t.values, (nnz,), reduced, + reduced.eval().shape) + self.assertLess(err, 1e-3) + class SparseMathOpsTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index fa6696cbbc0..9d3a135cf0c 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -225,7 +225,7 @@ def parse_example(serialized, features, name=None, example_names=None): features: { "kw": VarLenFeature(tf.string), "dank": VarLenFeature(tf.int64), - "gps": VarLenFeature(tf.float), + "gps": VarLenFeature(tf.float32), } ``` diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 4df0e9c5d8e..fbce1103fcc 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -548,7 +548,8 @@ def sparse_reduce_sum(sp_input, reduction_axes=None, keep_dims=False): with length 1. If `reduction_axes` has no entries, all dimensions are reduced, and a tensor - with a single element is returned. + with a single element is returned. Additionally, the axes can be negative, + similar to the indexing rules in Python. For example: @@ -558,7 +559,7 @@ def sparse_reduce_sum(sp_input, reduction_axes=None, keep_dims=False): # where ? is implictly-zero. tf.sparse_reduce_sum(x) ==> 3 tf.sparse_reduce_sum(x, 0) ==> [1, 1, 1] - tf.sparse_reduce_sum(x, 1) ==> [2, 1] + tf.sparse_reduce_sum(x, 1) ==> [2, 1] # Can also use -1 as the axis. 
tf.sparse_reduce_sum(x, 1, keep_dims=True) ==> [[2], [1]] tf.sparse_reduce_sum(x, [0, 1]) ==> 3 ``` diff --git a/tensorflow/python/summary/event_accumulator.py b/tensorflow/python/summary/event_accumulator.py index 2ee8a369f76..204ed009129 100644 --- a/tensorflow/python/summary/event_accumulator.py +++ b/tensorflow/python/summary/event_accumulator.py @@ -114,8 +114,7 @@ class EventAccumulator(object): `Accumulator.Scalars(tag)`) allow for the retrieval of all data associated with that tag. - Before usage, the `EventAccumulator` must be activated via `Reload()`. This - method synchronosly loads all of the data written so far. + The `Reload()` method synchronously loads all of the data written so far. Histograms, audio, and images are very large, so storing all of them is not recommended. @@ -175,7 +174,6 @@ class EventAccumulator(object): self._compression_bps = compression_bps self.purge_orphaned_data = purge_orphaned_data - self._activated = False self.most_recent_step = -1 self.most_recent_wall_time = -1 self.file_version = None @@ -188,12 +186,10 @@ class EventAccumulator(object): """Loads all events added since the last call to `Reload`. If `Reload` was never called, loads all events in the file. - Calling `Reload` activates the `EventAccumulator`. Returns: The `EventAccumulator`. """ - self._activated = True with self._generator_mutex: for event in self._generator.Load(): if event.HasField('file_version'): @@ -232,13 +228,9 @@ class EventAccumulator(object): def Tags(self): """Return all tags found in the value stream. - Raises: - RuntimeError: If the `EventAccumulator` has not been activated. - Returns: A `{tagType: ['list', 'of', 'tags']}` dictionary. """ - self._VerifyActivated() return {IMAGES: self._images.Keys(), AUDIO: self._audio.Keys(), HISTOGRAMS: self._histograms.Keys(), @@ -255,12 +247,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `ScalarEvent`s. """ - self._VerifyActivated() return self._scalars.Items(tag) def Graph(self): @@ -268,12 +258,10 @@ class EventAccumulator(object): Raises: ValueError: If there is no graph for this run. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: The `graph_def` proto. """ - self._VerifyActivated() if self._graph is None: raise ValueError('There is no graph in this EventAccumulator') graph = graph_pb2.GraphDef() @@ -288,12 +276,10 @@ class EventAccumulator(object): Raises: ValueError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: The metadata in form of `RunMetadata` proto. """ - self._VerifyActivated() if tag not in self._tagged_metadata: raise ValueError('There is no run metadata with this tag name') @@ -309,12 +295,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `HistogramEvent`s. """ - self._VerifyActivated() return self._histograms.Items(tag) def CompressedHistograms(self, tag): @@ -325,12 +309,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `CompressedHistogramEvent`s. """ - self._VerifyActivated() return self._compressed_histograms.Items(tag) def Images(self, tag): @@ -341,12 +323,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. 
- RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `ImageEvent`s. """ - self._VerifyActivated() return self._images.Items(tag) def Audio(self, tag): @@ -357,12 +337,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `AudioEvent`s. """ - self._VerifyActivated() return self._audio.Items(tag) def _MaybePurgeOrphanedData(self, event): @@ -599,10 +577,6 @@ class EventAccumulator(object): event.wall_time, *expired_per_type) logging.warn(purge_msg) - def _VerifyActivated(self): - if not self._activated: - raise RuntimeError('Accumulator must be activated before it may be used.') - def _GetPurgeMessage(most_recent_step, most_recent_wall_time, event_step, event_wall_time, num_expired_scalars, num_expired_histos, diff --git a/tensorflow/python/summary/event_accumulator_test.py b/tensorflow/python/summary/event_accumulator_test.py index f6b60b91db9..b154d853322 100644 --- a/tensorflow/python/summary/event_accumulator_test.py +++ b/tensorflow/python/summary/event_accumulator_test.py @@ -456,18 +456,6 @@ class MockingEventAccumulatorTest(EventAccumulatorTest): self.assertEqual(acc.Audio('snd1'), [snd1]) self.assertEqual(acc.Audio('snd2'), [snd2]) - def testActivation(self): - gen = _EventGenerator() - acc = ea.EventAccumulator(gen) - self.assertFalse(acc._activated) - with self.assertRaises(RuntimeError): - acc.Tags() - with self.assertRaises(RuntimeError): - acc.Scalars('s1') - acc.Reload() - self.assertTrue(acc._activated) - acc._activated = False - def testKeyError(self): gen = _EventGenerator() acc = ea.EventAccumulator(gen) diff --git a/tensorflow/python/summary/event_multiplexer.py b/tensorflow/python/summary/event_multiplexer.py index a0f4ef402f3..00eab3d215d 100644 --- a/tensorflow/python/summary/event_multiplexer.py +++ b/tensorflow/python/summary/event_multiplexer.py @@ -113,8 +113,7 @@ class EventMultiplexer(object): accumulator. If `Reload` has been called, it will `Reload` the newly created - accumulators. This maintains the invariant that once the Multiplexer was - activated, all of its accumulators are active. + accumulators. Args: path: Path to the event files (or event directory) for given run. @@ -199,7 +198,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's `EventAccumulator` has not been activated. Returns: An array of `event_accumulator.ScalarEvents`. @@ -216,7 +214,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found. ValueError: If the run does not have an associated graph. - RuntimeError: If the run's EventAccumulator has not been activated. Returns: The `graph_def` protobuf data structure. @@ -234,7 +231,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's EventAccumulator has not been activated. Returns: The metadata in the form of `RunMetadata` protobuf data structure. @@ -252,7 +248,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's `EventAccumulator` has not been activated. Returns: An array of `event_accumulator.HistogramEvents`. @@ -270,7 +265,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. 
diff --git a/tensorflow/python/summary/event_multiplexer.py b/tensorflow/python/summary/event_multiplexer.py
index a0f4ef402f3..00eab3d215d 100644
--- a/tensorflow/python/summary/event_multiplexer.py
+++ b/tensorflow/python/summary/event_multiplexer.py
@@ -113,8 +113,7 @@ class EventMultiplexer(object):
       accumulator.
 
     If `Reload` has been called, it will `Reload` the newly created
-    accumulators. This maintains the invariant that once the Multiplexer was
-    activated, all of its accumulators are active.
+    accumulators.
 
     Args:
       path: Path to the event files (or event directory) for given run.
@@ -199,7 +198,6 @@
     Raises:
       KeyError: If the run is not found, or the tag is not available for
         the given run.
-      RuntimeError: If the run's `EventAccumulator` has not been activated.
 
     Returns:
       An array of `event_accumulator.ScalarEvents`.
@@ -216,7 +214,6 @@
     Raises:
       KeyError: If the run is not found.
       ValueError: If the run does not have an associated graph.
-      RuntimeError: If the run's EventAccumulator has not been activated.
 
     Returns:
       The `graph_def` protobuf data structure.
@@ -234,7 +231,6 @@
     Raises:
       KeyError: If the run is not found, or the tag is not available for
         the given run.
-      RuntimeError: If the run's EventAccumulator has not been activated.
 
     Returns:
       The metadata in the form of `RunMetadata` protobuf data structure.
@@ -252,7 +248,6 @@
     Raises:
       KeyError: If the run is not found, or the tag is not available for
         the given run.
-      RuntimeError: If the run's `EventAccumulator` has not been activated.
 
     Returns:
       An array of `event_accumulator.HistogramEvents`.
@@ -270,7 +265,6 @@
     Raises:
       KeyError: If the run is not found, or the tag is not available for
         the given run.
-      RuntimeError: If the run's EventAccumulator has not been activated.
 
     Returns:
       An array of `event_accumulator.CompressedHistogramEvents`.
@@ -288,7 +282,6 @@
     Raises:
       KeyError: If the run is not found, or the tag is not available for
         the given run.
-      RuntimeError: If the run's `EventAccumulator` has not been activated.
 
     Returns:
       An array of `event_accumulator.ImageEvents`.
@@ -306,7 +299,6 @@
     Raises:
       KeyError: If the run is not found, or the tag is not available for
         the given run.
-      RuntimeError: If the run's `EventAccumulator` has not been activated.
 
     Returns:
       An array of `event_accumulator.AudioEvents`.
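The multiplexer side mirrors this: per-run accessors now raise only `KeyError` for unknown runs or tags. A minimal sketch, assuming a hypothetical run directory under `/tmp/logdir` and the multiplexer's `AddRun`/`Scalars(run, tag)` interface:

```python
from tensorflow.python.summary import event_multiplexer as em

multiplexer = em.EventMultiplexer()
multiplexer.AddRun('/tmp/logdir/run1', name='run1')  # hypothetical path
multiplexer.Reload()  # reloads every accumulator, existing and newly added

# Raises KeyError for an unknown run or tag -- but never RuntimeError,
# since there is no longer an "activated" state to check.
for event in multiplexer.Scalars('run1', 'loss'):
  print(event.step, event.value)
```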
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 8f2b3d1c7c2..15aeee645c6 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -184,6 +184,7 @@ bool IsCudnnR2() {
  __macro(cudnnSetStream)                          \
  __macro(cudnnActivationForward)                  \
  __macro(cudnnConvolutionForward)                 \
+  __macro(cudnnConvolutionBackwardBias)            \
  __macro(cudnnGetConvolutionForwardWorkspaceSize) \
  __macro(cudnnTransformTensor)                    \
  __macro(cudnnSetConvolutionNdDescriptor)         \
@@ -1493,6 +1494,72 @@ bool CudnnSupport::DoConvolveBackwardFilter(
                                      algorithm, output_profile_result);
}

+template <class T>
+bool CudnnSupport::DoConvolveBackwardBiasImpl(
+    Stream* stream, int cudnn_type,  // Actually cudnnDataType_t.
+    const dnn::BatchDescriptor& input_descriptor,
+    const DeviceMemory<T>& input_data,
+    const dnn::BatchDescriptor& bias_descriptor,
+    DeviceMemory<T>* backward_bias_data) {
+  mutex_lock lock{dnn_handle_mutex_};
+  auto status = dynload::cudnnSetStream(parent_, ToHandle(dnn_handle_),
+                                        AsCUDAStreamValue(stream));
+  if (status != CUDNN_STATUS_SUCCESS) {
+    LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status);
+  }
+
+  ScopedTensorDescriptor input_nd{parent_, input_descriptor,
+                                  static_cast<cudnnDataType_t>(cudnn_type)};
+  ScopedTensorDescriptor bias_nd{parent_, bias_descriptor,
+                                 static_cast<cudnnDataType_t>(cudnn_type)};
+
+  // Alpha is the scaling factor for input.
+  float alpha = 1.0;
+  // Beta is the scaling factor for output.
+  float beta = 0.0;
+
+  status = dynload::cudnnConvolutionBackwardBias(
+      parent_, ToHandle(dnn_handle_), &alpha, input_nd.handle(),
+      input_data.opaque(), &beta, bias_nd.handle(),
+      backward_bias_data->opaque());
+  if (status != CUDNN_STATUS_SUCCESS) {
+    LOG(FATAL) << "failed to enqueue backward convolution on stream: "
+               << ToString(status);
+    return false;
+  }
+  return true;
+}
+
+bool CudnnSupport::DoConvolveBackwardBias(
+    Stream* stream, const BatchDescriptor& input_descriptor,
+    const DeviceMemory<double>& input_data,
+    const BatchDescriptor& bias_descriptor,
+    DeviceMemory<double>* backward_bias_data) {
+  return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_DOUBLE, input_descriptor,
+                                    input_data, bias_descriptor,
+                                    backward_bias_data);
+}
+
+bool CudnnSupport::DoConvolveBackwardBias(
+    Stream* stream, const BatchDescriptor& input_descriptor,
+    const DeviceMemory<float>& input_data,
+    const BatchDescriptor& bias_descriptor,
+    DeviceMemory<float>* backward_bias_data) {
+  return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_FLOAT, input_descriptor,
+                                    input_data, bias_descriptor,
+                                    backward_bias_data);
+}
+
+bool CudnnSupport::DoConvolveBackwardBias(
+    Stream* stream, const BatchDescriptor& input_descriptor,
+    const DeviceMemory<Eigen::half>& input_data,
+    const BatchDescriptor& bias_descriptor,
+    DeviceMemory<Eigen::half>* backward_bias_data) {
+  return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_HALF, input_descriptor,
+                                    input_data, bias_descriptor,
+                                    backward_bias_data);
+}
+
bool CudnnSupport::DoMatMul(Stream* stream,
                            const DeviceMemory<float>& input_data,
                            const DeviceMemory<float>& weights,
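For readers unfamiliar with `cudnnConvolutionBackwardBias`: it reduces the incoming gradient over every dimension except the channel (feature-map) dimension, which is exactly the gradient of a broadcast bias add. A NumPy sketch of those semantics, assuming an NCHW gradient tensor and the `alpha`/`beta` scaling used in the implementation above (an illustration of the math, not the executed code path):

```python
import numpy as np

def conv_backward_bias(grad_output, alpha=1.0, beta=0.0, prior=None):
  """Reference semantics of a backward-bias convolution for NCHW data.

  grad_output: ndarray of shape [N, C, H, W], the backpropagated gradient.
  Returns an array of shape [1, C, 1, 1]: the input dimensions with batch
  and spatial dimensions collapsed to 1, matching the bias descriptor.
  """
  grad_bias = grad_output.sum(axis=(0, 2, 3), keepdims=True)
  if prior is None:
    prior = np.zeros_like(grad_bias)
  # alpha scales the new result; beta scales whatever was already in the
  # output buffer. The implementation above passes alpha=1, beta=0.
  return alpha * grad_bias + beta * prior

grad = np.random.randn(8, 16, 5, 5).astype(np.float32)
print(conv_backward_bias(grad).shape)  # (1, 16, 1, 1)
```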
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 9388969770d..e3c9175e019 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -140,6 +140,24 @@ class CudnnSupport : public dnn::DnnSupport {
                                ScratchAllocator* scratch_allocator,
                                dnn::AlgorithmType algorithm,
                                dnn::ProfileResult* output_profile_result) override;

+  bool DoConvolveBackwardBias(
+      Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+      const DeviceMemory<double>& input_data,
+      const dnn::BatchDescriptor& bias_descriptor,
+      DeviceMemory<double>* backward_bias_data) override;
+
+  bool DoConvolveBackwardBias(Stream* stream,
+                              const dnn::BatchDescriptor& input_descriptor,
+                              const DeviceMemory<float>& input_data,
+                              const dnn::BatchDescriptor& bias_descriptor,
+                              DeviceMemory<float>* backward_bias_data) override;
+
+  bool DoConvolveBackwardBias(
+      Stream* stream, const dnn::BatchDescriptor& input_descriptor,
+      const DeviceMemory<Eigen::half>& input_data,
+      const dnn::BatchDescriptor& bias_descriptor,
+      DeviceMemory<Eigen::half>* backward_bias_data) override;
+
  bool DoMatMul(Stream* stream, const DeviceMemory<float>& input_data,
                const DeviceMemory<float>& weights,
                const dnn::BatchDescriptor& input_dimensions,
@@ -311,6 +329,14 @@ class CudnnSupport : public dnn::DnnSupport {
                              dnn::AlgorithmType algorithm,
                              dnn::ProfileResult* output_profile_result);

+  template <class T>
+  bool DoConvolveBackwardBiasImpl(Stream* stream,
+                                  int cudnn_type,  // Actually cudnnDataType_t.
+                                  const dnn::BatchDescriptor& input_descriptor,
+                                  const DeviceMemory<T>& input_data,
+                                  const dnn::BatchDescriptor& bias_descriptor,
+                                  DeviceMemory<T>* backward_bias_data);
+
  SE_DISALLOW_COPY_AND_ASSIGN(CudnnSupport);
};

diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 01c457c90c7..6eaadcadc20 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -849,6 +849,43 @@ class DnnSupport {
                                       ScratchAllocator* scratch_allocator,
                                       AlgorithmType algorithm,
                                       ProfileResult* output_profile_result) = 0;

+  // Enqueues a backward convolution (for bias) operation onto the stream.
+  //
+  // Arguments:
+  //  stream: borrowed pointer to the stream that the 'convolve' operation
+  //    should be enqueued onto.
+  //  input_descriptor: dimensions of the input layer.
+  //  input_data: un-owned device memory region which contains the
+  //    convolution input.
+  //  bias_descriptor: dimensions of the bias tensor. Should be the same as the
+  //    input dimensions, but with the spatial dimensions set to 1.
+  //  backward_bias_data: un-owned device memory region in which to place the
+  //    backprop of the bias.
+  virtual bool DoConvolveBackwardBias(Stream* stream,
+                                      const BatchDescriptor& input_descriptor,
+                                      const DeviceMemory<double>& input_data,
+                                      const BatchDescriptor& bias_descriptor,
+                                      DeviceMemory<double>* backward_bias_data) {
+    return false;
+  }
+
+  virtual bool DoConvolveBackwardBias(
+      Stream* stream, const BatchDescriptor& input_descriptor,
+      const DeviceMemory<float>& input_data,
+      const BatchDescriptor& bias_descriptor,
+      DeviceMemory<float>* backward_bias_data) {
+    return false;
+  }
+
+  virtual bool DoConvolveBackwardBias(
+      Stream* stream, const BatchDescriptor& input_descriptor,
+      const DeviceMemory<Eigen::half>& input_data,
+      const BatchDescriptor& bias_descriptor,
+      DeviceMemory<Eigen::half>* backward_bias_data) {
+    return false;
+  }
+
  // Fully connects the "nodes" (float values) in input_data with
  // shape input_dimensions to output_data with output_dimensions
  // using provided weights. This is equivalent to computing a matrix
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 57a10b84f3b..3d264989026 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -741,6 +741,57 @@ Stream &Stream::ThenConvolveBackwardFilter(
                                         /*scratch_allocator=*/nullptr);
}

+template <typename T>
+Stream &Stream::ThenConvolveBackwardBiasImpl(
+    const dnn::BatchDescriptor &input_descriptor,
+    const DeviceMemory<T> &input_data,
+    const dnn::BatchDescriptor &bias_descriptor,
+    DeviceMemory<T> *backward_bias_data) {
+  VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), PARAM(bias_descriptor),
+            PARAM(backward_bias_data));
+
+  if (ok()) {
+    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+      CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data,
+                                             bias_descriptor,
+                                             backward_bias_data));
+    } else {
+      SetError();
+      LOG(WARNING)
+          << "attempting to perform DNN operation using StreamExecutor "
+             "without DNN support";
+    }
+  }
+  return *this;
+}
+
+Stream &Stream::ThenConvolveBackwardBias(
+    const dnn::BatchDescriptor &input_descriptor,
+    const DeviceMemory<double> &input_data,
+    const dnn::BatchDescriptor &bias_descriptor,
+    DeviceMemory<double> *backward_bias_data) {
+  return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
+                                      bias_descriptor, backward_bias_data);
+}
+
+Stream &Stream::ThenConvolveBackwardBias(
+    const dnn::BatchDescriptor &input_descriptor,
+    const DeviceMemory<float> &input_data,
+    const dnn::BatchDescriptor &bias_descriptor,
+    DeviceMemory<float> *backward_bias_data) {
+  return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
+                                      bias_descriptor, backward_bias_data);
+}
+
+Stream &Stream::ThenConvolveBackwardBias(
+    const dnn::BatchDescriptor &input_descriptor,
+    const DeviceMemory<Eigen::half> &input_data,
+    const dnn::BatchDescriptor &bias_descriptor,
+    DeviceMemory<Eigen::half> *backward_bias_data) {
+  return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
+                                      bias_descriptor, backward_bias_data);
+}
+
Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data,
                           const DeviceMemory<float> &weights,
                           const dnn::BatchDescriptor &input_dimensions,
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index f5583d62215..b14bf06cdc8 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -371,6 +371,22 @@ class Stream {
                                    ScratchAllocator *scratch_allocator,
                                    dnn::AlgorithmType algorithm,
                                    dnn::ProfileResult *output_profile_result);

+  Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
+                                   const DeviceMemory<double> &input_data,
+                                   const dnn::BatchDescriptor &bias_descriptor,
+                                   DeviceMemory<double> *backward_bias_data);
+
+  Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
+                                   const DeviceMemory<float> &input_data,
+                                   const dnn::BatchDescriptor &bias_descriptor,
+                                   DeviceMemory<float> *backward_bias_data);
+
+  Stream &ThenConvolveBackwardBias(
+      const dnn::BatchDescriptor &input_descriptor,
+      const DeviceMemory<Eigen::half> &input_data,
+      const dnn::BatchDescriptor &bias_descriptor,
+      DeviceMemory<Eigen::half> *backward_bias_data);
+
  Stream &ThenMatMul(const DeviceMemory<float> &input_data,
                     const DeviceMemory<float> &weights,
                     const dnn::BatchDescriptor &input_dimensions,
@@ -1439,6 +1455,14 @@ class Stream {
  // BlockHostUntilDone() is called.
  internal::TemporaryMemoryManager temporary_memory_manager_;

+  // Implementation of ThenConvolveBackwardBias that is shared by all types.
+  template <typename T>
+  Stream &ThenConvolveBackwardBiasImpl(
+      const dnn::BatchDescriptor &input_descriptor,
+      const DeviceMemory<T> &input_data,
+      const dnn::BatchDescriptor &bias_descriptor,
+      DeviceMemory<T> *backward_bias_data);
+
  SE_DISALLOW_COPY_AND_ASSIGN(Stream);
};
diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py
index cfdd6c56543..b025a2f5b9f 100644
--- a/tensorflow/tensorboard/backend/server.py
+++ b/tensorflow/tensorboard/backend/server.py
@@ -120,12 +120,9 @@ def StartMultiplexerReloadingThread(multiplexer, path_to_run, load_interval):
 
   Returns:
     A started `threading.Thread` that reloads the multiplexer.
   """
-  # Ensure the Multiplexer initializes in a loaded state before it adds runs
-  # So it can handle HTTP requests while runs are loading
-  multiplexer.Reload()
-
+  # We don't call multiplexer.Reload() here because that would make
+  # AddRunsFromDirectory block until the runs have all loaded.
  for path in path_to_run.keys():
    if gcs.IsGCSPath(path):
      gcs.CheckIsSupported()
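The net effect is that the initial load moves entirely onto the reloader thread, so the HTTP server can come up immediately. A minimal sketch of the pattern (a hypothetical helper, not the exact TensorBoard code), assuming a `multiplexer` object with a `Reload()` method:

```python
import threading
import time

def start_reloading_thread(multiplexer, load_interval):
  """Hypothetical sketch: reload the multiplexer forever on a daemon thread."""
  def _reload_forever():
    while True:
      multiplexer.Reload()       # the first load now happens here, off the
      time.sleep(load_interval)  # request-serving path
  thread = threading.Thread(target=_reload_forever, name='Reloader')
  thread.daemon = True
  thread.start()
  return thread
```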
@@ -#include "eigen-eigen-a5e9085a94e8/Eigen/QR" +#include "eigen-eigen-f3a13643ac1f/Eigen/QR" diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor index a9b263f5ae3..8d363c3845f 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor +++ b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor @@ -1 +1 @@ -#include "eigen-eigen-a5e9085a94e8/unsupported/Eigen/CXX11/Tensor" +#include "eigen-eigen-f3a13643ac1f/unsupported/Eigen/CXX11/Tensor" diff --git a/third_party/gpus/cuda/BUILD b/third_party/gpus/cuda/BUILD index a0d1d6561b0..b68104385d6 100644 --- a/third_party/gpus/cuda/BUILD +++ b/third_party/gpus/cuda/BUILD @@ -31,6 +31,15 @@ config_setting( }, ) +# Equivalent to using_clang && -c opt. +config_setting( + name = "using_clang_opt", + values = { + "define": "using_cuda_clang=true", + "compilation_mode": "opt", + }, +) + config_setting( name = "darwin", values = {"cpu": "darwin"},