Merge changes from github.

PiperOrigin-RevId: 194997009
This commit is contained in:
Patrick Nguyen 2018-05-01 14:28:36 -07:00 committed by TensorFlower Gardener
parent 46bf1e8934
commit 325d0ef21a
121 changed files with 1809 additions and 724 deletions

.gitignore vendored
View File

@ -27,6 +27,7 @@ Podfile.lock
/tensorflow/contrib/lite/examples/ios/simple/data/*.txt
/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
xcuserdata/**
/api_init_files_list.txt
# Android
.gradle

View File

@ -1700,7 +1700,7 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) {
TestGradientsError(false);
}
// REGISTER_OP for CApiTestAttributesTest test cases.
// REGISTER_OP for CApiAttributesTest test cases.
// Registers two ops, each with a single attribute called 'v'.
// The attribute in one op will have a type 'type', the other
// will have list(type).

View File

@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad);
Status StridedSliceGradHelper(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
Input x = Shape(scope, op.input(0));
Input begin = op.input(1);
Input end = op.input(2);
Input strides = op.input(3);
int64 begin_mask;
int64 end_mask;
int64 ellipsis_mask;
int64 new_axis_mask;
int64 shrink_axis_mask;
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask));
TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask));
grad_outputs->push_back(
StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0],
StridedSliceGrad::BeginMask(begin_mask)
.EndMask(end_mask)
.EllipsisMask(ellipsis_mask)
.NewAxisMask(new_axis_mask)
.ShrinkAxisMask(shrink_axis_mask)));
// No gradients returned for begin, end and strides
grad_outputs->push_back(NoGradient());
grad_outputs->push_back(NoGradient());
grad_outputs->push_back(NoGradient());
return scope.status();
}
REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper);
} // anonymous namespace
} // namespace ops
} // namespace tensorflow

View File

@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) {
RunTest(x, x_shape, y, y_shape);
}
TEST_F(ArrayGradTest, StridedSliceGrad) {
TensorShape x_shape({6, 4, 4});
auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
// y = x[2:6:2, 1:3, 1:3]
auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1});
// y.shape = [2, 2, 2];
RunTest(x, x_shape, y, {2, 2, 2});
// y = x[2:6:2, 1:3, 1:3]
// begin_mask = 1<<1 (ignore begin_index = 1)
// end_mask = 1<<2 (ignore end_index = 2)
y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1},
StridedSlice::BeginMask(1 << 1).EndMask(1 << 2));
// y.shape = [2, 3, 3];
RunTest(x, x_shape, y, {2, 3, 3});
// y = x[tf.newaxis, 2:6:2, 1:3, 1:3]
y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1},
StridedSlice::NewAxisMask(1 << 0));
// y.shape = [1, 2, 2, 2];
RunTest(x, x_shape, y, {1, 2, 2, 2});
}
} // namespace
} // namespace tensorflow
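The mask arithmetic in these test cases can be sanity-checked directly (a NumPy sketch, not part of the test): `BeginMask(1 << 1)` drops the begin index of dimension 1 and `EndMask(1 << 2)` drops the end index of dimension 2, so the effective slice is `x[2:6:2, :3, 1:]`:

```python
import numpy as np

x = np.zeros((6, 4, 4))

# Nominal slice: x[2:6:2, 1:3, 1:3] -> shape (2, 2, 2).
assert x[2:6:2, 1:3, 1:3].shape == (2, 2, 2)

# begin_mask = 1 << 1: dim 1 starts from 0; end_mask = 1 << 2: dim 2 runs
# to the end. Effectively x[2:6:2, :3, 1:] -> shape (2, 3, 3).
assert x[2:6:2, :3, 1:].shape == (2, 3, 3)
```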

View File

@ -56,8 +56,6 @@ Use AutoGraph in one of the following ways, described below:
1. Annotations (simpler)
2. Functional API (more flexible)
NOTE: You can find more examples in this [interactive notebook](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb).
To get started, install the latest nightly TensorFlow build:
```shell
@ -70,6 +68,13 @@ Then import the `autograph` module from `tf.contrib`:
from tensorflow.contrib import autograph as ag
```
### Interactive demo notebooks
For more extensive examples, check out these interactive notebooks:
* [RNN trained using Keras and Estimators](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb)
* [Demo from the TF Dev Summit 2018](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb)
## Using with annotations
Annotating a function or class with `@convert` converts it in place:
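For instance (a minimal sketch; the decorator spelling `@ag.convert()` follows the `import` above, and the function body is illustrative):

```python
from tensorflow.contrib import autograph as ag

@ag.convert()
def collatz_steps(n):
  # Data-dependent control flow: when `n` is a Tensor, the Python
  # `while`/`if` below are converted to tf.while_loop/tf.cond.
  steps = 0
  while n > 1:
    if n % 2 == 0:
      n = n // 2
    else:
      n = 3 * n + 1
    steps += 1
  return steps
```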

View File

@ -84,7 +84,7 @@ if (NOT WIN32)
option(systemlib_ALL "Turn on every possible systemlib_* options" OFF)
if (systemlib_ALL)
set (systmelib_ZLIB ON)
set (systemlib_ZLIB ON)
endif (systemlib_ALL)
endif()
@ -471,6 +471,10 @@ if (tensorflow_ENABLE_GPU)
include_directories(${tensorflow_source_dir}/third_party/gpus)
# add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
if(NOT WIN32)
# add gomp to tensorflow_EXTERNAL_LIBRARIES, needed by libcusolver.so
list(APPEND tensorflow_EXTERNAL_LIBRARIES gomp)
endif()
# NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
# in the default build is upgraded.

View File

@ -177,6 +177,16 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
else(WIN32)
if(tensorflow_ENABLE_GPU)
file(GLOB_RECURSE tf_core_kernels_gpu_exclude_srcs
# temporarily disable nccl as it needs to be ported with gpu
"${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_manager.cc"
"${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_gpu_exclude_srcs})
endif(tensorflow_ENABLE_GPU)
endif(WIN32)
file(GLOB_RECURSE tf_core_gpu_kernels_srcs

View File

@ -64,6 +64,8 @@ file(GLOB tf_stream_executor_srcs
if (tensorflow_ENABLE_GPU)
file(GLOB tf_stream_executor_gpu_srcs
"${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
"${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h"
"${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc"
)
if (NOT tensorflow_BUILD_CC_TESTS)
file(GLOB tf_stream_executor_gpu_tests

View File

@ -152,6 +152,22 @@ class CrfTest(test.TestCase):
self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
def testCrfLogNormZeroSeqLength(self):
"""
Test `crf_log_norm` when `sequence_lengths` contains one or more zeros.
"""
with self.test_session() as sess:
inputs = constant_op.constant(np.ones([2, 10, 5],
dtype=np.float32))
transition_params = constant_op.constant(np.ones([5, 5],
dtype=np.float32))
sequence_lengths = constant_op.constant(np.zeros([2],
dtype=np.int32))
expected_log_norm = np.zeros([2], dtype=np.float32)
log_norm = crf.crf_log_norm(inputs, sequence_lengths, transition_params)
tf_log_norm = sess.run(log_norm)
self.assertAllClose(tf_log_norm, expected_log_norm)
def testCrfLogLikelihood(self):
inputs = np.array(
[[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
dtype=np.float32))
sequence_lengths = constant_op.constant(np.zeros([2],
dtype=np.int32))
values = crf.crf_decode(inputs, transition_params, sequence_lengths)
tags, scores = sess.run(values)
self.assertEqual(len(tags.shape), 2)
self.assertEqual(len(scores.shape), 1)
tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
tf_tags, tf_scores = sess.run([tags, scores])
self.assertEqual(len(tf_tags.shape), 2)
self.assertEqual(len(tf_scores.shape), 1)
if __name__ == "__main__":
test.main()

View File

@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
example_inds = array_ops.reshape(
math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
return array_ops.gather_nd(
sequence_scores = array_ops.gather_nd(
array_ops.squeeze(inputs, [1]),
array_ops.concat([example_inds, tag_indices], axis=1))
sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(sequence_scores),
sequence_scores)
return sequence_scores
def _multi_seq_fn():
# Compute the scores of the given tag sequence.
@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
# the "initial state" (the unary potentials).
def _single_seq_fn():
return math_ops.reduce_logsumexp(first_input, [1])
log_norm = math_ops.reduce_logsumexp(first_input, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
def _multi_seq_fn():
"""Forward computation of alpha values."""
@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# Compute the alpha values in the forward algorithm in order to get the
# partition function.
forward_cell = CrfForwardRnnCell(transition_params)
# Sequence length is not allowed to be less than zero.
sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
_, alphas = rnn.dynamic_rnn(
cell=forward_cell,
inputs=rest_of_input,
sequence_length=sequence_lengths - 1,
sequence_length=sequence_lengths_less_one,
initial_state=first_input,
dtype=dtypes.float32)
log_norm = math_ops.reduce_logsumexp(alphas, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
max_seq_len = array_ops.shape(inputs)[1]
@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O]
inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O]
# sequence length is not allowed to be less than zero
# Sequence length is not allowed to be less than zero.
sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O]
crf_fwd_cell,

View File

@ -0,0 +1,109 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Bijector."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops.bijectors.ordered import Ordered
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite
from tensorflow.python.platform import test
class OrderedBijectorTest(test.TestCase):
"""Tests correctness of the ordered transformation."""
def setUp(self):
self._rng = np.random.RandomState(42)
@test_util.run_in_graph_and_eager_modes()
def testBijectorVector(self):
with self.test_session():
ordered = Ordered()
self.assertEqual("ordered", ordered.name)
x = np.asarray([[2., 3, 4], [4., 8, 13]])
y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
self.assertAllClose(y, self.evaluate(ordered.forward(x)))
self.assertAllClose(x, self.evaluate(ordered.inverse(y)))
self.assertAllClose(
np.sum(np.asarray(y)[..., 1:], axis=-1),
self.evaluate(ordered.inverse_log_det_jacobian(y, event_ndims=1)),
atol=0.,
rtol=1e-7)
self.assertAllClose(
self.evaluate(-ordered.inverse_log_det_jacobian(y, event_ndims=1)),
self.evaluate(ordered.forward_log_det_jacobian(x, event_ndims=1)),
atol=0.,
rtol=1e-7)
def testBijectorUnknownShape(self):
with self.test_session():
ordered = Ordered()
self.assertEqual("ordered", ordered.name)
x = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
real_x = np.asarray([[2., 3, 4], [4., 8, 13]])
y = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
real_y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
self.assertAllClose(real_y, ordered.forward(x).eval(
feed_dict={x: real_x}))
self.assertAllClose(real_x, ordered.inverse(y).eval(
feed_dict={y: real_y}))
self.assertAllClose(
np.sum(np.asarray(real_y)[..., 1:], axis=-1),
ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
feed_dict={y: real_y}),
atol=0.,
rtol=1e-7)
self.assertAllClose(
-ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
feed_dict={y: real_y}),
ordered.forward_log_det_jacobian(x, event_ndims=1).eval(
feed_dict={x: real_x}),
atol=0.,
rtol=1e-7)
@test_util.run_in_graph_and_eager_modes()
def testShapeGetters(self):
with self.test_session():
x = tensor_shape.TensorShape([4])
y = tensor_shape.TensorShape([4])
bijector = Ordered(validate_args=True)
self.assertAllEqual(y, bijector.forward_event_shape(x))
self.assertAllEqual(y.as_list(),
self.evaluate(bijector.forward_event_shape_tensor(
x.as_list())))
self.assertAllEqual(x, bijector.inverse_event_shape(y))
self.assertAllEqual(x.as_list(),
self.evaluate(bijector.inverse_event_shape_tensor(
y.as_list())))
def testBijectiveAndFinite(self):
with self.test_session():
ordered = Ordered()
x = np.sort(self._rng.randn(3, 10), axis=-1).astype(np.float32)
y = (self._rng.randn(3, 10)).astype(np.float32)
assert_bijective_and_finite(ordered, x, y, event_ndims=1)
if __name__ == "__main__":
test.main()

View File

@ -30,6 +30,7 @@
@@Invert
@@Kumaraswamy
@@MaskedAutoregressiveFlow
@@Ordered
@@Permute
@@PowerTransform
@@RealNVP
@ -67,6 +68,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.inline import *
from tensorflow.contrib.distributions.python.ops.bijectors.invert import *
from tensorflow.contrib.distributions.python.ops.bijectors.kumaraswamy import *
from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import *
from tensorflow.contrib.distributions.python.ops.bijectors.ordered import *
from tensorflow.contrib.distributions.python.ops.bijectors.permute import *
from tensorflow.contrib.distributions.python.ops.bijectors.power_transform import *
from tensorflow.contrib.distributions.python.ops.bijectors.real_nvp import *

View File

@ -170,7 +170,7 @@ class CholeskyOuterProduct(bijector.Bijector):
sum_weighted_log_diag = array_ops.squeeze(
math_ops.matmul(math_ops.log(diag),
exponents[..., array_ops.newaxis]),
squeeze_dims=-1)
axis=-1)
fldj = p_float * np.log(2.) + sum_weighted_log_diag
return fldj

View File

@ -18,14 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector
__all__ = [
"Invert",
]
class Invert(bijector_lib.Bijector):
class Invert(bijector.Bijector):
"""Bijector which inverts another Bijector.
Example Use: [ExpGammaDistribution (see Background & Context)](

View File

@ -32,7 +32,7 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops import variable_scope as variable_scope_lib
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector
__all__ = [
@ -42,7 +42,7 @@ __all__ = [
]
class MaskedAutoregressiveFlow(bijector_lib.Bijector):
class MaskedAutoregressiveFlow(bijector.Bijector):
"""Affine MaskedAutoregressiveFlow bijector for vector-valued events.
The affine autoregressive flow [(Papamakarios et al., 2016)][3] provides a

View File

@ -0,0 +1,125 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ordered bijector."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
"Ordered",
]
class Ordered(bijector.Bijector):
"""Bijector which maps a tensor x_k that has increasing elements in the last
dimension to an unconstrained tensor y_k.
Both the domain and the codomain of the mapping are [-inf, inf]; however,
the input of the forward mapping must be strictly increasing.
The inverse of the bijector applied to a normal random vector `y ~ N(0, 1)`
gives back a sorted random vector with the same distribution `x ~ N(0, 1)`,
where `x = sort(y)`.
On the last dimension of the tensor, the `Ordered` bijector performs:
`y[0] = x[0]`
`y[1:] = math_ops.log(x[1:] - x[:-1])`
#### Example Use:
```python
bijector.Ordered().forward([2, 3, 4])
# Result: [2., 0., 0.]
bijector.Ordered().inverse([0.06428002, -1.07774478, -0.71530371])
# Result: [0.06428002, 0.40464228, 0.8936858]
```
"""
def __init__(self, validate_args=False, name="ordered"):
super(Ordered, self).__init__(
forward_min_event_ndims=1,
validate_args=validate_args,
name=name)
def _forward_event_shape(self, input_shape):
if input_shape.ndims is None or input_shape[-1] is None:
return input_shape
return tensor_shape.TensorShape([input_shape[-1]])
def _forward_event_shape_tensor(self, input_shape):
return (input_shape[-1])[..., array_ops.newaxis]
def _inverse_event_shape(self, output_shape):
if output_shape.ndims is None or output_shape[-1] is None:
return output_shape
if output_shape[-1] <= 1:
raise ValueError("output_shape[-1] = %d <= 1" % output_shape[-1])
return tensor_shape.TensorShape([output_shape[-1]])
def _inverse_event_shape_tensor(self, output_shape):
if self.validate_args:
is_greater_one = check_ops.assert_greater(
output_shape[-1], 1, message="Need last dimension greater than 1.")
output_shape = control_flow_ops.with_dependencies(
[is_greater_one], output_shape)
return (output_shape[-1])[..., array_ops.newaxis]
def _forward(self, x):
x = self._maybe_assert_valid_x(x)
y0 = x[..., 0, array_ops.newaxis]
yk = math_ops.log(x[..., 1:] - x[..., :-1])
y = array_ops.concat([y0, yk], axis=-1)
return y
def _inverse(self, y):
x0 = y[..., 0, array_ops.newaxis]
xk = math_ops.exp(y[..., 1:])
x = array_ops.concat([x0, xk], axis=-1)
return math_ops.cumsum(x, axis=-1)
def _inverse_log_det_jacobian(self, y):
# The Jacobian of the inverse mapping is lower
# triangular, with the diagonal elements being:
# J[i,i] = 1 if i=1, and
# exp(y_i) if 1<i<=K
# which gives the absolute Jacobian determinant:
# |det(Jac)| = prod_{i=1}^{K} exp(y[i]).
# (1) - Stan Modeling Language User's Guide and Reference Manual
# Version 2.17.0, section 35.2
return math_ops.reduce_sum(y[..., 1:], axis=-1)
def _forward_log_det_jacobian(self, x):
x = self._maybe_assert_valid_x(x)
return -math_ops.reduce_sum(
math_ops.log(x[..., 1:] - x[..., :-1]),
axis=-1)
def _maybe_assert_valid_x(self, x):
if not self.validate_args:
return x
is_valid = check_ops.assert_positive(
x[..., 1:] - x[..., :-1],
message="Forward transformation input must be strictly increasing.")
return control_flow_ops.with_dependencies([is_valid], x)
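The forward/inverse pair above is easy to verify by hand; a NumPy sketch of the same two transformations:

```python
import numpy as np

def ordered_forward(x):
  # y[0] = x[0]; y[1:] = log(x[1:] - x[:-1])
  return np.concatenate([x[:1], np.log(np.diff(x))])

def ordered_inverse(y):
  # x = cumsum([y[0], exp(y[1:])])
  return np.cumsum(np.concatenate([y[:1], np.exp(y[1:])]))

x = np.array([2., 3., 4.])
y = ordered_forward(x)                     # [2., 0., 0.], as in the docstring
assert np.allclose(ordered_inverse(y), x)  # round trip recovers x
```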

View File

@ -28,7 +28,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector
__all__ = [
@ -36,7 +36,7 @@ __all__ = [
]
class Permute(bijector_lib.Bijector):
class Permute(bijector.Bijector):
"""Permutes the rightmost dimension of a `Tensor`.
```python

View File

@ -25,7 +25,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector
__all__ = [
@ -34,7 +34,7 @@ __all__ = [
]
class RealNVP(bijector_lib.Bijector):
class RealNVP(bijector.Bijector):
"""RealNVP "affine coupling layer" for vector-valued events.
Real NVP models a normalizing flow on a `D`-dimensional distribution via a

View File

@ -28,7 +28,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector
__all__ = [
@ -44,7 +44,7 @@ def _ndims_from_shape(shape):
return array_ops.shape(shape)[0]
class Reshape(bijector_lib.Bijector):
class Reshape(bijector.Bijector):
"""Reshapes the `event_shape` of a `Tensor`.
The semantics generally follow that of `tf.reshape()`, with

View File

@ -128,7 +128,7 @@ class Weibull(bijector.Bijector):
return x
is_valid = check_ops.assert_non_negative(
x,
message="Forward transformation input must be at least {}.".format(0))
message="Forward transformation input must be at least 0.")
return control_flow_ops.with_dependencies([is_valid], x)
def _maybe_assert_valid_y(self, y):

View File

@ -439,7 +439,7 @@ class _DistributionShape(object):
if self._batch_ndims_is_0 and expand_batch_dim:
squeeze_dims += [1]
if squeeze_dims:
x = array_ops.squeeze(x, squeeze_dims=squeeze_dims)
x = array_ops.squeeze(x, axis=squeeze_dims)
# x.shape: [prod(S)]+B+E
_, batch_shape, event_shape = self.get_shape(x)
else:

View File

@ -397,7 +397,7 @@ class GmmAlgorithm(object):
# Compute the effective number of data points assigned to component k.
with ops.control_dependencies(self._w):
points_in_k = array_ops.squeeze(
math_ops.add_n(self._points_in_k), squeeze_dims=[0])
math_ops.add_n(self._points_in_k), axis=[0])
# Update alpha.
if 'w' in self._params:
final_points_in_k = points_in_k / num_batches

View File

@ -932,7 +932,8 @@ def convolution(inputs,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
scope=None,
conv_dims=None):
"""Adds an N-D convolution followed by an optional batch_norm layer.
It is required that 1 <= N <= 3.
@ -993,6 +994,10 @@ def convolution(inputs,
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
scope: Optional scope for `variable_scope`.
conv_dims: Optional convolution dimensionality. When set, the corresponding
convolution is used (e.g. 2 for Conv 2D, 3 for Conv 3D, ...). When left to
None, the convolution dimensionality is selected based on the input rank
(i.e. Conv ND, with N = input_rank - 2).
Returns:
A tensor representing the output of the operation.
@ -1015,6 +1020,9 @@ def convolution(inputs,
inputs = ops.convert_to_tensor(inputs)
input_rank = inputs.get_shape().ndims
if conv_dims is not None and conv_dims + 2 != input_rank:
raise ValueError('Convolution expects input with rank %d, got %d' %
(conv_dims + 2, input_rank))
if input_rank == 3:
layer_class = convolutional_layers.Convolution1D
elif input_rank == 4:
@ -1061,10 +1069,134 @@ def convolution(inputs,
outputs = activation_fn(outputs)
return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
@add_arg_scope
def convolution1d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=1)
convolution2d = convolution
convolution3d = convolution
convolution1d.__doc__ = convolution.__doc__
@add_arg_scope
def convolution2d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=2)
convolution2d.__doc__ = convolution.__doc__
@add_arg_scope
def convolution3d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=3)
convolution3d.__doc__ = convolution.__doc__
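With the rank check in place, the dimension-specific wrappers reject mismatched inputs; a usage sketch mirroring the new `ConvolutionTest` case further down:

```python
import tensorflow as tf
from tensorflow.contrib import layers

images = tf.random_uniform((5, 7, 9, 3))   # rank 4 (NHWC)
net = layers.convolution2d(images, 32, 3)  # OK: conv_dims=2 matches rank 4
# layers.convolution3d(images, 32, 3) raises:
#   ValueError: Convolution expects input with rank 5, got 4
```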
@add_arg_scope
def convolution2d_in_plane(
@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None):
Args:
tensor: An `int` `Tensor` to be converted to a `Sparse`.
eos_token: An integer.
It is part of the target label that signfies the end of a sentence.
It is part of the target label that signifies the end of a sentence.
outputs_collections: Collection to add the outputs.
scope: Optional scope for name_scope.
"""
@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None):
output_collections: Collection to which the outputs will be added.
scope: Optional scope for `name_scope`.
Returns:
A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but
A `Tensor` or `SparseTensor` containing the same values as `inputs`, but
with innermost dimensions flattened to obtain rank `new_rank`.
Raises:

View File

@ -310,6 +310,17 @@ class BiasAddTest(test.TestCase):
class ConvolutionTest(test.TestCase):
def testInvalidShape(self):
with self.test_session():
images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1)
with self.assertRaisesRegexp(
ValueError, 'Convolution expects input with rank 5, got 4'):
layers_lib.convolution3d(images_2d, 32, 3)
images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1)
with self.assertRaisesRegexp(
ValueError, 'Convolution expects input with rank 4, got 5'):
layers_lib.convolution2d(images_3d, 32, 3)
def testInvalidDataFormat(self):
height, width = 7, 9
with self.test_session():
@ -3155,7 +3166,7 @@ class RepeatTests(test.TestCase):
with self.test_session():
images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32)
output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3])
self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu')
self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32])
def testRepeatWithScope(self):
@ -3749,7 +3760,7 @@ class StackTests(test.TestCase):
layers_lib.convolution2d, [10, 20, 30],
kernel_size=[3, 3],
padding='SAME')
self.assertEqual(output.op.name, 'Stack/convolution_3/Relu')
self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30])
def testStackWithScope(self):

View File

@ -270,7 +270,7 @@ class _RegressionTargetColumn(_TargetColumn):
def logits_to_predictions(self, logits, proba=False):
if self.num_label_columns == 1:
return array_ops.squeeze(logits, squeeze_dims=[1])
return array_ops.squeeze(logits, axis=[1])
return logits
def get_eval_ops(self, features, logits, labels, metrics=None):
@ -418,7 +418,7 @@ def _softmax_cross_entropy_loss(logits, target):
"Instead got %s." % target.dtype)
# sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
if len(target.get_shape()) == 2:
target = array_ops.squeeze(target, squeeze_dims=[1])
target = array_ops.squeeze(target, axis=[1])
loss_vec = nn.sparse_softmax_cross_entropy_with_logits(
labels=target, logits=logits)
return loss_vec

View File

@ -777,7 +777,7 @@ class _RegressionHead(_SingleHead):
key = prediction_key.PredictionKey.SCORES
with ops.name_scope(None, "predictions", (logits,)):
if self.logits_dimension == 1:
logits = array_ops.squeeze(logits, squeeze_dims=(1,), name=key)
logits = array_ops.squeeze(logits, axis=(1,), name=key)
return {key: self._link_fn(logits)}
def _metrics(self, eval_loss, predictions, labels, weights):
@ -974,7 +974,7 @@ def _softmax_cross_entropy_loss(labels, logits, weights=None):
is_squeezed_labels = False
# TODO(ptucker): This will break for dynamic shapes.
if len(labels.get_shape()) == 2:
labels = array_ops.squeeze(labels, squeeze_dims=(1,))
labels = array_ops.squeeze(labels, axis=(1,))
is_squeezed_labels = True
loss = nn.sparse_softmax_cross_entropy_with_logits(

View File

@ -40,7 +40,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None):
[tensor_in, labels]):
predictions = nn.xw_plus_b(tensor_in, weights, biases)
if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2:
predictions = array_ops_.squeeze(predictions, squeeze_dims=[1])
predictions = array_ops_.squeeze(predictions, axis=[1])
return predictions, losses.mean_squared_error(labels, predictions)

View File

@ -17,6 +17,7 @@ limitations under the License.
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
@ -70,6 +71,23 @@ TfLiteStatus ReadLabelsFile(const string& file_name,
return kTfLiteOk;
}
void PrintProfilingInfo(const profiling::ProfileEvent* e, uint32_t op_index,
TfLiteRegistration registration) {
// Outputs something like:
// time (ms), Node xxx, OpCode xxx, symbolic name
// 5.352, Node 5, OpCode 4, DEPTHWISE_CONV_2D
LOG(INFO) << std::fixed << std::setw(10) << std::setprecision(3)
<< (e->end_timestamp_us - e->begin_timestamp_us) / 1000.0
<< ", Node " << std::setw(3) << std::setprecision(3) << op_index
<< ", OpCode " << std::setw(3) << std::setprecision(3)
<< registration.builtin_code << ", "
<< EnumNameBuiltinOperator(
(BuiltinOperator)registration.builtin_code)
<< "\n";
}
void RunInference(Settings* s) {
if (!s->model_name.c_str()) {
LOG(ERROR) << "no model file name\n";
@ -166,6 +184,11 @@ void RunInference(Settings* s) {
exit(-1);
}
profiling::Profiler* profiler = new profiling::Profiler();
interpreter->SetProfiler(profiler);
if (s->profiling) profiler->StartProfiling();
struct timeval start_time, stop_time;
gettimeofday(&start_time, NULL);
for (int i = 0; i < s->loop_count; i++) {
@ -179,6 +202,18 @@ void RunInference(Settings* s) {
<< (get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000)
<< " ms \n";
if (s->profiling) {
profiler->StopProfiling();
auto profile_events = profiler->GetProfileEvents();
for (int i = 0; i < profile_events.size(); i++) {
auto op_index = profile_events[i]->event_metadata;
const auto node_and_registration =
interpreter->node_and_registration(op_index);
const TfLiteRegistration registration = node_and_registration->second;
PrintProfilingInfo(profile_events[i], op_index, registration);
}
}
const int output_size = 1000;
const size_t num_results = 5;
const float threshold = 0.001f;
@ -217,13 +252,14 @@ void RunInference(Settings* s) {
void display_usage() {
LOG(INFO) << "label_image\n"
<< "--accelerated, -a: [0|1], use Android NNAPI or note\n"
<< "--accelerated, -a: [0|1], use Android NNAPI or not\n"
<< "--count, -c: loop interpreter->Invoke() for certain times\n"
<< "--input_mean, -b: input mean\n"
<< "--input_std, -s: input standard deviation\n"
<< "--image, -i: image_name.bmp\n"
<< "--labels, -l: labels for the model\n"
<< "--tflite_model, -m: model_name.tflite\n"
<< "--profiling, -p: [0|1], profiling or not\n"
<< "--threads, -t: number of threads\n"
<< "--verbose, -v: [0|1] print more information\n"
<< "\n";
@ -241,6 +277,7 @@ int Main(int argc, char** argv) {
{"image", required_argument, 0, 'i'},
{"labels", required_argument, 0, 'l'},
{"tflite_model", required_argument, 0, 'm'},
{"profiling", required_argument, 0, 'p'},
{"threads", required_argument, 0, 't'},
{"input_mean", required_argument, 0, 'b'},
{"input_std", required_argument, 0, 's'},
@ -249,7 +286,7 @@ int Main(int argc, char** argv) {
/* getopt_long stores the option index here. */
int option_index = 0;
c = getopt_long(argc, argv, "a:b:c:f:i:l:m:s:t:v:", long_options,
c = getopt_long(argc, argv, "a:b:c:f:i:l:m:p:s:t:v:", long_options,
&option_index);
/* Detect the end of the options. */
@ -276,6 +313,10 @@ int Main(int argc, char** argv) {
case 'm':
s.model_name = optarg;
break;
case 'p':
s.profiling = strtol( // NOLINT(runtime/deprecated_fn)
optarg, (char**)NULL, 10);
break;
case 's':
s.input_std = strtod(optarg, NULL);
break;

View File

@ -25,6 +25,7 @@ struct Settings {
bool verbose = false;
bool accel = false;
bool input_floating = false;
bool profiling = false;
int loop_count = 1;
float input_mean = 127.5f;
float input_std = 127.5f;

View File

@ -84,4 +84,32 @@
android:visibility="visible" />
</RelativeLayout>
<RelativeLayout
android:id="@+id/control2"
android:layout_width="match_parent"
android:layout_height="135dp"
android:layout_alignParentLeft="true"
android:layout_alignParentStart="true"
android:layout_alignTop="@+id/control"
android:layout_marginLeft="300dp"
android:layout_marginStart="300dp"
android:background="@color/control_background">
<ToggleButton
android:id="@+id/button"
android:textOff="@string/tflite"
android:textOn="@string/nnapi"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_alignParentLeft="true"
android:layout_alignParentStart="true" />
<NumberPicker
android:id="@+id/np"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_below="@+id/button"
android:visibility="visible" />
</RelativeLayout>
</RelativeLayout>

View File

@ -25,8 +25,8 @@ namespace builtin {
namespace topk_v2 {
constexpr int kInputTensor = 0;
constexpr int kInputTopK = 1;
constexpr int kOutputIndexes = 0;
constexpr int kOutputValues = 1;
constexpr int kOutputValues = 0;
constexpr int kOutputIndexes = 1;
namespace {
TfLiteStatus ResizeOutput(TfLiteContext* context, TfLiteNode* node) {

View File

@ -31,8 +31,8 @@ class TopKV2OpModel : public SingleOpModel {
int top_k) {
input_ = AddInput(input_type);
top_k_ = AddInput(TensorType_INT32);
output_indexes_ = AddOutput(TensorType_INT32);
output_values_ = AddOutput(input_type);
output_indexes_ = AddOutput(TensorType_INT32);
SetBuiltinOp(BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options, 0);
BuildInterpreter({input_shape, {1}});
PopulateTensor<int32_t>(top_k_, {top_k});

View File

@ -609,7 +609,7 @@ enum {
* Long short-term memory unit (LSTM) recurrent network layer.
*
* The default non-peephole implementation is based on:
* http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
* http://www.bioinf.jku.at/publications/older/2604.pdf
* S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
* Computation, 9(8):1735-1780, 1997.
*

View File

@ -37,9 +37,9 @@ struct ProfileEvent {
// Label of the event. This usually describes the event.
const char* tag;
// Timestamp in microseconds when the event began.
int64_t begin_timestamp_us;
uint64_t begin_timestamp_us;
// Timestamp in microseconds when the event ended.
int64_t end_timestamp_us;
uint64_t end_timestamp_us;
// The field containing the type of event. This must be one of the event types
// in EventType.
EventType event_type;
@ -74,7 +74,7 @@ class ProfileBuffer {
if (!enabled_) {
return kInvalidEventHandle;
}
int64_t timestamp = NowMicros();
uint64_t timestamp = NowMicros();
int index = current_index_ % event_buffer_.size();
event_buffer_[index].tag = tag;
event_buffer_[index].event_type = event_type;
@ -134,7 +134,7 @@ class ProfileBuffer {
}
private:
static int64_t NowMicros() {
static uint64_t NowMicros() {
// TODO(shashishekhar): Refactor this to a separate file.
struct timeval tv;
gettimeofday(&tv, nullptr);

View File

@ -124,6 +124,15 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
SetDataTypeForAllOutputs(model, op, rand_op->dtype);
break;
}
case OperatorType::kTopK_V2: {
// topk(values: T, k: int32) -> values: T, indices: int32
CHECK_EQ(op->inputs.size(), 2);
CHECK_EQ(op->outputs.size(), 2);
CHECK(model->GetArray(op->inputs[1]).data_type == ArrayDataType::kInt32);
model->GetArray(op->outputs[0]).data_type = model->GetArray(op->inputs[0]).data_type;
model->GetArray(op->outputs[1]).data_type = ArrayDataType::kInt32;
break;
}
case OperatorType::kTensorFlowUnsupported: {
auto* unsupported_op = static_cast<TensorFlowUnsupportedOperator*>(op);
// Some output tensors from the op could be eliminated by optimization.

View File

@ -1087,8 +1087,8 @@ void ProcessGatherOperator(Model* model, GatherOperator* op) {
void ProcessTopkV2Operator(Model* model, TopKV2Operator* op) {
const auto& input_values = model->GetArray(op->inputs[0]);
const auto& input_k = model->GetArray(op->inputs[1]);
auto& output_indexes = model->GetArray(op->outputs[0]);
auto& output_values = model->GetArray(op->outputs[1]);
auto& output_values = model->GetArray(op->outputs[0]);
auto& output_indexes = model->GetArray(op->outputs[1]);
// Bail if we already know the output shape.
if (output_indexes.has_shape()) {

View File

@ -1991,7 +1991,7 @@ void ConvertTopKV2Operator(const NodeDef& node,
op->inputs.push_back(node.input(1));
}
// The op has two outputs.
op->outputs.push_back(node.name() + ":0");
op->outputs.push_back(node.name());
op->outputs.push_back(node.name() + ":1");
model->operators.emplace_back(op.release());
}
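The output order these changes enforce (values as output 0, indices as output 1) matches the public TensorFlow op:

```python
import tensorflow as tf

values, indices = tf.nn.top_k(tf.constant([1., 4., 3.]), k=2)
# values  (output 0) => [4., 3.]
# indices (output 1) => [1, 2]
```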

View File

@ -825,11 +825,6 @@ void FixNoOrphanedArray(Model* model) {
void CheckEachArray(const Model& model) {
for (const auto& array_entry : model.GetArrayMap()) {
const auto& array = array_entry.second;
if (array->has_shape()) {
for (int d : array->shape().dims()) {
CHECK_GE(d, 1);
}
}
// It's OK to have a buffer or an alloc, but not both.
// (Since allocs are for transient arrays without a buffer).
CHECK(!array->buffer || !array->alloc);
@ -839,6 +834,10 @@ void CheckEachArray(const Model& model) {
// The presence of a fixed buffer should imply the presence of a fixed
// shape.
CHECK(array->has_shape());
// A constant buffer should have a valid shape.
for (int d : array->shape().dims()) {
CHECK_GE(d, 1);
}
// The shape flat-size should agree with the buffer length.
CHECK_EQ(array->buffer->Length(),
RequiredBufferSizeForShape(array->shape()));

View File

@ -22,6 +22,7 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/lib/strings/str_util.h"
// Skip MPI C++ bindings support, this matches the usage in other places

View File

@ -56,21 +56,21 @@ class LazyAdamOptimizer(adam.AdamOptimizer):
epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
# m := beta1 * m + (1 - beta1) * g_t
# \\(m := beta1 * m + (1 - beta1) * g_t\\)
m = self.get_slot(var, "m")
m_t = state_ops.scatter_update(m, grad.indices,
beta1_t * array_ops.gather(m, grad.indices) +
(1 - beta1_t) * grad.values,
use_locking=self._use_locking)
# v := beta2 * v + (1 - beta2) * (g_t * g_t)
# \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\)
v = self.get_slot(var, "v")
v_t = state_ops.scatter_update(v, grad.indices,
beta2_t * array_ops.gather(v, grad.indices) +
(1 - beta2_t) * math_ops.square(grad.values),
use_locking=self._use_locking)
# variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
# \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\)
m_t_slice = array_ops.gather(m_t, grad.indices)
v_t_slice = array_ops.gather(v_t, grad.indices)
denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t

View File

@ -40,23 +40,19 @@ class AdamOptimizer(optimizer_v2.OptimizerV2):
Initialization:
```
m_0 <- 0 (Initialize initial 1st moment vector)
v_0 <- 0 (Initialize initial 2nd moment vector)
t <- 0 (Initialize timestep)
```
$$m_0 := 0 (Initialize initial 1st moment vector)$$
$$v_0 := 0 (Initialize initial 2nd moment vector)$$
$$t := 0 (Initialize timestep)$$
The update rule for `variable` with gradient `g` uses an optimization
described at the end of section 2 of the paper:
```
t <- t + 1
lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
$$t := t + 1$$
$$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
m_t <- beta1 * m_{t-1} + (1 - beta1) * g
v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
```
$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
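Read literally, one update step is (a NumPy sketch of the rule above, not library code):

```python
import numpy as np

def adam_step(var, g, m, v, t, learning_rate=0.001,
              beta1=0.9, beta2=0.999, epsilon=1e-8):
  t += 1
  lr_t = learning_rate * np.sqrt(1 - beta2**t) / (1 - beta1**t)
  m = beta1 * m + (1 - beta1) * g
  v = beta2 * v + (1 - beta2) * g * g
  var = var - lr_t * m / (np.sqrt(v) + epsilon)
  return var, m, v, t
```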
The default value of 1e-8 for epsilon might not be a good default in
general. For example, when training an Inception network on ImageNet a

View File

@ -307,6 +307,21 @@ class LSTMTest(test.TestCase):
self._seed = 23489
np.random.seed(self._seed)
def testDType(self):
# Test case for GitHub issue 16228
# Not passing dtype in constructor results in default float32
lstm = rnn_cell.LSTMCell(10)
input_tensor = array_ops.ones([10, 50])
lstm.build(input_tensor.get_shape())
self.assertEqual(lstm._bias.dtype, dtypes.float32_ref)
# Explicitly pass dtype in constructor
for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
lstm = rnn_cell.LSTMCell(10, dtype=dtype)
input_tensor = array_ops.ones([10, 50])
lstm.build(input_tensor.get_shape())
self.assertEqual(lstm._bias.dtype, dtype._as_ref)
def testNoProjNoSharding(self):
num_units = 3
input_size = 5

View File

@ -37,7 +37,7 @@ def _top_k_generator(k):
def _top_k(probabilities, targets):
targets = math_ops.to_int32(targets)
if targets.get_shape().ndims > 1:
targets = array_ops.squeeze(targets, squeeze_dims=[1])
targets = array_ops.squeeze(targets, axis=[1])
return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k))
return _top_k
@ -57,7 +57,7 @@ def _r2(probabilities, targets, weights=None):
def _squeeze_and_onehot(targets, depth):
targets = array_ops.squeeze(targets, squeeze_dims=[1])
targets = array_ops.squeeze(targets, axis=[1])
return array_ops.one_hot(math_ops.to_int32(targets), depth)

View File

@ -55,7 +55,7 @@ class ManyToOneLayer(hybrid_layer.HybridLayer):
# There is always one activation per instance by definition, so squeeze
# away the extra dimension.
return array_ops.squeeze(nn_activations, squeeze_dims=[1])
return array_ops.squeeze(nn_activations, axis=[1])
class FlattenedFullyConnectedLayer(hybrid_layer.HybridLayer):

View File

@ -445,7 +445,7 @@ class RandomForestGraphs(object):
mask = math_ops.less(
r, array_ops.ones_like(r) * self.params.bagging_fraction)
gather_indices = array_ops.squeeze(
array_ops.where(mask), squeeze_dims=[1])
array_ops.where(mask), axis=[1])
# TODO(thomaswc): Calculate out-of-bag data and labels, and store
# them for use in calculating statistics later.
tree_data = array_ops.gather(processed_dense_features, gather_indices)

View File

@ -111,20 +111,22 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph,
}
}
std::pair<string, int> ParseTensorName(string name, int default_idx = 0) {
std::pair<string, int> ParseTensorName(const string& name,
int default_idx = 0) {
string name_no_idx = name;
int idx = default_idx;
size_t sep = name.find_last_of(':');
const size_t sep = name_no_idx.find_last_of(':');
if (sep != string::npos) {
name = name.substr(0, sep);
name_no_idx = name_no_idx.substr(0, sep);
idx = std::stoi(name.substr(sep + 1));
}
return std::make_pair(name, idx);
return std::make_pair(name_no_idx, idx);
}
std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
const std::vector<string>& tensor_names) {
std::unordered_map<string, std::vector<int>> result;
for (string const& tensor_name : tensor_names) {
for (const string& tensor_name : tensor_names) {
string node_name;
int index;
std::tie(node_name, index) = ParseTensorName(tensor_name);
@ -132,6 +134,7 @@ std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
}
return result;
}
// TODO(sami): convert references to pointers
struct ConvertGraphParams {
ConvertGraphParams(

View File

@ -78,7 +78,7 @@ class StubTimeSeriesModel(model.TimeSeriesModel):
batch_end_values = array_ops.squeeze(
array_ops.slice(values, [0, array_ops.shape(times)[1] - 1, 0],
[-1, 1, -1]),
squeeze_dims=[1, 2])
axis=[1, 2])
# A pretty odd but easy to think about loss: L1 loss on the batch end
# values.
loss = math_ops.reduce_sum(

View File

@ -170,7 +170,7 @@ class KalmanFilter(object):
math_ops.matmul(
transition_matrices,
prior_state[..., None]),
squeeze_dims=[-1])
axis=[-1])
return advanced_state
def predict_state_var(
@ -254,7 +254,7 @@ class KalmanFilter(object):
kalman_gain_transposed,
array_ops.expand_dims(residual, -1),
adjoint_a=True),
squeeze_dims=[-1])
axis=[-1])
gain_obs = math_ops.matmul(
kalman_gain_transposed, observation_model, adjoint_a=True)
identity_extradim = linalg_ops.eye(
@ -332,7 +332,7 @@ class KalmanFilter(object):
array_ops.expand_dims(state_mean, 1),
observation_model,
adjoint_b=True),
squeeze_dims=[1])
axis=[1])
observed_var = math_ops.matmul(
math_ops.matmul(observation_model, state_var),
observation_model,

View File

@ -2292,7 +2292,9 @@ tf_cuda_library(
CORE_CPU_BASE_HDRS = GRAPH_HDRS + [
"common_runtime/device.h",
"common_runtime/device_factory.h",
"common_runtime/device_mgr.h",
"common_runtime/device_set.h",
"common_runtime/eval_const_tensor.h",
"common_runtime/graph_runner.h",
"common_runtime/shape_refiner.h",
@ -2350,9 +2352,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
"common_runtime/copy_tensor.h",
"common_runtime/costmodel_manager.h",
"common_runtime/debugger_state_interface.h",
"common_runtime/device_factory.h",
"common_runtime/device_resolver_local.h",
"common_runtime/device_set.h",
"common_runtime/dma_helper.h",
"common_runtime/eigen_thread_pool.h",
"common_runtime/executor.h",

View File

@ -82,9 +82,9 @@ END
}
summary: "Update \'*var\' according to the Adam algorithm."
description: <<END
lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
$$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
END
}

View File

@ -24,5 +24,6 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
[0, 0, 2, 2, 0, 0]
[0, 0, 0, 0, 0, 0]]
```
END
}

View File

@ -44,6 +44,7 @@ In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
*MIN_COMBINED Mode Example*
@ -87,6 +88,7 @@ choosing to elide the lowest possible value for symmetry (e.g., output range is
We first find the range of values in our tensor. The
range we use is always centered on 0, so we find m such that
```c++
m = max(abs(input_min), abs(input_max))
```
@ -95,6 +97,7 @@ Our input tensor range is then `[-m, m]`.
Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
If T is signed, this is
```
num_bits = sizeof(T) * 8
[min_fixed, max_fixed] =
@ -102,16 +105,19 @@ If T is signed, this is
```
Otherwise, if T is unsigned, the fixed-point range is
```
[min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
```
From this we compute our scaling factor, s:
```c++
s = (max_fixed - min_fixed) / (2 * m)
```
Now we can quantize the elements of our tensor:
```c++
result = round(input * s)
```
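A worked instance of the scaling above (a Python sketch with assumed inputs: T = qint8 and an input range of [-10, 6], using the symmetric [-127, 127] fixed-point range mentioned earlier):

```python
import numpy as np

input_min, input_max = -10.0, 6.0
m = max(abs(input_min), abs(input_max))            # 10.0
num_bits = 8                                       # sizeof(qint8) * 8
min_fixed, max_fixed = -127, 127                   # symmetric signed range
s = (max_fixed - min_fixed) / (2 * m)              # 254 / 20 = 12.7
result = np.round(np.array([-8.0, 0.0, 4.0]) * s)  # [-102., 0., 51.]
```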

View File

@ -76,9 +76,9 @@ END
}
summary: "Update \'*var\' according to the Adam algorithm."
description: <<END
lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
$$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
END
}

View File

@ -25,12 +25,12 @@ A new tensor with the given shape and updates applied according
to the indices.
END
}
summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`."
summary: "Scatter `updates` into a new tensor according to `indices`."
description: <<END
Creates a new tensor by applying sparse `updates` to individual
values or slices within a zero tensor of the given `shape` according to
indices. This operator is the inverse of the @{tf.gather_nd} operator which
extracts values or slices from a given tensor.
Creates a new tensor by applying sparse `updates` to individual values or
slices within a tensor (initially zero for numeric, empty for string) of
the given `shape` according to indices. This operator is the inverse of the
@{tf.gather_nd} operator which extracts values or slices from a given tensor.
**WARNING**: The order in which updates are applied is nondeterministic, so the
output will be nondeterministic if `indices` contains duplicates.
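For reference, the canonical 1-D case using the public op:

```python
import tensorflow as tf

indices = tf.constant([[4], [3], [1], [7]])
updates = tf.constant([9, 10, 11, 12])
scatter = tf.scatter_nd(indices, updates, shape=[8])
# => [0, 11, 0, 10, 9, 0, 0, 12]
```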

View File

@ -490,7 +490,7 @@ Status GraphExecutionState::OptimizeGraph(
cpu_device = device;
}
}
grappler::VirtualCluster cluster(device_map);
grappler::VirtualCluster cluster(device_map, device_set_);
GraphDef new_graph;
TF_RETURN_IF_ERROR(grappler::RunMetaOptimizer(
item, rewrite_options, cpu_device, &cluster, &new_graph));

View File

@ -547,14 +547,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
// If Op has been specifically assigned to a non-CPU device, then No.
if (!n->assigned_device_name().empty() &&
!StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
!str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
result = false;
reason = "Op has been assigned a runtime device that is not CPU.";
}
// If user has specifically assigned this op to a non-CPU device, then No.
if (!n->def().device().empty() &&
!StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
!str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
result = false;
reason = "User has assigned a device that is not CPU.";
}
@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
// If Op has been specifically assigned to a non-CPU device, then No.
if (!n->assigned_device_name().empty() &&
!StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
!str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
result = false;
reason = "Op has been assigned a runtime device that is not CPU.";
}
// If user has specifically assigned this op to a non-CPU device, then No.
if (!n->def().device().empty() &&
!StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
!str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
result = false;
reason = "User has assigned a device that is not CPU.";
}

View File

@ -56,6 +56,7 @@ cc_library(
],
visibility = ["//visibility:public"],
deps = [
"//tensorflow/core:core_cpu_base",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
@ -73,6 +74,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
":cluster",
"//tensorflow/core:core_cpu_base",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/grappler/costs:op_level_cost_estimator",

View File

@ -21,6 +21,7 @@ limitations under the License.
#include <utility>
#include <vector>
#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/lib/core/status.h"
@ -92,6 +93,10 @@ class Cluster {
// sorted alphabetically.
const std::vector<string> GetDeviceNames() const;
// The DeviceSet is not always available, but when it is it contains a
// superset of the devices listed in GetDevices/GetDeviceNames().
const DeviceSet* GetDeviceSet() const { return device_set_; }
// Enables collecting the allocator stats. Call with enable=true must be made
// before Provision().
virtual Status EnablePeakMemoryStats(bool enable) {
@ -119,6 +124,7 @@ class Cluster {
protected:
std::unordered_map<string, DeviceProperties> devices_;
const DeviceSet* device_set_ = nullptr; // Not owned
const int timeout_s_;
SessionOptions options_;
RunOptions run_options_;

View File

@ -37,6 +37,14 @@ VirtualCluster::VirtualCluster(
: Cluster(0), node_estimator_(node_estimator), node_manager_(node_manager) {
devices_ = devices;
}
VirtualCluster::VirtualCluster(
const std::unordered_map<string, DeviceProperties>& devices,
const DeviceSet* device_set)
: VirtualCluster(devices) {
device_set_ = device_set;
}
VirtualCluster::~VirtualCluster() {}
Status VirtualCluster::Provision() { return Status::OK(); }

View File

@ -17,6 +17,8 @@ limitations under the License.
#define TENSORFLOW_CORE_GRAPPLER_CLUSTERS_VIRTUAL_CLUSTER_H_
#include <unordered_map>
#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/costs/op_level_cost_estimator.h"
#include "tensorflow/core/grappler/costs/virtual_scheduler.h"
@ -34,6 +36,8 @@ class VirtualCluster : public Cluster {
VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
OpLevelCostEstimator* node_estimator,
ReadyNodeManager* node_manager);
VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
const DeviceSet* device_set);
~VirtualCluster() override;

View File

@ -199,7 +199,7 @@ class FirstReadyManager : public ReadyNodeManager {
// current node.
std::vector<const NodeDef*> nodes_;
// Newly added nodes are added to waiting_queue_. That way, GetCurrNode(),
// wihch returns the front of the nodes_, always returns the same node,
// which returns the front of the nodes_, always returns the same node,
// even if any of the new nodes has a time_ready smaller than the current
// node's.
std::vector<const NodeDef*> waiting_queue_;
// Comparator functor for heap; stl heap is max heap, so we use "greater than"
@ -212,7 +212,7 @@ class FirstReadyManager : public ReadyNodeManager {
};
// CompositeNodeManager has a few other NodeManagers: per-device LIFO for normal
// ops (neither _Send nor _Recv) and FirstyReadyManagers for _Send ops and _Recv
// ops (neither _Send nor _Recv) and FirstReadyManagers for _Send ops and _Recv
// ops, and then it chooses FirstReady among the ops chosen from each
// internal NodeManagers. The objective is to maximize producer-consumer
// locality within device, while processing nodes across devices, including

View File

@ -33,7 +33,7 @@ class CustomGraphOptimizerRegistry {
static std::vector<string> GetRegisteredOptimizers();
typedef std::function<CustomGraphOptimizer*()> Creator;
// Regsiter graph optimizer which can be called during program initialization.
// Register graph optimizer which can be called during program initialization.
// This class is not thread-safe.
static void RegisterOptimizerOrDie(const Creator& optimizer_creator,
const string& name);
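To make the registration path concrete, here is a hedged sketch of a no-op optimizer registered at static-initialization time. The class is hypothetical, and the method signatures are inferred from the `Creator` typedef above and the `Init(&optimizer_config)` call in the meta optimizer change below; treat them as assumptions rather than the canonical interface.
```cpp
// Sketch only: a hypothetical pass-through optimizer registered under the
// name "MyOptimizer".
class MyOptimizer : public CustomGraphOptimizer {
 public:
  Status Init(const RewriterConfig_CustomGraphOptimizer* config) override {
    return Status::OK();  // a real pass would read config->parameter_map()
  }
  string name() const override { return "MyOptimizer"; }
  Status Optimize(Cluster* cluster, const GrapplerItem& item,
                  GraphDef* optimized_graph) override {
    *optimized_graph = item.graph;  // no-op: pass the graph through
    return Status::OK();
  }
  void Feedback(Cluster* cluster, const GrapplerItem& item,
                const GraphDef& optimized_graph, double result) override {}
};
// Registration happens once during program initialization, as required by
// the comment above.
static bool my_optimizer_registered = []() {
  CustomGraphOptimizerRegistry::RegisterOptimizerOrDie(
      []() -> CustomGraphOptimizer* { return new MyOptimizer; },
      "MyOptimizer");
  return true;
}();
```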

View File

@ -160,13 +160,26 @@ Status MetaOptimizer::InitializeOptimizersByName(
VLOG(2) << "Can't register an optimizer by name: " << optimizer_name;
}
}
for (const auto& optimizer_config : cfg_.custom_optimizers()) {
auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
optimizer_config.name());
if (custom_optimizer) {
VLOG(2) << "Registered custom configurable graph optimizer: "
<< optimizer_config.name();
TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config));
optimizers->push_back(std::move(custom_optimizer));
} else {
VLOG(2) << "Can't register an optimizer by name: "
<< optimizer_config.name();
}
}
return Status::OK();
}
Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
GraphDef* optimized_graph) {
std::vector<std::unique_ptr<GraphOptimizer>> optimizers;
if (cfg_.optimizers().empty()) {
if (cfg_.optimizers().empty() && cfg_.custom_optimizers().empty()) {
TF_RETURN_IF_ERROR(InitializeOptimizers(&optimizers));
} else {
TF_RETURN_IF_ERROR(InitializeOptimizersByName(&optimizers));
@ -337,7 +350,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
cfg.auto_parallel().enable() ||
cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
cfg.debug_stripper() == RewriterConfig::ON ||
!cfg.optimizers().empty();
!cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
}
Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg,
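On the client side, a sketch of how such an optimizer might be requested through the proto (the accessor names follow the `custom_optimizers()` calls above; the optimizer name is the hypothetical one from the registration sketch):
```cpp
// Sketch only: request a registered custom optimizer by name. With a
// non-empty custom_optimizers list, MetaOptimizerEnabled() returns true and
// OptimizeGraph() takes the InitializeOptimizersByName() path.
RewriterConfig rewriter_config;
auto* custom = rewriter_config.add_custom_optimizers();
custom->set_name("MyOptimizer");  // must match the registered name
```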

View File

@ -134,6 +134,8 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
switch (element.dtype()) {
TF_CALL_ALL_TYPES(HANDLE_TYPE);
TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
TF_CALL_uint32(HANDLE_TYPE);
TF_CALL_uint64(HANDLE_TYPE);
#undef HANDLE_TYPE
default:
return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",

View File

@ -16,8 +16,8 @@ limitations under the License.
#include "tensorflow/core/kernels/cwise_ops_common.h"
namespace tensorflow {
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
int16, int32, int64);
REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
int8, int16, int32, int64);
REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
Eigen::half, double);

View File

@ -20,6 +20,7 @@ limitations under the License.
#include <map>
#include <string>
#include <vector>
#include <memory>
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
@ -42,14 +43,13 @@ limitations under the License.
#include "tensorflow/core/util/mkl_util.h"
#ifndef INTEL_MKL_ML
#include "mkldnn.hpp"
using mkldnn::prop_kind;
using mkldnn::stream;
using mkldnn::convolution_direct;
using mkldnn::convolution_forward;
using mkldnn::convolution_direct;
#else
#include "mkl_dnn.h"
#include "mkl_dnn_types.h"
@ -57,11 +57,232 @@ using mkldnn::convolution_forward;
namespace tensorflow {
#ifndef INTEL_MKL_ML
struct ConvFwdDimensions {
memory::dims src_dims;
memory::dims filter_dims;
memory::dims bias_dims;
memory::dims dst_dims;
memory::dims strides;
memory::dims dilations;
memory::dims padding_left;
memory::dims padding_right;
ConvFwdDimensions(memory::dims src_dims,
memory::dims filter_dims, memory::dims bias_dims,
memory::dims dst_dims, memory::dims strides,
memory::dims dilations, memory::dims padding_left,
memory::dims padding_right) :
src_dims(src_dims), filter_dims(filter_dims),
bias_dims(bias_dims), dst_dims(dst_dims),
strides(strides), dilations(dilations),
padding_left(padding_left), padding_right(padding_right) {
}
};
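// Conv2DFwd wraps one MKL-DNN convolution forward primitive together with
// the memory primitives it reads and writes. The primitive is built once in
// Setup() against dummy data; Execute() swaps in the real tensor buffers,
// submits the stream, and restores the dummy handles, so a cached instance
// can be reused across Compute() calls.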
template <typename T>
class Conv2DFwd : public DnnOp {
public:
explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) {
fwd_stream_.reset(new stream(stream::kind::eager));
// create conv primitive
if (conv_fwd_ == nullptr) {
Setup(convFwdDims);
}
}
~Conv2DFwd() {}
// Convolution forward execute with bias
// src_data: input data buffer of src
// filter_data: input data buffer of filter (weights)
// bias_data: input data buffer of bias
// dst_data: output data buffer of dst
void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) {
src_mem_->set_data_handle(static_cast<void*>(src_data));
filter_mem_->set_data_handle(static_cast<void*>(filter_data));
bias_mem_->set_data_handle(static_cast<void*>(bias_data));
dst_mem_->set_data_handle(static_cast<void*>(dst_data));
fwd_stream_->submit(fwd_primitives_);
// after exec, set data handle back
src_mem_->set_data_handle(DummyData);
filter_mem_->set_data_handle(DummyData);
bias_mem_->set_data_handle(DummyData);
dst_mem_->set_data_handle(DummyData);
return;
}
// Convolution forward execute without bias
// src_data: input data buffer of src
// filter_data: input data buffer of filter (weights)
// dst_data: output data buffer of dst
void Execute(T* src_data, T* filter_data, T* dst_data) {
src_mem_->set_data_handle(static_cast<void*>(src_data));
filter_mem_->set_data_handle(static_cast<void*>(filter_data));
dst_mem_->set_data_handle(static_cast<void*>(dst_data));
fwd_stream_->submit(fwd_primitives_);
// after exec, set data handle back
src_mem_->set_data_handle(DummyData);
filter_mem_->set_data_handle(DummyData);
dst_mem_->set_data_handle(DummyData);
return;
}
// expected memory format for this primitive instance
memory::format src_fmt_;
memory::format filter_fmt_;
// convolution primitive
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> fwd_pd_;
std::shared_ptr<mkldnn::primitive> conv_fwd_;
private:
void Setup(const ConvFwdDimensions& convFwdDims) {
// create memory descriptors for convolution data w/ no specified format
src_md_.reset(new memory::desc({convFwdDims.src_dims},
MklDnnType<T>(), memory::format::any));
filter_md_.reset(new memory::desc({convFwdDims.filter_dims},
MklDnnType<T>(), memory::format::any));
dst_md_.reset(new memory::desc({convFwdDims.dst_dims},
MklDnnType<T>(), memory::format::any));
if (!convFwdDims.bias_dims.empty())
bias_md_.reset(new memory::desc({convFwdDims.bias_dims},
MklDnnType<T>(), memory::format::any));
// create a convolution
if (!convFwdDims.bias_dims.empty()) {
fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_,
convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
convFwdDims.padding_right, padding_kind::zero));
} else {
fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
convolution_direct, *src_md_, *filter_md_, *dst_md_,
convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
convFwdDims.padding_right, padding_kind::zero));
}
fwd_pd_.reset(new convolution_forward::primitive_desc(
*fwd_desc_, cpu_engine_));
// store the expected memory format
src_fmt_ = static_cast<mkldnn::memory::format>(
fwd_pd_.get()->src_primitive_desc().desc().data.format);
filter_fmt_ = static_cast<mkldnn::memory::format>(
fwd_pd_.get()->weights_primitive_desc().desc().data.format);
// create memory primitive based on dummy data
src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData));
filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(),
DummyData));
dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData));
// create convolution primitive and add it to net
if (!convFwdDims.bias_dims.empty()) {
bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType<T>(),
memory::format::x}, cpu_engine_}, DummyData));
conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
*filter_mem_, *bias_mem_, *dst_mem_));
} else {
conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
*filter_mem_, *dst_mem_));
}
fwd_primitives_.push_back(*conv_fwd_);
return;
}
// MKLDNN memory
std::shared_ptr<mkldnn::memory> src_mem_;
std::shared_ptr<mkldnn::memory> filter_mem_;
std::shared_ptr<mkldnn::memory> bias_mem_;
std::shared_ptr<mkldnn::memory> dst_mem_;
std::shared_ptr<mkldnn::stream> fwd_stream_;
std::vector<mkldnn::primitive> fwd_primitives_;
// desc & primitive desc
std::shared_ptr<mkldnn::convolution_forward::desc> fwd_desc_;
// memory desc
std::shared_ptr<mkldnn::memory::desc> src_md_;
std::shared_ptr<mkldnn::memory::desc> filter_md_;
std::shared_ptr<mkldnn::memory::desc> bias_md_;
std::shared_ptr<mkldnn::memory::desc> dst_md_;
engine cpu_engine_ = engine(engine::cpu, 0);
};
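// Conv2DFwdFactory memoizes Conv2DFwd instances in the primitive pool,
// keyed by the convolution shapes, strides, dilations, and paddings, so
// kernels with identical parameters reuse one primitive instead of
// rebuilding it on every Compute() call.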
template <typename T>
class Conv2DFwdFactory : public DnnOpFactory<T> {
public:
static Conv2DFwd<T>* Get(const ConvFwdDimensions& convFwdDims) {
Conv2DFwd<T>* conv2d_fwd = nullptr;
// try to find a suitable one in pool
conv2d_fwd = dynamic_cast<Conv2DFwd<T>*> (
Conv2DFwdFactory<T>::GetInstance().GetConv2DFwd(convFwdDims));
if (conv2d_fwd == nullptr) {
conv2d_fwd = new Conv2DFwd<T>(convFwdDims);
Conv2DFwdFactory<T>::GetInstance().SetConv2DFwd(
convFwdDims, conv2d_fwd);
}
return conv2d_fwd;
}
private:
Conv2DFwdFactory() {}
~Conv2DFwdFactory() {}
static const int kDilationH = 0, kDilationW = 1;
static Conv2DFwdFactory& GetInstance() {
static Conv2DFwdFactory instance_;
return instance_;
}
static std::string CreateKey(const ConvFwdDimensions& convFwdDims) {
std::string prefix = "conv2d_fwd_";
FactoryKeyCreator key_creator;
key_creator.AddAsKey(prefix);
key_creator.AddAsKey(convFwdDims.src_dims);
key_creator.AddAsKey(convFwdDims.filter_dims);
key_creator.AddAsKey(convFwdDims.bias_dims);
key_creator.AddAsKey(convFwdDims.dst_dims);
key_creator.AddAsKey(convFwdDims.strides);
key_creator.AddAsKey(convFwdDims.dilations);
key_creator.AddAsKey(convFwdDims.padding_left);
key_creator.AddAsKey(convFwdDims.padding_right);
return key_creator.GetKey();
}
DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) {
std::string key = CreateKey(convFwdDims);
return this->GetOp(key);
}
void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) {
std::string key = CreateKey(convFwdDims);
this->SetOp(key, op);
}
};
#endif
typedef Eigen::ThreadPoolDevice CPUDevice;
// MKL-DNN is now default. MKL-ML must be specified explicitly.
// For now, MKL-ML is default. So making MKL-DNN not a default choice.
#ifdef INTEL_MKL_ML
template <typename Device, typename T, bool biasEnabled>
class MklConv2DOp : public OpKernel {
public:
@ -528,8 +749,6 @@ class MklConv2DOp : public OpKernel {
void Compute(OpKernelContext* context) override {
try {
auto cpu_engine = engine(engine::cpu, 0);
// Input tensors
const Tensor& src_tensor = MklGetInput(context, kInputIndex_Src);
const Tensor& filter_tensor = MklGetInput(context, kInputIndex_Filter);
@ -538,16 +757,16 @@ class MklConv2DOp : public OpKernel {
GetMklShape(context, kInputIndex_Src, &src_mkl_shape);
GetMklShape(context, kInputIndex_Filter, &filter_mkl_shape);
OP_REQUIRES(context, filter_mkl_shape.IsMklTensor() == false,
errors::InvalidArgument("Filter should not be in "
"Mkl Layout"));
errors::InvalidArgument("Filter should not be in "
"Mkl Layout"));
MklDnnData<T> src(&cpu_engine);
MklDnnData<T> filter(&cpu_engine);
MklDnnData<T> output(&cpu_engine);
MklDnnData<T> dst(&cpu_engine); // output
memory::dims src_dims, filter_dims, padding_l, padding_r,
memory::dims src_dims, filter_dims, padding_left, padding_right,
dilations, strides;
memory::dims output_dims_tf_order, output_dims_mkl_order;
memory::dims dst_dims_tf_order, dst_dims_mkl_order;
// Get shapes of input tensors in MKL-DNN order
MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_,
@ -555,31 +774,29 @@ class MklConv2DOp : public OpKernel {
auto src_tf_shape = GetTfShape(context, kInputIndex_Src);
auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter);
conv_utl.GetConvFwdSizesInMklOrder(
src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides,
&dilations, &output_dims_tf_order, &output_dims_mkl_order,
&padding_l, &padding_r);
src_tf_shape, filter_tf_shape, &src_dims, &filter_dims,
&strides, &dilations, &dst_dims_tf_order, &dst_dims_mkl_order,
&padding_left, &padding_right);
if (!context->status().ok()) return;
// Check for corner case - if there is nothing to compute, return.
TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
TensorShape dst_tf_shape = MklDnnDimsToTFShape(dst_dims_tf_order);
// Corner cases: output with 0 elements and 0 batch size.
Tensor* output_tensor = nullptr;
if (output_tf_shape.num_elements() == 0 || output_dims_tf_order[0] == 0) {
// TODO(jbobba): Verify correctness here
// Need semantics for Null MKL tensor
MklDnnShape output_mkl_shape;
output_mkl_shape.SetMklTensor(false);
AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor,
src_tf_shape, output_mkl_shape);
Tensor* dst_tensor = nullptr;
if (dst_tf_shape.num_elements() == 0 ||
dst_dims_tf_order[0] == 0) {
MklDnnShape dst_mkl_shape;
dst_mkl_shape.SetMklTensor(false);
AllocateOutputSetMklShape(context, kOutputIndex_Dst,
&dst_tensor, src_tf_shape, dst_mkl_shape);
// MklConv2D also outputs converted filter as 2nd output of Conv2D.
filter_mkl_shape.SetMklTensor(false);
Tensor* output_filter_tensor = nullptr;
AllocateOutputSetMklShape(context, kOutputIndex_Filter,
&output_filter_tensor, filter_tf_shape,
filter_mkl_shape);
&output_filter_tensor,
filter_tf_shape, filter_mkl_shape);
return;
}
@ -587,6 +804,7 @@ class MklConv2DOp : public OpKernel {
// Describe how the inputs and outputs of Convolution look like. Also
// specify buffers containing actual input and output data.
auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
// If input is in MKL layout, then simply grab input layout; otherwise,
// construct input Tf layout. For TF layout, although input shape
// (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
@ -595,6 +813,7 @@ class MklConv2DOp : public OpKernel {
? src_mkl_shape.GetMklLayout()
: memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
src.SetUsrMem(src_md, &src_tensor);
// Although filter shape (filter_dims) required is in MKL-DNN order,
// the layout is Tensorflow's layout (HWIO).
auto filter_md = filter_mkl_shape.IsMklTensor() // Should NEVER be true
@ -603,98 +822,70 @@ class MklConv2DOp : public OpKernel {
memory::format::hwio);
filter.SetUsrMem(filter_md, &filter_tensor);
// Set output shape (output_dims) required in MKL-DNN order.
// Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
// depending on data format). But later we propagate Mkl layout of the
// output to the next op directly.
output.SetUsrMem(output_dims_mkl_order, tf_fmt);
// Create memory descriptors for convolution data w/ no specified format.
src.SetOpMemDesc(src_dims, memory::format::any);
filter.SetOpMemDesc(filter_dims, memory::format::any);
output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
// MKLDNN dilation starts from 0.
dilations[kDilationH] -= 1;
dilations[kDilationW] -= 1;
// get a conv2d fwd from primitive pool
Conv2DFwd<T> *conv2d_fwd = nullptr;
if (biasEnabled) {
// Create convolution primitive with Bias.
MklDnnData<T> bias(&cpu_engine);
memory::dims bias_size;
conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size);
const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
bias.SetOpMemDesc(bias_size, memory::format::any);
// Create convolution primitive with Bias.
// Use MKLDNN dilated convolution in case of dilated rate (>0).
auto conv_desc = (dilations[kDilationH] > 0 ||
dilations[kDilationW] > 0) ?
convolution_forward::desc(prop_kind::forward,
convolution_direct, src.GetOpMemDesc(),
filter.GetOpMemDesc(), bias.GetOpMemDesc(),
output.GetOpMemDesc(), strides, dilations,
padding_l, padding_r,
TFPaddingToMklDnnPadding(padding_)):
convolution_forward::desc(prop_kind::forward,
convolution_direct, src.GetOpMemDesc(),
filter.GetOpMemDesc(), bias.GetOpMemDesc(),
output.GetOpMemDesc(), strides,
padding_l, padding_r,
TFPaddingToMklDnnPadding(padding_));
auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
cpu_engine);
AllocateOutputTensor(context, conv_prim_desc,
output_dims_mkl_order, tf_fmt, &output_tensor);
// Set data handle for output.
output.SetUsrMemDataHandle(output_tensor);
Tensor* filter_out_tensor = nullptr;
AllocateFilterOutputTensor(context, conv_prim_desc,
TFShapeToMklDnnDims(filter_tf_shape),
&filter_out_tensor);
PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output,
filter_out_tensor);
memory::dims bias_dims = {};
conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims);
ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims,
dst_dims_mkl_order, strides, dilations, padding_left, padding_right);
conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
} else {
// Create convolution primitive without Bias.
// Use MKLDNN dilated convolution in case of dilated rate (>0).
auto conv_desc = (dilations[kDilationH] > 0 ||
dilations[kDilationW] > 0) ?
convolution_forward::desc(prop_kind::forward,
convolution_direct, src.GetOpMemDesc(),
filter.GetOpMemDesc(), output.GetOpMemDesc(),
strides, dilations, padding_l, padding_r,
TFPaddingToMklDnnPadding(padding_)):
convolution_forward::desc(prop_kind::forward,
convolution_direct, src.GetOpMemDesc(),
filter.GetOpMemDesc(), output.GetOpMemDesc(),
strides, padding_l, padding_r,
TFPaddingToMklDnnPadding(padding_));
auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
cpu_engine);
AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
tf_fmt, &output_tensor);
// Set data handle for output.
output.SetUsrMemDataHandle(output_tensor);
Tensor* filter_out_tensor = nullptr;
AllocateFilterOutputTensor(context, conv_prim_desc,
TFShapeToMklDnnDims(filter_tf_shape),
&filter_out_tensor);
PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
nullptr, &output, filter_out_tensor);
ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS,
dst_dims_mkl_order, strides, dilations, padding_left, padding_right);
conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
}
} catch (mkldnn::error& e) {
// allocate output tensors output_tensor and filter_out_tensor
std::shared_ptr<mkldnn::convolution_forward::primitive_desc>
conv_fwd_pd = conv2d_fwd->fwd_pd_;
AllocateOutputTensor(context, *conv_fwd_pd,
dst_dims_mkl_order, tf_fmt, &dst_tensor);
Tensor* filter_out_tensor = nullptr;
AllocateFilterOutputTensor(context, *conv_fwd_pd,
TFShapeToMklDnnDims(filter_tf_shape),
&filter_out_tensor);
T* dst_data = static_cast<T*>(dst_tensor->flat<T>().data());
// check whether src/filter need reorder
std::vector<primitive> net;
if (src_md.data.format != conv2d_fwd->src_fmt_)
src.CheckReorderToOpMem(
conv_fwd_pd.get()->src_primitive_desc(), &net);
if (filter_md.data.format != conv2d_fwd->filter_fmt_)
filter.CheckReorderToOpMem(
conv_fwd_pd.get()->weights_primitive_desc(),
filter.GetTensorBuffer(filter_out_tensor), &net);
stream(stream::kind::eager).submit(net).wait();
T* src_data = static_cast<T*>(
src.GetOpMem().get_data_handle());
T* filter_data = static_cast<T*>(
filter.GetOpMem().get_data_handle());
// execute convolution
if (biasEnabled) {
const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
T* bias_data = static_cast<T*>(const_cast<T*>(
bias_tensor.flat<T>().data()));
conv2d_fwd->Execute(src_data, filter_data, bias_data, dst_data);
} else {
conv2d_fwd->Execute(src_data, filter_data, dst_data);
}
} catch (mkldnn::error &e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + std::string(e.message) + ", in file " +
std::string(__FILE__) + ":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));
", message: " + std::string(e.message) +
", in file " + std::string(__FILE__) + ":" +
std::to_string(__LINE__);
OP_REQUIRES_OK(context,
errors::Aborted("Operation received an exception:", error_msg));
}
}
@ -706,6 +897,7 @@ class MklConv2DOp : public OpKernel {
const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2;
const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
const int kDilationH = 0, kDilationW = 1;
engine cpu_engine = engine(engine::cpu, 0);
// Allocate output tensor.
void AllocateOutputTensor(

View File

@ -241,6 +241,7 @@ class ScatterNdUpdateOp : public OpKernel {
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_ADD_SUB_CPU);
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_UPDATE_CPU);
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_CPU);
TF_CALL_string(REGISTER_SCATTER_ND_CPU);
// Registers GPU kernels.
#if GOOGLE_CUDA

View File

@ -160,6 +160,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);
TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
REGISTER_SCATTER_ND_INDEX(string, scatter_nd_op::UpdateOp::ADD);
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_MATH)
#undef REGISTER_SCATTER_ND_MATH

View File

@ -16,35 +16,6 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
#define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"
// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"
// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"
// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"
// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"

View File

@ -23,7 +23,7 @@ limitations under the License.
#if defined(WIN32)
#include "extras/CUPTI/include/cupti.h"
#else
#include "cuda/extras/CUPTI/include/cupti.h"
#include "cupti.h"
#endif
namespace perftools {
namespace gputools {

View File

@ -24,7 +24,7 @@ limitations under the License.
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
#define TF_VERSION_SUFFIX "-rc0"
#define TF_VERSION_SUFFIX "-rc1"
#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)

View File

@ -19,6 +19,8 @@ limitations under the License.
#include <string>
#include <vector>
#include <unordered_map>
#include <utility>
#include "mkl_dnn.h"
#include "mkl_dnn_types.h"
@ -1759,7 +1761,90 @@ class MklDnnData {
}
};
#endif // INTEL_MKL_ML
/// Base class for operations with reuse of DNN primitives
///
class DnnOp {
public:
virtual ~DnnOp() {}
// Dummy data. Its size, hard-coded as 256 here, does
// not matter since MKL should never operate on this buffer.
unsigned char DummyData[256];
};
const mkldnn::memory::dims NONE_DIMS = {};
// NONE_DIMS marks an absent input (for example, a convolution without
// bias) when building keys for the MKL primitive pool.
template <typename T>
class DnnOpFactory {
public:
DnnOpFactory() {}
~DnnOpFactory() {}
DnnOp* GetOp(const std::string& key) {
auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);
if (stream_iter == DnnOpFactory<T>::GetHashMap().end()) {
return nullptr;
} else {
return stream_iter->second;
}
}
void SetOp(const std::string& key, DnnOp* op) {
auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);
CHECK(stream_iter == DnnOpFactory<T>::GetHashMap().end());
DnnOpFactory<T>::GetHashMap()[key] = op;
}
private:
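// Note that the pool below is thread-local: each thread caches and reuses
// its own primitives, which avoids locking and sidesteps concurrent reuse
// of a single MKL-DNN primitive across threads.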
static inline std::unordered_map<std::string, DnnOp*> &GetHashMap() {
static thread_local std::unordered_map<std::string, DnnOp*> map_;
return map_;
}
};
// utility class for creating keys of MKL primitive pool.
class FactoryKeyCreator {
public:
FactoryKeyCreator() {
key_.reserve(kMaxKeyLength);
}
~FactoryKeyCreator() {}
void AddAsKey(const string &str) {
auto buffer = reinterpret_cast<const char *>(str.c_str());
Append(buffer, str.length());
}
void AddAsKey(const mkldnn::memory::dims &dims) {
for (unsigned int i = 0; i < dims.size(); i++) {
AddAsKey<int>(dims[i]);
}
}
template <typename T>
void AddAsKey(const T data) {
auto buffer = reinterpret_cast<const char *>(&data);
Append(buffer, sizeof(T));
}
std::string GetKey() {
return key_;
}
private:
string key_;
const char delimiter = 'x';
const int kMaxKeyLength = 256;
void Append(const char* data, int len) {
key_.append(data, len);
key_.append(1, delimiter);
}
};
#endif // INTEL_MKL_DNN
} // namespace tensorflow
#endif // INTEL_MKL
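A small sketch of how these helpers compose (the prefix and dims are illustrative; this mirrors the pattern used by the Conv2D primitive factory in `mkl_conv_ops.cc`):
```cpp
// Sketch only: build a pool key from a prefix and the op's dims.
std::string MakePoolKey(const mkldnn::memory::dims& src_dims,
                        const mkldnn::memory::dims& filter_dims) {
  FactoryKeyCreator key_creator;
  string prefix = "example_fwd_";  // illustrative prefix
  key_creator.AddAsKey(prefix);
  key_creator.AddAsKey(src_dims);
  key_creator.AddAsKey(filter_dims);
  return key_creator.GetKey();
}
```
A concrete factory then looks the key up with `DnnOpFactory<T>::GetOp()` and, on a miss, constructs the primitive and stores it with `SetOp()`. Note that the prefix is passed as a named `string` so the `AddAsKey(const string&)` overload is chosen; a raw string literal would bind to the `AddAsKey(const T)` template overload and hash the pointer value instead.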

View File

@ -1,5 +1,5 @@
# Roadmap
**Last updated: Feb 15, 2018**
**Last updated: Apr 27, 2018**
TensorFlow is a rapidly moving, community supported project. This document is intended
to provide guidance about priorities and focus areas of the core set of TensorFlow
@ -14,12 +14,12 @@ expected in the next one to two releases.
### APIs
#### High Level APIs:
* Easy multi-GPU utilization with Estimators
* Easy multi-GPU and TPU utilization with Estimators
* Easy-to-use high-level pre-made estimators for Gradient Boosted Trees, Time Series, and other models
#### Eager Execution:
* Efficient utilization of multiple GPUs
* Distributed training (multi-machine)
* Distributed training support (multi-machine)
* Performance improvements
* Simpler export to a GraphDef/SavedModel
@ -31,14 +31,14 @@ to create Keras models Eager- style via Model subclassing)
#### Official Models:
* A set of
[reference models](https://github.com/tensorflow/models/tree/master/official)
[models](https://github.com/tensorflow/models/tree/master/official)
across image recognition, speech, object detection, and
translation that demonstrate best practices and serve as a starting point for
high-performance model development.
#### Contrib:
* Deprecation notices added to parts of tf.contrib where preferred implementations exist outside of tf.contrib.
* As much as possible, large projects inside tf.contrib moved to separate repositories.
* Deprecate parts of tf.contrib where preferred implementations exist outside of tf.contrib.
* As much as possible, move large projects inside tf.contrib to separate repositories.
* The tf.contrib module will eventually be discontinued in its current form; future experimental development will happen in other repositories.
@ -50,36 +50,72 @@ across image recognition, speech, object detection, and
### Platforms
#### TensorFlow Lite:
* Increased coverage of supported ops in TensorFlow Lite
* Increase coverage of supported ops in TensorFlow Lite
* Easier conversion of a trained TensorFlow graph for use on TensorFlow Lite
* Support for GPU acceleration in TensorFlow Lite (iOS and Android)
* Support for hardware accelerators via Android NeuralNets API
* Improved CPU performance by quantization and other network optimizations (eg. pruning, distillation)
* Increased support for devices beyond Android and iOS (eg. RPi, Cortex-M)
* Improve CPU performance by quantization and other network optimizations (e.g. pruning, distillation)
* Increase support for devices beyond Android and iOS (e.g. RPi, Cortex-M)
#### TensorFlow.js:
* Release package for Node.js bindings to the TensorFlow C API through the TensorFlow.js backend interface
* Expand support for importing TensorFlow SavedModels and Keras models into browser with unified APIs supporting retraining in browser
* Improve Layers API and allow model exporting/saving
* Release tfjs-data API for efficient data input pipelines
#### TensorFlow with Swift:
* Establish open source project including documentation, open design, and code availability.
* Continue refining the implementation and design through 2018.
* Aim for implementation to be solid enough for general use later in 2018.
### Performance
#### Distributed TensorFlow:
* Multi-GPU support optimized for a variety of GPU topologies
* Improved mechanisms for distributing computations on several machines
* Optimize Multi-GPU support for a variety of GPU topologies
* Improve mechanisms for distributing computations on several machines
#### Optimizations:
* Mixed precision training support with initial example model and guide
* Native TensorRT support
#### GPU Optimizations:
* Simplify mixed precision API with initial example model and guide.
* Finalize TensorRT API and move to core.
* CUDA 9.2 and NCCL 2.x default in TensorFlow builds.
* Optimizations for DGX-2.
* Remove support for CUDA less than 8.x and cuDNN less than 6.x.
#### CPU Optimizations:
* Int8 support for SkyLake via MKL
* Dynamic loading of SIMD-optimized kernels
* MKL for Linux and Windows
### End-to-end ML systems:
#### TensorFlow Hub:
* Expand support for module-types in TF Hub with TF Eager integration, Keras layers integration, and TensorFlow.js integration
* Accept variable-sized image input
* Improve multi-GPU estimator support
* Document and improve TPU integration
#### TensorFlow Extended:
* Open source more of the TensorFlow Extended platform to facilitate adoption of TensorFlow in production settings.
* Release TFX libraries for Data Validation
### Documentation and Resources:
* Update documentation, tutorials and Getting Started guides on all features and APIs
* Update [Youtube Tensorflow channel](https://youtube.com/tensorflow) weekly with new content:
  * Coding TensorFlow - where we teach folks coding with TensorFlow
  * TensorFlow Meets - where we highlight community contributions
  * Ask TensorFlow - where we answer community questions
  * Guest and Showcase videos
* Update [Official TensorFlow blog](https://blog.tensorflow.org) with regular articles from Google team and the Community
### Documentation and Usability:
* Updated documentation, tutorials and Getting Started guides
* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications
### Community and Partner Engagement
#### Special Interest Groups:
* Mobilizing the community to work together in focused domains
* Mobilize the community to work together in focused domains
* [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute): build and packaging of TensorFlow
* More to be identified and launched
* SIG TensorBoard, SIG Rust, and more to be identified and launched
#### Community:
* Incorporate public feedback on significant design decisions via a Request-for-Comment (RFC) process
* Formalize process for external contributions to land in TensorFlow and associated projects
* Grow global TensorFlow communities and user groups
* Collaborate with partners to co-develop and publish research papers
* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications

View File

@ -38,8 +38,10 @@ Estimators automatically write the following to disk:
uses to create visualizations.
To specify the top-level directory in which the Estimator stores its
information, assign a value to the optional `model_dir` argument of any
Estimator's constructor. For example, the following code sets the `model_dir`
information, assign a value to the optional `model_dir` argument of *any*
`Estimator`'s constructor.
Taking `DNNClassifier` as an example,
the following code sets the `model_dir`
argument to the `models/iris` directory:
```python
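# A minimal sketch (assumed continuation; the diff truncates the original
# example here). `my_feature_columns` is assumed to be defined earlier.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
    model_dir='models/iris')
```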

View File

@ -138,7 +138,7 @@ The model will represent the buckets as follows:
|< 1960 | [1, 0, 0, 0] |
|>= 1960 but < 1980 | [0, 1, 0, 0] |
|>= 1980 but < 2000 | [0, 0, 1, 0] |
|> 2000 | [0, 0, 0, 1] |
|>= 2000 | [0, 0, 0, 1] |
Why would you want to split a number—a perfectly valid input to your
model—into a categorical value? Well, notice that the categorization splits a

View File

@ -10,7 +10,7 @@ course prior to diving into TensorFlow documentation:
TensorFlow is a tool for machine learning. While it contains a wide range of
functionality, TensorFlow is mainly designed for deep neural network models.
The easiest way to get started with tensorflow is using Eager Execution.
The easiest way to get started with TensorFlow is using Eager Execution.
* @{$get_started/eager}, is for anyone new to machine learning or TensorFlow.

View File

@ -38,7 +38,7 @@ enable TensorFlow for C:
OS="linux" # Change to "darwin" for macOS
TARGET_DIRECTORY="/usr/local"
curl -L \
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc0.tar.gz" |
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`

View File

@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
TF_TYPE="cpu" # Change to "gpu" for GPU support
TARGET_DIRECTORY='/usr/local'
curl -L \
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc0.tar.gz" |
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`

View File

@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
<version>1.8.0-rc0</version>
<version>1.8.0-rc1</version>
</dependency>
```
@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
<version>1.8.0-rc0</version>
<version>1.8.0-rc1</version>
</dependency>
</dependencies>
</project>
@ -124,12 +124,12 @@ instead:
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow</artifactId>
<version>1.8.0-rc0</version>
<version>1.8.0-rc1</version>
</dependency>
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>libtensorflow_jni_gpu</artifactId>
<version>1.8.0-rc0</version>
<version>1.8.0-rc1</version>
</dependency>
```
@ -148,7 +148,7 @@ refer to the simpler instructions above instead.
Take the following steps to install TensorFlow for Java on Linux or macOS:
1. Download
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc0.jar),
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
which is the TensorFlow Java Archive (JAR).
2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L \
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc0.tar.gz" |
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
tar -xz -C ./jni
### Install on Windows
@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
Take the following steps to install TensorFlow for Java on Windows:
1. Download
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc0.jar),
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
which is the TensorFlow Java Archive (JAR).
2. Download the following Java Native Interface (JNI) file appropriate for
[TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc0.zip).
[TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip).
3. Extract this .zip file.
@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the
downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
as follows:
<pre><b>javac -cp libtensorflow-1.8.0-rc0.jar HelloTF.java</b></pre>
<pre><b>javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java</b></pre>
### Running
@ -241,11 +241,11 @@ two files are available to the JVM:
For example, the following command line executes the `HelloTF` program on Linux
and macOS X:
<pre><b>java -cp libtensorflow-1.8.0-rc0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
<pre><b>java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
And the following command line executes the `HelloTF` program on Windows:
<pre><b>java -cp libtensorflow-1.8.0-rc0.jar;. -Djava.library.path=jni HelloTF</b></pre>
<pre><b>java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
installed TensorFlow for Java and are ready to use the API. If the program

View File

@ -1,139 +1,266 @@
# Installing TensorFlow on Ubuntu
This guide explains how to install TensorFlow on Ubuntu. Although these
instructions might also work on other Linux variants, we have only
tested (and we only support) these instructions on machines meeting the
following requirements:
This guide explains how to install TensorFlow on Ubuntu Linux. While these
instructions may work on other Linux variants, they are tested and supported with
the following system requirements:
* 64-bit desktops or laptops
* Ubuntu 16.04 or higher
* 64-bit desktops or laptops
* Ubuntu 16.04 or higher
## Determine which TensorFlow to install
## Choose which TensorFlow to install
You must choose one of the following types of TensorFlow to install:
The following TensorFlow variants are available for installation:
* **TensorFlow with CPU support only**. If your system does not have a
NVIDIA® GPU, you must install this version. Note that this version of
TensorFlow is typically much easier to install (typically,
in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend
installing this version first.
* **TensorFlow with GPU support**. TensorFlow programs typically run
significantly faster on a GPU than on a CPU. Therefore, if your
system has a NVIDIA® GPU meeting the prerequisites shown below and you
need to run performance-critical applications, you should ultimately
install this version.
<a name="NVIDIARequirements"></a>
### NVIDIA requirements to run TensorFlow with GPU support
If you are installing TensorFlow with GPU support using one of the
mechanisms described in this guide, then the following NVIDIA software
must be installed on your system:
* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
Ensure that you append the relevant CUDA pathnames to the
`LD_LIBRARY_PATH` environment variable as described in the
NVIDIA documentation.
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
Ensure that you create the `CUDA_HOME` environment variable as
described in the NVIDIA documentation.
* GPU card with CUDA Compute Capability 3.0 or higher for building
from source and 3.5 or higher for our binaries. See
[NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for
a list of supported GPU cards.
* [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA
Toolkit.
* The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface.
This library provides advanced profiling support. To install this library,
issue the following command for CUDA Toolkit >= 8.0:
<pre>
$ <b>sudo apt-get install cuda-command-line-tools</b>
</pre>
and add its path to your `LD_LIBRARY_PATH` environment variable:
<pre>
$ <b>export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</b>
</pre>
For CUDA Toolkit <= 7.5 do:
<pre>
$ <b>sudo apt-get install libcupti-dev</b>
</pre>
* **[OPTIONAL]** For optimized inferencing performance, you can also install
**NVIDIA TensorRT 3.0**. The minimal set of TensorRT runtime components needed
for use with the pre-built `tensorflow-gpu` package can be installed as follows:
<pre>
$ <b>wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
$ <b>sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
$ <b>sudo apt-get update</b>
$ <b>sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</b>
</pre>
**IMPORTANT:** For compatibility with the pre-built `tensorflow-gpu`
package, please use the Ubuntu **14.04** package of TensorRT as shown above,
even when installing onto an Ubuntu 16.04 system.<br/>
<br/>
To build the TensorFlow-TensorRT integration module from source rather than
using pre-built binaries, see the [module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
For detailed TensorRT installation instructions, see [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).<br/>
<br/>
To avoid cuDNN version conflicts during later system upgrades, you can hold
the cuDNN version at 7.0.5:
<pre>
$ <b> sudo apt-mark hold libcudnn7 libcudnn7-dev</b>
</pre>
To later allow upgrades, you can remove the hold:
<pre>
$ <b> sudo apt-mark unhold libcudnn7 libcudnn7-dev</b>
</pre>
If you have an earlier version of the preceding packages, please upgrade to
the specified versions. If upgrading is not possible, then you may still run
TensorFlow with GPU support, if you @{$install_sources$install TensorFlow from Sources}.
* __TensorFlow with CPU support only__. If your system does not have an
NVIDIA®&nbsp;GPU, you must install this version. This version of TensorFlow is
usually easier to install, so even if you have an NVIDIA GPU, we recommend
installing this version first.
* __TensorFlow with GPU support__. TensorFlow programs usually run much faster on
a GPU instead of a CPU. If you run performance-critical applications and your
system has an NVIDIA®&nbsp;GPU that meets the prerequisites, you should install
this version. See [TensorFlow GPU support](#NVIDIARequirements) for details.
## Determine how to install TensorFlow
## How to install TensorFlow
You must pick the mechanism by which you install TensorFlow. The
supported choices are as follows:
There are a few options to install TensorFlow on your machine:
* [Virtualenv](#InstallingVirtualenv)
* ["native" pip](#InstallingNativePip)
* [Docker](#InstallingDocker)
* [Anaconda](#InstallingAnaconda)
* installing from sources, which is documented in
[a separate guide](https://www.tensorflow.org/install/install_sources).
* [Use pip in a virtual environment](#InstallingVirtualenv) *(recommended)*
* [Use pip in your system environment](#InstallingNativePip)
* [Configure a Docker container](#InstallingDocker)
* [Use pip in Anaconda](#InstallingAnaconda)
* [Install TensorFlow from source](/install/install_sources)
**We recommend the Virtualenv installation.**
[Virtualenv](https://virtualenv.pypa.io/en/stable/)
is a virtual Python environment isolated from other Python development,
incapable of interfering with or being affected by other Python programs
on the same machine. During the Virtualenv installation process,
you will install not only TensorFlow but also all the packages that
TensorFlow requires. (This is actually pretty easy.)
To start working with TensorFlow, you simply need to "activate" the
virtual environment. All in all, Virtualenv provides a safe and
reliable mechanism for installing and running TensorFlow.
<a name="InstallingVirtualenv"></a>
### Use `pip` in a virtual environment
Native pip installs TensorFlow directly on your system without going
through any container system. **We recommend the native pip install for
system administrators aiming to make TensorFlow available to everyone on a
multi-user system.** Since a native pip installation is not walled-off in
a separate container, the pip installation might interfere with other
Python-based installations on your system. However, if you understand pip
and your Python environment, a native pip installation often entails only
a single command.
Key Point: Using a virtual environment is the recommended install method.
The [Virtualenv](https://virtualenv.pypa.io/en/stable/) tool creates virtual
Python environments that are isolated from other Python development on the same
machine. In this scenario, you install TensorFlow and its dependencies within a
virtual environment that is available when *activated*. Virtualenv provides a
reliable way to install and run TensorFlow while avoiding conflicts with the rest
of the system.
##### 1. Install Python, `pip`, and `virtualenv`.
On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V # or: python3 -V</code>
<code class="devsite-terminal">pip -V # or: pip3 -V</code>
</pre>
To install these packages on Ubuntu:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev python-virtualenv # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev python-virtualenv # for Python 3.n</code>
</pre>
We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>
If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>
##### 2. Create a directory for the virtual environment and choose a Python interpreter.
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">mkdir ~/tensorflow # somewhere to work out of</code>
<code class="devsite-terminal">cd ~/tensorflow</code>
<code># Choose one of the following Python environments for the ./venv directory:</code>
<code class="devsite-terminal">virtualenv --system-site-packages <var>venv</var> # Use python default (Python 2.7)</code>
<code class="devsite-terminal">virtualenv --system-site-packages -p python3 <var>venv</var> # Use Python 3.n</code>
</pre>
##### 3. Activate the Virtualenv environment.
Use one of these shell-specific commands to activate the virtual environment:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate # bash, sh, ksh, or zsh</code>
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate.csh # csh or tcsh</code>
<code class="devsite-terminal">. ~/tensorflow/<var>venv</var>/bin/activate.fish # fish</code>
</pre>
When the Virtualenv is activated, the shell prompt displays as `(venv) $`.
##### 4. Upgrade `pip` in the virtual environment.
Within the active virtual environment, upgrade `pip`:
<pre class="prettyprint lang-bsh">
(venv)$ pip install -U pip
</pre>
You can install other Python packages within the virtual environment without
affecting packages outside the `virtualenv`.
##### 5. Install TensorFlow in the virtual environment.
Choose one of the available TensorFlow packages for installation:
* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support
Within an active Virtualenv environment, use `pip` to install the package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">pip install -U tensorflow</code>
</pre>
Use `pip list` to show the packages installed in the virtual environment.
[Validate the install](#ValidateYourInstallation) and test the version:
<pre class="prettyprint lang-bsh">
(venv)$ python -c "import tensorflow as tf; print(tf.__version__)"
</pre>
Success: TensorFlow is now installed.
Use the `deactivate` command to stop the Python virtual environment.
#### Problems
If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:
<pre class="prettyprint lang-bsh">
(venv)$ pip install --upgrade <var>remote-pkg-URL</var> # Python 2.7
(venv)$ pip3 install --upgrade <var>remote-pkg-URL</var> # Python 3.n
</pre>
The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.
See [Common Installation Problems](#common_installation_problems) if you
encounter problems.
#### Uninstall TensorFlow
To uninstall TensorFlow, remove the Virtualenv directory you created in step 2:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">deactivate # stop the virtualenv</code>
<code class="devsite-terminal">rm -r ~/tensorflow/<var>venv</var></code>
</pre>
<a name="InstallingNativePip"></a>
### Use `pip` in your system environment
Use `pip` to install the TensorFlow package directly on your system without
using a container or virtual environment for isolation. This method is
recommended for system administrators that want a TensorFlow installation that is
available to everyone on a multi-user system.
Since a system install is not isolated, it could interfere with other
Python-based installations. But if you understand `pip` and your Python
environment, a system `pip` install is straightforward.
See the
[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py)
for a list of packages that TensorFlow installs.
##### 1. Install Python, `pip`, and `virtualenv`.
On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V # or: python3 -V</code>
<code class="devsite-terminal">pip -V # or: pip3 -V</code>
</pre>
To install these packages on Ubuntu:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev # for Python 3.n</code>
</pre>
We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>
If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>
##### 2. Install TensorFlow on system.
Choose one of the available TensorFlow packages for installation:
* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support
And use `pip` to install the package for Python 2 or 3:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U tensorflow # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install -U tensorflow # Python 3.n</code>
</pre>
Use `pip list` to show the packages installed on the system.
[Validate the install](#ValidateYourInstallation) and test the version:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -c "import tensorflow as tf; print(tf.__version__)"</code>
</pre>
Success: TensorFlow is now installed.
#### Problems
If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install --upgrade <var>remote-pkg-URL</var> # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install --upgrade <var>remote-pkg-URL</var> # Python 3.n</code>
</pre>
The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.
See [Common Installation Problems](#common_installation_problems) if you
encounter problems.
#### Uninstall TensorFlow
To uninstall TensorFlow on your system, use one of following commands:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip uninstall tensorflow # for Python 2.7</code>
<code class="devsite-terminal">sudo pip3 uninstall tensorflow # for Python 3.n</code>
</pre>
<a name="InstallingDocker"></a>
### Configure a Docker container
Docker completely isolates the TensorFlow installation
from pre-existing packages on your machine. The Docker container contains
@ -142,210 +269,6 @@ large (hundreds of MBs). You might choose the Docker installation if you are
incorporating TensorFlow into a larger application architecture that already
uses Docker.
In Anaconda, you may use conda to create a virtual environment.
However, within Anaconda, we recommend installing TensorFlow with the
`pip install` command, not with the `conda install` command.
**NOTE:** The conda package is community supported, not officially supported.
That is, the TensorFlow team neither tests nor maintains the conda package.
Use that package at your own risk.
<a name="InstallingVirtualenv"></a>
## Installing with Virtualenv
Take the following steps to install TensorFlow with Virtualenv:
1. Install pip and Virtualenv by issuing one of the following commands:
<pre>$ <b>sudo apt-get install python-pip python-dev python-virtualenv</b> # for Python 2.7
$ <b>sudo apt-get install python3-pip python3-dev python-virtualenv</b> # for Python 3.n</pre>
2. Create a Virtualenv environment by issuing one of the following commands:
<pre>$ <b>virtualenv --system-site-packages</b> <i>targetDirectory</i> # for Python 2.7
$ <b>virtualenv --system-site-packages -p python3</b> <i>targetDirectory</i> # for Python 3.n</pre>
where <code><em>targetDirectory</em></code> specifies the top of the
Virtualenv tree. Our instructions assume that
<code><em>targetDirectory</em></code> is `~/tensorflow`, but you may
choose any directory.
3. Activate the Virtualenv environment by issuing one of the following
commands:
<pre>$ <b>source ~/tensorflow/bin/activate</b> # bash, sh, ksh, or zsh
$ <b>source ~/tensorflow/bin/activate.csh</b> # csh or tcsh
$ <b>. ~/tensorflow/bin/activate.fish</b> # fish</pre>
The preceding <tt>source</tt> command should change your prompt
to the following:
<pre>(tensorflow)$ </pre>
4. Ensure pip ≥8.1 is installed:
<pre>(tensorflow)$ <b>easy_install -U pip</b></pre>
5. Issue one of the following commands to install TensorFlow in the active
Virtualenv environment:
<pre>(tensorflow)$ <b>pip install --upgrade tensorflow</b> # for Python 2.7
(tensorflow)$ <b>pip3 install --upgrade tensorflow</b> # for Python 3.n
(tensorflow)$ <b>pip install --upgrade tensorflow-gpu</b> # for Python 2.7 and GPU
(tensorflow)$ <b>pip3 install --upgrade tensorflow-gpu</b> # for Python 3.n and GPU</pre>
If the above command succeeds, skip Step 6. If the preceding
command fails, perform Step 6.
6. (Optional) If Step 5 failed (typically because you invoked a pip version
lower than 8.1), install TensorFlow in the active Virtualenv environment
by issuing a command of the following format:
<pre>(tensorflow)$ <b>pip install --upgrade</b> <i>tfBinaryURL</i> # Python 2.7
(tensorflow)$ <b>pip3 install --upgrade</b> <i>tfBinaryURL</i> # Python 3.n </pre>
where <code><em>tfBinaryURL</em></code> identifies the URL of the
TensorFlow Python package. The appropriate value of
<code><em>tfBinaryURL</em></code> depends on the operating system,
Python version, and GPU support. Find the appropriate value for
<code><em>tfBinaryURL</em></code> for your system
[here](#the_url_of_the_tensorflow_python_package). For example, if you
are installing TensorFlow for Linux, Python 3.4, and CPU-only support,
issue the following command to install TensorFlow in the active
Virtualenv environment:
<pre>(tensorflow)$ <b>pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre>
If you encounter installation problems, see
[Common Installation Problems](#common_installation_problems).
### Next Steps
After installing TensorFlow,
[validate the installation](#ValidateYourInstallation).
Note that you must activate the Virtualenv environment each time you
use TensorFlow. If the Virtualenv environment is not currently active,
invoke one of the following commands:
<pre> $ <b>source ~/tensorflow/bin/activate</b> # bash, sh, ksh, or zsh
$ <b>source ~/tensorflow/bin/activate.csh</b> # csh or tcsh</pre>
When the Virtualenv environment is active, you may run
TensorFlow programs from this shell. Your prompt will become
the following to indicate that your tensorflow environment is active:
<pre>(tensorflow)$ </pre>
When you are done using TensorFlow, you may deactivate the
environment by invoking the `deactivate` function as follows:
<pre>(tensorflow)$ <b>deactivate</b> </pre>
The prompt will revert to your default prompt (as defined by the
`PS1` environment variable).
### Uninstalling TensorFlow
To uninstall TensorFlow, simply remove the tree you created.
For example:
<pre>$ <b>rm -r</b> <i>targetDirectory</i> </pre>
<a name="InstallingNativePip"></a>
## Installing with native pip
You may install TensorFlow through pip, choosing between a simple
installation procedure and a more complex one.
**Note:** The
[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py)
lists the TensorFlow packages that pip will install or upgrade.
### Prerequisite: Python and Pip
Python is automatically installed on Ubuntu. Take a moment to confirm
(by issuing a `python -V` command) that one of the following Python
versions is already installed on your system:
* Python 2.7
* Python 3.4+
The pip or pip3 package manager is *usually* installed on Ubuntu. Take a
moment to confirm (by issuing a `pip -V` or `pip3 -V` command)
that pip or pip3 is installed. We strongly recommend version 8.1 or higher
of pip or pip3. If version 8.1 or later is not installed, issue the
following command, which will either install or upgrade to the latest
version of pip:
<pre>$ <b>sudo apt-get install python-pip python-dev</b> # for Python 2.7
$ <b>sudo apt-get install python3-pip python3-dev</b> # for Python 3.n
</pre>
### Install TensorFlow
Assuming the prerequisite software is installed on your Linux host,
take the following steps:
1. Install TensorFlow by invoking **one** of the following commands:
<pre>$ <b>pip install tensorflow</b> # Python 2.7; CPU support (no GPU support)
$ <b>pip3 install tensorflow</b> # Python 3.n; CPU support (no GPU support)
$ <b>pip install tensorflow-gpu</b> # Python 2.7; GPU support
$ <b>pip3 install tensorflow-gpu</b> # Python 3.n; GPU support </pre>
If the preceding command runs to completion, you should now
[validate your installation](#ValidateYourInstallation).
2. (Optional.) If Step 1 failed, install the latest version of TensorFlow
by issuing a command of the following format:
<pre>$ <b>sudo pip install --upgrade</b> <i>tfBinaryURL</i> # Python 2.7
$ <b>sudo pip3 install --upgrade</b> <i>tfBinaryURL</i> # Python 3.n </pre>
where <code><em>tfBinaryURL</em></code> identifies the URL of the
TensorFlow Python package. The appropriate value of
<code><em>tfBinaryURL</em></code> depends on the operating system,
Python version, and GPU support. Find the appropriate value for
<code><em>tfBinaryURL</em></code>
[here](#the_url_of_the_tensorflow_python_package). For example, to
install TensorFlow for Linux, Python 3.4, and CPU-only support, issue
the following command:
<pre>
$ <b>sudo pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl</b>
</pre>
If this step fails, see
[Common Installation Problems](#common_installation_problems).
### Next Steps
After installing TensorFlow, [validate your installation](#ValidateYourInstallation).
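As a quick smoke test, you can run a short program of the kind shown in the
validation section; a minimal sketch (TF 1.x graph-mode API):

```python
# Minimal smoke test: import TensorFlow and run a trivial graph.
import tensorflow as tf

hello = tf.constant('Hello, TensorFlow!')
sess = tf.Session()
print(sess.run(hello))  # b'Hello, TensorFlow!'
```

If this prints the greeting, the package is importable and a session can run.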
### Uninstalling TensorFlow
To uninstall TensorFlow, issue one of the following commands:
<pre>
$ <b>sudo pip uninstall tensorflow</b> # for Python 2.7
$ <b>sudo pip3 uninstall tensorflow</b> # for Python 3.n
</pre>
<a name="InstallingDocker"></a>
## Installing with Docker
Take the following steps to install TensorFlow through Docker:
1. Install Docker on your machine as described in the
@ -364,7 +287,7 @@ Take the following steps to install TensorFlow through Docker:
The remainder of this section explains how to launch a Docker container.
### CPU-only
#### CPU-only
To launch a Docker container with CPU-only support (that is, without
GPU support), enter a command of the following format:
@ -414,7 +337,7 @@ $ <b>docker run -it -p 8888:8888 tensorflow/tensorflow</b>
Docker will download the TensorFlow binary image the first time you launch it.
### GPU support
#### GPU support
Prior to installing TensorFlow with GPU support, ensure that your system meets all
[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container
@ -470,14 +393,22 @@ For more details see the
[TensorFlow docker readme](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/docker).
### Next Steps
#### Next Steps
You should now
[validate your installation](#ValidateYourInstallation).
<a name="InstallingAnaconda"></a>
## Installing with Anaconda
### Use `pip` in Anaconda
Anaconda provides the `conda` utility to create a virtual environment. However,
within Anaconda, we recommend installing TensorFlow with the `pip install`
command, *not* the `conda install` command.
Caution: `conda` is a community-supported package that is not officially
maintained by the TensorFlow team. Use this package at your own risk, since it
is not tested against new TensorFlow releases.
Take the following steps to install TensorFlow in an Anaconda environment:
@ -507,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
<pre>
(tensorflow)$ <b>pip install --ignore-installed --upgrade \
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
<a name="ValidateYourInstallation"></a>
## Validate your installation
@ -563,11 +494,89 @@ installation problems](#common_installation_problems).
If you are new to machine learning, we recommend the following:
* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course)
* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners}
* @{$get_started/eager}
If you are experienced with machine learning but new to TensorFlow, see
@{$get_started/eager}.
<a name="NVIDIARequirements"></a>
## TensorFlow GPU support
To install TensorFlow with GPU support, configure the following NVIDIA® software
on your system:
* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
Append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environment
variable as described in the NVIDIA documentation.
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
Create the `CUDA_HOME` environment variable as described in the NVIDIA
documentation.
* A GPU card with CUDA Compute Capability 3.0 or higher is sufficient for
building TensorFlow from source; the pre-built TensorFlow binaries require
Compute Capability 3.5 or higher. See the
[NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a
list of supported GPU cards.
* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA
Toolkit.
* The `libcupti-dev` library is the NVIDIA CUDA Profiling Tools Interface
(CUPTI), which provides advanced profiling support. To install this library,
use the following command for CUDA Toolkit >= 8.0:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install cuda-command-line-tools</code>
</pre>
Add this path to the `LD_LIBRARY_PATH` environment variable:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</code>
</pre>
For CUDA Toolkit <= 7.5 use:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install libcupti-dev</code>
</pre>
* *OPTIONAL*: For optimized inference performance, install
*NVIDIA&nbsp;TensorRT&nbsp;3.0*. To install the minimal set of TensorRT
runtime components required for use with the pre-built `tensorflow-gpu`
package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
<code class="devsite-terminal">sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
<code class="devsite-terminal">sudo apt-get update</code>
<code class="devsite-terminal">sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</code>
</pre>
Note: For compatibility with the pre-built `tensorflow-gpu` package, use the
Ubuntu *14.04* package of TensorRT (shown above). Use this even when installing
on an Ubuntu 16.04 system.
To build the TensorFlow-TensorRT integration module from source instead of using
the pre-built binaries, see the
[module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
For detailed TensorRT installation instructions, see
[NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).
To avoid cuDNN version conflicts during later system upgrades, hold the cuDNN
version at 7.0.5:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-mark hold libcudnn7 libcudnn7-dev</code>
</pre>
To allow upgrades, remove this hold:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-mark unhold libcudnn7 libcudnn7-dev</code>
</pre>
If you have an earlier version of the preceding packages, upgrade to the
specified versions. If upgrading is not possible, you can still run TensorFlow
with GPU support by @{$install_sources}.
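Once the NVIDIA stack is configured, a quick way to confirm that TensorFlow
actually sees the GPU is to list the local devices from Python; a minimal
check (device names vary by machine):

```python
# Requires the tensorflow-gpu package plus the CUDA/cuDNN setup above.
from tensorflow.python.client import device_lib

# Prints one entry per device; a working GPU setup includes an entry
# whose name looks like "/device:GPU:0".
print(device_lib.list_local_devices())
```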
## Common installation problems
@ -581,7 +590,7 @@ ask a new question about it on Stack Overflow and specify
the `tensorflow` tag.
<table>
<tr> <th>Stack Overflow Link</th> <th>Error Message</th> </tr>
<tr> <th>Link to GitHub or Stack&nbsp;Overflow</th> <th>Error Message</th> </tr>
<tr>
<td><a href="https://stackoverflow.com/q/36159194">36159194</a></td>
@ -681,14 +690,14 @@ This section documents the relevant values for Linux installations.
CPU only:
<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp27-none-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>
GPU support:
<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp27-none-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@ -700,14 +709,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>
GPU support:
<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@ -719,14 +728,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>
GPU support:
<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>
@ -738,14 +747,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:
<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>
GPU support:
<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>
View File
@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv:
TensorFlow in the active Virtualenv is as follows:
<pre> $ <b>pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl</b></pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b></pre>
If you encounter installation problems, see
[Common Installation Problems](#common-installation-problems).
@ -242,7 +242,7 @@ take the following steps:
issue the following command:
<pre> $ <b>sudo pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl</b> </pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b> </pre>
If the preceding command fails, see
[installation problems](#common-installation-problems).
@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
TensorFlow for Python 2.7:
<pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl</b></pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl</b></pre>
<a name="ValidateYourInstallation"></a>
@ -524,7 +524,7 @@ The value you specify depends on your Python version.
<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
</pre>
@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-a
<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
</pre>
View File
@ -354,10 +354,10 @@ Invoke `pip install` to install that pip package.
The filename of the `.whl` file depends on your platform.
For example, the following command will install the pip package
for TensorFlow 1.8.0rc0 on Linux:
for TensorFlow 1.8.0rc1 on Linux:
<pre>
$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc0-py2-none-any.whl</b>
$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl</b>
</pre>
## Validate your installation
View File
@ -86,7 +86,7 @@ code. `tf_library` utilizes `tfcompile` to compile the TensorFlow graph into
executable code.
```build
load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library")
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")
# Use the tf_library macro to compile your graph into executable code.
tf_library(
@ -258,8 +258,8 @@ file.
```build
# Example of linking your binary
# Also see //third_party/tensorflow/compiler/aot/tests/BUILD
load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library")
# Also see //tensorflow/compiler/aot/tests/BUILD
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")
# The same tf_library call from step 2 above.
tf_library(
View File
@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv")
def main(unused_argv):
  # Load datasets.
  training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float)
      filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
  test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float)
      filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)

  validation_metrics = {
      "accuracy":
@ -83,7 +83,7 @@ def main(unused_argv):
  # Classify two new flower samples.
  new_samples = np.array(
      [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float)
      [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
  y = list(classifier.predict(new_samples))
  print("Predictions: {}".format(str(y)))
View File
@ -5,7 +5,7 @@ Construct and execute TensorFlow graphs in Go.
[![GoDoc](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go?status.svg)](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go)
> *WARNING*: The API defined in this package is not stable and can change
> without notice. The same goes for the awkward package path
> without notice. The same goes for the package path:
> (`github.com/tensorflow/tensorflow/tensorflow/go`).
## Quickstart
View File
@ -21386,7 +21386,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
//
// The `bad_color` argument is the color to use in the generated images for
// non-finite input values. It is a `unit8` 1-D tensor of length `channels`.
// non-finite input values. It is a `uint8` 1-D tensor of length `channels`.
// Each element must be in the range `[0, 255]` (It represents the value of a
// pixel in the output image). Non-finite values in the input tensor are
// replaced by this tensor in the output image. The default value is the color
View File
@ -644,11 +644,9 @@ class Estimator(object):
sharded=True)
saver_for_restore.restore(session, checkpoint_path)
# pylint: disable=protected-access
local_init_op = (
estimator_spec.scaffold.local_init_op or
monitored_session.Scaffold._default_local_init_op())
# pylint: enable=protected-access
monitored_session.Scaffold.default_local_init_op())
# Perform the export
builder = saved_model_builder.SavedModelBuilder(temp_export_dir)
View File
@ -29,12 +29,14 @@ from tensorflow.python.estimator import run_config as run_config_lib
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
from tensorflow.python.framework import tensor_util
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import models
from tensorflow.python.keras._impl.keras import optimizers
from tensorflow.python.keras._impl.keras.engine.base_layer import Layer
from tensorflow.python.keras._impl.keras.engine.network import Network
from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import metrics as metrics_module
from tensorflow.python.ops import variables as variables_module
@ -55,6 +57,17 @@ def _cast_tensor_to_floatx(x):
return math_ops.cast(x, K.floatx())
def _convert_tensor(x):
  """Create or cast tensor if needed."""
  if not tensor_util.is_tensor(x):
    # x is a numpy array
    x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x)
  if check_ops.is_numeric_tensor(x):
    # is_numeric_tensor returns False if provided with a numpy array
    x = _cast_tensor_to_floatx(x)
  return x
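The helper above depends on internal framework modules (`tensor_util`,
`check_ops`); as a rough standalone sketch of the same logic, using public
TF 1.x APIs and approximating `is_numeric_tensor` with dtype checks
(`convert_tensor` is an illustrative name, not part of the change):

```python
import numpy as np
import tensorflow as tf

def convert_tensor(x):
  # Approximation of _convert_tensor using public APIs only.
  if not isinstance(x, (tf.Tensor, tf.SparseTensor)):
    # numpy input: build a (Sparse)Tensor from it first.
    x = tf.convert_to_tensor_or_sparse_tensor(x)
  if x.dtype.is_floating or x.dtype.is_integer:
    # Numeric tensors are cast to the Keras default float type (float32).
    x = tf.cast(x, tf.float32)
  return x

print(convert_tensor(np.array([1, 2], dtype=np.int64)).dtype)  # float32
```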
def _any_variable_initalized():
  """Check if any variable has been initialized in the Keras model.
@ -86,7 +99,7 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):
if isinstance(estimator_io, (list, tuple)):
# Case currently not supported by most built-in input_fn,
# but it's good to have for sanity
return [_cast_tensor_to_floatx(x) for x in estimator_io]
return [_convert_tensor(x) for x in estimator_io]
elif isinstance(estimator_io, dict):
if is_input:
if keras_model._is_graph_network:
@ -108,12 +121,12 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):
'It needs to match one '
'of the following: %s' % ('input' if is_input else 'output', key,
', '.join(keras_io_names)))
tensors = [_cast_tensor_to_floatx(estimator_io[io_name])
tensors = [_convert_tensor(estimator_io[io_name])
for io_name in keras_io_names]
return tensors
else:
# Plain array.
return _cast_tensor_to_floatx(estimator_io)
return _convert_tensor(estimator_io)
def _in_place_subclassed_model_reset(model):
@ -274,8 +287,7 @@ def _clone_and_build_model(mode,
is_input=False)
else:
target_tensors = [
_cast_tensor_to_floatx(
sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels))
_convert_tensor(labels)
]
if keras_model._is_graph_network:
View File
@ -30,6 +30,7 @@ from tensorflow.python.estimator.inputs import numpy_io
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.keras._impl import keras
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import testing_utils
from tensorflow.python.keras._impl.keras.applications import mobilenet
from tensorflow.python.keras._impl.keras.optimizers import SGD
@ -142,16 +143,20 @@ def randomize_io_type(array, name):
def multi_inputs_multi_outputs_model():
  # test multi-input layer
  a = keras.layers.Input(shape=(16,), name='input_a')
  b = keras.layers.Input(shape=(16,), name='input_b')
  m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m')
  dense = keras.layers.Dense(8, name='dense_1')
  a_2 = dense(a)
  # Apply a mask
  s_2 = keras.layers.Lambda(lambda k:
                            K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2])
  b_2 = dense(b)
  merged = keras.layers.concatenate([a_2, b_2], name='merge')
  merged = keras.layers.concatenate([s_2, b_2], name='merge')
  c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
  d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
  model = keras.models.Model(inputs=[a, b], outputs=[c, d])
  model = keras.models.Model(inputs=[a, b, m], outputs=[c, d])
  model.compile(
@ -352,18 +357,27 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
test_samples=50,
input_shape=(16,),
num_classes=2)
np.random.seed(_RANDOM_SEED)
(input_m_train, _), (input_m_test, _) = testing_utils.get_test_data(
train_samples=_TRAIN_SIZE,
test_samples=50,
input_shape=(8,),
num_classes=2)
c_train = keras.utils.to_categorical(c_train)
c_test = keras.utils.to_categorical(c_test)
d_train = keras.utils.to_categorical(d_train)
d_test = keras.utils.to_categorical(d_test)
    def train_input_fn():
      input_dict = {'input_a': a_train, 'input_b': b_train}
      input_dict = {'input_a': a_train, 'input_b': b_train,
                    'input_m': input_m_train > 0}
      output_dict = {'dense_2': c_train, 'dense_3': d_train}
      return input_dict, output_dict

    def eval_input_fn():
      input_dict = {'input_a': a_test, 'input_b': b_test}
      input_dict = {'input_a': a_test, 'input_b': b_test,
                    'input_m': input_m_test > 0}
      output_dict = {'dense_2': c_test, 'dense_3': d_test}
      return input_dict, output_dict
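For elementwise boolean masks like `input_m`, `K.switch` behaves like
`tf.where` when the condition has the same rank as its branches; a standalone
sketch of the masking idea (illustrative values, not part of the test):

```python
import tensorflow as tf

cond = tf.constant([True, False, True])
vals = tf.constant([2.0, 3.0, 4.0])
# Keep values where the mask is True and zero them elsewhere; this is
# the effect the Lambda layer above achieves with K.switch.
masked = tf.where(cond, vals, tf.zeros_like(vals))
with tf.Session() as sess:
  print(sess.run(masked))  # [2. 0. 4.]
```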
View File
@ -35,8 +35,7 @@ class DivisionTestCase(test.TestCase):
"""Test all the different ways to divide."""
values = [1, 2, 7, 11]
functions = (lambda x: x), constant_op.constant
# TODO(irving): Test int8, int16 once we support casts for those.
dtypes = np.int32, np.int64, np.float32, np.float64
dtypes = np.int8, np.int16, np.int32, np.int64, np.float32, np.float64
tensors = []
checks = []
View File
@ -160,7 +160,7 @@ class ReduceJoinTest(UnicodeTestCase):
separator=separator)
if not reduction_indices:
truth = constant_op.constant(truth)
truth_squeezed = array_ops.squeeze(truth, squeeze_dims=reduction_indices)
truth_squeezed = array_ops.squeeze(truth, axis=reduction_indices)
output_array = output.eval()
output_keep_dims_array = output_keep_dims.eval()
truth_array = truth.eval()
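The `squeeze_dims` argument replaced here was renamed; `axis` is the current
keyword and `squeeze_dims` its deprecated alias, so the call is otherwise
unchanged:

```python
import tensorflow as tf

t = tf.zeros([1, 3, 1])
# axis is the current keyword; squeeze_dims is the deprecated alias.
print(tf.squeeze(t, axis=[0]).shape)     # (3, 1)
print(tf.squeeze(t, axis=[0, 2]).shape)  # (3,)
```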
View File
@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase):
class CountNonzeroReductionTest(test.TestCase):
def _compare(self, x, reduction_axes, keepdims, use_gpu=False,
def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0,
feed_dict=None):
np_ans = (x != 0).astype(np.int32)
np_ans = (x != zero).astype(np.int32)
if reduction_axes is None:
np_ans = np.sum(np_ans, keepdims=keepdims)
else:
@ -958,6 +958,37 @@ class CountNonzeroReductionTest(test.TestCase):
y = math_ops.count_nonzero(x, [0])
self.assertAllEqual(y.eval(), np.zeros(9938))
  def testStringReduce(self):
    # Test case for GitHub issue 18712
    with self.test_session() as sess:
      v = math_ops.count_nonzero(constant_op.constant(["test"]))
      self.assertAllClose(sess.run(v), 1)

  def testStringReduce1D(self):
    # Create a 1D array of strings
    x = np.asarray(["", "", "a", "", "", "b"])
    self._compare(x, None, keepdims=False, zero=np.str(""))
    self._compare(x, [], keepdims=False, zero=np.str(""))
    self._compare(x, [0], keepdims=False, zero=np.str(""))
    self._compare(x, None, keepdims=True, zero=np.str(""))
    self._compare(x, [], keepdims=True, zero=np.str(""))
    self._compare(x, [0], keepdims=True, zero=np.str(""))

  def testStringReduce2D(self):
    # Create a 2D array of strings
    x = np.asarray([["", "", "a", "", "", "b"],
                    ["", "c", "", "d", "", ""],
                    ["e", "", "f", "", "", ""]])
    self._compare(x, None, keepdims=False, zero=np.str(""))
    self._compare(x, [], keepdims=False, zero=np.str(""))
    self._compare(x, [0], keepdims=False, zero=np.str(""))
    self._compare(x, [1], keepdims=False, zero=np.str(""))
    self._compare(x, [0, 1], keepdims=False, zero=np.str(""))
    self._compare(x, None, keepdims=True, zero=np.str(""))
    self._compare(x, [], keepdims=True, zero=np.str(""))
    self._compare(x, [0], keepdims=True, zero=np.str(""))
    self._compare(x, [0, 1], keepdims=True, zero=np.str(""))
if __name__ == "__main__":
test.main()
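These tests pin down string reduction for `count_nonzero` (GitHub issue
18712): any element that differs from the empty string counts as nonzero. A
standalone example:

```python
import tensorflow as tf

x = tf.constant([["", "a", ""],
                 ["b", "", "c"]])
total = tf.count_nonzero(x)            # 3 non-empty strings
per_col = tf.count_nonzero(x, axis=0)  # [1, 1, 1]
with tf.Session() as sess:
  print(sess.run([total, per_col]))
```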
View File
@ -364,6 +364,42 @@ class ScatterNdTest(test.TestCase):
del input_ # input_ is not used in scatter_nd
return array_ops.scatter_nd(indices, updates, shape)
  def testString(self):
    indices = constant_op.constant([[4], [3], [1], [7]],
                                   dtype=dtypes.int32)
    updates = constant_op.constant(["four", "three", "one", "seven"],
                                   dtype=dtypes.string)
    expected = np.array([b"", b"one", b"", b"three", b"four",
                         b"", b"", b"seven"])
    scatter = self.scatter_nd(indices, updates, shape=(8,))
    with self.test_session() as sess:
      result = sess.run(scatter)
      self.assertAllEqual(expected, result)

    # The same index is updated twice with the same value.
    indices = constant_op.constant([[4], [3], [3], [7]],
                                   dtype=dtypes.int32)
    updates = constant_op.constant(["a", "b", "b", "c"],
                                   dtype=dtypes.string)
    expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"])
    scatter = self.scatter_nd(indices, updates, shape=(8,))
    with self.test_session() as sess:
      result = sess.run(scatter)
      self.assertAllEqual(expected, result)

    # The same index is updated twice with different values.
    indices = constant_op.constant([[4], [3], [3], [7]],
                                   dtype=dtypes.int32)
    updates = constant_op.constant(["a", "b", "c", "d"],
                                   dtype=dtypes.string)
    expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]),
                np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])]
    scatter = self.scatter_nd(indices, updates, shape=(8,))
    with self.test_session() as sess:
      result = sess.run(scatter)
      self.assertTrue(np.array_equal(result, expected[0]) or
                      np.array_equal(result, expected[1]))
def testRank3ValidShape(self):
indices = array_ops.zeros([2, 2, 2], dtypes.int32)
updates = array_ops.zeros([2, 2, 2], dtypes.int32)
@ -584,6 +620,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest):
shape, dtype=updates.dtype))
return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates)
  def testString(self):
    # Not supported yet.
    pass
if __name__ == "__main__":
test.main()
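The new `testString` cases document string semantics for `scatter_nd`:
duplicate indices concatenate their updates, and the concatenation order is
not guaranteed. A standalone example:

```python
import tensorflow as tf

indices = tf.constant([[4], [3], [3], [7]])
updates = tf.constant(["a", "b", "c", "d"])
scatter = tf.scatter_nd(indices, updates, shape=[8])
with tf.Session() as sess:
  # Index 3 receives both "b" and "c"; the result at that slot is
  # b"bc" or b"cb", depending on evaluation order.
  print(sess.run(scatter))
```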
View File
@ -196,7 +196,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
array_ops.where(
math_ops.logical_and(grad.indices >= start,
grad.indices < end)),
squeeze_dims=[1])
axis=[1])
new_indices = array_ops.gather(grad.indices, indices_to_select) - start
new_values = array_ops.gather(grad.values, indices_to_select)
out_grads.append(ops.IndexedSlices(new_values, new_indices, size))
View File
@ -994,9 +994,7 @@ def unstack(value, num=None, axis=0, name="unstack"):
`value[:, i, :, :]` and each tensor in `output` will have shape `(A, C, D)`.
Etc.
This is the opposite of stack. The numpy equivalent is
tf.unstack(x, n) = np.unstack(x)
This is the opposite of stack.
Args:
value: A rank `R > 0` `Tensor` to be unstacked.
@ -1720,8 +1718,10 @@ def placeholder(dtype, shape=None, name=None):
print(sess.run(y, feed_dict={x: rand_array})) # Will succeed.
```
@compatibility{eager} Placeholders are not compatible with eager execution.
@compatibility(eager)
Placeholders are not compatible with eager execution.
@end_compatibility
Args:
dtype: The type of elements in the tensor to be fed.
shape: The shape of the tensor to be fed (optional). If the shape is not
View File
@ -652,7 +652,7 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
padded.set_shape(padded_shape)
if not is_batch:
padded = array_ops.squeeze(padded, squeeze_dims=[0])
padded = array_ops.squeeze(padded, axis=[0])
return padded
@ -732,7 +732,7 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
cropped.set_shape(cropped_shape)
if not is_batch:
cropped = array_ops.squeeze(cropped, squeeze_dims=[0])
cropped = array_ops.squeeze(cropped, axis=[0])
return cropped
@ -849,7 +849,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
resized = control_flow_ops.with_dependencies(assert_ops, resized)
if not is_batch:
resized = array_ops.squeeze(resized, squeeze_dims=[0])
resized = array_ops.squeeze(resized, axis=[0])
return resized
@ -942,7 +942,7 @@ def resize_images(images,
for x in [new_width_const, width, new_height_const, height]) and (
width == new_width_const and height == new_height_const):
if not is_batch:
images = array_ops.squeeze(images, squeeze_dims=[0])
images = array_ops.squeeze(images, axis=[0])
return images
if method == ResizeMethod.BILINEAR:
@ -965,7 +965,7 @@ def resize_images(images,
images.set_shape([None, new_height_const, new_width_const, None])
if not is_batch:
images = array_ops.squeeze(images, squeeze_dims=[0])
images = array_ops.squeeze(images, axis=[0])
return images
Some files were not shown because too many files have changed in this diff.