Merge changes from GitHub.

PiperOrigin-RevId: 194997009
Patrick Nguyen 2018-05-01 14:28:36 -07:00 committed by TensorFlower Gardener
parent 46bf1e8934
commit 325d0ef21a
121 changed files with 1809 additions and 724 deletions

.gitignore vendored
View File

@@ -27,6 +27,7 @@ Podfile.lock
/tensorflow/contrib/lite/examples/ios/simple/data/*.txt
/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
xcuserdata/**
/api_init_files_list.txt
# Android
.gradle

View File

@@ -1700,7 +1700,7 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) {
TestGradientsError(false);
}
// REGISTER_OP for CApiAttributesTest test cases.
// Registers two ops, each with a single attribute called 'v'.
// The attribute in one op will have a type 'type', the other
// will have list(type).

View File

@@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad);
Status StridedSliceGradHelper(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
Input x = Shape(scope, op.input(0));
Input begin = op.input(1);
Input end = op.input(2);
Input strides = op.input(3);
int64 begin_mask;
int64 end_mask;
int64 ellipsis_mask;
int64 new_axis_mask;
int64 shrink_axis_mask;
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask));
TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask));
grad_outputs->push_back(
StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0],
StridedSliceGrad::BeginMask(begin_mask)
.EndMask(end_mask)
.EllipsisMask(ellipsis_mask)
.NewAxisMask(new_axis_mask)
.ShrinkAxisMask(shrink_axis_mask)));
// No gradients returned for begin, end and strides
grad_outputs->push_back(NoGradient());
grad_outputs->push_back(NoGradient());
grad_outputs->push_back(NoGradient());
return scope.status();
}
REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper);
} // anonymous namespace
} // namespace ops
} // namespace tensorflow

View File

@@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) {
RunTest(x, x_shape, y, y_shape);
}
TEST_F(ArrayGradTest, StridedSliceGrad) {
TensorShape x_shape({6, 4, 4});
auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
// y = x[2:6:2, 1:3, 1:3]
auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1});
// y.shape = [2, 2, 2];
RunTest(x, x_shape, y, {2, 2, 2});
// y = x[2:6:2, :3, 1:]
// begin_mask = 1<<1 (ignore the begin index for dimension 1)
// end_mask = 1<<2 (ignore the end index for dimension 2)
y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1},
StridedSlice::BeginMask(1 << 1).EndMask(1 << 2));
// y.shape = [2, 3, 3];
RunTest(x, x_shape, y, {2, 3, 3});
// y = x[tf.newaxis, 2:6:2, 1:3, 1:3]
y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1},
StridedSlice::NewAxisMask(1 << 0));
// y.shape = [1, 2, 2, 2];
RunTest(x, x_shape, y, {1, 2, 2, 2});
}
} // namespace
} // namespace tensorflow
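For readers tracing the mask arithmetic used in the tests above, a small NumPy sketch of strided-slice mask semantics (illustrative only, not the C++ gradient API):

```python
import numpy as np

x = np.arange(6 * 4 * 4, dtype=np.float32).reshape(6, 4, 4)

# Plain strided slice: x[2:6:2, 1:3, 1:3] -> shape (2, 2, 2).
assert x[2:6:2, 1:3, 1:3].shape == (2, 2, 2)

# begin_mask = 1 << 1 drops the begin index of dimension 1 (1:3 -> :3);
# end_mask = 1 << 2 drops the end index of dimension 2 (1:3 -> 1:).
assert x[2:6:2, :3, 1:].shape == (2, 3, 3)

# new_axis_mask = 1 << 0 inserts a length-1 axis in front.
assert x[np.newaxis, 2:6:2, 1:3, 1:3].shape == (1, 2, 2, 2)
```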

View File

@@ -56,8 +56,6 @@ Use AutoGraph in one of the following ways, described below:
1. Annotations (simpler)
2. Functional API (more flexible)
To get started, install the latest nightly TensorFlow build:
```shell
@@ -70,6 +68,13 @@ Then import the `autograph` module from `tf.contrib`:
from tensorflow.contrib import autograph as ag
```
### Interactive demo notebooks
For more extensive examples, check out these interactive notebooks:
* [RNN trained using Keras and Estimators](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb)
* [Demo from the TF Dev Summit 2018](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb)
## Using with annotations
Annotating a function or class with `@convert` converts it in place:
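A minimal sketch of the annotation style, assuming the contrib-era `ag.convert()` decorator (illustrative, not from this diff):

```python
from tensorflow.contrib import autograph as ag

# Hypothetical example: the decorator rewrites the Python control flow
# below (while/if) into TensorFlow graph ops at call time.
@ag.convert()
def collatz_steps(n):
  steps = 0
  while n > 1:
    if n % 2 == 0:
      n = n // 2
    else:
      n = 3 * n + 1
    steps += 1
  return steps
```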

View File

@@ -84,7 +84,7 @@ if (NOT WIN32)
option(systemlib_ALL "Turn on every possible systemlib_* options" OFF)
if (systemlib_ALL)
set (systemlib_ZLIB ON)
endif (systemlib_ALL)
endif()
@@ -471,6 +471,10 @@ if (tensorflow_ENABLE_GPU)
include_directories(${tensorflow_source_dir}/third_party/gpus)
# add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
if(NOT WIN32)
# add gomp to tensorflow_EXTERNAL_LIBRARIES, needed by libcusolver.so
list(APPEND tensorflow_EXTERNAL_LIBRARIES gomp)
endif()
# NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
# in the default build is upgraded.

View File

@@ -177,6 +177,16 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
else(WIN32)
if(tensorflow_ENABLE_GPU)
file(GLOB_RECURSE tf_core_kernels_gpu_exclude_srcs
# temporarily disable nccl as it needs to be ported with gpu
"${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_manager.cc"
"${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_gpu_exclude_srcs})
endif(tensorflow_ENABLE_GPU)
endif(WIN32)
file(GLOB_RECURSE tf_core_gpu_kernels_srcs

View File

@@ -64,6 +64,8 @@ file(GLOB tf_stream_executor_srcs
if (tensorflow_ENABLE_GPU)
file(GLOB tf_stream_executor_gpu_srcs
"${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
"${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h"
"${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc"
)
if (NOT tensorflow_BUILD_CC_TESTS)
file(GLOB tf_stream_executor_gpu_tests

View File

@@ -152,6 +152,22 @@ class CrfTest(test.TestCase):
self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
def testCrfLogNormZeroSeqLength(self):
"""
Test `crf_log_norm` when `sequence_lengths` contains one or more zeros.
"""
with self.test_session() as sess:
inputs = constant_op.constant(np.ones([2, 10, 5],
dtype=np.float32))
transition_params = constant_op.constant(np.ones([5, 5],
dtype=np.float32))
sequence_lengths = constant_op.constant(np.zeros([2],
dtype=np.int32))
expected_log_norm = np.zeros([2], dtype=np.float32)
log_norm = crf.crf_log_norm(inputs, sequence_lengths, transition_params)
tf_log_norm = sess.run(log_norm)
self.assertAllClose(tf_log_norm, expected_log_norm)
def testCrfLogLikelihood(self):
inputs = np.array(
[[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
@@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
dtype=np.float32))
sequence_lengths = constant_op.constant(np.zeros([2],
dtype=np.int32))
tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
tf_tags, tf_scores = sess.run([tags, scores])
self.assertEqual(len(tf_tags.shape), 2)
self.assertEqual(len(tf_scores.shape), 1)
if __name__ == "__main__":
test.main()

View File

@@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
example_inds = array_ops.reshape(
math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
sequence_scores = array_ops.gather_nd(
array_ops.squeeze(inputs, [1]),
array_ops.concat([example_inds, tag_indices], axis=1))
sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(sequence_scores),
sequence_scores)
return sequence_scores
def _multi_seq_fn():
# Compute the scores of the given tag sequence.
@@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
# the "initial state" (the unary potentials).
def _single_seq_fn():
log_norm = math_ops.reduce_logsumexp(first_input, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
def _multi_seq_fn():
"""Forward computation of alpha values."""
@@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# Compute the alpha values in the forward algorithm in order to get the
# partition function.
forward_cell = CrfForwardRnnCell(transition_params)
# Sequence length is not allowed to be less than zero.
sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
_, alphas = rnn.dynamic_rnn(
cell=forward_cell,
inputs=rest_of_input,
sequence_length=sequence_lengths_less_one,
initial_state=first_input,
dtype=dtypes.float32)
log_norm = math_ops.reduce_logsumexp(alphas, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
max_seq_len = array_ops.shape(inputs)[1]
@@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
initial_state = array_ops.squeeze(initial_state, axis=[1])  # [B, O]
inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
# Sequence length is not allowed to be less than zero.
sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
backpointers, last_score = rnn.dynamic_rnn(  # [B, T - 1, O], [B, O]
crf_fwd_cell,
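The zero-length handling added above reduces to clamping the RNN length at zero and masking the final log norm; a NumPy sketch of the masking step (illustrative only):

```python
import numpy as np

def mask_log_norm(log_norm, sequence_lengths):
  # A sequence of length <= 0 has an empty score sum, so its partition
  # function is log(1) = 0; this mirrors the array_ops.where masking above.
  return np.where(sequence_lengths <= 0, np.zeros_like(log_norm), log_norm)

print(mask_log_norm(np.array([3.2, 1.7]), np.array([0, 5])))  # [0.  1.7]
```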

View File

@@ -0,0 +1,109 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Bijector."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops.bijectors.ordered import Ordered
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite
from tensorflow.python.platform import test
class OrderedBijectorTest(test.TestCase):
"""Tests correctness of the ordered transformation."""
def setUp(self):
self._rng = np.random.RandomState(42)
@test_util.run_in_graph_and_eager_modes()
def testBijectorVector(self):
with self.test_session():
ordered = Ordered()
self.assertEqual("ordered", ordered.name)
x = np.asarray([[2., 3, 4], [4., 8, 13]])
y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
self.assertAllClose(y, self.evaluate(ordered.forward(x)))
self.assertAllClose(x, self.evaluate(ordered.inverse(y)))
self.assertAllClose(
np.sum(np.asarray(y)[..., 1:], axis=-1),
self.evaluate(ordered.inverse_log_det_jacobian(y, event_ndims=1)),
atol=0.,
rtol=1e-7)
self.assertAllClose(
self.evaluate(-ordered.inverse_log_det_jacobian(y, event_ndims=1)),
self.evaluate(ordered.forward_log_det_jacobian(x, event_ndims=1)),
atol=0.,
rtol=1e-7)
def testBijectorUnknownShape(self):
with self.test_session():
ordered = Ordered()
self.assertEqual("ordered", ordered.name)
x = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
real_x = np.asarray([[2., 3, 4], [4., 8, 13]])
y = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
real_y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
self.assertAllClose(real_y, ordered.forward(x).eval(
feed_dict={x: real_x}))
self.assertAllClose(real_x, ordered.inverse(y).eval(
feed_dict={y: real_y}))
self.assertAllClose(
np.sum(np.asarray(real_y)[..., 1:], axis=-1),
ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
feed_dict={y: real_y}),
atol=0.,
rtol=1e-7)
self.assertAllClose(
-ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
feed_dict={y: real_y}),
ordered.forward_log_det_jacobian(x, event_ndims=1).eval(
feed_dict={x: real_x}),
atol=0.,
rtol=1e-7)
@test_util.run_in_graph_and_eager_modes()
def testShapeGetters(self):
with self.test_session():
x = tensor_shape.TensorShape([4])
y = tensor_shape.TensorShape([4])
bijector = Ordered(validate_args=True)
self.assertAllEqual(y, bijector.forward_event_shape(x))
self.assertAllEqual(y.as_list(),
self.evaluate(bijector.forward_event_shape_tensor(
x.as_list())))
self.assertAllEqual(x, bijector.inverse_event_shape(y))
self.assertAllEqual(x.as_list(),
self.evaluate(bijector.inverse_event_shape_tensor(
y.as_list())))
def testBijectiveAndFinite(self):
with self.test_session():
ordered = Ordered()
x = np.sort(self._rng.randn(3, 10), axis=-1).astype(np.float32)
y = (self._rng.randn(3, 10)).astype(np.float32)
assert_bijective_and_finite(ordered, x, y, event_ndims=1)
if __name__ == "__main__":
test.main()

View File

@@ -30,6 +30,7 @@
@@Invert
@@Kumaraswamy
@@MaskedAutoregressiveFlow
@@Ordered
@@Permute
@@PowerTransform
@@RealNVP
@@ -67,6 +68,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.inline import *
from tensorflow.contrib.distributions.python.ops.bijectors.invert import *
from tensorflow.contrib.distributions.python.ops.bijectors.kumaraswamy import *
from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import *
from tensorflow.contrib.distributions.python.ops.bijectors.ordered import *
from tensorflow.contrib.distributions.python.ops.bijectors.permute import *
from tensorflow.contrib.distributions.python.ops.bijectors.power_transform import *
from tensorflow.contrib.distributions.python.ops.bijectors.real_nvp import *

View File

@@ -170,7 +170,7 @@ class CholeskyOuterProduct(bijector.Bijector):
sum_weighted_log_diag = array_ops.squeeze(
math_ops.matmul(math_ops.log(diag),
exponents[..., array_ops.newaxis]),
axis=-1)
fldj = p_float * np.log(2.) + sum_weighted_log_diag
return fldj

View File

@@ -18,14 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.ops.distributions import bijector
__all__ = [
"Invert",
]
class Invert(bijector.Bijector):
"""Bijector which inverts another Bijector.
Example Use: [ExpGammaDistribution (see Background & Context)](

View File

@@ -32,7 +32,7 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops import variable_scope as variable_scope_lib
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -42,7 +42,7 @@ __all__ = [
]
class MaskedAutoregressiveFlow(bijector.Bijector):
"""Affine MaskedAutoregressiveFlow bijector for vector-valued events.
The affine autoregressive flow [(Papamakarios et al., 2016)][3] provides a

View File

@@ -0,0 +1,125 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ordered bijector."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
"Ordered",
]
class Ordered(bijector.Bijector):
"""Bijector which maps a tensor x_k that has increasing elements in the last
dimension to an unconstrained tensor y_k.
Both the domain and the codomain of the mapping are [-inf, inf]; however,
the input of the forward mapping must be strictly increasing.
The inverse of the bijector applied to a normal random vector `y ~ N(0, 1)`
gives back a sorted random vector with the same distribution `x ~ N(0, 1)`,
where `x = sort(y)`.
On the last dimension of the tensor, the Ordered bijector performs:
`y[0] = x[0]`
`y[1:] = math_ops.log(x[1:] - x[:-1])`
#### Example Use:
```python
bijector.Ordered().forward([2, 3, 4])
# Result: [2., 0., 0.]
bijector.Ordered().inverse([0.06428002, -1.07774478, -0.71530371])
# Result: [0.06428002, 0.40464228, 0.8936858]
```
"""
def __init__(self, validate_args=False, name="ordered"):
super(Ordered, self).__init__(
forward_min_event_ndims=1,
validate_args=validate_args,
name=name)
def _forward_event_shape(self, input_shape):
if input_shape.ndims is None or input_shape[-1] is None:
return input_shape
return tensor_shape.TensorShape([input_shape[-1]])
def _forward_event_shape_tensor(self, input_shape):
return (input_shape[-1])[..., array_ops.newaxis]
def _inverse_event_shape(self, output_shape):
if output_shape.ndims is None or output_shape[-1] is None:
return output_shape
if output_shape[-1] <= 1:
raise ValueError("output_shape[-1] = %d <= 1" % output_shape[-1])
return tensor_shape.TensorShape([output_shape[-1]])
def _inverse_event_shape_tensor(self, output_shape):
if self.validate_args:
is_greater_one = check_ops.assert_greater(
output_shape[-1], 1, message="Need last dimension greater than 1.")
output_shape = control_flow_ops.with_dependencies(
[is_greater_one], output_shape)
return (output_shape[-1])[..., array_ops.newaxis]
def _forward(self, x):
x = self._maybe_assert_valid_x(x)
y0 = x[..., 0, array_ops.newaxis]
yk = math_ops.log(x[..., 1:] - x[..., :-1])
y = array_ops.concat([y0, yk], axis=-1)
return y
def _inverse(self, y):
x0 = y[..., 0, array_ops.newaxis]
xk = math_ops.exp(y[..., 1:])
x = array_ops.concat([x0, xk], axis=-1)
return math_ops.cumsum(x, axis=-1)
def _inverse_log_det_jacobian(self, y):
# The Jacobian of the inverse mapping is lower
# triangular, with the diagonal elements being:
# J[i,i] = 1 if i=1, and
# exp(y_i) if 1<i<=K
# which gives the absolute Jacobian determinant:
# |det(Jac)| = prod_{i=1}^{K} exp(y[i]).
# (1) - Stan Modeling Language User's Guide and Reference Manual
# Version 2.17.0, section 35.2
return math_ops.reduce_sum(y[..., 1:], axis=-1)
def _forward_log_det_jacobian(self, x):
x = self._maybe_assert_valid_x(x)
return -math_ops.reduce_sum(
math_ops.log(x[..., 1:] - x[..., :-1]),
axis=-1)
def _maybe_assert_valid_x(self, x):
if not self.validate_args:
return x
is_valid = check_ops.assert_positive(
x[..., 1:] - x[..., :-1],
message="Forward transformation input must be strictly increasing.")
return control_flow_ops.with_dependencies([is_valid], x)
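A quick NumPy sanity check of the forward/inverse pair defined above (a sketch, independent of the TF implementation):

```python
import numpy as np

def ordered_forward(x):
  # y[0] = x[0]; y[1:] = log(x[1:] - x[:-1])
  return np.concatenate([x[:1], np.log(np.diff(x))])

def ordered_inverse(y):
  # cumsum([y[0], exp(y[1:])]) recovers the strictly increasing vector.
  return np.cumsum(np.concatenate([y[:1], np.exp(y[1:])]))

x = np.array([2., 3., 4.])
print(ordered_forward(x))  # [2. 0. 0.]
assert np.allclose(ordered_inverse(ordered_forward(x)), x)
```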

View File

@@ -28,7 +28,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -36,7 +36,7 @@ __all__ = [
]
class Permute(bijector.Bijector):
"""Permutes the rightmost dimension of a `Tensor`.
```python

View File

@@ -25,7 +25,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -34,7 +34,7 @@ __all__ = [
]
class RealNVP(bijector.Bijector):
"""RealNVP "affine coupling layer" for vector-valued events.
Real NVP models a normalizing flow on a `D`-dimensional distribution via a

View File

@@ -28,7 +28,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -44,7 +44,7 @@ def _ndims_from_shape(shape):
return array_ops.shape(shape)[0]
class Reshape(bijector.Bijector):
"""Reshapes the `event_shape` of a `Tensor`.
The semantics generally follow that of `tf.reshape()`, with

View File

@@ -128,7 +128,7 @@ class Weibull(bijector.Bijector):
return x
is_valid = check_ops.assert_non_negative(
x,
message="Forward transformation input must be at least 0.")
return control_flow_ops.with_dependencies([is_valid], x)
def _maybe_assert_valid_y(self, y):

View File

@@ -439,7 +439,7 @@ class _DistributionShape(object):
if self._batch_ndims_is_0 and expand_batch_dim:
squeeze_dims += [1]
if squeeze_dims:
x = array_ops.squeeze(x, axis=squeeze_dims)
# x.shape: [prod(S)]+B+E
_, batch_shape, event_shape = self.get_shape(x)
else:

View File

@@ -397,7 +397,7 @@ class GmmAlgorithm(object):
# Compute the effective number of data points assigned to component k.
with ops.control_dependencies(self._w):
points_in_k = array_ops.squeeze(
math_ops.add_n(self._points_in_k), axis=[0])
# Update alpha.
if 'w' in self._params:
final_points_in_k = points_in_k / num_batches

View File

@@ -932,7 +932,8 @@ def convolution(inputs,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None,
conv_dims=None):
"""Adds an N-D convolution followed by an optional batch_norm layer.
It is required that 1 <= N <= 3.
@@ -993,6 +994,10 @@ def convolution(inputs,
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
scope: Optional scope for `variable_scope`.
conv_dims: Optional convolution dimensionality. When set, the
corresponding convolution is used (e.g. 2 for Conv 2D, 3 for Conv 3D,
...). When left as None, the dimensionality is selected based on the
input rank (i.e. Conv ND, with N = input_rank - 2).
Returns:
A tensor representing the output of the operation.
@@ -1015,6 +1020,9 @@ def convolution(inputs,
inputs = ops.convert_to_tensor(inputs)
input_rank = inputs.get_shape().ndims
if conv_dims is not None and conv_dims + 2 != input_rank:
raise ValueError('Convolution expects input with rank %d, got %d' %
(conv_dims + 2, input_rank))
if input_rank == 3:
layer_class = convolutional_layers.Convolution1D
elif input_rank == 4:
@@ -1061,10 +1069,134 @@ def convolution(inputs,
outputs = activation_fn(outputs)
return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
@add_arg_scope
def convolution1d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=1)
convolution1d.__doc__ = convolution.__doc__
@add_arg_scope
def convolution2d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=2)
convolution2d.__doc__ = convolution.__doc__
@add_arg_scope
def convolution3d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=3)
convolution3d.__doc__ = convolution.__doc__
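A hedged usage sketch of the explicit-rank wrappers defined above (assumes the `tf.contrib.layers` import path of this era):

```python
import tensorflow as tf
from tensorflow.contrib import layers

images = tf.random_uniform([5, 7, 9, 3])      # rank 4 -> Conv 2D
net2d = layers.convolution2d(images, num_outputs=32, kernel_size=3)

volumes = tf.random_uniform([5, 6, 7, 9, 3])  # rank 5 -> Conv 3D
net3d = layers.convolution3d(volumes, num_outputs=32, kernel_size=3)

# With conv_dims now checked, a mismatched rank fails fast:
# layers.convolution2d(volumes, 32, 3)  # ValueError: expects rank 4, got 5
```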
@add_arg_scope
def convolution2d_in_plane(
@@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None):
Args:
tensor: An `int` `Tensor` to be converted to a `Sparse`.
eos_token: An integer.
It is part of the target label that signifies the end of a sentence.
outputs_collections: Collection to add the outputs.
scope: Optional scope for name_scope.
"""
@@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None):
output_collections: Collection to which the outputs will be added.
scope: Optional scope for `name_scope`.
Returns:
A `Tensor` or `SparseTensor` containing the same values as `inputs`, but
with innermost dimensions flattened to obtain rank `new_rank`.
Raises:

View File

@@ -310,6 +310,17 @@ class BiasAddTest(test.TestCase):
class ConvolutionTest(test.TestCase):
def testInvalidShape(self):
with self.test_session():
images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1)
with self.assertRaisesRegexp(
ValueError, 'Convolution expects input with rank 5, got 4'):
layers_lib.convolution3d(images_2d, 32, 3)
images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1)
with self.assertRaisesRegexp(
ValueError, 'Convolution expects input with rank 4, got 5'):
layers_lib.convolution2d(images_3d, 32, 3)
def testInvalidDataFormat(self):
height, width = 7, 9
with self.test_session():
@@ -3155,7 +3166,7 @@ class RepeatTests(test.TestCase):
with self.test_session():
images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32)
output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3])
self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32])
def testRepeatWithScope(self):
@@ -3749,7 +3760,7 @@ class StackTests(test.TestCase):
layers_lib.convolution2d, [10, 20, 30],
kernel_size=[3, 3],
padding='SAME')
self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30])
def testStackWithScope(self):

View File

@@ -270,7 +270,7 @@ class _RegressionTargetColumn(_TargetColumn):
def logits_to_predictions(self, logits, proba=False):
if self.num_label_columns == 1:
return array_ops.squeeze(logits, axis=[1])
return logits
def get_eval_ops(self, features, logits, labels, metrics=None):
@@ -418,7 +418,7 @@ def _softmax_cross_entropy_loss(logits, target):
"Instead got %s." % target.dtype)
# sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
if len(target.get_shape()) == 2:
target = array_ops.squeeze(target, axis=[1])
loss_vec = nn.sparse_softmax_cross_entropy_with_logits(
labels=target, logits=logits)
return loss_vec

View File

@@ -777,7 +777,7 @@ class _RegressionHead(_SingleHead):
key = prediction_key.PredictionKey.SCORES
with ops.name_scope(None, "predictions", (logits,)):
if self.logits_dimension == 1:
logits = array_ops.squeeze(logits, axis=(1,), name=key)
return {key: self._link_fn(logits)}
def _metrics(self, eval_loss, predictions, labels, weights):
@@ -974,7 +974,7 @@ def _softmax_cross_entropy_loss(labels, logits, weights=None):
is_squeezed_labels = False
# TODO(ptucker): This will break for dynamic shapes.
if len(labels.get_shape()) == 2:
labels = array_ops.squeeze(labels, axis=(1,))
is_squeezed_labels = True
loss = nn.sparse_softmax_cross_entropy_with_logits(

View File

@@ -40,7 +40,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None):
[tensor_in, labels]):
predictions = nn.xw_plus_b(tensor_in, weights, biases)
if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2:
predictions = array_ops_.squeeze(predictions, axis=[1])
return predictions, losses.mean_squared_error(labels, predictions)

View File

@@ -17,6 +17,7 @@ limitations under the License.
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
@@ -70,6 +71,23 @@ TfLiteStatus ReadLabelsFile(const string& file_name,
return kTfLiteOk;
}
void PrintProfilingInfo(const profiling::ProfileEvent* e, uint32_t op_index,
TfLiteRegistration registration) {
// Output something like:
// time (ms), Node xxx, OpCode xxx, symbolic name
// 5.352, Node 5, OpCode 4, DEPTHWISE_CONV_2D
LOG(INFO) << std::fixed << std::setw(10) << std::setprecision(3)
<< (e->end_timestamp_us - e->begin_timestamp_us) / 1000.0
<< ", Node " << std::setw(3) << std::setprecision(3) << op_index
<< ", OpCode " << std::setw(3) << std::setprecision(3)
<< registration.builtin_code << ", "
<< EnumNameBuiltinOperator(
(BuiltinOperator)registration.builtin_code)
<< "\n";
}
void RunInference(Settings* s) {
if (!s->model_name.c_str()) {
LOG(ERROR) << "no model file name\n";
@@ -166,6 +184,11 @@ void RunInference(Settings* s) {
exit(-1);
}
profiling::Profiler* profiler = new profiling::Profiler();
interpreter->SetProfiler(profiler);
if (s->profiling) profiler->StartProfiling();
struct timeval start_time, stop_time;
gettimeofday(&start_time, NULL);
for (int i = 0; i < s->loop_count; i++) {
@@ -179,6 +202,18 @@ void RunInference(Settings* s) {
<< (get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000)
<< " ms \n";
if (s->profiling) {
profiler->StopProfiling();
auto profile_events = profiler->GetProfileEvents();
for (int i = 0; i < profile_events.size(); i++) {
auto op_index = profile_events[i]->event_metadata;
const auto node_and_registration =
interpreter->node_and_registration(op_index);
const TfLiteRegistration registration = node_and_registration->second;
PrintProfilingInfo(profile_events[i], op_index, registration);
}
}
const int output_size = 1000;
const size_t num_results = 5;
const float threshold = 0.001f;
@@ -217,13 +252,14 @@ void RunInference(Settings* s) {
void display_usage() {
LOG(INFO) << "label_image\n"
<< "--accelerated, -a: [0|1], use Android NNAPI or not\n"
<< "--count, -c: loop interpreter->Invoke() for certain times\n"
<< "--input_mean, -b: input mean\n"
<< "--input_std, -s: input standard deviation\n"
<< "--image, -i: image_name.bmp\n"
<< "--labels, -l: labels for the model\n"
<< "--tflite_model, -m: model_name.tflite\n"
<< "--profiling, -p: [0|1], profiling or not\n"
<< "--threads, -t: number of threads\n" << "--threads, -t: number of threads\n"
<< "--verbose, -v: [0|1] print more information\n" << "--verbose, -v: [0|1] print more information\n"
<< "\n"; << "\n";
@ -241,6 +277,7 @@ int Main(int argc, char** argv) {
{"image", required_argument, 0, 'i'}, {"image", required_argument, 0, 'i'},
{"labels", required_argument, 0, 'l'}, {"labels", required_argument, 0, 'l'},
{"tflite_model", required_argument, 0, 'm'}, {"tflite_model", required_argument, 0, 'm'},
{"profiling", required_argument, 0, 'p'},
{"threads", required_argument, 0, 't'}, {"threads", required_argument, 0, 't'},
{"input_mean", required_argument, 0, 'b'}, {"input_mean", required_argument, 0, 'b'},
{"input_std", required_argument, 0, 's'}, {"input_std", required_argument, 0, 's'},
@ -249,7 +286,7 @@ int Main(int argc, char** argv) {
/* getopt_long stores the option index here. */ /* getopt_long stores the option index here. */
int option_index = 0; int option_index = 0;
c = getopt_long(argc, argv, "a:b:c:f:i:l:m:s:t:v:", long_options, c = getopt_long(argc, argv, "a:b:c:f:i:l:m:p:s:t:v:", long_options,
&option_index); &option_index);
/* Detect the end of the options. */ /* Detect the end of the options. */
@ -276,6 +313,10 @@ int Main(int argc, char** argv) {
case 'm': case 'm':
s.model_name = optarg; s.model_name = optarg;
break; break;
case 'p':
s.profiling = strtol( // NOLINT(runtime/deprecated_fn)
optarg, (char**)NULL, 10);
break;
case 's':
s.input_std = strtod(optarg, NULL);
break;

View File

@@ -25,6 +25,7 @@ struct Settings {
bool verbose = false;
bool accel = false;
bool input_floating = false;
bool profiling = false;
int loop_count = 1;
float input_mean = 127.5f;
float input_std = 127.5f;

View File

@@ -84,4 +84,32 @@
android:visibility="visible" />
</RelativeLayout>
<RelativeLayout
android:id="@+id/control2"
android:layout_width="match_parent"
android:layout_height="135dp"
android:layout_alignParentLeft="true"
android:layout_alignParentStart="true"
android:layout_alignTop="@+id/control"
android:layout_marginLeft="300dp"
android:layout_marginStart="300dp"
android:background="@color/control_background">
<ToggleButton
android:id="@+id/button"
android:textOff="@string/tflite"
android:textOn="@string/nnapi"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_alignParentLeft="true"
android:layout_alignParentStart="true" />
<NumberPicker
android:id="@+id/np"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_below="@+id/button"
android:visibility="visible" />
</RelativeLayout>
</RelativeLayout>

View File

@@ -25,8 +25,8 @@ namespace builtin {
namespace topk_v2 {
constexpr int kInputTensor = 0;
constexpr int kInputTopK = 1;
constexpr int kOutputValues = 0;
constexpr int kOutputIndexes = 1;
namespace {
TfLiteStatus ResizeOutput(TfLiteContext* context, TfLiteNode* node) {
View File

@@ -31,8 +31,8 @@ class TopKV2OpModel : public SingleOpModel {
int top_k) {
input_ = AddInput(input_type);
top_k_ = AddInput(TensorType_INT32);
output_values_ = AddOutput(input_type);
output_indexes_ = AddOutput(TensorType_INT32);
SetBuiltinOp(BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options, 0);
BuildInterpreter({input_shape, {1}});
PopulateTensor<int32_t>(top_k_, {top_k});
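The swap above makes the TFLite kernel emit values first and indices second, matching TensorFlow's `tf.nn.top_k` convention; a NumPy sketch of that output order:

```python
import numpy as np

def top_k(x, k):
  # Returns (values, indices): values at output 0, indices at output 1,
  # the ordering the kernel above now uses.
  idx = np.argsort(-x)[:k]
  return x[idx], idx.astype(np.int32)

values, indices = top_k(np.array([1., 9., 4., 7.]), k=2)
print(values)   # [9. 7.]
print(indices)  # [1 3]
```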

View File

@@ -609,7 +609,7 @@ enum {
* Long short-term memory unit (LSTM) recurrent network layer.
*
* The default non-peephole implementation is based on:
* http://www.bioinf.jku.at/publications/older/2604.pdf
* S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
* Computation, 9(8):1735-1780, 1997.
*

View File

@@ -37,9 +37,9 @@ struct ProfileEvent {
// Label of the event. This usually describes the event.
const char* tag;
// Timestamp in microseconds when the event began.
uint64_t begin_timestamp_us;
// Timestamp in microseconds when the event ended.
uint64_t end_timestamp_us;
// The field containing the type of event. This must be one of the event types
// in EventType.
EventType event_type;
@@ -74,7 +74,7 @@ class ProfileBuffer {
if (!enabled_) {
return kInvalidEventHandle;
}
uint64_t timestamp = NowMicros();
int index = current_index_ % event_buffer_.size();
event_buffer_[index].tag = tag;
event_buffer_[index].event_type = event_type;
@@ -134,7 +134,7 @@ class ProfileBuffer {
}
private:
static uint64_t NowMicros() {
// TODO(shashishekhar): Refactor this to a separate file.
struct timeval tv;
gettimeofday(&tv, nullptr);

View File

@@ -124,6 +124,15 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
SetDataTypeForAllOutputs(model, op, rand_op->dtype);
break;
}
case OperatorType::kTopK_V2: {
// topk(values: T, k: int32) -> values: T, indices: int32
CHECK_EQ(op->inputs.size(), 2);
CHECK_EQ(op->outputs.size(), 2);
CHECK(model->GetArray(op->inputs[1]).data_type == ArrayDataType::kInt32);
model->GetArray(op->outputs[0]).data_type =
model->GetArray(op->inputs[0]).data_type;
model->GetArray(op->outputs[1]).data_type = ArrayDataType::kInt32;
break;
}
case OperatorType::kTensorFlowUnsupported: {
auto* unsupported_op = static_cast<TensorFlowUnsupportedOperator*>(op);
// Some output tensors from the op could be eliminated by optimization.

View File

@@ -1087,8 +1087,8 @@ void ProcessGatherOperator(Model* model, GatherOperator* op) {
void ProcessTopkV2Operator(Model* model, TopKV2Operator* op) {
const auto& input_values = model->GetArray(op->inputs[0]);
const auto& input_k = model->GetArray(op->inputs[1]);
auto& output_values = model->GetArray(op->outputs[0]);
auto& output_indexes = model->GetArray(op->outputs[1]);
// Bail if we already know the output shape.
if (output_indexes.has_shape()) {
View File

@ -1991,7 +1991,7 @@ void ConvertTopKV2Operator(const NodeDef& node,
op->inputs.push_back(node.input(1)); op->inputs.push_back(node.input(1));
} }
// The op has two outputs. // The op has two outputs.
op->outputs.push_back(node.name() + ":0"); op->outputs.push_back(node.name());
op->outputs.push_back(node.name() + ":1"); op->outputs.push_back(node.name() + ":1");
model->operators.emplace_back(op.release()); model->operators.emplace_back(op.release());
} }

View File

@@ -825,11 +825,6 @@ void FixNoOrphanedArray(Model* model) {
void CheckEachArray(const Model& model) {
for (const auto& array_entry : model.GetArrayMap()) {
const auto& array = array_entry.second;
// It's OK to have a buffer or an alloc, but not both.
// (Since allocs are for transient arrays without a buffer).
CHECK(!array->buffer || !array->alloc);
@@ -839,6 +834,10 @@ void CheckEachArray(const Model& model) {
// The presence of a fixed buffer should imply the presence of a fixed
// shape.
CHECK(array->has_shape());
// A constant buffer should have a valid shape.
for (int d : array->shape().dims()) {
CHECK_GE(d, 1);
}
// The shape flat-size should agree with the buffer length.
CHECK_EQ(array->buffer->Length(),
RequiredBufferSizeForShape(array->shape()));

View File

@@ -22,6 +22,7 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/lib/strings/str_util.h"
// Skip MPI C++ bindings support, this matches the usage in other places

View File

@@ -56,21 +56,21 @@ class LazyAdamOptimizer(adam.AdamOptimizer):
epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
# \\(m := beta1 * m + (1 - beta1) * g_t\\)
m = self.get_slot(var, "m")
m_t = state_ops.scatter_update(m, grad.indices,
beta1_t * array_ops.gather(m, grad.indices) +
(1 - beta1_t) * grad.values,
use_locking=self._use_locking)
# \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\)
v = self.get_slot(var, "v")
v_t = state_ops.scatter_update(v, grad.indices,
beta2_t * array_ops.gather(v, grad.indices) +
(1 - beta2_t) * math_ops.square(grad.values),
use_locking=self._use_locking)
# \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\)
m_t_slice = array_ops.gather(m_t, grad.indices)
v_t_slice = array_ops.gather(v_t, grad.indices)
denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t

View File

@@ -40,23 +40,19 @@ class AdamOptimizer(optimizer_v2.OptimizerV2):
Initialization:
$$m_0 := 0 (Initialize initial 1st moment vector)$$
$$v_0 := 0 (Initialize initial 2nd moment vector)$$
$$t := 0 (Initialize timestep)$$
The update rule for `variable` with gradient `g` uses an optimization
described at the end of section 2 of the paper:
$$t := t + 1$$
$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
The default value of 1e-8 for epsilon might not be a good default in
general. For example, when training an Inception network on ImageNet a
View File

@@ -307,6 +307,21 @@ class LSTMTest(test.TestCase):
self._seed = 23489
np.random.seed(self._seed)
def testDType(self):
# Test case for GitHub issue 16228
# Not passing dtype in constructor results in default float32
lstm = rnn_cell.LSTMCell(10)
input_tensor = array_ops.ones([10, 50])
lstm.build(input_tensor.get_shape())
self.assertEqual(lstm._bias.dtype, dtypes.float32_ref)
# Explicitly pass dtype in constructor
for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
lstm = rnn_cell.LSTMCell(10, dtype=dtype)
input_tensor = array_ops.ones([10, 50])
lstm.build(input_tensor.get_shape())
self.assertEqual(lstm._bias.dtype, dtype._as_ref)
def testNoProjNoSharding(self): def testNoProjNoSharding(self):
num_units = 3 num_units = 3
input_size = 5 input_size = 5

View File

@@ -37,7 +37,7 @@ def _top_k_generator(k):
   def _top_k(probabilities, targets):
     targets = math_ops.to_int32(targets)
     if targets.get_shape().ndims > 1:
-      targets = array_ops.squeeze(targets, squeeze_dims=[1])
+      targets = array_ops.squeeze(targets, axis=[1])
     return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k))
   return _top_k

@@ -57,7 +57,7 @@ def _r2(probabilities, targets, weights=None):
 def _squeeze_and_onehot(targets, depth):
-  targets = array_ops.squeeze(targets, squeeze_dims=[1])
+  targets = array_ops.squeeze(targets, axis=[1])
   return array_ops.one_hot(math_ops.to_int32(targets), depth)
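This rename (here and in the files below) swaps the deprecated `squeeze_dims` keyword for `axis`; the behavior is unchanged. A quick NumPy analogue of what `squeeze(..., axis=[1])` does:

```python
import numpy as np

targets = np.zeros((4, 1))              # shape (4, 1)
squeezed = np.squeeze(targets, axis=1)  # drop the size-1 dimension
assert squeezed.shape == (4,)
```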

View File

@@ -55,7 +55,7 @@ class ManyToOneLayer(hybrid_layer.HybridLayer):
     # There is always one activation per instance by definition, so squeeze
     # away the extra dimension.
-    return array_ops.squeeze(nn_activations, squeeze_dims=[1])
+    return array_ops.squeeze(nn_activations, axis=[1])

 class FlattenedFullyConnectedLayer(hybrid_layer.HybridLayer):

View File

@@ -445,7 +445,7 @@ class RandomForestGraphs(object):
       mask = math_ops.less(
           r, array_ops.ones_like(r) * self.params.bagging_fraction)
       gather_indices = array_ops.squeeze(
-          array_ops.where(mask), squeeze_dims=[1])
+          array_ops.where(mask), axis=[1])
       # TODO(thomaswc): Calculate out-of-bag data and labels, and store
       # them for use in calculating statistics later.
       tree_data = array_ops.gather(processed_dense_features, gather_indices)

View File

@@ -111,20 +111,22 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph,
   }
 }

-std::pair<string, int> ParseTensorName(string name, int default_idx = 0) {
+std::pair<string, int> ParseTensorName(const string& name,
+                                       int default_idx = 0) {
+  string name_no_idx = name;
   int idx = default_idx;
-  size_t sep = name.find_last_of(':');
+  const size_t sep = name_no_idx.find_last_of(':');
   if (sep != string::npos) {
-    name = name.substr(0, sep);
+    name_no_idx = name_no_idx.substr(0, sep);
     idx = std::stoi(name.substr(sep + 1));
   }
-  return std::make_pair(name, idx);
+  return std::make_pair(name_no_idx, idx);
 }

 std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
     const std::vector<string>& tensor_names) {
   std::unordered_map<string, std::vector<int>> result;
-  for (string const& tensor_name : tensor_names) {
+  for (const string& tensor_name : tensor_names) {
     string node_name;
     int index;
     std::tie(node_name, index) = ParseTensorName(tensor_name);

@@ -132,6 +134,7 @@ std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
   }
   return result;
 }

+// TODO(sami): convert references to pointers
 struct ConvertGraphParams {
   ConvertGraphParams(

View File

@@ -78,7 +78,7 @@ class StubTimeSeriesModel(model.TimeSeriesModel):
     batch_end_values = array_ops.squeeze(
         array_ops.slice(values, [0, array_ops.shape(times)[1] - 1, 0],
                         [-1, 1, -1]),
-        squeeze_dims=[1, 2])
+        axis=[1, 2])
     # A pretty odd but easy to think about loss: L1 loss on the batch end
     # values.
     loss = math_ops.reduce_sum(

View File

@@ -170,7 +170,7 @@ class KalmanFilter(object):
         math_ops.matmul(
             transition_matrices,
             prior_state[..., None]),
-        squeeze_dims=[-1])
+        axis=[-1])
     return advanced_state

   def predict_state_var(

@@ -254,7 +254,7 @@ class KalmanFilter(object):
             kalman_gain_transposed,
             array_ops.expand_dims(residual, -1),
             adjoint_a=True),
-        squeeze_dims=[-1])
+        axis=[-1])
     gain_obs = math_ops.matmul(
         kalman_gain_transposed, observation_model, adjoint_a=True)
     identity_extradim = linalg_ops.eye(

@@ -332,7 +332,7 @@ class KalmanFilter(object):
             array_ops.expand_dims(state_mean, 1),
             observation_model,
             adjoint_b=True),
-        squeeze_dims=[1])
+        axis=[1])
     observed_var = math_ops.matmul(
         math_ops.matmul(observation_model, state_var),
         observation_model,
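The expand-then-squeeze pattern in these hunks is the usual trick for batched matrix-vector products. A minimal NumPy sketch of the state-prediction step (shapes are illustrative):

```python
import numpy as np

# Batch of 4 systems with a 3-dimensional state each.
transition_matrices = np.random.randn(4, 3, 3)
prior_state = np.random.randn(4, 3)

# Add a trailing length-1 axis so batched matmul sees column vectors,
# then squeeze it away again, as in the code above.
advanced_state = np.squeeze(
    transition_matrices @ prior_state[..., None], axis=-1)
assert advanced_state.shape == (4, 3)
```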

View File

@@ -2292,7 +2292,9 @@ tf_cuda_library(
 CORE_CPU_BASE_HDRS = GRAPH_HDRS + [
     "common_runtime/device.h",
+    "common_runtime/device_factory.h",
     "common_runtime/device_mgr.h",
+    "common_runtime/device_set.h",
     "common_runtime/eval_const_tensor.h",
     "common_runtime/graph_runner.h",
     "common_runtime/shape_refiner.h",

@@ -2350,9 +2352,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
     "common_runtime/copy_tensor.h",
     "common_runtime/costmodel_manager.h",
     "common_runtime/debugger_state_interface.h",
-    "common_runtime/device_factory.h",
     "common_runtime/device_resolver_local.h",
-    "common_runtime/device_set.h",
     "common_runtime/dma_helper.h",
     "common_runtime/eigen_thread_pool.h",
     "common_runtime/executor.h",

View File

@@ -82,9 +82,9 @@ END
   }
   summary: "Update \'*var\' according to the Adam algorithm."
   description: <<END
-lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
+$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
+$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
 END
 }

View File

@@ -24,5 +24,6 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
                       [0, 0, 2, 2, 0, 0]
                       [0, 0, 0, 0, 0, 0]]
 ```
+
 END
 }

View File

@@ -44,6 +44,7 @@ In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
 if T == qint8, out[i] -= (range(T) + 1) / 2.0
 ```
+
 here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

 *MIN_COMBINED Mode Example*

@@ -87,6 +88,7 @@ choosing to elide the lowest possible value for symmetry (e.g., output range is
 We first find the range of values in our tensor. The
 range we use is always centered on 0, so we find m such that
+
 ```c++
 m = max(abs(input_min), abs(input_max))
 ```

@@ -95,6 +97,7 @@ Our input tensor range is then `[-m, m]`.
 Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
 If T is signed, this is
+
 ```
 num_bits = sizeof(T) * 8
 [min_fixed, max_fixed] =

@@ -102,16 +105,19 @@ If T is signed, this is
 ```

 Otherwise, if T is unsigned, the fixed-point range is
+
 ```
 [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
 ```

 From this we compute our scaling factor, s:
+
 ```c++
 s = (max_fixed - min_fixed) / (2 * m)
 ```

 Now we can quantize the elements of our tensor:
+
 ```c++
 result = round(input * s)
 ```
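Putting the steps of this centered-scaling mode together in a small NumPy sketch (the function name is illustrative; assumes the signed/unsigned ranges quoted above):

```python
import numpy as np

def quantize_centered(x, input_min, input_max, num_bits=8, signed=True):
  m = max(abs(input_min), abs(input_max))        # range is [-m, m]
  if signed:
    min_fixed = -(1 << (num_bits - 1))
    max_fixed = (1 << (num_bits - 1)) - 1
  else:
    min_fixed, max_fixed = 0, (1 << num_bits) - 1
  s = (max_fixed - min_fixed) / (2.0 * m)        # scaling factor
  return np.round(np.asarray(x) * s)

quantize_centered([-10.0, 0.0, 10.0], input_min=-10.0, input_max=10.0)
# -> array([-128., 0., 128.])
```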

View File

@@ -76,9 +76,9 @@ END
   }
   summary: "Update \'*var\' according to the Adam algorithm."
   description: <<END
-lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
+$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
+$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
 END
 }

View File

@@ -25,12 +25,12 @@ A new tensor with the given shape and updates applied according
 to the indices.
 END
   }
-  summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`."
+  summary: "Scatter `updates` into a new tensor according to `indices`."
   description: <<END
-Creates a new tensor by applying sparse `updates` to individual
-values or slices within a zero tensor of the given `shape` according to
-indices. This operator is the inverse of the @{tf.gather_nd} operator which
-extracts values or slices from a given tensor.
+Creates a new tensor by applying sparse `updates` to individual values or
+slices within a tensor (initially zero for numeric, empty for string) of
+the given `shape` according to indices. This operator is the inverse of the
+@{tf.gather_nd} operator which extracts values or slices from a given tensor.

 **WARNING**: The order in which updates are applied is nondeterministic, so the
 output will be nondeterministic if `indices` contains duplicates.
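For the numeric case, the semantics can be sketched in a few lines of NumPy (hypothetical helper; note the sketch resolves duplicates by last-write-wins, whereas the op leaves duplicate order undefined):

```python
import numpy as np

def scatter_nd_sketch(indices, updates, shape):
  out = np.zeros(shape, dtype=np.asarray(updates).dtype)
  for idx, upd in zip(indices, updates):
    out[tuple(idx)] = upd   # duplicates: last write wins in this sketch only
  return out

scatter_nd_sketch([[4], [3], [1], [7]], [9, 10, 11, 12], [8])
# -> array([ 0, 11,  0, 10,  9,  0,  0, 12])
```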

View File

@@ -490,7 +490,7 @@ Status GraphExecutionState::OptimizeGraph(
       cpu_device = device;
     }
   }
-  grappler::VirtualCluster cluster(device_map);
+  grappler::VirtualCluster cluster(device_map, device_set_);
   GraphDef new_graph;
   TF_RETURN_IF_ERROR(grappler::RunMetaOptimizer(
       item, rewrite_options, cpu_device, &cluster, &new_graph));

View File

@@ -547,14 +547,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // If Op has been specifically assigned to a non-CPU device, then No.
     if (!n->assigned_device_name().empty() &&
-        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
       result = false;
       reason = "Op has been assigned a runtime device that is not CPU.";
     }

     // If user has specifically assigned this op to a non-CPU device, then No.
     if (!n->def().device().empty() &&
-        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
       result = false;
       reason = "User has assigned a device that is not CPU.";
     }

@@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // If Op has been specifically assigned to a non-CPU device, then No.
     if (!n->assigned_device_name().empty() &&
-        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
      result = false;
      reason = "Op has been assigned a runtime device that is not CPU.";
     }

     // If user has specifically assigned this op to a non-CPU device, then No.
     if (!n->def().device().empty() &&
-        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
       result = false;
       reason = "User has assigned a device that is not CPU.";
     }

View File

@@ -56,6 +56,7 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//tensorflow/core:core_cpu_base",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",

@@ -73,6 +74,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":cluster",
+        "//tensorflow/core:core_cpu_base",
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler/costs:op_level_cost_estimator",

View File

@@ -21,6 +21,7 @@ limitations under the License.
 #include <utility>
 #include <vector>

+#include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/lib/core/status.h"

@@ -92,6 +93,10 @@ class Cluster {
   // sorted alphabetically.
   const std::vector<string> GetDeviceNames() const;

+  // The DeviceSet is not always available, but when it is it contains a
+  // superset of the devices listed in GetDevices/GetDeviceNames().
+  const DeviceSet* GetDeviceSet() const { return device_set_; }
+
   // Enables collecting the allocator stats. Call with enable=true must be made
   // before Provision().
   virtual Status EnablePeakMemoryStats(bool enable) {

@@ -119,6 +124,7 @@ class Cluster {
  protected:
   std::unordered_map<string, DeviceProperties> devices_;
+  const DeviceSet* device_set_ = nullptr;  // Not owned
   const int timeout_s_;
   SessionOptions options_;
   RunOptions run_options_;

View File

@@ -37,6 +37,14 @@ VirtualCluster::VirtualCluster(
     : Cluster(0), node_estimator_(node_estimator), node_manager_(node_manager) {
   devices_ = devices;
 }

+VirtualCluster::VirtualCluster(
+    const std::unordered_map<string, DeviceProperties>& devices,
+    const DeviceSet* device_set)
+    : VirtualCluster(devices) {
+  device_set_ = device_set;
+}
+
 VirtualCluster::~VirtualCluster() {}

 Status VirtualCluster::Provision() { return Status::OK(); }

View File

@@ -17,6 +17,8 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_CLUSTERS_VIRTUAL_CLUSTER_H_

 #include <unordered_map>
+
+#include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/costs/op_level_cost_estimator.h"
 #include "tensorflow/core/grappler/costs/virtual_scheduler.h"

@@ -34,6 +36,8 @@ class VirtualCluster : public Cluster {
   VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
                  OpLevelCostEstimator* node_estimator,
                  ReadyNodeManager* node_manager);
+  VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
+                 const DeviceSet* device_set);

   ~VirtualCluster() override;

View File

@@ -199,7 +199,7 @@ class FirstReadyManager : public ReadyNodeManager {
   // current node.
   std::vector<const NodeDef*> nodes_;
   // Newly added nodes are added to waiting_queue_. That way, GetCurrNode(),
-  // wihch returns the front of the nodes_, always returns the same node,
+  // which returns the front of the nodes_, always returns the same node,
   // even if any of new nodes has time_ready smaller than the current node's.
   std::vector<const NodeDef*> waiting_queue_;
   // Comparator functor for heap; stl heap is max heap, so we use "greater than"

@@ -212,7 +212,7 @@ class FirstReadyManager : public ReadyNodeManager {
 };

 // CompositeNodeManager has a few other NodeManagers: per-device LIFO for normal
-// ops (neither _Send nor _Recv) and FirstyReadyManagers for _Send ops and _Recv
+// ops (neither _Send nor _Recv) and FirstReadyManagers for _Send ops and _Recv
 // ops, and then it chooses FirstReady among the ops chosen from each
 // internal NodeManagers. The objective is to maximize producer-consumer
 // locality within device, while processing nodes across devices, including

View File

@@ -33,7 +33,7 @@ class CustomGraphOptimizerRegistry {
   static std::vector<string> GetRegisteredOptimizers();

   typedef std::function<CustomGraphOptimizer*()> Creator;
-  // Regsiter graph optimizer which can be called during program initialization.
+  // Register graph optimizer which can be called during program initialization.
   // This class is not thread-safe.
   static void RegisterOptimizerOrDie(const Creator& optimizer_creator,
                                      const string& name);

View File

@@ -160,13 +160,26 @@ Status MetaOptimizer::InitializeOptimizersByName(
       VLOG(2) << "Can't register an optimizer by name: " << optimizer_name;
     }
   }
+  for (const auto& optimizer_config : cfg_.custom_optimizers()) {
+    auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
+        optimizer_config.name());
+
+    if (custom_optimizer) {
+      VLOG(2) << "Registered custom configurable graph optimizer: "
+              << optimizer_config.name();
+      TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config));
+      optimizers->push_back(std::move(custom_optimizer));
+    } else {
+      VLOG(2) << "Can't register an optimizer by name: "
+              << optimizer_config.name();
+    }
+  }
   return Status::OK();
 }

 Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
                                     GraphDef* optimized_graph) {
   std::vector<std::unique_ptr<GraphOptimizer>> optimizers;
-  if (cfg_.optimizers().empty()) {
+  if (cfg_.optimizers().empty() && cfg_.custom_optimizers().empty()) {
     TF_RETURN_IF_ERROR(InitializeOptimizers(&optimizers));
   } else {
     TF_RETURN_IF_ERROR(InitializeOptimizersByName(&optimizers));

@@ -337,7 +350,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.auto_parallel().enable() ||
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
-         !cfg.optimizers().empty();
+         !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }

 Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg,
View File

@@ -134,6 +134,8 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
   switch (element.dtype()) {
     TF_CALL_ALL_TYPES(HANDLE_TYPE);
     TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+    TF_CALL_uint32(HANDLE_TYPE);
+    TF_CALL_uint64(HANDLE_TYPE);
 #undef HANDLE_TYPE
     default:
       return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",

View File

@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"

 namespace tensorflow {
-REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
-          int16, int32, int64);
+REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
+          int8, int16, int32, int64);
 REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
           Eigen::half, double);
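`FloorDiv` on the newly covered signed type uses floored (not truncating) division; NumPy's `floor_divide` shows the expected behavior:

```python
import numpy as np

np.floor_divide(np.int8(-7), np.int8(2))  # -> -4, not -3
```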

View File

@@ -20,6 +20,7 @@ limitations under the License.
 #include <map>
 #include <string>
 #include <vector>
+#include <memory>

 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"

@@ -42,14 +43,13 @@ limitations under the License.
 #include "tensorflow/core/util/mkl_util.h"

 #ifndef INTEL_MKL_ML
 #include "mkldnn.hpp"
 using mkldnn::prop_kind;
 using mkldnn::stream;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #else
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
@@ -57,11 +57,232 @@ using mkldnn::convolution_forward;

 namespace tensorflow {

+#ifndef INTEL_MKL_ML
+
+struct ConvFwdDimensions {
+  memory::dims src_dims;
+  memory::dims filter_dims;
+  memory::dims bias_dims;
+  memory::dims dst_dims;
+  memory::dims strides;
+  memory::dims dilations;
+  memory::dims padding_left;
+  memory::dims padding_right;
+
+  ConvFwdDimensions(memory::dims src_dims,
+      memory::dims filter_dims, memory::dims bias_dims,
+      memory::dims dst_dims, memory::dims strides,
+      memory::dims dilations, memory::dims padding_left,
+      memory::dims padding_right) :
+      src_dims(src_dims), filter_dims(filter_dims),
+      bias_dims(bias_dims), dst_dims(dst_dims),
+      strides(strides), dilations(dilations),
+      padding_left(padding_left), padding_right(padding_right) {
+  }
+};
+
+template <typename T>
+class Conv2DFwd : public DnnOp {
+ public:
+  explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) {
+    fwd_stream_.reset(new stream(stream::kind::eager));
+    // create conv primitive
+    if (conv_fwd_ == nullptr) {
+      Setup(convFwdDims);
+    }
+  }
+
+  ~Conv2DFwd() {}
+
+  // Convolution forward execute with bias
+  //   src_data:    input data buffer of src
+  //   filter_data: input data buffer of filter (weights)
+  //   bias_data:   input data buffer of bias
+  //   dst_data:    output data buffer of dst
+  void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) {
+    src_mem_->set_data_handle(static_cast<void*>(src_data));
+    filter_mem_->set_data_handle(static_cast<void*>(filter_data));
+    bias_mem_->set_data_handle(static_cast<void*>(bias_data));
+    dst_mem_->set_data_handle(static_cast<void*>(dst_data));
+    fwd_stream_->submit(fwd_primitives_);
+
+    // after exec, set data handle back
+    src_mem_->set_data_handle(DummyData);
+    filter_mem_->set_data_handle(DummyData);
+    bias_mem_->set_data_handle(DummyData);
+    dst_mem_->set_data_handle(DummyData);
+    return;
+  }
+
+  // Convolution forward execute without bias
+  //   src_data:    input data buffer of src
+  //   filter_data: input data buffer of filter (weights)
+  //   dst_data:    output data buffer of dst
+  void Execute(T* src_data, T* filter_data, T* dst_data) {
+    src_mem_->set_data_handle(static_cast<void*>(src_data));
+    filter_mem_->set_data_handle(static_cast<void*>(filter_data));
+    dst_mem_->set_data_handle(static_cast<void*>(dst_data));
+    fwd_stream_->submit(fwd_primitives_);
+
+    // after exec, set data handle back
+    src_mem_->set_data_handle(DummyData);
+    filter_mem_->set_data_handle(DummyData);
+    dst_mem_->set_data_handle(DummyData);
+    return;
+  }
+
+  // expected memory format for this primitive instance
+  memory::format src_fmt_;
+  memory::format filter_fmt_;
+
+  // convolution primitive
+  std::shared_ptr<mkldnn::convolution_forward::primitive_desc> fwd_pd_;
+  std::shared_ptr<mkldnn::primitive> conv_fwd_;
+
+ private:
+  void Setup(const ConvFwdDimensions& convFwdDims) {
+    // create memory descriptors for convolution data w/ no specified format
+    src_md_.reset(new memory::desc({convFwdDims.src_dims},
+        MklDnnType<T>(), memory::format::any));
+    filter_md_.reset(new memory::desc({convFwdDims.filter_dims},
+        MklDnnType<T>(), memory::format::any));
+    dst_md_.reset(new memory::desc({convFwdDims.dst_dims},
+        MklDnnType<T>(), memory::format::any));
+    if (!convFwdDims.bias_dims.empty())
+      bias_md_.reset(new memory::desc({convFwdDims.bias_dims},
+          MklDnnType<T>(), memory::format::any));
+
+    // create a convolution
+    if (!convFwdDims.bias_dims.empty()) {
+      fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
+          convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_,
+          convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
+          convFwdDims.padding_right, padding_kind::zero));
+    } else {
+      fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
+          convolution_direct, *src_md_, *filter_md_, *dst_md_,
+          convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
+          convFwdDims.padding_right, padding_kind::zero));
+    }
+
+    fwd_pd_.reset(new convolution_forward::primitive_desc(
+        *fwd_desc_, cpu_engine_));
+
+    // store the expected memory format
+    src_fmt_ = static_cast<mkldnn::memory::format>(
+        fwd_pd_.get()->src_primitive_desc().desc().data.format);
+    filter_fmt_ = static_cast<mkldnn::memory::format>(
+        fwd_pd_.get()->weights_primitive_desc().desc().data.format);
+
+    // create memory primitive based on dummy data
+    src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData));
+    filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(),
+        DummyData));
+    dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData));
+
+    // create convolution primitive and add it to net
+    if (!convFwdDims.bias_dims.empty()) {
+      bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType<T>(),
+          memory::format::x}, cpu_engine_}, DummyData));
+      conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
+          *filter_mem_, *bias_mem_, *dst_mem_));
+    } else {
+      conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
+          *filter_mem_, *dst_mem_));
+    }
+
+    fwd_primitives_.push_back(*conv_fwd_);
+    return;
+  }
+
+  // MKLDNN memory
+  std::shared_ptr<mkldnn::memory> src_mem_;
+  std::shared_ptr<mkldnn::memory> filter_mem_;
+  std::shared_ptr<mkldnn::memory> bias_mem_;
+  std::shared_ptr<mkldnn::memory> dst_mem_;
+
+  std::shared_ptr<mkldnn::stream> fwd_stream_;
+  std::vector<mkldnn::primitive> fwd_primitives_;
+
+  // desc & primitive desc
+  std::shared_ptr<mkldnn::convolution_forward::desc> fwd_desc_;
+
+  // memory desc
+  std::shared_ptr<mkldnn::memory::desc> src_md_;
+  std::shared_ptr<mkldnn::memory::desc> filter_md_;
+  std::shared_ptr<mkldnn::memory::desc> bias_md_;
+  std::shared_ptr<mkldnn::memory::desc> dst_md_;
+
+  engine cpu_engine_ = engine(engine::cpu, 0);
+};
+
+template <typename T>
+class Conv2DFwdFactory : public DnnOpFactory<T> {
+ public:
+  static Conv2DFwd<T>* Get(const ConvFwdDimensions& convFwdDims) {
+    Conv2DFwd<T>* conv2d_fwd = nullptr;
+
+    // try to find a suitable one in pool
+    conv2d_fwd = dynamic_cast<Conv2DFwd<T>*>(
+        Conv2DFwdFactory<T>::GetInstance().GetConv2DFwd(convFwdDims));
+
+    if (conv2d_fwd == nullptr) {
+      conv2d_fwd = new Conv2DFwd<T>(convFwdDims);
+      Conv2DFwdFactory<T>::GetInstance().SetConv2DFwd(
+          convFwdDims, conv2d_fwd);
+    }
+    return conv2d_fwd;
+  }
+
+ private:
+  Conv2DFwdFactory() {}
+  ~Conv2DFwdFactory() {}
+
+  static const int kDilationH = 0, kDilationW = 1;
+
+  static Conv2DFwdFactory& GetInstance() {
+    static Conv2DFwdFactory instance_;
+    return instance_;
+  }
+
+  static std::string CreateKey(const ConvFwdDimensions& convFwdDims) {
+    std::string prefix = "conv2d_fwd_";
+    FactoryKeyCreator key_creator;
+    key_creator.AddAsKey(prefix);
+    key_creator.AddAsKey(convFwdDims.src_dims);
+    key_creator.AddAsKey(convFwdDims.filter_dims);
+    key_creator.AddAsKey(convFwdDims.bias_dims);
+    key_creator.AddAsKey(convFwdDims.dst_dims);
+    key_creator.AddAsKey(convFwdDims.strides);
+    key_creator.AddAsKey(convFwdDims.dilations);
+    key_creator.AddAsKey(convFwdDims.padding_left);
+    key_creator.AddAsKey(convFwdDims.padding_right);
+    return key_creator.GetKey();
+  }
+
+  DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) {
+    std::string key = CreateKey(convFwdDims);
+    return this->GetOp(key);
+  }
+
+  void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp* op) {
+    std::string key = CreateKey(convFwdDims);
+    this->SetOp(key, op);
+  }
+};
+
+#endif
+
 typedef Eigen::ThreadPoolDevice CPUDevice;

-// MKL-DNN is now default. MKL-ML must be specified explicitly.
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
 #ifdef INTEL_MKL_ML
 template <typename Device, typename T, bool biasEnabled>
 class MklConv2DOp : public OpKernel {
  public:
@@ -528,8 +749,6 @@ class MklConv2DOp : public OpKernel {
   void Compute(OpKernelContext* context) override {
     try {
-      auto cpu_engine = engine(engine::cpu, 0);
-
       // Input tensors
       const Tensor& src_tensor = MklGetInput(context, kInputIndex_Src);
       const Tensor& filter_tensor = MklGetInput(context, kInputIndex_Filter);
@@ -543,11 +762,11 @@ class MklConv2DOp : public OpKernel {
       MklDnnData<T> src(&cpu_engine);
       MklDnnData<T> filter(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);  // output

-      memory::dims src_dims, filter_dims, padding_l, padding_r,
+      memory::dims src_dims, filter_dims, padding_left, padding_right,
           dilations, strides;
-      memory::dims output_dims_tf_order, output_dims_mkl_order;
+      memory::dims dst_dims_tf_order, dst_dims_mkl_order;

       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_,
@@ -555,31 +774,29 @@ class MklConv2DOp : public OpKernel {
       auto src_tf_shape = GetTfShape(context, kInputIndex_Src);
       auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter);
       conv_utl.GetConvFwdSizesInMklOrder(
-          src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides,
-          &dilations, &output_dims_tf_order, &output_dims_mkl_order,
-          &padding_l, &padding_r);
+          src_tf_shape, filter_tf_shape, &src_dims, &filter_dims,
+          &strides, &dilations, &dst_dims_tf_order, &dst_dims_mkl_order,
+          &padding_left, &padding_right);
       if (!context->status().ok()) return;

       // Check for corner case - if there is nothing to compute, return.
-      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
+      TensorShape dst_tf_shape = MklDnnDimsToTFShape(dst_dims_tf_order);

       // Corner cases: output with 0 elements and 0 batch size.
-      Tensor* output_tensor = nullptr;
-      if (output_tf_shape.num_elements() == 0 || output_dims_tf_order[0] == 0) {
-        // TODO(jbobba): Verify correctness here
-        // Need semantics for Null MKL tensor
-        MklDnnShape output_mkl_shape;
-        output_mkl_shape.SetMklTensor(false);
-
-        AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor,
-                                  src_tf_shape, output_mkl_shape);
+      Tensor* dst_tensor = nullptr;
+      if (dst_tf_shape.num_elements() == 0 ||
+          dst_dims_tf_order[0] == 0) {
+        MklDnnShape dst_mkl_shape;
+        dst_mkl_shape.SetMklTensor(false);
+        AllocateOutputSetMklShape(context, kOutputIndex_Dst,
+                                  &dst_tensor, src_tf_shape, dst_mkl_shape);

         // MklConv2D also outputs converted filter as 2nd output of Conv2D.
         filter_mkl_shape.SetMklTensor(false);
         Tensor* output_filter_tensor = nullptr;
         AllocateOutputSetMklShape(context, kOutputIndex_Filter,
-                                  &output_filter_tensor, filter_tf_shape,
-                                  filter_mkl_shape);
+                                  &output_filter_tensor,
+                                  filter_tf_shape, filter_mkl_shape);
         return;
       }
@@ -587,6 +804,7 @@ class MklConv2DOp : public OpKernel {
       // Describe how the inputs and outputs of Convolution look like. Also
       // specify buffers containing actual input and output data.
       auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+
       // If input is in MKL layout, then simply grab input layout; otherwise,
       // construct input Tf layout. For TF layout, although input shape
       // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's

@@ -595,6 +813,7 @@ class MklConv2DOp : public OpKernel {
               ? src_mkl_shape.GetMklLayout()
               : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
       src.SetUsrMem(src_md, &src_tensor);
+
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
       auto filter_md = filter_mkl_shape.IsMklTensor()  // Should NEVER be true
@@ -603,97 +822,69 @@ class MklConv2DOp : public OpKernel {
                             memory::format::hwio);
       filter.SetUsrMem(filter_md, &filter_tensor);

-      // Set output shape (output_dims) required in MKL-DNN order.
-      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
-      // depending on data format). But later we propagate Mkl layout of the
-      // output to the next op directly.
-      output.SetUsrMem(output_dims_mkl_order, tf_fmt);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      src.SetOpMemDesc(src_dims, memory::format::any);
-      filter.SetOpMemDesc(filter_dims, memory::format::any);
-      output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
-
       // MKLDNN dilation starts from 0.
       dilations[kDilationH] -= 1;
       dilations[kDilationW] -= 1;

+      // get a conv2d fwd from primitive pool
+      Conv2DFwd<T>* conv2d_fwd = nullptr;
       if (biasEnabled) {
-        // Create convolution primitive with Bias.
-        MklDnnData<T> bias(&cpu_engine);
-        memory::dims bias_size;
-        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size);
-        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
-        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
-        bias.SetOpMemDesc(bias_size, memory::format::any);
-
-        // Create convolution primitive with Bias.
-        // Use MKLDNN dilated convolution in case of dilated rate (>0).
-        auto conv_desc = (dilations[kDilationH] > 0 ||
-                          dilations[kDilationW] > 0) ?
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), bias.GetOpMemDesc(),
-                output.GetOpMemDesc(), strides, dilations,
-                padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_)):
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), bias.GetOpMemDesc(),
-                output.GetOpMemDesc(), strides,
-                padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_));
-        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
-                                                                  cpu_engine);
-        AllocateOutputTensor(context, conv_prim_desc,
-                             output_dims_mkl_order, tf_fmt, &output_tensor);
-        // Set data handle for output.
-        output.SetUsrMemDataHandle(output_tensor);
-
-        Tensor* filter_out_tensor = nullptr;
-        AllocateFilterOutputTensor(context, conv_prim_desc,
-                                   TFShapeToMklDnnDims(filter_tf_shape),
-                                   &filter_out_tensor);
-
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output,
-                             filter_out_tensor);
+        memory::dims bias_dims = {};
+        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims);
+        ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims,
+            dst_dims_mkl_order, strides, dilations,
+            padding_left, padding_right);
+        conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
       } else {
-        // Create convolution primitive without Bias.
-        // Use MKLDNN dilated convolution in case of dilated rate (>0).
-        auto conv_desc = (dilations[kDilationH] > 0 ||
-                          dilations[kDilationW] > 0) ?
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), output.GetOpMemDesc(),
-                strides, dilations, padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_)):
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), output.GetOpMemDesc(),
-                strides, padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_));
-        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
-                                                                  cpu_engine);
-        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
-                             tf_fmt, &output_tensor);
-        // Set data handle for output.
-        output.SetUsrMemDataHandle(output_tensor);
-
-        Tensor* filter_out_tensor = nullptr;
-        AllocateFilterOutputTensor(context, conv_prim_desc,
-                                   TFShapeToMklDnnDims(filter_tf_shape),
-                                   &filter_out_tensor);
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
-                             nullptr, &output, filter_out_tensor);
+        ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS,
+            dst_dims_mkl_order, strides, dilations,
+            padding_left, padding_right);
+        conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
       }
-    } catch (mkldnn::error& e) {
+
+      // allocate output tensors output_tensor and filter_out_tensor
+      std::shared_ptr<mkldnn::convolution_forward::primitive_desc>
+          conv_fwd_pd = conv2d_fwd->fwd_pd_;
+      AllocateOutputTensor(context, *conv_fwd_pd,
+                           dst_dims_mkl_order, tf_fmt, &dst_tensor);
+      Tensor* filter_out_tensor = nullptr;
+      AllocateFilterOutputTensor(context, *conv_fwd_pd,
+                                 TFShapeToMklDnnDims(filter_tf_shape),
+                                 &filter_out_tensor);
+
+      T* dst_data = static_cast<T*>(dst_tensor->flat<T>().data());
+
+      // check whether src/filter need reorder
+      std::vector<primitive> net;
+      if (src_md.data.format != conv2d_fwd->src_fmt_)
+        src.CheckReorderToOpMem(
+            conv_fwd_pd.get()->src_primitive_desc(), &net);
+      if (filter_md.data.format != conv2d_fwd->filter_fmt_)
+        filter.CheckReorderToOpMem(
+            conv_fwd_pd.get()->weights_primitive_desc(),
+            filter.GetTensorBuffer(filter_out_tensor), &net);
+      stream(stream::kind::eager).submit(net).wait();
+
+      T* src_data = static_cast<T*>(
+          src.GetOpMem().get_data_handle());
+      T* filter_data = static_cast<T*>(
+          filter.GetOpMem().get_data_handle());
+
+      // execute convolution
+      if (biasEnabled) {
+        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
+        T* bias_data = static_cast<T*>(const_cast<T*>(
+            bias_tensor.flat<T>().data()));
+        conv2d_fwd->Execute(src_data, filter_data, bias_data, dst_data);
+      } else {
+        conv2d_fwd->Execute(src_data, filter_data, dst_data);
+      }
+    } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + std::string(e.message) + ", in file " +
-                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+                         ", message: " + std::string(e.message) +
+                         ", in file " + std::string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
@@ -706,6 +897,7 @@ class MklConv2DOp : public OpKernel {
   const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2;
   const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
   const int kDilationH = 0, kDilationW = 1;
+  engine cpu_engine = engine(engine::cpu, 0);

   // Allocate output tensor.
   void AllocateOutputTensor(

View File

@@ -241,6 +241,7 @@ class ScatterNdUpdateOp : public OpKernel {
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_ADD_SUB_CPU);
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_UPDATE_CPU);
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_CPU);
+TF_CALL_string(REGISTER_SCATTER_ND_CPU);

 // Registers GPU kernels.
 #if GOOGLE_CUDA

View File

@@ -160,6 +160,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
       REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);

 TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
+REGISTER_SCATTER_ND_INDEX(string, scatter_nd_op::UpdateOp::ADD);
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_MATH)
 #undef REGISTER_SCATTER_ND_MATH

View File

@@ -16,35 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
 #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_

-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
 // This file requires the following include because it uses CudaAtomicMax:
 // #include "tensorflow/core/util/cuda_kernel_helper.h"

View File

@@ -23,7 +23,7 @@ limitations under the License.
 #if defined(WIN32)
 #include "extras/CUPTI/include/cupti.h"
 #else
-#include "cuda/extras/CUPTI/include/cupti.h"
+#include "cupti.h"
 #endif

 namespace perftools {
 namespace gputools {

View File

@@ -24,7 +24,7 @@ limitations under the License.
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc0"
+#define TF_VERSION_SUFFIX "-rc1"

 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)

View File

@@ -19,6 +19,8 @@ limitations under the License.
 #include <string>
 #include <vector>
+#include <unordered_map>
+#include <utility>

 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
@@ -1759,7 +1761,90 @@ class MklDnnData {
   }
 };

-#endif  // INTEL_MKL_ML
+/// Base class for operations with reuse of DNN primitives
+///
+class DnnOp {
+ public:
+  virtual ~DnnOp() {}
+
+  // Dummy data. Its size, hard-coded as 256 here, does
+  // not matter since MKL should never operate on this buffer.
+  unsigned char DummyData[256];
+};
+
+// This constant is used to declare dummy buffer (size), for MKL primitives
+const mkldnn::memory::dims NONE_DIMS = {};
+
+template <typename T>
+class DnnOpFactory {
+ public:
+  DnnOpFactory() {}
+  ~DnnOpFactory() {}
+
+  DnnOp* GetOp(const std::string& key) {
+    auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);
+    if (stream_iter == DnnOpFactory<T>::GetHashMap().end()) {
+      return nullptr;
+    } else {
+      return stream_iter->second;
+    }
+  }
+
+  void SetOp(const std::string& key, DnnOp* op) {
+    auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);
+    CHECK(stream_iter == DnnOpFactory<T>::GetHashMap().end());
+    DnnOpFactory<T>::GetHashMap()[key] = op;
+  }
+
+ private:
+  static inline std::unordered_map<std::string, DnnOp*>& GetHashMap() {
+    static thread_local std::unordered_map<std::string, DnnOp*> map_;
+    return map_;
+  }
+};
+
+// utility class for creating keys of MKL primitive pool.
+class FactoryKeyCreator {
+ public:
+  FactoryKeyCreator() {
+    key_.reserve(kMaxKeyLength);
+  }
+
+  ~FactoryKeyCreator() {}
+
+  void AddAsKey(const string& str) {
+    auto buffer = reinterpret_cast<const char*>(str.c_str());
+    Append(buffer, str.length());
+  }
+
+  void AddAsKey(const mkldnn::memory::dims& dims) {
+    for (unsigned int i = 0; i < dims.size(); i++) {
+      AddAsKey<int>(dims[i]);
+    }
+  }
+
+  template <typename T>
+  void AddAsKey(const T data) {
+    auto buffer = reinterpret_cast<const char*>(&data);
+    Append(buffer, sizeof(T));
+  }
+
+  std::string GetKey() {
+    return key_;
+  }
+
+ private:
+  string key_;
+  const char delimiter = 'x';
+  const int kMaxKeyLength = 256;
+
+  void Append(const char* data, int len) {
+    key_.append(data, len);
+    key_.append(1, delimiter);
+  }
+};
+
+#endif  // INTEL_MKL_DNN

 }  // namespace tensorflow
 #endif  // INTEL_MKL

View File

@@ -1,5 +1,5 @@
 # Roadmap
-**Last updated: Feb 15, 2018**
+**Last updated: Apr 27, 2018**

 TensorFlow is a rapidly moving, community supported project. This document is intended
 to provide guidance about priorities and focus areas of the core set of TensorFlow

@@ -14,12 +14,12 @@ expected in the next one to two releases.
 ### APIs
 #### High Level APIs:
-* Easy multi-GPU utilization with Estimators
+* Easy multi-GPU and TPU utilization with Estimators
 * Easy-to-use high-level pre-made estimators for Gradient Boosted Trees, Time Series, and other models

 #### Eager Execution:
 * Efficient utilization of multiple GPUs
-* Distributed training (multi-machine)
+* Distributed training support (multi-machine)
 * Performance improvements
 * Simpler export to a GraphDef/SavedModel

@@ -31,14 +31,14 @@ to create Keras models Eager- style via Model subclassing)
 #### Official Models:
 * A set of
-[reference models](https://github.com/tensorflow/models/tree/master/official)
+[models](https://github.com/tensorflow/models/tree/master/official)
 across image recognition, speech, object detection, and
 translation that demonstrate best practices and serve as a starting point for
 high-performance model development.

 #### Contrib:
-* Deprecation notices added to parts of tf.contrib where preferred implementations exist outside of tf.contrib.
-* As much as possible, large projects inside tf.contrib moved to separate repositories.
+* Deprecate parts of tf.contrib where preferred implementations exist outside of tf.contrib.
+* As much as possible, move large projects inside tf.contrib to separate repositories.
 * The tf.contrib module will eventually be discontinued in its current form, experimental development will in future happen in other repositories.

@@ -50,36 +50,72 @@ across image recognition, speech, object detection, and
 ### Platforms
 #### TensorFlow Lite:
-* Increased coverage of supported ops in TensorFlow Lite
+* Increase coverage of supported ops in TensorFlow Lite
 * Easier conversion of a trained TensorFlow graph for use on TensorFlow Lite
 * Support for GPU acceleration in TensorFlow Lite (iOS and Android)
 * Support for hardware accelerators via Android NeuralNets API
-* Improved CPU performance by quantization and other network optimizations (eg. pruning, distillation)
-* Increased support for devices beyond Android and iOS (eg. RPi, Cortex-M)
+* Improve CPU performance by quantization and other network optimizations (eg. pruning, distillation)
+* Increase support for devices beyond Android and iOS (eg. RPi, Cortex-M)

+#### TensorFlow.js:
+* Release package for Node.js bindings to the TensorFlow C API through the TensorFlow.js backend interface
+* Expand support for importing TensorFlow SavedModels and Keras models into browser with unified APIs supporting retraining in browser
+* Improve Layers API and allow model exporting/saving
+* Release tfjs-data API for efficient data input pipelines

+#### TensorFlow with Swift:
+* Establish open source project including documentation, open design, and code availability.
+* Continue implementing and refining implementation and design through 2018.
+* Aim for implementation to be solid enough for general use later in 2018.

 ### Performance
 #### Distributed TensorFlow:
-* Multi-GPU support optimized for a variety of GPU topologies
-* Improved mechanisms for distributing computations on several machines
+* Optimize Multi-GPU support for a variety of GPU topologies
+* Improve mechanisms for distributing computations on several machines

-#### Optimizations:
-* Mixed precision training support with initial example model and guide
-* Native TensorRT support
+#### GPU Optimizations:
+* Simplify mixed precision API with initial example model and guide.
+* Finalize TensorRT API and move to core.
+* CUDA 9.2 and NCCL 2.x default in TensorFlow builds.
+* Optimizations for DGX-2.
+* Remove support for CUDA less than 8.x and cuDNN less than 6.x.

+#### CPU Optimizations
 * Int8 support for SkyLake via MKL
 * Dynamic loading of SIMD-optimized kernels
+* MKL for Linux and Windows

+### End-to-end ML systems:
+#### TensorFlow Hub:
+* Expand support for module-types in TF Hub with TF Eager integration, Keras layers integration, and TensorFlow.js integration
+* Accept variable-sized image input
+* Improve multi-GPU estimator support
+* Document and improve TPU integration

+#### TensorFlow Extended:
+* Open source more of the TensorFlow Extended platform to facilitate adoption of TensorFlow in production settings.
+* Release TFX libraries for Data Validation

+### Documentation and Resources:
+* Update documentation, tutorials and Getting Started guides on all features and APIs
+* Update [Youtube Tensorflow channel](https://youtube.com/tensorflow) weekly with new content:
+  Coding TensorFlow - where we teach folks coding with tensorflow
+  TensorFlow Meets - where we highlight community contributions
+  Ask TensorFlow - where we answer community questions
+  Guest and Showcase videos
+* Update [Official TensorFlow blog](https://blog.tensorflow.org) with regular articles from Google team and the Community

-### Documentation and Usability:
-* Updated documentation, tutorials and Getting Started guides
-* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications

 ### Community and Partner Engagement
 #### Special Interest Groups:
-* Mobilizing the community to work together in focused domains
+* Mobilize the community to work together in focused domains
 * [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute): build and packaging of TensorFlow
-* More to be identified and launched
+* SIG TensorBoard, SIG Rust, and more to be identified and launched

 #### Community:
 * Incorporate public feedback on significant design decisions via a Request-for-Comment (RFC) process
 * Formalize process for external contributions to land in TensorFlow and associated projects
 * Grow global TensorFlow communities and user groups
 * Collaborate with partners to co-develop and publish research papers
+* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications
@@ -38,8 +38,10 @@ Estimators automatically write the following to disk:

uses to create visualizations.

To specify the top-level directory in which the Estimator stores its
information, assign a value to the optional `model_dir` argument of *any*
`Estimator`'s constructor.
Taking `DNNClassifier` as an example,
the following code sets the `model_dir`
argument to the `models/iris` directory:

```python
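# The actual snippet is elided by this diff hunk. A minimal sketch, assuming
# `import tensorflow as tf` and the iris feature columns from that guide
# (`my_feature_columns` is a stand-in name, not the guide's exact text):
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
    model_dir='models/iris')
```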
@@ -138,7 +138,7 @@ The model will represent the buckets as follows:

|< 1960             | [1, 0, 0, 0] |
|>= 1960 but < 1980 | [0, 1, 0, 0] |
|>= 1980 but < 2000 | [0, 0, 1, 0] |
|>= 2000            | [0, 0, 0, 1] |
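In the `tf.feature_column` API this bucketing can be expressed directly. A
minimal sketch (assuming the raw feature is a numeric column named `year`;
the name is illustrative, not taken from the guide):

```python
import tensorflow as tf

# Raw numeric input holding the year value.
year = tf.feature_column.numeric_column('year')

# Three boundaries yield the four one-hot buckets shown in the table above.
bucketized_year = tf.feature_column.bucketized_column(
    source_column=year, boundaries=[1960, 1980, 2000])
```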
Why would you want to split a number—a perfectly valid input to your
model—into a categorical value? Well, notice that the categorization splits a
@@ -10,7 +10,7 @@ course prior to diving into TensorFlow documentation:

TensorFlow is a tool for machine learning. While it contains a wide range of
functionality, TensorFlow is mainly designed for deep neural network models.

The easiest way to get started with TensorFlow is using Eager Execution.

* @{$get_started/eager} is for anyone new to machine learning or TensorFlow.
@@ -38,7 +38,7 @@ enable TensorFlow for C:

OS="linux" # Change to "darwin" for macOS
TARGET_DIRECTORY="/usr/local"
curl -L \
  "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz

The `tar` command extracts the TensorFlow C library into the `lib`
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:

TF_TYPE="cpu" # Change to "gpu" for GPU support
TARGET_DIRECTORY='/usr/local'
curl -L \
  "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz

The `tar` command extracts the TensorFlow C library into the `lib`
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:

<dependency>
  <groupId>org.tensorflow</groupId>
  <artifactId>tensorflow</artifactId>
  <version>1.8.0-rc1</version>
</dependency>

@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:

  <dependency>
    <groupId>org.tensorflow</groupId>
    <artifactId>tensorflow</artifactId>
    <version>1.8.0-rc1</version>
  </dependency>
</dependencies>
</project>

@@ -124,12 +124,12 @@ instead:

<dependency>
  <groupId>org.tensorflow</groupId>
  <artifactId>libtensorflow</artifactId>
  <version>1.8.0-rc1</version>
</dependency>
<dependency>
  <groupId>org.tensorflow</groupId>
  <artifactId>libtensorflow_jni_gpu</artifactId>
  <version>1.8.0-rc1</version>
</dependency>

@@ -148,7 +148,7 @@ refer to the simpler instructions above instead.

Take the following steps to install TensorFlow for Java on Linux or macOS:

1. Download
   [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
   which is the TensorFlow Java Archive (JAR).

2. Decide whether you will run TensorFlow for Java on CPU(s) only or with

@@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:

OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L \
  "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
tar -xz -C ./jni

### Install on Windows

@@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:

Take the following steps to install TensorFlow for Java on Windows:

1. Download
   [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
   which is the TensorFlow Java Archive (JAR).

2. Download the following Java Native Interface (JNI) file appropriate for
   [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip).

3. Extract this .zip file.

@@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the

downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
as follows:

<pre><b>javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java</b></pre>

### Running

@@ -241,11 +241,11 @@ two files are available to the JVM:

For example, the following command line executes the `HelloTF` program on Linux
and macOS:

<pre><b>java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>

And the following command line executes the `HelloTF` program on Windows:

<pre><b>java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>

If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
installed TensorFlow for Java and are ready to use the API. If the program
@@ -1,139 +1,266 @@

# Installing TensorFlow on Ubuntu

This guide explains how to install TensorFlow on Ubuntu Linux. While these
instructions may work on other Linux variants, they are tested and supported with
the following system requirements:

* 64-bit desktops or laptops
* Ubuntu 16.04 or higher

## Choose which TensorFlow to install

The following TensorFlow variants are available for installation:

* __TensorFlow with CPU support only__. If your system does not have an
NVIDIA®&nbsp;GPU, you must install this version. This version of TensorFlow is
usually easier to install, so even if you have an NVIDIA GPU, we recommend
installing this version first.
* __TensorFlow with GPU support__. TensorFlow programs usually run much faster on
a GPU than on a CPU. If you run performance-critical applications and your
system has an NVIDIA®&nbsp;GPU that meets the prerequisites, you should install
this version. See [TensorFlow GPU support](#NVIDIARequirements) for details.
<a name="NVIDIARequirements"></a>
### NVIDIA requirements to run TensorFlow with GPU support
If you are installing TensorFlow with GPU support using one of the
mechanisms described in this guide, then the following NVIDIA software
must be installed on your system:
* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
Ensure that you append the relevant CUDA pathnames to the
`LD_LIBRARY_PATH` environment variable as described in the
NVIDIA documentation.
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
Ensure that you create the `CUDA_HOME` environment variable as
described in the NVIDIA documentation.
* GPU card with CUDA Compute Capability 3.0 or higher for building
from source and 3.5 or higher for our binaries. See
[NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for
a list of supported GPU cards.
* [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA
Toolkit.
* The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface.
This library provides advanced profiling support. To install this library,
issue the following command for CUDA Toolkit >= 8.0:
<pre>
$ <b>sudo apt-get install cuda-command-line-tools</b>
</pre>
and add its path to your `LD_LIBRARY_PATH` environment variable:
<pre>
$ <b>export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</b>
</pre>
For CUDA Toolkit <= 7.5 do:
<pre>
$ <b>sudo apt-get install libcupti-dev</b>
</pre>
* **[OPTIONAL]** For optimized inferencing performance, you can also install
**NVIDIA TensorRT 3.0**. The minimal set of TensorRT runtime components needed
for use with the pre-built `tensorflow-gpu` package can be installed as follows:
<pre>
$ <b>wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
$ <b>sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
$ <b>sudo apt-get update</b>
$ <b>sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</b>
</pre>
**IMPORTANT:** For compatibility with the pre-built `tensorflow-gpu`
package, please use the Ubuntu **14.04** package of TensorRT as shown above,
even when installing onto an Ubuntu 16.04 system.<br/>
<br/>
To build the TensorFlow-TensorRT integration module from source rather than
using pre-built binaries, see the [module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
For detailed TensorRT installation instructions, see [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).<br/>
<br/>
To avoid cuDNN version conflicts during later system upgrades, you can hold
the cuDNN version at 7.0.5:
<pre>
$ <b> sudo apt-mark hold libcudnn7 libcudnn7-dev</b>
</pre>
To later allow upgrades, you can remove the hold:
<pre>
$ <b> sudo apt-mark unhold libcudnn7 libcudnn7-dev</b>
</pre>
If you have an earlier version of the preceding packages, please upgrade to
the specified versions. If upgrading is not possible, then you may still run
TensorFlow with GPU support, if you @{$install_sources$install TensorFlow from Sources}.
## How to install TensorFlow

There are a few options to install TensorFlow on your machine:

* [Use pip in a virtual environment](#InstallingVirtualenv) *(recommended)*
* [Use pip in your system environment](#InstallingNativePip)
* [Configure a Docker container](#InstallingDocker)
* [Use pip in Anaconda](#InstallingAnaconda)
* [Install TensorFlow from source](/install/install_sources)
<a name="InstallingVirtualenv"></a>
### Use `pip` in a virtual environment

Key Point: Using a virtual environment is the recommended install method.

The [Virtualenv](https://virtualenv.pypa.io/en/stable/) tool creates virtual
Python environments that are isolated from other Python development on the same
machine. In this scenario, you install TensorFlow and its dependencies within a
virtual environment that is available when *activated*. Virtualenv provides a
reliable way to install and run TensorFlow while avoiding conflicts with the rest
of the system.
##### 1. Install Python, `pip`, and `virtualenv`.
On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V # or: python3 -V</code>
<code class="devsite-terminal">pip -V # or: pip3 -V</code>
</pre>
To install these packages on Ubuntu:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev python-virtualenv # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev python-virtualenv # for Python 3.n</code>
</pre>
We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>
If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>
##### 2. Create a directory for the virtual environment and choose a Python interpreter.
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">mkdir ~/tensorflow # somewhere to work out of</code>
<code class="devsite-terminal">cd ~/tensorflow</code>
<code># Choose one of the following Python environments for the ./venv directory:</code>
<code class="devsite-terminal">virtualenv --system-site-packages <var>venv</var> # Use python default (Python 2.7)</code>
<code class="devsite-terminal">virtualenv --system-site-packages -p python3 <var>venv</var> # Use Python 3.n</code>
</pre>
##### 3. Activate the Virtualenv environment.
Use one of these shell-specific commands to activate the virtual environment:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate # bash, sh, ksh, or zsh</code>
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate.csh # csh or tcsh</code>
<code class="devsite-terminal">. ~/tensorflow/<var>venv</var>/bin/activate.fish # fish</code>
</pre>
When the Virtualenv is activated, the shell prompt displays as `(venv) $`.
##### 4. Upgrade `pip` in the virtual environment.
Within the active virtual environment, upgrade `pip`:
<pre class="prettyprint lang-bsh">
(venv)$ pip install -U pip
</pre>
You can install other Python packages within the virtual environment without
affecting packages outside the `virtualenv`.
##### 5. Install TensorFlow in the virtual environment.
Choose one of the available TensorFlow packages for installation:
* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support
Within an active Virtualenv environment, use `pip` to install the package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">pip install -U tensorflow</code>
</pre>
Use `pip list` to show the packages installed in the virtual environment.
[Validate the install](#ValidateYourInstallation) and test the version:
<pre class="prettyprint lang-bsh">
(venv)$ python -c "import tensorflow as tf; print(tf.__version__)"
</pre>
Success: TensorFlow is now installed.
Use the `deactivate` command to stop the Python virtual environment.
#### Problems
If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:
<pre class="prettyprint lang-bsh">
(venv)$ pip install --upgrade <var>remote-pkg-URL</var> # Python 2.7
(venv)$ pip3 install --upgrade <var>remote-pkg-URL</var> # Python 3.n
</pre>
The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.
See [Common Installation Problems](#common_installation_problems) if you
encounter problems.
#### Uninstall TensorFlow
To uninstall TensorFlow, remove the Virtualenv directory you created in step 2:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">deactivate # stop the virtualenv</code>
<code class="devsite-terminal">rm -r ~/tensorflow/<var>venv</var></code>
</pre>
<a name="InstallingNativePip"></a>
### Use `pip` in your system environment
Use `pip` to install the TensorFlow package directly on your system without
using a container or virtual environment for isolation. This method is
recommended for system administrators who want a TensorFlow installation that is
available to everyone on a multi-user system.
Since a system install is not isolated, it could interfere with other
Python-based installations. But if you understand `pip` and your Python
environment, a system `pip` install is straightforward.
See the
[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py)
for a list of packages that TensorFlow installs.
##### 1. Install Python and `pip`.
On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V # or: python3 -V</code>
<code class="devsite-terminal">pip -V # or: pip3 -V</code>
</pre>
To install these packages on Ubuntu:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev # for Python 3.n</code>
</pre>
We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>
If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>
##### 2. Install TensorFlow on system.
Choose one of the available TensorFlow packages for installation:
* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support
Use `pip` to install the package for Python 2 or 3:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U tensorflow # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install -U tensorflow # Python 3.n</code>
</pre>
Use `pip list` to show the packages installed on the system.
[Validate the install](#ValidateYourInstallation) and test the version:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -c "import tensorflow as tf; print(tf.__version__)"</code>
</pre>
Success: TensorFlow is now installed.
#### Problems
If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install --upgrade <var>remote-pkg-URL</var> # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install --upgrade <var>remote-pkg-URL</var> # Python 3.n</code>
</pre>
The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.
See [Common Installation Problems](#common_installation_problems) if you
encounter problems.
#### Uninstall TensorFlow
To uninstall TensorFlow on your system, use one of the following commands:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip uninstall tensorflow # for Python 2.7</code>
<code class="devsite-terminal">sudo pip3 uninstall tensorflow # for Python 3.n</code>
</pre>
<a name="InstallingDocker"></a>
### Configure a Docker container
Docker completely isolates the TensorFlow installation
from pre-existing packages on your machine. The Docker container contains

@@ -142,210 +269,6 @@ large (hundreds of MBs). You might choose the Docker installation if you are

incorporating TensorFlow into a larger application architecture that already
uses Docker.
Take the following steps to install TensorFlow through Docker:

1. Install Docker on your machine as described in the

@@ -364,7 +287,7 @@ Take the following steps to install TensorFlow through Docker:

The remainder of this section explains how to launch a Docker container.

#### CPU-only

To launch a Docker container with CPU-only support (that is, without
GPU support), enter a command of the following format:

@@ -414,7 +337,7 @@ $ <b>docker run -it -p 8888:8888 tensorflow/tensorflow</b>

Docker will download the TensorFlow binary image the first time you launch it.

#### GPU support

Prior to installing TensorFlow with GPU support, ensure that your system meets all
[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container

@@ -470,14 +393,22 @@ For more details see the

[TensorFlow docker readme](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/docker).

#### Next Steps

You should now
[validate your installation](#ValidateYourInstallation).
<a name="InstallingAnaconda"></a> <a name="InstallingAnaconda"></a>
## Installing with Anaconda ### Use `pip` in Anaconda
Anaconda provides the `conda` utility to create a virtual environment. However,
within Anaconda, we recommend installing TensorFlow using the `pip install`
command and *not* with the `conda install` command.
Caution: `conda` is a community supported package this is not officially
maintained by the TensorFlow team. Use this package at your own risk since it is
not tested on new TensorFlow releases.
Take the following steps to install TensorFlow in an Anaconda environment:

@@ -507,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:

<pre>
(tensorflow)$ <b>pip install --ignore-installed --upgrade \
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
<a name="ValidateYourInstallation"></a> <a name="ValidateYourInstallation"></a>
## Validate your installation ## Validate your installation
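The validation steps themselves are elided by this diff hunk. The usual check
is a short program run from a Python shell, along these lines (a sketch of the
classic snippet, not this guide's exact text):

```python
# Run python from a shell where TensorFlow is installed.
import tensorflow as tf

hello = tf.constant('Hello, TensorFlow!')
sess = tf.Session()
print(sess.run(hello))  # Prints a greeting if the install works.
```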
@@ -563,11 +494,89 @@ installation problems](#common_installation_problems).

If you are new to machine learning, we recommend the following:

* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course)
* @{$get_started/eager}

If you are experienced with machine learning but new to TensorFlow, see
@{$get_started/eager}.
<a name="NVIDIARequirements"></a>
## TensorFlow GPU support
To install TensorFlow with GPU support, configure the following NVIDIA® software
on your system:
* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
Append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environment
variable as described in the NVIDIA documentation.
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
Create the `CUDA_HOME` environment variable as described in the NVIDIA
documentation.
* A GPU card with CUDA Compute Capability 3.0 or higher for building TensorFlow
from source. To use the TensorFlow binaries, version 3.5 or higher is required.
See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a
list of supported GPU cards.
* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA
Toolkit.
* The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This
library provides advanced profiling support. To install this library,
use the following command for CUDA Toolkit >= 8.0:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install cuda-command-line-tools</code>
</pre>
Add this path to the `LD_LIBRARY_PATH` environment variable:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</code>
</pre>
For CUDA Toolkit <= 7.5 use:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install libcupti-dev</code>
</pre>
* *OPTIONAL*: For optimized performance during inference, install
*NVIDIA&nbsp;TensorRT&nbsp;3.0*. To install the minimal amount of TensorRT
runtime components required for use with the pre-built `tensorflow-gpu` package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
<code class="devsite-terminal">sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
<code class="devsite-terminal">sudo apt-get update</code>
<code class="devsite-terminal">sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</code>
</pre>
Note: For compatibility with the pre-built `tensorflow-gpu` package, use the
Ubuntu *14.04* package of TensorRT (shown above). Use this even when installing
on an Ubuntu 16.04 system.
To build the TensorFlow-TensorRT integration module from source instead of using
the pre-built binaries, see the
[module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
For detailed TensorRT installation instructions, see
[NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).
To avoid cuDNN version conflicts during later system upgrades, hold the cuDNN
version at 7.0.5:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-mark hold libcudnn7 libcudnn7-dev</code>
</pre>
To allow upgrades, remove the hold:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-mark unhold libcudnn7 libcudnn7-dev</code>
</pre>
If you have an earlier version of the preceding packages, upgrade to the
specified versions. If upgrading is not possible, you can still run TensorFlow
with GPU support by @{$install_sources}.
## Common installation problems

@@ -581,7 +590,7 @@ ask a new question about it on Stack Overflow and specify

the `tensorflow` tag.

<table>
<tr> <th>Link to GitHub or Stack&nbsp;Overflow</th> <th>Error Message</th> </tr>
<tr>
<td><a href="https://stackoverflow.com/q/36159194">36159194</a></td>
@@ -681,14 +690,14 @@ This section documents the relevant values for Linux installations.

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>

Note that GPU support requires the NVIDIA hardware and software described in

@@ -700,14 +709,14 @@ Note that GPU support requires the NVIDIA hardware and software described in

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>

Note that GPU support requires the NVIDIA hardware and software described in

@@ -719,14 +728,14 @@ Note that GPU support requires the NVIDIA hardware and software described in

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>

@@ -738,14 +747,14 @@ Note that GPU support requires the NVIDIA hardware and software described in

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>
@@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv:

TensorFlow in the active Virtualenv is as follows:

<pre> $ <b>pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b></pre>

If you encounter installation problems, see
[Common Installation Problems](#common-installation-problems).
@@ -242,7 +242,7 @@ take the following steps:

issue the following command:

<pre> $ <b>sudo pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b> </pre>

If the preceding command fails, see
[installation problems](#common-installation-problems).
@@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:

TensorFlow for Python 2.7:

<pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl</b></pre>

<a name="ValidateYourInstallation"></a>

@@ -524,7 +524,7 @@ The value you specify depends on your Python version.

<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
</pre>

@@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-a

<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
</pre>
@@ -354,10 +354,10 @@ Invoke `pip install` to install that pip package.

The filename of the `.whl` file depends on your platform.
For example, the following command will install the pip package

for TensorFlow 1.8.0rc1 on Linux:

<pre>
$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl</b>
</pre>

## Validate your installation
@@ -86,7 +86,7 @@ code. `tf_library` utilizes `tfcompile` to compile the TensorFlow graph into

executable code.

```build
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")

# Use the tf_library macro to compile your graph into executable code.
tf_library(

@@ -258,8 +258,8 @@ file.

```build
# Example of linking your binary
# Also see //tensorflow/compiler/aot/tests/BUILD
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")

# The same tf_library call from step 2 above.
tf_library(
@@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv")

def main(unused_argv):
  # Load datasets.
  training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
  test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)

  validation_metrics = {
      "accuracy":

@@ -83,7 +83,7 @@ def main(unused_argv):

  # Classify two new flower samples.
  new_samples = np.array(
      [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
  y = list(classifier.predict(new_samples))
  print("Predictions: {}".format(str(y)))
@@ -5,7 +5,7 @@ Construct and execute TensorFlow graphs in Go.

[![GoDoc](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go?status.svg)](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go)

> *WARNING*: The API defined in this package is not stable and can change
> without notice. The same goes for the package path:
> (`github.com/tensorflow/tensorflow/tensorflow/go`).

## Quickstart
@@ -21386,7 +21386,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {

// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
//
// The `bad_color` argument is the color to use in the generated images for
// non-finite input values. It is a `uint8` 1-D tensor of length `channels`.
// Each element must be in the range `[0, 255]` (It represents the value of a
// pixel in the output image). Non-finite values in the input tensor are
// replaced by this tensor in the output image. The default value is the color
@@ -644,11 +644,9 @@ class Estimator(object):

          sharded=True)
      saver_for_restore.restore(session, checkpoint_path)

      local_init_op = (
          estimator_spec.scaffold.local_init_op or
          monitored_session.Scaffold.default_local_init_op())

      # Perform the export
      builder = saved_model_builder.SavedModelBuilder(temp_export_dir)
@@ -29,12 +29,14 @@ from tensorflow.python.estimator import run_config as run_config_lib

from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
from tensorflow.python.framework import tensor_util
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import models
from tensorflow.python.keras._impl.keras import optimizers
from tensorflow.python.keras._impl.keras.engine.base_layer import Layer
from tensorflow.python.keras._impl.keras.engine.network import Network
from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import metrics as metrics_module
from tensorflow.python.ops import variables as variables_module
@@ -55,6 +57,17 @@ def _cast_tensor_to_floatx(x):

  return math_ops.cast(x, K.floatx())


def _convert_tensor(x):
  """Create or cast tensor if needed."""
  if not tensor_util.is_tensor(x):
    # x is a numpy array
    x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x)
  if check_ops.is_numeric_tensor(x):
    # is_numeric_tensor returns False if provided with a numpy array
    x = _cast_tensor_to_floatx(x)
  return x
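# Example (hypothetical, not part of this diff): a numpy array is first
# converted to a Tensor and, when numeric, cast to K.floatx(); a non-numeric
# Tensor passes through unchanged.
#   _convert_tensor(np.array([1, 2]))           # -> float32 Tensor
#   _convert_tensor(constant_op.constant('a'))  # -> string Tensor, unchanged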
def _any_variable_initalized():
  """Check if any variable has been initialized in the Keras model.

@@ -86,7 +99,7 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):

  if isinstance(estimator_io, (list, tuple)):
    # Case currently not supported by most built-in input_fn,
    # but it's good to have for sanity
    return [_convert_tensor(x) for x in estimator_io]
  elif isinstance(estimator_io, dict):
    if is_input:
      if keras_model._is_graph_network:
@@ -108,12 +121,12 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):

                         'It needs to match one '
                         'of the following: %s' % ('input' if is_input else 'output', key,
                                                   ', '.join(keras_io_names)))
    tensors = [_convert_tensor(estimator_io[io_name])
               for io_name in keras_io_names]
    return tensors
  else:
    # Plain array.
    return _convert_tensor(estimator_io)


def _in_place_subclassed_model_reset(model):
@@ -274,8 +287,7 @@ def _clone_and_build_model(mode,

          is_input=False)
    else:
      target_tensors = [
          _convert_tensor(labels)
      ]

  if keras_model._is_graph_network:
@@ -30,6 +30,7 @@ from tensorflow.python.estimator.inputs import numpy_io

from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.keras._impl import keras
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import testing_utils
from tensorflow.python.keras._impl.keras.applications import mobilenet
from tensorflow.python.keras._impl.keras.optimizers import SGD
@@ -142,16 +143,20 @@ def randomize_io_type(array, name):

def multi_inputs_multi_outputs_model():
  # test multi-input layer
  a = keras.layers.Input(shape=(16,), name='input_a')
  b = keras.layers.Input(shape=(16,), name='input_b')
  m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m')
  dense = keras.layers.Dense(8, name='dense_1')

  a_2 = dense(a)
  # Apply a mask
  s_2 = keras.layers.Lambda(lambda k:
                            K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2])
  b_2 = dense(b)
  merged = keras.layers.concatenate([s_2, b_2], name='merge')
  c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
  d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
  model = keras.models.Model(inputs=[a, b, m], outputs=[c, d])
  model.compile(
      loss='categorical_crossentropy',
      optimizer='rmsprop',
@ -352,18 +357,27 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
test_samples=50, test_samples=50,
input_shape=(16,), input_shape=(16,),
num_classes=2) num_classes=2)
np.random.seed(_RANDOM_SEED)
(input_m_train, _), (input_m_test, _) = testing_utils.get_test_data(
train_samples=_TRAIN_SIZE,
test_samples=50,
input_shape=(8,),
num_classes=2)
c_train = keras.utils.to_categorical(c_train) c_train = keras.utils.to_categorical(c_train)
c_test = keras.utils.to_categorical(c_test) c_test = keras.utils.to_categorical(c_test)
d_train = keras.utils.to_categorical(d_train) d_train = keras.utils.to_categorical(d_train)
d_test = keras.utils.to_categorical(d_test) d_test = keras.utils.to_categorical(d_test)
def train_input_fn(): def train_input_fn():
input_dict = {'input_a': a_train, 'input_b': b_train} input_dict = {'input_a': a_train, 'input_b': b_train,
'input_m': input_m_train > 0}
output_dict = {'dense_2': c_train, 'dense_3': d_train} output_dict = {'dense_2': c_train, 'dense_3': d_train}
return input_dict, output_dict return input_dict, output_dict
def eval_input_fn(): def eval_input_fn():
input_dict = {'input_a': a_test, 'input_b': b_test} input_dict = {'input_a': a_test, 'input_b': b_test,
'input_m': input_m_test > 0}
output_dict = {'dense_2': c_test, 'dense_3': d_test} output_dict = {'dense_2': c_test, 'dense_3': d_test}
return input_dict, output_dict return input_dict, output_dict
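To see what the new `input_m` input does, here is a hedged, self-contained sketch of the masking pattern the test model uses: `K.switch` keeps the dense features where the boolean mask is True and zeroes them elsewhere. The layer names and shapes below are mine, chosen for brevity.

import numpy as np
from tensorflow.python.keras._impl import keras
from tensorflow.python.keras._impl.keras import backend as K

a = keras.layers.Input(shape=(4,), name='features')
m = keras.layers.Input(shape=(4,), dtype='bool', name='mask')
# Elementwise select: feature where mask is True, zero where it is False.
masked = keras.layers.Lambda(
    lambda k: K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a])
model = keras.models.Model(inputs=[a, m], outputs=masked)

x = np.ones((1, 4), dtype='float32')
keep = np.array([[True, False, True, False]])
print(model.predict([x, keep]))  # -> [[1., 0., 1., 0.]]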
@@ -35,8 +35,7 @@ class DivisionTestCase(test.TestCase):
     """Test all the different ways to divide."""
     values = [1, 2, 7, 11]
     functions = (lambda x: x), constant_op.constant
-    # TODO(irving): Test int8, int16 once we support casts for those.
-    dtypes = np.int32, np.int64, np.float32, np.float64
+    dtypes = np.int8, np.int16, np.int32, np.int64, np.float32, np.float64
     tensors = []
     checks = []
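The TODO could be deleted because true division now has casts for the small integer types. Roughly, and this is my example rather than the test's (`tf.truediv` documents casting int8/int16 operands to float32 before dividing):

import tensorflow as tf

x = tf.constant(7, dtype=tf.int8)
y = tf.constant(2, dtype=tf.int8)
with tf.Session() as sess:
  # Python 3 division on tensors is tf.truediv; int8 inputs are
  # cast to float32, so the result is 3.5 rather than 3.
  print(sess.run(x / y))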
@ -160,7 +160,7 @@ class ReduceJoinTest(UnicodeTestCase):
separator=separator) separator=separator)
if not reduction_indices: if not reduction_indices:
truth = constant_op.constant(truth) truth = constant_op.constant(truth)
truth_squeezed = array_ops.squeeze(truth, squeeze_dims=reduction_indices) truth_squeezed = array_ops.squeeze(truth, axis=reduction_indices)
output_array = output.eval() output_array = output.eval()
output_keep_dims_array = output_keep_dims.eval() output_keep_dims_array = output_keep_dims.eval()
truth_array = truth.eval() truth_array = truth.eval()
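This is part of a sweep replacing the deprecated `squeeze_dims` keyword with `axis` throughout the tree (the same rename appears in several files below). The two spellings behave identically; `squeeze_dims` remained as a deprecated alias at the time. For instance:

import tensorflow as tf

t = tf.zeros([1, 3, 1])
s_new = tf.squeeze(t, axis=[0, 2])          # preferred spelling
s_old = tf.squeeze(t, squeeze_dims=[0, 2])  # deprecated alias, same result
print(s_new.shape, s_old.shape)             # (3,) (3,)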
@@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase):
 class CountNonzeroReductionTest(test.TestCase):

-  def _compare(self, x, reduction_axes, keepdims, use_gpu=False,
+  def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0,
                feed_dict=None):
-    np_ans = (x != 0).astype(np.int32)
+    np_ans = (x != zero).astype(np.int32)
     if reduction_axes is None:
       np_ans = np.sum(np_ans, keepdims=keepdims)
     else:
@@ -958,6 +958,37 @@ class CountNonzeroReductionTest(test.TestCase):
     y = math_ops.count_nonzero(x, [0])
     self.assertAllEqual(y.eval(), np.zeros(9938))

+  def testStringReduce(self):
+    # Test case for GitHub issue 18712
+    with self.test_session() as sess:
+      v = math_ops.count_nonzero(constant_op.constant(["test"]))
+      self.assertAllClose(sess.run(v), 1)
+
+  def testStringReduce1D(self):
+    # Create a 1D array of strings
+    x = np.asarray(["", "", "a", "", "", "b"])
+    self._compare(x, None, keepdims=False, zero=np.str(""))
+    self._compare(x, [], keepdims=False, zero=np.str(""))
+    self._compare(x, [0], keepdims=False, zero=np.str(""))
+    self._compare(x, None, keepdims=True, zero=np.str(""))
+    self._compare(x, [], keepdims=True, zero=np.str(""))
+    self._compare(x, [0], keepdims=True, zero=np.str(""))
+
+  def testStringReduce2D(self):
+    # Create a 2D array of strings
+    x = np.asarray([["", "", "a", "", "", "b"],
+                    ["", "c", "", "d", "", ""],
+                    ["e", "", "f", "", "", ""]])
+    self._compare(x, None, keepdims=False, zero=np.str(""))
+    self._compare(x, [], keepdims=False, zero=np.str(""))
+    self._compare(x, [0], keepdims=False, zero=np.str(""))
+    self._compare(x, [1], keepdims=False, zero=np.str(""))
+    self._compare(x, [0, 1], keepdims=False, zero=np.str(""))
+    self._compare(x, None, keepdims=True, zero=np.str(""))
+    self._compare(x, [], keepdims=True, zero=np.str(""))
+    self._compare(x, [0], keepdims=True, zero=np.str(""))
+    self._compare(x, [0, 1], keepdims=True, zero=np.str(""))
+
 if __name__ == "__main__":
   test.main()
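The new tests pin down the string semantics that this change enables: for `DT_STRING` inputs, `tf.count_nonzero` treats the empty string as zero and any non-empty string as nonzero. A minimal sketch of the behavior the tests exercise:

import tensorflow as tf

x = tf.constant([["", "a", ""], ["b", "", "c"]])
with tf.Session() as sess:
  print(sess.run(tf.count_nonzero(x)))          # 3 non-empty strings in total
  print(sess.run(tf.count_nonzero(x, axis=1)))  # per-row counts: [1 2]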
@@ -364,6 +364,42 @@ class ScatterNdTest(test.TestCase):
     del input_  # input_ is not used in scatter_nd
     return array_ops.scatter_nd(indices, updates, shape)

+  def testString(self):
+    indices = constant_op.constant([[4], [3], [1], [7]],
+                                   dtype=dtypes.int32)
+    updates = constant_op.constant(["four", "three", "one", "seven"],
+                                   dtype=dtypes.string)
+    expected = np.array([b"", b"one", b"", b"three", b"four",
+                         b"", b"", b"seven"])
+    scatter = self.scatter_nd(indices, updates, shape=(8,))
+    with self.test_session() as sess:
+      result = sess.run(scatter)
+      self.assertAllEqual(expected, result)
+
+    # Same index is updated twice by same value.
+    indices = constant_op.constant([[4], [3], [3], [7]],
+                                   dtype=dtypes.int32)
+    updates = constant_op.constant(["a", "b", "b", "c"],
+                                   dtype=dtypes.string)
+    expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"])
+    scatter = self.scatter_nd(indices, updates, shape=(8,))
+    with self.test_session() as sess:
+      result = sess.run(scatter)
+      self.assertAllEqual(expected, result)
+
+    # Same index is updated twice by different value.
+    indices = constant_op.constant([[4], [3], [3], [7]],
+                                   dtype=dtypes.int32)
+    updates = constant_op.constant(["a", "b", "c", "d"],
+                                   dtype=dtypes.string)
+    expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]),
+                np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])]
+    scatter = self.scatter_nd(indices, updates, shape=(8,))
+    with self.test_session() as sess:
+      result = sess.run(scatter)
+      self.assertTrue(np.array_equal(result, expected[0]) or
+                      np.array_equal(result, expected[1]))
+
   def testRank3ValidShape(self):
     indices = array_ops.zeros([2, 2, 2], dtypes.int32)
     updates = array_ops.zeros([2, 2, 2], dtypes.int32)
@@ -584,6 +620,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest):
                 shape, dtype=updates.dtype))
     return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates)

+  def testString(self):
+    # Not supported yet.
+    pass
+
 if __name__ == "__main__":
   test.main()
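The duplicate-index cases mirror the documented numeric behavior: `tf.scatter_nd` sums updates that land on the same index, and for strings "sum" means concatenation, with no ordering guarantee between duplicates, hence the test accepting either "bc" or "cb". For comparison, the numeric analogue:

import tensorflow as tf

indices = tf.constant([[4], [3], [3], [7]])
updates = tf.constant([1, 2, 3, 4])
with tf.Session() as sess:
  # The duplicate index 3 accumulates: 2 + 3 = 5.
  print(sess.run(tf.scatter_nd(indices, updates, shape=[8])))
  # -> [0 0 0 5 1 0 0 4]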
@@ -196,7 +196,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
           array_ops.where(
               math_ops.logical_and(grad.indices >= start,
                                    grad.indices < end)),
-          squeeze_dims=[1])
+          axis=[1])
       new_indices = array_ops.gather(grad.indices, indices_to_select) - start
       new_values = array_ops.gather(grad.values, indices_to_select)
       out_grads.append(ops.IndexedSlices(new_values, new_indices, size))
@@ -994,9 +994,7 @@ def unstack(value, num=None, axis=0, name="unstack"):
   `value[:, i, :, :]` and each tensor in `output` will have shape `(A, C, D)`.
   Etc.

-  This is the opposite of stack.  The numpy equivalent is
-
-      tf.unstack(x, n) = np.unstack(x)
+  This is the opposite of stack.

   Args:
     value: A rank `R > 0` `Tensor` to be unstacked.
@@ -1720,7 +1718,9 @@ def placeholder(dtype, shape=None, name=None):
     print(sess.run(y, feed_dict={x: rand_array}))  # Will succeed.
   ```

-  @compatibility{eager} Placeholders are not compatible with eager execution.
+  @compatibility(eager)
+  Placeholders are not compatible with eager execution.
+  @end_compatibility

   Args:
     dtype: The type of elements in the tensor to be fed.
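The first docstring edit removes a bogus claim: NumPy has no `np.unstack`. If a NumPy analogue is wanted, the nearest idiom is split-and-squeeze (my sketch, not anything from the commit; for axis 0 a plain `list(x)` also works):

import numpy as np
import tensorflow as tf

x = np.arange(12).reshape(3, 4)
tf_pieces = tf.unstack(tf.constant(x), axis=0)             # three (4,) tensors
np_pieces = [np.squeeze(p, 0) for p in np.split(x, 3, 0)]  # numpy analogue
print(len(tf_pieces), [p.shape for p in np_pieces])        # 3 [(4,), (4,), (4,)]

The second edit just fixes the docstring markup: `@compatibility(...)` blocks use parentheses and need a closing `@end_compatibility` for the docs generator to render them.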
@@ -652,7 +652,7 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
   padded.set_shape(padded_shape)

   if not is_batch:
-    padded = array_ops.squeeze(padded, squeeze_dims=[0])
+    padded = array_ops.squeeze(padded, axis=[0])

   return padded
@@ -732,7 +732,7 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
   cropped.set_shape(cropped_shape)

   if not is_batch:
-    cropped = array_ops.squeeze(cropped, squeeze_dims=[0])
+    cropped = array_ops.squeeze(cropped, axis=[0])

   return cropped
@@ -849,7 +849,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
   resized = control_flow_ops.with_dependencies(assert_ops, resized)

   if not is_batch:
-    resized = array_ops.squeeze(resized, squeeze_dims=[0])
+    resized = array_ops.squeeze(resized, axis=[0])

   return resized
@@ -942,7 +942,7 @@ def resize_images(images,
           for x in [new_width_const, width, new_height_const, height]) and (
               width == new_width_const and height == new_height_const):
     if not is_batch:
-      images = array_ops.squeeze(images, squeeze_dims=[0])
+      images = array_ops.squeeze(images, axis=[0])
     return images

   if method == ResizeMethod.BILINEAR:
@@ -965,7 +965,7 @@ def resize_images(images,
     images.set_shape([None, new_height_const, new_width_const, None])

   if not is_batch:
-    images = array_ops.squeeze(images, squeeze_dims=[0])
+    images = array_ops.squeeze(images, axis=[0])
   return images
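All five call sites follow the same pattern: a 3-D HWC image is expanded to a 4-D batch for the underlying op, then the batch dimension is squeezed back off, now spelled with `axis=[0]`. A sketch of the shape round-trip, assuming the public `tf.image.pad_to_bounding_box` (which accepts 3-D images directly and does exactly this internally):

import tensorflow as tf

image = tf.zeros([32, 32, 3])                         # single HWC image
batched = tf.expand_dims(image, 0)                    # -> [1, 32, 32, 3]
padded = tf.image.pad_to_bounding_box(batched, 4, 4, 40, 40)
unbatched = tf.squeeze(padded, axis=[0])              # back to [40, 40, 3]
print(unbatched.shape)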
Some files were not shown because too many files have changed in this diff.