Merge changes from github.
PiperOrigin-RevId: 194997009
This commit is contained in:
parent 46bf1e8934
commit 325d0ef21a

.gitignore (vendored)
@@ -27,6 +27,7 @@ Podfile.lock
/tensorflow/contrib/lite/examples/ios/simple/data/*.txt
/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
xcuserdata/**
/api_init_files_list.txt

# Android
.gradle
@@ -1700,7 +1700,7 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) {
  TestGradientsError(false);
}

// REGISTER_OP for CApiTestAttributesTest test cases.
// REGISTER_OP for CApiAttributesTest test cases.
// Registers two ops, each with a single attribute called 'v'.
// The attribute in one op will have a type 'type', the other
// will have list(type).
@@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad);

Status StridedSliceGradHelper(const Scope& scope, const Operation& op,
                              const std::vector<Output>& grad_inputs,
                              std::vector<Output>* grad_outputs) {
  Input x = Shape(scope, op.input(0));
  Input begin = op.input(1);
  Input end = op.input(2);
  Input strides = op.input(3);
  int64 begin_mask;
  int64 end_mask;
  int64 ellipsis_mask;
  int64 new_axis_mask;
  int64 shrink_axis_mask;
  TF_RETURN_IF_ERROR(
      GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask));
  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask));
  TF_RETURN_IF_ERROR(
      GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask));
  TF_RETURN_IF_ERROR(
      GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask));
  TF_RETURN_IF_ERROR(
      GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask));
  grad_outputs->push_back(
      StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0],
                       StridedSliceGrad::BeginMask(begin_mask)
                           .EndMask(end_mask)
                           .EllipsisMask(ellipsis_mask)
                           .NewAxisMask(new_axis_mask)
                           .ShrinkAxisMask(shrink_axis_mask)));
  // No gradients returned for begin, end and strides
  grad_outputs->push_back(NoGradient());
  grad_outputs->push_back(NoGradient());
  grad_outputs->push_back(NoGradient());
  return scope.status();
}
REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper);

}  // anonymous namespace
}  // namespace ops
}  // namespace tensorflow
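For readers unfamiliar with the mask attributes this gradient forwards: bit `i` of `begin_mask` tells `strided_slice` to ignore `begin[i]` for that axis. A minimal Python sketch of the semantics (hypothetical values; assumes the TF 1.x API):

```python
import tensorflow as tf

x = tf.reshape(tf.range(24, dtype=tf.float32), [6, 4])
# Bit 1 of begin_mask is set, so begin[1] is ignored and axis 1
# starts from 0 -- the same attribute StridedSliceGradHelper reads
# back and forwards into StridedSliceGrad above.
y = tf.strided_slice(x, [2, 1], [6, 3], [2, 1], begin_mask=1 << 1)

with tf.Session() as sess:
  print(sess.run(tf.shape(y)))  # [2, 3]
```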
@@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) {
  RunTest(x, x_shape, y, y_shape);
}

TEST_F(ArrayGradTest, StridedSliceGrad) {
  TensorShape x_shape({6, 4, 4});
  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));

  // y = x[2:6:2, 1:3, 1:3]
  auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1});
  // y.shape = [2, 2, 2];
  RunTest(x, x_shape, y, {2, 2, 2});

  // y = x[2:6:2, 1:3, 1:3]
  // begin_mask = 1<<1 (ignore begin_index = 1)
  // end_mask = 1<<2 (ignore end_index = 2)
  y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1},
                   StridedSlice::BeginMask(1 << 1).EndMask(1 << 2));
  // y.shape = [2, 3, 3];
  RunTest(x, x_shape, y, {2, 3, 3});

  // y = [tf.newaxis, 2:6:2, 1:3, 1:3]
  y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1},
                   StridedSlice::NewAxisMask(1 << 0));
  // y.shape = [1, 2, 2, 2];
  RunTest(x, x_shape, y, {1, 2, 2, 2});
}

}  // namespace
}  // namespace tensorflow
@@ -56,8 +56,6 @@ Use AutoGraph in one of the following ways, described below:
1. Annotations (simpler)
2. Functional API (more flexible)

NOTE: You can find more examples in this [interactive notebook](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb).

To get started, install the latest nightly TensorFlow build:

```shell
@@ -70,6 +68,13 @@ Then import the `autograph` module from `tf.contrib`:
from tensorflow.contrib import autograph as ag
```

### Interactive demo notebooks

For more extensive examples, check out these interactive notebooks:

* [RNN trained using Keras and Estimators](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb)
* [Demo from the TF Dev Summit 2018](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb)

## Using with annotations

Annotating a function or class with `@convert` converts it in place:
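A small sketch of the annotation workflow the README describes, assuming the contrib-era `autograph` import shown above (`ag.convert` as a decorator is an assumption based on the README's description):

```python
from tensorflow.contrib import autograph as ag

@ag.convert()  # rewrites the Python control flow below into graph ops
def sum_even(items):
  s = 0
  for c in items:
    if c % 2 > 0:
      continue
    s += c
  return s
```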
@@ -84,7 +84,7 @@ if (NOT WIN32)

option(systemlib_ALL "Turn on every possible systemlib_* options" OFF)
if (systemlib_ALL)
  set (systmelib_ZLIB ON)
  set (systemlib_ZLIB ON)
endif (systemlib_ALL)
endif()

@@ -471,6 +471,10 @@ if (tensorflow_ENABLE_GPU)
  include_directories(${tensorflow_source_dir}/third_party/gpus)
  # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
  if(NOT WIN32)
    # add gomp to tensorflow_EXTERNAL_LIBRARIES, needed by libcusolver.so
    list(APPEND tensorflow_EXTERNAL_LIBRARIES gomp)
  endif()

  # NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
  # in the default build is upgraded.
@@ -177,6 +177,16 @@ if(WIN32)
      "${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
  )
  list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
else(WIN32)
  if(tensorflow_ENABLE_GPU)
    file(GLOB_RECURSE tf_core_kernels_gpu_exclude_srcs
        # temporarily disable nccl as it needs to be ported with gpu
        "${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_manager.cc"
        "${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_ops.cc"
        "${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
    )
    list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_gpu_exclude_srcs})
  endif(tensorflow_ENABLE_GPU)
endif(WIN32)

file(GLOB_RECURSE tf_core_gpu_kernels_srcs
@@ -64,6 +64,8 @@ file(GLOB tf_stream_executor_srcs
if (tensorflow_ENABLE_GPU)
  file(GLOB tf_stream_executor_gpu_srcs
      "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
      "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h"
      "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc"
  )
  if (NOT tensorflow_BUILD_CC_TESTS)
    file(GLOB tf_stream_executor_gpu_tests
@@ -152,6 +152,22 @@ class CrfTest(test.TestCase):

      self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)

  def testCrfLogNormZeroSeqLength(self):
    """
    Test `crf_log_norm` when `sequence_lengths` contains one or more zeros.
    """
    with self.test_session() as sess:
      inputs = constant_op.constant(np.ones([2, 10, 5],
                                            dtype=np.float32))
      transition_params = constant_op.constant(np.ones([5, 5],
                                               dtype=np.float32))
      sequence_lengths = constant_op.constant(np.zeros([2],
                                              dtype=np.int32))
      expected_log_norm = np.zeros([2], dtype=np.float32)
      log_norm = crf.crf_log_norm(inputs, sequence_lengths, transition_params)
      tf_log_norm = sess.run(log_norm)
      self.assertAllClose(tf_log_norm, expected_log_norm)

  def testCrfLogLikelihood(self):
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
@@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
                                        dtype=np.float32))
      sequence_lengths = constant_op.constant(np.zeros([2],
                                              dtype=np.int32))
      values = crf.crf_decode(inputs, transition_params, sequence_lengths)
      tags, scores = sess.run(values)
      self.assertEqual(len(tags.shape), 2)
      self.assertEqual(len(scores.shape), 1)
      tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
      tf_tags, tf_scores = sess.run([tags, scores])
      self.assertEqual(len(tf_tags.shape), 2)
      self.assertEqual(len(tf_scores.shape), 1)


if __name__ == "__main__":
  test.main()
@@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
    batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
    example_inds = array_ops.reshape(
        math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
    return array_ops.gather_nd(
    sequence_scores = array_ops.gather_nd(
        array_ops.squeeze(inputs, [1]),
        array_ops.concat([example_inds, tag_indices], axis=1))
    sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                                      array_ops.zeros_like(sequence_scores),
                                      sequence_scores)
    return sequence_scores

  def _multi_seq_fn():
    # Compute the scores of the given tag sequence.
@@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
  # If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
  # the "initial state" (the unary potentials).
  def _single_seq_fn():
    return math_ops.reduce_logsumexp(first_input, [1])
    log_norm = math_ops.reduce_logsumexp(first_input, [1])
    # Mask `log_norm` of the sequences with length <= zero.
    log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                               array_ops.zeros_like(log_norm),
                               log_norm)
    return log_norm

  def _multi_seq_fn():
    """Forward computation of alpha values."""
@@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
    # Compute the alpha values in the forward algorithm in order to get the
    # partition function.
    forward_cell = CrfForwardRnnCell(transition_params)
    # Sequence length is not allowed to be less than zero.
    sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
    _, alphas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=rest_of_input,
        sequence_length=sequence_lengths - 1,
        sequence_length=sequence_lengths_less_one,
        initial_state=first_input,
        dtype=dtypes.float32)
    log_norm = math_ops.reduce_logsumexp(alphas, [1])
    # Mask `log_norm` of the sequences with length <= zero.
    log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
                               array_ops.zeros_like(log_norm),
                               log_norm)
    return log_norm

  max_seq_len = array_ops.shape(inputs)[1]
@@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
  initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  initial_state = array_ops.squeeze(initial_state, axis=[1])  # [B, O]
  inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
  # sequence length is not allowed to be less than zero
  # Sequence length is not allowed to be less than zero.
  sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
  backpointers, last_score = rnn.dynamic_rnn(  # [B, T - 1, O], [B, O]
      crf_fwd_cell,
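All of the `crf.py` hunks above apply the same guard: compute the quantity, then zero it wherever `sequence_lengths <= 0`. A standalone NumPy sketch of that masking (illustrative arrays, not the library code):

```python
import numpy as np

log_norm = np.array([3.2, 5.1], dtype=np.float32)
sequence_lengths = np.array([0, 4], dtype=np.int32)

# An empty sequence has partition function 1, hence log-normalizer 0.
masked = np.where(sequence_lengths <= 0, np.zeros_like(log_norm), log_norm)
print(masked)  # [0.  5.1]
```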
@@ -0,0 +1,109 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Bijector."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.contrib.distributions.python.ops.bijectors.ordered import Ordered
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite
from tensorflow.python.platform import test


class OrderedBijectorTest(test.TestCase):
  """Tests correctness of the ordered transformation."""

  def setUp(self):
    self._rng = np.random.RandomState(42)

  @test_util.run_in_graph_and_eager_modes()
  def testBijectorVector(self):
    with self.test_session():
      ordered = Ordered()
      self.assertEqual("ordered", ordered.name)
      x = np.asarray([[2., 3, 4], [4., 8, 13]])
      y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
      self.assertAllClose(y, self.evaluate(ordered.forward(x)))
      self.assertAllClose(x, self.evaluate(ordered.inverse(y)))
      self.assertAllClose(
          np.sum(np.asarray(y)[..., 1:], axis=-1),
          self.evaluate(ordered.inverse_log_det_jacobian(y, event_ndims=1)),
          atol=0.,
          rtol=1e-7)
      self.assertAllClose(
          self.evaluate(-ordered.inverse_log_det_jacobian(y, event_ndims=1)),
          self.evaluate(ordered.forward_log_det_jacobian(x, event_ndims=1)),
          atol=0.,
          rtol=1e-7)

  def testBijectorUnknownShape(self):
    with self.test_session():
      ordered = Ordered()
      self.assertEqual("ordered", ordered.name)
      x = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
      real_x = np.asarray([[2., 3, 4], [4., 8, 13]])
      y = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
      real_y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
      self.assertAllClose(real_y, ordered.forward(x).eval(
          feed_dict={x: real_x}))
      self.assertAllClose(real_x, ordered.inverse(y).eval(
          feed_dict={y: real_y}))
      self.assertAllClose(
          np.sum(np.asarray(real_y)[..., 1:], axis=-1),
          ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
              feed_dict={y: real_y}),
          atol=0.,
          rtol=1e-7)
      self.assertAllClose(
          -ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
              feed_dict={y: real_y}),
          ordered.forward_log_det_jacobian(x, event_ndims=1).eval(
              feed_dict={x: real_x}),
          atol=0.,
          rtol=1e-7)

  @test_util.run_in_graph_and_eager_modes()
  def testShapeGetters(self):
    with self.test_session():
      x = tensor_shape.TensorShape([4])
      y = tensor_shape.TensorShape([4])
      bijector = Ordered(validate_args=True)
      self.assertAllEqual(y, bijector.forward_event_shape(x))
      self.assertAllEqual(y.as_list(),
                          self.evaluate(bijector.forward_event_shape_tensor(
                              x.as_list())))
      self.assertAllEqual(x, bijector.inverse_event_shape(y))
      self.assertAllEqual(x.as_list(),
                          self.evaluate(bijector.inverse_event_shape_tensor(
                              y.as_list())))

  def testBijectiveAndFinite(self):
    with self.test_session():
      ordered = Ordered()
      x = np.sort(self._rng.randn(3, 10), axis=-1).astype(np.float32)
      y = (self._rng.randn(3, 10)).astype(np.float32)
      assert_bijective_and_finite(ordered, x, y, event_ndims=1)


if __name__ == "__main__":
  test.main()
@@ -30,6 +30,7 @@
@@Invert
@@Kumaraswamy
@@MaskedAutoregressiveFlow
@@Ordered
@@Permute
@@PowerTransform
@@RealNVP
@@ -67,6 +68,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.inline import *
from tensorflow.contrib.distributions.python.ops.bijectors.invert import *
from tensorflow.contrib.distributions.python.ops.bijectors.kumaraswamy import *
from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import *
from tensorflow.contrib.distributions.python.ops.bijectors.ordered import *
from tensorflow.contrib.distributions.python.ops.bijectors.permute import *
from tensorflow.contrib.distributions.python.ops.bijectors.power_transform import *
from tensorflow.contrib.distributions.python.ops.bijectors.real_nvp import *
@@ -170,7 +170,7 @@ class CholeskyOuterProduct(bijector.Bijector):
    sum_weighted_log_diag = array_ops.squeeze(
        math_ops.matmul(math_ops.log(diag),
                        exponents[..., array_ops.newaxis]),
        squeeze_dims=-1)
        axis=-1)
    fldj = p_float * np.log(2.) + sum_weighted_log_diag

    return fldj
@@ -18,14 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector

__all__ = [
    "Invert",
]


class Invert(bijector_lib.Bijector):
class Invert(bijector.Bijector):
  """Bijector which inverts another Bijector.

  Example Use: [ExpGammaDistribution (see Background & Context)](
@@ -32,7 +32,7 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops import variable_scope as variable_scope_lib
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector


__all__ = [
@@ -42,7 +42,7 @@ __all__ = [
]


class MaskedAutoregressiveFlow(bijector_lib.Bijector):
class MaskedAutoregressiveFlow(bijector.Bijector):
  """Affine MaskedAutoregressiveFlow bijector for vector-valued events.

  The affine autoregressive flow [(Papamakarios et al., 2016)][3] provides a
tensorflow/contrib/distributions/python/ops/bijectors/ordered.py (new file, 125 lines)
@@ -0,0 +1,125 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ordered bijector."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector


__all__ = [
    "Ordered",
]


class Ordered(bijector.Bijector):
  """Bijector which maps a tensor x_k that has increasing elements in the last
  dimension to an unconstrained tensor y_k.

  Both the domain and the codomain of the mapping are [-inf, inf]; however,
  the input of the forward mapping must be strictly increasing.
  The inverse of the bijector applied to a normal random vector `y ~ N(0, 1)`
  gives back a sorted random vector with the same distribution `x ~ N(0, 1)`
  where `x = sort(y)`.

  On the last dimension of the tensor, the Ordered bijector performs:
  `y[0] = x[0]`
  `y[1:] = math_ops.log(x[1:] - x[:-1])`

  #### Example Use:

  ```python
  bijector.Ordered().forward([2, 3, 4])
  # Result: [2., 0., 0.]

  bijector.Ordered().inverse([0.06428002, -1.07774478, -0.71530371])
  # Result: [0.06428002, 0.40464228, 0.8936858]
  ```
  """

  def __init__(self, validate_args=False, name="ordered"):
    super(Ordered, self).__init__(
        forward_min_event_ndims=1,
        validate_args=validate_args,
        name=name)

  def _forward_event_shape(self, input_shape):
    if input_shape.ndims is None or input_shape[-1] is None:
      return input_shape
    return tensor_shape.TensorShape([input_shape[-1]])

  def _forward_event_shape_tensor(self, input_shape):
    return (input_shape[-1])[..., array_ops.newaxis]

  def _inverse_event_shape(self, output_shape):
    if output_shape.ndims is None or output_shape[-1] is None:
      return output_shape
    if output_shape[-1] <= 1:
      raise ValueError("output_shape[-1] = %d <= 1" % output_shape[-1])
    return tensor_shape.TensorShape([output_shape[-1]])

  def _inverse_event_shape_tensor(self, output_shape):
    if self.validate_args:
      is_greater_one = check_ops.assert_greater(
          output_shape[-1], 1, message="Need last dimension greater than 1.")
      output_shape = control_flow_ops.with_dependencies(
          [is_greater_one], output_shape)
    return (output_shape[-1])[..., array_ops.newaxis]

  def _forward(self, x):
    x = self._maybe_assert_valid_x(x)
    y0 = x[..., 0, array_ops.newaxis]
    yk = math_ops.log(x[..., 1:] - x[..., :-1])
    y = array_ops.concat([y0, yk], axis=-1)
    return y

  def _inverse(self, y):
    x0 = y[..., 0, array_ops.newaxis]
    xk = math_ops.exp(y[..., 1:])
    x = array_ops.concat([x0, xk], axis=-1)
    return math_ops.cumsum(x, axis=-1)

  def _inverse_log_det_jacobian(self, y):
    # The Jacobian of the inverse mapping is lower
    # triangular, with the diagonal elements being:
    # J[i,i] = 1 if i=1, and
    #          exp(y_i) if 1<i<=K
    # which gives the absolute Jacobian determinant:
    # |det(Jac)| = prod_{i=1}^{K} exp(y[i]).
    # (1) - Stan Modeling Language User's Guide and Reference Manual
    #       Version 2.17.0 section 35.2
    return math_ops.reduce_sum(y[..., 1:], axis=-1)

  def _forward_log_det_jacobian(self, x):
    x = self._maybe_assert_valid_x(x)
    return -math_ops.reduce_sum(
        math_ops.log(x[..., 1:] - x[..., :-1]),
        axis=-1)

  def _maybe_assert_valid_x(self, x):
    if not self.validate_args:
      return x
    is_valid = check_ops.assert_positive(
        x[..., 1:] - x[..., :-1],
        message="Forward transformation input must be strictly increasing.")
    return control_flow_ops.with_dependencies([is_valid], x)
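A quick NumPy check of the forward/inverse pair defined by `_forward` and `_inverse` above (a sketch, not part of the commit):

```python
import numpy as np

x = np.array([2., 3., 4.])                                  # strictly increasing
y = np.concatenate([x[:1], np.log(np.diff(x))])             # forward: [2., 0., 0.]
x_back = np.cumsum(np.concatenate([y[:1], np.exp(y[1:])]))  # inverse
assert np.allclose(x, x_back)
```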
@@ -28,7 +28,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector


__all__ = [
@@ -36,7 +36,7 @@ __all__ = [
]


class Permute(bijector_lib.Bijector):
class Permute(bijector.Bijector):
  """Permutes the rightmost dimension of a `Tensor`.

  ```python
@@ -25,7 +25,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector


__all__ = [
@@ -34,7 +34,7 @@ __all__ = [
]


class RealNVP(bijector_lib.Bijector):
class RealNVP(bijector.Bijector):
  """RealNVP "affine coupling layer" for vector-valued events.

  Real NVP models a normalizing flow on a `D`-dimensional distribution via a
@@ -28,7 +28,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector as bijector_lib
from tensorflow.python.ops.distributions import bijector


__all__ = [
@@ -44,7 +44,7 @@ def _ndims_from_shape(shape):
  return array_ops.shape(shape)[0]


class Reshape(bijector_lib.Bijector):
class Reshape(bijector.Bijector):
  """Reshapes the `event_shape` of a `Tensor`.

  The semantics generally follow that of `tf.reshape()`, with
@@ -128,7 +128,7 @@ class Weibull(bijector.Bijector):
      return x
    is_valid = check_ops.assert_non_negative(
        x,
        message="Forward transformation input must be at least {}.".format(0))
        message="Forward transformation input must be at least 0.")
    return control_flow_ops.with_dependencies([is_valid], x)

  def _maybe_assert_valid_y(self, y):
@@ -439,7 +439,7 @@ class _DistributionShape(object):
      if self._batch_ndims_is_0 and expand_batch_dim:
        squeeze_dims += [1]
      if squeeze_dims:
        x = array_ops.squeeze(x, squeeze_dims=squeeze_dims)
        x = array_ops.squeeze(x, axis=squeeze_dims)
        # x.shape: [prod(S)]+B+E
        _, batch_shape, event_shape = self.get_shape(x)
    else:
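This hunk and the similar ones later in the commit are a mechanical rename: `squeeze_dims` was the deprecated keyword argument of `tf.squeeze`, and `axis` is the canonical spelling. Both produce the same result (TF 1.x sketch):

```python
import tensorflow as tf

t = tf.zeros([2, 1, 3])
old = tf.squeeze(t, squeeze_dims=[1])  # deprecated keyword
new = tf.squeeze(t, axis=[1])          # canonical keyword; same [2, 3] shape
```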
@@ -397,7 +397,7 @@ class GmmAlgorithm(object):
    # Compute the effective number of data points assigned to component k.
    with ops.control_dependencies(self._w):
      points_in_k = array_ops.squeeze(
          math_ops.add_n(self._points_in_k), squeeze_dims=[0])
          math_ops.add_n(self._points_in_k), axis=[0])
      # Update alpha.
      if 'w' in self._params:
        final_points_in_k = points_in_k / num_batches
@@ -932,7 +932,8 @@ def convolution(inputs,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
                scope=None,
                conv_dims=None):
  """Adds an N-D convolution followed by an optional batch_norm layer.

  It is required that 1 <= N <= 3.
@@ -993,6 +994,10 @@
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.
    conv_dims: Optional convolution dimensionality, when set it would use the
      corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). When
      left to None it would select the convolution dimensionality based on
      the input rank (i.e. Conv ND, with N = input_rank - 2).

  Returns:
    A tensor representing the output of the operation.
@@ -1015,6 +1020,9 @@
    inputs = ops.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims

    if conv_dims is not None and conv_dims + 2 != input_rank:
      raise ValueError('Convolution expects input with rank %d, got %d' %
                       (conv_dims + 2, input_rank))
    if input_rank == 3:
      layer_class = convolutional_layers.Convolution1D
    elif input_rank == 4:
@@ -1061,10 +1069,134 @@
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)

@add_arg_scope
def convolution1d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  data_format=None,
                  rate=1,
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer(),
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  return convolution(inputs,
                     num_outputs,
                     kernel_size,
                     stride,
                     padding,
                     data_format,
                     rate,
                     activation_fn,
                     normalizer_fn,
                     normalizer_params,
                     weights_initializer,
                     weights_regularizer,
                     biases_initializer,
                     biases_regularizer,
                     reuse,
                     variables_collections,
                     outputs_collections,
                     trainable,
                     scope,
                     conv_dims=1)

convolution2d = convolution
convolution3d = convolution
convolution1d.__doc__ = convolution.__doc__

@add_arg_scope
def convolution2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  data_format=None,
                  rate=1,
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer(),
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  return convolution(inputs,
                     num_outputs,
                     kernel_size,
                     stride,
                     padding,
                     data_format,
                     rate,
                     activation_fn,
                     normalizer_fn,
                     normalizer_params,
                     weights_initializer,
                     weights_regularizer,
                     biases_initializer,
                     biases_regularizer,
                     reuse,
                     variables_collections,
                     outputs_collections,
                     trainable,
                     scope,
                     conv_dims=2)

convolution2d.__doc__ = convolution.__doc__

@add_arg_scope
def convolution3d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  data_format=None,
                  rate=1,
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer(),
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  return convolution(inputs,
                     num_outputs,
                     kernel_size,
                     stride,
                     padding,
                     data_format,
                     rate,
                     activation_fn,
                     normalizer_fn,
                     normalizer_params,
                     weights_initializer,
                     weights_regularizer,
                     biases_initializer,
                     biases_regularizer,
                     reuse,
                     variables_collections,
                     outputs_collections,
                     trainable,
                     scope,
                     conv_dims=3)

convolution3d.__doc__ = convolution.__doc__

@add_arg_scope
def convolution2d_in_plane(
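A usage sketch of the wrappers defined above (assumes `tf.contrib.layers`): each wrapper pins `conv_dims`, so a mismatched input rank now raises instead of silently dispatching on rank:

```python
import tensorflow as tf
from tensorflow.contrib import layers

images = tf.random_uniform((5, 7, 9, 3))    # rank 4 -> Conv2D
net = layers.convolution2d(images, 32, 3)   # ok: conv_dims=2 matches

volumes = tf.random_uniform((5, 6, 7, 9, 3))  # rank 5 input
# layers.convolution2d(volumes, 32, 3)  # ValueError: expects rank 4, got 5
```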
@@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None):
  Args:
    tensor: An `int` `Tensor` to be converted to a `Sparse`.
    eos_token: An integer.
      It is part of the target label that signfies the end of a sentence.
      It is part of the target label that signifies the end of a sentence.
    outputs_collections: Collection to add the outputs.
    scope: Optional scope for name_scope.
  """
@@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None):
    output_collections: Collection to which the outputs will be added.
    scope: Optional scope for `name_scope`.
  Returns:
    A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but
    A `Tensor` or `SparseTensor` containing the same values as `inputs`, but
    with innermost dimensions flattened to obtain rank `new_rank`.

  Raises:
@@ -310,6 +310,17 @@ class BiasAddTest(test.TestCase):

class ConvolutionTest(test.TestCase):

  def testInvalidShape(self):
    with self.test_session():
      images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1)
      with self.assertRaisesRegexp(
          ValueError, 'Convolution expects input with rank 5, got 4'):
        layers_lib.convolution3d(images_2d, 32, 3)
      images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1)
      with self.assertRaisesRegexp(
          ValueError, 'Convolution expects input with rank 4, got 5'):
        layers_lib.convolution2d(images_3d, 32, 3)

  def testInvalidDataFormat(self):
    height, width = 7, 9
    with self.test_session():
@@ -3155,7 +3166,7 @@ class RepeatTests(test.TestCase):
    with self.test_session():
      images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32)
      output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3])
      self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu')
      self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu')
      self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32])

  def testRepeatWithScope(self):
@@ -3749,7 +3760,7 @@ class StackTests(test.TestCase):
          layers_lib.convolution2d, [10, 20, 30],
          kernel_size=[3, 3],
          padding='SAME')
      self.assertEqual(output.op.name, 'Stack/convolution_3/Relu')
      self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu')
      self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30])

  def testStackWithScope(self):
@@ -270,7 +270,7 @@ class _RegressionTargetColumn(_TargetColumn):

  def logits_to_predictions(self, logits, proba=False):
    if self.num_label_columns == 1:
      return array_ops.squeeze(logits, squeeze_dims=[1])
      return array_ops.squeeze(logits, axis=[1])
    return logits

  def get_eval_ops(self, features, logits, labels, metrics=None):
@@ -418,7 +418,7 @@ def _softmax_cross_entropy_loss(logits, target):
                     "Instead got %s." % target.dtype)
  # sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
  if len(target.get_shape()) == 2:
    target = array_ops.squeeze(target, squeeze_dims=[1])
    target = array_ops.squeeze(target, axis=[1])
  loss_vec = nn.sparse_softmax_cross_entropy_with_logits(
      labels=target, logits=logits)
  return loss_vec
@@ -777,7 +777,7 @@ class _RegressionHead(_SingleHead):
    key = prediction_key.PredictionKey.SCORES
    with ops.name_scope(None, "predictions", (logits,)):
      if self.logits_dimension == 1:
        logits = array_ops.squeeze(logits, squeeze_dims=(1,), name=key)
        logits = array_ops.squeeze(logits, axis=(1,), name=key)
      return {key: self._link_fn(logits)}

  def _metrics(self, eval_loss, predictions, labels, weights):
@@ -974,7 +974,7 @@ def _softmax_cross_entropy_loss(labels, logits, weights=None):
    is_squeezed_labels = False
    # TODO(ptucker): This will break for dynamic shapes.
    if len(labels.get_shape()) == 2:
      labels = array_ops.squeeze(labels, squeeze_dims=(1,))
      labels = array_ops.squeeze(labels, axis=(1,))
      is_squeezed_labels = True

    loss = nn.sparse_softmax_cross_entropy_with_logits(
@@ -40,7 +40,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None):
                      [tensor_in, labels]):
    predictions = nn.xw_plus_b(tensor_in, weights, biases)
    if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2:
      predictions = array_ops_.squeeze(predictions, squeeze_dims=[1])
      predictions = array_ops_.squeeze(predictions, axis=[1])
    return predictions, losses.mean_squared_error(labels, predictions)
@@ -17,6 +17,7 @@ limitations under the License.
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
@@ -70,6 +71,23 @@ TfLiteStatus ReadLabelsFile(const string& file_name,
  return kTfLiteOk;
}

void PrintProfilingInfo(const profiling::ProfileEvent* e, uint32_t op_index,
                        TfLiteRegistration registration) {
  // output something like
  // time (ms) , Node xxx, OpCode xxx, symbolic name
  //      5.352, Node   5, OpCode   4, DEPTHWISE_CONV_2D

  LOG(INFO) << std::fixed << std::setw(10) << std::setprecision(3)
            << (e->end_timestamp_us - e->begin_timestamp_us) / 1000.0
            << ", Node " << std::setw(3) << std::setprecision(3) << op_index
            << ", OpCode " << std::setw(3) << std::setprecision(3)
            << registration.builtin_code << ", "
            << EnumNameBuiltinOperator(
                   (BuiltinOperator)registration.builtin_code)
            << "\n";
}

void RunInference(Settings* s) {
  if (!s->model_name.c_str()) {
    LOG(ERROR) << "no model file name\n";
@@ -166,6 +184,11 @@ void RunInference(Settings* s) {
    exit(-1);
  }

  profiling::Profiler* profiler = new profiling::Profiler();
  interpreter->SetProfiler(profiler);

  if (s->profiling) profiler->StartProfiling();

  struct timeval start_time, stop_time;
  gettimeofday(&start_time, NULL);
  for (int i = 0; i < s->loop_count; i++) {
@@ -179,6 +202,18 @@ void RunInference(Settings* s) {
            << (get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000)
            << " ms \n";

  if (s->profiling) {
    profiler->StopProfiling();
    auto profile_events = profiler->GetProfileEvents();
    for (int i = 0; i < profile_events.size(); i++) {
      auto op_index = profile_events[i]->event_metadata;
      const auto node_and_registration =
          interpreter->node_and_registration(op_index);
      const TfLiteRegistration registration = node_and_registration->second;
      PrintProfilingInfo(profile_events[i], op_index, registration);
    }
  }

  const int output_size = 1000;
  const size_t num_results = 5;
  const float threshold = 0.001f;
@@ -217,13 +252,14 @@ void RunInference(Settings* s) {

void display_usage() {
  LOG(INFO) << "label_image\n"
            << "--accelerated, -a: [0|1], use Android NNAPI or note\n"
            << "--accelerated, -a: [0|1], use Android NNAPI or not\n"
            << "--count, -c: loop interpreter->Invoke() for certain times\n"
            << "--input_mean, -b: input mean\n"
            << "--input_std, -s: input standard deviation\n"
            << "--image, -i: image_name.bmp\n"
            << "--labels, -l: labels for the model\n"
            << "--tflite_model, -m: model_name.tflite\n"
            << "--profiling, -p: [0|1], profiling or not\n"
            << "--threads, -t: number of threads\n"
            << "--verbose, -v: [0|1] print more information\n"
            << "\n";
@@ -241,6 +277,7 @@ int Main(int argc, char** argv) {
        {"image", required_argument, 0, 'i'},
        {"labels", required_argument, 0, 'l'},
        {"tflite_model", required_argument, 0, 'm'},
        {"profiling", required_argument, 0, 'p'},
        {"threads", required_argument, 0, 't'},
        {"input_mean", required_argument, 0, 'b'},
        {"input_std", required_argument, 0, 's'},
@@ -249,7 +286,7 @@ int Main(int argc, char** argv) {
    /* getopt_long stores the option index here. */
    int option_index = 0;

    c = getopt_long(argc, argv, "a:b:c:f:i:l:m:s:t:v:", long_options,
    c = getopt_long(argc, argv, "a:b:c:f:i:l:m:p:s:t:v:", long_options,
                    &option_index);

    /* Detect the end of the options. */
@@ -276,6 +313,10 @@ int Main(int argc, char** argv) {
      case 'm':
        s.model_name = optarg;
        break;
      case 'p':
        s.profiling = strtol(  // NOLINT(runtime/deprecated_fn)
            optarg, (char**)NULL, 10);
        break;
      case 's':
        s.input_std = strtod(optarg, NULL);
        break;
@@ -25,6 +25,7 @@ struct Settings {
  bool verbose = false;
  bool accel = false;
  bool input_floating = false;
  bool profiling = false;
  int loop_count = 1;
  float input_mean = 127.5f;
  float input_std = 127.5f;
@@ -84,4 +84,32 @@
        android:visibility="visible" />
    </RelativeLayout>

    <RelativeLayout
        android:id="@+id/control2"
        android:layout_width="match_parent"
        android:layout_height="135dp"
        android:layout_alignParentLeft="true"
        android:layout_alignParentStart="true"
        android:layout_alignTop="@+id/control"
        android:layout_marginLeft="300dp"
        android:layout_marginStart="300dp"
        android:background="@color/control_background">

      <ToggleButton
          android:id="@+id/button"
          android:textOff="@string/tflite"
          android:textOn="@string/nnapi"
          android:layout_width="wrap_content"
          android:layout_height="wrap_content"
          android:layout_alignParentLeft="true"
          android:layout_alignParentStart="true" />

      <NumberPicker
          android:id="@+id/np"
          android:layout_width="wrap_content"
          android:layout_height="wrap_content"
          android:layout_below="@+id/button"
          android:visibility="visible" />
    </RelativeLayout>

</RelativeLayout>
@@ -25,8 +25,8 @@ namespace builtin {
namespace topk_v2 {
constexpr int kInputTensor = 0;
constexpr int kInputTopK = 1;
constexpr int kOutputIndexes = 0;
constexpr int kOutputValues = 1;
constexpr int kOutputValues = 0;
constexpr int kOutputIndexes = 1;

namespace {
TfLiteStatus ResizeOutput(TfLiteContext* context, TfLiteNode* node) {
@@ -31,8 +31,8 @@ class TopKV2OpModel : public SingleOpModel {
               int top_k) {
    input_ = AddInput(input_type);
    top_k_ = AddInput(TensorType_INT32);
    output_indexes_ = AddOutput(TensorType_INT32);
    output_values_ = AddOutput(input_type);
    output_indexes_ = AddOutput(TensorType_INT32);
    SetBuiltinOp(BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options, 0);
    BuildInterpreter({input_shape, {1}});
    PopulateTensor<int32_t>(top_k_, {top_k});
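The swap above brings the TFLite kernel in line with TensorFlow's convention that values are the first output and indices the second, e.g. in the Python API:

```python
import tensorflow as tf

x = tf.constant([1., 4., 2., 8.])
values, indices = tf.nn.top_k(x, k=2)  # values first, then indices
# values  -> [8., 4.]
# indices -> [3, 1]
```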
@@ -609,7 +609,7 @@ enum {
     * Long short-term memory unit (LSTM) recurrent network layer.
     *
     * The default non-peephole implementation is based on:
     * http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
     * http://www.bioinf.jku.at/publications/older/2604.pdf
     * S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
     * Computation, 9(8):1735-1780, 1997.
     *
@@ -37,9 +37,9 @@ struct ProfileEvent {
  // Label of the event. This usually describes the event.
  const char* tag;
  // Timestamp in microseconds when the event began.
  int64_t begin_timestamp_us;
  uint64_t begin_timestamp_us;
  // Timestamp in microseconds when the event ended.
  int64_t end_timestamp_us;
  uint64_t end_timestamp_us;
  // The field containing the type of event. This must be one of the event types
  // in EventType.
  EventType event_type;
@@ -74,7 +74,7 @@ class ProfileBuffer {
    if (!enabled_) {
      return kInvalidEventHandle;
    }
    int64_t timestamp = NowMicros();
    uint64_t timestamp = NowMicros();
    int index = current_index_ % event_buffer_.size();
    event_buffer_[index].tag = tag;
    event_buffer_[index].event_type = event_type;
@@ -134,7 +134,7 @@ class ProfileBuffer {
  }

 private:
  static int64_t NowMicros() {
  static uint64_t NowMicros() {
    // TODO(shashishekhar): Refactor this to a separate file.
    struct timeval tv;
    gettimeofday(&tv, nullptr);
@@ -124,6 +124,15 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
      SetDataTypeForAllOutputs(model, op, rand_op->dtype);
      break;
    }
    case OperatorType::kTopK_V2: {
      // topk(values: T, k: int32) -> values: T, indices: int32
      CHECK_EQ(op->inputs.size(), 2);
      CHECK_EQ(op->outputs.size(), 2);
      CHECK(model->GetArray(op->inputs[1]).data_type == ArrayDataType::kInt32);
      model->GetArray(op->outputs[0]).data_type = model->GetArray(op->inputs[0]).data_type;
      model->GetArray(op->outputs[1]).data_type = ArrayDataType::kInt32;
      break;
    }
    case OperatorType::kTensorFlowUnsupported: {
      auto* unsupported_op = static_cast<TensorFlowUnsupportedOperator*>(op);
      // Some output tensors from the op could be eliminated by optimization.
@@ -1087,8 +1087,8 @@ void ProcessGatherOperator(Model* model, GatherOperator* op) {
void ProcessTopkV2Operator(Model* model, TopKV2Operator* op) {
  const auto& input_values = model->GetArray(op->inputs[0]);
  const auto& input_k = model->GetArray(op->inputs[1]);
  auto& output_indexes = model->GetArray(op->outputs[0]);
  auto& output_values = model->GetArray(op->outputs[1]);
  auto& output_values = model->GetArray(op->outputs[0]);
  auto& output_indexes = model->GetArray(op->outputs[1]);

  // Bail if we already know the output shape.
  if (output_indexes.has_shape()) {
@@ -1991,7 +1991,7 @@ void ConvertTopKV2Operator(const NodeDef& node,
    op->inputs.push_back(node.input(1));
  }
  // The op has two outputs.
  op->outputs.push_back(node.name() + ":0");
  op->outputs.push_back(node.name());
  op->outputs.push_back(node.name() + ":1");
  model->operators.emplace_back(op.release());
}
@@ -825,11 +825,6 @@ void FixNoOrphanedArray(Model* model) {
void CheckEachArray(const Model& model) {
  for (const auto& array_entry : model.GetArrayMap()) {
    const auto& array = array_entry.second;
    if (array->has_shape()) {
      for (int d : array->shape().dims()) {
        CHECK_GE(d, 1);
      }
    }
    // It's OK to have a buffer or an alloc, but not both.
    // (Since allocs are for transient arrays without a buffer).
    CHECK(!array->buffer || !array->alloc);
@@ -839,6 +834,10 @@ void CheckEachArray(const Model& model) {
      // The presence of a fixed buffer should imply the presence of a fixed
      // shape.
      CHECK(array->has_shape());
      // Constant buffer should have a valid shape.
      for (int d : array->shape().dims()) {
        CHECK_GE(d, 1);
      }
      // The shape flat-size should agree with the buffer length.
      CHECK_EQ(array->buffer->Length(),
               RequiredBufferSizeForShape(array->shape()));
@@ -22,6 +22,7 @@ limitations under the License.
#include <string>
#include <vector>

#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/lib/strings/str_util.h"

// Skip MPI C++ bindings support, this matches the usage in other places
@@ -56,21 +56,21 @@ class LazyAdamOptimizer(adam.AdamOptimizer):
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

    # m := beta1 * m + (1 - beta1) * g_t
    # \\(m := beta1 * m + (1 - beta1) * g_t\\)
    m = self.get_slot(var, "m")
    m_t = state_ops.scatter_update(m, grad.indices,
                                   beta1_t * array_ops.gather(m, grad.indices) +
                                   (1 - beta1_t) * grad.values,
                                   use_locking=self._use_locking)

    # v := beta2 * v + (1 - beta2) * (g_t * g_t)
    # \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\)
    v = self.get_slot(var, "v")
    v_t = state_ops.scatter_update(v, grad.indices,
                                   beta2_t * array_ops.gather(v, grad.indices) +
                                   (1 - beta2_t) * math_ops.square(grad.values),
                                   use_locking=self._use_locking)

    # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
    # \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\)
    m_t_slice = array_ops.gather(m_t, grad.indices)
    v_t_slice = array_ops.gather(v_t, grad.indices)
    denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t
@@ -40,23 +40,19 @@ class AdamOptimizer(optimizer_v2.OptimizerV2):

  Initialization:

  ```
  m_0 <- 0 (Initialize initial 1st moment vector)
  v_0 <- 0 (Initialize initial 2nd moment vector)
  t <- 0 (Initialize timestep)
  ```
  $$m_0 := 0 (Initialize initial 1st moment vector)$$
  $$v_0 := 0 (Initialize initial 2nd moment vector)$$
  $$t := 0 (Initialize timestep)$$

  The update rule for `variable` with gradient `g` uses an optimization
  described at the end of section 2 of the paper:

  ```
  t <- t + 1
  lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
  $$t := t + 1$$
  $$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$

  m_t <- beta1 * m_{t-1} + (1 - beta1) * g
  v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g
  variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
  ```
  $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
  $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
  $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$

  The default value of 1e-8 for epsilon might not be a good default in
  general. For example, when training an Inception network on ImageNet a
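One worked Adam step in NumPy, following the update rules above (illustrative scalars only):

```python
import numpy as np

lr, beta1, beta2, eps = 0.001, 0.9, 0.999, 1e-8
m, v, var, g, t = 0.0, 0.0, 1.0, 0.5, 1

m = beta1 * m + (1 - beta1) * g        # 0.05
v = beta2 * v + (1 - beta2) * g * g    # 0.00025
lr_t = lr * np.sqrt(1 - beta2**t) / (1 - beta1**t)
var -= lr_t * m / (np.sqrt(v) + eps)   # moves var by roughly lr on step 1
```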
@@ -307,6 +307,21 @@ class LSTMTest(test.TestCase):
    self._seed = 23489
    np.random.seed(self._seed)

  def testDType(self):
    # Test case for GitHub issue 16228
    # Not passing dtype in constructor results in default float32
    lstm = rnn_cell.LSTMCell(10)
    input_tensor = array_ops.ones([10, 50])
    lstm.build(input_tensor.get_shape())
    self.assertEqual(lstm._bias.dtype, dtypes.float32_ref)

    # Explicitly pass dtype in constructor
    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
      lstm = rnn_cell.LSTMCell(10, dtype=dtype)
      input_tensor = array_ops.ones([10, 50])
      lstm.build(input_tensor.get_shape())
      self.assertEqual(lstm._bias.dtype, dtype._as_ref)

  def testNoProjNoSharding(self):
    num_units = 3
    input_size = 5
@@ -37,7 +37,7 @@ def _top_k_generator(k):
  def _top_k(probabilities, targets):
    targets = math_ops.to_int32(targets)
    if targets.get_shape().ndims > 1:
      targets = array_ops.squeeze(targets, squeeze_dims=[1])
      targets = array_ops.squeeze(targets, axis=[1])
    return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k))
  return _top_k

@@ -57,7 +57,7 @@ def _r2(probabilities, targets, weights=None):


def _squeeze_and_onehot(targets, depth):
  targets = array_ops.squeeze(targets, squeeze_dims=[1])
  targets = array_ops.squeeze(targets, axis=[1])
  return array_ops.one_hot(math_ops.to_int32(targets), depth)
@@ -55,7 +55,7 @@ class ManyToOneLayer(hybrid_layer.HybridLayer):

    # There is always one activation per instance by definition, so squeeze
    # away the extra dimension.
    return array_ops.squeeze(nn_activations, squeeze_dims=[1])
    return array_ops.squeeze(nn_activations, axis=[1])


class FlattenedFullyConnectedLayer(hybrid_layer.HybridLayer):
@@ -445,7 +445,7 @@ class RandomForestGraphs(object):
      mask = math_ops.less(
          r, array_ops.ones_like(r) * self.params.bagging_fraction)
      gather_indices = array_ops.squeeze(
          array_ops.where(mask), squeeze_dims=[1])
          array_ops.where(mask), axis=[1])
      # TODO(thomaswc): Calculate out-of-bag data and labels, and store
      # them for use in calculating statistics later.
      tree_data = array_ops.gather(processed_dense_features, gather_indices)
@@ -111,20 +111,22 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph,
    }
  }
}

std::pair<string, int> ParseTensorName(string name, int default_idx = 0) {
std::pair<string, int> ParseTensorName(const string& name,
                                       int default_idx = 0) {
  string name_no_idx = name;
  int idx = default_idx;
  size_t sep = name.find_last_of(':');
  const size_t sep = name_no_idx.find_last_of(':');
  if (sep != string::npos) {
    name = name.substr(0, sep);
    name_no_idx = name_no_idx.substr(0, sep);
    idx = std::stoi(name.substr(sep + 1));
  }
  return std::make_pair(name, idx);
  return std::make_pair(name_no_idx, idx);
}

std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
    const std::vector<string>& tensor_names) {
  std::unordered_map<string, std::vector<int>> result;
  for (string const& tensor_name : tensor_names) {
  for (const string& tensor_name : tensor_names) {
    string node_name;
    int index;
    std::tie(node_name, index) = ParseTensorName(tensor_name);
@@ -132,6 +134,7 @@ std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
  }
  return result;
}

// TODO(sami): convert references to pointers
struct ConvertGraphParams {
  ConvertGraphParams(
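The behavior `ParseTensorName` preserves, sketched as a hypothetical Python helper (the C++ change only makes the argument a const reference and stops mutating it):

```python
def parse_tensor_name(name, default_idx=0):
  """Split 'node:2' into ('node', 2); a bare 'node' keeps default_idx."""
  node, sep, idx = name.rpartition(':')
  return (node, int(idx)) if sep else (name, default_idx)

assert parse_tensor_name("conv1/weights:1") == ("conv1/weights", 1)
assert parse_tensor_name("conv1/weights") == ("conv1/weights", 0)
```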
@@ -78,7 +78,7 @@ class StubTimeSeriesModel(model.TimeSeriesModel):
    batch_end_values = array_ops.squeeze(
        array_ops.slice(values, [0, array_ops.shape(times)[1] - 1, 0],
                        [-1, 1, -1]),
        squeeze_dims=[1, 2])
        axis=[1, 2])
    # A pretty odd but easy to think about loss: L1 loss on the batch end
    # values.
    loss = math_ops.reduce_sum(
@@ -170,7 +170,7 @@ class KalmanFilter(object):
        math_ops.matmul(
            transition_matrices,
            prior_state[..., None]),
        squeeze_dims=[-1])
        axis=[-1])
    return advanced_state

  def predict_state_var(
@@ -254,7 +254,7 @@ class KalmanFilter(object):
            kalman_gain_transposed,
            array_ops.expand_dims(residual, -1),
            adjoint_a=True),
        squeeze_dims=[-1])
        axis=[-1])
    gain_obs = math_ops.matmul(
        kalman_gain_transposed, observation_model, adjoint_a=True)
    identity_extradim = linalg_ops.eye(
@@ -332,7 +332,7 @@ class KalmanFilter(object):
            array_ops.expand_dims(state_mean, 1),
            observation_model,
            adjoint_b=True),
        squeeze_dims=[1])
        axis=[1])
    observed_var = math_ops.matmul(
        math_ops.matmul(observation_model, state_var),
        observation_model,
@@ -2292,7 +2292,9 @@ tf_cuda_library(

CORE_CPU_BASE_HDRS = GRAPH_HDRS + [
    "common_runtime/device.h",
    "common_runtime/device_factory.h",
    "common_runtime/device_mgr.h",
    "common_runtime/device_set.h",
    "common_runtime/eval_const_tensor.h",
    "common_runtime/graph_runner.h",
    "common_runtime/shape_refiner.h",
@@ -2350,9 +2352,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
    "common_runtime/copy_tensor.h",
    "common_runtime/costmodel_manager.h",
    "common_runtime/debugger_state_interface.h",
    "common_runtime/device_factory.h",
    "common_runtime/device_resolver_local.h",
    "common_runtime/device_set.h",
    "common_runtime/dma_helper.h",
    "common_runtime/eigen_thread_pool.h",
    "common_runtime/executor.h",
@ -82,9 +82,9 @@ END
|
||||
}
|
||||
summary: "Update \'*var\' according to the Adam algorithm."
|
||||
description: <<END
|
||||
lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
|
||||
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
|
||||
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
|
||||
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
|
||||
$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
|
||||
$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
|
||||
$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
|
||||
$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
|
||||
END
|
||||
}
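Reading the update rule in order: a bias-corrected step size, first- and second-moment updates, then the variable step. A minimal sketch of a few Adam steps for a single scalar, following the formulas above (plain C++ with hypothetical variable names; not the kernel implementation):

```c++
#include <cmath>
#include <cstdio>

int main() {
  double var = 1.0, m = 0.0, v = 0.0;  // variable and moment estimates
  const double learning_rate = 0.001, beta1 = 0.9, beta2 = 0.999,
               epsilon = 1e-8;
  const double g = 0.5;  // gradient observed at each step
  for (int t = 1; t <= 3; ++t) {
    // lr_t = learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
    const double lr_t = learning_rate * std::sqrt(1 - std::pow(beta2, t)) /
                        (1 - std::pow(beta1, t));
    m = beta1 * m + (1 - beta1) * g;          // first-moment update
    v = beta2 * v + (1 - beta2) * g * g;      // second-moment update
    var -= lr_t * m / (std::sqrt(v) + epsilon);
    std::printf("t=%d var=%f\n", t, var);
  }
  return 0;
}
```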

@ -24,5 +24,6 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
                      [0, 0, 2, 2, 0, 0]
                      [0, 0, 0, 0, 0, 0]]
```

END
}

@ -44,6 +44,7 @@ In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```

here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*
@ -87,6 +88,7 @@ choosing to elide the lowest possible value for symmetry (e.g., output range is

We first find the range of values in our tensor. The
range we use is always centered on 0, so we find m such that

```c++
m = max(abs(input_min), abs(input_max))
```
@ -95,6 +97,7 @@ Our input tensor range is then `[-m, m]`.

Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
If T is signed, this is

```
num_bits = sizeof(T) * 8
[min_fixed, max_fixed] =
@ -102,16 +105,19 @@ If T is signed, this is
```

Otherwise, if T is unsigned, the fixed-point range is

```
[min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
```

From this we compute our scaling factor, s:

```c++
s = (max_fixed - min_fixed) / (2 * m)
```

Now we can quantize the elements of our tensor:

```c++
result = round(input * s)
```
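Putting the scaled-mode steps together, a small end-to-end sketch for a signed 8-bit `T` (plain C++ with hypothetical names; it assumes the usual signed fixed-point range `[-(1 << (num_bits - 1)), (1 << (num_bits - 1)) - 1]`, since the range expression above is truncated in this hunk):

```c++
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const double input_min = -10.0, input_max = 6.0;
  // The range is centered on 0: m = max(abs(input_min), abs(input_max)).
  const double m = std::max(std::abs(input_min), std::abs(input_max));
  // Signed 8-bit fixed-point buckets.
  const int num_bits = 8;
  const double min_fixed = -(1 << (num_bits - 1));
  const double max_fixed = (1 << (num_bits - 1)) - 1;
  // Scaling factor s = (max_fixed - min_fixed) / (2 * m).
  const double s = (max_fixed - min_fixed) / (2 * m);
  const double input = 3.5;
  const double result = std::round(input * s);  // the quantized value
  std::printf("m=%f s=%f result=%f\n", m, s, result);
  return 0;
}
```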

@ -76,9 +76,9 @@ END
  }
  summary: "Update \'*var\' according to the Adam algorithm."
  description: <<END
lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
END
}

@ -25,12 +25,12 @@ A new tensor with the given shape and updates applied according
to the indices.
END
  }
  summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`."
  summary: "Scatter `updates` into a new tensor according to `indices`."
  description: <<END
Creates a new tensor by applying sparse `updates` to individual
values or slices within a zero tensor of the given `shape` according to
indices. This operator is the inverse of the @{tf.gather_nd} operator which
extracts values or slices from a given tensor.
Creates a new tensor by applying sparse `updates` to individual values or
slices within a tensor (initially zero for numeric, empty for string) of
the given `shape` according to indices. This operator is the inverse of the
@{tf.gather_nd} operator which extracts values or slices from a given tensor.

**WARNING**: The order in which updates are applied is nondeterministic, so the
output will be nondeterministic if `indices` contains duplicates.
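A minimal sketch of the scatter semantics for the simplest case, rank-1 indices into a rank-1 zero-initialized output (plain C++ with hypothetical names; not the op implementation):

```c++
#include <cstdio>
#include <vector>

int main() {
  // shape = [8], indices = [[4], [3], [1], [7]], updates = [9, 10, 11, 12].
  std::vector<int> output(8, 0);  // initially zero, like the numeric op output
  const std::vector<int> indices = {4, 3, 1, 7};
  const std::vector<int> updates = {9, 10, 11, 12};
  for (size_t i = 0; i < indices.size(); ++i) {
    output[indices[i]] = updates[i];  // duplicate indices would be nondeterministic
  }
  for (int x : output) std::printf("%d ", x);  // 0 11 0 10 9 0 0 12
  std::printf("\n");
  return 0;
}
```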

@ -490,7 +490,7 @@ Status GraphExecutionState::OptimizeGraph(
      cpu_device = device;
    }
  }
  grappler::VirtualCluster cluster(device_map);
  grappler::VirtualCluster cluster(device_map, device_set_);
  GraphDef new_graph;
  TF_RETURN_IF_ERROR(grappler::RunMetaOptimizer(
      item, rewrite_options, cpu_device, &cluster, &new_graph));

@ -547,14 +547,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {

    // If Op has been specifically assigned to a non-CPU device, then No.
    if (!n->assigned_device_name().empty() &&
        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
        !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
      result = false;
      reason = "Op has been assigned a runtime device that is not CPU.";
    }

    // If user has specifically assigned this op to a non-CPU device, then No.
    if (!n->def().device().empty() &&
        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
        !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
      result = false;
      reason = "User has assigned a device that is not CPU.";
    }
@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {

    // If Op has been specifically assigned to a non-CPU device, then No.
    if (!n->assigned_device_name().empty() &&
        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
        !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
      result = false;
      reason = "Op has been assigned a runtime device that is not CPU.";
    }

    // If user has specifically assigned this op to a non-CPU device, then No.
    if (!n->def().device().empty() &&
        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
        !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
      result = false;
      reason = "User has assigned a device that is not CPU.";
    }

@ -56,6 +56,7 @@ cc_library(
    ],
    visibility = ["//visibility:public"],
    deps = [
        "//tensorflow/core:core_cpu_base",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
@ -73,6 +74,7 @@ cc_library(
    visibility = ["//visibility:public"],
    deps = [
        ":cluster",
        "//tensorflow/core:core_cpu_base",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core/grappler/costs:op_level_cost_estimator",

@ -21,6 +21,7 @@ limitations under the License.
#include <utility>
#include <vector>

#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/lib/core/status.h"
@ -92,6 +93,10 @@ class Cluster {
  // sorted alphabetically.
  const std::vector<string> GetDeviceNames() const;

  // The DeviceSet is not always available, but when it is it contains a
  // superset of the devices listed in GetDevices/GetDeviceNames().
  const DeviceSet* GetDeviceSet() const { return device_set_; }

  // Enables collecting the allocator stats. Call with enable=true must be made
  // before Provision().
  virtual Status EnablePeakMemoryStats(bool enable) {
@ -119,6 +124,7 @@ class Cluster {

 protected:
  std::unordered_map<string, DeviceProperties> devices_;
  const DeviceSet* device_set_ = nullptr;  // Not owned
  const int timeout_s_;
  SessionOptions options_;
  RunOptions run_options_;

@ -37,6 +37,14 @@ VirtualCluster::VirtualCluster(
    : Cluster(0), node_estimator_(node_estimator), node_manager_(node_manager) {
  devices_ = devices;
}

VirtualCluster::VirtualCluster(
    const std::unordered_map<string, DeviceProperties>& devices,
    const DeviceSet* device_set)
    : VirtualCluster(devices) {
  device_set_ = device_set;
}

VirtualCluster::~VirtualCluster() {}

Status VirtualCluster::Provision() { return Status::OK(); }

@ -17,6 +17,8 @@ limitations under the License.
#define TENSORFLOW_CORE_GRAPPLER_CLUSTERS_VIRTUAL_CLUSTER_H_

#include <unordered_map>

#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/costs/op_level_cost_estimator.h"
#include "tensorflow/core/grappler/costs/virtual_scheduler.h"
@ -34,6 +36,8 @@ class VirtualCluster : public Cluster {
  VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
                 OpLevelCostEstimator* node_estimator,
                 ReadyNodeManager* node_manager);
  VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
                 const DeviceSet* device_set);

  ~VirtualCluster() override;

@ -199,7 +199,7 @@ class FirstReadyManager : public ReadyNodeManager {
  // current node.
  std::vector<const NodeDef*> nodes_;
  // Newly added nodes are added to waiting_queue_. That way, GetCurrNode(),
  // wihch returns the front of the nodes_, always returns the same node,
  // which returns the front of the nodes_, always returns the same node,
  // even if any of new nodes has time_ready smaller than the current node's.
  std::vector<const NodeDef*> waiting_queue_;
  // Comparator functor for heap; stl heap is max heap, so we use "greater than"
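As that comment notes, the STL heap helpers build a max-heap by default, so a "greater than" comparator is what turns them into a min-heap. A standalone illustration of the idea (not the scheduler code):

```c++
#include <cstdio>
#include <functional>
#include <queue>
#include <vector>

int main() {
  // std::greater<int> makes the smallest element surface first (min-heap).
  std::priority_queue<int, std::vector<int>, std::greater<int>> min_heap;
  for (int t : {30, 10, 20}) min_heap.push(t);
  while (!min_heap.empty()) {
    std::printf("%d ", min_heap.top());  // prints: 10 20 30
    min_heap.pop();
  }
  std::printf("\n");
  return 0;
}
```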
@ -212,7 +212,7 @@ class FirstReadyManager : public ReadyNodeManager {
};

// CompositeNodeManager has a few other NodeManagers: per-device LIFO for normal
// ops (neither _Send nor _Recv) and FirstyReadyManagers for _Send ops and _Recv
// ops (neither _Send nor _Recv) and FirstReadyManagers for _Send ops and _Recv
// ops, and then it chooses FirstReady among the ops chosen from each
// internal NodeManagers. The objective is to maximize producer-consumer
// locality within device, while processing nodes across devices, including

@ -33,7 +33,7 @@ class CustomGraphOptimizerRegistry {
  static std::vector<string> GetRegisteredOptimizers();

  typedef std::function<CustomGraphOptimizer*()> Creator;
  // Regsiter graph optimizer which can be called during program initialization.
  // Register graph optimizer which can be called during program initialization.
  // This class is not thread-safe.
  static void RegisterOptimizerOrDie(const Creator& optimizer_creator,
                                     const string& name);

@ -160,13 +160,26 @@ Status MetaOptimizer::InitializeOptimizersByName(
      VLOG(2) << "Can't register an optimizer by name: " << optimizer_name;
    }
  }
  for (const auto& optimizer_config : cfg_.custom_optimizers()) {
    auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
        optimizer_config.name());
    if (custom_optimizer) {
      VLOG(2) << "Registered custom configurable graph optimizer: "
              << optimizer_config.name();
      TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config));
      optimizers->push_back(std::move(custom_optimizer));
    } else {
      VLOG(2) << "Can't register an optimizer by name: "
              << optimizer_config.name();
    }
  }
  return Status::OK();
}

Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
                                    GraphDef* optimized_graph) {
  std::vector<std::unique_ptr<GraphOptimizer>> optimizers;
  if (cfg_.optimizers().empty()) {
  if (cfg_.optimizers().empty() && cfg_.custom_optimizers().empty()) {
    TF_RETURN_IF_ERROR(InitializeOptimizers(&optimizers));
  } else {
    TF_RETURN_IF_ERROR(InitializeOptimizersByName(&optimizers));
@ -337,7 +350,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
         cfg.auto_parallel().enable() ||
         cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
         cfg.debug_stripper() == RewriterConfig::ON ||
         !cfg.optimizers().empty();
         !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
}

Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg,

@ -134,6 +134,8 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
  switch (element.dtype()) {
    TF_CALL_ALL_TYPES(HANDLE_TYPE);
    TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
    TF_CALL_uint32(HANDLE_TYPE);
    TF_CALL_uint64(HANDLE_TYPE);
#undef HANDLE_TYPE
    default:
      return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",

@ -16,8 +16,8 @@ limitations under the License.
#include "tensorflow/core/kernels/cwise_ops_common.h"

namespace tensorflow {
REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
          int16, int32, int64);
REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
          int8, int16, int32, int64);
REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
          Eigen::half, double);

@ -20,6 +20,7 @@ limitations under the License.
#include <map>
#include <string>
#include <vector>
#include <memory>

#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
@ -42,14 +43,13 @@ limitations under the License.
#include "tensorflow/core/util/mkl_util.h"

#ifndef INTEL_MKL_ML

#include "mkldnn.hpp"

using mkldnn::prop_kind;
using mkldnn::stream;

using mkldnn::convolution_direct;
using mkldnn::convolution_forward;
using mkldnn::convolution_direct;

#else
#include "mkl_dnn.h"
#include "mkl_dnn_types.h"
@ -57,11 +57,232 @@ using mkldnn::convolution_forward;

namespace tensorflow {

#ifndef INTEL_MKL_ML

struct ConvFwdDimensions {
  memory::dims src_dims;
  memory::dims filter_dims;
  memory::dims bias_dims;
  memory::dims dst_dims;
  memory::dims strides;
  memory::dims dilations;
  memory::dims padding_left;
  memory::dims padding_right;

  ConvFwdDimensions(memory::dims src_dims,
                    memory::dims filter_dims, memory::dims bias_dims,
                    memory::dims dst_dims, memory::dims strides,
                    memory::dims dilations, memory::dims padding_left,
                    memory::dims padding_right) :
      src_dims(src_dims), filter_dims(filter_dims),
      bias_dims(bias_dims), dst_dims(dst_dims),
      strides(strides), dilations(dilations),
      padding_left(padding_left), padding_right(padding_right) {
  }
};

template <typename T>
class Conv2DFwd : public DnnOp {
 public:
  explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) {
    fwd_stream_.reset(new stream(stream::kind::eager));
    // create conv primitive
    if (conv_fwd_ == nullptr) {
      Setup(convFwdDims);
    }
  }

  ~Conv2DFwd() {}

  // Convolution forward execute with bias
  //   src_data: input data buffer of src
  //   filter_data: input data buffer of filter (weights)
  //   bias_data: input data buffer of bias
  //   dst_data: output data buffer of dst
  void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) {
    src_mem_->set_data_handle(static_cast<void*>(src_data));
    filter_mem_->set_data_handle(static_cast<void*>(filter_data));
    bias_mem_->set_data_handle(static_cast<void*>(bias_data));
    dst_mem_->set_data_handle(static_cast<void*>(dst_data));
    fwd_stream_->submit(fwd_primitives_);

    // after exec, set data handle back
    src_mem_->set_data_handle(DummyData);
    filter_mem_->set_data_handle(DummyData);
    bias_mem_->set_data_handle(DummyData);
    dst_mem_->set_data_handle(DummyData);

    return;
  }

  // Convolution forward execute without bias
  //   src_data: input data buffer of src
  //   filter_data: input data buffer of filter (weights)
  //   dst_data: output data buffer of dst
  void Execute(T* src_data, T* filter_data, T* dst_data) {
    src_mem_->set_data_handle(static_cast<void*>(src_data));
    filter_mem_->set_data_handle(static_cast<void*>(filter_data));
    dst_mem_->set_data_handle(static_cast<void*>(dst_data));
    fwd_stream_->submit(fwd_primitives_);

    // after exec, set data handle back
    src_mem_->set_data_handle(DummyData);
    filter_mem_->set_data_handle(DummyData);
    dst_mem_->set_data_handle(DummyData);

    return;
  }

  // expected memory format for this primitive instance
  memory::format src_fmt_;
  memory::format filter_fmt_;

  // convolution primitive
  std::shared_ptr<mkldnn::convolution_forward::primitive_desc> fwd_pd_;
  std::shared_ptr<mkldnn::primitive> conv_fwd_;

 private:
  void Setup(const ConvFwdDimensions& convFwdDims) {
    // create memory descriptors for convolution data w/ no specified format
    src_md_.reset(new memory::desc({convFwdDims.src_dims},
                                   MklDnnType<T>(), memory::format::any));

    filter_md_.reset(new memory::desc({convFwdDims.filter_dims},
                                      MklDnnType<T>(), memory::format::any));

    dst_md_.reset(new memory::desc({convFwdDims.dst_dims},
                                   MklDnnType<T>(), memory::format::any));

    if (!convFwdDims.bias_dims.empty())
      bias_md_.reset(new memory::desc({convFwdDims.bias_dims},
                                      MklDnnType<T>(), memory::format::any));

    // create a convolution
    if (!convFwdDims.bias_dims.empty()) {
      fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
          convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_,
          convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
          convFwdDims.padding_right, padding_kind::zero));
    } else {
      fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
          convolution_direct, *src_md_, *filter_md_, *dst_md_,
          convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
          convFwdDims.padding_right, padding_kind::zero));
    }

    fwd_pd_.reset(new convolution_forward::primitive_desc(
        *fwd_desc_, cpu_engine_));

    // store the expected memory format
    src_fmt_ = static_cast<mkldnn::memory::format>(
        fwd_pd_.get()->src_primitive_desc().desc().data.format);

    filter_fmt_ = static_cast<mkldnn::memory::format>(
        fwd_pd_.get()->weights_primitive_desc().desc().data.format);

    // create memory primitive based on dummy data
    src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData));
    filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(),
                                 DummyData));
    dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData));

    // create convolution primitive and add it to net
    if (!convFwdDims.bias_dims.empty()) {
      bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType<T>(),
                                   memory::format::x}, cpu_engine_}, DummyData));
      conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
                                              *filter_mem_, *bias_mem_, *dst_mem_));
    } else {
      conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
                                              *filter_mem_, *dst_mem_));
    }

    fwd_primitives_.push_back(*conv_fwd_);
    return;
  }

  // MKLDNN memory
  std::shared_ptr<mkldnn::memory> src_mem_;
  std::shared_ptr<mkldnn::memory> filter_mem_;
  std::shared_ptr<mkldnn::memory> bias_mem_;
  std::shared_ptr<mkldnn::memory> dst_mem_;

  std::shared_ptr<mkldnn::stream> fwd_stream_;
  std::vector<mkldnn::primitive> fwd_primitives_;

  // desc & primitive desc
  std::shared_ptr<mkldnn::convolution_forward::desc> fwd_desc_;

  // memory desc
  std::shared_ptr<mkldnn::memory::desc> src_md_;
  std::shared_ptr<mkldnn::memory::desc> filter_md_;
  std::shared_ptr<mkldnn::memory::desc> bias_md_;
  std::shared_ptr<mkldnn::memory::desc> dst_md_;

  engine cpu_engine_ = engine(engine::cpu, 0);
};

template <typename T>
class Conv2DFwdFactory : public DnnOpFactory<T> {
 public:
  static Conv2DFwd<T>* Get(const ConvFwdDimensions& convFwdDims) {
    Conv2DFwd<T>* conv2d_fwd = nullptr;

    // try to find a suitable one in pool
    conv2d_fwd = dynamic_cast<Conv2DFwd<T>*>(
        Conv2DFwdFactory<T>::GetInstance().GetConv2DFwd(convFwdDims));

    if (conv2d_fwd == nullptr) {
      conv2d_fwd = new Conv2DFwd<T>(convFwdDims);
      Conv2DFwdFactory<T>::GetInstance().SetConv2DFwd(
          convFwdDims, conv2d_fwd);
    }
    return conv2d_fwd;
  }

 private:
  Conv2DFwdFactory() {}
  ~Conv2DFwdFactory() {}

  static const int kDilationH = 0, kDilationW = 1;

  static Conv2DFwdFactory& GetInstance() {
    static Conv2DFwdFactory instance_;
    return instance_;
  }

  static std::string CreateKey(const ConvFwdDimensions& convFwdDims) {
    std::string prefix = "conv2d_fwd_";
    FactoryKeyCreator key_creator;
    key_creator.AddAsKey(prefix);
    key_creator.AddAsKey(convFwdDims.src_dims);
    key_creator.AddAsKey(convFwdDims.filter_dims);
    key_creator.AddAsKey(convFwdDims.bias_dims);
    key_creator.AddAsKey(convFwdDims.dst_dims);
    key_creator.AddAsKey(convFwdDims.strides);
    key_creator.AddAsKey(convFwdDims.dilations);
    key_creator.AddAsKey(convFwdDims.padding_left);
    key_creator.AddAsKey(convFwdDims.padding_right);
    return key_creator.GetKey();
  }

  DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) {
    std::string key = CreateKey(convFwdDims);
    return this->GetOp(key);
  }

  void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp* op) {
    std::string key = CreateKey(convFwdDims);
    this->SetOp(key, op);
  }
};
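The factory above follows a common memoization pattern: a singleton keeps a map from a shape/stride key to an already-built primitive, and `Get()` only constructs on a cache miss. A stripped-down standalone sketch of that pattern (hypothetical `Expensive` type and key strings; not the MKL code):

```c++
#include <cstdio>
#include <memory>
#include <string>
#include <unordered_map>

struct Expensive {
  explicit Expensive(const std::string& key) {
    std::printf("built %s\n", key.c_str());  // construction is the costly step
  }
};

class ExpensiveFactory {
 public:
  static Expensive* Get(const std::string& key) {
    auto& pool = Instance().pool_;
    auto it = pool.find(key);
    if (it == pool.end()) {
      // Cache miss: build once and keep it for later calls with the same key.
      it = pool.emplace(key, std::make_unique<Expensive>(key)).first;
    }
    return it->second.get();
  }

 private:
  static ExpensiveFactory& Instance() {
    static ExpensiveFactory instance;  // process-wide singleton
    return instance;
  }
  std::unordered_map<std::string, std::unique_ptr<Expensive>> pool_;
};

int main() {
  ExpensiveFactory::Get("conv2d_fwd_1x3x224x224");  // prints "built ..."
  ExpensiveFactory::Get("conv2d_fwd_1x3x224x224");  // reused; no second print
  return 0;
}
```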

#endif

typedef Eigen::ThreadPoolDevice CPUDevice;

// MKL-DNN is now default. MKL-ML must be specified explicitly.
// For now, MKL-ML is default. So making MKL-DNN not a default choice.
#ifdef INTEL_MKL_ML

template <typename Device, typename T, bool biasEnabled>
class MklConv2DOp : public OpKernel {
 public:
@ -528,8 +749,6 @@ class MklConv2DOp : public OpKernel {

  void Compute(OpKernelContext* context) override {
    try {
      auto cpu_engine = engine(engine::cpu, 0);

      // Input tensors
      const Tensor& src_tensor = MklGetInput(context, kInputIndex_Src);
      const Tensor& filter_tensor = MklGetInput(context, kInputIndex_Filter);
@ -538,16 +757,16 @@ class MklConv2DOp : public OpKernel {
      GetMklShape(context, kInputIndex_Src, &src_mkl_shape);
      GetMklShape(context, kInputIndex_Filter, &filter_mkl_shape);
      OP_REQUIRES(context, filter_mkl_shape.IsMklTensor() == false,
                  errors::InvalidArgument("Filter should not be in "
                                          "Mkl Layout"));
                  errors::InvalidArgument("Filter should not be in "
                                          "Mkl Layout"));

      MklDnnData<T> src(&cpu_engine);
      MklDnnData<T> filter(&cpu_engine);
      MklDnnData<T> output(&cpu_engine);
      MklDnnData<T> dst(&cpu_engine);  // output

      memory::dims src_dims, filter_dims, padding_l, padding_r,
      memory::dims src_dims, filter_dims, padding_left, padding_right,
          dilations, strides;
      memory::dims output_dims_tf_order, output_dims_mkl_order;
      memory::dims dst_dims_tf_order, dst_dims_mkl_order;

      // Get shapes of input tensors in MKL-DNN order
      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_,
@ -555,31 +774,29 @@ class MklConv2DOp : public OpKernel {
      auto src_tf_shape = GetTfShape(context, kInputIndex_Src);
      auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter);
      conv_utl.GetConvFwdSizesInMklOrder(
          src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides,
          &dilations, &output_dims_tf_order, &output_dims_mkl_order,
          &padding_l, &padding_r);
          src_tf_shape, filter_tf_shape, &src_dims, &filter_dims,
          &strides, &dilations, &dst_dims_tf_order, &dst_dims_mkl_order,
          &padding_left, &padding_right);
      if (!context->status().ok()) return;

      // Check for corner case - if there is nothing to compute, return.
      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
      TensorShape dst_tf_shape = MklDnnDimsToTFShape(dst_dims_tf_order);

      // Corner cases: output with 0 elements and 0 batch size.
      Tensor* output_tensor = nullptr;
      if (output_tf_shape.num_elements() == 0 || output_dims_tf_order[0] == 0) {
        // TODO(jbobba): Verify correctness here
        // Need semantics for Null MKL tensor
        MklDnnShape output_mkl_shape;
        output_mkl_shape.SetMklTensor(false);

        AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor,
                                  src_tf_shape, output_mkl_shape);
      Tensor* dst_tensor = nullptr;
      if (dst_tf_shape.num_elements() == 0 ||
          dst_dims_tf_order[0] == 0) {
        MklDnnShape dst_mkl_shape;
        dst_mkl_shape.SetMklTensor(false);
        AllocateOutputSetMklShape(context, kOutputIndex_Dst,
                                  &dst_tensor, src_tf_shape, dst_mkl_shape);

        // MklConv2D also outputs converted filter as 2nd output of Conv2D.
        filter_mkl_shape.SetMklTensor(false);
        Tensor* output_filter_tensor = nullptr;
        AllocateOutputSetMklShape(context, kOutputIndex_Filter,
                                  &output_filter_tensor, filter_tf_shape,
                                  filter_mkl_shape);
                                  &output_filter_tensor,
                                  filter_tf_shape, filter_mkl_shape);
        return;
      }

@ -587,6 +804,7 @@ class MklConv2DOp : public OpKernel {
      // Describe how the inputs and outputs of Convolution look like. Also
      // specify buffers containing actual input and output data.
      auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);

      // If input is in MKL layout, then simply grab input layout; otherwise,
      // construct input Tf layout. For TF layout, although input shape
      // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
@ -595,6 +813,7 @@ class MklConv2DOp : public OpKernel {
                        ? src_mkl_shape.GetMklLayout()
                        : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
      src.SetUsrMem(src_md, &src_tensor);

      // Although filter shape (filter_dims) required is in MKL-DNN order,
      // the layout is Tensorflow's layout (HWIO).
      auto filter_md = filter_mkl_shape.IsMklTensor()  // Should NEVER be true
@ -603,98 +822,70 @@ class MklConv2DOp : public OpKernel {
                           memory::format::hwio);
      filter.SetUsrMem(filter_md, &filter_tensor);

      // Set output shape (output_dims) required in MKL-DNN order.
      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
      // depending on data format). But later we propagate Mkl layout of the
      // output to the next op directly.
      output.SetUsrMem(output_dims_mkl_order, tf_fmt);

      // Create memory descriptors for convolution data w/ no specified format.
      src.SetOpMemDesc(src_dims, memory::format::any);
      filter.SetOpMemDesc(filter_dims, memory::format::any);
      output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);

      // MKLDNN dilation starts from 0.
      dilations[kDilationH] -= 1;
      dilations[kDilationW] -= 1;

      // get a conv2d fwd from primitive pool
      Conv2DFwd<T>* conv2d_fwd = nullptr;
      if (biasEnabled) {
        // Create convolution primitive with Bias.
        MklDnnData<T> bias(&cpu_engine);
        memory::dims bias_size;
        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size);
        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
        bias.SetOpMemDesc(bias_size, memory::format::any);

        // Create convolution primitive with Bias.
        // Use MKLDNN dilated convolution in case of dilated rate (>0).
        auto conv_desc = (dilations[kDilationH] > 0 ||
                          dilations[kDilationW] > 0) ?
            convolution_forward::desc(prop_kind::forward,
                convolution_direct, src.GetOpMemDesc(),
                filter.GetOpMemDesc(), bias.GetOpMemDesc(),
                output.GetOpMemDesc(), strides, dilations,
                padding_l, padding_r,
                TFPaddingToMklDnnPadding(padding_)):
            convolution_forward::desc(prop_kind::forward,
                convolution_direct, src.GetOpMemDesc(),
                filter.GetOpMemDesc(), bias.GetOpMemDesc(),
                output.GetOpMemDesc(), strides,
                padding_l, padding_r,
                TFPaddingToMklDnnPadding(padding_));

        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                  cpu_engine);
        AllocateOutputTensor(context, conv_prim_desc,
                             output_dims_mkl_order, tf_fmt, &output_tensor);
        // Set data handle for output.
        output.SetUsrMemDataHandle(output_tensor);

        Tensor* filter_out_tensor = nullptr;
        AllocateFilterOutputTensor(context, conv_prim_desc,
                                   TFShapeToMklDnnDims(filter_tf_shape),
                                   &filter_out_tensor);

        PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output,
                             filter_out_tensor);
        memory::dims bias_dims = {};
        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims);
        ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims,
            dst_dims_mkl_order, strides, dilations, padding_left, padding_right);
        conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
      } else {
        // Create convolution primitive without Bias.
        // Use MKLDNN dilated convolution in case of dilated rate (>0).
        auto conv_desc = (dilations[kDilationH] > 0 ||
                          dilations[kDilationW] > 0) ?
            convolution_forward::desc(prop_kind::forward,
                convolution_direct, src.GetOpMemDesc(),
                filter.GetOpMemDesc(), output.GetOpMemDesc(),
                strides, dilations, padding_l, padding_r,
                TFPaddingToMklDnnPadding(padding_)):
            convolution_forward::desc(prop_kind::forward,
                convolution_direct, src.GetOpMemDesc(),
                filter.GetOpMemDesc(), output.GetOpMemDesc(),
                strides, padding_l, padding_r,
                TFPaddingToMklDnnPadding(padding_));

        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
                                                                  cpu_engine);
        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
                             tf_fmt, &output_tensor);
        // Set data handle for output.
        output.SetUsrMemDataHandle(output_tensor);

        Tensor* filter_out_tensor = nullptr;
        AllocateFilterOutputTensor(context, conv_prim_desc,
                                   TFShapeToMklDnnDims(filter_tf_shape),
                                   &filter_out_tensor);
        PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
                             nullptr, &output, filter_out_tensor);
        ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS,
            dst_dims_mkl_order, strides, dilations, padding_left, padding_right);
        conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
      }
    } catch (mkldnn::error& e) {

      // allocate output tensors output_tensor and filter_out_tensor
      std::shared_ptr<mkldnn::convolution_forward::primitive_desc>
          conv_fwd_pd = conv2d_fwd->fwd_pd_;
      AllocateOutputTensor(context, *conv_fwd_pd,
                           dst_dims_mkl_order, tf_fmt, &dst_tensor);
      Tensor* filter_out_tensor = nullptr;
      AllocateFilterOutputTensor(context, *conv_fwd_pd,
                                 TFShapeToMklDnnDims(filter_tf_shape),
                                 &filter_out_tensor);

      T* dst_data = static_cast<T*>(dst_tensor->flat<T>().data());

      // check whether src/filter need reorder
      std::vector<primitive> net;
      if (src_md.data.format != conv2d_fwd->src_fmt_)
        src.CheckReorderToOpMem(
            conv_fwd_pd.get()->src_primitive_desc(), &net);

      if (filter_md.data.format != conv2d_fwd->filter_fmt_)
        filter.CheckReorderToOpMem(
            conv_fwd_pd.get()->weights_primitive_desc(),
            filter.GetTensorBuffer(filter_out_tensor), &net);
      stream(stream::kind::eager).submit(net).wait();

      T* src_data = static_cast<T*>(
          src.GetOpMem().get_data_handle());
      T* filter_data = static_cast<T*>(
          filter.GetOpMem().get_data_handle());

      // execute convolution
      if (biasEnabled) {
        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
        T* bias_data = static_cast<T*>(const_cast<T*>(
            bias_tensor.flat<T>().data()));

        conv2d_fwd->Execute(src_data, filter_data, bias_data, dst_data);
      } else {
        conv2d_fwd->Execute(src_data, filter_data, dst_data);
      }
    } catch (mkldnn::error &e) {
      string error_msg = "Status: " + std::to_string(e.status) +
                         ", message: " + std::string(e.message) + ", in file " +
                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
      OP_REQUIRES_OK(
          context,
          errors::Aborted("Operation received an exception:", error_msg));
                         ", message: " + std::string(e.message) +
                         ", in file " + std::string(__FILE__) + ":" +
                         std::to_string(__LINE__);
      OP_REQUIRES_OK(context,
          errors::Aborted("Operation received an exception:", error_msg));
    }
  }

@ -706,6 +897,7 @@ class MklConv2DOp : public OpKernel {
  const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2;
  const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
  const int kDilationH = 0, kDilationW = 1;
  engine cpu_engine = engine(engine::cpu, 0);

  // Allocate output tensor.
  void AllocateOutputTensor(

@ -241,6 +241,7 @@ class ScatterNdUpdateOp : public OpKernel {
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_ADD_SUB_CPU);
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_UPDATE_CPU);
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_CPU);
TF_CALL_string(REGISTER_SCATTER_ND_CPU);

// Registers GPU kernels.
#if GOOGLE_CUDA

@ -160,6 +160,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
  REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);

TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
REGISTER_SCATTER_ND_INDEX(string, scatter_nd_op::UpdateOp::ADD);
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_MATH)

#undef REGISTER_SCATTER_ND_MATH

@ -16,35 +16,6 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
#define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_


// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"

// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.

// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"

// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.

// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"

// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.

// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"

// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.

// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"

@ -23,7 +23,7 @@ limitations under the License.
#if defined(WIN32)
#include "extras/CUPTI/include/cupti.h"
#else
#include "cuda/extras/CUPTI/include/cupti.h"
#include "cupti.h"
#endif
namespace perftools {
namespace gputools {

@ -24,7 +24,7 @@ limitations under the License.

// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
#define TF_VERSION_SUFFIX "-rc0"
#define TF_VERSION_SUFFIX "-rc1"

#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)

@ -19,6 +19,8 @@ limitations under the License.

#include <string>
#include <vector>
#include <unordered_map>
#include <utility>

#include "mkl_dnn.h"
#include "mkl_dnn_types.h"
@ -1759,7 +1761,90 @@ class MklDnnData {
  }
};

#endif  // INTEL_MKL_ML
/// Base class for operations with reuse of DNN primitives
///
class DnnOp {
 public:
  virtual ~DnnOp() {}

  // Dummy data. Its size, hard-coded as 256 here, does
  // not matter since MKL should never operate on this buffer.
  unsigned char DummyData[256];
};

const mkldnn::memory::dims NONE_DIMS = {};
// This constant is used to declare dummy buffer (size), for MKL primitives
template <typename T>
class DnnOpFactory {
 public:
  DnnOpFactory() {}
  ~DnnOpFactory() {}

  DnnOp* GetOp(const std::string& key) {
    auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);
    if (stream_iter == DnnOpFactory<T>::GetHashMap().end()) {
      return nullptr;
    } else {
      return stream_iter->second;
    }
  }

  void SetOp(const std::string& key, DnnOp* op) {
    auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);

    CHECK(stream_iter == DnnOpFactory<T>::GetHashMap().end());

    DnnOpFactory<T>::GetHashMap()[key] = op;
  }

 private:
  static inline std::unordered_map<std::string, DnnOp*>& GetHashMap() {
    static thread_local std::unordered_map<std::string, DnnOp*> map_;
    return map_;
  }
};

// utility class for creating keys of MKL primitive pool.
class FactoryKeyCreator {
 public:
  FactoryKeyCreator() {
    key_.reserve(kMaxKeyLength);
  }

  ~FactoryKeyCreator() {}

  void AddAsKey(const string& str) {
    auto buffer = reinterpret_cast<const char*>(str.c_str());
    Append(buffer, str.length());
  }

  void AddAsKey(const mkldnn::memory::dims& dims) {
    for (unsigned int i = 0; i < dims.size(); i++) {
      AddAsKey<int>(dims[i]);
    }
  }

  template <typename T>
  void AddAsKey(const T data) {
    auto buffer = reinterpret_cast<const char*>(&data);
    Append(buffer, sizeof(T));
  }

  std::string GetKey() {
    return key_;
  }

 private:
  string key_;
  const char delimiter = 'x';
  const int kMaxKeyLength = 256;
  void Append(const char* data, int len) {
    key_.append(data, len);
    key_.append(1, delimiter);
  }
};
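The key creator above just serializes each field's raw bytes with a delimiter, so two primitives share a pool entry only when every dimension, stride, and padding matches. A standalone sketch of the same idea (hypothetical names, std types only):

```c++
#include <cstdio>
#include <string>
#include <vector>

// Appends the raw bytes of each int plus a delimiter, mirroring AddAsKey.
void AppendDims(const std::vector<int>& dims, std::string* key) {
  for (int d : dims) {
    key->append(reinterpret_cast<const char*>(&d), sizeof(d));
    key->append(1, 'x');  // delimiter between fields
  }
}

int main() {
  std::string key_a, key_b;
  AppendDims({1, 3, 224, 224}, &key_a);
  AppendDims({1, 3, 224, 224}, &key_b);
  std::printf("equal: %d\n", key_a == key_b);  // equal: 1 -> same pool entry
  return 0;
}
```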

#endif  // INTEL_MKL_DNN

}  // namespace tensorflow
#endif  // INTEL_MKL

@ -1,5 +1,5 @@
# Roadmap
**Last updated: Feb 15, 2018**
**Last updated: Apr 27, 2018**

TensorFlow is a rapidly moving, community supported project. This document is intended
to provide guidance about priorities and focus areas of the core set of TensorFlow
@ -14,12 +14,12 @@ expected in the next one to two releases.

### APIs
#### High Level APIs:
* Easy multi-GPU utilization with Estimators
* Easy multi-GPU and TPU utilization with Estimators
* Easy-to-use high-level pre-made estimators for Gradient Boosted Trees, Time Series, and other models

#### Eager Execution:
* Efficient utilization of multiple GPUs
* Distributed training (multi-machine)
* Distributed training support (multi-machine)
* Performance improvements
* Simpler export to a GraphDef/SavedModel

@ -31,14 +31,14 @@ to create Keras models Eager- style via Model subclassing)

#### Official Models:
* A set of
[reference models](https://github.com/tensorflow/models/tree/master/official)
[models](https://github.com/tensorflow/models/tree/master/official)
across image recognition, speech, object detection, and
translation that demonstrate best practices and serve as a starting point for
high-performance model development.

#### Contrib:
* Deprecation notices added to parts of tf.contrib where preferred implementations exist outside of tf.contrib.
* As much as possible, large projects inside tf.contrib moved to separate repositories.
* Deprecate parts of tf.contrib where preferred implementations exist outside of tf.contrib.
* As much as possible, move large projects inside tf.contrib to separate repositories.
* The tf.contrib module will eventually be discontinued in its current form; experimental development will in future happen in other repositories.

@ -50,36 +50,72 @@ across image recognition, speech, object detection, and

### Platforms
#### TensorFlow Lite:
* Increased coverage of supported ops in TensorFlow Lite
* Increase coverage of supported ops in TensorFlow Lite
* Easier conversion of a trained TensorFlow graph for use on TensorFlow Lite
* Support for GPU acceleration in TensorFlow Lite (iOS and Android)
* Support for hardware accelerators via Android NeuralNets API
* Improved CPU performance by quantization and other network optimizations (eg. pruning, distillation)
* Increased support for devices beyond Android and iOS (eg. RPi, Cortex-M)
* Improve CPU performance by quantization and other network optimizations (eg. pruning, distillation)
* Increase support for devices beyond Android and iOS (eg. RPi, Cortex-M)

#### TensorFlow.js:
* Release package for Node.js bindings to the TensorFlow C API through the TensorFlow.js backend interface
* Expand support for importing TensorFlow SavedModels and Keras models into browser with unified APIs supporting retraining in browser
* Improve Layers API and allow model exporting/saving
* Release tfjs-data API for efficient data input pipelines

#### TensorFlow with Swift:
* Establish open source project including documentation, open design, and code availability.
* Continue implementing and refining implementation and design through 2018.
* Aim for implementation to be solid enough for general use later in 2018.

### Performance
#### Distributed TensorFlow:
* Multi-GPU support optimized for a variety of GPU topologies
* Improved mechanisms for distributing computations on several machines
* Optimize Multi-GPU support for a variety of GPU topologies
* Improve mechanisms for distributing computations on several machines

#### Optimizations:
* Mixed precision training support with initial example model and guide
* Native TensorRT support
#### GPU Optimizations:
* Simplify mixed precision API with initial example model and guide.
* Finalize TensorRT API and move to core.
* CUDA 9.2 and NCCL 2.x default in TensorFlow builds.
* Optimizations for DGX-2.
* Remove support for CUDA less than 8.x and cuDNN less than 6.x.


#### CPU Optimizations
* Int8 support for SkyLake via MKL
* Dynamic loading of SIMD-optimized kernels
* MKL for Linux and Windows

### End-to-end ML systems:
#### TensorFlow Hub:
* Expand support for module-types in TF Hub with TF Eager integration, Keras layers integration, and TensorFlow.js integration
* Accept variable-sized image input
* Improve multi-GPU estimator support
* Document and improve TPU integration

#### TensorFlow Extended:
* Open source more of the TensorFlow Extended platform to facilitate adoption of TensorFlow in production settings.
* Release TFX libraries for Data Validation

### Documentation and Resources:
* Update documentation, tutorials and Getting Started guides on all features and APIs
* Update [Youtube Tensorflow channel](https://youtube.com/tensorflow) weekly with new content:
  Coding TensorFlow - where we teach folks coding with tensorflow
  TensorFlow Meets - where we highlight community contributions
  Ask TensorFlow - where we answer community questions
  Guest and Showcase videos
* Update [Official TensorFlow blog](https://blog.tensorflow.org) with regular articles from Google team and the Community

### Documentation and Usability:
* Updated documentation, tutorials and Getting Started guides
* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications

### Community and Partner Engagement
#### Special Interest Groups:
* Mobilizing the community to work together in focused domains
* Mobilize the community to work together in focused domains
* [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute): build and packaging of TensorFlow
* More to be identified and launched
* SIG TensorBoard, SIG Rust, and more to be identified and launched

#### Community:
* Incorporate public feedback on significant design decisions via a Request-for-Comment (RFC) process
* Formalize process for external contributions to land in TensorFlow and associated projects
* Grow global TensorFlow communities and user groups
* Collaborate with partners to co-develop and publish research papers
* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications

@ -38,8 +38,10 @@ Estimators automatically write the following to disk:
uses to create visualizations.

To specify the top-level directory in which the Estimator stores its
information, assign a value to the optional `model_dir` argument of any
Estimator's constructor. For example, the following code sets the `model_dir`
information, assign a value to the optional `model_dir` argument of *any*
`Estimator`'s constructor.
Taking `DNNClassifier` as an example,
the following code sets the `model_dir`
argument to the `models/iris` directory:

```python

@ -138,7 +138,7 @@ The model will represent the buckets as follows:
|< 1960              | [1, 0, 0, 0] |
|>= 1960 but < 1980  | [0, 1, 0, 0] |
|>= 1980 but < 2000  | [0, 0, 1, 0] |
|> 2000              | [0, 0, 0, 1] |
|>= 2000             | [0, 0, 0, 1] |
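A minimal sketch of how a year maps to the one-hot bucket vector in the corrected table (plain C++ with hypothetical names; not the feature-column implementation):

```c++
#include <array>
#include <cstdio>

// Boundaries 1960, 1980, 2000 give four buckets: <1960, [1960, 1980),
// [1980, 2000), and >=2000, matching the table above.
std::array<int, 4> BucketizeYear(int year) {
  std::array<int, 4> one_hot = {0, 0, 0, 0};
  if (year < 1960) one_hot[0] = 1;
  else if (year < 1980) one_hot[1] = 1;
  else if (year < 2000) one_hot[2] = 1;
  else one_hot[3] = 1;
  return one_hot;
}

int main() {
  auto v = BucketizeYear(1985);
  std::printf("[%d, %d, %d, %d]\n", v[0], v[1], v[2], v[3]);  // [0, 0, 1, 0]
  return 0;
}
```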
|
||||
|
||||
Why would you want to split a number—a perfectly valid input to your
|
||||
model—into a categorical value? Well, notice that the categorization splits a
|
||||
|
@ -10,7 +10,7 @@ course prior to diving into TensorFlow documentation:
|
||||
TensorFlow is a tool for machine learning. While it contains a wide range of
|
||||
functionality, TensorFlow is mainly designed for deep neural network models.
|
||||
|
||||
The easiest way to get started with tensorflow is using Eager Execution.
|
||||
The easiest way to get started with TensorFlow is using Eager Execution.
|
||||
|
||||
* @{$get_started/eager}, is for anyone new to machine learning or TensorFlow.
|
||||
|
||||
|
@ -38,7 +38,7 @@ enable TensorFlow for C:
|
||||
OS="linux" # Change to "darwin" for macOS
|
||||
TARGET_DIRECTORY="/usr/local"
|
||||
curl -L \
|
||||
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc0.tar.gz" |
|
||||
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
|
||||
sudo tar -C $TARGET_DIRECTORY -xz
|
||||
|
||||
The `tar` command extracts the TensorFlow C library into the `lib`
|
||||
|
@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
|
||||
TF_TYPE="cpu" # Change to "gpu" for GPU support
|
||||
TARGET_DIRECTORY='/usr/local'
|
||||
curl -L \
|
||||
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc0.tar.gz" |
|
||||
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" |
|
||||
sudo tar -C $TARGET_DIRECTORY -xz
|
||||
|
||||
The `tar` command extracts the TensorFlow C library into the `lib`
|
||||
|
@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
|
||||
<dependency>
|
||||
<groupId>org.tensorflow</groupId>
|
||||
<artifactId>tensorflow</artifactId>
|
||||
<version>1.8.0-rc0</version>
|
||||
<version>1.8.0-rc1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
|
||||
<dependency>
|
||||
<groupId>org.tensorflow</groupId>
|
||||
<artifactId>tensorflow</artifactId>
|
||||
<version>1.8.0-rc0</version>
|
||||
<version>1.8.0-rc1</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@ -124,12 +124,12 @@ instead:
|
||||
<dependency>
|
||||
<groupId>org.tensorflow</groupId>
|
||||
<artifactId>libtensorflow</artifactId>
|
||||
<version>1.8.0-rc0</version>
|
||||
<version>1.8.0-rc1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.tensorflow</groupId>
|
||||
<artifactId>libtensorflow_jni_gpu</artifactId>
|
||||
<version>1.8.0-rc0</version>
|
||||
<version>1.8.0-rc1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -148,7 +148,7 @@ refer to the simpler instructions above instead.
|
||||
Take the following steps to install TensorFlow for Java on Linux or macOS:
|
||||
|
||||
1. Download
|
||||
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc0.jar),
|
||||
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
|
||||
which is the TensorFlow Java Archive (JAR).
|
||||
|
||||
2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
|
||||
@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
|
||||
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
|
||||
mkdir -p ./jni
|
||||
curl -L \
|
||||
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc0.tar.gz" |
|
||||
"https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
|
||||
tar -xz -C ./jni
|
||||
|
||||
### Install on Windows
|
||||
@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
|
||||
Take the following steps to install TensorFlow for Java on Windows:
|
||||
|
||||
1. Download
|
||||
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc0.jar),
|
||||
[libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
|
||||
which is the TensorFlow Java Archive (JAR).
|
||||
2. Download the following Java Native Interface (JNI) file appropriate for
|
||||
[TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc0.zip).
|
||||
[TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip).
|
||||
3. Extract this .zip file.
|
||||
|
||||
|
||||
@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the
|
||||
downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
|
||||
as follows:
|
||||
|
||||
<pre><b>javac -cp libtensorflow-1.8.0-rc0.jar HelloTF.java</b></pre>
|
||||
<pre><b>javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java</b></pre>
|
||||
|
||||
|
||||
### Running
|
||||
@ -241,11 +241,11 @@ two files are available to the JVM:
|
||||
For example, the following command line executes the `HelloTF` program on Linux
|
||||
and macOS X:
|
||||
|
||||
<pre><b>java -cp libtensorflow-1.8.0-rc0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
|
||||
<pre><b>java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
|
||||
|
||||
And the following command line executes the `HelloTF` program on Windows:
|
||||
|
||||
<pre><b>java -cp libtensorflow-1.8.0-rc0.jar;. -Djava.library.path=jni HelloTF</b></pre>
|
||||
<pre><b>java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
|
||||
|
||||
If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
|
||||
installed TensorFlow for Java and are ready to use the API. If the program
|
||||
|
@ -1,139 +1,266 @@
# Installing TensorFlow on Ubuntu

This guide explains how to install TensorFlow on Ubuntu. Although these
instructions might also work on other Linux variants, we have only
tested (and we only support) these instructions on machines meeting the
following requirements:
This guide explains how to install TensorFlow on Ubuntu Linux. While these
instructions may work on other Linux variants, they are tested and supported with
the following system requirements:

* 64-bit desktops or laptops
* Ubuntu 16.04 or higher
* 64-bit desktops or laptops
* Ubuntu 16.04 or higher


## Determine which TensorFlow to install
## Choose which TensorFlow to install

You must choose one of the following types of TensorFlow to install:
The following TensorFlow variants are available for installation:

* **TensorFlow with CPU support only**. If your system does not have a
  NVIDIA® GPU, you must install this version. Note that this version of
  TensorFlow is typically much easier to install (typically,
  in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend
  installing this version first.
* **TensorFlow with GPU support**. TensorFlow programs typically run
  significantly faster on a GPU than on a CPU. Therefore, if your
  system has a NVIDIA® GPU meeting the prerequisites shown below and you
  need to run performance-critical applications, you should ultimately
  install this version.

<a name="NVIDIARequirements"></a>
### NVIDIA requirements to run TensorFlow with GPU support

If you are installing TensorFlow with GPU support using one of the
mechanisms described in this guide, then the following NVIDIA software
must be installed on your system:

* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
  [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
  Ensure that you append the relevant CUDA pathnames to the
  `LD_LIBRARY_PATH` environment variable as described in the
  NVIDIA documentation.
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
  [NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
  Ensure that you create the `CUDA_HOME` environment variable as
  described in the NVIDIA documentation.
* GPU card with CUDA Compute Capability 3.0 or higher for building
  from source and 3.5 or higher for our binaries. See
  [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for
  a list of supported GPU cards.
* [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA
  Toolkit.
* The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface.
  This library provides advanced profiling support. To install this library,
  issue the following command for CUDA Toolkit >= 8.0:

  <pre>
  $ <b>sudo apt-get install cuda-command-line-tools</b>
  </pre>

  and add its path to your `LD_LIBRARY_PATH` environment variable:

  <pre>
  $ <b>export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</b>
  </pre>

  For CUDA Toolkit <= 7.5 do:

  <pre>
  $ <b>sudo apt-get install libcupti-dev</b>
  </pre>

* **[OPTIONAL]** For optimized inferencing performance, you can also install
  **NVIDIA TensorRT 3.0**. The minimal set of TensorRT runtime components needed
  for use with the pre-built `tensorflow-gpu` package can be installed as follows:

  <pre>
  $ <b>wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
  $ <b>sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
  $ <b>sudo apt-get update</b>
  $ <b>sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</b>
  </pre>

  **IMPORTANT:** For compatibility with the pre-built `tensorflow-gpu`
  package, please use the Ubuntu **14.04** package of TensorRT as shown above,
  even when installing onto an Ubuntu 16.04 system.<br/>
  <br/>
  To build the TensorFlow-TensorRT integration module from source rather than
  using pre-built binaries, see the [module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
  For detailed TensorRT installation instructions, see [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).<br/>
  <br/>
  To avoid cuDNN version conflicts during later system upgrades, you can hold
  the cuDNN version at 7.0.5:

  <pre>
  $ <b>sudo apt-mark hold libcudnn7 libcudnn7-dev</b>
  </pre>

  To later allow upgrades, you can remove the hold:

  <pre>
  $ <b>sudo apt-mark unhold libcudnn7 libcudnn7-dev</b>
  </pre>

If you have an earlier version of the preceding packages, please upgrade to
the specified versions. If upgrading is not possible, then you may still run
TensorFlow with GPU support, if you @{$install_sources$install TensorFlow from Sources}.
* __TensorFlow with CPU support only__. If your system does not have a
  NVIDIA® GPU, you must install this version. This version of TensorFlow is
  usually easier to install, so even if you have an NVIDIA GPU, we recommend
  installing this version first.
* __TensorFlow with GPU support__. TensorFlow programs usually run much faster on
  a GPU instead of a CPU. If you run performance-critical applications and your
  system has an NVIDIA® GPU that meets the prerequisites, you should install
  this version. See [TensorFlow GPU support](#NVIDIARequirements) for details.


## Determine how to install TensorFlow
## How to install TensorFlow

You must pick the mechanism by which you install TensorFlow. The
supported choices are as follows:
There are a few options to install TensorFlow on your machine:

* [Virtualenv](#InstallingVirtualenv)
* ["native" pip](#InstallingNativePip)
* [Docker](#InstallingDocker)
* [Anaconda](#InstallingAnaconda)
* installing from sources, which is documented in
  [a separate guide](https://www.tensorflow.org/install/install_sources).
* [Use pip in a virtual environment](#InstallingVirtualenv) *(recommended)*
* [Use pip in your system environment](#InstallingNativePip)
* [Configure a Docker container](#InstallingDocker)
* [Use pip in Anaconda](#InstallingAnaconda)
* [Install TensorFlow from source](/install/install_sources)

**We recommend the Virtualenv installation.**
[Virtualenv](https://virtualenv.pypa.io/en/stable/)
is a virtual Python environment isolated from other Python development,
incapable of interfering with or being affected by other Python programs
on the same machine. During the Virtualenv installation process,
you will install not only TensorFlow but also all the packages that
TensorFlow requires. (This is actually pretty easy.)
To start working with TensorFlow, you simply need to "activate" the
virtual environment. All in all, Virtualenv provides a safe and
reliable mechanism for installing and running TensorFlow.

<a name="InstallingVirtualenv"></a>
### Use `pip` in a virtual environment

Native pip installs TensorFlow directly on your system without going
through any container system. **We recommend the native pip install for
system administrators aiming to make TensorFlow available to everyone on a
multi-user system.** Since a native pip installation is not walled-off in
a separate container, the pip installation might interfere with other
Python-based installations on your system. However, if you understand pip
and your Python environment, a native pip installation often entails only
a single command.
Key Point: Using a virtual environment is the recommended install method.

The [Virtualenv](https://virtualenv.pypa.io/en/stable/) tool creates virtual
Python environments that are isolated from other Python development on the same
machine. In this scenario, you install TensorFlow and its dependencies within a
virtual environment that is available when *activated*. Virtualenv provides a
reliable way to install and run TensorFlow while avoiding conflicts with the rest
of the system.

##### 1. Install Python, `pip`, and `virtualenv`.

On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V  # or: python3 -V</code>
<code class="devsite-terminal">pip -V     # or: pip3 -V</code>
</pre>

To install these packages on Ubuntu:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev python-virtualenv   # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev python-virtualenv # for Python 3.n</code>
</pre>

We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>

If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>

##### 2. Create a directory for the virtual environment and choose a Python interpreter.

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">mkdir ~/tensorflow  # somewhere to work out of</code>
<code class="devsite-terminal">cd ~/tensorflow</code>
<code># Choose one of the following Python environments for the ./venv directory:</code>
<code class="devsite-terminal">virtualenv --system-site-packages <var>venv</var>            # Use python default (Python 2.7)</code>
<code class="devsite-terminal">virtualenv --system-site-packages -p python3 <var>venv</var> # Use Python 3.n</code>
</pre>

##### 3. Activate the Virtualenv environment.

Use one of these shell-specific commands to activate the virtual environment:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate      # bash, sh, ksh, or zsh</code>
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate.csh  # csh or tcsh</code>
<code class="devsite-terminal">. ~/tensorflow/<var>venv</var>/bin/activate.fish      # fish</code>
</pre>

When the Virtualenv is activated, the shell prompt displays as `(venv) $`.

##### 4. Upgrade `pip` in the virtual environment.

Within the active virtual environment, upgrade `pip`:

<pre class="prettyprint lang-bsh">
(venv)$ pip install -U pip
</pre>

You can install other Python packages within the virtual environment without
affecting packages outside the `virtualenv`.

##### 5. Install TensorFlow in the virtual environment.

Choose one of the available TensorFlow packages for installation:

* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support

Within an active Virtualenv environment, use `pip` to install the package:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">pip install -U tensorflow</code>
</pre>

Use `pip list` to show the packages installed in the virtual environment.
[Validate the install](#ValidateYourInstallation) and test the version:

<pre class="prettyprint lang-bsh">
(venv)$ python -c "import tensorflow as tf; print(tf.__version__)"
</pre>

Success: TensorFlow is now installed.

Use the `deactivate` command to stop the Python virtual environment.

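Beyond printing the version string, you can run a one-line graph as a slightly
stronger smoke test. A minimal sketch (TensorFlow 1.x graph API, the same short
program this guide uses for validation):

```python
# hello_tf.py - minimal TensorFlow smoke test
import tensorflow as tf

hello = tf.constant('Hello, TensorFlow!')
with tf.Session() as sess:
    print(sess.run(hello))  # prints b'Hello, TensorFlow!' on Python 3
```
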
#### Problems

If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:

<pre class="prettyprint lang-bsh">
(venv)$ pip install --upgrade <var>remote-pkg-URL</var>   # Python 2.7
(venv)$ pip3 install --upgrade <var>remote-pkg-URL</var>  # Python 3.n
</pre>

The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.

See [Common Installation Problems](#common_installation_problems) if you
encounter problems.

#### Uninstall TensorFlow

To uninstall TensorFlow, remove the Virtualenv directory you created in step 2:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">deactivate  # stop the virtualenv</code>
<code class="devsite-terminal">rm -r ~/tensorflow/<var>venv</var></code>
</pre>

<a name="InstallingNativePip"></a>
|
||||
### Use `pip` in your system environment
|
||||
|
||||
Use `pip` to install the TensorFlow package directly on your system without
|
||||
using a container or virtual environment for isolation. This method is
|
||||
recommended for system administrators that want a TensorFlow installation that is
|
||||
available to everyone on a multi-user system.
|
||||
|
||||
Since a system install is not isolated, it could interfere with other
|
||||
Python-based installations. But if you understand `pip` and your Python
|
||||
environment, a system `pip` install is straightforward.
|
||||
|
||||
See the
|
||||
[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py)
|
||||
for a list of packages that TensorFlow installs.
|
||||
|
||||
##### 1. Install Python, `pip`, and `virtualenv`.

On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V  # or: python3 -V</code>
<code class="devsite-terminal">pip -V     # or: pip3 -V</code>
</pre>

To install these packages on Ubuntu:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev   # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev # for Python 3.n</code>
</pre>

We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>

If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>

##### 2. Install TensorFlow on the system.

Choose one of the available TensorFlow packages for installation:

* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support

And use `pip` to install the package for Python 2 or 3:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U tensorflow   # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install -U tensorflow  # Python 3.n</code>
</pre>

Use `pip list` to show the packages installed on the system.
[Validate the install](#ValidateYourInstallation) and test the version:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -c "import tensorflow as tf; print(tf.__version__)"</code>
</pre>

Success: TensorFlow is now installed.

#### Problems

If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install --upgrade <var>remote-pkg-URL</var>   # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install --upgrade <var>remote-pkg-URL</var>  # Python 3.n</code>
</pre>

The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.

See [Common Installation Problems](#common_installation_problems) if you
encounter problems.

#### Uninstall TensorFlow

To uninstall TensorFlow on your system, use one of the following commands:

<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip uninstall tensorflow   # for Python 2.7</code>
<code class="devsite-terminal">sudo pip3 uninstall tensorflow  # for Python 3.n</code>
</pre>

<a name="InstallingDocker"></a>
|
||||
### Configure a Docker container
|
||||
|
||||
Docker completely isolates the TensorFlow installation
|
||||
from pre-existing packages on your machine. The Docker container contains
|
||||
@ -142,210 +269,6 @@ large (hundreds of MBs). You might choose the Docker installation if you are
|
||||
incorporating TensorFlow into a larger application architecture that already
|
||||
uses Docker.
|
||||
|
||||
In Anaconda, you may use conda to create a virtual environment.
|
||||
However, within Anaconda, we recommend installing TensorFlow with the
|
||||
`pip install` command, not with the `conda install` command.
|
||||
|
||||
**NOTE:** The conda package is community supported, not officially supported.
|
||||
That is, the TensorFlow team neither tests nor maintains the conda package.
|
||||
Use that package at your own risk.
|
||||
|
||||
|
||||
<a name="InstallingVirtualenv"></a>
|
||||
## Installing with Virtualenv
|
||||
|
||||
Take the following steps to install TensorFlow with Virtualenv:
|
||||
|
||||
1. Install pip and Virtualenv by issuing one of the following commands:
|
||||
|
||||
<pre>$ <b>sudo apt-get install python-pip python-dev python-virtualenv</b> # for Python 2.7
|
||||
$ <b>sudo apt-get install python3-pip python3-dev python-virtualenv</b> # for Python 3.n</pre>
|
||||
|
||||
2. Create a Virtualenv environment by issuing one of the following commands:
|
||||
|
||||
<pre>$ <b>virtualenv --system-site-packages</b> <i>targetDirectory</i> # for Python 2.7
|
||||
$ <b>virtualenv --system-site-packages -p python3</b> <i>targetDirectory</i> # for Python 3.n</pre>
|
||||
|
||||
where <code><em>targetDirectory</em></code> specifies the top of the
|
||||
Virtualenv tree. Our instructions assume that
|
||||
<code><em>targetDirectory</em></code> is `~/tensorflow`, but you may
|
||||
choose any directory.
|
||||
|
||||
3. Activate the Virtualenv environment by issuing one of the following
|
||||
commands:
|
||||
|
||||
<pre>$ <b>source ~/tensorflow/bin/activate</b> # bash, sh, ksh, or zsh
|
||||
$ <b>source ~/tensorflow/bin/activate.csh</b> # csh or tcsh
|
||||
$ <b>. ~/tensorflow/bin/activate.fish</b> # fish</pre>
|
||||
|
||||
The preceding <tt>source</tt> command should change your prompt
|
||||
to the following:
|
||||
|
||||
<pre>(tensorflow)$ </pre>
|
||||
|
||||
4. Ensure pip ≥8.1 is installed:
|
||||
|
||||
<pre>(tensorflow)$ <b>easy_install -U pip</b></pre>
|
||||
|
||||
5. Issue one of the following commands to install TensorFlow in the active
|
||||
Virtualenv environment:
|
||||
|
||||
<pre>(tensorflow)$ <b>pip install --upgrade tensorflow</b> # for Python 2.7
|
||||
(tensorflow)$ <b>pip3 install --upgrade tensorflow</b> # for Python 3.n
|
||||
(tensorflow)$ <b>pip install --upgrade tensorflow-gpu</b> # for Python 2.7 and GPU
|
||||
(tensorflow)$ <b>pip3 install --upgrade tensorflow-gpu</b> # for Python 3.n and GPU</pre>
|
||||
|
||||
If the above command succeeds, skip Step 6. If the preceding
|
||||
command fails, perform Step 6.
|
||||
|
||||
6. (Optional) If Step 5 failed (typically because you invoked a pip version
|
||||
lower than 8.1), install TensorFlow in the active Virtualenv environment
|
||||
by issuing a command of the following format:
|
||||
|
||||
<pre>(tensorflow)$ <b>pip install --upgrade</b> <i>tfBinaryURL</i> # Python 2.7
|
||||
(tensorflow)$ <b>pip3 install --upgrade</b> <i>tfBinaryURL</i> # Python 3.n </pre>
|
||||
|
||||
where <code><em>tfBinaryURL</em></code> identifies the URL of the
|
||||
TensorFlow Python package. The appropriate value of
|
||||
<code><em>tfBinaryURL</em></code>depends on the operating system,
|
||||
Python version, and GPU support. Find the appropriate value for
|
||||
<code><em>tfBinaryURL</em></code> for your system
|
||||
[here](#the_url_of_the_tensorflow_python_package). For example, if you
|
||||
are installing TensorFlow for Linux, Python 3.4, and CPU-only support,
|
||||
issue the following command to install TensorFlow in the active
|
||||
Virtualenv environment:
|
||||
|
||||
<pre>(tensorflow)$ <b>pip3 install --upgrade \
|
||||
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre>
|
||||
|
||||
If you encounter installation problems, see
|
||||
[Common Installation Problems](#common_installation_problems).
|
||||
|
||||
|
||||
### Next Steps

After installing TensorFlow,
[validate the installation](#ValidateYourInstallation).

Note that you must activate the Virtualenv environment each time you
use TensorFlow. If the Virtualenv environment is not currently active,
invoke one of the following commands:

<pre>$ <b>source ~/tensorflow/bin/activate</b>     # bash, sh, ksh, or zsh
$ <b>source ~/tensorflow/bin/activate.csh</b> # csh or tcsh</pre>

When the Virtualenv environment is active, you may run
TensorFlow programs from this shell. Your prompt will become
the following to indicate that your tensorflow environment is active:

<pre>(tensorflow)$ </pre>

When you are done using TensorFlow, you may deactivate the
environment by invoking the `deactivate` function as follows:

<pre>(tensorflow)$ <b>deactivate</b> </pre>

The prompt will revert to your default prompt (as defined by the
`PS1` environment variable).


### Uninstalling TensorFlow

To uninstall TensorFlow, simply remove the tree you created.
For example:

<pre>$ <b>rm -r</b> <i>targetDirectory</i> </pre>


<a name="InstallingNativePip"></a>
## Installing with native pip

You may install TensorFlow through pip, choosing between a simple
installation procedure or a more complex one.

**Note:** The
[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py)
lists the TensorFlow packages that pip will install or upgrade.

### Prerequisite: Python and Pip

Python is automatically installed on Ubuntu. Take a moment to confirm
(by issuing a `python -V` command) that one of the following Python
versions is already installed on your system:

* Python 2.7
* Python 3.4+

The pip or pip3 package manager is *usually* installed on Ubuntu. Take a
moment to confirm (by issuing a `pip -V` or `pip3 -V` command)
that pip or pip3 is installed. We strongly recommend version 8.1 or higher
of pip or pip3. If Version 8.1 or later is not installed, issue the
following command, which will either install or upgrade to the latest
pip version:

<pre>$ <b>sudo apt-get install python-pip python-dev</b>   # for Python 2.7
$ <b>sudo apt-get install python3-pip python3-dev</b> # for Python 3.n
</pre>

### Install TensorFlow

Assuming the prerequisite software is installed on your Linux host,
take the following steps:

1. Install TensorFlow by invoking **one** of the following commands:

   <pre>$ <b>pip install tensorflow</b>      # Python 2.7; CPU support (no GPU support)
   $ <b>pip3 install tensorflow</b>     # Python 3.n; CPU support (no GPU support)
   $ <b>pip install tensorflow-gpu</b>  # Python 2.7; GPU support
   $ <b>pip3 install tensorflow-gpu</b> # Python 3.n; GPU support</pre>

   If the preceding command runs to completion, you should now
   [validate your installation](#ValidateYourInstallation).

2. (Optional.) If Step 1 failed, install the latest version of TensorFlow
   by issuing a command of the following format:

   <pre>$ <b>sudo pip install --upgrade</b> <i>tfBinaryURL</i>  # Python 2.7
   $ <b>sudo pip3 install --upgrade</b> <i>tfBinaryURL</i> # Python 3.n</pre>

   where <code><em>tfBinaryURL</em></code> identifies the URL of the
   TensorFlow Python package. The appropriate value of
   <code><em>tfBinaryURL</em></code> depends on the operating system,
   Python version, and GPU support. Find the appropriate value for
   <code><em>tfBinaryURL</em></code>
   [here](#the_url_of_the_tensorflow_python_package). For example, to
   install TensorFlow for Linux, Python 3.4, and CPU-only support, issue
   the following command:

   <pre>
   $ <b>sudo pip3 install --upgrade \
   https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl</b>
   </pre>

   If this step fails, see
   [Common Installation Problems](#common_installation_problems).

### Next Steps

After installing TensorFlow, [validate your installation](#ValidateYourInstallation).


### Uninstalling TensorFlow

To uninstall TensorFlow, issue one of the following commands:

<pre>
$ <b>sudo pip uninstall tensorflow</b>  # for Python 2.7
$ <b>sudo pip3 uninstall tensorflow</b> # for Python 3.n
</pre>


<a name="InstallingDocker"></a>
## Installing with Docker

Take the following steps to install TensorFlow through Docker:

1. Install Docker on your machine as described in the
@ -364,7 +287,7 @@ Take the following steps to install TensorFlow through Docker:
The remainder of this section explains how to launch a Docker container.


### CPU-only
#### CPU-only

To launch a Docker container with CPU-only support (that is, without
GPU support), enter a command of the following format:
@ -414,7 +337,7 @@ $ <b>docker run -it -p 8888:8888 tensorflow/tensorflow</b>
Docker will download the TensorFlow binary image the first time you launch it.


### GPU support
#### GPU support

Prior to installing TensorFlow with GPU support, ensure that your system meets all
[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container
@ -470,14 +393,22 @@ For more details see the
[TensorFlow docker readme](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/docker).


### Next Steps
#### Next Steps

You should now
[validate your installation](#ValidateYourInstallation).

<a name="InstallingAnaconda"></a>
|
||||
## Installing with Anaconda
|
||||
### Use `pip` in Anaconda
|
||||
|
||||
Anaconda provides the `conda` utility to create a virtual environment. However,
|
||||
within Anaconda, we recommend installing TensorFlow using the `pip install`
|
||||
command and *not* with the `conda install` command.
|
||||
|
||||
Caution: `conda` is a community supported package this is not officially
|
||||
maintained by the TensorFlow team. Use this package at your own risk since it is
|
||||
not tested on new TensorFlow releases.
|
||||
|
||||
Take the following steps to install TensorFlow in an Anaconda environment:
|
||||
|
||||
@ -507,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
|
||||
|
||||
<pre>
|
||||
(tensorflow)$ <b>pip install --ignore-installed --upgrade \
|
||||
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre>
|
||||
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
|
||||
|
||||
<a name="ValidateYourInstallation"></a>
|
||||
## Validate your installation
|
||||
@ -563,11 +494,89 @@ installation problems](#common_installation_problems).
|
||||
If you are new to machine learning, we recommend the following:
|
||||
|
||||
* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course)
|
||||
* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners}
|
||||
* @{$get_started/eager}
|
||||
|
||||
If you are experienced with machine learning but new to TensorFlow, see
|
||||
@{$get_started/eager}.
|
||||
|
||||
<a name="NVIDIARequirements"></a>
|
||||
## TensorFlow GPU support
|
||||
|
||||
To install TensorFlow with GPU support, configure the following NVIDIA® software
|
||||
on your system:
|
||||
|
||||
* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
|
||||
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
|
||||
Append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environmental
|
||||
variable as described in the NVIDIA documentation.
|
||||
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
|
||||
[NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
|
||||
Create the `CUDA_HOME` environment variable as described in the NVIDIA
|
||||
documentation.
|
||||
* A GPU card with CUDA Compute Capability 3.0 or higher for building TensorFlow
|
||||
from source. To use the TensorFlow binaries, version 3.5 or higher is required.
|
||||
See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a
|
||||
list of supported GPU cards.
|
||||
* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA
|
||||
Toolkit.
|
||||
* The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This
|
||||
library provides advanced profiling support. To install this library,
|
||||
use the following command for CUDA Toolkit >= 8.0:
|
||||
|
||||
<pre class="prettyprint lang-bsh">
|
||||
<code class="devsite-terminal">sudo apt-get install cuda-command-line-tools</code>
|
||||
</pre>
|
||||
|
||||
Add this path to the `LD_LIBRARY_PATH` environmental variable:
|
||||
|
||||
<pre class="prettyprint lang-bsh">
|
||||
<code class="devsite-terminal">export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</code>
|
||||
</pre>
|
||||
|
||||
For CUDA Toolkit <= 7.5 use:
|
||||
|
||||
<pre class="prettyprint lang-bsh">
|
||||
<code class="devsite-terminal">sudo apt-get install libcupti-dev</code>
|
||||
</pre>
|
||||
|
||||
* *OPTIONAL*: For optimized performance during inference, install
  *NVIDIA TensorRT 3.0*. To install the minimal amount of TensorRT
  runtime components required for use with the pre-built `tensorflow-gpu` package:

  <pre class="prettyprint lang-bsh">
  <code class="devsite-terminal">wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
  <code class="devsite-terminal">sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
  <code class="devsite-terminal">sudo apt-get update</code>
  <code class="devsite-terminal">sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</code>
  </pre>

  Note: For compatibility with the pre-built `tensorflow-gpu` package, use the
  Ubuntu *14.04* package of TensorRT (shown above). Use this even when installing
  on an Ubuntu 16.04 system.

  To build the TensorFlow-TensorRT integration module from source instead of using
  the pre-built binaries, see the
  [module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
  For detailed TensorRT installation instructions, see
  [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).

  To avoid cuDNN version conflicts during later system upgrades, hold the cuDNN
  version at 7.0.5:

  <pre class="prettyprint lang-bsh">
  <code class="devsite-terminal">sudo apt-mark hold libcudnn7 libcudnn7-dev</code>
  </pre>

  To allow upgrades, remove this hold:

  <pre class="prettyprint lang-bsh">
  <code class="devsite-terminal">sudo apt-mark unhold libcudnn7 libcudnn7-dev</code>
  </pre>

If you have an earlier version of the preceding packages, upgrade to the
specified versions. If upgrading is not possible, you can still run TensorFlow
with GPU support by @{$install_sources}.

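Once the NVIDIA software above is in place, a quick way to confirm that a
`tensorflow-gpu` install can actually see the GPU is to list the local devices.
A minimal sketch (TensorFlow 1.x; `device_lib` is the helper commonly used for
this check):

```python
# list_devices.py - confirm TensorFlow sees the GPU
from tensorflow.python.client import device_lib

# With a working CUDA/cuDNN setup this list includes a '/device:GPU:0' entry.
print([d.name for d in device_lib.list_local_devices()])
```
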
## Common installation problems

@ -581,7 +590,7 @@ ask a new question about it on Stack Overflow and specify
the `tensorflow` tag.

<table>
<tr> <th>Stack Overflow Link</th> <th>Error Message</th> </tr>
<tr> <th>Link to GitHub or Stack Overflow</th> <th>Error Message</th> </tr>

<tr>
<td><a href="https://stackoverflow.com/q/36159194">36159194</a></td>
@ -681,14 +690,14 @@ This section documents the relevant values for Linux installations.
CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp27-none-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>


GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp27-none-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>

Note that GPU support requires the NVIDIA hardware and software described in
@ -700,14 +709,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>


GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp34-cp34m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>

Note that GPU support requires the NVIDIA hardware and software described in
@ -719,14 +728,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>


GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp35-cp35m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>


@ -738,14 +747,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>


GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc0-cp36-cp36m-linux_x86_64.whl
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>

@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv:
TensorFlow in the active Virtualenv is as follows:

<pre> $ <b>pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl</b></pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b></pre>

If you encounter installation problems, see
[Common Installation Problems](#common-installation-problems).
@ -242,7 +242,7 @@ take the following steps:
issue the following command:

<pre> $ <b>sudo pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl</b> </pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b> </pre>

If the preceding command fails, see
[installation problems](#common-installation-problems).
@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
TensorFlow for Python 2.7:

<pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl</b></pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl</b></pre>


<a name="ValidateYourInstallation"></a>
@ -524,7 +524,7 @@ The value you specify depends on your Python version.


<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-any.whl
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
</pre>


@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-a


<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py3-none-any.whl
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
</pre>

@ -354,10 +354,10 @@ Invoke `pip install` to install that pip package.
The filename of the `.whl` file depends on your platform.
For example, the following command will install the pip package

for TensorFlow 1.8.0rc0 on Linux:
for TensorFlow 1.8.0rc1 on Linux:

<pre>
$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc0-py2-none-any.whl</b>
$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl</b>
</pre>

## Validate your installation

@ -86,7 +86,7 @@ code. `tf_library` utilizes `tfcompile` to compile the TensorFlow graph into
executable code.

```build
load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library")
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")

# Use the tf_library macro to compile your graph into executable code.
tf_library(
@ -258,8 +258,8 @@ file.

```build
# Example of linking your binary
# Also see //third_party/tensorflow/compiler/aot/tests/BUILD
load("//third_party/tensorflow/compiler/aot:tfcompile.bzl", "tf_library")
# Also see //tensorflow/compiler/aot/tests/BUILD
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")

# The same tf_library call from step 2 above.
tf_library(

0
tensorflow/examples/tutorials/input_fn/__init__.py
Normal file
0
tensorflow/examples/tutorials/input_fn/__init__.py
Normal file
0
tensorflow/examples/tutorials/layers/__init__.py
Normal file
0
tensorflow/examples/tutorials/layers/__init__.py
Normal file
0
tensorflow/examples/tutorials/monitors/__init__.py
Normal file
0
tensorflow/examples/tutorials/monitors/__init__.py
Normal file
@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv")
def main(unused_argv):
  # Load datasets.
  training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float)
      filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
  test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float)
      filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)

  validation_metrics = {
      "accuracy":
@ -83,7 +83,7 @@ def main(unused_argv):

  # Classify two new flower samples.
  new_samples = np.array(
      [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float)
      [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
  y = list(classifier.predict(new_samples))
  print("Predictions: {}".format(str(y)))

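The dtype changes above matter because `np.float` is an alias for 64-bit
`float64`, while the estimator's feature columns expect 32-bit floats. A small
illustrative sketch (the variable names here are hypothetical, not from the
tutorial):

```python
import numpy as np
import tensorflow as tf

samples64 = np.array([[6.4, 3.2, 4.5, 1.5]], dtype=float)  # np.float == float64
samples32 = samples64.astype(np.float32)                   # what the model expects

print(tf.constant(samples64).dtype)  # <dtype: 'float64'>
print(tf.constant(samples32).dtype)  # <dtype: 'float32'>
```
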
@ -5,7 +5,7 @@ Construct and execute TensorFlow graphs in Go.
[](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go)

> *WARNING*: The API defined in this package is not stable and can change
> without notice. The same goes for the awkward package path
> without notice. The same goes for the package path:
> (`github.com/tensorflow/tensorflow/tensorflow/go`).

## Quickstart

@ -21386,7 +21386,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
//
// The `bad_color` argument is the color to use in the generated images for
// non-finite input values. It is a `unit8` 1-D tensor of length `channels`.
// non-finite input values. It is a `uint8` 1-D tensor of length `channels`.
// Each element must be in the range `[0, 255]` (It represents the value of a
// pixel in the output image). Non-finite values in the input tensor are
// replaced by this tensor in the output image. The default value is the color

@ -644,11 +644,9 @@ class Estimator(object):
          sharded=True)
      saver_for_restore.restore(session, checkpoint_path)

      # pylint: disable=protected-access
      local_init_op = (
          estimator_spec.scaffold.local_init_op or
          monitored_session.Scaffold._default_local_init_op())
      # pylint: enable=protected-access
          monitored_session.Scaffold.default_local_init_op())

      # Perform the export
      builder = saved_model_builder.SavedModelBuilder(temp_export_dir)

@ -29,12 +29,14 @@ from tensorflow.python.estimator import run_config as run_config_lib
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
from tensorflow.python.framework import tensor_util
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import models
from tensorflow.python.keras._impl.keras import optimizers
from tensorflow.python.keras._impl.keras.engine.base_layer import Layer
from tensorflow.python.keras._impl.keras.engine.network import Network
from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import metrics as metrics_module
from tensorflow.python.ops import variables as variables_module
@ -55,6 +57,17 @@ def _cast_tensor_to_floatx(x):
  return math_ops.cast(x, K.floatx())


def _convert_tensor(x):
  """Create or cast tensor if needed."""
  if not tensor_util.is_tensor(x):
    # x is a numpy array
    x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x)
  if check_ops.is_numeric_tensor(x):
    # is_numeric_tensor returns False if provided with a numpy array
    x = _cast_tensor_to_floatx(x)
  return x


def _any_variable_initalized():
  """Check if any variable has been initialized in the Keras model.

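For readers outside the TensorFlow codebase, the same conversion can be
sketched with public TF 1.x APIs. This is a simplified stand-in for the
internal `_convert_tensor` above, not the actual implementation; the
`convert_tensor` name and `floatx` parameter are illustrative:

```python
import numpy as np
import tensorflow as tf

def convert_tensor(x, floatx=tf.float32):
  """Wrap numpy input as a tensor; cast numeric tensors to the float dtype."""
  if not isinstance(x, (tf.Tensor, tf.SparseTensor)):
    x = tf.convert_to_tensor(x)          # e.g. a numpy array from an input_fn
  if isinstance(x, tf.Tensor) and (x.dtype.is_floating or x.dtype.is_integer):
    x = tf.cast(x, floatx)               # mirrors _cast_tensor_to_floatx
  return x

t = convert_tensor(np.array([1, 2, 3]))
print(t.dtype)  # <dtype: 'float32'>
```
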
@ -86,7 +99,7 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):
  if isinstance(estimator_io, (list, tuple)):
    # Case currently not supported by most built-in input_fn,
    # but it's good to have for sanity
    return [_cast_tensor_to_floatx(x) for x in estimator_io]
    return [_convert_tensor(x) for x in estimator_io]
  elif isinstance(estimator_io, dict):
    if is_input:
      if keras_model._is_graph_network:
@ -108,12 +121,12 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):
                       'It needs to match one '
                       'of the following: %s' % ('input' if is_input else 'output', key,
                                                 ', '.join(keras_io_names)))
    tensors = [_cast_tensor_to_floatx(estimator_io[io_name])
    tensors = [_convert_tensor(estimator_io[io_name])
               for io_name in keras_io_names]
    return tensors
  else:
    # Plain array.
    return _cast_tensor_to_floatx(estimator_io)
    return _convert_tensor(estimator_io)


def _in_place_subclassed_model_reset(model):
@ -274,8 +287,7 @@ def _clone_and_build_model(mode,
          is_input=False)
    else:
      target_tensors = [
          _cast_tensor_to_floatx(
              sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels))
          _convert_tensor(labels)
      ]

    if keras_model._is_graph_network:

@ -30,6 +30,7 @@ from tensorflow.python.estimator.inputs import numpy_io
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.keras._impl import keras
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import testing_utils
from tensorflow.python.keras._impl.keras.applications import mobilenet
from tensorflow.python.keras._impl.keras.optimizers import SGD
@ -142,16 +143,20 @@ def randomize_io_type(array, name):


def multi_inputs_multi_outputs_model():
  # test multi-input layer
  a = keras.layers.Input(shape=(16,), name='input_a')
  b = keras.layers.Input(shape=(16,), name='input_b')
  m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m')
  dense = keras.layers.Dense(8, name='dense_1')

  a_2 = dense(a)
  # Apply a mask
  s_2 = keras.layers.Lambda(lambda k:
                            K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2])
  b_2 = dense(b)
  merged = keras.layers.concatenate([a_2, b_2], name='merge')
  merged = keras.layers.concatenate([s_2, b_2], name='merge')
  c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
  d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
  model = keras.models.Model(inputs=[a, b], outputs=[c, d])
  model = keras.models.Model(inputs=[a, b, m], outputs=[c, d])
  model.compile(
      loss='categorical_crossentropy',
      optimizer='rmsprop',
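The masking Lambda above keeps the elements of `a_2` where the boolean input
`m` is true and zeroes the rest. In isolation the same effect can be seen with
`tf.where`, which is what `K.switch` lowers to for elementwise conditions on
the TF backend. A minimal sketch:

```python
import tensorflow as tf

m = tf.constant([[True, False, True]])
a = tf.constant([[1.0, 2.0, 3.0]])
masked = tf.where(m, a, tf.zeros_like(a))  # keep where m is True, else 0

with tf.Session() as sess:
    print(sess.run(masked))  # [[1. 0. 3.]]
```
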
@ -352,18 +357,27 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
        test_samples=50,
        input_shape=(16,),
        num_classes=2)
    np.random.seed(_RANDOM_SEED)
    (input_m_train, _), (input_m_test, _) = testing_utils.get_test_data(
        train_samples=_TRAIN_SIZE,
        test_samples=50,
        input_shape=(8,),
        num_classes=2)

    c_train = keras.utils.to_categorical(c_train)
    c_test = keras.utils.to_categorical(c_test)
    d_train = keras.utils.to_categorical(d_train)
    d_test = keras.utils.to_categorical(d_test)

    def train_input_fn():
      input_dict = {'input_a': a_train, 'input_b': b_train}
      input_dict = {'input_a': a_train, 'input_b': b_train,
                    'input_m': input_m_train > 0}
      output_dict = {'dense_2': c_train, 'dense_3': d_train}
      return input_dict, output_dict

    def eval_input_fn():
      input_dict = {'input_a': a_test, 'input_b': b_test}
      input_dict = {'input_a': a_test, 'input_b': b_test,
                    'input_m': input_m_test > 0}
      output_dict = {'dense_2': c_test, 'dense_3': d_test}
      return input_dict, output_dict

@ -35,8 +35,7 @@ class DivisionTestCase(test.TestCase):
    """Test all the different ways to divide."""
    values = [1, 2, 7, 11]
    functions = (lambda x: x), constant_op.constant
    # TODO(irving): Test int8, int16 once we support casts for those.
    dtypes = np.int32, np.int64, np.float32, np.float64
    dtypes = np.int8, np.int16, np.int32, np.int64, np.float32, np.float64

    tensors = []
    checks = []
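The change above removes the TODO and adds `int8`/`int16` to the tested
dtypes, since casts for those types are now supported. In practice this means
Python-3-style true division works on small integer tensors too. A minimal
sketch of the behavior being tested:

```python
import tensorflow as tf

x = tf.constant(7, dtype=tf.int8)
y = tf.constant(2, dtype=tf.int8)

with tf.Session() as sess:
    # True division upcasts the int8 operands to a float dtype.
    print(sess.run(x / y))  # 3.5
```
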
@ -160,7 +160,7 @@ class ReduceJoinTest(UnicodeTestCase):
          separator=separator)
      if not reduction_indices:
        truth = constant_op.constant(truth)
      truth_squeezed = array_ops.squeeze(truth, squeeze_dims=reduction_indices)
      truth_squeezed = array_ops.squeeze(truth, axis=reduction_indices)
      output_array = output.eval()
      output_keep_dims_array = output_keep_dims.eval()
      truth_array = truth.eval()
@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase):

class CountNonzeroReductionTest(test.TestCase):

  def _compare(self, x, reduction_axes, keepdims, use_gpu=False,
  def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0,
               feed_dict=None):
    np_ans = (x != 0).astype(np.int32)
    np_ans = (x != zero).astype(np.int32)
    if reduction_axes is None:
      np_ans = np.sum(np_ans, keepdims=keepdims)
    else:
@ -958,6 +958,37 @@ class CountNonzeroReductionTest(test.TestCase):
      y = math_ops.count_nonzero(x, [0])
      self.assertAllEqual(y.eval(), np.zeros(9938))

  def testStringReduce(self):
    # Test case for GitHub issue 18712
    with self.test_session() as sess:
      v = math_ops.count_nonzero(constant_op.constant(["test"]))
      self.assertAllClose(sess.run(v), 1)

  def testStringReduce1D(self):
    # Create a 1D array of strings
    x = np.asarray(["", "", "a", "", "", "b"])
    self._compare(x, None, keepdims=False, zero=np.str(""))
    self._compare(x, [], keepdims=False, zero=np.str(""))
    self._compare(x, [0], keepdims=False, zero=np.str(""))
    self._compare(x, None, keepdims=True, zero=np.str(""))
    self._compare(x, [], keepdims=True, zero=np.str(""))
    self._compare(x, [0], keepdims=True, zero=np.str(""))

  def testStringReduce2D(self):
    # Create a 2D array of strings
    x = np.asarray([["", "", "a", "", "", "b"],
                    ["", "c", "", "d", "", ""],
                    ["e", "", "f", "", "", ""]])
    self._compare(x, None, keepdims=False, zero=np.str(""))
    self._compare(x, [], keepdims=False, zero=np.str(""))
    self._compare(x, [0], keepdims=False, zero=np.str(""))
    self._compare(x, [1], keepdims=False, zero=np.str(""))
    self._compare(x, [0, 1], keepdims=False, zero=np.str(""))
    self._compare(x, None, keepdims=True, zero=np.str(""))
    self._compare(x, [], keepdims=True, zero=np.str(""))
    self._compare(x, [0], keepdims=True, zero=np.str(""))
    self._compare(x, [0, 1], keepdims=True, zero=np.str(""))


if __name__ == "__main__":
  test.main()
|
||||
del input_ # input_ is not used in scatter_nd
|
||||
return array_ops.scatter_nd(indices, updates, shape)
|
||||
|
||||
def testString(self):
|
||||
indices = constant_op.constant([[4], [3], [1], [7]],
|
||||
dtype=dtypes.int32)
|
||||
updates = constant_op.constant(["four", "three", "one", "seven"],
|
||||
dtype=dtypes.string)
|
||||
expected = np.array([b"", b"one", b"", b"three", b"four",
|
||||
b"", b"", b"seven"])
|
||||
scatter = self.scatter_nd(indices, updates, shape=(8,))
|
||||
with self.test_session() as sess:
|
||||
result = sess.run(scatter)
|
||||
self.assertAllEqual(expected, result)
|
||||
|
||||
# Same indice is updated twice by same value.
|
||||
indices = constant_op.constant([[4], [3], [3], [7]],
|
||||
dtype=dtypes.int32)
|
||||
updates = constant_op.constant(["a", "b", "b", "c"],
|
||||
dtype=dtypes.string)
|
||||
expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"])
|
||||
scatter = self.scatter_nd(indices, updates, shape=(8,))
|
||||
with self.test_session() as sess:
|
||||
result = sess.run(scatter)
|
||||
self.assertAllEqual(expected, result)
|
||||
|
||||
# Same indice is updated twice by different value.
|
||||
indices = constant_op.constant([[4], [3], [3], [7]],
|
||||
dtype=dtypes.int32)
|
||||
updates = constant_op.constant(["a", "b", "c", "d"],
|
||||
dtype=dtypes.string)
|
||||
expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]),
|
||||
np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])]
|
||||
scatter = self.scatter_nd(indices, updates, shape=(8,))
|
||||
with self.test_session() as sess:
|
||||
result = sess.run(scatter)
|
||||
self.assertTrue(np.array_equal(result, expected[0]) or
|
||||
np.array_equal(result, expected[1]))
|
||||
|
||||
def testRank3ValidShape(self):
|
||||
indices = array_ops.zeros([2, 2, 2], dtypes.int32)
|
||||
updates = array_ops.zeros([2, 2, 2], dtypes.int32)
|
||||
@ -584,6 +620,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest):
|
||||
shape, dtype=updates.dtype))
|
||||
return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates)
|
||||
|
||||
def testString(self):
|
||||
# Not supported yet.
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test.main()
|
||||
|
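The tests above pin down the string semantics of `tf.scatter_nd`: updates are
placed at the given indices in a tensor of empty strings, and updates that hit
the same index are concatenated (in an unspecified order). A minimal sketch:

```python
import tensorflow as tf

indices = tf.constant([[1], [3], [3]])
updates = tf.constant(["a", "b", "c"])
scattered = tf.scatter_nd(indices, updates, shape=[4])

with tf.Session() as sess:
    # [b'' b'a' b'' b'bc'] -- or b'cb'; duplicate-update order is not guaranteed
    print(sess.run(scattered))
```
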
@ -196,7 +196,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
              array_ops.where(
                  math_ops.logical_and(grad.indices >= start,
                                       grad.indices < end)),
              squeeze_dims=[1])
              axis=[1])
          new_indices = array_ops.gather(grad.indices, indices_to_select) - start
          new_values = array_ops.gather(grad.values, indices_to_select)
          out_grads.append(ops.IndexedSlices(new_values, new_indices, size))
@ -994,9 +994,7 @@ def unstack(value, num=None, axis=0, name="unstack"):
  `value[:, i, :, :]` and each tensor in `output` will have shape `(A, C, D)`.
  Etc.

  This is the opposite of stack. The numpy equivalent is

      tf.unstack(x, n) = np.unstack(x)
  This is the opposite of stack.

  Args:
    value: A rank `R > 0` `Tensor` to be unstacked.
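The docstring fix above drops the bogus numpy comparison (numpy has no
`np.unstack`); `tf.unstack` simply inverts `tf.stack` along an axis. A short
sketch:

```python
import tensorflow as tf

x = tf.zeros([2, 3, 4])
pieces = tf.unstack(x, axis=1)        # list of 3 tensors, each of shape (2, 4)
restacked = tf.stack(pieces, axis=1)  # shape (2, 3, 4) again

print(len(pieces), pieces[0].shape, restacked.shape)
```
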
@ -1720,8 +1718,10 @@ def placeholder(dtype, shape=None, name=None):
    print(sess.run(y, feed_dict={x: rand_array}))  # Will succeed.
  ```

  @compatibility{eager} Placeholders are not compatible with eager execution.

  @compatibility(eager)
  Placeholders are not compatible with eager execution.
  @end_compatibility

  Args:
    dtype: The type of elements in the tensor to be fed.
    shape: The shape of the tensor to be fed (optional). If the shape is not
@ -652,7 +652,7 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
    padded.set_shape(padded_shape)

  if not is_batch:
    padded = array_ops.squeeze(padded, squeeze_dims=[0])
    padded = array_ops.squeeze(padded, axis=[0])

  return padded

@ -732,7 +732,7 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
    cropped.set_shape(cropped_shape)

  if not is_batch:
    cropped = array_ops.squeeze(cropped, squeeze_dims=[0])
    cropped = array_ops.squeeze(cropped, axis=[0])

  return cropped

@ -849,7 +849,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
    resized = control_flow_ops.with_dependencies(assert_ops, resized)

  if not is_batch:
    resized = array_ops.squeeze(resized, squeeze_dims=[0])
    resized = array_ops.squeeze(resized, axis=[0])

  return resized

@ -942,7 +942,7 @@ def resize_images(images,
        for x in [new_width_const, width, new_height_const, height]) and (
            width == new_width_const and height == new_height_const):
      if not is_batch:
        images = array_ops.squeeze(images, squeeze_dims=[0])
        images = array_ops.squeeze(images, axis=[0])
      return images

    if method == ResizeMethod.BILINEAR:
@ -965,7 +965,7 @@ def resize_images(images,
    images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
      images = array_ops.squeeze(images, squeeze_dims=[0])
      images = array_ops.squeeze(images, axis=[0])
    return images

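All of these hunks migrate the deprecated `squeeze_dims=` keyword of
`tf.squeeze` to its replacement `axis=`; the behavior is unchanged. A minimal
sketch of the renamed argument:

```python
import tensorflow as tf

batch_of_one = tf.zeros([1, 4, 4, 3])
image = tf.squeeze(batch_of_one, axis=[0])  # formerly squeeze_dims=[0]

print(image.shape)  # (4, 4, 3)
```
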
Some files were not shown because too many files have changed in this diff.