Merge changes from github.

Change: 128401884
This commit is contained in:
Martin Wicke 2016-07-25 13:48:16 -08:00 committed by TensorFlower Gardener
parent ed281973d6
commit 21716d8f6e
105 changed files with 2576 additions and 1144 deletions

View File

@ -18,7 +18,10 @@ If installed from binary pip package, provide:
1. Which pip package you installed. 1. Which pip package you installed.
2. The output from `python -c "import tensorflow; print(tensorflow.__version__)"`. 2. The output from `python -c "import tensorflow; print(tensorflow.__version__)"`.
If installed from sources, provide the commit hash: If installed from source, provide
1. The commit hash (`git rev-parse HEAD`)
2. The output of `bazel version`
### Steps to reproduce ### Steps to reproduce
1. 1.

View File

@ -1,9 +1,8 @@
package(default_visibility = ["//visibility:public"]) package(default_visibility = ["//visibility:public"])
archive_dir = "eigen-eigen-b4fa9622b809"
cc_library( cc_library(
name = "eigen", name = "eigen",
hdrs = glob([archive_dir+"/**/*.h", archive_dir+"/unsupported/Eigen/*", archive_dir+"/unsupported/Eigen/CXX11/*", archive_dir+"/Eigen/*"]), hdrs = glob(["**/*.h", "unsupported/Eigen/*", "unsupported/Eigen/CXX11/*", "Eigen/*"]),
includes = [ archive_dir ], includes = [ '.' ],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )

23
gif.BUILD Normal file
View File

@ -0,0 +1,23 @@
# Bazel build definition for the giflib C library.

# File names of the C sources compiled into the library; each one is joined
# with prefix_dir when the srcs attribute is built below.
SOURCES = [
"dgif_lib.c",
"egif_lib.c",
"gif_font.c",
"gif_hash.c",
"gifalloc.c",
"openbsd-reallocarray.c",
"gif_err.c",
"quantize.c",
]
# Directory that holds the sources and gif_lib.h.
# NOTE(review): presumably relative to the root of the extracted giflib 5.1.4
# archive - confirm against the workspace rule that fetches it.
prefix_dir = "giflib-5.1.4/lib"
# The "gif" library target, visible to every package.
cc_library(
name = "gif",
# Prepend prefix_dir to every entry of SOURCES.
srcs = [prefix_dir + "/" + source for source in SOURCES],
# gif_lib.h is the only header exposed through hdrs.
hdrs = [prefix_dir + "/gif_lib.h"],
# Puts prefix_dir on the include search path.
includes = [prefix_dir],
# Defines the HAVE_CONFIG_H preprocessor symbol.
# NOTE(review): no config.h is listed in srcs or hdrs here - confirm where the
# sources get it from.
defines = [
"HAVE_CONFIG_H",
],
visibility = ["//visibility:public"],
)

View File

@ -7,16 +7,30 @@
include (ExternalProject) include (ExternalProject)
set(eigen_archive_hash "b4fa9622b809") # We parse the current Eigen version and archive hash from the bazel configuration
# Read the Eigen version and archive checksum out of the Bazel workspace file
# so that the CMake build follows the values declared there.
file(STRINGS "${PROJECT_SOURCE_DIR}/../../workspace.bzl" workspace_contents)

# The first line that assigns a double-quoted value to eigen_version wins.
foreach(line IN LISTS workspace_contents)
  # The line is quoted so that it reaches string() as exactly one argument;
  # an unquoted expansion is split on ';' and disappears when it is empty.
  string(REGEX MATCH ".*eigen_version.*=.*\"(.*)\"" has_version "${line}")
  if(has_version)
    set(eigen_version "${CMAKE_MATCH_1}")
    break()
  endif()
endforeach()

# Same scan for the first line that assigns a quoted value to eigen_sha256.
foreach(line IN LISTS workspace_contents)
  string(REGEX MATCH ".*eigen_sha256.*=.*\"(.*)\"" has_hash "${line}")
  if(has_hash)
    set(eigen_hash "${CMAKE_MATCH_1}")
    break()
  endif()
endforeach()

# Fail at configure time when either value is missing, instead of going on
# with an empty version or checksum.
if("${eigen_version}" STREQUAL "" OR "${eigen_hash}" STREQUAL "")
  message(FATAL_ERROR
    "Could not find eigen_version and eigen_sha256 in "
    "${PROJECT_SOURCE_DIR}/../../workspace.bzl")
endif()
set(eigen_INCLUDE_DIRS set(eigen_INCLUDE_DIRS
${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive
${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/eigen-eigen-${eigen_archive_hash}
${tensorflow_source_dir}/third_party/eigen3 ${tensorflow_source_dir}/third_party/eigen3
) )
set(eigen_URL https://bitbucket.org/eigen/eigen/get/${eigen_archive_hash}.tar.gz) set(eigen_URL https://bitbucket.org/eigen/eigen/get/${eigen_version}.tar.gz)
set(eigen_HASH SHA256=2862840c2de9c0473a4ef20f8678949ae89ab25965352ee53329e63ba46cec62) set(eigen_HASH SHA256=${eigen_hash})
set(eigen_BUILD ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen) set(eigen_BUILD ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen)
set(eigen_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/eigen/install) set(eigen_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/eigen/install)
@ -30,5 +44,5 @@ ExternalProject_Add(eigen
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_INSTALL_PREFIX:STRING=${eigen_INSTALL} -DCMAKE_INSTALL_PREFIX:STRING=${eigen_INSTALL}
-DINCLUDE_INSTALL_DIR:STRING=${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/eigen-eigen-${eigen_archive_hash} -DINCLUDE_INSTALL_DIR:STRING=${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive
) )

View File

@ -55,12 +55,8 @@ class KMeansClustering(estimator.Estimator,
distance_metric=clustering_ops.SQUARED_EUCLIDEAN_DISTANCE, distance_metric=clustering_ops.SQUARED_EUCLIDEAN_DISTANCE,
random_seed=0, random_seed=0,
use_mini_batch=True, use_mini_batch=True,
batch_size=128,
steps=10,
kmeans_plus_plus_num_retries=2, kmeans_plus_plus_num_retries=2,
continue_training=False, config=None):
config=None,
verbose=1):
"""Creates a model for running KMeans training and inference. """Creates a model for running KMeans training and inference.
Args: Args:
@ -73,25 +69,17 @@ class KMeansClustering(estimator.Estimator,
random_seed: Python integer. Seed for PRNG used to initialize centers. random_seed: Python integer. Seed for PRNG used to initialize centers.
use_mini_batch: If true, use the mini-batch k-means algorithm. Else assume use_mini_batch: If true, use the mini-batch k-means algorithm. Else assume
full batch. full batch.
batch_size: See TensorFlowEstimator
steps: See TensorFlowEstimator
kmeans_plus_plus_num_retries: For each point that is sampled during kmeans_plus_plus_num_retries: For each point that is sampled during
kmeans++ initialization, this parameter specifies the number of kmeans++ initialization, this parameter specifies the number of
additional points to draw from the current distribution before selecting additional points to draw from the current distribution before selecting
the best. If a negative value is specified, a heuristic is used to the best. If a negative value is specified, a heuristic is used to
sample O(log(num_to_sample)) additional points. sample O(log(num_to_sample)) additional points.
continue_training: See TensorFlowEstimator config: See Estimator
config: See TensorFlowEstimator
verbose: See TensorFlowEstimator
""" """
super(KMeansClustering, self).__init__( super(KMeansClustering, self).__init__(
model_dir=model_dir, model_dir=model_dir,
config=config) config=config)
self.batch_size = batch_size
self.steps = steps
self.kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries self.kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries
self.continue_training = continue_training
self.verbose = verbose
self._num_clusters = num_clusters self._num_clusters = num_clusters
self._training_initial_clusters = initial_clusters self._training_initial_clusters = initial_clusters
self._training_graph = None self._training_graph = None
@ -135,11 +123,11 @@ class KMeansClustering(estimator.Estimator,
return relative_change < self._tolerance return relative_change < self._tolerance
# pylint: enable=protected-access # pylint: enable=protected-access
def fit(self, x, y=None, monitors=None, logdir=None, steps=None, def fit(self, x, y=None, monitors=None, logdir=None, steps=None, batch_size=128,
relative_tolerance=None): relative_tolerance=None):
"""Trains a k-means clustering on x. """Trains a k-means clustering on x.
Note: See TensorFlowEstimator for logic for continuous training and graph Note: See Estimator for logic for continuous training and graph
construction across multiple calls to fit. construction across multiple calls to fit.
Args: Args:
@ -151,6 +139,7 @@ class KMeansClustering(estimator.Estimator,
visualization. visualization.
steps: number of training steps. If not None, overrides the value passed steps: number of training steps. If not None, overrides the value passed
in constructor. in constructor.
batch_size: mini-batch size to use. Requires `use_mini_batch=True`.
relative_tolerance: A relative tolerance of change in the loss between relative_tolerance: A relative tolerance of change in the loss between
iterations. Stops learning if the loss changes less than this amount. iterations. Stops learning if the loss changes less than this amount.
Note that this may not work correctly if use_mini_batch=True. Note that this may not work correctly if use_mini_batch=True.
@ -162,7 +151,7 @@ class KMeansClustering(estimator.Estimator,
if logdir is not None: if logdir is not None:
self._model_dir = logdir self._model_dir = logdir
self._data_feeder = data_feeder.setup_train_data_feeder( self._data_feeder = data_feeder.setup_train_data_feeder(
x, None, self._num_clusters, self.batch_size) x, None, self._num_clusters, batch_size if self._use_mini_batch else None)
if relative_tolerance is not None: if relative_tolerance is not None:
if monitors is not None: if monitors is not None:
monitors += [self._StopWhenConverged(relative_tolerance)] monitors += [self._StopWhenConverged(relative_tolerance)]
@ -173,7 +162,7 @@ class KMeansClustering(estimator.Estimator,
or (self.steps is not None)) or (self.steps is not None))
self._train_model(input_fn=self._data_feeder.input_builder, self._train_model(input_fn=self._data_feeder.input_builder,
feed_fn=self._data_feeder.get_feed_dict_fn(), feed_fn=self._data_feeder.get_feed_dict_fn(),
steps=steps or self.steps, steps=steps,
monitors=monitors, monitors=monitors,
init_feed_fn=self._data_feeder.get_feed_dict_fn()) init_feed_fn=self._data_feeder.get_feed_dict_fn())
return self return self

View File

@ -53,13 +53,14 @@ class KMeansTest(tf.test.TestCase):
self.kmeans = KMeans(self.num_centers, self.kmeans = KMeans(self.num_centers,
initial_clusters=kmeans_ops.RANDOM_INIT, initial_clusters=kmeans_ops.RANDOM_INIT,
batch_size=self.batch_size,
use_mini_batch=self.use_mini_batch, use_mini_batch=self.use_mini_batch,
steps=30, config=self.config(14),
continue_training=True,
config=run_config.RunConfig(tf_random_seed=14),
random_seed=12) random_seed=12)
@staticmethod
def config(tf_random_seed):
return run_config.RunConfig(tf_random_seed=tf_random_seed)
@property @property
def batch_size(self): def batch_size(self):
return self.num_points return self.num_points
@ -86,7 +87,7 @@ class KMeansTest(tf.test.TestCase):
def test_clusters(self): def test_clusters(self):
kmeans = self.kmeans kmeans = self.kmeans
kmeans.fit(x=self.points, steps=0) kmeans.fit(x=self.points, steps=1, batch_size=8)
clusters = kmeans.clusters() clusters = kmeans.clusters()
self.assertAllEqual(list(clusters.shape), self.assertAllEqual(list(clusters.shape),
[self.num_centers, self.num_dims]) [self.num_centers, self.num_dims])
@ -97,10 +98,11 @@ class KMeansTest(tf.test.TestCase):
return return
kmeans = self.kmeans kmeans = self.kmeans
kmeans.fit(x=self.points, kmeans.fit(x=self.points,
steps=1) steps=1, batch_size=self.batch_size)
score1 = kmeans.score(x=self.points) score1 = kmeans.score(x=self.points)
kmeans.fit(x=self.points, kmeans.fit(x=self.points,
steps=15 * self.num_points // self.batch_size) steps=15 * self.num_points // self.batch_size,
batch_size=self.batch_size)
score2 = kmeans.score(x=self.points) score2 = kmeans.score(x=self.points)
self.assertTrue(score1 > score2) self.assertTrue(score1 > score2)
self.assertNear(self.true_score, score2, self.true_score * 0.05) self.assertNear(self.true_score, score2, self.true_score * 0.05)
@ -111,39 +113,36 @@ class KMeansTest(tf.test.TestCase):
return return
kmeans = KMeans(self.num_centers, kmeans = KMeans(self.num_centers,
initial_clusters=kmeans_ops.RANDOM_INIT, initial_clusters=kmeans_ops.RANDOM_INIT,
batch_size=self.batch_size,
use_mini_batch=self.use_mini_batch, use_mini_batch=self.use_mini_batch,
# Force it to train forever until the monitor stops it.
steps=None,
continue_training=True,
config=run_config.RunConfig(tf_random_seed=14), config=run_config.RunConfig(tf_random_seed=14),
random_seed=12) random_seed=12)
kmeans.fit(x=self.points, kmeans.fit(x=self.points,
# Force it to train forever until the monitor stops it. # Force it to train forever until the monitor stops it.
steps=None, steps=None,
batch_size=self.batch_size,
relative_tolerance=1e-4) relative_tolerance=1e-4)
score = kmeans.score(x=self.points) score = kmeans.score(x=self.points)
self.assertNear(self.true_score, score, self.true_score * 0.005) self.assertNear(self.true_score, score, self.true_score * 0.005)
def test_infer(self): def test_infer(self):
kmeans = self.kmeans kmeans = self.kmeans
kmeans.fit(x=self.points) kmeans.fit(x=self.points, steps=10, batch_size=128)
clusters = kmeans.clusters() clusters = kmeans.clusters()
# Make a small test set # Make a small test set
points, true_assignments, true_offsets = self.make_random_points(clusters, points, true_assignments, true_offsets = self.make_random_points(clusters,
10) 10)
# Test predict # Test predict
assignments = kmeans.predict(points) assignments = kmeans.predict(points, batch_size=self.batch_size)
self.assertAllEqual(assignments, true_assignments) self.assertAllEqual(assignments, true_assignments)
# Test score # Test score
score = kmeans.score(points) score = kmeans.score(points, batch_size=128)
self.assertNear(score, np.sum(true_offsets), 0.01 * score) self.assertNear(score, np.sum(true_offsets), 0.01 * score)
# Test transform # Test transform
transform = kmeans.transform(points) transform = kmeans.transform(points, batch_size=128)
true_transform = np.maximum( true_transform = np.maximum(
0, 0,
np.sum(np.square(points), axis=1, keepdims=True) - np.sum(np.square(points), axis=1, keepdims=True) -
@ -161,12 +160,9 @@ class KMeansTest(tf.test.TestCase):
initial_clusters=kmeans_ops.RANDOM_INIT, initial_clusters=kmeans_ops.RANDOM_INIT,
distance_metric=kmeans_ops.COSINE_DISTANCE, distance_metric=kmeans_ops.COSINE_DISTANCE,
use_mini_batch=self.use_mini_batch, use_mini_batch=self.use_mini_batch,
batch_size=4, config=self.config(2),
steps=30,
continue_training=True,
config=run_config.RunConfig(tf_random_seed=2),
random_seed=12) random_seed=12)
kmeans.fit(x=points) kmeans.fit(x=points, steps=10, batch_size=4)
centers = normalize(kmeans.clusters()) centers = normalize(kmeans.clusters())
self.assertAllClose(np.sort(centers, axis=0), self.assertAllClose(np.sort(centers, axis=0),
np.sort(true_centers, axis=0)) np.sort(true_centers, axis=0))
@ -184,10 +180,8 @@ class KMeansTest(tf.test.TestCase):
initial_clusters=kmeans_ops.RANDOM_INIT, initial_clusters=kmeans_ops.RANDOM_INIT,
distance_metric=kmeans_ops.COSINE_DISTANCE, distance_metric=kmeans_ops.COSINE_DISTANCE,
use_mini_batch=self.use_mini_batch, use_mini_batch=self.use_mini_batch,
batch_size=8, config=self.config(3))
continue_training=True, kmeans.fit(x=points, steps=30, batch_size=8)
config=run_config.RunConfig(tf_random_seed=3))
kmeans.fit(x=points, steps=30)
centers = normalize(kmeans.clusters()) centers = normalize(kmeans.clusters())
self.assertAllClose(np.sort(centers, axis=0), self.assertAllClose(np.sort(centers, axis=0),
@ -195,7 +189,7 @@ class KMeansTest(tf.test.TestCase):
atol=1e-2) atol=1e-2)
true_transform = 1 - cosine_similarity(points, centers) true_transform = 1 - cosine_similarity(points, centers)
transform = kmeans.transform(points) transform = kmeans.transform(points, batch_size=8)
self.assertAllClose(transform, true_transform, atol=1e-3) self.assertAllClose(transform, true_transform, atol=1e-3)
def test_predict_with_cosine_distance(self): def test_predict_with_cosine_distance(self):
@ -217,20 +211,18 @@ class KMeansTest(tf.test.TestCase):
initial_clusters=kmeans_ops.RANDOM_INIT, initial_clusters=kmeans_ops.RANDOM_INIT,
distance_metric=kmeans_ops.COSINE_DISTANCE, distance_metric=kmeans_ops.COSINE_DISTANCE,
use_mini_batch=self.use_mini_batch, use_mini_batch=self.use_mini_batch,
batch_size=8, config=self.config(3))
continue_training=True, kmeans.fit(x=points, steps=30, batch_size=8)
config=run_config.RunConfig(tf_random_seed=3))
kmeans.fit(x=points, steps=30)
centers = normalize(kmeans.clusters()) centers = normalize(kmeans.clusters())
self.assertAllClose(np.sort(centers, axis=0), self.assertAllClose(np.sort(centers, axis=0),
np.sort(true_centers, axis=0), atol=1e-2) np.sort(true_centers, axis=0), atol=1e-2)
assignments = kmeans.predict(points) assignments = kmeans.predict(points, batch_size=8)
self.assertAllClose(centers[assignments], self.assertAllClose(centers[assignments],
true_centers[true_assignments], atol=1e-2) true_centers[true_assignments], atol=1e-2)
score = kmeans.score(points) score = kmeans.score(points, batch_size=8)
self.assertAllClose(score, true_score, atol=1e-2) self.assertAllClose(score, true_score, atol=1e-2)
def test_predict_with_cosine_distance_and_kmeans_plus_plus(self): def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
@ -254,21 +246,19 @@ class KMeansTest(tf.test.TestCase):
initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT, initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
distance_metric=kmeans_ops.COSINE_DISTANCE, distance_metric=kmeans_ops.COSINE_DISTANCE,
use_mini_batch=self.use_mini_batch, use_mini_batch=self.use_mini_batch,
batch_size=12, config=self.config(3))
continue_training=True, kmeans.fit(x=points, steps=30, batch_size=12)
config=run_config.RunConfig(tf_random_seed=3))
kmeans.fit(x=points, steps=30)
centers = normalize(kmeans.clusters()) centers = normalize(kmeans.clusters())
self.assertAllClose(sorted(centers.tolist()), self.assertAllClose(sorted(centers.tolist()),
sorted(true_centers.tolist()), sorted(true_centers.tolist()),
atol=1e-2) atol=1e-2)
assignments = kmeans.predict(points) assignments = kmeans.predict(points, batch_size=12)
self.assertAllClose(centers[assignments], self.assertAllClose(centers[assignments],
true_centers[true_assignments], atol=1e-2) true_centers[true_assignments], atol=1e-2)
score = kmeans.score(points) score = kmeans.score(points, batch_size=12)
self.assertAllClose(score, true_score, atol=1e-2) self.assertAllClose(score, true_score, atol=1e-2)
def test_fit_raise_if_num_clusters_larger_than_num_points_random_init(self): def test_fit_raise_if_num_clusters_larger_than_num_points_random_init(self):
@ -276,7 +266,7 @@ class KMeansTest(tf.test.TestCase):
with self.assertRaisesOpError('less'): with self.assertRaisesOpError('less'):
kmeans = KMeans(num_clusters=3, initial_clusters=kmeans_ops.RANDOM_INIT) kmeans = KMeans(num_clusters=3, initial_clusters=kmeans_ops.RANDOM_INIT)
kmeans.fit(x=points) kmeans.fit(x=points, steps=10, batch_size=8)
def test_fit_raise_if_num_clusters_larger_than_num_points_kmeans_plus_plus( def test_fit_raise_if_num_clusters_larger_than_num_points_kmeans_plus_plus(
self): self):
@ -285,7 +275,7 @@ class KMeansTest(tf.test.TestCase):
with self.assertRaisesOpError(AssertionError): with self.assertRaisesOpError(AssertionError):
kmeans = KMeans(num_clusters=3, kmeans = KMeans(num_clusters=3,
initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT) initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT)
kmeans.fit(x=points) kmeans.fit(x=points, steps=10, batch_size=8)
class MiniBatchKMeansTest(KMeansTest): class MiniBatchKMeansTest(KMeansTest):

View File

@ -72,5 +72,14 @@ rundown:
unused because no other code references the variables, but in fact their unused because no other code references the variables, but in fact their
constructors have the important side effect of registering the class. constructors have the important side effect of registering the class.
- C++11 support (or later) should be enabled by setting `C++ Language Dialect` to
`GNU++11` (or `GNU++14`), and `C++ Standard Library` to `libc++`.
- The library doesn't currently support bitcode, so you'll need to disable that - The library doesn't currently support bitcode, so you'll need to disable that
in your project settings. in your project settings.
- Remove any use of the `-all_load` flag in your project. The protocol buffers
libraries (full and lite versions) contain duplicate symbols, and the `-all_load`
flag will cause these duplicates to become link errors. If you were using
`-all_load` to avoid issues with Objective-C categories in static libraries,
you may be able to replace it with the `-ObjC` flag.

View File

@ -47,7 +47,9 @@ class Classifier(estimator.Estimator):
Args: Args:
model_fn: (targets, predictions, mode) -> logits, loss, train_op model_fn: (targets, predictions, mode) -> logits, loss, train_op
n_classes: Number of classes n_classes: Number of classes
model_dir: Base directory for output data model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
config: Configuration object (optional) config: Configuration object (optional)
""" """
self._n_classes = n_classes self._n_classes = n_classes

View File

@ -119,7 +119,9 @@ class DNNClassifier(dnn_linear_combined.DNNLinearCombinedClassifier):
feature_columns: An iterable containing all the feature columns used by feature_columns: An iterable containing all the feature columns used by
the model. All items in the set should be instances of classes derived the model. All items in the set should be instances of classes derived
from `FeatureColumn`. from `FeatureColumn`.
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
n_classes: number of target classes. Default is binary classification. n_classes: number of target classes. Default is binary classification.
It must be greater than 1. It must be greater than 1.
weight_column_name: A string defining feature column name representing weight_column_name: A string defining feature column name representing
@ -277,7 +279,9 @@ class DNNRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
feature_columns: An iterable containing all the feature columns used by feature_columns: An iterable containing all the feature columns used by
the model. All items in the set should be instances of classes derived the model. All items in the set should be instances of classes derived
from `FeatureColumn`. from `FeatureColumn`.
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
weight_column_name: A string defining feature column name representing weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example. will be multiplied by the loss of the example.

View File

@ -72,7 +72,9 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
Args: Args:
target_column: A _TargetColumn object. target_column: A _TargetColumn object.
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
linear_feature_columns: An iterable containing all the feature columns linear_feature_columns: An iterable containing all the feature columns
used by linear part of the model. All items in the set should be used by linear part of the model. All items in the set should be
instances of classes derived from `FeatureColumn`. instances of classes derived from `FeatureColumn`.
@ -354,7 +356,9 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator):
"""Constructs a DNNLinearCombinedClassifier instance. """Constructs a DNNLinearCombinedClassifier instance.
Args: Args:
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
n_classes: number of target classes. Default is binary classification. n_classes: number of target classes. Default is binary classification.
weight_column_name: A string defining feature column name representing weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. weights. It is used to down weight or boost examples during training.
@ -537,7 +541,9 @@ class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator):
"""Initializes a DNNLinearCombinedRegressor instance. """Initializes a DNNLinearCombinedRegressor instance.
Args: Args:
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
weight_column_name: A string defining feature column name representing weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example. will be multiplied by the loss of the example.

View File

@ -158,7 +158,9 @@ class BaseEstimator(sklearn.BaseEstimator):
"""Initializes a BaseEstimator instance. """Initializes a BaseEstimator instance.
Args: Args:
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
config: A RunConfig instance. config: A RunConfig instance.
""" """
# Model directory. # Model directory.
@ -766,7 +768,9 @@ class Estimator(BaseEstimator):
is passed to Estimator in `params` parameter. This allows is passed to Estimator in `params` parameter. This allows
to configure Estimators from hyper parameter tuning. to configure Estimators from hyper parameter tuning.
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
config: Configuration object. config: Configuration object.
params: `dict` of hyper parameters that will be passed into `model_fn`. params: `dict` of hyper parameters that will be passed into `model_fn`.
Keys are names of parameters, values are basic python types. Keys are names of parameters, values are basic python types.

View File

@ -122,7 +122,9 @@ class LinearClassifier(dnn_linear_combined.DNNLinearCombinedClassifier):
feature_columns: An iterable containing all the feature columns used by feature_columns: An iterable containing all the feature columns used by
the model. All items in the set should be instances of classes derived the model. All items in the set should be instances of classes derived
from `FeatureColumn`. from `FeatureColumn`.
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
n_classes: number of target classes. Default is binary classification. n_classes: number of target classes. Default is binary classification.
weight_column_name: A string defining feature column name representing weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It weights. It is used to down weight or boost examples during training. It
@ -280,7 +282,9 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
feature_columns: An iterable containing all the feature columns used by feature_columns: An iterable containing all the feature columns used by
the model. All items in the set should be instances of classes derived the model. All items in the set should be instances of classes derived
from `FeatureColumn`. from `FeatureColumn`.
model_dir: Directory to save model parameters, graph, etc. model_dir: Directory to save model parameters, graph, etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
weight_column_name: A string defining feature column name representing weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example. will be multiplied by the loss of the example.

View File

@ -57,7 +57,9 @@ class LogisticRegressor(estimator.Estimator):
expects the returned predictions to be probabilities in [0.0, 1.0]. expects the returned predictions to be probabilities in [0.0, 1.0].
thresholds: List of floating point thresholds to use for accuracy, thresholds: List of floating point thresholds to use for accuracy,
precision, and recall metrics. If None, defaults to [0.5]. precision, and recall metrics. If None, defaults to [0.5].
model_dir: Directory to save model parameters, graphs, etc. model_dir: Directory to save model parameters, graphs, etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
config: A RunConfig configuration object. config: A RunConfig configuration object.
""" """
if thresholds is None: if thresholds is None:

View File

@ -69,8 +69,7 @@ class TensorForestEstimator(estimator.BaseEstimator):
def __init__(self, params, device_assigner=None, model_dir=None, def __init__(self, params, device_assigner=None, model_dir=None,
graph_builder_class=tensor_forest.RandomForestGraphs, graph_builder_class=tensor_forest.RandomForestGraphs,
master='', accuracy_metric=None, master='', accuracy_metric=None,
tf_random_seed=None, continue_training=False, verbose=1, tf_random_seed=None, config=None):
max_to_keep=5, save_checkpoint_secs=300):
self.params = params.fill() self.params = params.fill()
self.accuracy_metric = (accuracy_metric or self.accuracy_metric = (accuracy_metric or
('r2' if self.params.regression else 'accuracy')) ('r2' if self.params.regression else 'accuracy'))
@ -81,12 +80,6 @@ class TensorForestEstimator(estimator.BaseEstimator):
self.training_args = {} self.training_args = {}
self.construction_args = {} self.construction_args = {}
config = run_config.RunConfig(
master=master,
tf_random_seed=(tf_random_seed or int((time.time() * 1000) % 1000)),
save_checkpoints_secs=save_checkpoint_secs,
keep_checkpoint_max=max_to_keep)
super(TensorForestEstimator, self).__init__(model_dir=model_dir, super(TensorForestEstimator, self).__init__(model_dir=model_dir,
config=config) config=config)

View File

@ -74,7 +74,9 @@ class SVM(linear.LinearClassifier):
weight_column_name: A string defining feature column name representing weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example. will be multiplied by the loss of the example.
model_dir: Directory to save model parameters, graph and etc. model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into an estimator to continue
training a previously saved model.
l1_regularization: L1-regularization parameter l1_regularization: L1-regularization parameter
l2_regularization: L2-regularization parameter l2_regularization: L2-regularization parameter
kernels: A list of kernels for the SVM. Currently, no kernels are supported. kernels: A list of kernels for the SVM. Currently, no kernels are supported.

View File

@ -38,29 +38,29 @@ HOST_OBJDIR := $(MAKEFILE_DIR)/gen/host_obj/
HOST_BINDIR := $(MAKEFILE_DIR)/gen/host_bin/ HOST_BINDIR := $(MAKEFILE_DIR)/gen/host_bin/
HOST_GENDIR := $(MAKEFILE_DIR)/gen/host_obj/ HOST_GENDIR := $(MAKEFILE_DIR)/gen/host_obj/
# Find the current Eigen version name from the Bazel build file # Find the current Eigen version from the Bazel configuration
EIGEN_HASH := $(shell cat eigen.BUILD | grep archive_dir | head -1 | cut -f3 -d- | cut -f1 -d\") EIGEN_VERSION := $(shell grep eigen_version tensorflow/workspace.bzl | head -1 | sed -e 's/.*eigen_version.*=.*"\(.*\)"/\1/')
# Settings for the host compiler. # Settings for the host compiler.
HOST_CXX := $(CC_PREFIX) gcc HOST_CXX := $(CC_PREFIX) gcc
HOST_CXXFLAGS := --std=c++11 HOST_CXXFLAGS := --std=c++11
HOST_LDOPTS := \ HOST_LDOPTS :=
-L/usr/local/lib
ifeq ($(HAS_GEN_HOST_PROTOC),true) ifeq ($(HAS_GEN_HOST_PROTOC),true)
HOST_LDOPTS += -L$(MAKEFILE_DIR)/gen/protobuf-host/lib HOST_LDOPTS += -L$(MAKEFILE_DIR)/gen/protobuf-host/lib
endif endif
HOST_LDOPTS += -L/usr/local/lib
HOST_INCLUDES := \ HOST_INCLUDES := \
-I/usr/local/include \
-I. \ -I. \
-I$(MAKEFILE_DIR)/downloads/ \ -I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen-eigen-$(EIGEN_HASH) \ -I$(MAKEFILE_DIR)/downloads/eigen-eigen-$(EIGEN_VERSION) \
-I$(HOST_GENDIR) -I$(HOST_GENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true) ifeq ($(HAS_GEN_HOST_PROTOC),true)
HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
endif endif
# This is at the end so any globally-installed frameworks like protobuf don't
# override local versions in the source tree.
HOST_INCLUDES += -I/usr/local/include
HOST_LIBS := \ HOST_LIBS := \
-lstdc++ \ -lstdc++ \
@ -120,21 +120,18 @@ CXXFLAGS := --std=c++11 -DIS_SLIM_BUILD $(OPTFLAGS)
LDFLAGS := \ LDFLAGS := \
-L/usr/local/lib -L/usr/local/lib
ifeq ($(HAS_GEN_HOST_PROTOC),true)
HOST_LDOPTS += -L$(MAKEFILE_DIR)/gen/protobuf-host/lib
endif
INCLUDES := \ INCLUDES := \
-I/usr/local/include \
-I. \ -I. \
-I$(MAKEFILE_DIR)/downloads/ \ -I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen-eigen-$(EIGEN_HASH) \ -I$(MAKEFILE_DIR)/downloads/eigen-eigen-$(EIGEN_VERSION) \
-I$(PROTOGENDIR) \ -I$(PROTOGENDIR) \
-I$(PBTGENDIR) -I$(PBTGENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true) ifeq ($(HAS_GEN_HOST_PROTOC),true)
INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
endif endif
# This is at the end so any globally-installed frameworks like protobuf don't
# override local versions in the source tree.
INCLUDES += -I/usr/local/include
LIBS := \ LIBS := \
-lstdc++ \ -lstdc++ \
@ -211,7 +208,7 @@ ifeq ($(TARGET),ANDROID)
-I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi/include \ -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/armeabi/include \
-I. \ -I. \
-I$(MAKEFILE_DIR)/downloads/ \ -I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen-eigen-$(EIGEN_HASH) \ -I$(MAKEFILE_DIR)/downloads/eigen-eigen-$(EIGEN_VERSION) \
-I$(MAKEFILE_DIR)/gen/protobuf/include \ -I$(MAKEFILE_DIR)/gen/protobuf/include \
-I$(PROTOGENDIR) \ -I$(PROTOGENDIR) \
-I$(PBTGENDIR) -I$(PBTGENDIR)
@ -364,7 +361,52 @@ BENCHMARK_NAME := $(BINDIR)benchmark
# What sources we want to compile, derived from the main Bazel build using the # What sources we want to compile, derived from the main Bazel build using the
# gen_file_lists.sh script. # gen_file_lists.sh script.
TF_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_cc_files.txt)
CORE_CC_ALL_SRCS := \
$(wildcard tensorflow/core/*.cc) \
$(wildcard tensorflow/core/common_runtime/*.cc) \
$(wildcard tensorflow/core/debug/*.cc) \
$(wildcard tensorflow/core/framework/*.cc) \
$(wildcard tensorflow/core/graph/*.cc) \
$(wildcard tensorflow/core/lib/*/*.cc) \
$(wildcard tensorflow/core/platform/*.cc) \
$(wildcard tensorflow/core/platform/*/*.cc) \
$(wildcard tensorflow/core/util/*.cc) \
$(wildcard tensorflow/core/util/*/*.cc)
CORE_CC_EXCLUDE_SRCS := \
$(wildcard tensorflow/core/*/*test.cc) \
$(wildcard tensorflow/core/*/*testutil*) \
$(wildcard tensorflow/core/*/*testlib*) \
$(wildcard tensorflow/core/*/*main.cc) \
$(wildcard tensorflow/core/*/*/*test.cc) \
$(wildcard tensorflow/core/*/*/*testutil*) \
$(wildcard tensorflow/core/*/*/*testlib*) \
$(wildcard tensorflow/core/*/*/*main.cc) \
$(wildcard tensorflow/core/graph/dot.*) \
$(wildcard tensorflow/core/lib/gif/*) \
$(wildcard tensorflow/core/lib/jpeg/*) \
$(wildcard tensorflow/core/lib/png/*) \
$(wildcard tensorflow/core/util/checkpoint_reader.*) \
$(wildcard tensorflow/core/util/events_writer.*) \
$(wildcard tensorflow/core/util/reporter.*) \
$(wildcard tensorflow/core/util/tf_status_helper.*) \
$(wildcard tensorflow/core/platform/default/stream_executor.*) \
$(wildcard tensorflow/core/platform/default/test_benchmark.*) \
$(wildcard tensorflow/core/platform/cuda.h) \
$(wildcard tensorflow/core/platform/cloud/*) \
$(wildcard tensorflow/core/platform/google/*) \
$(wildcard tensorflow/core/platform/jpeg.*) \
$(wildcard tensorflow/core/platform/png.*) \
$(wildcard tensorflow/core/platform/stream_executor.*) \
$(wildcard tensorflow/core/user_ops/*.cu.cc) \
$(wildcard tensorflow/core/common_runtime/gpu/*) \
$(wildcard tensorflow/core/common_runtime/gpu_device_factory.*)
# Filter out all the excluded files.
TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS))
# Add in any extra files that don't fit the patterns easily
TF_CC_SRCS += tensorflow/core/common_runtime/gpu/gpu_tracer.cc
# Also include the op and kernel definitions.
TF_CC_SRCS += $(shell cat $(MAKEFILE_DIR)/tf_op_files.txt)
PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt) PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt)
PROTO_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_proto_files.txt) PROTO_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_proto_files.txt)
BENCHMARK_SRCS := \ BENCHMARK_SRCS := \

View File

@ -176,15 +176,16 @@ curl -o ~/graphs/inception.zip \
### Building all at once ### Building all at once
If you just want to get the libraries compiled in a hurry, you can run: If you just want to get the libraries compiled in a hurry, you can run this
from the root of your TensorFlow source folder:
```bash ```bash
build_all_ios.sh tensorflow/contrib/makefile/build_all_ios.sh
``` ```
and wait a long time. This process will take around twenty minutes on a modern MacBook Pro.
When this completes, you will have a library for a single architecture and the When it completes, you will have a library for a single architecture and the
benchmark program. Although successfully compiling the benchmark program is a benchmark program. Although successfully compiling the benchmark program is a
sign of success, the program is not a complete iOS app. sign of success, the program is not a complete iOS app.
@ -284,6 +285,17 @@ make -f tensorflow/contrib/makefile/Makefile HOST_OS=PI TARGET=PI \
OPTFLAGS="-Os -mfpu=neon-vfpv4 -funsafe-math-optimizations -ftree-vectorize" OPTFLAGS="-Os -mfpu=neon-vfpv4 -funsafe-math-optimizations -ftree-vectorize"
``` ```
If you hit compilation errors mentioning `__atomic_compare_exchange` and you're
using gcc 4.9, you should try installing gcc 4.8 and using that instead:
```bash
sudo apt-get install -y gcc-4.8 g++-4.8
make -f tensorflow/contrib/makefile/Makefile HOST_OS=PI TARGET=PI \
OPTFLAGS="-Os -mfpu=neon-vfpv4 -funsafe-math-optimizations -ftree-vectorize" \
CXX=g++-4.8
```
# Other notes # Other notes
## Supported Systems ## Supported Systems

View File

@ -1,4 +1,4 @@
#!/bin/bash -x #!/bin/bash -ex
# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
@ -15,11 +15,22 @@
# ============================================================================== # ==============================================================================
DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads
BZL_FILE_PATH=tensorflow/workspace.bzl
mkdir ${DOWNLOADS_DIR} mkdir -p ${DOWNLOADS_DIR}
# Grab the current Eigen version name from the Bazel build file # Grab the current Eigen version name from the Bazel build file
EIGEN_HASH=$(cat eigen.BUILD | grep archive_dir | head -1 | cut -f3 -d- | cut -f1 -d\") EIGEN_HASH=$(cat "${BZL_FILE_PATH}" | egrep "eigen_version.*=.*\".*\"" | awk '{ print $3 }')
# Trim trailing and preceding double quotes
EIGEN_HASH="${EIGEN_HASH%\"}"
EIGEN_HASH="${EIGEN_HASH#\"}"
if [[ -z "${EIGEN_HASH}" ]]; then
echo >&2 "Eigen hash does not exist."
exit 1
else
echo "Eigen hash = ${EIGEN_HASH}"
fi
curl "https://bitbucket.org/eigen/eigen/get/${EIGEN_HASH}.tar.gz" \ curl "https://bitbucket.org/eigen/eigen/get/${EIGEN_HASH}.tar.gz" \
-o /tmp/eigen-${EIGEN_HASH}.tar.gz -o /tmp/eigen-${EIGEN_HASH}.tar.gz
@ -34,3 +45,5 @@ git clone https://github.com/google/protobuf.git ${DOWNLOADS_DIR}/protobuf
cd ${DOWNLOADS_DIR} cd ${DOWNLOADS_DIR}
rm -rf eigen-latest rm -rf eigen-latest
ln -s eigen-eigen-${EIGEN_HASH} eigen-latest ln -s eigen-eigen-${EIGEN_HASH} eigen-latest
echo "download_dependencies.sh completed successfully."

View File

@ -16,16 +16,6 @@
# This script generates the source file lists needed by the makefile by querying # This script generates the source file lists needed by the makefile by querying
# the master Bazel build configuration. # the master Bazel build configuration.
bazel query 'kind("source file", deps(//tensorflow/core:android_tensorflow_lib))' | \
grep "//tensorflow/.*\.cc$" | \
grep -v "gen_proto_text" | \
grep -E -v "jpeg" | \
grep -E -v "png" | \
grep -E -v "zlib" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> tensorflow/contrib/makefile/tf_cc_files.txt
bazel query 'kind("source file", deps(//tensorflow/core:android_tensorflow_lib))' | \ bazel query 'kind("source file", deps(//tensorflow/core:android_tensorflow_lib))' | \
grep "//tensorflow/.*\.proto$" | \ grep "//tensorflow/.*\.proto$" | \
sed -E 's#^//##g' | \ sed -E 's#^//##g' | \

View File

@ -1,264 +0,0 @@
tensorflow/core/kernels/xent_op.cc
tensorflow/core/kernels/where_op.cc
tensorflow/core/kernels/variable_ops.cc
tensorflow/core/kernels/unpack_op.cc
tensorflow/core/kernels/transpose_op.cc
tensorflow/core/kernels/transpose_functor_cpu.cc
tensorflow/core/kernels/training_ops.cc
tensorflow/core/kernels/topk_op.cc
tensorflow/core/kernels/tile_ops.cc
tensorflow/core/kernels/strided_slice_op.cc
tensorflow/core/kernels/stack_ops.cc
tensorflow/core/kernels/split_op.cc
tensorflow/core/kernels/split_lib_cpu.cc
tensorflow/core/kernels/sparse_to_dense_op.cc
tensorflow/core/kernels/softsign_op.cc
tensorflow/core/kernels/softplus_op.cc
tensorflow/core/kernels/softmax_op.cc
tensorflow/core/kernels/slice_op.cc
tensorflow/core/kernels/shape_ops.cc
tensorflow/core/kernels/session_ops.cc
tensorflow/core/kernels/sequence_ops.cc
tensorflow/core/kernels/sendrecv_ops.cc
tensorflow/core/kernels/save_restore_tensor.cc
tensorflow/core/kernels/save_op.cc
tensorflow/core/kernels/reverse_sequence_op.cc
tensorflow/core/kernels/reverse_op.cc
tensorflow/core/kernels/restore_op.cc
tensorflow/core/kernels/resize_nearest_neighbor_op.cc
tensorflow/core/kernels/resize_bilinear_op.cc
tensorflow/core/kernels/reshape_op.cc
tensorflow/core/kernels/relu_op.cc
tensorflow/core/kernels/reduction_ops_sum.cc
tensorflow/core/kernels/reduction_ops_prod.cc
tensorflow/core/kernels/reduction_ops_min.cc
tensorflow/core/kernels/reduction_ops_mean.cc
tensorflow/core/kernels/reduction_ops_max.cc
tensorflow/core/kernels/reduction_ops_common.cc
tensorflow/core/kernels/pooling_ops_common.cc
tensorflow/core/kernels/pad_op.cc
tensorflow/core/kernels/pack_op.cc
tensorflow/core/kernels/ops_util.cc
tensorflow/core/kernels/no_op.cc
tensorflow/core/kernels/maxpooling_op.cc
tensorflow/core/kernels/matmul_op.cc
tensorflow/core/kernels/lrn_op.cc
tensorflow/core/kernels/in_topk_op.cc
tensorflow/core/kernels/immutable_constant_op.cc
tensorflow/core/kernels/identity_op.cc
tensorflow/core/kernels/gather_op.cc
tensorflow/core/kernels/fill_functor.cc
tensorflow/core/kernels/example_parsing_ops.cc
tensorflow/core/kernels/dynamic_stitch_op.cc
tensorflow/core/kernels/dynamic_partition_op.cc
tensorflow/core/kernels/dense_update_ops.cc
tensorflow/core/kernels/cwise_ops_common.cc
tensorflow/core/kernels/cwise_op_tanh.cc
tensorflow/core/kernels/cwise_op_sub.cc
tensorflow/core/kernels/cwise_op_squared_difference.cc
tensorflow/core/kernels/cwise_op_square.cc
tensorflow/core/kernels/cwise_op_sqrt.cc
tensorflow/core/kernels/cwise_op_sigmoid.cc
tensorflow/core/kernels/cwise_op_select.cc
tensorflow/core/kernels/cwise_op_rsqrt.cc
tensorflow/core/kernels/cwise_op_neg.cc
tensorflow/core/kernels/cwise_op_mul.cc
tensorflow/core/kernels/cwise_op_minimum.cc
tensorflow/core/kernels/cwise_op_maximum.cc
tensorflow/core/kernels/cwise_op_log.cc
tensorflow/core/kernels/cwise_op_less.cc
tensorflow/core/kernels/cwise_op_isfinite.cc
tensorflow/core/kernels/cwise_op_inverse.cc
tensorflow/core/kernels/cwise_op_greater.cc
tensorflow/core/kernels/cwise_op_exp.cc
tensorflow/core/kernels/cwise_op_equal_to.cc
tensorflow/core/kernels/cwise_op_div.cc
tensorflow/core/kernels/cwise_op_add.cc
tensorflow/core/kernels/ctc_decoder_ops.cc
tensorflow/core/kernels/conv_ops.cc
tensorflow/core/kernels/conv_grad_ops.cc
tensorflow/core/kernels/control_flow_ops.cc
tensorflow/core/kernels/constant_op.cc
tensorflow/core/kernels/concat_op.cc
tensorflow/core/kernels/concat_lib_cpu.cc
tensorflow/core/kernels/check_numerics_op.cc
tensorflow/core/kernels/cast_op.cc
tensorflow/core/kernels/bias_op.cc
tensorflow/core/kernels/bcast_ops.cc
tensorflow/core/kernels/batch_norm_op.cc
tensorflow/core/kernels/avgpooling_op.cc
tensorflow/core/kernels/argmax_op.cc
tensorflow/core/kernels/aggregate_ops.cc
tensorflow/core/util/work_sharder.cc
tensorflow/core/util/util.cc
tensorflow/core/util/use_cudnn.cc
tensorflow/core/util/tensor_slice_writer.cc
tensorflow/core/util/tensor_slice_set.cc
tensorflow/core/util/tensor_slice_reader_cache.cc
tensorflow/core/util/tensor_slice_reader.cc
tensorflow/core/util/tensor_format.cc
tensorflow/core/util/stat_summarizer.cc
tensorflow/core/util/sparse/group_iterator.cc
tensorflow/core/util/saved_tensor_slice_util.cc
tensorflow/core/util/port.cc
tensorflow/core/util/padding.cc
tensorflow/core/util/mirror_pad_mode.cc
tensorflow/core/util/memmapped_file_system_writer.cc
tensorflow/core/util/memmapped_file_system.cc
tensorflow/core/util/guarded_philox_random.cc
tensorflow/core/util/example_proto_helper.cc
tensorflow/core/util/device_name_utils.cc
tensorflow/core/util/command_line_flags.cc
tensorflow/core/util/bcast.cc
tensorflow/core/platform/tracing.cc
tensorflow/core/platform/tensor_coding.cc
tensorflow/core/platform/protobuf_util.cc
tensorflow/core/platform/posix/posix_file_system.cc
tensorflow/core/platform/posix/port.cc
tensorflow/core/platform/posix/env.cc
tensorflow/core/platform/load_library.cc
tensorflow/core/platform/file_system.cc
tensorflow/core/platform/env.cc
tensorflow/core/platform/denormal.cc
tensorflow/core/platform/default/tracing.cc
tensorflow/core/platform/default/logging.cc
tensorflow/core/ops/training_ops.cc
tensorflow/core/ops/string_ops.cc
tensorflow/core/ops/state_ops.cc
tensorflow/core/ops/sparse_ops.cc
tensorflow/core/ops/sendrecv_ops.cc
tensorflow/core/ops/script_ops.cc
tensorflow/core/ops/random_ops.cc
tensorflow/core/ops/random_grad.cc
tensorflow/core/ops/parsing_ops.cc
tensorflow/core/ops/no_op.cc
tensorflow/core/ops/nn_ops.cc
tensorflow/core/ops/nn_grad.cc
tensorflow/core/ops/math_ops.cc
tensorflow/core/ops/math_grad.cc
tensorflow/core/ops/logging_ops.cc
tensorflow/core/ops/linalg_ops.cc
tensorflow/core/ops/io_ops.cc
tensorflow/core/ops/image_ops.cc
tensorflow/core/ops/functional_ops.cc
tensorflow/core/ops/functional_grad.cc
tensorflow/core/ops/function_ops.cc
tensorflow/core/ops/data_flow_ops.cc
tensorflow/core/ops/ctc_ops.cc
tensorflow/core/ops/control_flow_ops.cc
tensorflow/core/ops/candidate_sampling_ops.cc
tensorflow/core/ops/array_ops.cc
tensorflow/core/ops/array_grad.cc
tensorflow/core/lib/wav/wav_io.cc
tensorflow/core/lib/strings/stringprintf.cc
tensorflow/core/lib/strings/strcat.cc
tensorflow/core/lib/strings/str_util.cc
tensorflow/core/lib/strings/scanner.cc
tensorflow/core/lib/strings/proto_text_util.cc
tensorflow/core/lib/strings/ordered_code.cc
tensorflow/core/lib/strings/numbers.cc
tensorflow/core/lib/random/weighted_picker.cc
tensorflow/core/lib/random/simple_philox.cc
tensorflow/core/lib/random/random.cc
tensorflow/core/lib/random/distribution_sampler.cc
tensorflow/core/lib/io/two_level_iterator.cc
tensorflow/core/lib/io/table_builder.cc
tensorflow/core/lib/io/table.cc
tensorflow/core/lib/io/record_writer.cc
tensorflow/core/lib/io/record_reader.cc
tensorflow/core/lib/io/path.cc
tensorflow/core/lib/io/match.cc
tensorflow/core/lib/io/iterator.cc
tensorflow/core/lib/io/inputbuffer.cc
tensorflow/core/lib/io/format.cc
tensorflow/core/lib/io/block_builder.cc
tensorflow/core/lib/io/block.cc
tensorflow/core/lib/histogram/histogram.cc
tensorflow/core/lib/hash/hash.cc
tensorflow/core/lib/hash/crc32c.cc
tensorflow/core/lib/core/threadpool.cc
tensorflow/core/lib/core/stringpiece.cc
tensorflow/core/lib/core/status.cc
tensorflow/core/lib/core/coding.cc
tensorflow/core/lib/core/arena.cc
tensorflow/core/graph/validate.cc
tensorflow/core/graph/tensor_id.cc
tensorflow/core/graph/subgraph.cc
tensorflow/core/graph/quantize_training.cc
tensorflow/core/graph/optimizer_cse.cc
tensorflow/core/graph/node_builder.cc
tensorflow/core/graph/graph_partition.cc
tensorflow/core/graph/graph_def_builder.cc
tensorflow/core/graph/graph_constructor.cc
tensorflow/core/graph/graph.cc
tensorflow/core/graph/gradients.cc
tensorflow/core/graph/equal_graph_def.cc
tensorflow/core/graph/edgeset.cc
tensorflow/core/graph/costmodel.cc
tensorflow/core/graph/colors.cc
tensorflow/core/graph/algorithm.cc
tensorflow/core/framework/versions.cc
tensorflow/core/framework/unique_tensor_references.cc
tensorflow/core/framework/types.cc
tensorflow/core/framework/tracking_allocator.cc
tensorflow/core/framework/tensor_util.cc
tensorflow/core/framework/tensor_slice.cc
tensorflow/core/framework/tensor_shape.cc
tensorflow/core/framework/tensor_reference.cc
tensorflow/core/framework/tensor.cc
tensorflow/core/framework/shape_inference.cc
tensorflow/core/framework/resource_mgr.cc
tensorflow/core/framework/rendezvous.cc
tensorflow/core/framework/reader_op_kernel.cc
tensorflow/core/framework/partial_tensor_shape.cc
tensorflow/core/framework/op_segment.cc
tensorflow/core/framework/op_kernel.cc
tensorflow/core/framework/op_gen_lib.cc
tensorflow/core/framework/op_def_util.cc
tensorflow/core/framework/op_def_builder.cc
tensorflow/core/framework/op.cc
tensorflow/core/framework/node_def_util.cc
tensorflow/core/framework/node_def_builder.cc
tensorflow/core/framework/memory_types.cc
tensorflow/core/framework/lookup_interface.cc
tensorflow/core/framework/log_memory.cc
tensorflow/core/framework/load_library.cc
tensorflow/core/framework/kernel_def_builder.cc
tensorflow/core/framework/graph_def_util.cc
tensorflow/core/framework/function.cc
tensorflow/core/framework/fake_input.cc
tensorflow/core/framework/device_base.cc
tensorflow/core/framework/common_shape_fns.cc
tensorflow/core/framework/cancellation.cc
tensorflow/core/framework/bfloat16.cc
tensorflow/core/framework/attr_value_util.cc
tensorflow/core/framework/allocator.cc
tensorflow/core/common_runtime/threadpool_device_factory.cc
tensorflow/core/common_runtime/threadpool_device.cc
tensorflow/core/common_runtime/step_stats_collector.cc
tensorflow/core/common_runtime/simple_placer.cc
tensorflow/core/common_runtime/simple_graph_execution_state.cc
tensorflow/core/common_runtime/session_state.cc
tensorflow/core/common_runtime/session_options.cc
tensorflow/core/common_runtime/session_factory.cc
tensorflow/core/common_runtime/session.cc
tensorflow/core/common_runtime/rendezvous_mgr.cc
tensorflow/core/common_runtime/process_util.cc
tensorflow/core/common_runtime/memory_types.cc
tensorflow/core/common_runtime/local_device.cc
tensorflow/core/common_runtime/graph_optimizer.cc
tensorflow/core/common_runtime/gpu/gpu_tracer.cc
tensorflow/core/common_runtime/function.cc
tensorflow/core/common_runtime/executor.cc
tensorflow/core/common_runtime/direct_session.cc
tensorflow/core/common_runtime/device_set.cc
tensorflow/core/common_runtime/device_mgr.cc
tensorflow/core/common_runtime/device_factory.cc
tensorflow/core/common_runtime/device.cc
tensorflow/core/common_runtime/costmodel_manager.cc
tensorflow/core/common_runtime/copy_tensor.cc
tensorflow/core/common_runtime/constant_folding.cc
tensorflow/core/common_runtime/build_graph_options.cc
tensorflow/core/common_runtime/bfc_allocator.cc
tensorflow/core/common_runtime/allocator_retry.cc
tensorflow/core/client/tensor_c_api.cc

View File

@ -0,0 +1,124 @@
tensorflow/core/kernels/xent_op.cc
tensorflow/core/kernels/where_op.cc
tensorflow/core/kernels/variable_ops.cc
tensorflow/core/kernels/unpack_op.cc
tensorflow/core/kernels/transpose_op.cc
tensorflow/core/kernels/transpose_functor_cpu.cc
tensorflow/core/kernels/training_ops.cc
tensorflow/core/kernels/topk_op.cc
tensorflow/core/kernels/tile_ops.cc
tensorflow/core/kernels/strided_slice_op_inst_6.cc
tensorflow/core/kernels/strided_slice_op_inst_5.cc
tensorflow/core/kernels/strided_slice_op_inst_4.cc
tensorflow/core/kernels/strided_slice_op_inst_3.cc
tensorflow/core/kernels/strided_slice_op_inst_2.cc
tensorflow/core/kernels/strided_slice_op_inst_1.cc
tensorflow/core/kernels/strided_slice_op.cc
tensorflow/core/kernels/stack_ops.cc
tensorflow/core/kernels/split_op.cc
tensorflow/core/kernels/split_lib_cpu.cc
tensorflow/core/kernels/sparse_to_dense_op.cc
tensorflow/core/kernels/softsign_op.cc
tensorflow/core/kernels/softplus_op.cc
tensorflow/core/kernels/softmax_op.cc
tensorflow/core/kernels/slice_op.cc
tensorflow/core/kernels/shape_ops.cc
tensorflow/core/kernels/session_ops.cc
tensorflow/core/kernels/sequence_ops.cc
tensorflow/core/kernels/sendrecv_ops.cc
tensorflow/core/kernels/save_restore_tensor.cc
tensorflow/core/kernels/save_op.cc
tensorflow/core/kernels/reverse_sequence_op.cc
tensorflow/core/kernels/reverse_op.cc
tensorflow/core/kernels/restore_op.cc
tensorflow/core/kernels/resize_nearest_neighbor_op.cc
tensorflow/core/kernels/resize_bilinear_op.cc
tensorflow/core/kernels/reshape_op.cc
tensorflow/core/kernels/relu_op.cc
tensorflow/core/kernels/reduction_ops_sum.cc
tensorflow/core/kernels/reduction_ops_prod.cc
tensorflow/core/kernels/reduction_ops_min.cc
tensorflow/core/kernels/reduction_ops_mean.cc
tensorflow/core/kernels/reduction_ops_max.cc
tensorflow/core/kernels/reduction_ops_common.cc
tensorflow/core/kernels/pooling_ops_common.cc
tensorflow/core/kernels/pad_op.cc
tensorflow/core/kernels/pack_op.cc
tensorflow/core/kernels/ops_util.cc
tensorflow/core/kernels/no_op.cc
tensorflow/core/kernels/maxpooling_op.cc
tensorflow/core/kernels/matmul_op.cc
tensorflow/core/kernels/lrn_op.cc
tensorflow/core/kernels/in_topk_op.cc
tensorflow/core/kernels/immutable_constant_op.cc
tensorflow/core/kernels/identity_op.cc
tensorflow/core/kernels/gather_op.cc
tensorflow/core/kernels/fill_functor.cc
tensorflow/core/kernels/example_parsing_ops.cc
tensorflow/core/kernels/dynamic_stitch_op.cc
tensorflow/core/kernels/dynamic_partition_op.cc
tensorflow/core/kernels/dense_update_ops.cc
tensorflow/core/kernels/cwise_ops_common.cc
tensorflow/core/kernels/cwise_op_tanh.cc
tensorflow/core/kernels/cwise_op_sub.cc
tensorflow/core/kernels/cwise_op_squared_difference.cc
tensorflow/core/kernels/cwise_op_square.cc
tensorflow/core/kernels/cwise_op_sqrt.cc
tensorflow/core/kernels/cwise_op_sigmoid.cc
tensorflow/core/kernels/cwise_op_select.cc
tensorflow/core/kernels/cwise_op_rsqrt.cc
tensorflow/core/kernels/cwise_op_neg.cc
tensorflow/core/kernels/cwise_op_mul.cc
tensorflow/core/kernels/cwise_op_minimum.cc
tensorflow/core/kernels/cwise_op_maximum.cc
tensorflow/core/kernels/cwise_op_log.cc
tensorflow/core/kernels/cwise_op_less.cc
tensorflow/core/kernels/cwise_op_isfinite.cc
tensorflow/core/kernels/cwise_op_inverse.cc
tensorflow/core/kernels/cwise_op_greater.cc
tensorflow/core/kernels/cwise_op_exp.cc
tensorflow/core/kernels/cwise_op_equal_to.cc
tensorflow/core/kernels/cwise_op_div.cc
tensorflow/core/kernels/cwise_op_add.cc
tensorflow/core/kernels/ctc_decoder_ops.cc
tensorflow/core/kernels/conv_ops.cc
tensorflow/core/kernels/conv_grad_ops.cc
tensorflow/core/kernels/control_flow_ops.cc
tensorflow/core/kernels/constant_op.cc
tensorflow/core/kernels/concat_op.cc
tensorflow/core/kernels/concat_lib_cpu.cc
tensorflow/core/kernels/check_numerics_op.cc
tensorflow/core/kernels/cast_op.cc
tensorflow/core/kernels/bias_op.cc
tensorflow/core/kernels/bcast_ops.cc
tensorflow/core/kernels/batch_norm_op.cc
tensorflow/core/kernels/avgpooling_op.cc
tensorflow/core/kernels/argmax_op.cc
tensorflow/core/kernels/aggregate_ops.cc
tensorflow/core/ops/training_ops.cc
tensorflow/core/ops/string_ops.cc
tensorflow/core/ops/state_ops.cc
tensorflow/core/ops/sparse_ops.cc
tensorflow/core/ops/sendrecv_ops.cc
tensorflow/core/ops/script_ops.cc
tensorflow/core/ops/random_ops.cc
tensorflow/core/ops/random_grad.cc
tensorflow/core/ops/parsing_ops.cc
tensorflow/core/ops/no_op.cc
tensorflow/core/ops/nn_ops.cc
tensorflow/core/ops/nn_grad.cc
tensorflow/core/ops/math_ops.cc
tensorflow/core/ops/math_grad.cc
tensorflow/core/ops/logging_ops.cc
tensorflow/core/ops/linalg_ops.cc
tensorflow/core/ops/io_ops.cc
tensorflow/core/ops/image_ops.cc
tensorflow/core/ops/functional_ops.cc
tensorflow/core/ops/functional_grad.cc
tensorflow/core/ops/function_ops.cc
tensorflow/core/ops/data_flow_ops.cc
tensorflow/core/ops/ctc_ops.cc
tensorflow/core/ops/control_flow_ops.cc
tensorflow/core/ops/candidate_sampling_ops.cc
tensorflow/core/ops/array_ops.cc
tensorflow/core/ops/array_grad.cc

View File

@ -69,6 +69,8 @@ py_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":ops", ":ops",
"//tensorflow/contrib/quantization:quantized_ops_py",
"//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
], ],
) )

View File

@ -603,6 +603,7 @@ filegroup(
"graph/dot.*", "graph/dot.*",
"lib/jpeg/**/*", "lib/jpeg/**/*",
"lib/png/**/*", "lib/png/**/*",
"lib/gif/**/*",
"util/checkpoint_reader.*", "util/checkpoint_reader.*",
"util/events_writer.*", "util/events_writer.*",
"util/reporter.*", "util/reporter.*",
@ -613,6 +614,7 @@ filegroup(
"platform/google/**/*", "platform/google/**/*",
"platform/jpeg.*", "platform/jpeg.*",
"platform/png.*", "platform/png.*",
"platform/gif.*",
"platform/stream_executor.*", "platform/stream_executor.*",
"user_ops/**/*.cu.cc", "user_ops/**/*.cu.cc",
"common_runtime/gpu/**/*", "common_runtime/gpu/**/*",
@ -843,6 +845,7 @@ cc_library(
hdrs = [ hdrs = [
"lib/core/blocking_counter.h", "lib/core/blocking_counter.h",
"lib/core/refcount.h", "lib/core/refcount.h",
"lib/gif/gif_io.h",
"lib/gtl/edit_distance.h", "lib/gtl/edit_distance.h",
"lib/gtl/int_type.h", "lib/gtl/int_type.h",
"lib/gtl/iterator_range.h", "lib/gtl/iterator_range.h",
@ -1967,6 +1970,10 @@ filegroup(
"lib/jpeg/testdata/corrupt34_3.jpg", "lib/jpeg/testdata/corrupt34_3.jpg",
# -- hand-edited variant: stops after a restart marker # -- hand-edited variant: stops after a restart marker
"lib/jpeg/testdata/corrupt34_4.jpg", "lib/jpeg/testdata/corrupt34_4.jpg",
# GIF data
"lib/gif/testdata/scan.gif",
# GIF data with optimization
"lib/gif/testdata/optimized.gif",
], ],
) )

View File

@ -859,6 +859,7 @@ tf_kernel_libraries(
"crop_and_resize_op", "crop_and_resize_op",
"decode_jpeg_op", "decode_jpeg_op",
"decode_png_op", "decode_png_op",
"decode_gif_op",
"draw_bounding_box_op", "draw_bounding_box_op",
"encode_jpeg_op", "encode_jpeg_op",
"attention_ops", "attention_ops",
@ -1108,6 +1109,7 @@ tf_kernel_libraries(
"matmul_op", "matmul_op",
"reduction_ops", "reduction_ops",
"segment_reduction_ops", "segment_reduction_ops",
"scan_ops",
"sequence_ops", "sequence_ops",
"sparse_matmul_op", "sparse_matmul_op",
], ],
@ -2040,6 +2042,7 @@ filegroup(
"decode_png_op.*", "decode_png_op.*",
"encode_jpeg_op.*", "encode_jpeg_op.*",
"decode_jpeg_op.*", "decode_jpeg_op.*",
"decode_gif_op.*",
"identity_reader_op.*", "identity_reader_op.*",
"reader_base.*", "reader_base.*",
"fixed_length_record_reader_op.*", "fixed_length_record_reader_op.*",

View File

@ -36,7 +36,7 @@ namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice; typedef Eigen::GpuDevice GPUDevice;
template <typename Device> template <typename Device, typename T>
class RGBToHSVOp : public OpKernel { class RGBToHSVOp : public OpKernel {
public: public:
explicit RGBToHSVOp(OpKernelConstruction* context) : OpKernel(context) {} explicit RGBToHSVOp(OpKernelConstruction* context) : OpKernel(context) {}
@ -59,23 +59,23 @@ class RGBToHSVOp : public OpKernel {
// Make a canonical image, maintaining the last (channel) dimension, while // Make a canonical image, maintaining the last (channel) dimension, while
// flattening all others to give the functor easy to work with data. // flattening all others to give the functor easy to work with data.
TTypes<float, 2>::ConstTensor input_data = input.flat_inner_dims<float>(); typename TTypes<T, 2>::ConstTensor input_data = input.flat_inner_dims<T>();
TTypes<float, 2>::Tensor output_data = output->flat_inner_dims<float>(); typename TTypes<T, 2>::Tensor output_data = output->flat_inner_dims<T>();
Tensor trange; Tensor trange;
OP_REQUIRES_OK( OP_REQUIRES_OK(
context, context->allocate_temp(DataTypeToEnum<float>::value, context, context->allocate_temp(DataTypeToEnum<T>::value,
TensorShape({input_data.dimension(0)}), TensorShape({input_data.dimension(0)}),
&trange)); &trange));
TTypes<float, 1>::Tensor range = trange.tensor<float, 1>(); typename TTypes<T, 1>::Tensor range = trange.tensor<T, 1>();
functor::RGBToHSV<Device>()(context->eigen_device<Device>(), input_data, functor::RGBToHSV<Device, T>()(context->eigen_device<Device>(), input_data,
range, output_data); range, output_data);
} }
}; };
template <typename Device> template <typename Device, typename T>
class HSVToRGBOp : public OpKernel { class HSVToRGBOp : public OpKernel {
public: public:
explicit HSVToRGBOp(OpKernelConstruction* context) : OpKernel(context) {} explicit HSVToRGBOp(OpKernelConstruction* context) : OpKernel(context) {}
@ -96,41 +96,54 @@ class HSVToRGBOp : public OpKernel {
OP_REQUIRES_OK(context, OP_REQUIRES_OK(context,
context->allocate_output(0, input.shape(), &output)); context->allocate_output(0, input.shape(), &output));
TTypes<float, 2>::ConstTensor input_data = input.flat_inner_dims<float>(); typename TTypes<T, 2>::ConstTensor input_data = input.flat_inner_dims<T>();
TTypes<float, 2>::Tensor output_data = output->flat_inner_dims<float>(); typename TTypes<T, 2>::Tensor output_data = output->flat_inner_dims<T>();
functor::HSVToRGB<Device>()(context->eigen_device<Device>(), input_data, functor::HSVToRGB<Device, T>()(context->eigen_device<Device>(), input_data,
output_data); output_data);
} }
}; };
REGISTER_KERNEL_BUILDER(Name("RGBToHSV").Device(DEVICE_CPU), #define REGISTER_CPU(T) \
RGBToHSVOp<CPUDevice>); REGISTER_KERNEL_BUILDER(Name("RGBToHSV").Device(DEVICE_CPU) \
template class RGBToHSVOp<CPUDevice>; .TypeConstraint<T>("T"), \
REGISTER_KERNEL_BUILDER(Name("HSVToRGB").Device(DEVICE_CPU), RGBToHSVOp<CPUDevice, T>); \
HSVToRGBOp<CPUDevice>); template class RGBToHSVOp<CPUDevice, T>; \
template class HSVToRGBOp<CPUDevice>; REGISTER_KERNEL_BUILDER(Name("HSVToRGB").Device(DEVICE_CPU) \
.TypeConstraint<T>("T"), \
HSVToRGBOp<CPUDevice, T>); \
template class HSVToRGBOp<CPUDevice, T>;
TF_CALL_float(REGISTER_CPU);
TF_CALL_double(REGISTER_CPU);
#if GOOGLE_CUDA #if GOOGLE_CUDA
// Forward declarations of the function specializations for GPU (to prevent // Forward declarations of the function specializations for GPU (to prevent
// building the GPU versions here, they will be built compiling _gpu.cu.cc). // building the GPU versions here, they will be built compiling _gpu.cu.cc).
namespace functor { namespace functor {
template <> #define DECLARE_GPU(T) \
void RGBToHSV<GPUDevice>::operator()(const GPUDevice& d, template <> \
TTypes<float, 2>::ConstTensor input_data, void RGBToHSV<GPUDevice, T>::operator()(const GPUDevice& d, \
TTypes<float, 1>::Tensor range, TTypes<T, 2>::ConstTensor input_data, \
TTypes<float, 2>::Tensor output_data); TTypes<T, 1>::Tensor range, \
extern template struct RGBToHSV<GPUDevice>; TTypes<T, 2>::Tensor output_data); \
template <> extern template struct RGBToHSV<GPUDevice, T>; \
void HSVToRGB<GPUDevice>::operator()(const GPUDevice& d, template <> \
TTypes<float, 2>::ConstTensor input_data, void HSVToRGB<GPUDevice, T>::operator()(const GPUDevice& d, \
TTypes<float, 2>::Tensor output_data); TTypes<T, 2>::ConstTensor input_data, \
extern template struct HSVToRGB<GPUDevice>; TTypes<T, 2>::Tensor output_data); \
extern template struct HSVToRGB<GPUDevice, T>;
TF_CALL_float(DECLARE_GPU);
TF_CALL_double(DECLARE_GPU);
} // namespace functor } // namespace functor
REGISTER_KERNEL_BUILDER(Name("RGBToHSV").Device(DEVICE_GPU), #define REGISTER_GPU(T) \
RGBToHSVOp<GPUDevice>); REGISTER_KERNEL_BUILDER(Name("RGBToHSV").Device(DEVICE_GPU) \
REGISTER_KERNEL_BUILDER(Name("HSVToRGB").Device(DEVICE_GPU), .TypeConstraint<T>("T"), \
HSVToRGBOp<GPUDevice>); RGBToHSVOp<GPUDevice, T>); \
REGISTER_KERNEL_BUILDER(Name("HSVToRGB").Device(DEVICE_GPU) \
.TypeConstraint<T>("T"), \
HSVToRGBOp<GPUDevice, T>);
TF_CALL_float(REGISTER_GPU);
TF_CALL_double(REGISTER_GPU);
#endif #endif
} // namespace tensorflow } // namespace tensorflow

View File

@ -24,18 +24,19 @@ namespace tensorflow {
namespace functor { namespace functor {
template <typename Device> template <typename Device, typename T>
struct RGBToHSV { struct RGBToHSV {
void operator()(const Device &d, TTypes<float, 2>::ConstTensor input_data, void operator()(const Device &d,
TTypes<float, 1>::Tensor range, typename TTypes<T, 2>::ConstTensor input_data,
TTypes<float, 2>::Tensor output_data) { typename TTypes<T, 1>::Tensor range,
auto H = output_data.chip<1>(0); typename TTypes<T, 2>::Tensor output_data) {
auto S = output_data.chip<1>(1); auto H = output_data.template chip<1>(0);
auto V = output_data.chip<1>(2); auto S = output_data.template chip<1>(1);
auto V = output_data.template chip<1>(2);
auto R = input_data.chip<1>(0); auto R = input_data.template chip<1>(0);
auto G = input_data.chip<1>(1); auto G = input_data.template chip<1>(1);
auto B = input_data.chip<1>(2); auto B = input_data.template chip<1>(2);
#if !defined(EIGEN_HAS_INDEX_LIST) #if !defined(EIGEN_HAS_INDEX_LIST)
Eigen::array<int, 1> channel_axis{{1}}; Eigen::array<int, 1> channel_axis{{1}};
@ -47,38 +48,40 @@ struct RGBToHSV {
range.device(d) = V - input_data.minimum(channel_axis); range.device(d) = V - input_data.minimum(channel_axis);
S.device(d) = (V > 0.f).select(range / V, V.constant(0.f)); S.device(d) = (V > T(0)).select(range / V, V.constant(T(0)));
auto norm = range.inverse() * (1.f / 6.f); auto norm = range.inverse() * (T(1) / T(6));
// TODO(wicke): all these assignments are only necessary because a combined // TODO(wicke): all these assignments are only necessary because a combined
// expression is larger than kernel parameter space. A custom kernel is // expression is larger than kernel parameter space. A custom kernel is
// probably in order. // probably in order.
H.device(d) = (R == V).select(norm * (G - B), H.device(d) = (R == V).select(norm * (G - B),
(G == V).select(norm * (B - R) + 2.f / 6.f, (G == V).select(
norm * (R - G) + 4.f / 6.f)); norm * (B - R) + T(2) / T(6),
H.device(d) = (range > 0.f).select(H, H.constant(0.f)); norm * (R - G) + T(4) / T(6)));
H.device(d) = (H < 0.f).select(H + 1.f, H); H.device(d) = (range > T(0)).select(H, H.constant(T(0)));
H.device(d) = (H < T(0)).select(H + T(1), H);
} }
}; };
template <typename Device> template <typename Device, typename T>
struct HSVToRGB { struct HSVToRGB {
void operator()(const Device &d, TTypes<float, 2>::ConstTensor input_data, void operator()(const Device &d,
TTypes<float, 2>::Tensor output_data) { typename TTypes<T, 2>::ConstTensor input_data,
auto H = input_data.chip<1>(0); typename TTypes<T, 2>::Tensor output_data) {
auto S = input_data.chip<1>(1); auto H = input_data.template chip<1>(0);
auto V = input_data.chip<1>(2); auto S = input_data.template chip<1>(1);
auto V = input_data.template chip<1>(2);
// TODO(wicke): compute only the fractional part of H for robustness // TODO(wicke): compute only the fractional part of H for robustness
auto dh = H * 6.f; auto dh = H * T(6);
auto dr = ((dh - 3.f).abs() - 1.f).cwiseMax(0.f).cwiseMin(1.f); auto dr = ((dh - T(3)).abs() - T(1)).cwiseMax(T(0)).cwiseMin(T(1));
auto dg = (-(dh - 2.f).abs() + 2.f).cwiseMax(0.f).cwiseMin(1.f); auto dg = (-(dh - T(2)).abs() + T(2)).cwiseMax(T(0)).cwiseMin(T(1));
auto db = (-(dh - 4.f).abs() + 2.f).cwiseMax(0.f).cwiseMin(1.f); auto db = (-(dh - T(4)).abs() + T(2)).cwiseMax(T(0)).cwiseMin(T(1));
auto one_s = -S + 1.f; auto one_s = -S + T(1);
auto R = output_data.chip<1>(0); auto R = output_data.template chip<1>(0);
auto G = output_data.chip<1>(1); auto G = output_data.template chip<1>(1);
auto B = output_data.chip<1>(2); auto B = output_data.template chip<1>(2);
R.device(d) = (one_s + S * dr) * V; R.device(d) = (one_s + S * dr) * V;
G.device(d) = (one_s + S * dg) * V; G.device(d) = (one_s + S * dg) * V;

View File

@ -24,8 +24,11 @@ namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice; typedef Eigen::GpuDevice GPUDevice;
template class functor::RGBToHSV<GPUDevice>; #define INSTANTIATE_GPU(T) \
template class functor::HSVToRGB<GPUDevice>; template class functor::RGBToHSV<GPUDevice, T>; \
template class functor::HSVToRGB<GPUDevice, T>;
TF_CALL_float(INSTANTIATE_GPU);
TF_CALL_double(INSTANTIATE_GPU);
} }
#endif // GOOGLE_CUDA #endif // GOOGLE_CUDA

View File

@ -29,183 +29,241 @@ limitations under the License.
namespace tensorflow { namespace tensorflow {
template <typename T>
class RGBToHSVOpTest : public OpsTestBase { class RGBToHSVOpTest : public OpsTestBase {
protected: protected:
RGBToHSVOpTest() { void MakeOp(DataType data_type) {
TF_EXPECT_OK(NodeDefBuilder("rgb_to_hsv_op", "RGBToHSV") TF_EXPECT_OK(NodeDefBuilder("rgb_to_hsv_op", "RGBToHSV")
.Input(FakeInput(DT_FLOAT)) .Input(FakeInput(data_type))
.Finalize(node_def())); .Finalize(node_def()));
TF_EXPECT_OK(InitOp()); TF_EXPECT_OK(InitOp());
} }
void CheckBlack(DataType data_type) {
// Black pixel should map to hsv = [0,0,0]
AddInputFromArray<T>(TensorShape({3}), {0, 0, 0});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {0.0, 0.0, 0.0});
test::ExpectTensorEqual<T>(expected, *GetOutput(0));
}
void CheckGray(DataType data_type) {
// Gray pixel (r = g = b) should have hue = saturation = 0.0, value = r
AddInputFromArray<T>(TensorShape({3}), {.5, .5, .5});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {0.0, 0.0, .5});
test::ExpectTensorEqual<T>(expected, *GetOutput(0));
}
void CheckWhite(DataType data_type) {
// White pixel should have hue = saturation = 0.0, value = 1.0
AddInputFromArray<T>(TensorShape({3}), {1, 1, 1});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {0.0, 0.0, 1.0});
test::ExpectTensorEqual<T>(expected, *GetOutput(0));
}
void CheckRedMax(DataType data_type) {
// Test case where red channel dominates
AddInputFromArray<T>(TensorShape({3}), {.8, .4, .2});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * .2 / .6;
T expected_s = .6 / .8;
T expected_v = .8 / 1.;
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
void CheckGreenMax(DataType data_type) {
// Test case where green channel dominates
AddInputFromArray<T>(TensorShape({3}), {.2, .8, .4});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * (2.0 + (.2 / .6));
T expected_s = .6 / .8;
T expected_v = .8 / 1.;
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
void CheckBlueMax(DataType data_type) {
// Test case where blue channel dominates
AddInputFromArray<T>(TensorShape({3}), {.4, .2, .8});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * (4.0 + (.2 / .6));
T expected_s = .6 / .8;
T expected_v = .8 / 1.;
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
void CheckNegativeDifference(DataType data_type) {
AddInputFromArray<T>(TensorShape({3}), {0, .1, .2});
TF_ASSERT_OK(RunOpKernel());
T expected_h = 1. / 6. * (4.0 + (-.1 / .2));
T expected_s = .2 / .2;
T expected_v = .2 / 1.;
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
}; };
TEST_F(RGBToHSVOpTest, CheckBlack) { template <typename T>
// Black pixel should map to hsv = [0,0,0]
AddInputFromArray<float>(TensorShape({3}), {0, 0, 0});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {0.0, 0.0, 0.0});
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
TEST_F(RGBToHSVOpTest, CheckGray) {
// Gray pixel should have hue = saturation = 0.0, value = r/255
AddInputFromArray<float>(TensorShape({3}), {.5, .5, .5});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {0.0, 0.0, .5});
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
TEST_F(RGBToHSVOpTest, CheckWhite) {
// Gray pixel should have hue = saturation = 0.0, value = 1.0
AddInputFromArray<float>(TensorShape({3}), {1, 1, 1});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {0.0, 0.0, 1.0});
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
TEST_F(RGBToHSVOpTest, CheckRedMax) {
// Test case where red channel dominates
AddInputFromArray<float>(TensorShape({3}), {.8, .4, .2});
TF_ASSERT_OK(RunOpKernel());
float expected_h = 1. / 6. * .2 / .6;
float expected_s = .6 / .8;
float expected_v = .8 / 1.;
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-6);
}
TEST_F(RGBToHSVOpTest, CheckGreenMax) {
// Test case where green channel dominates
AddInputFromArray<float>(TensorShape({3}), {.2, .8, .4});
TF_ASSERT_OK(RunOpKernel());
float expected_h = 1. / 6. * (2.0 + (.2 / .6));
float expected_s = .6 / .8;
float expected_v = .8 / 1.;
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-6);
}
TEST_F(RGBToHSVOpTest, CheckBlueMax) {
// Test case where blue channel dominates
AddInputFromArray<float>(TensorShape({3}), {.4, .2, .8});
TF_ASSERT_OK(RunOpKernel());
float expected_h = 1. / 6. * (4.0 + (.2 / .6));
float expected_s = .6 / .8;
float expected_v = .8 / 1.;
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-6);
}
TEST_F(RGBToHSVOpTest, CheckNegativeDifference) {
AddInputFromArray<float>(TensorShape({3}), {0, .1, .2});
TF_ASSERT_OK(RunOpKernel());
float expected_h = 1. / 6. * (4.0 + (-.1 / .2));
float expected_s = .2 / .2;
float expected_v = .2 / 1.;
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {expected_h, expected_s, expected_v});
test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-6);
}
class HSVToRGBOpTest : public OpsTestBase { class HSVToRGBOpTest : public OpsTestBase {
protected: protected:
HSVToRGBOpTest() { void MakeOp(DataType data_type) {
TF_EXPECT_OK(NodeDefBuilder("hsv_to_rgb_op", "HSVToRGB") TF_EXPECT_OK(NodeDefBuilder("hsv_to_rgb_op", "HSVToRGB")
.Input(FakeInput(DT_FLOAT)) .Input(FakeInput(data_type))
.Finalize(node_def())); .Finalize(node_def()));
TF_EXPECT_OK(InitOp()); TF_EXPECT_OK(InitOp());
} }
void CheckBlack(DataType data_type) {
// Black pixel should map to rgb = [0,0,0]
AddInputFromArray<T>(TensorShape({3}), {0.0, 0.0, 0.0});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {0, 0, 0});
test::ExpectTensorEqual<T>(expected, *GetOutput(0));
}
void CheckGray(DataType data_type) {
// Gray pixel (hue = saturation = 0.0, value = .5) should map to r = g = b = value
AddInputFromArray<T>(TensorShape({3}), {0.0, 0.0, .5});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {.5, .5, .5});
test::ExpectTensorEqual<T>(expected, *GetOutput(0));
}
void CheckWhite(DataType data_type) {
// White pixel (hue = saturation = 0.0, value = 1.0) should map to rgb = [1,1,1]
AddInputFromArray<T>(TensorShape({3}), {0.0, 0.0, 1.0});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {1, 1, 1});
test::ExpectTensorEqual<T>(expected, *GetOutput(0));
}
void CheckRedMax(DataType data_type) {
// Test case where red channel dominates
T expected_h = 1. / 6. * .2 / .6;
T expected_s = .6 / .8;
T expected_v = .8 / 1.;
AddInputFromArray<T>(TensorShape({3}),
{expected_h, expected_s, expected_v});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {.8, .4, .2});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
void CheckGreenMax(DataType data_type) {
// Test case where green channel dominates
T expected_h = 1. / 6. * (2.0 + (.2 / .6));
T expected_s = .6 / .8;
T expected_v = .8 / 1.;
AddInputFromArray<T>(TensorShape({3}),
{expected_h, expected_s, expected_v});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {.2, .8, .4});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
void CheckBlueMax(DataType data_type) {
// Test case where blue channel dominates
T expected_h = 1. / 6. * (4.0 + (.2 / .6));
T expected_s = .6 / .8;
T expected_v = .8 / 1.0;
AddInputFromArray<T>(TensorShape({3}),
{expected_h, expected_s, expected_v});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {.4, .2, .8});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
void CheckNegativeDifference(DataType data_type) {
T expected_h = 1. / 6. * (4.0 + (-.1 / .2));
T expected_s = .2 / .2;
T expected_v = .2 / 1.;
AddInputFromArray<T>(TensorShape({3}),
{expected_h, expected_s, expected_v});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), data_type, TensorShape({3}));
test::FillValues<T>(&expected, {0, .1, .2});
test::ExpectTensorNear<T>(expected, *GetOutput(0), 1e-6);
}
}; };
TEST_F(HSVToRGBOpTest, CheckBlack) { #define TEST_COLORSPACE(test, dt) \
// Black pixel should map to rgb = [0,0,0] TEST_F(test, CheckBlack) { \
AddInputFromArray<float>(TensorShape({3}), {0.0, 0.0, 0.0}); MakeOp(dt); \
TF_ASSERT_OK(RunOpKernel()); CheckBlack(dt); \
} \
Tensor expected(allocator(), DT_FLOAT, TensorShape({3})); TEST_F(test, CheckGray) { \
test::FillValues<float>(&expected, {0, 0, 0}); MakeOp(dt); \
test::ExpectTensorEqual<float>(expected, *GetOutput(0)); CheckGray(dt); \
} \
TEST_F(test, CheckWhite) { \
MakeOp(dt); \
CheckWhite(dt); \
} \
TEST_F(test, CheckRedMax) { \
MakeOp(dt); \
CheckRedMax(dt); \
} \
TEST_F(test, CheckGreenMax) { \
MakeOp(dt); \
CheckGreenMax(dt); \
} \
TEST_F(test, CheckBlueMax) { \
MakeOp(dt); \
CheckBlueMax(dt); \
} \
TEST_F(test, CheckNegativeDifference) { \
MakeOp(dt); \
CheckNegativeDifference(dt); \
} }
TEST_F(HSVToRGBOpTest, CheckGray) { typedef RGBToHSVOpTest<float> rgb_to_hsv_float;
// Gray pixel should have hue = saturation = 0.0, value = r/255 typedef RGBToHSVOpTest<double> rgb_to_hsv_double;
AddInputFromArray<float>(TensorShape({3}), {0.0, 0.0, .5});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3})); TEST_COLORSPACE(rgb_to_hsv_float, DT_FLOAT);
test::FillValues<float>(&expected, {.5, .5, .5}); TEST_COLORSPACE(rgb_to_hsv_double, DT_DOUBLE);
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
TEST_F(HSVToRGBOpTest, CheckWhite) { typedef HSVToRGBOpTest<float> hsv_to_rgb_float;
// Gray pixel should have hue = saturation = 0.0, value = 1.0 typedef HSVToRGBOpTest<double> hsv_to_rgb_double;
AddInputFromArray<float>(TensorShape({3}), {0.0, 0.0, 1.0});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3})); TEST_COLORSPACE(hsv_to_rgb_float, DT_FLOAT);
test::FillValues<float>(&expected, {1, 1, 1}); TEST_COLORSPACE(hsv_to_rgb_double, DT_DOUBLE);
test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}
TEST_F(HSVToRGBOpTest, CheckRedMax) {
// Test case where red channel dominates
float expected_h = 1. / 6. * .2 / .6;
float expected_s = .6 / .8;
float expected_v = .8 / 1.;
AddInputFromArray<float>(TensorShape({3}),
{expected_h, expected_s, expected_v});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {.8, .4, .2});
test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-6);
}
TEST_F(HSVToRGBOpTest, CheckGreenMax) {
// Test case where green channel dominates
float expected_h = 1. / 6. * (2.0 + (.2 / .6));
float expected_s = .6 / .8;
float expected_v = .8 / 1.;
AddInputFromArray<float>(TensorShape({3}),
{expected_h, expected_s, expected_v});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {.2, .8, .4});
test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-6);
}
TEST_F(HSVToRGBOpTest, CheckBlueMax) {
// Test case where blue channel dominates
float expected_h = 1. / 6. * (4.0 + (.2 / .6));
float expected_s = .6 / .8;
float expected_v = .8 / 1.0;
AddInputFromArray<float>(TensorShape({3}),
{expected_h, expected_s, expected_v});
TF_ASSERT_OK(RunOpKernel());
Tensor expected(allocator(), DT_FLOAT, TensorShape({3}));
test::FillValues<float>(&expected, {.4, .2, .8});
test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-6);
}
} // namespace tensorflow } // namespace tensorflow

View File

@ -65,7 +65,7 @@ class BinaryOpShared : public OpKernel {
// Coefficient-wise binary operations: // Coefficient-wise binary operations:
// Device: E.g., CPUDevice, GPUDevice. // Device: E.g., CPUDevice, GPUDevice.
// Functor: defined in cwise_functors.h. E.g., functor::add2. // Functor: defined in cwise_ops.h. E.g., functor::add.
template <typename Device, typename Functor> template <typename Device, typename Functor>
class BinaryOp : public BinaryOpShared { class BinaryOp : public BinaryOpShared {
public: public:
@ -162,7 +162,7 @@ class SimpleBinaryOp : public OpKernel {
// Coefficient-wise unary operations: // Coefficient-wise unary operations:
// Device: E.g., CPUDevice, GPUDevice. // Device: E.g., CPUDevice, GPUDevice.
// Functor: defined in cwise_functors.h. E.g., functor::sqrt. // Functor: defined in cwise_ops.h. E.g., functor::sqrt.
template <typename Device, typename Functor> template <typename Device, typename Functor>
class UnaryOp : public OpKernel { class UnaryOp : public OpKernel {
public: public:

View File

@ -0,0 +1,66 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// See docs in ../ops/image_ops.cc
#include <memory>
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/gif/gif_io.h"
#include "tensorflow/core/platform/logging.h"
namespace tensorflow {
// Kernel that decodes the contents of a GIF file. Input 0 must be a scalar
// string tensor holding the encoded bytes; output 0 is a uint8 tensor of
// shape [num_frames, height, width, channels].
class DecodeGifOp : public OpKernel {
 public:
  explicit DecodeGifOp(OpKernelConstruction* context) : OpKernel(context) {}

  // Validates the input, then lets gif::Decode drive the decoding. The output
  // tensor cannot be allocated up front because its shape is only known once
  // the decoder has read the image header, so allocation happens inside the
  // callback handed to the decoder.
  void Compute(OpKernelContext* context) override {
    const Tensor& contents = context->input(0);
    OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents.shape()),
                errors::InvalidArgument("contents must be scalar, got shape ",
                                        contents.shape().DebugString()));

    // View of the encoded bytes.
    const StringPiece input = contents.scalar<string>()();

    // Filled in by the allocation callback once the image size is known.
    Tensor* output = nullptr;

    // Invoked by the decoder with the image dimensions. Returns the buffer to
    // decode into, or nullptr (after recording the error on the context) when
    // the output tensor could not be allocated.
    auto allocate_frames = [context, &output](int num_frames, int width,
                                              int height,
                                              int channels) -> uint8* {
      const Status alloc_status = context->allocate_output(
          0, TensorShape({num_frames, height, width, channels}), &output);
      if (alloc_status.ok()) {
        return output->flat<uint8>().data();
      }
      VLOG(1) << alloc_status;
      context->SetStatus(alloc_status);
      return nullptr;
    };

    // A falsy result from the decoder is reported as invalid input data.
    OP_REQUIRES(
        context, gif::Decode(input.data(), input.size(), allocate_frames),
        errors::InvalidArgument("Invalid GIF data, size ", input.size()));
  }
};
REGISTER_KERNEL_BUILDER(Name("DecodeGif").Device(DEVICE_CPU), DecodeGifOp);
} // namespace tensorflow

View File

@ -97,13 +97,7 @@ class ReverseOp : public OpKernel {
.HostMemory("dims"), \ .HostMemory("dims"), \
ReverseOp<CPUDevice, T>) ReverseOp<CPUDevice, T>)
TF_CALL_uint8(REGISTER_KERNEL); TF_CALL_POD_TYPES(REGISTER_KERNEL);
TF_CALL_int8(REGISTER_KERNEL);
TF_CALL_int32(REGISTER_KERNEL);
TF_CALL_bool(REGISTER_KERNEL);
TF_CALL_half(REGISTER_KERNEL);
TF_CALL_float(REGISTER_KERNEL);
TF_CALL_double(REGISTER_KERNEL);
#undef REGISTER_KERNEL #undef REGISTER_KERNEL
#if GOOGLE_CUDA #if GOOGLE_CUDA
@ -136,6 +130,8 @@ TF_CALL_bool(DECLARE_GPU_SPEC);
TF_CALL_half(DECLARE_GPU_SPEC); TF_CALL_half(DECLARE_GPU_SPEC);
TF_CALL_float(DECLARE_GPU_SPEC); TF_CALL_float(DECLARE_GPU_SPEC);
TF_CALL_double(DECLARE_GPU_SPEC); TF_CALL_double(DECLARE_GPU_SPEC);
TF_CALL_complex64(DECLARE_GPU_SPEC);
TF_CALL_complex128(DECLARE_GPU_SPEC);
#undef DECLARE_GPU_SPEC #undef DECLARE_GPU_SPEC
#undef DECLARE_GPU_SPEC_DIM #undef DECLARE_GPU_SPEC_DIM
} // namespace functor } // namespace functor
@ -149,9 +145,15 @@ TF_CALL_double(DECLARE_GPU_SPEC);
ReverseOp<GPUDevice, T>) ReverseOp<GPUDevice, T>)
TF_CALL_uint8(REGISTER_GPU_KERNEL); TF_CALL_uint8(REGISTER_GPU_KERNEL);
TF_CALL_int8(REGISTER_GPU_KERNEL); TF_CALL_int8(REGISTER_GPU_KERNEL);
// TODO Find out why the int32 GPU kernel doesn't work
// and decide whether we want to enable the bool kernel.
//TF_CALL_int32(REGISTER_GPU_KERNEL);
//TF_CALL_bool(REGISTER_GPU_KERNEL);
TF_CALL_half(REGISTER_GPU_KERNEL); TF_CALL_half(REGISTER_GPU_KERNEL);
TF_CALL_float(REGISTER_GPU_KERNEL); TF_CALL_float(REGISTER_GPU_KERNEL);
TF_CALL_double(REGISTER_GPU_KERNEL); TF_CALL_double(REGISTER_GPU_KERNEL);
TF_CALL_complex64(REGISTER_GPU_KERNEL);
TF_CALL_complex128(REGISTER_GPU_KERNEL);
#undef REGISTER_GPU_KERNEL #undef REGISTER_GPU_KERNEL
#endif // GOOGLE_CUDA #endif // GOOGLE_CUDA

View File

@ -25,24 +25,30 @@ namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice; typedef Eigen::GpuDevice GPUDevice;
#define DEFINE_REVERSE(DIM) \ #define DEFINE_REVERSE(T, DIM) \
template struct functor::Reverse<GPUDevice, uint8, DIM>; \ template struct functor::Reverse<GPUDevice, T, DIM>;
template struct functor::Reverse<GPUDevice, int8, DIM>; \ #define DEFINE_REVERSE_ALL_DIMS(T) \
template struct functor::Reverse<GPUDevice, int32, DIM>; \ DEFINE_REVERSE(T, 0) \
template struct functor::Reverse<GPUDevice, bool, DIM>; \ DEFINE_REVERSE(T, 1) \
template struct functor::Reverse<GPUDevice, Eigen::half, DIM>; \ DEFINE_REVERSE(T, 2) \
template struct functor::Reverse<GPUDevice, float, DIM>; \ DEFINE_REVERSE(T, 3) \
template struct functor::Reverse<GPUDevice, double, DIM>; DEFINE_REVERSE(T, 4) \
DEFINE_REVERSE(0) DEFINE_REVERSE(T, 5) \
DEFINE_REVERSE(1) DEFINE_REVERSE(T, 6) \
DEFINE_REVERSE(2) DEFINE_REVERSE(T, 7) \
DEFINE_REVERSE(3) DEFINE_REVERSE(T, 8)
DEFINE_REVERSE(4)
DEFINE_REVERSE(5) TF_CALL_uint8(DEFINE_REVERSE_ALL_DIMS);
DEFINE_REVERSE(6) TF_CALL_int8(DEFINE_REVERSE_ALL_DIMS);
DEFINE_REVERSE(7) TF_CALL_int32(DEFINE_REVERSE_ALL_DIMS);
DEFINE_REVERSE(8) TF_CALL_bool(DEFINE_REVERSE_ALL_DIMS);
TF_CALL_half(DEFINE_REVERSE_ALL_DIMS);
TF_CALL_float(DEFINE_REVERSE_ALL_DIMS);
TF_CALL_double(DEFINE_REVERSE_ALL_DIMS);
TF_CALL_complex64(DEFINE_REVERSE_ALL_DIMS);
TF_CALL_complex128(DEFINE_REVERSE_ALL_DIMS);
#undef DEFINE_REVERSE #undef DEFINE_REVERSE
#undef DEFINE_REVERSE_ALL_DIMS
} // namespace tensorflow } // namespace tensorflow

View File

@ -0,0 +1,177 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif // GOOGLE_CUDA
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "third_party/eigen3/Eigen/Core"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/scan_ops.h"
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
// Kernel implementing a cumulative reduction (scan) along one axis of its
// input. It backs the Cumsum and Cumprod registrations in this file, which
// differ only in the Reducer they plug in.
//
// Template parameters:
//   Device:  Eigen device the computation runs on (CPUDevice or GPUDevice).
//   T:       element type of the input and output tensors.
//   Reducer: Eigen reducer combining the elements.
//
// Inputs:  0 - the tensor to scan; 1 - scalar axis to scan along.
// Attrs:   "reverse" and "exclusive", forwarded unchanged to functor::Scan.
// Output:  0 - tensor with the same shape as input 0.
template <typename Device, class T, typename Reducer>
class ScanOp : public OpKernel {
 public:
  explicit ScanOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("reverse", &reverse_));
    OP_REQUIRES_OK(ctx, ctx->GetAttr("exclusive", &exclusive_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Tensor& tensor_axis = ctx->input(1);

    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tensor_axis.shape()),
                errors::InvalidArgument("ScanOp: axis must be a scalar, not ",
                                        tensor_axis.shape().DebugString()));

    // Copy the axis out of the tensor once so that validation and use see the
    // same value.
    const int axis = internal::SubtleMustCopy(tensor_axis.scalar<int>()());

    OP_REQUIRES(
        ctx, FastBoundsCheck(axis, input.dims()),
        errors::InvalidArgument("ScanOp: Expected scan axis in the range [", 0,
                                ", ", input.dims(), "), but got ", axis));

    TensorShape output_shape = input.shape();
    Tensor* output = nullptr;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output));

    const Device& d = ctx->eigen_device<Device>();
    Reducer reducer;

// Dispatches to the functor instantiated for a fixed rank; the rank must be
// a compile-time constant for the Eigen tensor types.
#define HANDLE_SCAN(NDIMS)                                                \
  case NDIMS:                                                             \
    functor::Scan<Device, Reducer, T, NDIMS>()(                           \
        d, input.tensor<T, NDIMS>(), output->tensor<T, NDIMS>(), reducer, \
        axis, reverse_, exclusive_);                                      \
    return;

    switch (input.dims()) {
      // input.dims() == 0 can't occur as there
      // is no valid axis parameter in this case
      HANDLE_SCAN(1);
      HANDLE_SCAN(2);
      HANDLE_SCAN(3);
      HANDLE_SCAN(4);
      HANDLE_SCAN(5);
      HANDLE_SCAN(6);
      HANDLE_SCAN(7);
      HANDLE_SCAN(8);
      default:
        // The rank is appended to the message, so keep a separator in front
        // of it; otherwise it reads as "...8 dimensions9".
        OP_REQUIRES(ctx, false,
                    errors::InvalidArgument(
                        "Scan does not support tensors with "
                        "more than 8 dimensions, got ",
                        input.dims()));
    }
#undef HANDLE_SCAN
  }

 private:
  bool reverse_;    // Value of the "reverse" attr.
  bool exclusive_;  // Value of the "exclusive" attr.
};
// Use `#if` rather than `#ifdef` so this guard agrees with every other
// GOOGLE_CUDA guard in this file, including when the macro is defined as 0.
#if GOOGLE_CUDA
namespace functor {

// Forward declarations of GPU functors
// Each DECLARE announces the explicit specialization of Scan::operator() for
// GPUDevice and marks the struct as `extern template`, so this translation
// unit does not instantiate the GPU code itself.
#define DECLARE(REDUCER, T, D)                                             \
  template <>                                                              \
  void Scan<GPUDevice, REDUCER, T, D>::operator()(                         \
      const GPUDevice& d, TTypes<T, D>::ConstTensor in,                    \
      TTypes<T, D>::Tensor out, const REDUCER& reducer,                    \
      const Eigen::Index& axis, const bool reverse, const bool exclusive); \
  extern template struct Scan<GPUDevice, REDUCER, T, D>;

// Ranks 1 through 8, matching the ranks ScanOp::Compute dispatches on.
#define DECLARE_FOR_ALL_DIMS(REDUCER, T) \
  DECLARE(REDUCER, T, 1);                \
  DECLARE(REDUCER, T, 2);                \
  DECLARE(REDUCER, T, 3);                \
  DECLARE(REDUCER, T, 4);                \
  DECLARE(REDUCER, T, 5);                \
  DECLARE(REDUCER, T, 6);                \
  DECLARE(REDUCER, T, 7);                \
  DECLARE(REDUCER, T, 8);

// The two reducers used by the kernels registered in this file.
#define DECLARE_FOR_ALL_REDUCERS(T)                        \
  DECLARE_FOR_ALL_DIMS(Eigen::internal::SumReducer<T>, T); \
  DECLARE_FOR_ALL_DIMS(Eigen::internal::ProdReducer<T>, T);

TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_ALL_REDUCERS);

#undef DECLARE_FOR_ALL_REDUCERS
#undef DECLARE_FOR_ALL_DIMS
#undef DECLARE

}  // namespace functor
#endif  // GOOGLE_CUDA
// Register Cumsum kernels
// "Cumsum" is ScanOp instantiated with Eigen's SumReducer. One CPU kernel is
// registered per element type supplied by TF_CALL_NUMBER_TYPES, each
// constrained on the op's "T" attr.
#define REGISTER_CPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
Name("Cumsum").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
ScanOp<CPUDevice, type, Eigen::internal::SumReducer<type>>)
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
// GPU variants, one per type supplied by TF_CALL_GPU_NUMBER_TYPES. The "axis"
// input is pinned to host memory; ScanOp::Compute reads its scalar value
// directly before launching the device computation.
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
Name("Cumsum") \
.Device(DEVICE_GPU) \
.TypeConstraint<type>("T") \
.HostMemory("axis"), \
ScanOp<GPUDevice, type, Eigen::internal::SumReducer<type>>)
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS)
#undef REGISTER_GPU_KERNELS
#endif // GOOGLE_CUDA
// Register Cumprod kernels
// "Cumprod" is ScanOp instantiated with Eigen's ProdReducer. One CPU kernel is
// registered per element type supplied by TF_CALL_NUMBER_TYPES, each
// constrained on the op's "T" attr.
#define REGISTER_CPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
Name("Cumprod").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
ScanOp<CPUDevice, type, Eigen::internal::ProdReducer<type>>)
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
// GPU variants, one per type supplied by TF_CALL_GPU_NUMBER_TYPES. The "axis"
// input is pinned to host memory; ScanOp::Compute reads its scalar value
// directly before launching the device computation.
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
Name("Cumprod") \
.Device(DEVICE_GPU) \
.TypeConstraint<type>("T") \
.HostMemory("axis"), \
ScanOp<GPUDevice, type, Eigen::internal::ProdReducer<type>>)
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS)
#undef REGISTER_GPU_KERNELS
#endif // GOOGLE_CUDA
} // namespace tensorflow

View File

@ -0,0 +1,47 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_KERNELS_SCAN_OPS_H_
#define TENSORFLOW_KERNELS_SCAN_OPS_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
namespace tensorflow {
namespace functor {
typedef Eigen::Index Index;
// Functor that writes to `out` the scan (cumulative reduction) of `in` along
// `axis`, evaluated on device `d`.
//
// Template parameters:
//   Device:  Eigen device type the expression is evaluated on.
//   Reducer: Eigen reducer combining the elements.
//   T:       element type.
//   Dims:    rank of `in` and `out`.
//
// Arguments:
//   d:         device to evaluate on.
//   in:        input tensor.
//   out:       output tensor, assigned the result.
//   reducer:   reducer instance handed to Eigen's scan.
//   axis:      dimension to scan along.
//   reverse:   when true, `in` is flipped along `axis` before the scan and
//              the result is flipped back afterwards.
//   exclusive: forwarded unchanged to Eigen's scan.
template <typename Device, typename Reducer, typename T, int Dims>
struct Scan {
  void operator()(const Device& d, typename TTypes<T, Dims>::ConstTensor in,
                  typename TTypes<T, Dims>::Tensor out, const Reducer& reducer,
                  const Index& axis, const bool reverse, const bool exclusive) {
    // Perform the reverse ops directly with Eigen, which avoids copying the
    // tensor twice compared to using individual ops.
    Eigen::array<bool, Dims> dims;
    // Only the scanned axis is ever flipped, and only when `reverse` is set.
    // Iterating up to the compile-time rank avoids comparing a signed index
    // against the unsigned dims.size().
    for (int i = 0; i < Dims; ++i) {
      dims[i] = reverse && (i == axis);
    }
    To32Bit(out).device(d) = To32Bit(in)
                                 .reverse(dims)
                                 .scan(axis, reducer, exclusive)
                                 .reverse(dims);
  }
};
} // namespace functor
} // namespace tensorflow
#endif // TENSORFLOW_KERNELS_SCAN_OPS_H_

View File

@ -0,0 +1,54 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#include "tensorflow/core/framework/numeric_types.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/kernels/scan_ops.h"
namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice;
typedef Eigen::Index Index;
// Explicitly instantiates functor::Scan on the GPU device for one reducer,
// element type T and tensor rank D.
#define DEFINE(REDUCER, T, D) \
template struct functor::Scan<GPUDevice, REDUCER, T, D>;
// Instantiates the functor for tensor ranks 1 through 8.
#define DEFINE_FOR_ALL_DIMS(REDUCER, T) \
DEFINE(REDUCER, T, 1); \
DEFINE(REDUCER, T, 2); \
DEFINE(REDUCER, T, 3); \
DEFINE(REDUCER, T, 4); \
DEFINE(REDUCER, T, 5); \
DEFINE(REDUCER, T, 6); \
DEFINE(REDUCER, T, 7); \
DEFINE(REDUCER, T, 8)
// Instantiates both the sum and the product reducer for element type T.
#define DEFINE_FOR_ALL_REDUCERS(T) \
DEFINE_FOR_ALL_DIMS(Eigen::internal::SumReducer<T>, T); \
DEFINE_FOR_ALL_DIMS(Eigen::internal::ProdReducer<T>, T);
// Expand the instantiations for every type listed by TF_CALL_GPU_NUMBER_TYPES.
TF_CALL_GPU_NUMBER_TYPES(DEFINE_FOR_ALL_REDUCERS);
// The helper macros are only needed above; undefine them after use.
#undef DEFINE_FOR_ALL_REDUCERS
#undef DEFINE_FOR_ALL_DIMS
#undef DEFINE
} // end namespace tensorflow
#endif // GOOGLE_CUDA

View File

@ -59,8 +59,7 @@ struct ApplyAdadelta<CPUDevice, T> {
accum.device(d) = accum.device(d) =
accum * rho() + grad.square() * (static_cast<T>(1) - rho()); accum * rho() + grad.square() * (static_cast<T>(1) - rho());
const auto update = const auto update =
(accum_update + epsilon()).sqrt() * (accum_update + epsilon()).sqrt() * (accum + epsilon()).rsqrt() * grad;
(accum + epsilon()).rsqrt() * grad;
accum_update.device(d) = accum_update.device(d) =
accum_update * rho() + update.square() * (static_cast<T>(1) - rho()); accum_update * rho() + update.square() * (static_cast<T>(1) - rho());
var.device(d) -= update * lr(); var.device(d) -= update * lr();
@ -176,10 +175,14 @@ struct ApplyMomentum<CPUDevice, T> {
typename TTypes<T>::Flat accum, typename TTypes<T>::Flat accum,
typename TTypes<T>::ConstScalar lr, typename TTypes<T>::ConstScalar lr,
typename TTypes<T>::ConstFlat grad, typename TTypes<T>::ConstFlat grad,
typename TTypes<T>::ConstScalar momentum) { typename TTypes<T>::ConstScalar momentum, bool use_nesterov) {
accum.device(d) = accum * momentum() + grad; accum.device(d) = accum * momentum() + grad;
if (use_nesterov) {
var.device(d) -= grad * lr() + accum * momentum() * lr();
} else {
var.device(d) -= accum * lr(); var.device(d) -= accum * lr();
} }
}
}; };
template <typename T> template <typename T>
@ -1515,6 +1518,7 @@ class ApplyMomentumOp : public OpKernel {
public: public:
explicit ApplyMomentumOp(OpKernelConstruction* ctx) : OpKernel(ctx) { explicit ApplyMomentumOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_nesterov", &use_nesterov_));
} }
void Compute(OpKernelContext* ctx) override { void Compute(OpKernelContext* ctx) override {
@ -1554,12 +1558,13 @@ class ApplyMomentumOp : public OpKernel {
const Device& device = ctx->template eigen_device<Device>(); const Device& device = ctx->template eigen_device<Device>();
functor::ApplyMomentum<Device, T>()(device, var.flat<T>(), accum.flat<T>(), functor::ApplyMomentum<Device, T>()(device, var.flat<T>(), accum.flat<T>(),
lr.scalar<T>(), grad.flat<T>(), lr.scalar<T>(), grad.flat<T>(),
momentum.scalar<T>()); momentum.scalar<T>(), use_nesterov_);
ctx->forward_ref_input_to_ref_output(0, 0); ctx->forward_ref_input_to_ref_output(0, 0);
} }
private: private:
bool use_exclusive_lock_; bool use_exclusive_lock_;
bool use_nesterov_;
}; };
typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::ThreadPoolDevice CPUDevice;
@ -1584,7 +1589,7 @@ namespace functor {
const GPUDevice& d, typename TTypes<T>::Flat var, \ const GPUDevice& d, typename TTypes<T>::Flat var, \
typename TTypes<T>::Flat accum, typename TTypes<T>::ConstScalar lr, \ typename TTypes<T>::Flat accum, typename TTypes<T>::ConstScalar lr, \
typename TTypes<T>::ConstFlat grad, \ typename TTypes<T>::ConstFlat grad, \
typename TTypes<T>::ConstScalar momentum); \ typename TTypes<T>::ConstScalar momentum, bool use_nesterov); \
extern template struct ApplyMomentum<GPUDevice, T>; extern template struct ApplyMomentum<GPUDevice, T>;
DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(Eigen::half);
DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(float);
@ -1605,6 +1610,7 @@ class SparseApplyMomentumOp : public OpKernel {
public: public:
explicit SparseApplyMomentumOp(OpKernelConstruction* ctx) : OpKernel(ctx) { explicit SparseApplyMomentumOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_nesterov", &use_nesterov_));
} }
void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS { void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
@ -1672,15 +1678,21 @@ class SparseApplyMomentumOp : public OpKernel {
auto g = grad_flat.template chip<0>(i); auto g = grad_flat.template chip<0>(i);
auto v = var_flat.template chip<0>(index); auto v = var_flat.template chip<0>(index);
a = a * a.constant(momentum_scalar) + g; a = a * a.constant(momentum_scalar) + g;
if (use_nesterov_) {
v -= g.constant(lr_scalar) * g +
a.constant(lr_scalar) * a.constant(momentum_scalar) * a;
} else {
v -= a.constant(lr_scalar) * a; v -= a.constant(lr_scalar) * a;
} }
} }
}
ctx->forward_ref_input_to_ref_output(0, 0); ctx->forward_ref_input_to_ref_output(0, 0);
} }
private: private:
bool use_exclusive_lock_; bool use_exclusive_lock_;
bool use_nesterov_;
}; };
#define REGISTER_KERNELS(T, Tindices) \ #define REGISTER_KERNELS(T, Tindices) \

View File

@ -16,8 +16,8 @@ limitations under the License.
#ifndef TENSORFLOW_KERNELS_TRAINING_OPS_H_ #ifndef TENSORFLOW_KERNELS_TRAINING_OPS_H_
#define TENSORFLOW_KERNELS_TRAINING_OPS_H_ #define TENSORFLOW_KERNELS_TRAINING_OPS_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/tensor_types.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow { namespace tensorflow {
namespace functor { namespace functor {
@ -98,7 +98,7 @@ struct ApplyMomentum {
typename TTypes<T>::Flat accum, typename TTypes<T>::Flat accum,
typename TTypes<T>::ConstScalar lr, typename TTypes<T>::ConstScalar lr,
typename TTypes<T>::ConstFlat grad, typename TTypes<T>::ConstFlat grad,
typename TTypes<T>::ConstScalar momentum); typename TTypes<T>::ConstScalar momentum, bool use_nesterov);
}; };
template <typename Device, typename T> template <typename Device, typename T>

View File

@ -17,8 +17,8 @@ limitations under the License.
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/kernels/training_ops.h" #include "tensorflow/core/kernels/training_ops.h"
#include "tensorflow/core/framework/register_types.h"
namespace tensorflow { namespace tensorflow {
@ -84,13 +84,19 @@ struct ApplyMomentum<GPUDevice, T> {
typename TTypes<T>::Flat accum, typename TTypes<T>::Flat accum,
typename TTypes<T>::ConstScalar lr, typename TTypes<T>::ConstScalar lr,
typename TTypes<T>::ConstFlat grad, typename TTypes<T>::ConstFlat grad,
typename TTypes<T>::ConstScalar momentum) { typename TTypes<T>::ConstScalar momentum, bool use_nesterov) {
Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast; Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
bcast[0] = grad.dimension(0); bcast[0] = grad.dimension(0);
Eigen::Sizes<1> single; Eigen::Sizes<1> single;
accum.device(d) = accum * momentum.reshape(single).broadcast(bcast) + grad; accum.device(d) = accum * momentum.reshape(single).broadcast(bcast) + grad;
if (use_nesterov) {
var.device(d) -= grad * lr.reshape(single).broadcast(bcast) +
accum * momentum.reshape(single).broadcast(bcast) *
lr.reshape(single).broadcast(bcast);
} else {
var.device(d) -= lr.reshape(single).broadcast(bcast) * accum; var.device(d) -= lr.reshape(single).broadcast(bcast) * accum;
} }
}
}; };
template <typename T> template <typename T>

View File

@ -0,0 +1,95 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Functions to read images in GIF format.
#include "tensorflow/core/lib/gif/gif_io.h"
#include "tensorflow/core/platform/gif.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mem.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
namespace gif {
namespace {

// Describes the not-yet-consumed tail of the in-memory GIF data. A pointer to
// this struct is handed to giflib as the user pointer of DGifOpen and comes
// back to input_callback through GifFileType::UserData.
struct InputBufferInfo {
  const uint8* buf;  // Next byte to hand to giflib.
  int bytes_left;    // Number of readable bytes starting at `buf`.
};

}  // namespace

// Read callback installed via DGifOpen. Copies at most `size` bytes of the
// remaining input into `buf` and advances the read position. Returns the
// number of bytes actually copied, which is smaller than `size` (possibly 0)
// once the input is exhausted, so giflib never reads past the caller's buffer.
int input_callback(GifFileType* gif_file, GifByteType* buf, int size) {
  InputBufferInfo* const info =
      static_cast<InputBufferInfo*>(gif_file->UserData);
  if (info == nullptr || info->buf == nullptr || info->bytes_left <= 0 ||
      size <= 0) {
    return 0;
  }
  if (size > info->bytes_left) {
    size = info->bytes_left;
  }
  memcpy(buf, info->buf, size);
  info->buf += size;
  info->bytes_left -= size;
  return size;
}

// Decodes every frame of the GIF stored in `srcdata` (`datasize` bytes) into
// 8-bit RGB.
//
// `allocate_output` is called once as
// allocate_output(num_frames, width, height, 3) and must return a buffer of
// num_frames * width * height * 3 bytes; frames are written back to back, each
// frame row by row with interleaved R, G, B bytes.
//
// Returns the buffer obtained from `allocate_output`, or nullptr if the data
// cannot be parsed, contains no image, contains a frame that does not cover
// the whole canvas, references a missing or too-small color map, or if
// `allocate_output` itself returns nullptr.
uint8* Decode(const void* srcdata, int datasize,
              std::function<uint8*(int, int, int, int)> allocate_output) {
  int error_code = D_GIF_SUCCEEDED;
  // `info` must outlive `gif_file`: giflib keeps a pointer to it.
  InputBufferInfo info = {static_cast<const uint8*>(srcdata), datasize};
  GifFileType* gif_file = DGifOpen(&info, &input_callback, &error_code);

  // Closes the giflib handle on every exit path, including early returns.
  struct GifFileCloser {
    GifFileType* file;
    ~GifFileCloser() {
      int close_error = D_GIF_SUCCEEDED;
      if (file != nullptr && DGifCloseFile(file, &close_error) != GIF_OK) {
        LOG(WARNING) << "Fail to close gif file, reason: "
                     << GifErrorString(close_error);
      }
    }
  };
  GifFileCloser closer{gif_file};

  if (gif_file == nullptr || error_code != D_GIF_SUCCEEDED) {
    LOG(ERROR) << "Fail to open gif file, reason: "
               << GifErrorString(error_code);
    return nullptr;
  }
  if (DGifSlurp(gif_file) != GIF_OK) {
    LOG(ERROR) << "Fail to slurp gif file, reason: "
               << GifErrorString(gif_file->Error);
    return nullptr;
  }
  if (gif_file->ImageCount <= 0) {
    LOG(ERROR) << "Gif file does not contain any image";
    return nullptr;
  }

  const int num_frames = gif_file->ImageCount;
  const int width = gif_file->SWidth;
  const int height = gif_file->SHeight;
  const int channel = 3;

  uint8* const dstdata = allocate_output(num_frames, width, height, channel);
  if (dstdata == nullptr) {
    LOG(ERROR) << "Fail to allocate output buffer for gif file";
    return nullptr;
  }

  // Offsets are computed in size_t so large canvases cannot overflow `int`.
  const size_t row_bytes = static_cast<size_t>(width) * channel;
  const size_t frame_bytes = row_bytes * height;

  for (int k = 0; k < num_frames; k++) {
    SavedImage* this_image = &gif_file->SavedImages[k];
    GifImageDesc* img_desc = &this_image->ImageDesc;
    // Frames that only cover part of the canvas ("optimized" GIFs) would need
    // compositing with earlier frames, which is not implemented.
    if (img_desc->Left != 0 || img_desc->Top != 0 || img_desc->Width != width ||
        img_desc->Height != height) {
      LOG(ERROR) << "Can't process optimized gif.";
      return nullptr;
    }

    // A frame-local palette takes precedence over the global one.
    ColorMapObject* color_map = this_image->ImageDesc.ColorMap
                                    ? this_image->ImageDesc.ColorMap
                                    : gif_file->SColorMap;
    if (color_map == nullptr || color_map->Colors == nullptr) {
      LOG(ERROR) << "Gif frame " << k << " has no color map";
      return nullptr;
    }

    uint8* this_dst = dstdata + k * frame_bytes;
    for (int i = 0; i < height; ++i) {
      uint8* p_dst = this_dst + i * row_bytes;
      const size_t row_start = static_cast<size_t>(i) * width;
      for (int j = 0; j < width; ++j) {
        const GifByteType color_index = this_image->RasterBits[row_start + j];
        if (color_index >= color_map->ColorCount) {
          LOG(ERROR) << "Gif color index " << static_cast<int>(color_index)
                     << " is outside of the color map range "
                     << color_map->ColorCount;
          return nullptr;
        }
        const GifColorType& gif_color = color_map->Colors[color_index];
        p_dst[j * channel + 0] = gif_color.Red;
        p_dst[j * channel + 1] = gif_color.Green;
        p_dst[j * channel + 2] = gif_color.Blue;
      }
    }
  }

  return dstdata;
}
} // namespace gif
} // namespace tensorflow

View File

@ -0,0 +1,51 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Functions to read images in GIF format.
//
// Decode() takes the encoded bytes as a raw pointer plus a byte count, so the
// caller can pass data held in a string, a StringPiece or any other contiguous
// buffer without first copying it into a C++ string.
//
// The decoded pixels are written into a buffer that the caller supplies
// through the `allocate_output` callback; every frame of the image is decoded
// to 8-bit RGB.
#ifndef TENSORFLOW_CORE_LIB_GIF_GIF_IO_H_
#define TENSORFLOW_CORE_LIB_GIF_GIF_IO_H_
#include <functional>
#include <string>
#include <utility>
#include <vector>
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
namespace gif {
// Decodes the GIF stored in `srcdata` into 8-bit RGB frames.
//
//   srcdata          pointer to the encoded GIF bytes.
//   datasize         size of `srcdata` in bytes.
//   allocate_output  called as allocate_output(num_frames, width, height,
//                    channels) with channels == 3; must return the buffer the
//                    decoded frames are written to.
//
// Returns the buffer obtained from `allocate_output`, or nullptr when the
// image cannot be decoded.
uint8* Decode(const void* srcdata, int datasize,
std::function<uint8*(int, int, int, int)> allocate_output);
} // namespace gif
} // namespace tensorflow
#endif // TENSORFLOW_CORE_LIB_GIF_GIF_IO_H_

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

@ -739,7 +739,7 @@ REGISTER_OP("Reverse")
.Input("tensor: T") .Input("tensor: T")
.Input("dims: bool") .Input("dims: bool")
.Output("output: T") .Output("output: T")
.Attr("T: {uint8, int8, int32, bool, half, float, double}") .Attr("T: {uint8, int8, int32, bool, half, float, double, complex64, complex128}")
.SetShapeFn([](InferenceContext* c) { .SetShapeFn([](InferenceContext* c) {
const Shape* input = c->input(0); const Shape* input = c->input(0);
const Shape* dims; const Shape* dims;

View File

@ -440,10 +440,27 @@ compression: Compression level.
contents: 0-D. PNG-encoded image. contents: 0-D. PNG-encoded image.
)doc"); )doc");
// --------------------------------------------------------------------------
REGISTER_OP("DecodeGif")
.Input("contents: string")
.Output("image: uint8")
.Doc(R"doc(
Decode the frame(s) of a GIF-encoded image to a uint8 tensor.
GIF images with frame or transparency compression are not supported.
Convert an animated GIF from compressed to uncompressed by running:
convert $src.gif -coalesce $dst.gif
contents: 0-D. The GIF-encoded image.
image: 4-D with shape `[num_frames, height, width, 3]`. RGB order
)doc");
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
REGISTER_OP("RGBToHSV") REGISTER_OP("RGBToHSV")
.Input("images: float") .Input("images: T")
.Output("output: float") .Output("output: T")
.Attr("T: {float, double} = DT_FLOAT")
.SetShapeFn(ColorspaceShapeFn) .SetShapeFn(ColorspaceShapeFn)
.Doc(R"doc( .Doc(R"doc(
Converts one or more images from RGB to HSV. Converts one or more images from RGB to HSV.
@ -462,8 +479,9 @@ output: `images` converted to HSV.
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
REGISTER_OP("HSVToRGB") REGISTER_OP("HSVToRGB")
.Input("images: float") .Input("images: T")
.Output("output: float") .Output("output: T")
.Attr("T: {float, double} = DT_FLOAT")
.SetShapeFn(ColorspaceShapeFn) .SetShapeFn(ColorspaceShapeFn)
.Doc(R"doc( .Doc(R"doc(
Convert one or more images from HSV to RGB. Convert one or more images from HSV to RGB.

View File

@ -1831,4 +1831,76 @@ b: Another tensor, of same type and shape as `a`.
product: Pairwise cross product of the vectors in `a` and `b`. product: Pairwise cross product of the vectors in `a` and `b`.
)doc"); )doc");
// --------------------------------------------------------------------------
REGISTER_OP("Cumsum")
.Input("x: T")
.Input("axis: int32")
.Attr("exclusive: bool = false")
.Attr("reverse: bool = false")
.Output("out: T")
.Attr("T: numbertype")
.Doc(R"doc(
Compute the cumulative sum of the tensor `x` along `axis`.
By default, this op performs an inclusive cumsum, which means that the first
element of the input is identical to the first element of the output:
```prettyprint
tf.cumsum([a, b, c]) ==> [a, a + b, a + b + c]
```
By setting the `exclusive` kwarg to `True`, an exclusive cumsum is
performed instead:
```prettyprint
tf.cumsum([a, b, c], exclusive=True) ==> [0, a, a + b]
```
By setting the `reverse` kwarg to `True`, the cumsum is performed in the
opposite direction:
```prettyprint
tf.cumsum([a, b, c], reverse=True) ==> [a + b + c, b + c, c]
```
This is more efficient than using separate `tf.reverse` ops.
The `reverse` and `exclusive` kwargs can also be combined:
```prettyprint
tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0]
```
)doc");
REGISTER_OP("Cumprod")
.Input("x: T")
.Input("axis: int32")
.Attr("exclusive: bool = false")
.Attr("reverse: bool = false")
.Output("out: T")
.Attr("T: numbertype")
.Doc(R"doc(
Compute the cumulative product of the tensor `x` along `axis`.
By default, this op performs an inclusive cumprod, which means that the first
element of the input is identical to the first element of the output:
```prettyprint
tf.cumprod([a, b, c]) ==> [a, a * b, a * b * c]
```
By setting the `exclusive` kwarg to `True`, an exclusive cumprod is
performed instead:
```prettyprint
tf.cumprod([a, b, c], exclusive=True) ==> [1, a, a * b]
```
By setting the `reverse` kwarg to `True`, the cumprod is performed in the
opposite direction:
```prettyprint
tf.cumprod([a, b, c], reverse=True) ==> [a * b * c, b * c, c]
```
This is more efficient than using separate `tf.reverse` ops.
The `reverse` and `exclusive` kwargs can also be combined:
```prettyprint
tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 1]
```
)doc");
} // namespace tensorflow } // namespace tensorflow

View File

@ -4342,6 +4342,42 @@ op {
summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor." summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor."
description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the PNG-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels." description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the PNG-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels."
} }
# Mirrors REGISTER_OP("DecodeGif"): one string input, one uint8 output, and no
# attrs (the op has neither a `channels` nor a `dtype` attribute).
op {
  name: "DecodeGif"
  input_arg {
    name: "contents"
    description: "0-D.  The GIF-encoded image."
    type: DT_STRING
  }
  output_arg {
    name: "image"
    description: "4-D with shape `[num_frames, height, width, 3]`. RGB order"
    type: DT_UINT8
  }
  summary: "Decode the frame(s) of a GIF-encoded image to a uint8 tensor."
  description: "GIF images with frame or transparency compression are not supported.\nConvert an animated GIF from compressed to uncompressed by running:\n\nconvert $src.gif -coalesce $dst.gif"
}
op { op {
name: "DecodeRaw" name: "DecodeRaw"
input_arg { input_arg {

View File

@ -488,11 +488,13 @@ REGISTER_OP("ApplyMomentum")
.Output("out: Ref(T)") .Output("out: Ref(T)")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) { .SetShapeFn([](InferenceContext* c) {
return ApplyMomentumShapeFn(c, false /* sparse */); return ApplyMomentumShapeFn(c, false /* sparse */);
}) })
.Doc(R"doc( .Doc(R"doc(
Update '*var' according to the momentum scheme. Update '*var' according to the momentum scheme. Set use_nesterov = True if you
want to use Nesterov momentum.
accum = accum * momentum + grad accum = accum * momentum + grad
var -= lr * accum var -= lr * accum
@ -506,6 +508,9 @@ out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less by a lock; otherwise the behavior is undefined, but may exhibit less
contention. contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc"); )doc");
REGISTER_OP("SparseApplyMomentum") REGISTER_OP("SparseApplyMomentum")
@ -519,11 +524,13 @@ REGISTER_OP("SparseApplyMomentum")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("Tindices: {int32, int64}") .Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) { .SetShapeFn([](InferenceContext* c) {
return ApplyMomentumShapeFn(c, true /* sparse */); return ApplyMomentumShapeFn(c, true /* sparse */);
}) })
.Doc(R"doc( .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the momentum scheme. Update relevant entries in '*var' and '*accum' according to the momentum scheme.
Set use_nesterov = True if you want to use Nesterov momentum.
That is for rows we have grad for, we update var and accum as follows: That is for rows we have grad for, we update var and accum as follows:
@ -540,6 +547,9 @@ out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less by a lock; otherwise the behavior is undefined, but may exhibit less
contention. contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc"); )doc");
static Status ApplyAdamShapeFn(InferenceContext* c, bool sparse) { static Status ApplyAdamShapeFn(InferenceContext* c, bool sparse) {

View File

@ -57,12 +57,13 @@ cc_library(
name = "platformlib", name = "platformlib",
copts = tf_copts(), copts = tf_copts(),
deps = [ deps = [
"//tensorflow/core:protos_cc",
"@farmhash_archive//:farmhash", "@farmhash_archive//:farmhash",
"@gif_archive//:gif",
"@highwayhash//:sip_hash",
"@jpeg_archive//:jpeg", "@jpeg_archive//:jpeg",
"@png_archive//:png", "@png_archive//:png",
"@highwayhash//:sip_hash",
"@re2//:re2", "@re2//:re2",
"//tensorflow/core:protos_cc",
], ],
) )

View File

@ -0,0 +1,29 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PLATFORM_GIF_H_
#define TENSORFLOW_CORE_PLATFORM_GIF_H_
#include "tensorflow/core/platform/platform.h"
// Pull in the giflib declarations appropriate for the build platform.
#if defined(PLATFORM_GOOGLE)
#include "tensorflow/core/platform/google/build_config/gif.h"
#elif defined(PLATFORM_POSIX) && !defined(IS_MOBILE_PLATFORM)
// Non-mobile POSIX builds use the giflib 5.1.4 header directly.
#include "giflib-5.1.4/lib/gif_lib.h"
#else
// No GIF header is selected for any other configuration (including mobile).
#error Define the appropriate PLATFORM_<foo> macro for this platform
#endif
#endif // TENSORFLOW_CORE_PLATFORM_GIF_H_

View File

@ -94,8 +94,8 @@ def run_training():
saver = tf.train.Saver() saver = tf.train.Saver()
# Create the op for initializing variables. # Create the op for initializing variables.
init_op = tf.initialize_all_variables() init_op = tf.group(tf.initialize_all_variables(),
tf.initialize_local_variables())
# Create a session for running Ops on the Graph. # Create a session for running Ops on the Graph.
sess = tf.Session() sess = tf.Session()

View File

@ -99,8 +99,10 @@ Status ReadTensorFromImageFile(string file_name, const int input_height,
if (tensorflow::StringPiece(file_name).ends_with(".png")) { if (tensorflow::StringPiece(file_name).ends_with(".png")) {
image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, image_reader = DecodePng(root.WithOpName("png_reader"), file_reader,
DecodePng::Channels(wanted_channels)); DecodePng::Channels(wanted_channels));
} else if (tensorflow::StringPiece(file_name).ends_with(".gif")) {
image_reader = DecodeGif(root.WithOpName("gif_reader"), file_reader);
} else { } else {
// Assume if it's not a PNG then it must be a JPEG. // Assume if it's neither a PNG nor a GIF then it must be a JPEG.
image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader, image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader,
DecodeJpeg::Channels(wanted_channels)); DecodeJpeg::Channels(wanted_channels));
} }

50
tensorflow/examples/skflow/resnet.py Normal file → Executable file
View File

@ -52,13 +52,13 @@ def res_net(x, y, activation=tf.nn.relu):
Predictions and loss tensors. Predictions and loss tensors.
""" """
# Configurations for each bottleneck block. # Configurations for each bottleneck group.
BottleneckBlock = namedtuple( BottleneckGroup = namedtuple(
'BottleneckBlock', ['num_layers', 'num_filters', 'bottleneck_size']) 'BottleneckGroup', ['num_blocks', 'num_filters', 'bottleneck_size'])
blocks = [BottleneckBlock(3, 128, 32), groups = [BottleneckGroup(3, 128, 32),
BottleneckBlock(3, 256, 64), BottleneckGroup(3, 256, 64),
BottleneckBlock(3, 512, 128), BottleneckGroup(3, 512, 128),
BottleneckBlock(3, 1024, 256)] BottleneckGroup(3, 1024, 256)]
input_shape = x.get_shape().as_list() input_shape = x.get_shape().as_list()
@ -78,19 +78,19 @@ def res_net(x, y, activation=tf.nn.relu):
# First chain of resnets # First chain of resnets
with tf.variable_scope('conv_layer2'): with tf.variable_scope('conv_layer2'):
net = learn.ops.conv2d(net, blocks[0].num_filters, net = learn.ops.conv2d(net, groups[0].num_filters,
[1, 1], [1, 1, 1, 1], [1, 1], [1, 1, 1, 1],
padding='VALID', bias=True) padding='VALID', bias=True)
# Create each bottleneck building block for each layer # Create the bottleneck groups, each of which contains `num_blocks`
for block_i, block in enumerate(blocks): # bottleneck groups.
for layer_i in range(block.num_layers): for group_i, group in enumerate(groups):
for block_i in range(group.num_blocks):
name = 'block_%d/layer_%d' % (block_i, layer_i) name = 'group_%d/block_%d' % (group_i, block_i)
# 1x1 convolution responsible for reducing dimension # 1x1 convolution responsible for reducing dimension
with tf.variable_scope(name + '/conv_in'): with tf.variable_scope(name + '/conv_in'):
conv = learn.ops.conv2d(net, block.bottleneck_size, conv = learn.ops.conv2d(net, group.bottleneck_size,
[1, 1], [1, 1, 1, 1], [1, 1], [1, 1, 1, 1],
padding='VALID', padding='VALID',
activation=activation, activation=activation,
@ -98,7 +98,7 @@ def res_net(x, y, activation=tf.nn.relu):
bias=False) bias=False)
with tf.variable_scope(name + '/conv_bottleneck'): with tf.variable_scope(name + '/conv_bottleneck'):
conv = learn.ops.conv2d(conv, block.bottleneck_size, conv = learn.ops.conv2d(conv, group.bottleneck_size,
[3, 3], [1, 1, 1, 1], [3, 3], [1, 1, 1, 1],
padding='SAME', padding='SAME',
activation=activation, activation=activation,
@ -107,7 +107,8 @@ def res_net(x, y, activation=tf.nn.relu):
# 1x1 convolution responsible for restoring dimension # 1x1 convolution responsible for restoring dimension
with tf.variable_scope(name + '/conv_out'): with tf.variable_scope(name + '/conv_out'):
conv = learn.ops.conv2d(conv, block.num_filters, input_dim = net.get_shape()[-1].value
conv = learn.ops.conv2d(conv, input_dim,
[1, 1], [1, 1, 1, 1], [1, 1], [1, 1, 1, 1],
padding='VALID', padding='VALID',
activation=activation, activation=activation,
@ -119,10 +120,10 @@ def res_net(x, y, activation=tf.nn.relu):
net = conv + net net = conv + net
try: try:
# upscale to the next block size # upscale to the next group size
next_block = blocks[block_i + 1] next_group = groups[group_i + 1]
with tf.variable_scope('block_%d/conv_upscale' % block_i): with tf.variable_scope('block_%d/conv_upscale' % group_i):
net = learn.ops.conv2d(net, next_block.num_filters, net = learn.ops.conv2d(net, next_group.num_filters,
[1, 1], [1, 1, 1, 1], [1, 1], [1, 1, 1, 1],
bias=False, bias=False,
padding='SAME') padding='SAME')
@ -139,18 +140,12 @@ def res_net(x, y, activation=tf.nn.relu):
return learn.models.logistic_regression(net, y) return learn.models.logistic_regression(net, y)
# Download and load MNIST data. # Download and load MNIST data.
mnist = input_data.read_data_sets('MNIST_data') mnist = input_data.read_data_sets('MNIST_data')
# Restore model if graph is saved into a folder. # Restore model if graph is saved into a folder.
if os.path.exists('models/resnet/graph.pbtxt'): if os.path.exists('models/resnet/graph.pbtxt'):
classifier = learn.TensorFlowEstimator.restore('models/resnet/') classifier = learn.TensorFlowEstimator.restore('models/resnet/')
else:
# Create a new resnet classifier.
classifier = learn.TensorFlowEstimator(
model_fn=res_net, n_classes=10, batch_size=100, steps=100,
learning_rate=0.001, continue_training=True)
while True: while True:
# Train model and save summaries into logdir. # Train model and save summaries into logdir.
@ -161,6 +156,3 @@ while True:
score = metrics.accuracy_score( score = metrics.accuracy_score(
mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64)) mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64))
print('Accuracy: {0:f}'.format(score)) print('Accuracy: {0:f}'.format(score))
# Save model graph and checkpoints.
classifier.save('models/resnet/')

View File

@ -49,7 +49,7 @@ def train():
# Create a multilayer model. # Create a multilayer model.
# Input placehoolders # Input placeholders
with tf.name_scope('input'): with tf.name_scope('input'):
x = tf.placeholder(tf.float32, [None, 784], name='x-input') x = tf.placeholder(tf.float32, [None, 784], name='x-input')
y_ = tf.placeholder(tf.float32, [None, 10], name='y-input') y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')

View File

@ -6,7 +6,11 @@ Course information can be found at https://www.udacity.com/course/deep-learning-
Running the Docker container from the Google Cloud repository Running the Docker container from the Google Cloud repository
------------------------------------------------------------- -------------------------------------------------------------
docker run -p 8888:8888 -it b.gcr.io/tensorflow-udacity/assignments:0.5.0 docker run -p 8888:8888 --name tensorflow-udacity -it b.gcr.io/tensorflow-udacity/assignments:0.5.0
Note that if you ever exit the container, you can return to it using:
docker start -ai tensorflow-udacity
Accessing the Notebooks Accessing the Notebooks
----------------------- -----------------------
@ -19,21 +23,6 @@ On mac, find the virtual machine's IP using:
Then go to: http://IP:8888 (likely http://192.168.99.100:8888) Then go to: http://IP:8888 (likely http://192.168.99.100:8888)
Saving Your Progress
--------------------
Because of the `--rm` flag above, stopping the docker container removes it, so any changes you've made will disappear. One way around this is to remove the `--rm` flag, and name the container for easy restarting:
```sh
# you only need to "run" the container the first time:
docker run -p 8888:8888 -it --name tensorflow-udacity b.gcr.io/tensorflow-udacity/assignments:0.5.0
# …do various things…
# when you're done, control-C to kill jupyter and stop the container
# when you're ready to do more things, you can now just "start" the container:
docker start -ai tensorflow-udacity
# …do more things…
# …repeat…
```
FAQ FAQ
--- ---

View File

@ -44,7 +44,7 @@ management system used to install and manage software packages written in
Python. Python.
The packages that will be installed or upgraded during the pip install are listed in the The packages that will be installed or upgraded during the pip install are listed in the
[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py) [REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py).
Install pip (or pip3 for python3) if it is not already installed: Install pip (or pip3 for python3) if it is not already installed:
@ -231,7 +231,7 @@ packages needed by TensorFlow.
Install Anaconda: Install Anaconda:
Follow the instructions on the [Anaconda download site](https://www.continuum.io/downloads) Follow the instructions on the [Anaconda download site](https://www.continuum.io/downloads).
Create a conda environment called `tensorflow`: Create a conda environment called `tensorflow`:
@ -377,6 +377,8 @@ The option `-p 8888:8888` is used to publish the Docker container᾿s internal p
The format of the port mapping is `hostPort:containerPort`. You can specify any valid port number for the host port but have to use `8888` for the container port portion. The format of the port mapping is `hostPort:containerPort`. You can specify any valid port number for the host port but have to use `8888` for the container port portion.
If you're using a container with GPU support, some additional flags must be passed to expose the GPU device to the container.
For NVidia GPU support install latest NVidia drivers and For NVidia GPU support install latest NVidia drivers and
[nvidia-docker](https://github.com/NVIDIA/nvidia-docker). [nvidia-docker](https://github.com/NVIDIA/nvidia-docker).
Run with Run with
@ -385,7 +387,15 @@ Run with
$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu $ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu
``` ```
For more details see (TensorFlow docker readme)[https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/docker]. If you have a problem running `nvidia-docker` and are using the default config, we include a
[script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docker/docker_run_gpu.sh)
in the repo with these flags, so the command line would look like:
```bash
$ path/to/repo/tensorflow/tools/docker/docker_run_gpu.sh -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu
```
For more details see [TensorFlow docker readme](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/docker).
You can now [test your installation](#test-the-tensorflow-installation) within the Docker container. You can now [test your installation](#test-the-tensorflow-installation) within the Docker container.
@ -479,7 +489,7 @@ of tensorflow. If you want to install a specific branch (such as a release branc
pass `-b <branchname>` to the `git clone` command and `--recurse-submodules` for pass `-b <branchname>` to the `git clone` command and `--recurse-submodules` for
r0.8 and earlier to fetch the protobuf library that TensorFlow depends on. r0.8 and earlier to fetch the protobuf library that TensorFlow depends on.
### Installation for Linux ### Prepare environment for Linux
#### Install Bazel #### Install Bazel
@ -508,19 +518,6 @@ $ sudo apt-get install python-numpy swig python-dev python-wheel
$ sudo apt-get install python3-numpy swig python3-dev python3-wheel $ sudo apt-get install python3-numpy swig python3-dev python3-wheel
``` ```
#### Configure the installation
Run the `configure` script at the root of the tree. The configure script
asks you for the path to your python interpreter and allows (optional)
configuration of the CUDA libraries (see [below](#configure-tensorflows-canonical-view-of-cuda-libraries)).
This step is used to locate the python and numpy header files.
```bash
$ ./configure
Please specify the location of python. [Default is /usr/bin/python]:
```
#### Optional: Install CUDA (GPUs on Linux) #### Optional: Install CUDA (GPUs on Linux)
In order to build or run TensorFlow with GPU support, both NVIDIA's Cuda Toolkit (>= 7.0) and In order to build or run TensorFlow with GPU support, both NVIDIA's Cuda Toolkit (>= 7.0) and
@ -564,83 +561,7 @@ sudo cp cuda/lib64/libcudnn* /usr/local/cuda/lib64
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn* sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
``` ```
##### Configure TensorFlow's canonical view of Cuda libraries ### Prepare environment for Mac OS X
When running the `configure` script from the root of your source tree, select
the option `Y` when asked to build TensorFlow with GPU support. If you have
several versions of Cuda or cuDNN installed, you should definitely select
one explicitly instead of relying on the system default. You should see
prompts like the following:
``` bash
$ ./configure
Please specify the location of python. [Default is /usr/bin/python]:
Do you wish to build TensorFlow with GPU support? [y/N] y
GPU support will be enabled for TensorFlow
Please specify which gcc nvcc should use as the host compiler. [Default is
/usr/bin/gcc]: /usr/bin/gcc-4.9
Please specify the Cuda SDK version you want to use, e.g. 7.0. [Leave
empty to use system default]: 7.5
Please specify the location where CUDA 7.5 toolkit is installed. Refer to
README.md for more details. [default is: /usr/local/cuda]: /usr/local/cuda
Please specify the cuDNN version you want to use. [Leave empty to use system
default]: 4.0.4
Please specify the location where the cuDNN 4.0.4 library is installed. Refer to
README.md for more details. [default is: /usr/local/cuda]: /usr/local/cudnn-r4-rc/
Please specify a list of comma-separated Cuda compute capabilities you want to
build with. You can find the compute capability of your device at:
https://developer.nvidia.com/cuda-gpus.
Please note that each additional compute capability significantly increases your
build time and binary size. [Default is: \"3.5,5.2\"]: 3.5
Setting up Cuda include
Setting up Cuda lib64
Setting up Cuda bin
Setting up Cuda nvvm
Setting up CUPTI include
Setting up CUPTI lib64
Configuration finished
```
This creates a canonical set of symbolic links to the Cuda libraries on your system.
Every time you change the Cuda library paths you need to run this step again before
you invoke the bazel build command. For the cuDNN libraries, use '6.5' for R2, '7.0'
for R3, and '4.0.4' for R4-RC.
##### Build your target with GPU support
From the root of your source tree, run:
```bash
$ bazel build -c opt --config=cuda //tensorflow/cc:tutorials_example_trainer
$ bazel-bin/tensorflow/cc/tutorials_example_trainer --use_gpu
# Lots of output. This tutorial iteratively calculates the major eigenvalue of
# a 2x2 matrix, on GPU. The last few lines look like this.
000009/000005 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
000006/000001 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
000009/000009 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
```
Note that "--config=cuda" is needed to enable the GPU support.
##### Known issues
* Although it is possible to build both Cuda and non-Cuda configs under the same
source tree, we recommend to run `bazel clean` when switching between these two
configs in the same source tree.
* You have to run configure before running bazel build. Otherwise, the build
will fail with a clear error message. In the future, we might consider making
this more convenient by including the configure step in our build process.
### Installation for Mac OS X
We recommend using [homebrew](http://brew.sh) to install the bazel and SWIG We recommend using [homebrew](http://brew.sh) to install the bazel and SWIG
dependencies, and installing python dependencies using easy_install or pip. dependencies, and installing python dependencies using easy_install or pip.
@ -713,15 +634,20 @@ $ sudo mv lib/libcudnn* /Developer/NVIDIA/CUDA-7.5/lib
$ sudo ln -s /Developer/NVIDIA/CUDA-7.5/lib/libcudnn* /usr/local/cuda/lib/ $ sudo ln -s /Developer/NVIDIA/CUDA-7.5/lib/libcudnn* /usr/local/cuda/lib/
``` ```
#### Configure the installation ### Configure the installation
Run the `configure` script at the root of the tree. The configure script Run the `configure` script at the root of the tree. The configure script
asks you for the path to your python interpreter. asks you for the path to your python interpreter and allows (optional)
configuration of the CUDA libraries.
This step is used to locate the python and numpy header files as well as This step is used to locate the python and numpy header files as well as
enabling GPU support if you have a CUDA enabled GPU and Toolkit installed. For enabling GPU support if you have a CUDA enabled GPU and Toolkit installed.
example: Select the option `Y` when asked to build TensorFlow with GPU support.
If you have several versions of Cuda or cuDNN installed, you should definitely
select one explicitly instead of relying on the system default.
For example:
```bash ```bash
$ ./configure $ ./configure
@ -748,6 +674,38 @@ Setting up CUPTI lib64
Configuration finished Configuration finished
``` ```
This creates a canonical set of symbolic links to the Cuda libraries on your system.
Every time you change the Cuda library paths you need to run this step again before
you invoke the bazel build command. For the cuDNN libraries, use '6.5' for R2, '7.0'
for R3, and '4.0.4' for R4-RC.
#### Build your target with GPU support
From the root of your source tree, run:
```bash
$ bazel build -c opt --config=cuda //tensorflow/cc:tutorials_example_trainer
$ bazel-bin/tensorflow/cc/tutorials_example_trainer --use_gpu
# Lots of output. This tutorial iteratively calculates the major eigenvalue of
# a 2x2 matrix, on GPU. The last few lines look like this.
000009/000005 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
000006/000001 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
000009/000009 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
```
Note that "--config=cuda" is needed to enable the GPU support.
#### Known issues
* Although it is possible to build both Cuda and non-Cuda configs under the same
source tree, we recommend running `bazel clean` when switching between these two
configs in the same source tree.
* You have to run configure before running bazel build. Otherwise, the build
will fail with a clear error message. In the future, we might consider making
this more convenient by including the configure step in our build process.
### Create the pip package and install ### Create the pip package and install
When building from source, you will still build a pip package and install that. When building from source, you will still build a pip package and install that.

View File

@ -131,7 +131,7 @@ Once TensorBoard is running, navigate your web browser to `localhost:6006` to vi
The script will log TensorBoard summaries to `/tmp/retrain_logs` by default. You can change the directory with the `--summaries_dir` flag. The script will log TensorBoard summaries to `/tmp/retrain_logs` by default. You can change the directory with the `--summaries_dir` flag.
The [TensorBoard README](../../../tensorboard/README.md) has a lot more information on TensorBoard usage, including tips & tricks, and debugging information. The [TensorBoard README](https://www.tensorflow.org/code/tensorflow/tensorboard/README.md) has a lot more information on TensorBoard usage, including tips & tricks, and debugging information.
## Using the Retrained Model ## Using the Retrained Model

View File

@ -37,6 +37,7 @@ The TensorFlow community has created many great projects around TensorFlow, incl
* [TensorFlow tutorials](https://github.com/pkmital/tensorflow_tutorials) * [TensorFlow tutorials](https://github.com/pkmital/tensorflow_tutorials)
* [Scikit Flow - Simplified Interface for TensorFlow](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/learn/python/learn) * [Scikit Flow - Simplified Interface for TensorFlow](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/learn/python/learn)
* [Caffe to TensorFlow model converter](https://github.com/ethereon/caffe-tensorflow) * [Caffe to TensorFlow model converter](https://github.com/ethereon/caffe-tensorflow)
* [Bitfusion's GPU-enabled AWS EC2 TensorFlow AMI](https://github.com/bitfusionio/amis/tree/master/awsmrkt-bfboost-ubuntu14-cuda75-tensorflow) ([Launch AMI](https://aws.amazon.com/marketplace/pp/B01EYKBEQ0))
### Development ### Development

View File

@ -190,11 +190,11 @@ accomplished by repeatedly running `train_step`.
```python ```python
for i in range(1000): for i in range(1000):
batch = mnist.train.next_batch(50) batch = mnist.train.next_batch(100)
train_step.run(feed_dict={x: batch[0], y_: batch[1]}) train_step.run(feed_dict={x: batch[0], y_: batch[1]})
``` ```
Each training iteration we load 50 training examples. We then run the Each training iteration we load 100 training examples. We then run the
`train_step` operation, using `feed_dict` to replace the `placeholder` tensors `train_step` operation, using `feed_dict` to replace the `placeholder` tensors
`x` and `y_` with the training examples. `x` and `y_` with the training examples.
Note that you can replace any tensor in your computation graph using `feed_dict` Note that you can replace any tensor in your computation graph using `feed_dict`

View File

@ -178,6 +178,7 @@ https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/get_starte
[bazel](https://github.com/bazelbuild/bazel)). [bazel](https://github.com/bazelbuild/bazel)).
Next: Next:
```bash ```bash
cd tensorflow/models/rnn/ptb cd tensorflow/models/rnn/ptb
python ptb_word_lm.py --data_path=/tmp/simple-examples/data/ --model small python ptb_word_lm.py --data_path=/tmp/simple-examples/data/ --model small

View File

@ -240,10 +240,11 @@ second sample is *Iris virginica*.
* For further reference materials on tf.contrib.learn, see the official * For further reference materials on tf.contrib.learn, see the official
[API docs](../../api_docs/python/contrib.learn.md). [API docs](../../api_docs/python/contrib.learn.md).
<!-- David, will the below be live when this tutorial is released? -->
* To learn more about using tf.contrib.learn to create linear models, see * To learn more about using tf.contrib.learn to create linear models, see
[Large-scale Linear Models with TensorFlow](../linear/). [Large-scale Linear Models with TensorFlow](../linear/).
* To build your own Estimator using tf.contrib.learn APIs, check out [Building Machine Learning Estimator in TensorFlow](http://terrytangyuan.github.io/2016/07/08/understand-and-build-tensorflow-estimator/).
* To experiment with neural network modeling and visualization in the browser, * To experiment with neural network modeling and visualization in the browser,
check out [Deep Playground](http://playground.tensorflow.org/). check out [Deep Playground](http://playground.tensorflow.org/).

View File

@ -378,7 +378,8 @@ class Word2Vec(object):
opts = self._options opts = self._options
with open(os.path.join(opts.save_path, "vocab.txt"), "w") as f: with open(os.path.join(opts.save_path, "vocab.txt"), "w") as f:
for i in xrange(opts.vocab_size): for i in xrange(opts.vocab_size):
f.write("%s %d\n" % (tf.compat.as_text(opts.vocab_words[i]), vocab_word = tf.compat.as_text(opts.vocab_words[i]).encode("utf-8")
f.write("%s %d\n" % (vocab_word,
opts.vocab_counts[i])) opts.vocab_counts[i]))
def _train_thread_body(self): def _train_thread_body(self):

View File

@ -82,10 +82,10 @@ def extract_data(filename, num_images):
print('Extracting', filename) print('Extracting', filename)
with gzip.open(filename) as bytestream: with gzip.open(filename) as bytestream:
bytestream.read(16) bytestream.read(16)
buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images) buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)
data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32) data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32)
data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, 1) data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
return data return data

View File

@ -146,6 +146,7 @@ cuda_py_tests(
"reverse_sequence_op_test.py", "reverse_sequence_op_test.py",
"rnn_cell_test.py", "rnn_cell_test.py",
"scalar_strict_test.py", "scalar_strict_test.py",
"scan_ops_test.py",
"session_ops_test.py", "session_ops_test.py",
"shape_ops_test.py", "shape_ops_test.py",
"softmax_op_test.py", "softmax_op_test.py",

View File

@ -198,14 +198,19 @@ class ReverseTest(test_util.TensorFlowTestCase):
x_tf = array_ops.reverse(x_np, []).eval() x_tf = array_ops.reverse(x_np, []).eval()
self.assertAllEqual(x_tf, x_np) self.assertAllEqual(x_tf, x_np)
def testReverse1DimAuto(self): def _reverse1DimAuto(self, np_dtype):
x_np = [1, 4, 9] x_np = np.array([1, 2, 3, 4, 5], dtype=np_dtype)
for use_gpu in [False, True]: for use_gpu in [False, True]:
with self.test_session(use_gpu=use_gpu): with self.test_session(use_gpu=use_gpu):
x_tf = array_ops.reverse(x_np, [True]).eval() x_tf = array_ops.reverse(x_np, [True]).eval()
self.assertAllEqual(x_tf, np.asarray(x_np)[::-1]) self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
def testReverse1DimAuto(self):
for dtype in [np.uint8, np.int8, np.int32, np.bool, np.float16,
np.float32, np.float64, np.complex64, np.complex128]:
self._reverse1DimAuto(dtype)
def testUnknownDims(self): def testUnknownDims(self):
data_t = tf.placeholder(tf.float32) data_t = tf.placeholder(tf.float32)
dims_known_t = tf.placeholder(tf.bool, shape=[3]) dims_known_t = tf.placeholder(tf.bool, shape=[3])

View File

@ -432,16 +432,13 @@ class ProdReductionTest(tf.test.TestCase):
self._compareAll(np_arr, [0, 2]) self._compareAll(np_arr, [0, 2])
self._compareAll(np_arr, [0, 1, 2]) self._compareAll(np_arr, [0, 1, 2])
def testGradient(self): def _compareGradient(self, x):
s = [2, 3, 4, 2]
# NOTE(kearnes): divide by 20 so product is a reasonable size
x = np.arange(1.0, 49.0).reshape(s).astype(np.float32) / 20.
with self.test_session(): with self.test_session():
t = tf.convert_to_tensor(x) t = tf.convert_to_tensor(x)
su = tf.reduce_prod(t, []) su = tf.reduce_prod(t, [])
jacob_t, jacob_n = tf.test.compute_gradient(t, jacob_t, jacob_n = tf.test.compute_gradient(t,
s, x.shape,
su, su,
[2, 3, 4, 2], [2, 3, 4, 2],
x_init_value=x, x_init_value=x,
@ -450,7 +447,7 @@ class ProdReductionTest(tf.test.TestCase):
su = tf.reduce_prod(t, [1, 2]) su = tf.reduce_prod(t, [1, 2])
jacob_t, jacob_n = tf.test.compute_gradient(t, jacob_t, jacob_n = tf.test.compute_gradient(t,
s, x.shape,
su, su,
[2, 2], [2, 2],
x_init_value=x, x_init_value=x,
@ -459,26 +456,34 @@ class ProdReductionTest(tf.test.TestCase):
su = tf.reduce_prod(t, [0, 1, 2, 3]) su = tf.reduce_prod(t, [0, 1, 2, 3])
jacob_t, jacob_n = tf.test.compute_gradient(t, jacob_t, jacob_n = tf.test.compute_gradient(t,
s, x.shape,
su, su,
[1], [1],
x_init_value=x, x_init_value=x,
delta=1) delta=1)
self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3) self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
# NOTE(kearnes): the current gradient calculation gives NaNs for 0 inputs def testGradientWithZeros(self):
x = np.arange(0.0, 48.0).reshape(s).astype(np.float32) / 20. s = [2, 3, 4, 2]
with self.test_session(): x = np.arange(1.0, 49.0).reshape(s).astype(np.float32) / 20.
t = tf.convert_to_tensor(x) # No zeros in input
su = tf.reduce_prod(t, []) self._compareGradient(x)
jacob_t, _ = tf.test.compute_gradient(t, # Zero at beginning
s, x1 = x.copy()
su, x1[:,:,0,:] = 0
[2, 3, 4, 2], self._compareGradient(x1)
x_init_value=x, # Zero at end
delta=1) x2 = x.copy()
with self.assertRaisesOpError("Tensor had NaN values"): x2[:,:,-1,:] = 0
tf.check_numerics(jacob_t, message="_ProdGrad NaN test").op.run() self._compareGradient(x2)
# Zero in middle
x3 = x.copy()
x3[:,:,2,:] = 0
self._compareGradient(x3)
# All zeros
x4 = x.copy()
x4[:,:,:,:] = 0
self._compareGradient(x4)
def testEmptyGradients(self): def testEmptyGradients(self):
with self.test_session(): with self.test_session():

View File

@ -0,0 +1,229 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for scan ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from itertools import combinations
import numpy as np
import tensorflow as tf
def numpy_reverse(x, axis):
  """Reverses a numpy array along a single axis.

  Args:
    x: numpy array to reverse.
    axis: integer index of the dimension whose element order is flipped.

  Returns:
    `x` sliced with a step of -1 along `axis` and left untouched along every
    other dimension.
  """
  ix = [slice(None, None, -1)
        if i == axis else slice(None) for i in range(len(x.shape))]
  # Index with a tuple (one slice per dimension). Passing the list itself is
  # the deprecated "non-tuple sequence" form, which newer NumPy releases
  # interpret as a fancy index and reject.
  return x[tuple(ix)]
def handle_options(func, x, axis, exclusive, reverse):
  """Adds tf options to numpy scan ops.

  Builds the numpy reference result for a scan with the `exclusive` and
  `reverse` options, which `np.cumsum` / `np.cumprod` do not offer.

  Args:
    func: the numpy scan to apply; must be `np.cumsum` or `np.cumprod` when
      `exclusive` is set.
    x: numpy array to scan.
    axis: integer index of the dimension to scan along.
    exclusive: if true, each output element excludes the matching input
      element: the result starts with the identity (0 for cumsum, 1 for
      cumprod) followed by the scan of all but the last slice.
    reverse: if true, the scan runs from the end of `axis` towards the start
      (implemented by flipping the input before and the output after).

  Returns:
    A numpy array with the same shape as `x` holding the scan result.

  Raises:
    ValueError: if `exclusive` is set and `func` is neither `np.cumsum` nor
      `np.cumprod`, because the identity element is then unknown.
  """
  if reverse:
    x = numpy_reverse(x, axis)

  if exclusive:
    # Tuples, not lists: a list of slices is the deprecated non-tuple index
    # form that newer NumPy releases reject.
    ix_head = tuple(slice(0, 1) if i == axis else slice(None)
                    for i in range(len(x.shape)))
    ix_init = tuple(slice(0, -1) if i == axis else slice(None)
                    for i in range(len(x.shape)))
    # The first slice along `axis` only supplies the shape/dtype of the
    # identity element that seeds the exclusive scan.
    if func == np.cumsum:
      init = np.zeros_like(x[ix_head])
    elif func == np.cumprod:
      init = np.ones_like(x[ix_head])
    else:
      raise ValueError("Unknown scan function")
    x = np.concatenate([init, func(x[ix_init], axis)], axis=axis)
  else:
    x = func(x, axis=axis)

  if reverse:
    x = numpy_reverse(x, axis)
  return x
class CumsumTest(tf.test.TestCase):
  """Compares tf.cumsum with a numpy reference and checks its gradient."""

  # Element types every forward test is repeated for.
  valid_dtypes = [np.int32, np.int64, np.float16, np.float32,
                  np.float64, np.complex64, np.complex128]

  def _compare(self, x, axis, exclusive, reverse, use_gpu=False):
    """Asserts tf.cumsum agrees with the numpy reference for one option set."""
    expected = handle_options(np.cumsum, x, axis, exclusive, reverse)
    with self.test_session(use_gpu=use_gpu):
      actual = tf.cumsum(x, axis, exclusive, reverse).eval()

    self.assertAllClose(expected, actual)

  def _compareAll(self, x, axis):
    """Runs _compare for every exclusive/reverse/use_gpu combination."""
    option_grid = [(exclusive, reverse, use_gpu)
                   for exclusive in (True, False)
                   for reverse in (True, False)
                   for use_gpu in (True, False)]
    for exclusive, reverse, use_gpu in option_grid:
      self._compare(x, axis, exclusive, reverse, use_gpu)

  def test1D(self):
    for dtype in self.valid_dtypes:
      vector = np.arange(1, 6).reshape([5]).astype(dtype)
      self._compareAll(vector, 0)

  def test2D(self):
    for dtype in self.valid_dtypes:
      matrix = np.arange(0, 10).reshape([2, 5]).astype(dtype)
      for axis in range(2):
        self._compareAll(matrix, axis)

  def test3D(self):
    for dtype in self.valid_dtypes:
      cube = np.arange(0, 20).reshape([2, 2, 5]).astype(dtype)
      for axis in range(3):
        self._compareAll(cube, axis)

  def testInvalidAxis(self):
    x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
    input_tensor = tf.convert_to_tensor(x)
    # (axis argument, text the resulting error message must contain)
    bad_axes = [(-1, "Expected scan axis in the range"),
                (2, "Expected scan axis in the range"),
                ([0], "axis must be a scalar")]
    with self.test_session():
      for axis, fragment in bad_axes:
        with self.assertRaisesWithPredicateMatch(
            tf.errors.InvalidArgumentError,
            lambda e, fragment=fragment: fragment in str(e)):
          tf.cumsum(input_tensor, axis).eval()

  def _compareGradient(self, shape, axis, exclusive, reverse):
    """Asserts the two Jacobians from tf.test.compute_gradient agree."""
    x = np.arange(0, 50).reshape(shape).astype(np.float64)
    with self.test_session():
      t = tf.convert_to_tensor(x)
      scanned = tf.cumsum(t, axis, exclusive, reverse)
      jacobians = tf.test.compute_gradient(t,
                                           shape,
                                           scanned,
                                           shape,
                                           x_init_value=x,
                                           delta=1)
    jacob_t, jacob_n = jacobians
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)

  def testGradient(self):
    self._compareGradient([50], 0, exclusive=False, reverse=False)

  def testGradientReverse(self):
    self._compareGradient([50], 0, exclusive=False, reverse=True)

  def testGradientExclusive(self):
    self._compareGradient([50], 0, exclusive=True, reverse=False)

  def testGradientExclusiveReverse(self):
    self._compareGradient([50], 0, exclusive=True, reverse=True)

  def testGradient2D(self):
    option_grid = [(axis, exclusive, reverse)
                   for axis in (0, 1)
                   for exclusive in (True, False)
                   for reverse in (True, False)]
    for axis, exclusive, reverse in option_grid:
      self._compareGradient([5, 10], axis, exclusive, reverse)
class CumprodTest(tf.test.TestCase):
  """Compares tf.cumprod with a numpy reference and checks its gradient."""

  # Element types every forward test is repeated for.
  valid_dtypes = [np.int32, np.int64, np.float16, np.float32,
                  np.float64, np.complex64, np.complex128]

  def _compare(self, x, axis, exclusive, reverse, use_gpu=False):
    """Asserts tf.cumprod agrees with the numpy reference for one option set."""
    expected = handle_options(np.cumprod, x, axis, exclusive, reverse)
    with self.test_session(use_gpu=use_gpu):
      actual = tf.cumprod(x, axis, exclusive, reverse).eval()

    self.assertAllClose(expected, actual)

  def _compareAll(self, x, axis):
    """Runs _compare for every exclusive/reverse/use_gpu combination."""
    option_grid = [(exclusive, reverse, use_gpu)
                   for exclusive in (True, False)
                   for reverse in (True, False)
                   for use_gpu in (True, False)]
    for exclusive, reverse, use_gpu in option_grid:
      self._compare(x, axis, exclusive, reverse, use_gpu)

  def test1D(self):
    for dtype in self.valid_dtypes:
      vector = np.arange(1, 6).reshape([5]).astype(dtype)
      self._compareAll(vector, 0)

  def test2D(self):
    for dtype in self.valid_dtypes:
      matrix = np.arange(1, 11).reshape([2, 5]).astype(dtype)
      for axis in range(2):
        self._compareAll(matrix, axis)

  def test3D(self):
    for dtype in self.valid_dtypes:
      cube = np.arange(1, 21).reshape([2, 2, 5]).astype(dtype)
      for axis in range(3):
        self._compareAll(cube, axis)

  def testInvalidAxis(self):
    x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
    input_tensor = tf.convert_to_tensor(x)
    # (axis argument, text the resulting error message must contain)
    bad_axes = [(-1, "Expected scan axis in the range"),
                (2, "Expected scan axis in the range"),
                ([0], "axis must be a scalar")]
    with self.test_session():
      for axis, fragment in bad_axes:
        with self.assertRaisesWithPredicateMatch(
            tf.errors.InvalidArgumentError,
            lambda e, fragment=fragment: fragment in str(e)):
          tf.cumprod(input_tensor, axis).eval()

  def _compareGradient(self, shape, axis, exclusive, reverse):
    """Asserts the two Jacobians from tf.test.compute_gradient agree."""
    x = np.arange(1, 9).reshape(shape).astype(np.float64)
    with self.test_session():
      t = tf.convert_to_tensor(x)
      scanned = tf.cumprod(t, axis, exclusive, reverse)
      jacobians = tf.test.compute_gradient(t,
                                           shape,
                                           scanned,
                                           shape,
                                           x_init_value=x,
                                           delta=1)
    jacob_t, jacob_n = jacobians
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)

  def testGradient(self):
    self._compareGradient([8], 0, exclusive=False, reverse=False)

  def testGradientReverse(self):
    self._compareGradient([8], 0, exclusive=False, reverse=True)

  def testGradientExclusive(self):
    self._compareGradient([8], 0, exclusive=True, reverse=False)

  def testGradientExclusiveReverse(self):
    self._compareGradient([8], 0, exclusive=True, reverse=True)

  def testGradient2D(self):
    option_grid = [(axis, exclusive, reverse)
                   for axis in (0, 1)
                   for exclusive in (True, False)
                   for reverse in (True, False)]
    for axis, exclusive, reverse in option_grid:
      self._compareGradient([2, 4], axis, exclusive, reverse)
if __name__ == "__main__":
tf.test.main()

View File

@ -1021,6 +1021,12 @@ def _ResizeShape(op):
return [tensor_shape.TensorShape( return [tensor_shape.TensorShape(
[input_shape[0], height, width, input_shape[3]])] [input_shape[0], height, width, input_shape[3]])]
@ops.RegisterShape('DecodeGif')
def _ImageDecodeShape(op):
  """Shape function for the DecodeGif op.

  Args:
    op: the DecodeGif operation; its first input is the encoded contents.

  Returns:
    A single-element list holding a rank-4 shape whose first three dimensions
    are unknown and whose last dimension is 3.
  """
  contents_shape = op.inputs[0].get_shape()
  # The merge result is discarded; the call is presumably made only so that a
  # non-scalar input is rejected (confirm against TensorShape.merge_with).
  contents_shape.merge_with(tensor_shape.scalar())
  decoded_shape = tensor_shape.TensorShape([None, None, None, 3])
  return [decoded_shape]
@ops.RegisterShape('DecodeJpeg') @ops.RegisterShape('DecodeJpeg')
@ops.RegisterShape('DecodePng') @ops.RegisterShape('DecodePng')

View File

@ -27,6 +27,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.python.framework import constant_op from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops from tensorflow.python.ops import array_ops
@ -42,7 +43,9 @@ class RGBToHSVTest(test_util.TensorFlowTestCase):
np.random.seed(7) np.random.seed(7)
batch_size = 5 batch_size = 5
shape = (batch_size, 2, 7, 3) shape = (batch_size, 2, 7, 3)
inp = np.random.rand(*shape).astype(np.float32)
for nptype in [np.float32, np.float64]:
inp = np.random.rand(*shape).astype(nptype)
# Convert to HSV and back, as a batch and individually # Convert to HSV and back, as a batch and individually
with self.test_session() as sess: with self.test_session() as sess:
@ -63,7 +66,8 @@ class RGBToHSVTest(test_util.TensorFlowTestCase):
def testRGBToHSVRoundTrip(self): def testRGBToHSVRoundTrip(self):
data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
rgb_np = np.array(data, dtype=np.float32).reshape([2, 2, 3]) / 255. for nptype in [np.float32, np.float64]:
rgb_np = np.array(data, dtype=nptype).reshape([2, 2, 3]) / 255.
for use_gpu in [True, False]: for use_gpu in [True, False]:
with self.test_session(use_gpu=use_gpu): with self.test_session(use_gpu=use_gpu):
hsv = image_ops.rgb_to_hsv(rgb_np) hsv = image_ops.rgb_to_hsv(rgb_np)
@ -1609,6 +1613,56 @@ class PngTest(test_util.TensorFlowTestCase):
[None, None, channels or None]) [None, None, channels or None])
class GifTest(test_util.TensorFlowTestCase):
  """Tests for image_ops.decode_gif."""

  def testValid(self):
    """Decodes a multi-frame GIF and checks every frame pixel-for-pixel."""
    # Read a real GIF from the test data directory.
    prefix = 'tensorflow/core/lib/gif/testdata/'
    filename = 'scan.gif'
    WIDTH = 20
    HEIGHT = 40
    STRIDE = 5
    # Expected decoded shape: (frames, height, width, channels).
    shape = (12, HEIGHT, WIDTH, 3)

    with self.test_session() as sess:
      gif0 = io_ops.read_file(prefix + filename)
      image0 = image_ops.decode_gif(gif0)
      gif0, image0 = sess.run([gif0, image0])

      self.assertEqual(image0.shape, shape)

      for frame_idx, frame in enumerate(image0):
        # Build the expected frame: black except for one white band that is
        # STRIDE pixels thick.
        gt = np.zeros(shape[1:], dtype=np.uint8)
        start = frame_idx * STRIDE
        end = (frame_idx + 1) * STRIDE
        if end <= WIDTH:
          # Early frames: the band covers columns [start, end).
          gt[:, start:end, :] = 255
        else:
          # Later frames: the band covers rows instead, with the offset
          # restarting from zero once the columns are exhausted.
          start -= WIDTH
          end -= WIDTH
          gt[start:end, :, :] = 255

        self.assertAllClose(frame, gt)

  def testInValid(self):
    """Decoding 'optimized.gif' must fail with InvalidArgumentError."""
    # This file is expected to be rejected by the decoder.
    # NOTE(review): presumably because it uses frame optimization that the op
    # does not handle; confirm against the DecodeGif kernel.
    prefix = 'tensorflow/core/lib/gif/testdata/'
    filename = 'optimized.gif'

    with self.test_session() as sess:
      gif0 = io_ops.read_file(prefix + filename)
      image0 = image_ops.decode_gif(gif0)
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run([gif0, image0])

  def testShape(self):
    """The static shape is known only in its last (channel) dimension."""
    with self.test_session():
      # The contents are never decoded here; only graph-time shape inference
      # is inspected.
      gif = constant_op.constant('nonsense')
      image = image_ops.decode_gif(gif)
      self.assertEqual(image.get_shape().as_list(),
                       [None, None, None, 3])
class ConvertImageTest(test_util.TensorFlowTestCase): class ConvertImageTest(test_util.TensorFlowTestCase):
def _convert(self, original, original_dtype, output_dtype, expected): def _convert(self, original, original_dtype, output_dtype, expected):

View File

@ -109,13 +109,41 @@ def _MeanGrad(op, grad):
@ops.RegisterGradient("Prod") @ops.RegisterGradient("Prod")
def _ProdGrad(op, grad): def _ProdGrad(op, grad):
"""Gradient for Prod.""" """Gradient for Prod."""
# TODO(kearnes): this gives NaNs for 0s in the input tensor # The gradient can be expressed by dividing the product by each entry of the
# input tensor, but this approach can't deal with zeros in the input.
# Here, we avoid this problem by composing the output as a product of two
# cumprod operations.
input_shape = array_ops.shape(op.inputs[0]) input_shape = array_ops.shape(op.inputs[0])
# Expand grad to full input shape
output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
grad = array_ops.reshape(grad * op.outputs[0], output_shape_kept_dims) grad = array_ops.reshape(grad, output_shape_kept_dims)
grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0]) grad = array_ops.tile(grad, tile_scaling)
return grad, None
# Pack all reduced dimensions into a single one, so we can perform the
# cumprod ops. If the reduction dims list is empty, it defaults to float32,
# so we need to cast here.
reduced = math_ops.cast(op.inputs[1], dtypes.int32)
idx = math_ops.range(0, array_ops.rank(op.inputs[0]))
other, _ = array_ops.listdiff(idx, reduced)
perm = array_ops.concat(0, [reduced, other])
reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced))
other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other))
permuted = array_ops.transpose(op.inputs[0], perm)
permuted_shape = array_ops.shape(permuted)
reshaped = array_ops.reshape(permuted, (reduced_num, other_num))
# Calculate product, leaving out the current entry
left = math_ops.cumprod(reshaped, axis=0, exclusive=True)
right = math_ops.cumprod(reshaped, axis=0, exclusive=True, reverse=True)
y = array_ops.reshape(left * right, permuted_shape)
# Invert the transpose and reshape operations.
# Make sure to set the statically known shape information through a reshape.
out = grad * array_ops.transpose(y, array_ops.invert_permutation(perm))
return array_ops.reshape(out, input_shape), None
@ops.RegisterGradient("SegmentSum") @ops.RegisterGradient("SegmentSum")
@ -839,3 +867,26 @@ def _CrossGrad(op, grad):
u = op.inputs[0] u = op.inputs[0]
v = op.inputs[1] v = op.inputs[1]
return (math_ops.cross(v, grad), math_ops.cross(grad, u)) return (math_ops.cross(v, grad), math_ops.cross(grad, u))
@ops.RegisterGradient("Cumsum")
def _CumsumGrad(op, grad):
  """Gradient for Cumsum.

  The incoming gradient is accumulated along the same axis with the same
  `exclusive` setting, but scanning in the opposite direction.

  Args:
    op: The forward `Cumsum` operation; its second input is the axis.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A two-element list: the gradient for `x`, and `None` for the axis input.
  """
  scan_axis = op.inputs[1]
  is_exclusive = op.get_attr("exclusive")
  flipped_direction = not op.get_attr("reverse")
  grad_wrt_x = math_ops.cumsum(
      grad, scan_axis, exclusive=is_exclusive, reverse=flipped_direction)
  # The axis input receives no gradient.
  return [grad_wrt_x, None]
@ops.RegisterGradient("Cumprod")
def _CumprodGrad(op, grad):
  """Gradient for Cumprod.

  Recomputes the forward cumprod, weights it by the incoming gradient,
  accumulates that with a cumsum scanning in the opposite direction, and
  finally divides by the input.

  Args:
    op: The forward `Cumprod` operation; inputs are the tensor and the axis.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A two-element list: the gradient for `x`, and `None` for the axis input.
  """
  values = op.inputs[0]
  scan_axis = op.inputs[1]
  is_exclusive = op.get_attr("exclusive")
  is_reversed = op.get_attr("reverse")

  # TODO This fails when x contains 0 and should be fixed
  forward = math_ops.cumprod(
      values, scan_axis, exclusive=is_exclusive, reverse=is_reversed)
  accumulated = math_ops.cumsum(
      forward * grad, scan_axis, exclusive=is_exclusive,
      reverse=not is_reversed)
  # The division by the input is what breaks when the input contains zeros.
  return [accumulated / values, None]

View File

@ -13,7 +13,10 @@
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""## Arithmetic Operators """Note: Elementwise binary operations in TensorFlow follow [numpy-style
broadcasting](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).
## Arithmetic Operators
TensorFlow provides several operations that you can use to add basic arithmetic TensorFlow provides several operations that you can use to add basic arithmetic
operators to your graph. operators to your graph.
@ -145,6 +148,14 @@ common math computations that reduce various dimensions of a tensor.
@@accumulate_n @@accumulate_n
## Scan
TensorFlow provides several operations that you can use to perform scans
(running totals) across one axis of a tensor.
@@cumsum
@@cumprod
## Segmentation ## Segmentation
TensorFlow provides several operations that you can use to perform common TensorFlow provides several operations that you can use to perform common
@ -1585,6 +1596,94 @@ def tanh(x, name=None):
return gen_math_ops._tanh(x, name=name) return gen_math_ops._tanh(x, name=name)
def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):
  """Compute the cumulative sum of the tensor `x` along `axis`.

  By default, this op performs an inclusive cumsum, which means that the first
  element of the input is identical to the first element of the output:
  ```prettyprint
  tf.cumsum([a, b, c]) ==> [a, a + b, a + b + c]
  ```

  By setting the `exclusive` kwarg to `True`, an exclusive cumsum is performed
  instead:
  ```prettyprint
  tf.cumsum([a, b, c], exclusive=True) ==> [0, a, a + b]
  ```

  By setting the `reverse` kwarg to `True`, the cumsum is performed in the
  opposite direction:
  ```prettyprint
  tf.cumsum([a, b, c], reverse=True) ==> [a + b + c, b + c, c]
  ```
  This is more efficient than using separate `tf.reverse` ops.

  The `reverse` and `exclusive` kwargs can also be combined:
  ```prettyprint
  tf.cumsum([a, b, c], exclusive=True, reverse=True) ==> [b + c, c, 0]
  ```

  Args:
    x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
       `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
       `complex128`, `qint8`, `quint8`, `qint32`, `half`.
    axis: A `Tensor` of type `int32` (default: 0).
    exclusive: A `bool` (default: False). If `True`, perform an exclusive
      cumsum, as shown above.
    reverse: A `bool` (default: False). If `True`, accumulate in the opposite
      direction, as shown above.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `x`.
  """
  with ops.op_scope([x], name, "Cumsum") as name:
    x = ops.convert_to_tensor(x, name="x")
    return gen_math_ops.cumsum(x, axis, exclusive=exclusive,
                               reverse=reverse, name=name)
def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
  """Compute the cumulative product of the tensor `x` along `axis`.

  By default, this op performs an inclusive cumprod, which means that the first
  element of the input is identical to the first element of the output:
  ```prettyprint
  tf.cumprod([a, b, c]) ==> [a, a * b, a * b * c]
  ```

  By setting the `exclusive` kwarg to `True`, an exclusive cumprod is performed
  instead. The scan then starts from the multiplicative identity, 1:
  ```prettyprint
  tf.cumprod([a, b, c], exclusive=True) ==> [1, a, a * b]
  ```

  By setting the `reverse` kwarg to `True`, the cumprod is performed in the
  opposite direction:
  ```prettyprint
  tf.cumprod([a, b, c], reverse=True) ==> [a * b * c, b * c, c]
  ```
  This is more efficient than using separate `tf.reverse` ops.

  The `reverse` and `exclusive` kwargs can also be combined:
  ```prettyprint
  tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 1]
  ```

  Args:
    x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
       `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
       `complex128`, `qint8`, `quint8`, `qint32`, `half`.
    axis: A `Tensor` of type `int32` (default: 0).
    exclusive: A `bool` (default: False). If `True`, perform an exclusive
      cumprod, as shown above.
    reverse: A `bool` (default: False). If `True`, accumulate in the opposite
      direction, as shown above.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `x`.
  """
  with ops.op_scope([x], name, "Cumprod") as name:
    x = ops.convert_to_tensor(x, name="x")
    return gen_math_ops.cumprod(x, axis, exclusive=exclusive,
                                reverse=reverse, name=name)
ops.RegisterShape("Abs")(common_shapes.unchanged_shape) ops.RegisterShape("Abs")(common_shapes.unchanged_shape)
ops.RegisterShape("Acos")(common_shapes.unchanged_shape) ops.RegisterShape("Acos")(common_shapes.unchanged_shape)
ops.RegisterShape("Asin")(common_shapes.unchanged_shape) ops.RegisterShape("Asin")(common_shapes.unchanged_shape)
@ -1632,6 +1731,8 @@ ops.RegisterShape("BatchFFT3D")(common_shapes.unchanged_shape)
ops.RegisterShape("BatchIFFT3D")(common_shapes.unchanged_shape) ops.RegisterShape("BatchIFFT3D")(common_shapes.unchanged_shape)
ops.RegisterShape("TanhGrad")(common_shapes.unchanged_shape) ops.RegisterShape("TanhGrad")(common_shapes.unchanged_shape)
ops.RegisterShape("SigmoidGrad")(common_shapes.unchanged_shape) ops.RegisterShape("SigmoidGrad")(common_shapes.unchanged_shape)
ops.RegisterShape("Cumsum")(common_shapes.unchanged_shape)
ops.RegisterShape("Cumprod")(common_shapes.unchanged_shape)
@ops.RegisterShape("Add") @ops.RegisterShape("Add")

View File

@ -648,7 +648,7 @@ class DropoutWrapper(RNNCell):
% input_keep_prob) % input_keep_prob)
if (isinstance(output_keep_prob, float) and if (isinstance(output_keep_prob, float) and
not (output_keep_prob >= 0.0 and output_keep_prob <= 1.0)): not (output_keep_prob >= 0.0 and output_keep_prob <= 1.0)):
raise ValueError("Parameter input_keep_prob must be between 0 and 1: %d" raise ValueError("Parameter output_keep_prob must be between 0 and 1: %d"
% output_keep_prob) % output_keep_prob)
self._cell = cell self._cell = cell
self._input_keep_prob = input_keep_prob self._input_keep_prob = input_keep_prob

View File

@ -395,13 +395,14 @@ def Walk(top, topdown=1, onerror=None):
optional argument "onerror" is specified, it should be a function. It optional argument "onerror" is specified, it should be a function. It
will be called with one argument, an os.error instance. It can return will be called with one argument, an os.error instance. It can return
to continue with the walk, or reraise the exception to abort the walk. to continue with the walk, or reraise the exception to abort the walk.
By default, the walk follows symlinks that resolve into directories.
Yields: Yields:
# Each yield is a 3-tuple: the pathname of a directory, followed # Each yield is a 3-tuple: the pathname of a directory, followed
# by lists of all its subdirectories and leaf files. # by lists of all its subdirectories and leaf files.
(dirname, [subdirname, subdirname, ...], [filename, filename, ...]) (dirname, [subdirname, subdirname, ...], [filename, filename, ...])
""" """
return os.walk(top, topdown=topdown, onerror=onerror) return os.walk(top, topdown=topdown, onerror=onerror, followlinks=True)
def Stat(path): # pylint: disable=invalid-name def Stat(path): # pylint: disable=invalid-name

View File

@ -92,7 +92,7 @@ def input_producer(input_tensor, element_shape=None, num_epochs=None,
"""Output the rows of `input_tensor` to a queue for an input pipeline. """Output the rows of `input_tensor` to a queue for an input pipeline.
Args: Args:
input_tensor: A tensor with the rows to produce. Must be at input_tensor: A tensor with the rows to produce. Must be at least
one-dimensional. Must either have a fully-defined shape, or one-dimensional. Must either have a fully-defined shape, or
`element_shape` must be defined. `element_shape` must be defined.
element_shape: (Optional.) A `TensorShape` representing the shape of a element_shape: (Optional.) A `TensorShape` representing the shape of a

View File

@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops from tensorflow.python.ops import math_ops
from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import control_flow_ops
@ -67,8 +68,9 @@ def exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
Must be positive. See the decay computation above. Must be positive. See the decay computation above.
decay_rate: A scalar `float32` or `float64` `Tensor` or a decay_rate: A scalar `float32` or `float64` `Tensor` or a
Python number. The decay rate. Python number. The decay rate.
staircase: Boolean. It `True` decay the learning rate at discrete intervals. staircase: Boolean. If `True` decay the learning rate at discrete intervals
name: String. Optional name of the operation. Defaults to 'ExponentialDecay' name: String. Optional name of the operation. Defaults to
'ExponentialDecay'
Returns: Returns:
A scalar `Tensor` of the same type as `learning_rate`. The decayed A scalar `Tensor` of the same type as `learning_rate`. The decayed
@ -237,3 +239,125 @@ def polynomial_decay(learning_rate, global_step, decay_steps,
return math_ops.add(math_ops.mul(learning_rate - end_learning_rate, return math_ops.add(math_ops.mul(learning_rate - end_learning_rate,
math_ops.pow(1 - p, power)), math_ops.pow(1 - p, power)),
end_learning_rate, name=name) end_learning_rate, name=name)
def natural_exp_decay(learning_rate, global_step, decay_steps, decay_rate,
                      staircase=False, name=None):
  """Applies natural exponential decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate *
                          exp(-decay_rate * global_step / decay_steps)
  ```

  If the argument `staircase` is `True`, then `global_step / decay_steps` is
  rounded down to a whole number, so the learning rate changes only at
  discrete intervals.

  Example: decay exponentially with a rate of 0.5 per 1000 steps:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 1000
  k = 0.5
  learning_rate = tf.train.natural_exp_decay(learning_rate, global_step,
                                             decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A scalar `Tensor` (for example a step-counter variable) or a
      Python number.  Global step to use for the decay computation.  Must not
      be negative.
    decay_steps: A scalar `Tensor` or a Python number.  `global_step` is
      divided by this value before the decay is applied.
    decay_rate: A scalar `Tensor` or a Python number.  The decay rate.
    staircase: Boolean.  If `True`, decay the learning rate at discrete
      intervals.
    name: String.  Optional name of the operation.  Defaults to
      'NaturalExpDecay'

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.
  """
  # decay_steps takes part in the computation, so it is listed with the
  # other inputs of the scope.
  with ops.op_scope([learning_rate, global_step, decay_steps, decay_rate],
                    name, "NaturalExpDecay") as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    # Bring every operand to the learning rate's dtype before combining them.
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      p = math_ops.floor(p)
    exponent = math_ops.exp(math_ops.mul(math_ops.neg(decay_rate), p))
    return math_ops.mul(learning_rate, exponent, name=name)
def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
                       staircase=False, name=None):
  """Applies inverse time decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an inverse decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate /
                          (1 + decay_rate * global_step / decay_steps)
  ```

  If the argument `staircase` is `True`, then `global_step / decay_steps` is
  rounded down to a whole number, so the learning rate changes only at
  discrete intervals.

  Example: decay 1/t with a rate of 0.5 per 1000 steps:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 1000
  k = 0.5
  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step,
                                              decay_steps, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A scalar `Tensor` (for example a step-counter variable) or a
      Python number.  Global step to use for the decay computation.  Must not
      be negative.
    decay_steps: A scalar `Tensor` or a Python number.  `global_step` is
      divided by this value before the decay is applied.
    decay_rate: A scalar `Tensor` or a Python number.  The decay rate.
    staircase: Boolean.  If `True`, decay the learning rate at discrete
      intervals.
    name: String.  Optional name of the operation.  Defaults to
      'InverseTimeDecay'

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.
  """
  # decay_steps takes part in the computation, so it is listed with the
  # other inputs of the scope.
  with ops.op_scope([learning_rate, global_step, decay_steps, decay_rate],
                    name, "InverseTimeDecay") as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    # Bring every operand to the learning rate's dtype before combining them.
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      p = math_ops.floor(p)
    const = math_ops.cast(constant_op.constant(1), learning_rate.dtype)
    denom = math_ops.add(const, math_ops.mul(decay_rate, p))
    return math_ops.div(learning_rate, denom, name=name)

View File

@ -18,6 +18,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import math
from tensorflow.python.framework import dtypes from tensorflow.python.framework import dtypes
from tensorflow.python.framework import test_util from tensorflow.python.framework import test_util
from tensorflow.python.ops import state_ops from tensorflow.python.ops import state_ops
@ -215,5 +217,83 @@ class SqrtDecayTest(test_util.TensorFlowTestCase):
self.assertAllClose(decayed_lr.eval(), expected, 1e-6) self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
class ExponentialDecayTest(test_util.TensorFlowTestCase):
  """Compares `natural_exp_decay` with a closed-form Python reference."""

  def testDecay(self):
    """Without staircase the exponent grows with every step."""
    initial_lr = 0.1
    k = 10
    decay_rate = 0.96
    # An int32 step counter that is reset once and then bumped per iteration.
    counter = state_ops.variable_op([], dtypes.int32)
    reset_counter = state_ops.assign(counter, 0)
    bump_counter = state_ops.assign_add(counter, 1)
    lr_tensor = learning_rate_decay.natural_exp_decay(
        initial_lr, counter, k, decay_rate)
    with self.test_session():
      reset_counter.op.run()
      for i in range(k+1):
        reference = initial_lr * math.exp(-i / k * decay_rate)
        self.assertAllClose(lr_tensor.eval(), reference, 1e-6)
        bump_counter.op.run()

  def testStaircase(self):
    """With staircase the exponent only changes once per `k` steps."""
    initial_lr = 0.1
    k = 10
    decay_rate = 0.96
    counter = state_ops.variable_op([], dtypes.int32)
    reset_counter = state_ops.assign(counter, 0)
    bump_counter = state_ops.assign_add(counter, 1)
    lr_tensor = learning_rate_decay.natural_exp_decay(
        initial_lr, counter, k, decay_rate, staircase=True)
    with self.test_session():
      reset_counter.op.run()
      for i in range(k+1):
        # Integer division mirrors the floor applied in staircase mode.
        reference = initial_lr * math.exp(-decay_rate * (i // k))
        self.assertAllClose(lr_tensor.eval(), reference, 1e-6)
        bump_counter.op.run()
class InverseDecayTest(test_util.TensorFlowTestCase):
  """Compares `inverse_time_decay` with a closed-form Python reference."""

  def testDecay(self):
    """Without staircase the denominator grows with every step."""
    initial_lr = 0.1
    k = 10
    decay_rate = 0.96
    # An int32 step counter that is reset once and then bumped per iteration.
    counter = state_ops.variable_op([], dtypes.int32)
    reset_counter = state_ops.assign(counter, 0)
    bump_counter = state_ops.assign_add(counter, 1)
    lr_tensor = learning_rate_decay.inverse_time_decay(
        initial_lr, counter, k, decay_rate)
    with self.test_session():
      reset_counter.op.run()
      for i in range(k+1):
        reference = initial_lr / (1 + i / k * decay_rate)
        self.assertAllClose(lr_tensor.eval(), reference, 1e-6)
        bump_counter.op.run()

  def testStaircase(self):
    """With staircase the denominator only changes once per `k` steps."""
    initial_lr = 0.1
    k = 10
    decay_rate = 0.96
    counter = state_ops.variable_op([], dtypes.int32)
    reset_counter = state_ops.assign(counter, 0)
    bump_counter = state_ops.assign_add(counter, 1)
    lr_tensor = learning_rate_decay.inverse_time_decay(
        initial_lr, counter, k, decay_rate, staircase=True)
    with self.test_session():
      reset_counter.op.run()
      for i in range(k+1):
        # Integer division mirrors the floor applied in staircase mode.
        reference = initial_lr / (1 + decay_rate * (i // k))
        self.assertAllClose(lr_tensor.eval(), reference, 1e-6)
        bump_counter.op.run()
if __name__ == "__main__": if __name__ == "__main__":
googletest.main() googletest.main()

View File

@ -31,7 +31,7 @@ class MomentumOptimizer(optimizer.Optimizer):
""" """
def __init__(self, learning_rate, momentum, def __init__(self, learning_rate, momentum,
use_locking=False, name="Momentum"): use_locking=False, name="Momentum", use_nesterov=False):
"""Construct a new Momentum optimizer. """Construct a new Momentum optimizer.
Args: Args:
@ -44,6 +44,7 @@ class MomentumOptimizer(optimizer.Optimizer):
super(MomentumOptimizer, self).__init__(use_locking, name) super(MomentumOptimizer, self).__init__(use_locking, name)
self._learning_rate = learning_rate self._learning_rate = learning_rate
self._momentum = momentum self._momentum = momentum
self._use_nesterov = use_nesterov
def _create_slots(self, var_list): def _create_slots(self, var_list):
for v in var_list: for v in var_list:
@ -62,7 +63,8 @@ class MomentumOptimizer(optimizer.Optimizer):
math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
grad, grad,
math_ops.cast(self._momentum_tensor, var.dtype.base_dtype), math_ops.cast(self._momentum_tensor, var.dtype.base_dtype),
use_locking=self._use_locking).op use_locking=self._use_locking,
use_nesterov=self._use_nesterov).op
def _apply_sparse(self, grad, var): def _apply_sparse(self, grad, var):
mom = self.get_slot(var, "momentum") mom = self.get_slot(var, "momentum")
@ -71,4 +73,5 @@ class MomentumOptimizer(optimizer.Optimizer):
math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
grad.values, grad.indices, grad.values, grad.indices,
math_ops.cast(self._momentum_tensor, var.dtype.base_dtype), math_ops.cast(self._momentum_tensor, var.dtype.base_dtype),
use_locking=self._use_locking).op use_locking=self._use_locking,
use_nesterov=self._use_nesterov).op

View File

@ -25,6 +25,13 @@ import tensorflow as tf
class MomentumOptimizerTest(tf.test.TestCase): class MomentumOptimizerTest(tf.test.TestCase):
def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum):
var = var + accum * lr * momentum
accum = accum * momentum + g
var = var - lr * accum
var = var - accum * lr * momentum
return var, accum
def testBasic(self): def testBasic(self):
for dtype in [tf.half, tf.float32, tf.float64]: for dtype in [tf.half, tf.float32, tf.float64]:
with self.test_session(): with self.test_session():
@ -80,6 +87,68 @@ class MomentumOptimizerTest(tf.test.TestCase):
3.98 - ((0.9 * 0.01 + 0.01) * 2.0)]), 3.98 - ((0.9 * 0.01 + 0.01) * 2.0)]),
var1.eval()) var1.eval())
def testNesterovMomentum(self):
  """Checks dense Nesterov updates against the numpy reference step."""
  for dtype in [tf.float32, tf.float64]:
    with self.test_session():
      var0 = tf.Variable([1.0, 2.0], dtype=dtype)
      var1 = tf.Variable([3.0, 4.0], dtype=dtype)
      # Numpy mirrors of the variables and their accumulators.
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      # d(cost)/d(var0) = 10 * var0 and d(cost)/d(var1) = 3; the reference
      # calls below pass exactly these gradients.
      cost = 5 * var0 * var0 + 3 * var1
      global_step = tf.Variable(tf.zeros([], tf.int64), name='global_step')
      mom_op = tf.train.MomentumOptimizer(learning_rate=2.0, momentum=0.9,
          use_nesterov=True)
      opt_op = mom_op.minimize(cost, global_step, [var0, var1])
      tf.initialize_all_variables().run()
      for t in range(1, 5):
        # One optimizer step, then the same step on the numpy mirrors.
        opt_op.run()
        var0_np, accum0_np = self._update_nesterov_momentum_numpy(var0_np,
            accum0_np, var0_np * 10, 2.0, 0.9)
        var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
            accum1_np, 3, 2.0, 0.9)
        self.assertAllClose(var0_np, var0.eval())
        self.assertAllClose(var1_np, var1.eval())
def testSparseNesterovMomentum(self):
  """Checks Nesterov updates when var0's gradient is an IndexedSlices."""
  for dtype in [tf.float32, tf.float64]:
    with self.test_session():
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      # Phase 1: simulate the run in numpy only, recording the gradient of
      # var0 (10 * var0) at every step so it can be fed in later.
      grads = []
      for t in range(1, 5):
        grads.append(var0_np * 10)
        var0_np, accum0_np = self._update_nesterov_momentum_numpy(var0_np,
            accum0_np, var0_np * 10, 2.0, 0.9)
        var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
            accum1_np, 3, 2.0, 0.9)
      # Reset the numpy state so phase 2 starts from the initial values.
      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
      accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
      var0 = tf.Variable(var0_np)
      var1 = tf.Variable(var1_np)
      # NOTE(review): `loss` is not referenced again in this test; the
      # gradients are supplied explicitly through `grads_and_vars`.
      loss = 5 * var0 * var0 + 3 * var1
      mom_op = tf.train.MomentumOptimizer(learning_rate=2.0, momentum=0.9,
          use_nesterov=True)
      # var0's gradient is fed as IndexedSlices whose indices [0, 1] cover
      # both elements; var1 gets a constant gradient.
      x_feed = tf.placeholder(dtype)
      y_feed = tf.IndexedSlices(x_feed,tf.constant([0, 1]),tf.constant([2]))
      grads_and_vars = [(y_feed, var0),
          (tf.constant([3.0,3.0],dtype=dtype), var1)]
      opt_update = mom_op.apply_gradients(grads_and_vars)
      tf.initialize_all_variables().run()
      # Phase 2: replay the recorded gradients through the optimizer and
      # compare with the numpy reference after every step.
      for t in range(1, 5):
        opt_update.run(feed_dict = {x_feed:grads[t - 1]})
        var0_np, accum0_np = self._update_nesterov_momentum_numpy(var0_np,
            accum0_np, var0_np * 10, 2.0, 0.9)
        var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
            accum1_np, 3, 2.0, 0.9)
        self.assertAllClose(var0_np, var0.eval())
        self.assertAllClose(var1_np, var1.eval())
def testTensorLearningRateAndMomentum(self): def testTensorLearningRateAndMomentum(self):
for dtype in [tf.half, tf.float32, tf.float64]: for dtype in [tf.half, tf.float32, tf.float64]:
with self.test_session(): with self.test_session():

View File

@ -314,8 +314,17 @@ port::StatusOr<DriverVersion> Diagnostician::FindKernelDriverVersion() {
if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, (const void**)&cuda_driver_info)) { if (CFDictionaryGetValueIfPresent(kext_infos, kDriverKextIdentifier, (const void**)&cuda_driver_info)) {
// NOTE: OSX CUDA driver does not currently store the same driver version // NOTE: OSX CUDA driver does not currently store the same driver version
// in kCFBundleVersionKey as is returned by cuDriverGetVersion // in kCFBundleVersionKey as is returned by cuDriverGetVersion
const char * version = CFStringGetCStringPtr((CFStringRef)CFDictionaryGetValue(cuda_driver_info, kCFBundleVersionKey), kCFStringEncodingUTF8);
CFRelease(kext_infos); CFRelease(kext_infos);
const CFStringRef str = (CFStringRef)CFDictionaryGetValue(
cuda_driver_info, kCFBundleVersionKey);
const char *version = CFStringGetCStringPtr(str, kCFStringEncodingUTF8);
// version can be NULL in which case treat it as empty string
// see
// https://developer.apple.com/library/mac/documentation/CoreFoundation/Conceptual/CFStrings/Articles/AccessingContents.html#//apple_ref/doc/uid/20001184-100980-TPXREF112
if (version == NULL) {
return StringToDriverVersion("");
}
return StringToDriverVersion(version); return StringToDriverVersion(version);
} }
CFRelease(kext_infos); CFRelease(kext_infos);

View File

@ -54,6 +54,15 @@ NarrowT CheckedNarrowing(const WideT& wide) {
return narrow; return narrow;
} }
// Maps a CuDNN version number onto its "compatibility" version, i.e. the
// version with the last two decimal digits cleared. That number is the one
// that tries to indicate ABI compatibility.
//
// Example: a cudnn_version of 5107 yields 5100.
size_t cudnnCompatibilityVersion(size_t cudnn_version) {
  // Strip the last two decimal digits.
  const size_t low_digits = cudnn_version % 100;
  return cudnn_version - low_digits;
}
} // namespace } // namespace
namespace perftools { namespace perftools {
@ -139,13 +148,6 @@ size_t cudnnGetVersion() {
return callable(); return callable();
} }
// Returns whether the currently loaded cuDNN version is R2.
bool IsCudnnR2() {
static auto version = cudnnGetVersion();
DCHECK_GE(version, 2000);
return version < 3000;
}
#define PERFTOOLS_GPUTOOLS_CUDNN_WRAP(__name) \ #define PERFTOOLS_GPUTOOLS_CUDNN_WRAP(__name) \
struct DynLoadShim__##__name { \ struct DynLoadShim__##__name { \
static const char* kName; \ static const char* kName; \
@ -197,26 +199,13 @@ bool IsCudnnR2() {
__macro(cudnnPoolingForward) \ __macro(cudnnPoolingForward) \
__macro(cudnnPoolingBackward) \ __macro(cudnnPoolingBackward) \
__macro(cudnnLRNCrossChannelForward) \ __macro(cudnnLRNCrossChannelForward) \
__macro(cudnnLRNCrossChannelBackward) __macro(cudnnLRNCrossChannelBackward) \
// clang-format on
CUDNN_DNN_ROUTINE_EACH(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
// clang-format off
#if CUDNN_VERSION >= 4000 && CUDNN_VERSION < 5000
#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \
__macro(cudnnAddTensor_v2) \
__macro(cudnnConvolutionBackwardData_v2) \
__macro(cudnnConvolutionBackwardFilter_v2)
#else
#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \
__macro(cudnnAddTensor) \ __macro(cudnnAddTensor) \
__macro(cudnnConvolutionBackwardData) \ __macro(cudnnConvolutionBackwardData) \
__macro(cudnnConvolutionBackwardFilter) __macro(cudnnConvolutionBackwardFilter)
#endif
// clang-format on // clang-format on
CUDNN_DNN_ROUTINE_EACH_R2(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
// APIs available after R3: // APIs available after R3:
#if CUDNN_VERSION >= 3000 #if CUDNN_VERSION >= 3000
@ -340,15 +329,21 @@ port::Status CudnnSupport::Init() {
// Check whether loaded version of CuDNN matches what the source // Check whether loaded version of CuDNN matches what the source
// was built with. // was built with.
size_t loaded_version = dynload::cudnnGetVersion(); size_t loaded_version = dynload::cudnnGetVersion();
bool library_loaded_matches_source = (loaded_version == CUDNN_VERSION); size_t loaded_compat_version = cudnnCompatibilityVersion(loaded_version);
size_t compiled_compat_version = cudnnCompatibilityVersion(CUDNN_VERSION);
bool library_loaded_matches_source =
(loaded_compat_version == compiled_compat_version);
if (!library_loaded_matches_source) { if (!library_loaded_matches_source) {
const string error = const string error =
port::StrCat("Loaded cudnn library: ", loaded_version, port::StrCat("Loaded runtime CuDNN library: ", loaded_version,
" but source was compiled against ", CUDNN_VERSION, " (compatibility version ", loaded_compat_version,
". If using a binary install, upgrade your cudnn " ") but source was compiled with ", CUDNN_VERSION,
" (compatibility version ", compiled_compat_version,
"). If using a binary install, upgrade your CuDNN "
"library to match. If building from sources, " "library to match. If building from sources, "
"make sure the library loaded matches the " "make sure the library loaded at runtime matches a "
"version you specified during compile configuration."); "compatible version specified during compile "
"configuration.");
LOG(ERROR) << error; LOG(ERROR) << error;
return port::Status{port::error::INTERNAL, error}; return port::Status{port::error::INTERNAL, error};
} }
@ -1109,31 +1104,6 @@ bool CudnnSupport::DoConvolveBackwardDataImpl(
ScopedConvolutionDescriptor conv{parent_, convolution_descriptor, ScopedConvolutionDescriptor conv{parent_, convolution_descriptor,
CUDNN_DATA_FLOAT}; CUDNN_DATA_FLOAT};
#if CUDNN_VERSION < 5000
#if CUDNN_VERSION >= 3000
if (dynload::IsCudnnR2()) {
#endif
#if CUDNN_VERSION >= 4000
status = dynload::cudnnConvolutionBackwardData_v2(
#else
status = dynload::cudnnConvolutionBackwardData(
#endif
parent_, ToHandle(dnn_handle_), &alpha, filter.handle(),
filter_data.opaque(), out_back_nd.handle(),
backward_output_data.opaque(), conv.handle(), &beta,
in_back_nd.handle(), backward_input_data->opaque());
if (status != CUDNN_STATUS_SUCCESS) {
LOG(FATAL) << "failed to enqueue convolution on stream: "
<< ToString(status);
return false;
}
return true;
#if CUDNN_VERSION >= 3000
}
#endif
#endif
#if CUDNN_VERSION >= 3000
const bool is_profiling = output_profile_result != nullptr; const bool is_profiling = output_profile_result != nullptr;
cudnnConvolutionBwdDataAlgo_t algo; cudnnConvolutionBwdDataAlgo_t algo;
DeviceMemory<uint8> scratch; DeviceMemory<uint8> scratch;
@ -1284,7 +1254,6 @@ bool CudnnSupport::DoConvolveBackwardDataImpl(
return false; return false;
} }
return true; return true;
#endif
} }
bool CudnnSupport::DoConvolveBackwardData( bool CudnnSupport::DoConvolveBackwardData(
@ -1369,31 +1338,6 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl(
ScopedConvolutionDescriptor conv{parent_, convolution_descriptor, ScopedConvolutionDescriptor conv{parent_, convolution_descriptor,
CUDNN_DATA_FLOAT}; CUDNN_DATA_FLOAT};
#if CUDNN_VERSION < 5000
#if CUDNN_VERSION >= 3000
if (dynload::IsCudnnR2()) {
#endif
#if CUDNN_VERSION >= 4000
status = dynload::cudnnConvolutionBackwardFilter_v2(
#else
status = dynload::cudnnConvolutionBackwardFilter(
#endif
parent_, ToHandle(dnn_handle_), &alpha, input_nd.handle(),
input_data.opaque(), out_back_nd.handle(),
backward_output_data.opaque(), conv.handle(), &beta, filter.handle(),
backward_filter_data->opaque());
if (status != CUDNN_STATUS_SUCCESS) {
LOG(FATAL) << "failed to enqueue convolution on stream: "
<< ToString(status);
return false;
}
return true;
#if CUDNN_VERSION >= 3000
}
#endif
#endif
#if CUDNN_VERSION >= 3000
const bool is_profiling = output_profile_result != nullptr; const bool is_profiling = output_profile_result != nullptr;
cudnnConvolutionBwdFilterAlgo_t algo; cudnnConvolutionBwdFilterAlgo_t algo;
DeviceMemory<uint8> scratch; DeviceMemory<uint8> scratch;
@ -1544,7 +1488,6 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl(
return false; return false;
} }
return true; return true;
#endif
} }
bool CudnnSupport::DoConvolveBackwardFilter( bool CudnnSupport::DoConvolveBackwardFilter(
@ -1824,23 +1767,7 @@ bool CudnnSupport::DoBiasAdd(Stream* stream,
const float alpha = 1.0f; const float alpha = 1.0f;
const float beta = 1.0f; const float beta = 1.0f;
#if CUDNN_VERSION >= 3000
if (dynload::IsCudnnR2()) {
#endif
#if CUDNN_VERSION < 5000
#if CUDNN_VERSION >= 4000
status = dynload::cudnnAddTensor_v2(
#else
status = dynload::cudnnAddTensor(
#endif
parent_, ToHandle(dnn_handle_), CUDNN_ADD_SAME_C, &alpha,
bias_descriptor.handle(), biases.opaque(), &beta,
input_descriptor.handle(), output_data->opaque());
#endif // CUDNN_VERSION < 5000
#if CUDNN_VERSION >= 3000
} else {
#if CUDNN_VERSION >= 5000 #if CUDNN_VERSION >= 5000
status = dynload::cudnnAddTensor( status = dynload::cudnnAddTensor(
#else #else
@ -1849,8 +1776,6 @@ bool CudnnSupport::DoBiasAdd(Stream* stream,
parent_, ToHandle(dnn_handle_), &alpha, bias_descriptor.handle(), parent_, ToHandle(dnn_handle_), &alpha, bias_descriptor.handle(),
biases.opaque(), &beta, input_descriptor.handle(), biases.opaque(), &beta, input_descriptor.handle(),
output_data->opaque()); output_data->opaque());
}
#endif
if (status != CUDNN_STATUS_SUCCESS) { if (status != CUDNN_STATUS_SUCCESS) {
LOG(ERROR) << "stream " << stream << " could not enqueue bias addition."; LOG(ERROR) << "stream " << stream << " could not enqueue bias addition.";

View File

@ -10,10 +10,10 @@ exports_files(["LICENSE"])
filegroup( filegroup(
name = "frontend", name = "frontend",
srcs = [ srcs = [
"TAG",
"dist/index.html", "dist/index.html",
"dist/tf-tensorboard.html", "dist/tf-tensorboard.html",
"TAG", "//tensorflow/tensorboard/bower",
"//tensorflow/tensorboard/bower:bower",
"//tensorflow/tensorboard/lib:all_files", "//tensorflow/tensorboard/lib:all_files",
], ],
) )

View File

@ -21,7 +21,7 @@ directory by creating a `SummaryWriter`:
``` python ``` python
# sess.graph_def is the graph definition; that enables the Graph Visualizer. # sess.graph_def is the graph definition; that enables the Graph Visualizer.
summary_writer = tf.train.SummaryWriter('/path/to/logs', sess.graph_def) summary_writer = tf.train.SummaryWriter('/path/to/logs', sess.graph)
``` ```
For more details, see [this For more details, see [this
@ -115,9 +115,9 @@ For example, here is a well-organized TensorBoard log directory, with two runs,
# The Visualizations # The Visualizations
### Scalar Dashboard ### Events Dashboard
TensorBoard's Scalar Dashboard visualizes scalar statistics that vary over time; TensorBoard's Events Dashboard visualizes scalar statistics that vary over time;
for example, you might want to track the model's loss or learning rate. As for example, you might want to track the model's loss or learning rate. As
described in *Key Concepts*, you can compare multiple runs, and the data is described in *Key Concepts*, you can compare multiple runs, and the data is
organized by tag. The line charts have the following interactions: organized by tag. The line charts have the following interactions:

View File

@ -49,10 +49,11 @@
# to run. # to run.
# #
# Constants:
# Fixed naming patterns for wheel (.whl) files given different python versions # Fixed naming patterns for wheel (.whl) files given different python versions
if [[ $(uname) == "Linux" ]]; then
declare -A WHL_TAGS declare -A WHL_TAGS
WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m") WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m")
fi
INSTALL_EXTRA_PIP_PACKAGES=${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES} INSTALL_EXTRA_PIP_PACKAGES=${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES}

View File

@ -243,6 +243,8 @@ rm -rf ${PY_TEST_DIR}/tensorflow/core/lib/jpeg
cp -r tensorflow/core/lib/jpeg ${PY_TEST_DIR}/tensorflow/core/lib cp -r tensorflow/core/lib/jpeg ${PY_TEST_DIR}/tensorflow/core/lib
rm -rf ${PY_TEST_DIR}/tensorflow/core/lib/png rm -rf ${PY_TEST_DIR}/tensorflow/core/lib/png
cp -r tensorflow/core/lib/png ${PY_TEST_DIR}/tensorflow/core/lib cp -r tensorflow/core/lib/png ${PY_TEST_DIR}/tensorflow/core/lib
rm -rf ${PY_TEST_DIR}/tensorflow/core/lib/gif
cp -r tensorflow/core/lib/gif ${PY_TEST_DIR}/tensorflow/core/lib
# Copy test data from tensorflow/contrib/ffmpeg # Copy test data from tensorflow/contrib/ffmpeg

View File

@ -174,24 +174,57 @@ function get_cuda_capability_version() {
fi fi
} }
# Process container type # Container type, e.g., CPU, GPU
CTYPE=${TF_BUILD_CONTAINER_TYPE} CTYPE=${TF_BUILD_CONTAINER_TYPE}
# Determine if Docker is available
OPT_FLAG="" OPT_FLAG=""
if [[ -z "$(which docker)" ]]; then
DO_DOCKER=0
echo "It appears that Docker is not available on this system. "\
"Will perform build without Docker."
echo "Also, the additional option flags will be applied to the build:"
echo " ${NO_DOCKER_OPT_FLAG}"
MAIN_CMD="${NO_DOCKER_MAIN_CMD} ${CTYPE}"
OPT_FLAG="${OPT_FLAG} ${NO_DOCKER_OPT_FLAG}"
fi
# Process container type
if [[ ${CTYPE} == "cpu" ]]; then if [[ ${CTYPE} == "cpu" ]]; then
: :
elif [[ ${CTYPE} == "gpu" ]]; then elif [[ ${CTYPE} == "gpu" ]]; then
OPT_FLAG="--config=cuda" OPT_FLAG="${OPT_FLAG} --config=cuda"
# Attempt to determine CUDA capability version and use it # Attempt to determine CUDA capability version automatically and use it if
if [[ "${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS}" != \ # CUDA capability version is not specified by the environment variables.
*"TF_CUDA_COMPUTE_CAPABILITIES="* ]]; then
CUDA_CAPA_VER=$(get_cuda_capability_version) CUDA_CAPA_VER=$(get_cuda_capability_version)
if [[ ! -z ${CUDA_CAPA_VER} ]]; then if [[ ! -z ${CUDA_CAPA_VER} ]]; then
echo "TF_CUDA_COMPUTE_CAPABILITIES is not set." AUTO_CUDA_CAPA_VER=0
echo "Using CUDA capability version from deviceQuery: ${CUDA_CAPA_VER}" if [[ ${DO_DOCKER} == "1" ]] && \
[[ "${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS}" != \
*"TF_CUDA_COMPUTE_CAPABILITIES="* ]]; then
AUTO_CUDA_CAPA_VER=1
TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\ TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\
"${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e "\ "${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e "\
"TF_CUDA_COMPUTE_CAPABILITIES=${CUDA_CAPA_VER}" "TF_CUDA_COMPUTE_CAPABILITIES=${CUDA_CAPA_VER}"
echo "Docker GPU build: TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS="\
"\"${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS}\""
elif [[ ${DO_DOCKER} == "0" ]] && \
[[ -z "${TF_CUDA_COMPUTE_CAPABILITIES}" ]]; then
AUTO_CUDA_CAPA_VER=1
TF_CUDA_COMPUTE_CAPABILITIES="${CUDA_CAPA_VER}"
echo "Non-Docker GPU build: TF_CUDA_COMPUTE_CAPABILITIES="\
"\"${TF_CUDA_COMPUTE_CAPABILITIES}\""
fi
if [[ ${AUTO_CUDA_CAPA_VER} == "1" ]]; then
echo "TF_CUDA_COMPUTE_CAPABILITIES is not set:"
echo "Using CUDA capability version from deviceQuery: ${CUDA_CAPA_VER}"
echo ""
fi fi
fi fi
elif [[ ${CTYPE} == "android" ]]; then elif [[ ${CTYPE} == "android" ]]; then
@ -203,19 +236,6 @@ fi
EXTRA_PARAMS="" EXTRA_PARAMS=""
# Determine if Docker is available
if [[ -z "$(which docker)" ]]; then
DO_DOCKER=0
echo "It appears that Docker is not available on this system. "\
"Will perform build without Docker."
echo "Also, the additional option flags will be applied to the build:"
echo " ${NO_DOCKER_OPT_FLAG}"
MAIN_CMD="${NO_DOCKER_MAIN_CMD} ${CTYPE}"
OPT_FLAG="${OPT_FLAG} ${NO_DOCKER_OPT_FLAG}"
fi
# Determine if this is a benchmarks job # Determine if this is a benchmarks job
RUN_BENCHMARKS=0 RUN_BENCHMARKS=0
if [[ ! -z "${TF_BUILD_RUN_BENCHMARKS}" ]] && if [[ ! -z "${TF_BUILD_RUN_BENCHMARKS}" ]] &&

View File

@ -80,7 +80,7 @@ RUN mkdir /bazel && \
# Download and build TensorFlow. # Download and build TensorFlow.
RUN git clone --recursive https://github.com/tensorflow/tensorflow.git && \ RUN git clone -b r0.9 --recursive --recurse-submodules https://github.com/tensorflow/tensorflow.git && \
cd tensorflow && \ cd tensorflow && \
git checkout r0.9 git checkout r0.9
WORKDIR /tensorflow WORKDIR /tensorflow

View File

@ -16,7 +16,9 @@ RUN ./install_google_cloud_sdk.bash --disable-prompts --install-dir=/var/gcloud
# Install nightly TensorFlow pip # Install nightly TensorFlow pip
RUN pip install \ RUN pip install \
http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-py2-none-any.whl http://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.9.0-cp27-none-linux_x86_64.whl
# Copy test files # Copy test files
COPY python/gcs_smoke.py / RUN mkdir -p /gcs-smoke/python
COPY gcs_smoke_wrapper.sh /gcs-smoke/
COPY python/gcs_smoke.py /gcs-smoke/python/

View File

@ -67,30 +67,8 @@ docker build --no-cache \
# Run the docker image with the GCS key file mapped and the gcloud-required # Run the docker image with the GCS key file mapped and the gcloud-required
# environment variables set. # environment variables set.
LOG_FILE="/tmp/tf-gcs-test.log"
rm -rf ${LOG_FILE}
docker run --rm \ docker run --rm \
-v ${GCLOUD_JSON_KEY_PATH}:/gcloud-key.json \ -v ${GCLOUD_JSON_KEY_PATH}:/gcloud-key.json \
-e "GOOGLE_APPLICATION_CREDENTIALS=/gcloud-key.json" \ -e "GOOGLE_APPLICATION_CREDENTIALS=/gcloud-key.json" \
"${DOCKER_IMG}" \ "${DOCKER_IMG}" \
python /gcs_smoke.py --gcs_bucket_url="${GCS_BUCKET_URL}" \ /gcs-smoke/gcs_smoke_wrapper.sh "${GCS_BUCKET_URL}"
2>&1 > "${LOG_FILE}"
if [[ $? != "0" ]]; then
cat ${LOG_FILE}
die "FAIL: End-to-end test of GCS access from TensorFlow failed."
fi
cat ${LOG_FILE}
echo ""
# Clean up the newly created tfrecord file in GCS bucket
NEW_TFREC_URL=$(grep "Using input path" "${LOG_FILE}" | \
awk '{print $NF}')
if [[ -z ${NEW_TFREC_URL} ]]; then
die "FAIL: Unable to determine the URL to the new tfrecord file in GCS"
fi
gsutil rm "${NEW_TFREC_URL}" && \
echo "Cleaned up new tfrecord file in GCS: ${NEW_TFREC_URL}" || \
die "FAIL: Unable to clean up new tfrecord file in GCS: ${NEW_TFREC_URL}"

View File

@ -0,0 +1,98 @@
#!/usr/bin/env bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# In-container wrapper for GCS smoke test.
#
# This script invokes gcs_smoke.py and performs tear down afterwards.
#
# Usage:
# gcs_smoke_wrapper.sh <GCS_BUCKET_URL>
# Absolute path of the directory that contains this script; used below to
# locate python/gcs_smoke.py relative to the script itself.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Helper function: Exit on failure.
#
# Prints all arguments, joined by single spaces, on one line of stdout and
# terminates the script with exit status 1.
#
# Arguments:
#   $@ - pieces of the error message.
die () {
  # "$*" is quoted so the message is emitted verbatim: no word splitting
  # (runs of spaces are preserved) and no pathname expansion (a literal '*'
  # in a message is not replaced by file names). printf is used instead of
  # echo so a message that starts with '-n' or '-e' is not taken as an option.
  printf '%s\n' "$*"
  exit 1
}
# Print the expected command-line invocation, followed by one blank line.
print_usage() {
  printf '%s\n\n' "Usage: gcs_smoke_wrapper.sh <GCS_BUCKET_URL>"
}
# Sanity check on command-line arguments.
GCS_BUCKET_URL=$1
if [[ -z "${GCS_BUCKET_URL}" ]]; then
  print_usage
  die "ERROR: Command-line argument GCS_BUCKET_URL is not supplied"
fi

# Check that gcloud and gsutil binaries are available.
GCLOUD_BIN="/var/gcloud/google-cloud-sdk/bin/gcloud"
if [[ ! -f "${GCLOUD_BIN}" ]]; then
  die "ERROR: Unable to find gcloud at path ${GCLOUD_BIN}"
fi

GSUTIL_BIN="/var/gcloud/google-cloud-sdk/bin/gsutil"
if [[ ! -f "${GSUTIL_BIN}" ]]; then
  die "ERROR: Unable to find gsutil at path ${GSUTIL_BIN}"
fi

# Check environment variable for gcloud credentials.
# The variable NAME is written literally in the message: this branch only
# runs when the variable is empty, so expanding it would print nothing.
if [[ -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
  die "ERROR: Required gcloud environment variable" \
      "GOOGLE_APPLICATION_CREDENTIALS is not set."
fi

# Locate main Python file
GCS_SMOKE_PY="${SCRIPT_DIR}/python/gcs_smoke.py"
if [[ ! -f "${GCS_SMOKE_PY}" ]]; then
  die "ERROR: Unable to find Python file at ${GCS_SMOKE_PY}"
fi

LOG_FILE="/tmp/tf-gcs-test.log"
rm -rf "${LOG_FILE}" || \
  die "ERROR: Failed to remove existing log file ${LOG_FILE}"

# Invoke main Python file, capturing both stdout and stderr in the log file.
# The file redirection has to come before 2>&1: redirections are processed
# left to right, so "2>&1 > file" would duplicate stderr onto the original
# stdout and leave it out of the log.
if ! python "${GCS_SMOKE_PY}" --gcs_bucket_url="${GCS_BUCKET_URL}" \
    > "${LOG_FILE}" 2>&1; then
  cat "${LOG_FILE}"
  die "FAIL: End-to-end test of GCS access from TensorFlow failed."
fi
cat "${LOG_FILE}"
echo ""

# Clean up the newly created tfrecord file in GCS bucket.
# First, activate gcloud service account
"${GCLOUD_BIN}" auth activate-service-account \
  --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" || \
  die "ERROR: Failed to activate gcloud service account with JSON key file"

# The URL is taken as the last whitespace-separated field of the log line
# containing "Using input path" (presumably printed by gcs_smoke.py -- verify
# against that script).
NEW_TFREC_URL=$(grep "Using input path" "${LOG_FILE}" | awk '{print $NF}')
if [[ -z "${NEW_TFREC_URL}" ]]; then
  die "FAIL: Unable to determine the URL to the new tfrecord file in GCS"
fi

# Explicit if/else instead of "rm && echo || die", so that die is reached only
# when the removal itself fails.
if "${GSUTIL_BIN}" rm "${NEW_TFREC_URL}"; then
  echo "Cleaned up new tfrecord file in GCS: ${NEW_TFREC_URL}"
else
  die "FAIL: Unable to clean up new tfrecord file in GCS: ${NEW_TFREC_URL}"
fi

View File

@ -8,8 +8,8 @@ load("//tensorflow:tensorflow.bzl", "transitive_hdrs")
transitive_hdrs( transitive_hdrs(
name = "other_headers", name = "other_headers",
deps = [ deps = [
"//third_party/eigen3",
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",
"//third_party/eigen3",
], ],
) )

View File

@ -108,18 +108,13 @@ class InstallHeaders(Command):
# directories for -I # directories for -I
install_dir = re.sub('/google/protobuf/src', '', install_dir) install_dir = re.sub('/google/protobuf/src', '', install_dir)
# Copy eigen code into tensorflow/include, # Copy eigen code into tensorflow/include.
# tensorflow/include/external/eigen_archive/eigen-eigen-<revision>,
# and tensorflow/include/eigen-eigen-<revision>.
# A symlink would do, but the wheel file that gets created ignores # A symlink would do, but the wheel file that gets created ignores
# symlink within the directory hierarchy. # symlink within the directory hierarchy.
# NOTE(keveman): Figure out how to customize bdist_wheel package so # NOTE(keveman): Figure out how to customize bdist_wheel package so
# we can do the symlink. # we can do the symlink.
if re.search(r'(external/eigen_archive/eigen-eigen-\w+)', install_dir): if 'external/eigen_archive/' in install_dir:
extra_dirs = [re.sub('/external/eigen_archive', '', install_dir), extra_dir = install_dir.replace('external/eigen_archive', '')
re.sub(r'external/eigen_archive/eigen-eigen-\w+', '',
install_dir)]
for extra_dir in extra_dirs:
if not os.path.exists(extra_dir): if not os.path.exists(extra_dir):
self.mkpath(extra_dir) self.mkpath(extra_dir)
self.copy_file(header, extra_dir) self.copy_file(header, extra_dir)

View File

@ -4,10 +4,17 @@
# within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the # within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
# local_repository rule (e.g. "@tf"). # local_repository rule (e.g. "@tf").
def tf_workspace(path_prefix = "", tf_repo_name = ""): def tf_workspace(path_prefix = "", tf_repo_name = ""):
# These lines need to be changed when updating Eigen. They are parsed from
# this file by the cmake and make builds to determine the eigen version and hash.
eigen_version = "b4fa9622b809"
eigen_sha256 = "2862840c2de9c0473a4ef20f8678949ae89ab25965352ee53329e63ba46cec62"
native.new_http_archive( native.new_http_archive(
name = "eigen_archive", name = "eigen_archive",
url = "https://bitbucket.org/eigen/eigen/get/b4fa9622b809.tar.gz", url = "https://bitbucket.org/eigen/eigen/get/" + eigen_version + ".tar.gz",
sha256 = "2862840c2de9c0473a4ef20f8678949ae89ab25965352ee53329e63ba46cec62", sha256 = eigen_sha256,
strip_prefix = "eigen-eigen-" + eigen_version,
build_file = path_prefix + "eigen.BUILD", build_file = path_prefix + "eigen.BUILD",
) )
@ -56,6 +63,13 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
build_file = path_prefix + "png.BUILD", build_file = path_prefix + "png.BUILD",
) )
native.new_http_archive(
name = "gif_archive",
url = "http://ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1",
build_file = path_prefix + "gif.BUILD",
)
native.new_http_archive( native.new_http_archive(
name = "six_archive", name = "six_archive",
url = "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz#md5=34eed507548117b2ab523ab14b2f8b55", url = "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz#md5=34eed507548117b2ab523ab14b2f8b55",

View File

@ -1,4 +1,3 @@
package(default_visibility = ["//visibility:public"]) package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0

View File

@ -13,7 +13,6 @@ cc_library(
"unsupported/Eigen/CXX11/FixedPoint", "unsupported/Eigen/CXX11/FixedPoint",
"unsupported/Eigen/CXX11/src/FixedPoint/*.h", "unsupported/Eigen/CXX11/src/FixedPoint/*.h",
]), ]),
includes = ["."],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
"@eigen_archive//:eigen", "@eigen_archive//:eigen",

View File

@ -1 +1 @@
#include "eigen-eigen-b4fa9622b809/Eigen/Cholesky" #include "Eigen/Cholesky"

View File

@ -1 +1 @@
#include "eigen-eigen-b4fa9622b809/Eigen/Core" #include "Eigen/Core"

View File

@ -1 +1 @@
#include "eigen-eigen-b4fa9622b809/Eigen/Eigenvalues" #include "Eigen/Eigenvalues"

View File

@ -1 +1 @@
#include "eigen-eigen-b4fa9622b809/Eigen/LU" #include "Eigen/LU"

View File

@ -1 +1 @@
#include "eigen-eigen-b4fa9622b809/Eigen/QR" #include "Eigen/QR"

View File

@ -1 +1 @@
#include "eigen-eigen-b4fa9622b809/unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"

Some files were not shown because too many files have changed in this diff Show More