Merge commit for internal changes
This commit is contained in:
commit
b7c416926e
@ -1,6 +1,6 @@
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
archive_dir = "eigen-eigen-d02e6a705c30"
|
||||
archive_dir = "eigen-eigen-0c0b79ecd74c"
|
||||
|
||||
cc_library(
|
||||
name = "eigen",
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
include (ExternalProject)
|
||||
|
||||
set(eigen_archive_hash "d02e6a705c30")
|
||||
set(eigen_archive_hash "0c0b79ecd74c")
|
||||
|
||||
set(eigen_INCLUDE_DIRS
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
@ -16,7 +16,7 @@ set(eigen_INCLUDE_DIRS
|
||||
${tensorflow_source_dir}/third_party/eigen3
|
||||
)
|
||||
set(eigen_URL https://bitbucket.org/eigen/eigen/get/${eigen_archive_hash}.tar.gz)
|
||||
set(eigen_HASH SHA256=532956172daa8aba87c750791ff89a5c38cdb07e2525afe17ecb4bef812d67cf)
|
||||
set(eigen_HASH SHA256=b4b5884b03bd4bae114d02b36e2435ad1504ed8e51431d16c876b6f6a365882b)
|
||||
set(eigen_BUILD ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen)
|
||||
set(eigen_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/eigen/install)
|
||||
|
||||
|
@ -198,6 +198,30 @@ py_test(
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "experiment_test",
|
||||
size = "small",
|
||||
srcs = ["python/learn/tests/experiment_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":learn",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "learn_runner_test",
|
||||
size = "small",
|
||||
srcs = ["python/learn/tests/learn_runner_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":learn",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "tensor_signature_test",
|
||||
srcs = ["python/learn/estimators/tensor_signature_test.py"],
|
||||
@ -439,6 +463,19 @@ py_test(
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "stability_test",
|
||||
size = "small",
|
||||
srcs = ["python/learn/tests/stability_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":learn",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
py_binary(
|
||||
name = "inspect_checkpoint",
|
||||
srcs = [
|
||||
|
@ -21,6 +21,7 @@ from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
|
||||
# pylint: disable=wildcard-import
|
||||
from tensorflow.contrib.learn.python.learn import datasets
|
||||
from tensorflow.contrib.learn.python.learn import estimators
|
||||
from tensorflow.contrib.learn.python.learn import graph_actions
|
||||
@ -30,9 +31,9 @@ from tensorflow.contrib.learn.python.learn import monitors
|
||||
from tensorflow.contrib.learn.python.learn import ops
|
||||
from tensorflow.contrib.learn.python.learn import preprocessing
|
||||
from tensorflow.contrib.learn.python.learn import utils
|
||||
# pylint: disable=wildcard-import
|
||||
from tensorflow.contrib.learn.python.learn.dataframe import *
|
||||
from tensorflow.contrib.learn.python.learn.estimators import *
|
||||
from tensorflow.contrib.learn.python.learn.experiment import Experiment
|
||||
from tensorflow.contrib.learn.python.learn.graph_actions import evaluate
|
||||
from tensorflow.contrib.learn.python.learn.graph_actions import infer
|
||||
from tensorflow.contrib.learn.python.learn.graph_actions import NanLossDuringTrainingError
|
||||
|
@ -50,11 +50,11 @@ class DNNClassifier(dnn_linear_combined.DNNLinearCombinedClassifier):
|
||||
|
||||
def input_fn_eval: # returns x, Y
|
||||
pass
|
||||
estimator.evaluate(input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
@ -145,11 +145,11 @@ class DNNRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
|
||||
|
||||
def input_fn_eval: # returns x, Y
|
||||
pass
|
||||
estimator.evaluate(input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
|
@ -397,12 +397,12 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator):
|
||||
...
|
||||
def input_fn_eval: # returns x, y
|
||||
...
|
||||
estimator.train(input_fn_train)
|
||||
estimator.evaluate(input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.fit(input_fn=input_fn_train)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
|
@ -42,7 +42,14 @@ def _prepare_iris_data_for_logistic_regression():
|
||||
return iris
|
||||
|
||||
|
||||
def _iris_input_fn():
|
||||
def _iris_input_multiclass_fn():
  """Builds an input pair from the unmodified iris dataset.

  Returns:
    A `(features, target)` tuple: `features` is a dict holding the iris data
    as a float32 constant under the key 'feature', and `target` is the iris
    target as an int32 constant with shape [150, 1].
  """
  iris = tf.contrib.learn.datasets.load_iris()
  features = {'feature': tf.constant(iris.data, dtype=tf.float32)}
  target = tf.constant(iris.target, shape=[150, 1], dtype=tf.int32)
  return features, target
|
||||
|
||||
|
||||
def _iris_input_logistic_fn():
|
||||
iris = _prepare_iris_data_for_logistic_regression()
|
||||
return {
|
||||
'feature': tf.constant(iris.data, dtype=tf.float32)
|
||||
@ -64,8 +71,8 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
|
||||
dnn_feature_columns=cont_features,
|
||||
dnn_hidden_units=[3, 3])
|
||||
|
||||
classifier.fit(input_fn=_iris_input_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_fn, steps=100)
|
||||
classifier.fit(input_fn=_iris_input_logistic_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_logistic_fn, steps=100)
|
||||
self.assertGreater(scores['accuracy'], 0.9)
|
||||
|
||||
def testLogisticRegression_TensorData(self):
|
||||
@ -127,8 +134,8 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
|
||||
dnn_feature_columns=cont_features,
|
||||
dnn_hidden_units=[3, 3])
|
||||
|
||||
classifier.fit(input_fn=_iris_input_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_fn, steps=100)
|
||||
classifier.fit(input_fn=_iris_input_multiclass_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_multiclass_fn, steps=100)
|
||||
self.assertGreater(scores['accuracy'], 0.9)
|
||||
|
||||
def testWeightColumn(self):
|
||||
@ -210,8 +217,8 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
|
||||
dnn_hidden_units=[3, 3],
|
||||
dnn_optimizer=tf.train.AdagradOptimizer(learning_rate=0.1))
|
||||
|
||||
classifier.fit(input_fn=_iris_input_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_fn, steps=100)
|
||||
classifier.fit(input_fn=_iris_input_logistic_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_logistic_fn, steps=100)
|
||||
self.assertGreater(scores['accuracy'], 0.9)
|
||||
|
||||
def testCustomOptimizerByString(self):
|
||||
@ -230,8 +237,8 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
|
||||
dnn_hidden_units=[3, 3],
|
||||
dnn_optimizer='Adagrad')
|
||||
|
||||
classifier.fit(input_fn=_iris_input_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_fn, steps=100)
|
||||
classifier.fit(input_fn=_iris_input_logistic_fn, steps=100)
|
||||
scores = classifier.evaluate(input_fn=_iris_input_logistic_fn, steps=100)
|
||||
self.assertGreater(scores['accuracy'], 0.9)
|
||||
|
||||
def testPredict(self):
|
||||
|
@ -111,8 +111,8 @@ class BaseEstimator(sklearn.BaseEstimator):
|
||||
self._model_dir = model_dir
|
||||
if self._model_dir is None:
|
||||
self._model_dir = tempfile.mkdtemp()
|
||||
logging.info('Using temporary folder as model directory: %s',
|
||||
self._model_dir)
|
||||
logging.warning('Using temporary folder as model directory: %s',
|
||||
self._model_dir)
|
||||
|
||||
# Create a run configuration
|
||||
if config is None:
|
||||
@ -135,9 +135,8 @@ class BaseEstimator(sklearn.BaseEstimator):
|
||||
|
||||
self._graph = None
|
||||
|
||||
def fit(
|
||||
self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
|
||||
monitors=None):
|
||||
def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
|
||||
monitors=None):
|
||||
"""Trains a model given training data `x` predictions and `y` targets.
|
||||
|
||||
Args:
|
||||
@ -421,21 +420,20 @@ class BaseEstimator(sklearn.BaseEstimator):
|
||||
monitors=None,
|
||||
log_every_steps=100,
|
||||
fail_on_nan_loss=True):
|
||||
# TODO(wicke): This is a hack and needs to go.
|
||||
if self._config.execution_mode not in ('all', 'train'):
|
||||
return
|
||||
# TODO(wicke): Remove this once Model and associated code are gone.
|
||||
if hasattr(self._config, 'execution_mode'):
|
||||
if self._config.execution_mode not in ('all', 'train'):
|
||||
return
|
||||
|
||||
if not self._model_dir:
|
||||
raise ValueError('Estimator\'s model_dir should be non-empty.')
|
||||
|
||||
# Stagger startup of worker sessions based on task id.
|
||||
sleep_secs = min(self._config.training_worker_max_startup_secs,
|
||||
self._config.task *
|
||||
self._config.training_worker_session_startup_stagger_secs)
|
||||
if sleep_secs:
|
||||
logging.info('Waiting %d secs before starting task %d.', sleep_secs,
|
||||
self._config.task)
|
||||
time.sleep(sleep_secs)
|
||||
# Stagger startup of worker sessions based on task id.
|
||||
sleep_secs = min(
|
||||
self._config.training_worker_max_startup_secs,
|
||||
self._config.task *
|
||||
self._config.training_worker_session_startup_stagger_secs)
|
||||
if sleep_secs:
|
||||
logging.info('Waiting %d secs before starting task %d.', sleep_secs,
|
||||
self._config.task)
|
||||
time.sleep(sleep_secs)
|
||||
|
||||
# Device allocation
|
||||
device_fn = device_fn or self._device_fn
|
||||
@ -454,7 +452,7 @@ class BaseEstimator(sklearn.BaseEstimator):
|
||||
monitors += monitors_lib.get_default_monitors(
|
||||
loss_op=loss_op,
|
||||
summary_op=logging_ops.get_summary_op(),
|
||||
save_summary_steps=100,
|
||||
save_summary_steps=self._config.save_summary_steps,
|
||||
summary_writer=graph_actions.get_summary_writer(self._model_dir))
|
||||
|
||||
is_chief = self._config.task == 0
|
||||
@ -478,8 +476,9 @@ class BaseEstimator(sklearn.BaseEstimator):
|
||||
log_every_steps=log_every_steps,
|
||||
supervisor_is_chief=is_chief,
|
||||
supervisor_master=self._config.master,
|
||||
supervisor_save_model_secs=self._config.save_checkpoints_secs,
|
||||
feed_fn=feed_fn,
|
||||
max_steps=steps,
|
||||
steps=steps,
|
||||
fail_on_nan_loss=fail_on_nan_loss,
|
||||
monitors=monitors)
|
||||
|
||||
@ -513,8 +512,9 @@ class BaseEstimator(sklearn.BaseEstimator):
|
||||
feed_fn=None,
|
||||
metrics=None,
|
||||
name=''):
|
||||
# TODO(wicke): This is a hack and needs to go.
|
||||
if self._config.execution_mode not in ('all', 'evaluate', 'eval_evalset'):
|
||||
# TODO(wicke): Remove this once Model and associated code are gone.
|
||||
if (hasattr(self._config, 'execution_mode') and
|
||||
self._config.execution_mode not in ('all', 'evaluate', 'eval_evalset')):
|
||||
return
|
||||
|
||||
# Check that model has been trained.
|
||||
|
@ -46,10 +46,10 @@ class LinearClassifier(dnn_linear_combined.DNNLinearCombinedClassifier):
|
||||
...
|
||||
estimator.fit(input_fn=input_fn_train)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
@ -126,10 +126,10 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
|
||||
...
|
||||
estimator.fit(input_fn=input_fn_train)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a KeyError:
|
||||
if `weight_column_name` is not None:
|
||||
key=weight_column_name, value=a `Tensor`
|
||||
|
@ -24,79 +24,54 @@ from tensorflow.python import GPUOptions
|
||||
|
||||
|
||||
class RunConfig(object):
|
||||
"""This class specifies the specific configurations for the run.
|
||||
"""This class specifies the specific configurations for the run."""
|
||||
|
||||
Parameters:
|
||||
execution_mode: Runners use this flag to execute different tasks, like
|
||||
training vs evaluation. 'all' (the default) executes both training and
|
||||
eval.
|
||||
master: TensorFlow master. Empty string (the default) for local.
|
||||
task: Task id of the replica running the training (default: 0).
|
||||
num_ps_replicas: Number of parameter server tasks to use (default: 0).
|
||||
training_worker_session_startup_stagger_secs: Seconds to sleep between the
|
||||
startup of each worker task session (default: 5).
|
||||
training_worker_max_startup_secs: Max seconds to wait before starting any
|
||||
worker (default: 60).
|
||||
eval_delay_secs: Number of seconds between the beginning of each eval run.
|
||||
If one run takes more than this amount of time, the next run will start
|
||||
immediately once that run completes (default 60).
|
||||
eval_steps: Number of steps to run in each eval (default: 100).
|
||||
num_cores: Number of cores to be used (default: 4).
|
||||
verbose: Controls the verbosity, possible values:
|
||||
0: the algorithm and debug information is muted.
|
||||
1: trainer prints the progress.
|
||||
2: log device placement is printed.
|
||||
gpu_memory_fraction: Fraction of GPU memory used by the process on
|
||||
each GPU uniformly on the same machine.
|
||||
tf_random_seed: Random seed for TensorFlow initializers.
|
||||
Setting this value allows consistency between reruns.
|
||||
keep_checkpoint_max: The maximum number of recent checkpoint files to keep.
|
||||
As new files are created, older files are deleted.
|
||||
If None or 0, all checkpoint files are kept.
|
||||
Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
|
||||
keep_checkpoint_every_n_hours: Number of hours between each checkpoint
|
||||
to be saved. The default value of 10,000 hours effectively disables
|
||||
the feature.
|
||||
|
||||
Attributes:
|
||||
tf_master: Tensorflow master.
|
||||
tf_config: Tensorflow Session Config proto.
|
||||
tf_random_seed: Tensorflow random seed.
|
||||
keep_checkpoint_max: Maximum number of checkpoints to keep.
|
||||
keep_checkpoint_every_n_hours: Number of hours between each checkpoint.
|
||||
"""
|
||||
|
||||
# TODO(wicke): Cull unused options.
|
||||
# TODO(wicke): Move options out once functionality is covered by monitors
|
||||
def __init__(self,
|
||||
execution_mode='all',
|
||||
master='',
|
||||
task=0,
|
||||
num_ps_replicas=0,
|
||||
training_worker_session_startup_stagger_secs=5,
|
||||
training_worker_max_startup_secs=60,
|
||||
eval_delay_secs=60,
|
||||
eval_steps=100,
|
||||
num_cores=4,
|
||||
verbose=1,
|
||||
log_device_placement=False,
|
||||
gpu_memory_fraction=1,
|
||||
tf_random_seed=42,
|
||||
save_summary_steps=100,
|
||||
save_checkpoints_secs=60,
|
||||
keep_checkpoint_max=5,
|
||||
keep_checkpoint_every_n_hours=10000):
|
||||
self.execution_mode = execution_mode
|
||||
"""Constructor.
|
||||
|
||||
Args:
|
||||
master: TensorFlow master. Empty string (the default) for local.
|
||||
task: Task id of the replica running the training (default: 0).
|
||||
num_ps_replicas: Number of parameter server tasks to use (default: 0).
|
||||
num_cores: Number of cores to be used (default: 4).
|
||||
log_device_placement: Log the op placement to devices (default: False).
|
||||
gpu_memory_fraction: Fraction of GPU memory used by the process on
|
||||
each GPU uniformly on the same machine.
|
||||
tf_random_seed: Random seed for TensorFlow initializers.
|
||||
Setting this value allows consistency between reruns.
|
||||
save_summary_steps: Save summaries every this many steps.
|
||||
save_checkpoints_secs: Save checkpoints every this many seconds.
|
||||
keep_checkpoint_max: The maximum number of recent checkpoint files to
|
||||
keep. As new files are created, older files are deleted. If None or 0,
|
||||
all checkpoint files are kept. Defaults to 5 (that is, the 5 most recent
|
||||
checkpoint files are kept.)
|
||||
keep_checkpoint_every_n_hours: Number of hours between each checkpoint
|
||||
to be saved. The default value of 10,000 hours effectively disables
|
||||
the feature.
|
||||
"""
|
||||
self.master = master
|
||||
self.task = task
|
||||
self.num_ps_replicas = num_ps_replicas
|
||||
self.training_worker_session_startup_stagger_secs = (
|
||||
training_worker_session_startup_stagger_secs)
|
||||
self.training_worker_max_startup_secs = training_worker_max_startup_secs
|
||||
self.eval_delay_secs = eval_delay_secs
|
||||
self.eval_steps = eval_steps
|
||||
gpu_options = GPUOptions(
|
||||
per_process_gpu_memory_fraction=gpu_memory_fraction)
|
||||
self.tf_config = ConfigProto(log_device_placement=(verbose > 1),
|
||||
self.tf_config = ConfigProto(log_device_placement=log_device_placement,
|
||||
inter_op_parallelism_threads=num_cores,
|
||||
intra_op_parallelism_threads=num_cores,
|
||||
gpu_options=gpu_options)
|
||||
self.tf_random_seed = tf_random_seed
|
||||
self.save_summary_steps = save_summary_steps
|
||||
self.save_checkpoints_secs = save_checkpoints_secs
|
||||
self.keep_checkpoint_max = keep_checkpoint_max
|
||||
self.keep_checkpoint_every_n_hours = keep_checkpoint_every_n_hours
|
||||
|
134
tensorflow/contrib/learn/python/learn/experiment.py
Normal file
134
tensorflow/contrib/learn/python/learn/experiment.py
Normal file
@ -0,0 +1,134 @@
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Experiment class collecting information needed for a single training run."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import time
|
||||
|
||||
from tensorflow.python.platform import tf_logging as logging
|
||||
|
||||
|
||||
class Experiment(object):
  """Bundles an estimator with the inputs and metrics used to run it.

  An `Experiment` stores an estimator, a training input function, an
  evaluation input function and optional evaluation metrics, and exposes
  methods that forward them to the estimator's `fit` and `evaluate` calls.
  """

  def __init__(self, estimator,
               train_input_fn,
               eval_input_fn,
               eval_metrics=None):
    """Constructor for Experiment.

    Args:
      estimator: `Estimator` object.
      train_input_fn: function, returns features and targets for training.
      eval_input_fn: function, returns features and targets for evaluation.
      eval_metrics: `dict` of string, metric function. If `None`, default set
        is used.
    """
    super(Experiment, self).__init__()
    self._estimator = estimator
    self._train_input_fn = train_input_fn
    self._eval_input_fn = eval_input_fn
    self._eval_metrics = eval_metrics

  @staticmethod
  def _wait_before(activity, delay_secs):
    """Logs and sleeps for `delay_secs` seconds; does nothing if it is falsy."""
    if delay_secs:
      # `%f` keeps fractional delays visible in the log (`%d` would print a
      # 0.5 second delay as "0 secs").
      logging.info("Waiting %f secs before starting %s.", delay_secs, activity)
      time.sleep(delay_secs)

  def train(self, steps=None, monitors=None, delay_secs=0):
    """Fit the estimator using the training data.

    Waits for `delay_secs` seconds, then calls the estimator's `fit` with the
    training input function.

    Args:
      steps: Number of training steps, forwarded unchanged to `fit`. `None`,
        the default, is intended to mean "train forever"; the actual handling
        is up to the estimator.
      monitors: A list of monitors to pass to the `Estimator`'s `fit` function.
      delay_secs: Start training after this many seconds.

    Returns:
      The result of the `fit` call to the `Estimator`.
    """
    self._wait_before("training", delay_secs)
    return self._estimator.fit(input_fn=self._train_input_fn,
                               steps=steps, monitors=monitors)

  def evaluate(self, steps=None, delay_secs=0):
    """Evaluate on the evaluation data.

    Waits for `delay_secs` seconds, then calls the estimator's `evaluate` with
    the evaluation input function and the configured metrics.

    Args:
      steps: Run this many steps of evaluation (forwarded to `evaluate`).
      delay_secs: Start evaluating after waiting for this many seconds.

    Returns:
      The result of the `evaluate` call to the `Estimator`.
    """
    self._wait_before("eval", delay_secs)
    return self._estimator.evaluate(input_fn=self._eval_input_fn,
                                    steps=steps,
                                    metrics=self._eval_metrics)

  def _continuous_eval(self, input_fn, steps=1000, delay_secs=0,
                       throttle_delay_secs=60):
    """Run continuous eval on the given input.

    Starts evaluating after `delay_secs` seconds and then runs no more than one
    evaluation per `throttle_delay_secs`. The loop has no exit condition, so
    this function only stops if an exception propagates out of it.

    Args:
      input_fn: The input to use for this eval.
      steps: Number of steps per evaluation run.
      delay_secs: Start evaluating after this many seconds.
      throttle_delay_secs: Do not re-evaluate unless the last evaluation was
        started at least this many seconds ago.
    """
    self._wait_before("eval", delay_secs)

    while True:
      started_at = time.time()
      self._estimator.evaluate(input_fn=input_fn,
                               steps=steps,
                               metrics=self._eval_metrics)
      elapsed = time.time() - started_at
      # Sleep only for the part of the throttle window the run did not use.
      if elapsed < throttle_delay_secs:
        remaining = throttle_delay_secs - elapsed
        logging.info("Waiting %f secs before starting next eval run.",
                     remaining)
        time.sleep(remaining)

  def continuous_eval(self, steps=1000, delay_secs=0, throttle_delay_secs=60):
    """Repeatedly evaluates on the evaluation input; see `_continuous_eval`.

    Args:
      steps: Number of steps per evaluation run.
      delay_secs: Start evaluating after this many seconds.
      throttle_delay_secs: Minimum number of seconds between the starts of two
        consecutive evaluation runs.
    """
    self._continuous_eval(self._eval_input_fn, steps=steps,
                          delay_secs=delay_secs,
                          throttle_delay_secs=throttle_delay_secs)

  def continuous_eval_on_train_data(self, steps=1000, delay_secs=0,
                                    throttle_delay_secs=60):
    """Repeatedly evaluates on the training input; see `_continuous_eval`.

    Args:
      steps: Number of steps per evaluation run.
      delay_secs: Start evaluating after this many seconds.
      throttle_delay_secs: Minimum number of seconds between the starts of two
        consecutive evaluation runs.
    """
    self._continuous_eval(self._train_input_fn, steps=steps,
                          delay_secs=delay_secs,
                          throttle_delay_secs=throttle_delay_secs)
|
@ -30,8 +30,9 @@ from six import reraise
|
||||
|
||||
from tensorflow.contrib.framework.python.ops import ops as contrib_ops
|
||||
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
|
||||
from tensorflow.contrib.layers.python.layers import summaries
|
||||
from tensorflow.contrib.learn.python.learn import monitors as monitors_lib
|
||||
from tensorflow.contrib.learn.python.learn.utils import checkpoints
|
||||
from tensorflow.core.framework import summary_pb2
|
||||
from tensorflow.python.client import session as tf_session
|
||||
from tensorflow.python.framework import errors
|
||||
from tensorflow.python.framework import ops
|
||||
@ -131,7 +132,7 @@ def train(graph,
|
||||
supervisor_save_model_secs=600,
|
||||
supervisor_save_summaries_steps=100,
|
||||
feed_fn=None,
|
||||
max_steps=None,
|
||||
steps=None,
|
||||
fail_on_nan_loss=True,
|
||||
monitors=None):
|
||||
"""Train a model.
|
||||
@ -173,7 +174,7 @@ def train(graph,
|
||||
`supervisor_save_summaries_steps` seconds when training.
|
||||
feed_fn: A function that is called every iteration to produce a `feed_dict`
|
||||
passed to `session.run` calls. Optional.
|
||||
max_steps: Train until `global_step_tensor` evaluates to this value.
|
||||
steps: Trains for this many steps (e.g. current global step + `steps`).
|
||||
fail_on_nan_loss: If true, raise `NanLossDuringTrainingError` if `loss_op`
|
||||
evaluates to `NaN`. If false, continue training as if nothing happened.
|
||||
monitors: List of `BaseMonitor` subclass instances. Used for callbacks
|
||||
@ -192,28 +193,36 @@ def train(graph,
|
||||
if not output_dir:
|
||||
raise ValueError('Output directory should be non-empty.')
|
||||
|
||||
global_step_tensor = contrib_variables.assert_or_get_global_step(
|
||||
graph, global_step_tensor)
|
||||
if global_step_tensor is None:
|
||||
raise ValueError('No "global_step" was provided or found in the graph.')
|
||||
with graph.as_default():
|
||||
global_step_tensor = contrib_variables.assert_or_get_global_step(
|
||||
graph, global_step_tensor)
|
||||
if global_step_tensor is None:
|
||||
raise ValueError('No "global_step" was provided or found in the graph.')
|
||||
|
||||
summary_writer = (get_summary_writer(output_dir)
|
||||
if supervisor_is_chief else None)
|
||||
# Get current step.
|
||||
try:
|
||||
start_step = checkpoints.load_variable(
|
||||
output_dir, global_step_tensor.name)
|
||||
except (errors.NotFoundError, ValueError):
|
||||
start_step = 0
|
||||
|
||||
# TODO(ipolosukhin): Replace all functionality of Supervisor with Monitors.
|
||||
if not supervisor_is_chief:
|
||||
# monitors should run only on the chief.
|
||||
monitors = []
|
||||
elif not monitors:
|
||||
monitors = monitors_lib.get_default_monitors(
|
||||
loss_op=loss_op,
|
||||
summary_op=logging_ops.get_summary_op(),
|
||||
save_summary_steps=supervisor_save_summaries_steps,
|
||||
summary_writer=summary_writer)
|
||||
summary_writer = (get_summary_writer(output_dir)
|
||||
if supervisor_is_chief else None)
|
||||
|
||||
# Start monitors, can create graph parts.
|
||||
for monitor in monitors:
|
||||
monitor.begin(max_steps=max_steps)
|
||||
# TODO(ipolosukhin): Replace all functionality of Supervisor with Monitors.
|
||||
if not supervisor_is_chief:
|
||||
# monitors should run only on the chief.
|
||||
monitors = []
|
||||
elif not monitors:
|
||||
monitors = monitors_lib.get_default_monitors(
|
||||
loss_op=loss_op,
|
||||
summary_op=logging_ops.get_summary_op(),
|
||||
save_summary_steps=supervisor_save_summaries_steps,
|
||||
summary_writer=summary_writer)
|
||||
|
||||
# Start monitors, can create graph parts.
|
||||
for monitor in monitors:
|
||||
monitor.begin(max_steps=start_step + steps)
|
||||
|
||||
supervisor = tf_supervisor.Supervisor(
|
||||
graph,
|
||||
@ -235,6 +244,7 @@ def train(graph,
|
||||
get_current_step = lambda: session.run(global_step_tensor)
|
||||
|
||||
start_step = get_current_step()
|
||||
max_steps = start_step + steps
|
||||
last_step = start_step
|
||||
last_log_step = start_step
|
||||
loss_value = None
|
||||
@ -375,6 +385,28 @@ def _start_queue_runners(session, coord):
|
||||
return threads
|
||||
|
||||
|
||||
def _eval_results_to_str(eval_results):
|
||||
return ', '.join('%s = %s' % (k, v) for k, v in eval_results.items())
|
||||
|
||||
|
||||
def _write_summary_results(output_dir, eval_results, current_global_step):
  """Writes eval results into summary file in given dir.

  Logs the results, then records every entry of `eval_results` whose value is
  not `None` in a single `Summary` proto that is added to the writer at
  `current_global_step`. The writer is closed afterwards.

  Args:
    output_dir: Directory handed to `get_summary_writer` to obtain the writer.
    eval_results: `dict` mapping a summary tag to its evaluation result.
    current_global_step: Global step the summary is recorded under.
  """
  # Hand the arguments to the logger rather than pre-formatting the message,
  # like the other logging calls in this module do.
  logging.info('Saving evaluation summary for %d step: %s',
               current_global_step, _eval_results_to_str(eval_results))
  summary_writer = get_summary_writer(output_dir)
  try:
    summary = summary_pb2.Summary()
    for key, result in eval_results.items():
      if result is None:
        continue
      value = summary.value.add()
      value.tag = key
      # Only Python floats and `np.float32` are stored as a simple value; any
      # other non-None result still gets a tagged entry without a value.
      if isinstance(result, (np.float32, float)):
        value.simple_value = float(result)
    summary_writer.add_summary(summary, current_global_step)
  finally:
    # Close the writer even if building or adding the summary fails.
    summary_writer.close()
|
||||
|
||||
|
||||
# TODO(ptucker): Add unit test.
|
||||
def evaluate(graph,
|
||||
output_dir,
|
||||
@ -424,32 +456,26 @@ def evaluate(graph,
|
||||
eval steps were run.
|
||||
global_step: The global step this evaluation corresponds to.
|
||||
"""
|
||||
global_step_tensor = contrib_variables.assert_or_get_global_step(
|
||||
graph, global_step_tensor)
|
||||
with graph.as_default():
|
||||
global_step_tensor = contrib_variables.assert_or_get_global_step(
|
||||
graph, global_step_tensor)
|
||||
|
||||
for key, value in eval_dict.items():
|
||||
if not summaries.is_summary_tag_unique(key):
|
||||
continue
|
||||
if isinstance(value, ops.Tensor):
|
||||
summaries.summarize_tensor(value, tag=key)
|
||||
# Create or get summary op, global_step and saver.
|
||||
saver = _get_saver()
|
||||
local_init_op = _get_local_init_op()
|
||||
ready_op = _get_ready_op()
|
||||
|
||||
# Create or get summary op, global_step and saver.
|
||||
summary_op = logging_ops.get_summary_op()
|
||||
saver = _get_saver()
|
||||
local_init_op = _get_local_init_op()
|
||||
ready_op = _get_ready_op()
|
||||
session_manager = session_manager_lib.SessionManager(
|
||||
local_init_op=local_init_op,
|
||||
ready_op=ready_op)
|
||||
session, initialized = session_manager.recover_session(
|
||||
master=supervisor_master,
|
||||
saver=saver,
|
||||
checkpoint_dir=checkpoint_path)
|
||||
|
||||
session_manager = session_manager_lib.SessionManager(
|
||||
local_init_op=local_init_op,
|
||||
ready_op=ready_op)
|
||||
session, initialized = session_manager.recover_session(
|
||||
master=supervisor_master,
|
||||
saver=saver,
|
||||
checkpoint_dir=checkpoint_path)
|
||||
|
||||
# Start queue runners.
|
||||
coord = coordinator.Coordinator()
|
||||
threads = _start_queue_runners(session, coord)
|
||||
# Start queue runners.
|
||||
coord = coordinator.Coordinator()
|
||||
threads = _start_queue_runners(session, coord)
|
||||
|
||||
with session:
|
||||
if not initialized:
|
||||
@ -488,8 +514,7 @@ def evaluate(graph,
|
||||
duration = time.time() - start_time
|
||||
logging.info('Results after %d steps (%.3f sec/batch): %s.',
|
||||
step, float(duration),
|
||||
', '.join('%s = %s' % (k, v)
|
||||
for k, v in eval_results.items()))
|
||||
_eval_results_to_str(eval_results))
|
||||
finally:
|
||||
if eval_results is None or step != eval_step:
|
||||
eval_results = session.run(eval_dict, feed_dict=feed_dict)
|
||||
@ -498,20 +523,6 @@ def evaluate(graph,
|
||||
coord.request_stop()
|
||||
coord.join(threads, stop_grace_period_secs=120)
|
||||
|
||||
# Make our own summary writer and write a summary to the eval dir.
|
||||
# Only is feed_fn is not provided.
|
||||
# TODO(ipolosukhin): Convert evaluation to use streaming_metrics,
|
||||
# then we can save for non feed_fn as well.
|
||||
if summary_op is not None and feed_fn is None:
|
||||
summary_writer = None
|
||||
try:
|
||||
summary_writer = get_summary_writer(output_dir)
|
||||
summary_str = session.run(summary_op)
|
||||
if summary_str:
|
||||
summary_writer.add_summary(summary_str, current_global_step)
|
||||
finally:
|
||||
if summary_writer:
|
||||
summary_writer.close()
|
||||
# catch OutOfRangeError which is thrown when queue is out of data (and for
|
||||
# other reasons as well).
|
||||
except errors.OutOfRangeError as e:
|
||||
@ -526,6 +537,9 @@ def evaluate(graph,
|
||||
else:
|
||||
logging.warn('Input iterator is exhausted: %s.', e)
|
||||
|
||||
# Save summaries for this evaluation.
|
||||
_write_summary_results(output_dir, eval_results, current_global_step)
|
||||
|
||||
return eval_results, current_global_step
|
||||
|
||||
|
||||
|
75
tensorflow/contrib/learn/python/learn/learn_runner.py
Normal file
75
tensorflow/contrib/learn/python/learn/learn_runner.py
Normal file
@ -0,0 +1,75 @@
|
||||
# pylint: disable=g-bad-file-header
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Runs an Experiment."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.contrib.learn.python.learn.experiment import Experiment
|
||||
from tensorflow.python.platform import flags
|
||||
from tensorflow.python.platform import tf_logging as logging
|
||||
|
||||
|
||||
FLAGS = flags.FLAGS
|
||||
|
||||
|
||||
flags.DEFINE_string('schedule', '', 'Schedule to run for this experiment. '
|
||||
'A schedule identifies a method on the Experiment '
|
||||
'instance returned by the function passed to the '
|
||||
'run() call')
|
||||
flags.DEFINE_string('output_dir', '', 'Base output directory. Made '
|
||||
'available to the experiment builder function passed '
|
||||
'to run(). All files written by the Experiment are '
|
||||
'expected to be written into this directory.')
|
||||
|
||||
|
||||
def run(experiment_fn):
  """Make and run an experiment.

  Builds an `Experiment` by calling `experiment_fn(output_dir=FLAGS.output_dir)`
  and then invokes the member of that experiment named by `FLAGS.schedule`.

  Args:
    experiment_fn: Callable accepting an `output_dir` keyword argument and
      returning an `Experiment` instance.

  Returns:
    Whatever the scheduled member of the experiment returns when called.

  Raises:
    RuntimeError: If `FLAGS.output_dir` or `FLAGS.schedule` is empty.
    TypeError: If `experiment_fn` is not callable, if it does not return an
      `Experiment`, or if the schedule names a non-callable member.
    ValueError: If the schedule names an attribute the experiment lacks.
  """

  if not FLAGS.output_dir:
    raise RuntimeError('Must specify an output directory (use --output_dir).')
  if not FLAGS.schedule:
    raise RuntimeError('Must specify a schedule (use --schedule).')

  if not callable(experiment_fn):
    # Wrap in a 1-tuple so a tuple-valued argument cannot break formatting.
    raise TypeError('Experiment builder "%s" is not callable.' %
                    (experiment_fn,))

  # Call the builder
  experiment = experiment_fn(output_dir=FLAGS.output_dir)
  if not isinstance(experiment, Experiment):
    raise TypeError('Experiment builder did not return an Experiment '
                    'instance, got %s instead.' % type(experiment))

  def _log_valid_tasks():
    # dir() is used instead of __dict__: methods live on the class, so the
    # instance __dict__ would never list them as allowed schedules.
    valid_tasks = [x for x in dir(experiment)
                   if not x.startswith('_') and
                   callable(getattr(experiment, x, None))]
    logging.error('Allowed values for this experiment are: %s', valid_tasks)

  # Execute the schedule
  taskname = FLAGS.schedule
  if not hasattr(experiment, taskname):
    logging.error('Schedule references non-existent task %s', taskname)
    _log_valid_tasks()
    # Format the message explicitly; exceptions do not interpolate extra args.
    raise ValueError('Schedule references non-existent task %s' % taskname)

  task = getattr(experiment, taskname)
  if not callable(task):
    logging.error('Schedule references non-callable member %s', taskname)
    _log_valid_tasks()
    raise TypeError('Schedule references non-callable member %s' % taskname)

  return task()
|
@ -19,7 +19,12 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import six
|
||||
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.platform import tf_logging as logging
|
||||
from tensorflow.python.training import saver
|
||||
from tensorflow.python.training import summary_io
|
||||
|
||||
|
||||
@ -135,17 +140,27 @@ class PrintTensor(EveryN):
|
||||
"""
|
||||
|
||||
def __init__(self, tensor_names, every_n=100, first_n=1):
|
||||
"""Initializes PrintTensor monitor.
|
||||
|
||||
Args:
|
||||
tensor_names: `dict` of tag to tensor names or
|
||||
`iterable` of tensor names (strings).
|
||||
every_n: Print every N steps.
|
||||
first_n: Print first N steps.
|
||||
"""
|
||||
super(PrintTensor, self).__init__(every_n, first_n)
|
||||
if not isinstance(tensor_names, dict):
|
||||
tensor_names = {item: item for item in tensor_names}
|
||||
self._tensor_names = tensor_names
|
||||
|
||||
def every_n_step_begin(self, unused_step):
|
||||
return self._tensor_names
|
||||
return list(self._tensor_names.values())
|
||||
|
||||
def every_n_step_end(self, step, outputs):
|
||||
stats = []
|
||||
for name in self._tensor_names:
|
||||
if name in outputs:
|
||||
stats.append("%s = %s" % (name, str(outputs[name])))
|
||||
for tag, tensor_name in six.iteritems(self._tensor_names):
|
||||
if tensor_name in outputs:
|
||||
stats.append("%s = %s" % (tag, str(outputs[tensor_name])))
|
||||
logging.info("Step %d: %s" % (step, ", ".join(stats)))
|
||||
|
||||
|
||||
@ -179,14 +194,45 @@ class SummarySaver(EveryN):
|
||||
|
||||
|
||||
class ValidationMonitor(EveryN):
|
||||
"""Runs evaluation every n steps.
|
||||
|
||||
Can do early stopping on validation loss if `early_stopping_rounds` provided.
|
||||
"""Runs evaluation of the Estimator every n steps.
|
||||
|
||||
Can do early stopping on validation metrics if
|
||||
`early_stopping_rounds` provided.
|
||||
"""
|
||||
|
||||
def __init__(self, x=None, y=None, input_fn=None,
|
||||
every_n_steps=100, early_stopping_rounds=None):
|
||||
def __init__(self, x=None, y=None, input_fn=None, batch_size=None,
|
||||
every_n_steps=100, metrics=None, early_stopping_rounds=None,
|
||||
early_stopping_metric="loss",
|
||||
early_stopping_metric_minimize=True, name=None):
|
||||
"""Initializes ValidationMonitor.
|
||||
|
||||
Args:
|
||||
x: matrix or tensor of shape [n_samples, n_features...]. Can be
|
||||
iterator that returns arrays of features. The training input
|
||||
samples for fitting the model. If set, `input_fn` must be `None`.
|
||||
y: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
|
||||
iterator that returns array of targets. The training target values
|
||||
(class labels in classification, real numbers in regression). If set,
|
||||
`input_fn` must be `None`.
|
||||
input_fn: Input function. If set, `x`, `y`, and `batch_size` must be
|
||||
`None`.
|
||||
batch_size: minibatch size to use on the input, defaults to first
|
||||
dimension of `x`. Must be `None` if `input_fn` is provided.
|
||||
every_n_steps: Runs this monitor every N steps.
|
||||
metrics: Dict of metric ops to run. If None, the default metric functions
|
||||
are used; if {}, no metrics are used.
|
||||
early_stopping_rounds: If validation metric didn't go down for this many
|
||||
steps, then stop training.
|
||||
early_stopping_metric: `str`, name of the metric to early stop.
|
||||
early_stopping_metric_minimize: `bool`, True if minimize, False
|
||||
if maximize. For example, minimize `loss` or `mean_squared_error` and
|
||||
maximize `accuracy` or `f1`.
|
||||
name: `str`, appended to output sub-folder. If None uses `eval`
|
||||
sub-folder; otherwise `eval-%name%` is used to save summaries.
|
||||
|
||||
Raises:
|
||||
ValueError: If both x and input_fn are provided.
|
||||
"""
|
||||
super(ValidationMonitor, self).__init__(every_n_steps=every_n_steps,
|
||||
first_n_steps=-1)
|
||||
if x is None and input_fn is None:
|
||||
@ -194,25 +240,64 @@ class ValidationMonitor(EveryN):
|
||||
self.x = x
|
||||
self.y = y
|
||||
self.input_fn = input_fn
|
||||
self.min_loss_step = 0
|
||||
self.min_loss = None
|
||||
self.batch_size = batch_size
|
||||
self.metrics = metrics
|
||||
self.early_stopping_rounds = early_stopping_rounds
|
||||
self.early_stopping_metric = early_stopping_metric
|
||||
self.early_stopping_metric_minimize = early_stopping_metric_minimize
|
||||
self.name = name
|
||||
self._best_value_step = None
|
||||
self._best_value = None
|
||||
self._early_stopped = False
|
||||
self._latest_path = None
|
||||
self._latest_path_step = None
|
||||
|
||||
@property
|
||||
def early_stopped(self):
|
||||
return self._early_stopped
|
||||
|
||||
@property
|
||||
def best_step(self):
|
||||
return self._best_value_step
|
||||
|
||||
@property
|
||||
def best_value(self):
|
||||
return self._best_value
|
||||
|
||||
def every_n_step_end(self, step, unused_outputs):
|
||||
# Check that we are not running evaluation on the same checkpoint.
|
||||
latest_path = saver.latest_checkpoint(self._estimator.model_dir)
|
||||
if latest_path == self._latest_path:
|
||||
logging.info("Skipping evaluation due to same checkpoint %s for step %d "
|
||||
"as for step %d.", latest_path, step, self._latest_path_step)
|
||||
return False
|
||||
self._latest_path = latest_path
|
||||
self._latest_path_step = step
|
||||
|
||||
# Run evaluation and log it.
|
||||
outputs = self._estimator.evaluate(
|
||||
x=self.x, y=self.y, input_fn=self.input_fn)
|
||||
x=self.x, y=self.y, input_fn=self.input_fn, batch_size=self.batch_size,
|
||||
metrics=self.metrics, name=self.name)
|
||||
stats = []
|
||||
for name in outputs:
|
||||
stats.append("%s = %s" % (name, str(outputs[name])))
|
||||
logging.info("Validation (step %d): %s" % (step, ", ".join(stats)))
|
||||
|
||||
# Early stopping logic.
|
||||
if self.early_stopping_rounds is not None:
|
||||
if self.min_loss is None or outputs["loss"] < self.min_loss:
|
||||
self.min_loss = outputs["loss"]
|
||||
self.min_loss_step = step
|
||||
stop_now = (step - self.min_loss_step >= self.early_stopping_rounds)
|
||||
if (self._best_value is None or
|
||||
(self.early_stopping_metric_minimize and
|
||||
outputs[self.early_stopping_metric] < self._best_value) or
|
||||
(not self.early_stopping_metric_minimize and
|
||||
outputs[self.early_stopping_metric] > self._best_value)):
|
||||
self._best_value = outputs[self.early_stopping_metric]
|
||||
self._best_value_step = step
|
||||
stop_now = (step - self._best_value_step >= self.early_stopping_rounds)
|
||||
if stop_now:
|
||||
logging.info("Stopping. Best step: {} with loss {}."
|
||||
.format(self.min_loss_step, self.min_loss))
|
||||
logging.info("Stopping. Best step: {} with {} = {}."
|
||||
.format(self._best_value_step,
|
||||
self.early_stopping_metric, self._best_value))
|
||||
self._early_stopped = True
|
||||
return True
|
||||
return False
|
||||
|
||||
@ -220,7 +305,7 @@ class ValidationMonitor(EveryN):
|
||||
class CaptureVariable(EveryN):
|
||||
"""Capture a variable value into a `list`.
|
||||
|
||||
It's useful for unit testing.
|
||||
This monitor is useful for unit testing.
|
||||
"""
|
||||
|
||||
def __init__(self, var_name, every_n=100, first_n=1):
|
||||
@ -239,9 +324,85 @@ def get_default_monitors(loss_op=None, summary_op=None, save_summary_steps=100,
|
||||
output_dir=None, summary_writer=None):
|
||||
monitors = []
|
||||
if loss_op is not None:
|
||||
monitors.append(PrintTensor([loss_op.name]))
|
||||
monitors.append(PrintTensor(tensor_names={"loss": loss_op.name}))
|
||||
if summary_op is not None:
|
||||
monitors.append(SummarySaver(summary_op, save_steps=save_summary_steps,
|
||||
output_dir=output_dir,
|
||||
summary_writer=summary_writer))
|
||||
return monitors
|
||||
|
||||
|
||||
class GraphDump(BaseMonitor):
  """Dumps almost all tensors in the graph at every step.

  Note, this is very expensive, prefer `PrintTensor` or `CaptureVariable` if
  you are not debugging.
  """

  # Op types whose outputs are not requested by default.
  IGNORE_OPS = ["Const", "Assign", "Identity", "Placeholder",
                "RandomUniform", "Cast", "RestoreSlice"]

  def __init__(self, ignore_ops=None):
    """Initializes GraphDump monitor.

    Args:
      ignore_ops: `list` of string names of `Operation`s to ignore.
        If `None` (or empty) the GraphDump.IGNORE_OPS list is used.
    """
    self.ignore_ops = ignore_ops or GraphDump.IGNORE_OPS
    # One entry per step, appended in `step_end`.
    self._data = []

  def begin(self, max_steps):
    """Collects the `:0` output of every non-ignored node in the default graph.

    Args:
      max_steps: Unused by this monitor.
    """
    self.tensors = []
    graph = ops.get_default_graph()
    graph_def = graph.as_graph_def()
    for node in graph_def.node:
      if node.op in self.ignore_ops:
        continue
      try:
        self.tensors.append(graph.get_tensor_by_name(node.name + ":0"))
      except KeyError:
        # The node has no tensor under that name; nothing to dump for it.
        pass

  def step_begin(self, step):
    """Returns the tensors collected in `begin`, regardless of `step`."""
    return self.tensors

  def step_end(self, step, outputs):
    """Records `outputs` for this step."""
    self._data.append(outputs)

  @property
  def data(self):
    """`list` of the per-step outputs recorded so far."""
    return self._data

  def compare(self, other_dump, step, atol=1e-06):
    """Compares two `GraphDump` monitors and returns differences.

    Only string keys are compared, and string values are skipped.

    Args:
      other_dump: Another `GraphDump` monitor.
      step: `int`, step to compare on.
      atol: `float`, absolute tolerance in comparison of floating arrays.

    Returns:
      Returns tuple:
        matched: `list` of keys that matched.
        non_matched: `dict` of keys to difference. For arrays the difference
          is `value1 - value2`; otherwise it is the `(value1, value2)` pair.
    """
    non_matched = {}
    matched = []
    for key in self.data[step]:
      # six.string_types covers str/unicode on Python 2 and str on Python 3;
      # the bare name `unicode` does not exist on Python 3.
      if not isinstance(key, six.string_types):
        continue
      value1, value2 = self.data[step][key], other_dump.data[step][key]
      if isinstance(value1, str):
        continue
      if isinstance(value1, np.ndarray):
        if not np.allclose(value1, value2, atol=atol):
          non_matched[key] = value1 - value2
        else:
          matched.append(key)
      else:
        if value1 != value2:
          non_matched[key] = (value1, value2)
        else:
          matched.append(key)
    return matched, non_matched
|
||||
|
@ -28,6 +28,12 @@ from tensorflow.contrib.learn.python.learn.estimators._sklearn import accuracy_s
|
||||
from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split
|
||||
|
||||
|
||||
def _get_summary_events(folder):
  """Returns `tf.contrib.testing.latest_summaries(folder)`.

  Args:
    folder: Path of the directory to read.

  Raises:
    ValueError: If `folder` does not exist.
  """
  if tf.gfile.Exists(folder):
    return tf.contrib.testing.latest_summaries(folder)
  raise ValueError('Folder %s doesn\'t exist.' % folder)
|
||||
|
||||
|
||||
class EarlyStoppingTest(tf.test.TestCase):
|
||||
"""Early stopping tests."""
|
||||
|
||||
@ -35,36 +41,54 @@ class EarlyStoppingTest(tf.test.TestCase):
|
||||
random.seed(42)
|
||||
|
||||
iris = datasets.load_iris()
|
||||
x_train, x_test, y_train, y_test = train_test_split(iris.data,
|
||||
iris.target,
|
||||
test_size=0.2,
|
||||
random_state=42)
|
||||
x_train, x_test, y_train, y_test = train_test_split(
|
||||
iris.data, iris.target, test_size=0.2, random_state=42)
|
||||
|
||||
x_train, x_val, y_train, y_val = train_test_split(x_train,
|
||||
y_train,
|
||||
test_size=0.2)
|
||||
val_monitor = learn.monitors.ValidationMonitor(x_val,
|
||||
y_val,
|
||||
early_stopping_rounds=100)
|
||||
x_train, x_val, y_train, y_val = train_test_split(
|
||||
x_train, y_train, test_size=0.2, random_state=42)
|
||||
val_monitor = learn.monitors.ValidationMonitor(
|
||||
x_val, y_val, every_n_steps=50, early_stopping_rounds=100,
|
||||
early_stopping_metric='accuracy', early_stopping_metric_minimize=False)
|
||||
|
||||
# classifier without early stopping - overfitting
|
||||
classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
|
||||
n_classes=3,
|
||||
steps=1000)
|
||||
classifier1 = learn.TensorFlowDNNClassifier(
|
||||
hidden_units=[10, 20, 10], n_classes=3, steps=1000)
|
||||
classifier1.fit(x_train, y_train)
|
||||
accuracy_score(y_test, classifier1.predict(x_test))
|
||||
_ = accuracy_score(y_test, classifier1.predict(x_test))
|
||||
|
||||
# Full 1000 steps, 11 summaries and no evaluation summary.
|
||||
# 11 summaries = first + every 100 out of 1000 steps.
|
||||
self.assertEqual(11, len(_get_summary_events(classifier1.model_dir)))
|
||||
with self.assertRaises(ValueError):
|
||||
_get_summary_events(classifier1.model_dir + '/eval')
|
||||
|
||||
# classifier with early stopping - improved accuracy on testing set
|
||||
classifier2 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
|
||||
n_classes=3,
|
||||
steps=1000)
|
||||
classifier2 = learn.TensorFlowDNNClassifier(
|
||||
hidden_units=[10, 20, 10], n_classes=3, steps=2000,
|
||||
config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
|
||||
|
||||
classifier2.fit(x_train, y_train, monitors=[val_monitor])
|
||||
accuracy_score(y_test, classifier2.predict(x_test))
|
||||
_ = accuracy_score(y_val, classifier2.predict(x_val))
|
||||
_ = accuracy_score(y_test, classifier2.predict(x_test))
|
||||
|
||||
# Note, this test is unstable, so not checking for equality.
|
||||
# See stability_test for examples of stability issues.
|
||||
if val_monitor.early_stopped:
|
||||
self.assertLess(val_monitor.best_step, 2000)
|
||||
# Note, due to validation monitor stopping after the best score occur,
|
||||
# the accuracy at current checkpoint is less.
|
||||
# TODO(ipolosukhin): Time machine for restoring old checkpoints?
|
||||
# Flaky: best_value is still not always better than the score2 value.
|
||||
# self.assertGreater(val_monitor.best_value, score2_val)
|
||||
|
||||
# Early stopped; unstable, so only checking that the count is < the max.
|
||||
self.assertLess(len(_get_summary_events(classifier2.model_dir)), 21)
|
||||
self.assertLess(len(_get_summary_events(
|
||||
classifier2.model_dir + '/eval')), 4)
|
||||
|
||||
# TODO(ipolosukhin): Restore this?
|
||||
# self.assertGreater(score2, score1, "No improvement using early stopping.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
|
119
tensorflow/contrib/learn/python/learn/tests/experiment_test.py
Normal file
119
tensorflow/contrib/learn/python/learn/tests/experiment_test.py
Normal file
@ -0,0 +1,119 @@
|
||||
# pylint: disable=g-bad-file-header
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for TaskRunner and Experiment class."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import time
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class TestEstimator(object):
  """Fake estimator that counts how often `fit` and `evaluate` are called."""

  def __init__(self):
    self.eval_count = 0
    self.fit_count = 0

  @staticmethod
  def _sorted_args(kwargs):
    """Returns the `(name, value)` pairs of `kwargs`, ordered by name."""
    return sorted(kwargs.items(), key=lambda pair: pair[0])

  def evaluate(self, **kwargs):
    """Counts the call; the sixth and later calls raise `StopIteration`."""
    tf.logging.info('evaluate called with args: %s' % kwargs)
    self.eval_count += 1
    if self.eval_count <= 5:
      return self._sorted_args(kwargs)
    tf.logging.info('Ran 6 evals. Done.')
    raise StopIteration()

  def fit(self, **kwargs):
    """Counts the call and echoes the keyword arguments, ordered by name."""
    tf.logging.info('fit called with args: %s' % kwargs)
    self.fit_count += 1
    return self._sorted_args(kwargs)
|
||||
|
||||
|
||||
class ExperimentTest(tf.test.TestCase):
  """Exercises `tf.contrib.learn.Experiment` with a call-counting estimator."""

  def _assert_duration_near(self, duration, expected, tolerance=0.5):
    """Asserts `expected - tolerance < duration < expected + tolerance`.

    Two separate assertions are used so that a failure reports the measured
    duration and the bound it violated.

    Args:
      duration: Measured wall-clock time in seconds.
      expected: Expected time in seconds.
      tolerance: Allowed deviation in seconds on either side.
    """
    self.assertGreater(duration, expected - tolerance)
    self.assertLess(duration, expected + tolerance)

  def test_train(self):
    est = TestEstimator()
    ex = tf.contrib.learn.Experiment(est,
                                     train_input_fn='train_input',
                                     eval_input_fn='eval_input',
                                     eval_metrics='eval_metrics')
    ex.train(delay_secs=0)
    self.assertEqual(1, est.fit_count)
    self.assertEqual(0, est.eval_count)

  def test_train_delay(self):
    est = TestEstimator()
    ex = tf.contrib.learn.Experiment(est,
                                     train_input_fn='train_input',
                                     eval_input_fn='eval_input')
    for delay in [0, 1, 3]:
      start = time.time()
      ex.train(delay_secs=delay)
      duration = time.time() - start
      tf.logging.info('train duration (expected %f): %f', delay, duration)
      self._assert_duration_near(duration, delay)

  def test_evaluate(self):
    est = TestEstimator()
    ex = tf.contrib.learn.Experiment(est,
                                     train_input_fn='train_input',
                                     eval_input_fn='eval_input',
                                     eval_metrics='eval_metrics')
    ex.evaluate(steps='steps', delay_secs=0)
    self.assertEqual(1, est.eval_count)
    self.assertEqual(0, est.fit_count)

  def test_evaluate_delay(self):
    est = TestEstimator()
    ex = tf.contrib.learn.Experiment(est,
                                     train_input_fn='train_input',
                                     eval_input_fn='eval_input')
    for delay in [0, 1, 3]:
      start = time.time()
      ex.evaluate(delay_secs=delay)
      duration = time.time() - start
      tf.logging.info('eval duration (expected %f): %f', delay, duration)
      self._assert_duration_near(duration, delay)

  def test_continuous_eval(self):
    est = TestEstimator()
    ex = tf.contrib.learn.Experiment(est,
                                     train_input_fn='train_input',
                                     eval_input_fn='eval_input',
                                     eval_metrics='eval_metrics')
    # TestEstimator.evaluate raises StopIteration on its sixth call.
    self.assertRaises(StopIteration, ex.continuous_eval,
                      delay_secs=0, throttle_delay_secs=0)
    self.assertEqual(6, est.eval_count)
    self.assertEqual(0, est.fit_count)

  def test_continuous_eval_throttle_delay(self):
    for delay in [0, 1, 2]:
      est = TestEstimator()
      ex = tf.contrib.learn.Experiment(est,
                                       train_input_fn='train_input',
                                       eval_input_fn='eval_input',
                                       eval_metrics='eval_metrics')
      start = time.time()
      self.assertRaises(StopIteration, ex.continuous_eval,
                        delay_secs=0, throttle_delay_secs=delay)
      duration = time.time() - start
      expected = 5 * delay
      tf.logging.info('eval duration (expected %f): %f', expected, duration)
      self._assert_duration_near(duration, expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
107
tensorflow/contrib/learn/python/learn/tests/learn_runner_test.py
Normal file
107
tensorflow/contrib/learn/python/learn/tests/learn_runner_test.py
Normal file
@ -0,0 +1,107 @@
|
||||
# pylint: disable=g-bad-file-header
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""learn_runner tests."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.learn.python.learn import learn_runner
|
||||
|
||||
|
||||
FLAGS = learn_runner.FLAGS
|
||||
|
||||
|
||||
class TestExperiment(tf.contrib.learn.Experiment):
  """`Experiment` subclass with one callable task and one plain attribute.

  Note: the base-class initializer is not invoked by `__init__`.
  """

  def __init__(self, default=None):
    self.default = default

  def simple_task(self):
    """Returns a string that embeds `self.default`."""
    message = "simple_task, default=%s." % self.default
    return message
|
||||
|
||||
|
||||
# pylint: disable=unused-argument
|
||||
def build_experiment(output_dir):
  """Builder that ignores `output_dir` and returns a `TestExperiment`."""
  tf.logging.info("In default build_experiment.")
  experiment = TestExperiment()
  return experiment
|
||||
|
||||
|
||||
def build_non_experiment(output_dir):
  """Builder that ignores `output_dir` and returns a plain string."""
  not_an_experiment = "Ceci n'est pas un Experiment."
  return not_an_experiment
|
||||
# pylint: enable=unused-argument
|
||||
|
||||
|
||||
class MainTest(tf.test.TestCase):
  """Checks `learn_runner.run` for the success path and each failure mode."""

  def setUp(self):
    # Make sure the flags exist. It's unclear why this is necessary.
    for flag_name, fallback in (("output_dir", "/tmp"),
                                ("schedule", "simple_task")):
      if not hasattr(FLAGS, flag_name):
        learn_runner.flags.DEFINE_string(flag_name, fallback, "Fake")

  def test_run(self):
    FLAGS.output_dir = "/tmp"
    FLAGS.schedule = "simple_task"
    result = learn_runner.run(build_experiment)
    self.assertEqual("simple_task, default=None.", result)

  def test_fail_no_output_dir(self):
    FLAGS.output_dir = ""
    FLAGS.schedule = "simple_test"
    with self.assertRaisesRegexp(RuntimeError,
                                 "Must specify an output directory"):
      learn_runner.run(build_experiment)

  def test_fail_no_schedule(self):
    FLAGS.output_dir = "/tmp"
    FLAGS.schedule = ""
    with self.assertRaisesRegexp(RuntimeError, "Must specify a schedule"):
      learn_runner.run(build_experiment)

  def test_fail_non_callable(self):
    FLAGS.output_dir = "/tmp"
    FLAGS.schedule = "simple_test"
    with self.assertRaisesRegexp(TypeError,
                                 "Experiment builder .* is not callable"):
      learn_runner.run("not callable")

  def test_fail_not_experiment(self):
    FLAGS.output_dir = "/tmp"
    FLAGS.schedule = "simple_test"
    with self.assertRaisesRegexp(
        TypeError, "Experiment builder did not return an Experiment"):
      learn_runner.run(build_non_experiment)

  def test_fail_non_existent_task(self):
    FLAGS.output_dir = "/tmp"
    FLAGS.schedule = "mirage"
    with self.assertRaisesRegexp(
        ValueError, "Schedule references non-existent task"):
      learn_runner.run(build_experiment)

  def test_fail_non_callable_task(self):
    FLAGS.output_dir = "/tmp"
    # "default" is a plain attribute of TestExperiment, not a method.
    FLAGS.schedule = "default"
    with self.assertRaisesRegexp(
        TypeError, "Schedule references non-callable member"):
      learn_runner.run(build_experiment)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.test.main()
|
@ -0,0 +1,88 @@
|
||||
# pylint: disable=g-bad-file-header
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Non-linear estimator tests."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
# import random
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class StabilityTest(tf.test.TestCase):
  """Tests that estimators are reproducible."""

  def testRandomStability(self):
    """Two fresh graphs seeded identically must yield the same random draw."""
    my_seed, minval, maxval = 42, -0.3333, 0.3333

    def _draw():
      # Each draw builds its own graph and session, then seeds the graph.
      with tf.Graph().as_default() as g:
        with self.test_session(graph=g) as session:
          tf.set_random_seed(my_seed)
          x = tf.random_uniform([10, 10], minval=minval, maxval=maxval)
          return session.run(x)

    val1 = _draw()
    val2 = _draw()
    self.assertAllClose(val1, val2)

  def testLinearRegression(self):
    # TODO(ipolosukhin): This doesn't pass at all, but should...
    pass
    # random.seed(42)
    # boston = tf.contrib.learn.datasets.load_boston()
    # regressor = tf.contrib.learn.LinearRegressor()
    # regressor.fit(x=boston.data, y=boston.target, steps=1)
    # regressor2 = tf.contrib.learn.LinearRegressor()
    # regressor2.fit(x=boston.data, y=boston.target, steps=1)
    # self.assertAllClose(regressor.weights_, regressor2.weights_)
    # self.assertAllClose(regressor.bias_, regressor2.bias_)
    # self.assertAllClose(regressor.predict(boston.data),
    # regressor2.predict(boston.data), atol=1e-05)

  def testDNNRegression(self):
    # TODO(ipolosukhin): This doesn't pass at all, but should...
    # Either bugs or just general instability.
    pass
    # random.seed(42)
    # boston = tf.contrib.learn.datasets.load_boston()
    # regressor = tf.contrib.learn.DNNRegressor(
    # hidden_units=[10],
    # optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.001))
    # graph_dump = tf.contrib.learn.monitors.GraphDump()
    # regressor.fit(x=boston.data, y=boston.target, steps=1,
    # monitors=[graph_dump], batch_size=1)
    # regressor2 = tf.contrib.learn.DNNRegressor(
    # hidden_units=[10],
    # optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.001))
    # graph_dump2 = tf.contrib.learn.monitors.GraphDump()
    # regressor2.fit(x=boston.data, y=boston.target, steps=1,
    # monitors=[graph_dump2], batch_size=1)
    # _, non_match = graph_dump.compare(graph_dump2, 0, atol=1e-02)
    # self.assertEmpty(non_match.keys())
    # for weight1, weight2 in zip(regressor.weights_, regressor2.weights_):
    # self.assertAllClose(weight1, weight2)
    # for bias1, bias2 in zip(regressor.bias_, regressor2.bias_):
    # self.assertAllClose(bias1, bias2)
    # self.assertAllClose(regressor.predict(boston.data),
    # regressor2.predict(boston.data), atol=1e-05)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
229
tensorflow/core/graph/quantize_training.cc
Normal file
229
tensorflow/core/graph/quantize_training.cc
Normal file
@ -0,0 +1,229 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/graph/quantize_training.h"
|
||||
|
||||
#include "tensorflow/core/common_runtime/executor.h"
|
||||
#include "tensorflow/core/common_runtime/function.h"
|
||||
#include "tensorflow/core/common_runtime/memory_types.h"
|
||||
#include "tensorflow/core/framework/log_memory.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/graph/algorithm.h"
|
||||
#include "tensorflow/core/graph/node_builder.h"
|
||||
#include "tensorflow/core/graph/subgraph.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/public/session_options.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
// Node types to rewrite. Insert quantize_and_dequantize op for their inputs.
|
||||
const std::unordered_set<string, StringPiece::Hasher> nodes_to_rewrite{
|
||||
"MatMul", "Conv2D"};
|
||||
|
||||
// Contains necessary parameters to convert an edge.
|
||||
struct EdgeToConvert {
|
||||
// Edge is not owned here.
|
||||
const Edge* edge;
|
||||
int32 num_bits;
|
||||
bool signed_input;
|
||||
bool range_given;
|
||||
float input_min;
|
||||
float input_max;
|
||||
|
||||
EdgeToConvert(const Edge* e, int32 bits, bool sign, bool range, float min,
|
||||
float max) {
|
||||
edge = e;
|
||||
num_bits = bits;
|
||||
signed_input = sign;
|
||||
range_given = range;
|
||||
input_min = min;
|
||||
input_max = max;
|
||||
}
|
||||
};
|
||||
|
||||
// Decide if a node is in backward pass by checking if its name is led by
|
||||
// "gradients".
|
||||
// TODO(jmchen): Make this check more robust as it is not guaranteed that the
|
||||
// forward node will not be named with a leading "gradients".
|
||||
inline bool IsGradientNode(const Graph* graph, const Node* node) {
|
||||
static const string tag = "gradients";
|
||||
return (node->name().compare(0, tag.size(), tag) == 0);
|
||||
}
|
||||
|
||||
// Find the type of the input to set the parameters for the
|
||||
// quantize_and_dequantize op.
|
||||
// Returns true if the root tensor op type is known, false otherwise.
|
||||
bool FindType(const Graph* graph, const Node* node, bool* signed_input,
|
||||
bool* range_given, float* input_min, float* input_max) {
|
||||
const string src_op = node->type_string();
|
||||
if (src_op == "Const" || src_op == "Variable") {
|
||||
*signed_input = true;
|
||||
*range_given = false;
|
||||
} else if (src_op == "Relu") {
|
||||
// Range is not given for Relu.
|
||||
*signed_input = false;
|
||||
*range_given = false;
|
||||
} else if (src_op == "Relu6") {
|
||||
*signed_input = false;
|
||||
*range_given = true;
|
||||
*input_min = 0;
|
||||
*input_max = 6;
|
||||
} else if (src_op == "Sigmoid") {
|
||||
*signed_input = false;
|
||||
*range_given = true;
|
||||
*input_min = 0;
|
||||
*input_max = 1;
|
||||
} else if (src_op == "Tanh") {
|
||||
*signed_input = true;
|
||||
*range_given = true;
|
||||
*input_min = -1;
|
||||
*input_max = 1;
|
||||
} else if (src_op == "Reshape") {
|
||||
// Reshape has 2 inputs and the first one is the tensor.
|
||||
for (const Edge* edge : node->in_edges()) {
|
||||
if (edge->src_output() != Graph::kControlSlot && edge->dst_input() == 0) {
|
||||
FindType(graph, edge->src(), signed_input, range_given, input_min,
|
||||
input_max);
|
||||
}
|
||||
}
|
||||
} else if (src_op == "Identity" || src_op == "MaxPool" ||
|
||||
src_op == "AvgPool" || src_op == "MaxPool3D" ||
|
||||
src_op == "AvgPool3D") {
|
||||
// All these Ops only have 1 data input.
|
||||
for (const Edge* edge : node->in_edges()) {
|
||||
if (edge->src_output() != Graph::kControlSlot) {
|
||||
FindType(graph, edge->src(), signed_input, range_given, input_min,
|
||||
input_max);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Unknown type, could be the model input examples.
|
||||
// TODO: Set the params for input with user's hint.
|
||||
*signed_input = true;
|
||||
*range_given = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Insert conversion op, connect it to the graph and remove the old edge.
|
||||
Status ProcessTargetEdges(Graph* graph,
|
||||
const std::vector<EdgeToConvert>& target_edges) {
|
||||
// Remember previous convert ops to avoid duplicated conversion on the same
|
||||
// input.
|
||||
std::unordered_map<string, Node*, StringPiece::Hasher> name_index;
|
||||
for (const EdgeToConvert edge : target_edges) {
|
||||
Node* convert_node;
|
||||
string name =
|
||||
strings::StrCat(edge.edge->src()->name(), "/_QuantizeAndDequantize");
|
||||
|
||||
auto iter = name_index.find(name);
|
||||
if (iter == name_index.end()) {
|
||||
TF_RETURN_IF_ERROR(NodeBuilder(name, "_QuantizeAndDequantize")
|
||||
.Input(edge.edge->src())
|
||||
.Attr("signed_input", edge.signed_input)
|
||||
.Attr("num_bits", edge.num_bits)
|
||||
.Attr("range_given", edge.range_given)
|
||||
.Attr("input_min", edge.input_min)
|
||||
.Attr("input_max", edge.input_max)
|
||||
.Finalize(graph, &convert_node));
|
||||
|
||||
name_index[name] = convert_node;
|
||||
} else {
|
||||
convert_node = iter->second;
|
||||
}
|
||||
|
||||
graph->AddEdge(convert_node, 0, edge.edge->dst(), edge.edge->dst_input());
|
||||
graph->RemoveEdge(edge.edge);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Rewrites `graph` so that every data input of a forward-pass MatMul / Conv2D
// node goes through a quantize_and_dequantize op with `num_bits` bits.
//
// Returns:
//   InvalidArgument - `graph` is null.
//   OutOfRange      - `num_bits` is outside [1, 63].
//   Unimplemented   - more than one distinct producer of unknown type feeds
//                     the rewritten nodes.
//   Otherwise the status of the edge rewriting step.
Status DoQuantizeTraining(int32 num_bits, Graph* graph) {
  if (graph == nullptr) {
    return errors::InvalidArgument("Cannot accept empty graph pointer.");
  }

  if (num_bits < 1 || num_bits > 63) {
    return errors::OutOfRange("num_bits should be in range [1, 63] but is: ",
                              num_bits);
  }

  // The single producer of unknown type that is accepted as the model input.
  // Tracking the node itself (rather than counting edges) lets one input feed
  // several rewritten ops without being mistaken for a second input.
  const Node* potential_input = nullptr;
  std::vector<EdgeToConvert> target_edges;
  for (Node* node : graph->nodes()) {
    const bool is_target_op =
        nodes_to_rewrite.find(node->type_string()) != nodes_to_rewrite.end();
    if (!is_target_op || IsGradientNode(graph, node)) {
      continue;
    }

    // Find out which types are the inputs and convert them accordingly.
    // 1. Const/Variable OP: This is quantized as signed tensors with no given
    //    range.
    // 2. Activation OP: Set the range accordingly for different types of
    //    activations. Currently we handle {Relu, Relu6, Sigmoid, Tanh}
    // 3. Identity OP: The quantization parameters depend on its input.
    // 4. Pooling OPs: various pooling ops. Also depends on its input.
    // 5. Reshape OP: Also depends on the first input to this op.
    // 6. Not-Listed-Above OP: If there is only 1 such op, consider it as the
    //    model input. However, if there are >1 unknown ops, then returns an
    //    error for now to avoid unexpected behavior.
    //    Note: The list above might not be a complete list. Please let us
    //    know if you see the error so we can handle your case.
    for (const Edge* edge : node->in_edges()) {
      if (edge->src_output() == Graph::kControlSlot) {
        // Skip the control dependency input.
        continue;
      }

      bool signed_input = false;
      bool range_given = false;
      float input_min = 0;
      float input_max = 0;
      const bool known_op = FindType(graph, edge->src(), &signed_input,
                                     &range_given, &input_min, &input_max);
      if (!known_op) {
        // Unknown op is considered as input.
        // Only support one input for now.
        // TODO: Make this configurable if this is the desirable way to find
        // input.
        const Node* unknown_src = edge->src();
        if (potential_input != nullptr && potential_input != unknown_src) {
          return errors::Unimplemented(
              "Find a second unknown op: ", edge->src()->name(),
              " with type: ", edge->src()->type_string(),
              "; Unknown ops are considered as model input for now and "
              "only 1 input is supported currently.");
        }
        potential_input = unknown_src;
      }

      target_edges.emplace_back(edge, num_bits, signed_input, range_given,
                                input_min, input_max);
    }
  }

  TF_RETURN_IF_ERROR(ProcessTargetEdges(graph, target_edges));

  return Status::OK();
}
|
||||
|
||||
} // namespace tensorflow
|
37
tensorflow/core/graph/quantize_training.h
Normal file
37
tensorflow/core/graph/quantize_training.h
Normal file
@ -0,0 +1,37 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_GRAPH_QUANTIZE_TRAINING_H_
|
||||
#define TENSORFLOW_GRAPH_QUANTIZE_TRAINING_H_
|
||||
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
|
||||
namespace tensorflow {
|
||||
// Rewrites graph for quantized training.
|
||||
// Rewrites the forward pass to include the precision loss with quantization so
|
||||
// the model can learn to deal with such loss and achieve better accuracy when
|
||||
// it is quantized later for inference.
|
||||
// Note that num_bits must be in [1, 63] and 'g' must not be null.
|
||||
//
|
||||
// On success, returns OK.
|
||||
//
|
||||
// On failure, returns the error status. Possible errors include:
|
||||
// - num_bits out of range.
|
||||
// - g is null.
|
||||
// - More than one unknown op encountered.
|
||||
Status DoQuantizeTraining(int32 num_bits, Graph* g);
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_GRAPH_QUANTIZE_TRAINING_H_
|
161
tensorflow/core/graph/quantize_training_test.cc
Normal file
161
tensorflow/core/graph/quantize_training_test.cc
Normal file
@ -0,0 +1,161 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/graph/quantize_training.h"
|
||||
|
||||
#include "tensorflow/core/common_runtime/device_factory.h"
|
||||
#include "tensorflow/core/common_runtime/device_mgr.h"
|
||||
#include "tensorflow/core/framework/node_def_util.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_testutil.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/graph/node_builder.h"
|
||||
#include "tensorflow/core/graph/testlib.h"
|
||||
#include "tensorflow/core/lib/core/status_test_util.h"
|
||||
#include "tensorflow/core/lib/core/threadpool.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/public/session_options.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
class QuantizeTrainingTest : public ::testing::Test {
|
||||
protected:
|
||||
QuantizeTrainingTest() { Reset(); }
|
||||
void Reset() { g_.reset(new Graph(OpRegistry::Global())); }
|
||||
|
||||
template <typename T>
|
||||
Node* Constant(gtl::ArraySlice<T> values, TensorShape shape) {
|
||||
return test::graph::Constant(g_.get(), test::AsTensor(values, shape));
|
||||
}
|
||||
|
||||
std::unique_ptr<Graph> g_;
|
||||
};
|
||||
|
||||
TEST_F(QuantizeTrainingTest, NormalGraph) {
  // Input graph:
  /*
             m1      m2
          /      \ /     \
       Relu   Identity   c
        |        |
        a        b
  */
  Reset();
  Graph* graph = g_.get();
  Node* a = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
  Node* b = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
  Node* c = Constant<float>({0.0, 1.0, 1.0, 0.0}, {2, 2});
  for (Node* constant : {a, b, c}) {
    graph->AddControlEdge(graph->source_node(), constant);
  }
  Node* relu = test::graph::Relu(graph, a);
  Node* identity = test::graph::Identity(graph, b);
  Node* m1 = test::graph::Matmul(graph, relu, identity, false, false);
  Node* m2 = test::graph::Matmul(graph, identity, c, false, false);
  for (Node* matmul : {m1, m2}) {
    graph->AddControlEdge(matmul, graph->sink_node());
  }

  // Expected graph after the rewrite, where "Q" is the
  // quantize_and_dequantize op. The Q in the middle is shared by m1 and m2.
  /*
           m1        m2
        /      \   /    \
       Q         Q       Q
       |         |       |
      Relu   Identity    c
       |         |
       a         b
  */
  const int num_bits = 8;
  // 4 edges to modify.
  TF_ASSERT_OK(DoQuantizeTraining(num_bits, graph));

  // 12 nodes in total, source and sink included.
  EXPECT_EQ(12, graph->num_nodes());

  // Every input of m1 and m2 must now be a quantize_and_dequantize op.
  for (Node* matmul : {m1, m2}) {
    for (Node* input : matmul->in_nodes()) {
      EXPECT_EQ("_QuantizeAndDequantize", input->type_string());
    }
  }

  // relu, identity and c must now feed only quantize_and_dequantize ops.
  for (Node* producer : {relu, identity, c}) {
    for (Node* consumer : producer->out_nodes()) {
      EXPECT_EQ("_QuantizeAndDequantize", consumer->type_string());
    }
  }

  // The op behind identity must have signed_input == true.
  const NodeDef& identity_q_def = identity->out_nodes().begin()->def();
  ASSERT_EQ("true", SummarizeAttrValue(
                        identity_q_def.attr().find("signed_input")->second));
  // The op behind relu must have signed_input == false.
  const NodeDef& relu_q_def = relu->out_nodes().begin()->def();
  ASSERT_EQ("false", SummarizeAttrValue(
                         relu_q_def.attr().find("signed_input")->second));
}
|
||||
|
||||
TEST_F(QuantizeTrainingTest, WithBackwardNodes) {
  // Same forward graph as in NormalGraph, plus one backward MatMul.
  Reset();
  Graph* graph = g_.get();
  Node* a = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
  Node* b = Constant<float>({1.0, 2.0, 3.0, 4.0}, {2, 2});
  Node* c = Constant<float>({0.0, 1.0, 1.0, 0.0}, {2, 2});
  for (Node* constant : {a, b, c}) {
    graph->AddControlEdge(graph->source_node(), constant);
  }
  Node* relu = test::graph::Relu(graph, a);
  Node* identity = test::graph::Identity(graph, b);
  Node* m1 = test::graph::Matmul(graph, relu, identity, false, false);
  Node* m2 = test::graph::Matmul(graph, identity, c, false, false);
  for (Node* matmul : {m1, m2}) {
    graph->AddControlEdge(matmul, graph->sink_node());
  }

  // A MatMul whose name starts with "gradients" stands in for the backward
  // pass.
  Node* backward_m;
  TF_ASSERT_OK(NodeBuilder(graph->NewName("gradients/n"), "MatMul")
                   .Input(m1)
                   .Input(m2)
                   .Attr("transpose_a", true)
                   .Attr("transpose_b", false)
                   .Finalize(graph, &backward_m));
  graph->AddControlEdge(backward_m, graph->sink_node());

  const int num_bits = 8;
  // Still 4 changes since the inputs of the backward node are not converted.
  TF_ASSERT_OK(DoQuantizeTraining(num_bits, graph));

  // One node more than in NormalGraph (the backward MatMul), and m2 keeps
  // exactly two inputs.
  EXPECT_EQ(13, graph->num_nodes());
  EXPECT_EQ(2, m2->num_inputs());
}
|
||||
|
||||
#undef SIMPLE_GRAPH
|
||||
|
||||
} // namespace
|
||||
} // namespace tensorflow
|
@ -384,6 +384,15 @@ Node* GetSessionTensor(Graph* g, Node* in) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Adds a float "Relu" node to "g" that reads output 0 of "in" and returns it.
// Aborts via TF_CHECK_OK if the node cannot be built.
Node* Relu(Graph* g, Node* in) {
  Node* relu_node;
  NodeBuilder builder(g->NewName("n"), "Relu");
  builder.Input(in, 0).Attr("T", DT_FLOAT);
  TF_CHECK_OK(builder.Finalize(g, &relu_node));
  return relu_node;
}
|
||||
|
||||
// Fills "gdef" from "g" by delegating to Graph::ToGraphDef.
void ToGraphDef(Graph* g, GraphDef* gdef) {
  g->ToGraphDef(gdef);
}
|
||||
|
||||
} // end namespace graph
|
||||
|
@ -169,6 +169,9 @@ Node* GetSessionTensor(Graph* g, Node* in);
|
||||
// given in "tensors".
|
||||
Node* Concat(Graph* g, Node* concat_dim, gtl::ArraySlice<Node*> tensors);
|
||||
|
||||
// Add a Relu node in "g".
|
||||
Node* Relu(Graph* g, Node* in);
|
||||
|
||||
} // end namespace graph
|
||||
} // end namespace test
|
||||
} // end namespace tensorflow
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_slice.h"
|
||||
@ -99,10 +100,12 @@ class AvgPoolingOp : public UnaryOp<T> {
|
||||
TensorFormat data_format_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("AvgPool")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
AvgPoolingOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("AvgPool").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
AvgPoolingOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("AvgPool").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
|
||||
AvgPoolingOp<CPUDevice, Eigen::half>);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
template <typename T>
|
||||
@ -181,14 +184,17 @@ namespace functor {
|
||||
const Eigen::PaddingType& padding); \
|
||||
extern template struct SpatialAvgPooling<GPUDevice, T>;
|
||||
|
||||
DECLARE_GPU_SPEC(Eigen::half);
|
||||
DECLARE_GPU_SPEC(float);
|
||||
#undef DECLARE_GPU_SPEC
|
||||
} // namespace functor
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("AvgPool")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
AvgPoolingOp<GPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("AvgPool").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
|
||||
AvgPoolingOp<GPUDevice, Eigen::half>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("AvgPool").Device(DEVICE_GPU).TypeConstraint<float>("T"),
|
||||
AvgPoolingOp<GPUDevice, float>);
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
// The operation to compute AvgPool gradients.
|
||||
@ -300,7 +306,7 @@ class AvgPoolingGradOp : public OpKernel {
|
||||
GetBroadcastSize(c, in_cols, window_cols, col_stride,
|
||||
pad_cols, &cindex, &csize));
|
||||
|
||||
T divide_coeff = 1.0 / (rsize * csize);
|
||||
T divide_coeff(1.0 / (rsize * csize));
|
||||
int64 output_index =
|
||||
(b * out_backprop_rows + r) * out_backprop_cols + c;
|
||||
for (int64 r_dst = rindex; r_dst < rindex + rsize; ++r_dst) {
|
||||
@ -337,16 +343,16 @@ class AvgPoolingGradOp : public OpKernel {
|
||||
TensorFormat data_format_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T")
|
||||
.HostMemory("orig_input_shape"),
|
||||
AvgPoolingGradOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T")
|
||||
.HostMemory("orig_input_shape"),
|
||||
AvgPoolingGradOp<CPUDevice, double>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T") \
|
||||
.HostMemory("orig_input_shape"), \
|
||||
AvgPoolingGradOp<CPUDevice, T>);
|
||||
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_half(REGISTER_CPU_KERNEL);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
@ -416,6 +422,12 @@ REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
|
||||
.HostMemory("orig_input_shape")
|
||||
.Label("cudnn"),
|
||||
AvgPoolingGradOp<GPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<Eigen::half>("T")
|
||||
.HostMemory("orig_input_shape")
|
||||
.Label("cudnn"),
|
||||
AvgPoolingGradOp<GPUDevice, Eigen::half>);
|
||||
|
||||
// A custom GPU kernel based AvgPoolingGrad implementation. It includes the
|
||||
// padding as the candidates for the pooling operation.
|
||||
@ -532,6 +544,11 @@ REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
|
||||
.TypeConstraint<float>("T")
|
||||
.HostMemory("orig_input_shape"),
|
||||
AvgPoolingGradOpCustomGPUKernel<float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<Eigen::half>("T")
|
||||
.HostMemory("orig_input_shape"),
|
||||
AvgPoolingGradOpCustomGPUKernel<Eigen::half>);
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
|
@ -33,6 +33,7 @@ typedef Eigen::GpuDevice GPUDevice;
|
||||
#define DEFINE_GPU_KERNELS(T) \
|
||||
template struct functor::SpatialAvgPooling<GPUDevice, T>;
|
||||
|
||||
DEFINE_GPU_KERNELS(Eigen::half)
|
||||
DEFINE_GPU_KERNELS(float)
|
||||
|
||||
#undef DEFINE_GPU_KERNELS
|
||||
@ -57,7 +58,7 @@ __global__ void AvePoolBackwardNHWC(const int nthreads,
|
||||
const int phend = min(h / stride_h + 1, pooled_height);
|
||||
const int pwstart = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
|
||||
const int pwend = min(w / stride_w + 1, pooled_width);
|
||||
dtype gradient = 0;
|
||||
dtype gradient(0);
|
||||
const dtype* const top_diff_slice =
|
||||
top_diff + n * pooled_height * pooled_width * channels + c;
|
||||
for (int ph = phstart; ph < phend; ++ph) {
|
||||
@ -104,6 +105,12 @@ template bool RunAvePoolBackwardNHWC(
|
||||
const int pooled_width, const int kernel_h, const int kernel_w,
|
||||
const int stride_h, const int stride_w, const int pad_t, const int pad_l,
|
||||
float* const bottom_diff, const GPUDevice& d);
|
||||
template bool RunAvePoolBackwardNHWC(
|
||||
const Eigen::half* const top_diff, const int num, const int height,
|
||||
const int width, const int channels, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h, const int kernel_w,
|
||||
const int stride_h, const int stride_w, const int pad_t, const int pad_l,
|
||||
Eigen::half* const bottom_diff, const GPUDevice& d);
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
||||
|
@ -159,9 +159,9 @@ class BatchNormGradOp : public OpKernel {
|
||||
.TypeConstraint<T>("T"), \
|
||||
BatchNormOp<CPUDevice, T>);
|
||||
|
||||
REGISTER_KERNEL(Eigen::half);
|
||||
REGISTER_KERNEL(float);
|
||||
REGISTER_KERNEL(double);
|
||||
TF_CALL_half(REGISTER_KERNEL);
|
||||
TF_CALL_float(REGISTER_KERNEL);
|
||||
TF_CALL_double(REGISTER_KERNEL);
|
||||
#undef REGISTER_KERNEL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
@ -179,8 +179,8 @@ namespace functor {
|
||||
|
||||
#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
|
||||
|
||||
DECLARE_GPU_SPECS(Eigen::half);
|
||||
DECLARE_GPU_SPECS(float);
|
||||
TF_CALL_half(DECLARE_GPU_SPECS);
|
||||
TF_CALL_float(DECLARE_GPU_SPECS);
|
||||
#undef DECLARE_GPU_SPEC
|
||||
} // namespace functor
|
||||
|
||||
@ -191,8 +191,8 @@ DECLARE_GPU_SPECS(float);
|
||||
.TypeConstraint<T>("T"), \
|
||||
BatchNormOp<GPUDevice, T>);
|
||||
|
||||
REGISTER_GPU_KERNEL(Eigen::half);
|
||||
REGISTER_GPU_KERNEL(float);
|
||||
TF_CALL_half(REGISTER_GPU_KERNEL);
|
||||
TF_CALL_float(REGISTER_GPU_KERNEL);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
@ -203,9 +203,9 @@ REGISTER_GPU_KERNEL(float);
|
||||
.TypeConstraint<T>("T"), \
|
||||
BatchNormGradOp<CPUDevice, T>);
|
||||
|
||||
REGISTER_KERNEL(Eigen::half);
|
||||
REGISTER_KERNEL(float);
|
||||
REGISTER_KERNEL(double);
|
||||
TF_CALL_half(REGISTER_KERNEL);
|
||||
TF_CALL_float(REGISTER_KERNEL);
|
||||
TF_CALL_double(REGISTER_KERNEL);
|
||||
#undef REGISTER_KERNEL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
@ -226,8 +226,8 @@ namespace functor {
|
||||
|
||||
#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
|
||||
|
||||
DECLARE_GPU_SPECS(Eigen::half);
|
||||
DECLARE_GPU_SPECS(float);
|
||||
TF_CALL_half(DECLARE_GPU_SPECS);
|
||||
TF_CALL_float(DECLARE_GPU_SPECS);
|
||||
#undef DECLARE_GPU_SPEC
|
||||
} // namespace functor
|
||||
|
||||
@ -238,8 +238,8 @@ DECLARE_GPU_SPECS(float);
|
||||
.TypeConstraint<T>("T"), \
|
||||
BatchNormGradOp<GPUDevice, T>);
|
||||
|
||||
REGISTER_GPU_KERNEL(Eigen::half);
|
||||
REGISTER_GPU_KERNEL(float);
|
||||
TF_CALL_half(REGISTER_GPU_KERNEL);
|
||||
TF_CALL_float(REGISTER_GPU_KERNEL);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include <numeric>
|
||||
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
|
||||
@ -182,18 +183,14 @@ class CheckNumericsOp<GPUDevice, T> : public OpKernel {
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("CheckNumerics")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
CheckNumericsOp<CPUDevice, Eigen::half>);
|
||||
REGISTER_KERNEL_BUILDER(Name("CheckNumerics")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
CheckNumericsOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("CheckNumerics")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
CheckNumericsOp<CPUDevice, double>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("CheckNumerics").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
CheckNumericsOp<CPUDevice, T>);
|
||||
TF_CALL_half(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_KERNEL_BUILDER(Name("CheckNumerics")
|
||||
.Device(DEVICE_GPU)
|
||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include <vector>
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_slice.h"
|
||||
@ -622,35 +623,24 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(Conv2DCustomBackpropInputOp);
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv2DBackpropInput").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
Conv2DCustomBackpropInputOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
Conv2DCustomBackpropInputOp<CPUDevice, Eigen::half>);
|
||||
#define REGISTER_CPU_KERNELS(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Conv2DBackpropInput").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
Conv2DCustomBackpropInputOp<CPUDevice, T>); \
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.Label("custom") \
|
||||
.TypeConstraint<T>("T"), \
|
||||
Conv2DCustomBackpropInputOp<CPUDevice, T>); \
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.Label("eigen_tensor") \
|
||||
.TypeConstraint<T>("T"), \
|
||||
Conv2DFastBackpropInputOp<CPUDevice, T>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("custom")
|
||||
.TypeConstraint<float>("T"),
|
||||
Conv2DCustomBackpropInputOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("custom")
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
Conv2DCustomBackpropInputOp<CPUDevice, Eigen::half>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("eigen_tensor")
|
||||
.TypeConstraint<float>("T"),
|
||||
Conv2DFastBackpropInputOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("eigen_tensor")
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
Conv2DFastBackpropInputOp<CPUDevice, Eigen::half>);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
|
||||
template <typename Device, class T>
|
||||
class Conv2DFastBackpropFilterOp : public OpKernel {
|
||||
@ -867,35 +857,24 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(Conv2DCustomBackpropFilterOp);
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv2DBackpropFilter").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
Conv2DCustomBackpropFilterOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
Conv2DCustomBackpropFilterOp<CPUDevice, Eigen::half>);
|
||||
#define REGISTER_CPU_KERNELS(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Conv2DBackpropFilter").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
Conv2DCustomBackpropFilterOp<CPUDevice, T>); \
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.Label("custom") \
|
||||
.TypeConstraint<T>("T"), \
|
||||
Conv2DCustomBackpropFilterOp<CPUDevice, T>); \
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.Label("eigen_tensor") \
|
||||
.TypeConstraint<T>("T"), \
|
||||
Conv2DFastBackpropFilterOp<CPUDevice, T>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("custom")
|
||||
.TypeConstraint<float>("T"),
|
||||
Conv2DCustomBackpropFilterOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("custom")
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
Conv2DCustomBackpropFilterOp<CPUDevice, Eigen::half>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("eigen_tensor")
|
||||
.TypeConstraint<float>("T"),
|
||||
Conv2DFastBackpropFilterOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter")
|
||||
.Device(DEVICE_CPU)
|
||||
.Label("eigen_tensor")
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
Conv2DFastBackpropFilterOp<CPUDevice, Eigen::half>);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
|
||||
// GPU definitions of both ops.
|
||||
#if GOOGLE_CUDA
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_slice.h"
|
||||
@ -194,14 +195,13 @@ class Conv3DBackpropInputOp : public OpKernel {
|
||||
Padding padding_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv3DBackpropInput").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
Conv3DBackpropInputOp<CPUDevice, float>);
|
||||
#ifndef IS_MOBILE_PLATFORM
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv3DBackpropInput").Device(DEVICE_CPU).TypeConstraint<double>("T"),
|
||||
Conv3DBackpropInputOp<CPUDevice, double>);
|
||||
#endif
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Conv3DBackpropInput").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
Conv3DBackpropInputOp<CPUDevice, T>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
#undef REGISTER_CPU_KERNEL
|
||||
|
||||
// Backprop for filter.
|
||||
template <typename Device, class T>
|
||||
@ -303,14 +303,13 @@ class Conv3DBackpropFilterOp : public OpKernel {
|
||||
Padding padding_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv3DBackpropFilter").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
Conv3DBackpropFilterOp<CPUDevice, float>);
|
||||
#ifndef IS_MOBILE_PLATFORM
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv3DBackpropFilter").Device(DEVICE_CPU).TypeConstraint<double>("T"),
|
||||
Conv3DBackpropFilterOp<CPUDevice, double>);
|
||||
#endif
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Conv3DBackpropFilter").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
Conv3DBackpropFilterOp<CPUDevice, T>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
#undef REGISTER_CPU_KERNEL
|
||||
|
||||
// GPU definitions of both ops.
|
||||
#if GOOGLE_CUDA
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include <vector>
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_slice.h"
|
||||
@ -245,12 +246,13 @@ class Conv2DOp : public BinaryOp<T> {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
Conv2DOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
|
||||
Conv2DOp<CPUDevice, Eigen::half>);
|
||||
#define REGISTER_CPU(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Conv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
Conv2DOp<CPUDevice, T>);
|
||||
|
||||
TF_CALL_half(REGISTER_CPU);
|
||||
TF_CALL_float(REGISTER_CPU);
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
int64 GetCudnnWorkspaceLimit(const string& envvar_in_mb,
|
||||
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
#include "tensorflow/core/framework/tensor_slice.h"
|
||||
@ -120,15 +121,13 @@ class Conv3DOp : public BinaryOp<T> {
|
||||
Padding padding_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
Conv3DOp<CPUDevice, float>);
|
||||
|
||||
#ifndef IS_MOBILE_PLATFORM
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<double>("T"),
|
||||
Conv3DOp<CPUDevice, double>);
|
||||
#endif
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
Conv3DOp<CPUDevice, T>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
#undef REGISTER_CPU_KERNEL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
|
@ -566,16 +566,14 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(DepthwiseConv2dNativeBackpropInputOp);
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNativeBackpropInput")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
DepthwiseConv2dNativeBackpropInputOp<CPUDevice, float>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("DepthwiseConv2dNativeBackpropInput")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
DepthwiseConv2dNativeBackpropInputOp<CPUDevice, double>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNativeBackpropInput") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T"), \
|
||||
DepthwiseConv2dNativeBackpropInputOp<CPUDevice, T>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
#undef REGISTER_CPU_KERNEL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNativeBackpropInput")
|
||||
@ -951,17 +949,15 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(DepthwiseConv2dNativeBackpropFilterOp);
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("DepthwiseConv2dNativeBackpropFilter")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
DepthwiseConv2dNativeBackpropFilterOp<CPUDevice, float>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("DepthwiseConv2dNativeBackpropFilter")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
DepthwiseConv2dNativeBackpropFilterOp<CPUDevice, double>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("DepthwiseConv2dNativeBackpropFilter") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T"), \
|
||||
DepthwiseConv2dNativeBackpropFilterOp<CPUDevice, T>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
#undef REGISTER_CPU_KERNEL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
|
@ -376,14 +376,13 @@ class DepthwiseConv2dNativeOp : public BinaryOp<T> {
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(DepthwiseConv2dNativeOp);
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
DepthwiseConv2dNativeOp<CPUDevice, float>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("DepthwiseConv2dNative").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
DepthwiseConv2dNativeOp<CPUDevice, T>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
DepthwiseConv2dNativeOp<CPUDevice, double>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
|
@ -143,13 +143,11 @@ class DrawBoundingBoxesOp : public OpKernel {
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("DrawBoundingBoxes").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
DrawBoundingBoxesOp<float>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("DrawBoundingBoxes")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
DrawBoundingBoxesOp<Eigen::half>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("DrawBoundingBoxes").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
DrawBoundingBoxesOp<T>);
|
||||
TF_CALL_half(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -309,7 +309,7 @@ struct AvgPoolMeanReducer {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
|
||||
typedef typename packet_traits<T>::type Packet;
|
||||
packetCount_ = pset1<Packet>(0.0);
|
||||
packetCount_ = pset1<Packet>(T(0.0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
|
||||
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/kernels/fill_functor.h"
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
@ -202,17 +203,19 @@ struct MatMulFunctor<CPUDevice, T> {
|
||||
.Label("cublas"), \
|
||||
MatMulOp<GPUDevice, T, true /* cublas */>)
|
||||
|
||||
REGISTER_CPU(float);
|
||||
REGISTER_CPU(double);
|
||||
REGISTER_CPU(int32);
|
||||
REGISTER_CPU(Eigen::half);
|
||||
REGISTER_CPU(complex64);
|
||||
REGISTER_CPU(complex128);
|
||||
TF_CALL_float(REGISTER_CPU);
|
||||
TF_CALL_double(REGISTER_CPU);
|
||||
TF_CALL_half(REGISTER_CPU);
|
||||
|
||||
TF_CALL_int32(REGISTER_CPU);
|
||||
TF_CALL_complex64(REGISTER_CPU);
|
||||
TF_CALL_complex128(REGISTER_CPU);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_GPU(float);
|
||||
REGISTER_GPU(double);
|
||||
TF_CALL_float(REGISTER_GPU);
|
||||
TF_CALL_double(REGISTER_GPU);
|
||||
#if CUDA_VERSION >= 7050
|
||||
REGISTER_GPU(Eigen::half);
|
||||
TF_CALL_half(REGISTER_GPU);
|
||||
#endif
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
|
@ -160,7 +160,7 @@ static void SpatialMaxPoolWithArgMaxHelper(
|
||||
const int in_end = limit * in_size;
|
||||
EigenMatrixMap in_shard(input_backprop_flat.data() + in_start, 1,
|
||||
in_end - in_start);
|
||||
in_shard.setConstant(0);
|
||||
in_shard.setConstant(T(0));
|
||||
|
||||
// Backpropagate.
|
||||
const int out_size = out_height * out_width * depth;
|
||||
@ -187,8 +187,12 @@ static void SpatialMaxPoolWithArgMaxHelper(
|
||||
params.tensor_in_batch, shard_cost, shard);
|
||||
}
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPool").Device(DEVICE_CPU),
|
||||
MaxPoolingOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPool").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
MaxPoolingOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPool").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
|
||||
MaxPoolingOp<CPUDevice, Eigen::half>);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Forward declarations for the functor specializations for GPU.
|
||||
@ -212,6 +216,7 @@ DECLARE_GPU_SPEC(float);
|
||||
// kernel_label_map.
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPool")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<float>("T")
|
||||
.Label("eigen_tensor"),
|
||||
MaxPoolingOp<Eigen::GpuDevice, float>);
|
||||
#endif // GOOGLE_CUDA
|
||||
@ -297,11 +302,16 @@ class MaxPoolingGradOp : public OpKernel {
|
||||
TensorFormat data_format_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPoolGrad").Device(DEVICE_CPU),
|
||||
MaxPoolingGradOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPoolGrad").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
MaxPoolingGradOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPoolGrad").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
|
||||
MaxPoolingGradOp<CPUDevice, Eigen::half>);
|
||||
|
||||
#ifdef GOOGLE_CUDA
|
||||
|
||||
template <typename T>
|
||||
static void MaxPoolingBackwardCustomKernel(
|
||||
OpKernelContext* context, const std::vector<int32>& size,
|
||||
const std::vector<int32>& stride, Padding padding, const Tensor* tensor_in,
|
||||
@ -318,12 +328,12 @@ static void MaxPoolingBackwardCustomKernel(
|
||||
}
|
||||
|
||||
MaxPoolBackwardNoMask(
|
||||
tensor_in->flat<float>().data(), params.tensor_in_batch,
|
||||
tensor_in->flat<T>().data(), params.tensor_in_batch,
|
||||
params.tensor_in_rows, params.tensor_in_cols, params.depth,
|
||||
params.out_height, params.out_width, params.window_rows,
|
||||
params.window_cols, params.row_stride, params.col_stride, params.pad_rows,
|
||||
params.pad_cols, out_backprop.flat<float>().data(),
|
||||
output->flat<float>().data(), context->eigen_device<Eigen::GpuDevice>());
|
||||
params.pad_cols, out_backprop.flat<T>().data(),
|
||||
output->flat<T>().data(), context->eigen_device<Eigen::GpuDevice>());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
@ -378,8 +388,8 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
|
||||
} else {
|
||||
CHECK(data_format_ == FORMAT_NHWC)
|
||||
<< "Non-Cudnn MaxPoolGrad only supports NHWC format";
|
||||
MaxPoolingBackwardCustomKernel(context, ksize_, stride_, padding_,
|
||||
&tensor_in, out_backprop, output_shape);
|
||||
MaxPoolingBackwardCustomKernel<T>(context, ksize_, stride_, padding_,
|
||||
&tensor_in, out_backprop, output_shape);
|
||||
}
|
||||
}
|
||||
|
||||
@ -391,8 +401,12 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
|
||||
bool use_dnn_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPoolGrad").Device(DEVICE_GPU),
|
||||
MaxPoolingGradOp<Eigen::GpuDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPoolGrad").Device(DEVICE_GPU).TypeConstraint<float>("T"),
|
||||
MaxPoolingGradOp<Eigen::GpuDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPoolGrad").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
|
||||
MaxPoolingGradOp<Eigen::GpuDevice, Eigen::half>);
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
@ -625,8 +639,12 @@ struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPool").Device(DEVICE_GPU),
|
||||
MaxPoolingNoMaskOp<Eigen::GpuDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPool").Device(DEVICE_GPU).TypeConstraint<float>("T"),
|
||||
MaxPoolingNoMaskOp<Eigen::GpuDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPool").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
|
||||
MaxPoolingNoMaskOp<Eigen::GpuDevice, Eigen::half>);
|
||||
|
||||
template <typename T>
|
||||
struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
|
||||
@ -649,8 +667,14 @@ struct LaunchMaxPoolingWithArgmax<Eigen::GpuDevice, T> {
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPoolWithArgmax")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<int64>("Targmax"),
|
||||
.TypeConstraint<int64>("Targmax")
|
||||
.TypeConstraint<float>("T"),
|
||||
MaxPoolingWithArgmaxOp<Eigen::GpuDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPoolWithArgmax")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<int64>("Targmax")
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
MaxPoolingWithArgmaxOp<Eigen::GpuDevice, Eigen::half>);
|
||||
|
||||
template <typename T>
|
||||
struct LaunchMaxPoolingGradWithArgmax<Eigen::GpuDevice, T> {
|
||||
@ -675,10 +699,18 @@ struct LaunchMaxPoolingGradWithArgmax<Eigen::GpuDevice, T> {
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("MaxPoolGradWithArgmax")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<int64>("Targmax"),
|
||||
MaxPoolingGradWithArgmaxOp<Eigen::GpuDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPoolGradWithArgmax")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<float>("T")
|
||||
.TypeConstraint<int64>("Targmax"),
|
||||
MaxPoolingGradWithArgmaxOp<Eigen::GpuDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("MaxPoolGradWithArgmax")
|
||||
.Device(DEVICE_GPU)
|
||||
.TypeConstraint<Eigen::half>("T")
|
||||
.TypeConstraint<int64>("Targmax"),
|
||||
MaxPoolingGradWithArgmaxOp<Eigen::GpuDevice, Eigen::half>);
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
|
@ -110,7 +110,7 @@ __global__ void MaxPoolForwardNHWC(const int nthreads, const dtype* bottom_data,
|
||||
int wend = min(wstart + kernel_w, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
dtype maxval = -FLT_MAX;
|
||||
dtype maxval = Eigen::NumTraits<dtype>::lowest();
|
||||
int maxidx = -1;
|
||||
const dtype* bottom_data_n = bottom_data + n * height * width * channels;
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
@ -149,7 +149,7 @@ __global__ void MaxPoolBackwardNoMaskNHWC(
|
||||
int wend = min(wstart + kernel_w, width);
|
||||
hstart = max(hstart, 0);
|
||||
wstart = max(wstart, 0);
|
||||
dtype maxval = -FLT_MAX;
|
||||
dtype maxval = Eigen::NumTraits<dtype>::lowest();
|
||||
int maxidx = -1;
|
||||
const dtype* bottom_data_n = bottom_data + n * height * width * channels;
|
||||
for (int h = hstart; h < hend; ++h) {
|
||||
@ -165,8 +165,8 @@ __global__ void MaxPoolBackwardNoMaskNHWC(
|
||||
// Atomically accumulate the bottom diff. The index could still be
|
||||
// uninitialized, if all the bottom_data are NaN.
|
||||
if (maxidx != -1) {
|
||||
atomicAdd(bottom_diff + n * height * width * channels + maxidx,
|
||||
top_diff[index]);
|
||||
CudaAtomicAdd(bottom_diff + n * height * width * channels + maxidx,
|
||||
top_diff[index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -185,8 +185,8 @@ __global__ void MaxPoolBackwardNoMaskNHWC(
|
||||
// bottom_offset: the pre-computed per-image offset of the maxpool input.
|
||||
// This is equal to H*W*C.
|
||||
// bottom_diff: the gradient with respect to the input.
|
||||
// This function relies on atomicAdd to avoid race conditions. Also, before the
|
||||
// kernel is run, you will need to make sure that bottom_diff is filled with
|
||||
// This function relies on CudaAtomicAdd to avoid race conditions. Also, before
|
||||
// the kernel is run, you will need to make sure that bottom_diff is filled with
|
||||
// zero first.
|
||||
template <typename dtype>
|
||||
__global__ void MaxPoolBackward(const int nthreads, const dtype* top_diff,
|
||||
@ -194,8 +194,8 @@ __global__ void MaxPoolBackward(const int nthreads, const dtype* top_diff,
|
||||
const int bottom_offset, dtype* bottom_diff) {
|
||||
CUDA_1D_KERNEL_LOOP(index, nthreads) {
|
||||
int image_id = (index / top_offset);
|
||||
atomicAdd(bottom_diff + image_id * bottom_offset + mask[index],
|
||||
top_diff[index]);
|
||||
CudaAtomicAdd(bottom_diff + image_id * bottom_offset + mask[index],
|
||||
top_diff[index]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -219,6 +219,23 @@ bool MaxPoolForwardWithOptionalArgmax(
|
||||
return d.ok();
|
||||
}
|
||||
|
||||
// Eigen::half overload of the forward max-pool launcher.
//
// Launches MaxPoolForwardNHWC on d.stream() with 1024 threads per block and
// enough blocks to cover output_size = batch * channels * pooled_height *
// pooled_width. output_size is passed as the kernel's first argument, and
// top_data and mask are forwarded to the kernel unchanged.
// NOTE(review): presumably mask may be null when no argmax is wanted --
// confirm against MaxPoolForwardNHWC.
//
// Returns d.ok() once the launch has been issued.
bool MaxPoolForwardWithOptionalArgmax(
|
||||
const Eigen::half* bottom_data, const int batch, const int height,
|
||||
const int width, const int channels, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h, const int kernel_w,
|
||||
const int stride_h, const int stride_w, const int pad_t, const int pad_l,
|
||||
Eigen::half* top_data, int64* mask, const Eigen::GpuDevice& d) {
|
||||
const int kThreadsPerBlock = 1024;
|
||||
const int output_size = batch * channels * pooled_height * pooled_width;
|
||||
|
||||
// Block count is output_size / kThreadsPerBlock rounded up, so a partial
// final block still covers the tail elements.
MaxPoolForwardNHWC<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
|
||||
kThreadsPerBlock, 0, d.stream()>>>(
|
||||
output_size, bottom_data, height, width, channels, pooled_height,
|
||||
pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
|
||||
top_data, mask);
|
||||
return d.ok();
|
||||
}
|
||||
|
||||
bool MaxPoolBackwardNoMask(const float* bottom_data, const int batch,
|
||||
const int height, const int width,
|
||||
const int channels, const int pooled_height,
|
||||
@ -243,6 +260,30 @@ bool MaxPoolBackwardNoMask(const float* bottom_data, const int batch,
|
||||
return d.ok();
|
||||
}
|
||||
|
||||
// Eigen::half overload of the max-pool backward launcher that works without a
// precomputed argmax mask.
//
// Issues two kernels on d.stream():
//   1. SetZero over the bottom_size = batch * channels * height * width
//      elements of bottom_diff.
//   2. MaxPoolBackwardNoMaskNHWC over top_size = batch * channels *
//      pooled_height * pooled_width elements.
// bottom_diff is cleared first because the backward kernel accumulates into it
// with CudaAtomicAdd rather than overwriting it.
//
// Returns d.ok() once both launches have been issued.
bool MaxPoolBackwardNoMask(const Eigen::half* bottom_data, const int batch,
|
||||
const int height, const int width,
|
||||
const int channels, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h,
|
||||
const int kernel_w, const int stride_h,
|
||||
const int stride_w, const int pad_t, const int pad_l,
|
||||
const Eigen::half* top_diff, Eigen::half* bottom_diff,
|
||||
const Eigen::GpuDevice& d) {
|
||||
const int kThreadsPerBlock = 1024;
|
||||
const int bottom_size = batch * channels * height * width;
|
||||
const int top_size = batch * channels * pooled_height * pooled_width;
|
||||
|
||||
// Zero the input gradient; block count is bottom_size / kThreadsPerBlock
// rounded up.
SetZero<<<(bottom_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
|
||||
kThreadsPerBlock, 0, d.stream()>>>(bottom_size, bottom_diff);
|
||||
|
||||
// Accumulate the output gradient into bottom_diff; block count is
// top_size / kThreadsPerBlock rounded up.
MaxPoolBackwardNoMaskNHWC<<<(top_size + kThreadsPerBlock - 1) /
|
||||
kThreadsPerBlock,
|
||||
kThreadsPerBlock, 0, d.stream()>>>(
|
||||
top_size, bottom_data, height, width, channels, pooled_height,
|
||||
pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
|
||||
top_diff, bottom_diff);
|
||||
return d.ok();
|
||||
}
|
||||
|
||||
bool MaxPoolBackwardWithArgmax(const int output_size, const int input_size,
|
||||
const float* top_diff, const int64* mask,
|
||||
const int top_offset, const int bottom_offset,
|
||||
@ -256,12 +297,27 @@ bool MaxPoolBackwardWithArgmax(const int output_size, const int input_size,
|
||||
return d.ok();
|
||||
}
|
||||
|
||||
// Eigen::half overload of the max-pool backward launcher that uses a
// precomputed argmax mask.
//
// Issues two kernels on d.stream():
//   1. SetZero over the input_size elements of bottom_diff.
//   2. MaxPoolBackward over output_size elements.
// bottom_diff is cleared first because MaxPoolBackward adds each top_diff
// value into bottom_diff (at an index taken from mask) with CudaAtomicAdd.
//
// Returns d.ok() once both launches have been issued.
bool MaxPoolBackwardWithArgmax(const int output_size, const int input_size,
|
||||
const Eigen::half* top_diff, const int64* mask,
|
||||
const int top_offset, const int bottom_offset,
|
||||
Eigen::half* bottom_diff,
|
||||
const Eigen::GpuDevice& d) {
|
||||
const int kThreadsPerBlock = 1024;
|
||||
// Block counts below are the element count / kThreadsPerBlock, rounded up.
SetZero<<<(input_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
|
||||
kThreadsPerBlock, 0, d.stream()>>>(input_size, bottom_diff);
|
||||
MaxPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
|
||||
kThreadsPerBlock, 0, d.stream()>>>(
|
||||
output_size, top_diff, mask, top_offset, bottom_offset, bottom_diff);
|
||||
return d.ok();
|
||||
}
|
||||
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
|
||||
// Explicitly instantiates functor::SpatialMaxPooling<GPUDevice, T> for each
// element type listed below (float and Eigen::half). The macro is undefined
// again right after its last use.
#define DEFINE_GPU_KERNELS(T) \
|
||||
template struct functor::SpatialMaxPooling<GPUDevice, T>;
|
||||
|
||||
DEFINE_GPU_KERNELS(float)
|
||||
DEFINE_GPU_KERNELS(Eigen::half)
|
||||
|
||||
#undef DEFINE_GPU_KERNELS
|
||||
|
||||
|
@ -37,11 +37,24 @@ bool MaxPoolForwardWithOptionalArgmax(
|
||||
const int stride_h, const int stride_w, const int pad_t, const int pad_l,
|
||||
float* top_data, int64* mask, const Eigen::GpuDevice& d);
|
||||
|
||||
// Eigen::half overload of MaxPoolForwardWithOptionalArgmax: reads
// bottom_data, writes top_data and mask, and runs on device d.
// NOTE(review): presumably mask may be null when no argmax is wanted --
// confirm against the definition.
bool MaxPoolForwardWithOptionalArgmax(
|
||||
const Eigen::half* bottom_data, const int batch, const int height,
|
||||
const int width, const int channels, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h, const int kernel_w,
|
||||
const int stride_h, const int stride_w, const int pad_t, const int pad_l,
|
||||
Eigen::half* top_data, int64* mask, const Eigen::GpuDevice& d);
|
||||
|
||||
bool MaxPoolBackwardWithArgmax(const int output_size, const int input_size,
|
||||
const float* top_diff, const int64* mask,
|
||||
const int top_offset, const int bottom_offset,
|
||||
float* bottom_diff, const Eigen::GpuDevice& d);
|
||||
|
||||
// Eigen::half counterpart of the float MaxPoolBackwardWithArgmax declared
// above: same parameter list with float replaced by Eigen::half.
bool MaxPoolBackwardWithArgmax(const int output_size, const int input_size,
|
||||
const Eigen::half* top_diff, const int64* mask,
|
||||
const int top_offset, const int bottom_offset,
|
||||
Eigen::half* bottom_diff,
|
||||
const Eigen::GpuDevice& d);
|
||||
|
||||
bool MaxPoolBackwardNoMask(const float* bottom_data, const int batch,
|
||||
const int height, const int width,
|
||||
const int channels, const int pooled_height,
|
||||
@ -51,6 +64,15 @@ bool MaxPoolBackwardNoMask(const float* bottom_data, const int batch,
|
||||
const float* top_diff, float* bottom_diff,
|
||||
const Eigen::GpuDevice& d);
|
||||
|
||||
// Eigen::half overload of MaxPoolBackwardNoMask: takes the forward input
// (bottom_data) and the output gradient (top_diff) and writes the input
// gradient to bottom_diff on device d.
bool MaxPoolBackwardNoMask(const Eigen::half* bottom_data, const int batch,
|
||||
const int height, const int width,
|
||||
const int channels, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h,
|
||||
const int kernel_w, const int stride_h,
|
||||
const int stride_w, const int pad_t, const int pad_l,
|
||||
const Eigen::half* top_diff, Eigen::half* bottom_diff,
|
||||
const Eigen::GpuDevice& d);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_
|
||||
|
@ -104,10 +104,8 @@ class PackOp : public OpKernel {
|
||||
PackOp<CPUDevice, type>)
|
||||
|
||||
TF_CALL_ALL_TYPES(REGISTER_PACK);
|
||||
REGISTER_PACK(quint8);
|
||||
REGISTER_PACK(qint8);
|
||||
REGISTER_PACK(qint32);
|
||||
REGISTER_PACK(bfloat16);
|
||||
TF_CALL_QUANTIZED_TYPES(REGISTER_PACK);
|
||||
TF_CALL_bfloat16(REGISTER_PACK);
|
||||
|
||||
#undef REGISTER_PACK
|
||||
|
||||
|
@ -124,6 +124,7 @@ namespace functor {
|
||||
extern template struct TransformDepth<GPUDevice, T, Eigen::DenseIndex>;
|
||||
|
||||
DECLARE_GPU_SPEC(float);
|
||||
DECLARE_GPU_SPEC(Eigen::half);
|
||||
#undef DECLARE_GPU_SPEC
|
||||
} // namespace functor
|
||||
|
||||
@ -368,7 +369,9 @@ void DnnPoolingGradOp<T>::Compute(
|
||||
}
|
||||
}
|
||||
|
||||
template class DnnPoolingOp<Eigen::half>;
|
||||
template class DnnPoolingOp<float>;
|
||||
template class DnnPoolingGradOp<Eigen::half>;
|
||||
template class DnnPoolingGradOp<float>;
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
@ -311,7 +311,7 @@ void SpatialAvgPool(OpKernelContext* context, Tensor* output,
|
||||
}
|
||||
}
|
||||
}
|
||||
DCHECK_GT(out_count.minCoeff(), 0);
|
||||
DCHECK_GT(out_count.minCoeff(), T(0));
|
||||
out_mat.array().rowwise() /= out_count.transpose().array();
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/type_traits.h"
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
@ -107,15 +108,14 @@ struct QuantizeAndDequantizeOneScaleFunctor<CPUDevice, T> {
|
||||
};
|
||||
} // namespace functor
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("_QuantizeAndDequantize")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
QuantizeAndDequantizeOp<CPUDevice, float>);
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("_QuantizeAndDequantize")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
QuantizeAndDequantizeOp<CPUDevice, double>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("_QuantizeAndDequantize") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T"), \
|
||||
QuantizeAndDequantizeOp<CPUDevice, T>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
#undef REGISTER_CPU_KERNEL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_KERNEL_BUILDER(Name("_QuantizeAndDequantize")
|
||||
|
@ -451,11 +451,11 @@ class MultinomialOp : public OpKernel {
|
||||
.TypeConstraint<IntType>("Tout"), \
|
||||
RandomUniformIntOp<CPUDevice, IntType>);
|
||||
|
||||
REGISTER(Eigen::half);
|
||||
REGISTER(float);
|
||||
REGISTER(double);
|
||||
REGISTER_INT(int32);
|
||||
REGISTER_INT(int64);
|
||||
TF_CALL_half(REGISTER);
|
||||
TF_CALL_float(REGISTER);
|
||||
TF_CALL_double(REGISTER);
|
||||
TF_CALL_int32(REGISTER_INT);
|
||||
TF_CALL_int64(REGISTER_INT);
|
||||
|
||||
#undef REGISTER
|
||||
#undef REGISTER_INT
|
||||
@ -505,11 +505,11 @@ REGISTER_INT(int64);
|
||||
.TypeConstraint<IntType>("Tout"), \
|
||||
RandomUniformIntOp<GPUDevice, IntType>);
|
||||
|
||||
REGISTER(Eigen::half);
|
||||
REGISTER(float);
|
||||
REGISTER(double);
|
||||
REGISTER_INT(int32);
|
||||
REGISTER_INT(int64);
|
||||
TF_CALL_half(REGISTER);
|
||||
TF_CALL_float(REGISTER);
|
||||
TF_CALL_double(REGISTER);
|
||||
TF_CALL_int32(REGISTER_INT);
|
||||
TF_CALL_int64(REGISTER_INT);
|
||||
|
||||
#undef REGISTER
|
||||
#undef REGISTER_INT
|
||||
|
@ -25,8 +25,8 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
|
||||
// NOTE: We should have mean(complex64,int32), too. But that requires
|
||||
// changing Eigen::internal::MeanReducer to cast int to complex<float>.
|
||||
// We don't see immediate need of mean(complex64,int32) anyway.
|
||||
REGISTER_CPU_KERNELS(complex64);
|
||||
REGISTER_CPU_KERNELS(complex128);
|
||||
TF_CALL_complex64(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_complex128(REGISTER_CPU_KERNELS);
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
@ -159,15 +159,12 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
|
||||
|
||||
#undef REGISTER_KERNEL
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("ResizeBilinearGrad")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
ResizeBilinearOpGrad<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("ResizeBilinearGrad")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
ResizeBilinearOpGrad<CPUDevice, Eigen::half>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("ResizeBilinearGrad").Device(DEVICE_CPU).TypeConstraint<double>("T"),
|
||||
ResizeBilinearOpGrad<CPUDevice, double>);
|
||||
#define REGISTER_CPU_GRAD_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ResizeBilinearGrad").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
ResizeBilinearOpGrad<CPUDevice, T>);
|
||||
TF_CALL_half(REGISTER_CPU_GRAD_KERNEL);
|
||||
TF_CALL_float(REGISTER_CPU_GRAD_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_GRAD_KERNEL);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -97,13 +97,13 @@ class ReverseOp : public OpKernel {
|
||||
.HostMemory("dims"), \
|
||||
ReverseOp<CPUDevice, T>)
|
||||
|
||||
REGISTER_KERNEL(uint8);
|
||||
REGISTER_KERNEL(int8);
|
||||
REGISTER_KERNEL(int32);
|
||||
REGISTER_KERNEL(bool);
|
||||
REGISTER_KERNEL(Eigen::half);
|
||||
REGISTER_KERNEL(float);
|
||||
REGISTER_KERNEL(double);
|
||||
TF_CALL_uint8(REGISTER_KERNEL);
|
||||
TF_CALL_int8(REGISTER_KERNEL);
|
||||
TF_CALL_int32(REGISTER_KERNEL);
|
||||
TF_CALL_bool(REGISTER_KERNEL);
|
||||
TF_CALL_half(REGISTER_KERNEL);
|
||||
TF_CALL_float(REGISTER_KERNEL);
|
||||
TF_CALL_double(REGISTER_KERNEL);
|
||||
#undef REGISTER_KERNEL
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
@ -129,13 +129,13 @@ namespace functor {
|
||||
DECLARE_GPU_SPEC_DIM(T, 7) \
|
||||
DECLARE_GPU_SPEC_DIM(T, 8)
|
||||
|
||||
DECLARE_GPU_SPEC(uint8);
|
||||
DECLARE_GPU_SPEC(int8);
|
||||
DECLARE_GPU_SPEC(int32);
|
||||
DECLARE_GPU_SPEC(bool);
|
||||
DECLARE_GPU_SPEC(Eigen::half);
|
||||
DECLARE_GPU_SPEC(float);
|
||||
DECLARE_GPU_SPEC(double);
|
||||
TF_CALL_uint8(DECLARE_GPU_SPEC);
|
||||
TF_CALL_int8(DECLARE_GPU_SPEC);
|
||||
TF_CALL_int32(DECLARE_GPU_SPEC);
|
||||
TF_CALL_bool(DECLARE_GPU_SPEC);
|
||||
TF_CALL_half(DECLARE_GPU_SPEC);
|
||||
TF_CALL_float(DECLARE_GPU_SPEC);
|
||||
TF_CALL_double(DECLARE_GPU_SPEC);
|
||||
#undef DECLARE_GPU_SPEC
|
||||
#undef DECLARE_GPU_SPEC_DIM
|
||||
} // namespace functor
|
||||
@ -147,11 +147,11 @@ DECLARE_GPU_SPEC(double);
|
||||
.TypeConstraint<T>("T") \
|
||||
.HostMemory("dims"), \
|
||||
ReverseOp<GPUDevice, T>)
|
||||
REGISTER_GPU_KERNEL(uint8);
|
||||
REGISTER_GPU_KERNEL(int8);
|
||||
REGISTER_GPU_KERNEL(Eigen::half);
|
||||
REGISTER_GPU_KERNEL(float);
|
||||
REGISTER_GPU_KERNEL(double);
|
||||
TF_CALL_uint8(REGISTER_GPU_KERNEL);
|
||||
TF_CALL_int8(REGISTER_GPU_KERNEL);
|
||||
TF_CALL_half(REGISTER_GPU_KERNEL);
|
||||
TF_CALL_float(REGISTER_GPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_GPU_KERNEL);
|
||||
#undef REGISTER_GPU_KERNEL
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
@ -118,21 +118,16 @@ class LinSpaceOp : public OpKernel {
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("LinSpace")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T")
|
||||
.HostMemory("start")
|
||||
.HostMemory("stop")
|
||||
.HostMemory("num")
|
||||
.HostMemory("output"),
|
||||
LinSpaceOp<float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("LinSpace")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T")
|
||||
.HostMemory("start")
|
||||
.HostMemory("stop")
|
||||
.HostMemory("num")
|
||||
.HostMemory("output"),
|
||||
LinSpaceOp<double>);
|
||||
#define REGISTER_CPU_KERNEL(T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("LinSpace") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T") \
|
||||
.HostMemory("start") \
|
||||
.HostMemory("stop") \
|
||||
.HostMemory("num") \
|
||||
.HostMemory("output"), \
|
||||
LinSpaceOp<T>);
|
||||
TF_CALL_float(REGISTER_CPU_KERNEL);
|
||||
TF_CALL_double(REGISTER_CPU_KERNEL);
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/kernels/softmax_op.h"
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
|
||||
@ -40,27 +41,22 @@ struct SoftmaxFunctor<CPUDevice, T> {
|
||||
};
|
||||
} // namespace functor
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Softmax").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
|
||||
SoftmaxOp<CPUDevice, Eigen::half>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Softmax")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
SoftmaxOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("Softmax")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
SoftmaxOp<CPUDevice, double>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("LogSoftmax").Device(DEVICE_CPU).TypeConstraint<Eigen::half>("T"),
|
||||
SoftmaxOp<CPUDevice, Eigen::half>);
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("LogSoftmax").Device(DEVICE_CPU).TypeConstraint<float>("T"),
|
||||
SoftmaxOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("LogSoftmax")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
SoftmaxOp<CPUDevice, double>);
|
||||
#define REGISTER_CPU(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("Softmax").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
SoftmaxOp<CPUDevice, T>);
|
||||
TF_CALL_half(REGISTER_CPU);
|
||||
TF_CALL_float(REGISTER_CPU);
|
||||
TF_CALL_double(REGISTER_CPU);
|
||||
|
||||
#undef REGISTER_CPU
|
||||
#define REGISTER_CPU(T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("LogSoftmax").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
|
||||
SoftmaxOp<CPUDevice, T>);
|
||||
TF_CALL_half(REGISTER_CPU);
|
||||
TF_CALL_float(REGISTER_CPU);
|
||||
TF_CALL_double(REGISTER_CPU);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
|
@ -157,10 +157,15 @@ EIGEN_STRONG_INLINE Packet8f pinterleave4x64<Packet8f>(const Packet8f& from) {
|
||||
return _mm256_castsi256_ps(_mm256_permute4x64_epi64(_mm256_castps_si256(from),
|
||||
_MM_SHUFFLE(3, 1, 2, 0)));
|
||||
#else
|
||||
__int64_t tmp1 = _mm256_extract_epi64(_mm256_castps_si256(from), 1);
|
||||
__int64_t tmp2 = _mm256_extract_epi64(_mm256_castps_si256(from), 2);
|
||||
__m256i tmp3 = _mm256_insert_epi64(_mm256_castps_si256(from), tmp1, 2);
|
||||
return _mm256_castsi256_ps(_mm256_insert_epi64(tmp3, tmp2, 1));
|
||||
auto tmp1 = _mm256_extract_epi32(_mm256_castps_si256(from), 2);
|
||||
auto tmp2 = _mm256_extract_epi32(_mm256_castps_si256(from), 3);
|
||||
auto tmp3 = _mm256_extract_epi32(_mm256_castps_si256(from), 4);
|
||||
auto tmp4 = _mm256_extract_epi32(_mm256_castps_si256(from), 5);
|
||||
auto tmp5 = _mm256_insert_epi32(_mm256_castps_si256(from), tmp1, 4);
|
||||
tmp5 = _mm256_insert_epi32(tmp5, tmp2, 5);
|
||||
tmp5 = _mm256_insert_epi32(tmp5, tmp3, 2);
|
||||
tmp5 = _mm256_insert_epi32(tmp5, tmp4, 3);
|
||||
return _mm256_castsi256_ps(tmp5);
|
||||
#endif
|
||||
}
|
||||
// Return a Packet with 4 floats loaded from 4 bfloat16 values
|
||||
|
@ -238,25 +238,25 @@ class SparseMatmulOpTest : public ::testing::Test {
|
||||
|
||||
TEST_F(SparseMatmulOpTest, BroadcastPacketTest) {
|
||||
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[0];
|
||||
internal::pstore(data2, internal::pbroadcast_first<Packet>(
|
||||
internal::pload<Packet>(data1)));
|
||||
internal::pstoreu(data2, internal::pbroadcast_first<Packet>(
|
||||
internal::ploadu<Packet>(data1)));
|
||||
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
|
||||
if (PacketSize > 1) {
|
||||
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[1];
|
||||
internal::pstore(data2, internal::pbroadcast_second<Packet>(
|
||||
internal::pload<Packet>(data1)));
|
||||
internal::pstoreu(data2, internal::pbroadcast_second<Packet>(
|
||||
internal::ploadu<Packet>(data1)));
|
||||
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
|
||||
|
||||
if (PacketSize > 2) {
|
||||
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[2];
|
||||
internal::pstore(data2, internal::pbroadcast_third<Packet>(
|
||||
internal::pload<Packet>(data1)));
|
||||
internal::pstoreu(data2, internal::pbroadcast_third<Packet>(
|
||||
internal::ploadu<Packet>(data1)));
|
||||
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
|
||||
|
||||
if (PacketSize > 3) {
|
||||
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[3];
|
||||
internal::pstore(data2, internal::pbroadcast_fourth<Packet>(
|
||||
internal::pload<Packet>(data1)));
|
||||
internal::pstoreu(data2, internal::pbroadcast_fourth<Packet>(
|
||||
internal::ploadu<Packet>(data1)));
|
||||
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
|
||||
}
|
||||
}
|
||||
@ -276,8 +276,8 @@ TEST_F(SparseMatmulOpTest, InterleavePacketTest) {
|
||||
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[i];
|
||||
}
|
||||
|
||||
internal::pstore(
|
||||
data2, internal::pinterleave4x64<Packet>(internal::pload<Packet>(data1)));
|
||||
internal::pstoreu(data2, internal::pinterleave4x64<Packet>(
|
||||
internal::ploadu<Packet>(data1)));
|
||||
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
|
||||
}
|
||||
|
||||
@ -294,8 +294,8 @@ TEST_F(SparseMatmulOpTest, Bfloat16ExpandTest) {
|
||||
ref[i] = data3[i];
|
||||
}
|
||||
}
|
||||
internal::pstore(data2, internal::pexpand_bf16_l<Packet>(
|
||||
internal::pload<Packet>(data3_bfloat16)));
|
||||
internal::pstoreu(data2, internal::pexpand_bf16_l<Packet>(
|
||||
internal::ploadu<Packet>(data3_bfloat16)));
|
||||
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
|
||||
|
||||
if (PacketSize == 8) { // AVX
|
||||
@ -311,18 +311,18 @@ TEST_F(SparseMatmulOpTest, Bfloat16ExpandTest) {
|
||||
}
|
||||
}
|
||||
|
||||
internal::pstore(data2, internal::pexpand_bf16_u<Packet>(
|
||||
internal::pload<Packet>(data3_bfloat16)));
|
||||
internal::pstoreu(data2, internal::pexpand_bf16_u<Packet>(
|
||||
internal::ploadu<Packet>(data3_bfloat16)));
|
||||
ASSERT_TRUE(areApprox(ref, data2, PacketSize));
|
||||
}
|
||||
|
||||
TEST_F(SparseMatmulOpTest, Bfloat16LoadTest) {
|
||||
if (PacketSize >= 4) {
|
||||
for (int i = 0; i < 4; ++i) ref[i] = data3[i];
|
||||
internal::pstore(data2, internal::pload4bf16<Packet>(data3_bfloat16));
|
||||
internal::pstoreu(data2, internal::pload4bf16<Packet>(data3_bfloat16));
|
||||
ASSERT_TRUE(areApprox(ref, data2, 4));
|
||||
|
||||
internal::pstore(data2, internal::pload2bf16<Packet>(data3_bfloat16));
|
||||
internal::pstoreu(data2, internal::pload2bf16<Packet>(data3_bfloat16));
|
||||
ASSERT_TRUE(areApprox(ref, data2, 2));
|
||||
}
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ limitations under the License.
|
||||
#include <vector>
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/type_index.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
@ -92,18 +93,22 @@ class TileOp : public OpKernel {
|
||||
HANDLE_DIM(T, 4) \
|
||||
HANDLE_DIM(T, 5)
|
||||
|
||||
HANDLE_TYPE(DT_BOOL);
|
||||
HANDLE_TYPE(DT_FLOAT);
|
||||
HANDLE_TYPE(DT_DOUBLE);
|
||||
HANDLE_TYPE(DT_UINT8);
|
||||
HANDLE_TYPE(DT_INT32);
|
||||
HANDLE_TYPE(DT_INT16);
|
||||
HANDLE_TYPE(DT_INT64);
|
||||
HANDLE_TYPE(DT_HALF);
|
||||
HANDLE_TYPE(DT_COMPLEX64);
|
||||
HANDLE_TYPE(DT_COMPLEX128);
|
||||
HANDLE_TYPE(DT_STRING); // when DEVICE=CPUDevice.
|
||||
#define HANDLE_TYPE_NAME(T) HANDLE_TYPE(DataTypeToEnum<T>::value)
|
||||
|
||||
// Invoke macro using TF_CALL_* so type-filtering for platform applies.
|
||||
TF_CALL_bool(HANDLE_TYPE_NAME);
|
||||
TF_CALL_float(HANDLE_TYPE_NAME);
|
||||
TF_CALL_double(HANDLE_TYPE_NAME);
|
||||
TF_CALL_uint8(HANDLE_TYPE_NAME);
|
||||
TF_CALL_int32(HANDLE_TYPE_NAME);
|
||||
TF_CALL_int16(HANDLE_TYPE_NAME);
|
||||
TF_CALL_int64(HANDLE_TYPE_NAME);
|
||||
TF_CALL_half(HANDLE_TYPE_NAME);
|
||||
TF_CALL_string(HANDLE_TYPE_NAME); // when DEVICE=CPUDevice.
|
||||
TF_CALL_complex64(HANDLE_TYPE_NAME);
|
||||
TF_CALL_complex128(HANDLE_TYPE_NAME);
|
||||
|
||||
#undef HANDLE_TYPE_NAME
|
||||
#undef HANDLE_TYPE
|
||||
#undef HANDLE_DIM
|
||||
|
||||
@ -165,17 +170,20 @@ inline void TileOp<Device>::HandleCase(
|
||||
HANDLE_CASE(device, dtype, 4); \
|
||||
HANDLE_CASE(device, dtype, 5);
|
||||
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_BOOL);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_FLOAT);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_DOUBLE);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_UINT8);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_INT32);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_INT16);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_INT64);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_HALF);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_COMPLEX64);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_COMPLEX128);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_STRING);
|
||||
#define HANDLE_TYPE_NAME_CPU(T) \
|
||||
HANDLE_CASE_DIM(CPUDevice, DataTypeToEnum<T>::value);
|
||||
|
||||
TF_CALL_bool(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_float(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_double(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_uint8(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_int32(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_int16(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_int64(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_half(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_complex64(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_complex128(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_string(HANDLE_TYPE_NAME_CPU);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
HANDLE_CASE_DIM(GPUDevice, DT_FLOAT);
|
||||
@ -186,6 +194,7 @@ HANDLE_CASE_DIM(GPUDevice, DT_INT64);
|
||||
HANDLE_CASE_DIM(GPUDevice, DT_HALF);
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
#undef HANDLE_TYPE_NAME_CPU
|
||||
#undef HANDLE_CASE_DIM
|
||||
#undef HANDLE_CASE
|
||||
|
||||
@ -249,13 +258,16 @@ class TileGradientOp : public OpKernel {
|
||||
HANDLE_DIM(T, 4) \
|
||||
HANDLE_DIM(T, 5)
|
||||
|
||||
HANDLE_TYPE(DT_FLOAT);
|
||||
HANDLE_TYPE(DT_DOUBLE);
|
||||
HANDLE_TYPE(DT_INT32);
|
||||
HANDLE_TYPE(DT_INT16);
|
||||
HANDLE_TYPE(DT_INT64);
|
||||
HANDLE_TYPE(DT_HALF);
|
||||
#define HANDLE_TYPE_NAME(T) HANDLE_TYPE(DataTypeToEnum<T>::value)
|
||||
|
||||
TF_CALL_float(HANDLE_TYPE_NAME);
|
||||
TF_CALL_double(HANDLE_TYPE_NAME);
|
||||
TF_CALL_int32(HANDLE_TYPE_NAME);
|
||||
TF_CALL_int16(HANDLE_TYPE_NAME);
|
||||
TF_CALL_int64(HANDLE_TYPE_NAME);
|
||||
TF_CALL_half(HANDLE_TYPE_NAME);
|
||||
|
||||
#undef HANDLE_TYPE_NAME
|
||||
#undef HANDLE_TYPE
|
||||
#undef HANDLE_DIM
|
||||
|
||||
@ -390,14 +402,17 @@ inline void TileGradientOp<Device>::HandleCase(
|
||||
HANDLE_CASE(device, dtype, 4); \
|
||||
HANDLE_CASE(device, dtype, 5);
|
||||
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_FLOAT);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_DOUBLE);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_INT16);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_INT32);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_INT64);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_HALF);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_COMPLEX64);
|
||||
HANDLE_CASE_DIM(CPUDevice, DT_COMPLEX128);
|
||||
#define HANDLE_TYPE_NAME_CPU(T) \
|
||||
HANDLE_CASE_DIM(CPUDevice, DataTypeToEnum<T>::value);
|
||||
|
||||
TF_CALL_float(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_double(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_int16(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_int32(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_int64(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_half(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_complex64(HANDLE_TYPE_NAME_CPU);
|
||||
TF_CALL_complex128(HANDLE_TYPE_NAME_CPU);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
HANDLE_CASE_DIM(GPUDevice, DT_FLOAT);
|
||||
@ -409,6 +424,7 @@ HANDLE_CASE_DIM(GPUDevice, DT_HALF);
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
#undef HANDLE_TYPE_NAME_CPU
|
||||
#undef HANDLE_CASE_DIM
|
||||
#undef HANDLE_CASE
|
||||
|
||||
|
@ -16,17 +16,28 @@ limitations under the License.
|
||||
#define EIGEN_USE_THREADS
|
||||
|
||||
#include "tensorflow/core/kernels/training_ops.h"
|
||||
#include <algorithm>
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/kernels/bounds_check.h"
|
||||
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
|
||||
namespace functor {
|
||||
namespace {
// Sign helper. Yields T(0) when `x` compares equal to zero, T(-1) when `x`
// compares below zero, and T(1) in every other case.
template <class T>
inline T sgn(const T x) {
  const T zero(0);
  if (x == zero) {
    return zero;
  }
  const T one(1);
  return x < zero ? -one : one;
}
}  // namespace
|
||||
|
||||
namespace functor {
|
||||
template <typename T>
|
||||
struct ApplyGradientDescent<CPUDevice, T> {
|
||||
void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
|
||||
@ -56,6 +67,34 @@ struct ApplyAdadelta<CPUDevice, T> {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct ApplyProximalGradientDescent<CPUDevice, T> {
|
||||
void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
|
||||
typename TTypes<T>::ConstScalar lr,
|
||||
typename TTypes<T>::ConstScalar l1,
|
||||
typename TTypes<T>::ConstScalar l2,
|
||||
typename TTypes<T>::ConstFlat grad) {
|
||||
// Note that here is Fobos update, for details please refer:
|
||||
// http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf
|
||||
// TODO(xbing): merge the logic for ProximalGradientDescent and
|
||||
// ProximalAdagrad.
|
||||
auto prox_var = var;
|
||||
// compute v = w - lr * grad.
|
||||
prox_var.device(d) -= grad * lr();
|
||||
if (l1() > 0) {
|
||||
var.device(d) = prox_var.abs() - var.constant(lr() * l1());
|
||||
// compute sign(v) * max(|v| - lr * l1, 0)
|
||||
var.device(d) = prox_var.sign() * var.cwiseMax(T(0.0));
|
||||
} else {
|
||||
var.device(d) = prox_var;
|
||||
}
|
||||
if (l2() > 0) {
|
||||
// compute v / (1.0 + l2 * lr)
|
||||
var.device(d) = var / (var.constant(1.0) + var.constant(l2() * lr()));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct ApplyAdagrad<CPUDevice, T> {
|
||||
void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
|
||||
@ -67,6 +106,35 @@ struct ApplyAdagrad<CPUDevice, T> {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct ApplyProximalAdagrad<CPUDevice, T> {
|
||||
void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
|
||||
typename TTypes<T>::Flat accum,
|
||||
typename TTypes<T>::ConstScalar lr,
|
||||
typename TTypes<T>::ConstScalar l1,
|
||||
typename TTypes<T>::ConstScalar l2,
|
||||
typename TTypes<T>::ConstFlat grad) {
|
||||
// Fobos update per paper with Adagrad learning rate.
|
||||
accum.device(d) += grad.square();
|
||||
// Adagrad learning rate.
|
||||
auto learning_rate = accum.constant(lr()) * accum.rsqrt();
|
||||
auto prox_var = var;
|
||||
// compute v = w - lr * grad.
|
||||
prox_var.device(d) -= grad * learning_rate;
|
||||
if (l1() > 0) {
|
||||
var.device(d) = prox_var.abs() - learning_rate * prox_var.constant(l1());
|
||||
// compute sign(v) * max(|v| - lr * l1, 0)
|
||||
var.device(d) = prox_var.sign() * var.cwiseMax(T(0.0));
|
||||
} else {
|
||||
var.device(d) = prox_var;
|
||||
}
|
||||
if (l2() > 0) {
|
||||
var.device(d) =
|
||||
var / (var.constant(1.0) + var.constant(l2()) * learning_rate);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct ApplyFtrl<CPUDevice, T> {
|
||||
void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
|
||||
@ -221,10 +289,11 @@ class ApplyGradientDescentOp : public OpKernel {
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyGradientDescent").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyGradientDescentOp<D##Device, T>);
|
||||
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
|
||||
|
||||
REGISTER_KERNELS(CPU, Eigen::half);
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Forward declarations of the functor specializations for GPU.
|
||||
@ -246,6 +315,7 @@ REGISTER_KERNELS(GPU, Eigen::half);
|
||||
REGISTER_KERNELS(GPU, float);
|
||||
REGISTER_KERNELS(GPU, double);
|
||||
#endif
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
template <typename Device, typename T>
|
||||
@ -347,10 +417,11 @@ typedef Eigen::GpuDevice GPUDevice;
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyAdadelta").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyAdadeltaOp<D##Device, T>);
|
||||
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
|
||||
|
||||
REGISTER_KERNELS(CPU, Eigen::half);
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Forward declarations of the functor specializations for GPU.
|
||||
@ -374,6 +445,7 @@ REGISTER_KERNELS(GPU, Eigen::half);
|
||||
REGISTER_KERNELS(GPU, float);
|
||||
REGISTER_KERNELS(GPU, double);
|
||||
#endif
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
// Note, this op works on cpu only.
|
||||
@ -483,7 +555,6 @@ class SparseApplyAdadeltaOp : public OpKernel {
|
||||
accum_update_ =
|
||||
accum_update_ * accum_update_.constant(rho_scalar) +
|
||||
update.square() * update.constant(static_cast<T>(1) - rho_scalar);
|
||||
|
||||
auto v = var_flat.template chip<0>(index);
|
||||
v -= update * update.constant(lr_scalar);
|
||||
}
|
||||
@ -505,14 +576,213 @@ class SparseApplyAdadeltaOp : public OpKernel {
|
||||
.TypeConstraint<T>("T") \
|
||||
.TypeConstraint<Tindices>("Tindices"), \
|
||||
SparseApplyAdadeltaOp<T, Tindices>);
|
||||
#define REGISTER_CPU_KERNELS(T) \
|
||||
REGISTER_KERNELS(T, int32); \
|
||||
REGISTER_KERNELS(T, int64);
|
||||
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
// Note, this op works on cpu only.
|
||||
template <typename Device, typename T>
|
||||
class ApplyProximalGradientDescentOp : public OpKernel {
|
||||
public:
|
||||
explicit ApplyProximalGradientDescentOp(OpKernelConstruction* ctx)
|
||||
: OpKernel(ctx) {
|
||||
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
auto locks = MaybeLockMutexesInOrder(ctx, use_exclusive_lock_, {0});
|
||||
Tensor var = ctx->mutable_input(0, use_exclusive_lock_);
|
||||
|
||||
OP_REQUIRES(
|
||||
ctx, var.IsInitialized(),
|
||||
errors::FailedPrecondition(
|
||||
"Attempting to use uninitialized variables: ", def().input(0)));
|
||||
const Tensor& alpha = ctx->input(1);
|
||||
OP_REQUIRES(ctx, IsLegacyScalar(alpha.shape()),
|
||||
errors::InvalidArgument("alpha is not a scalar: ",
|
||||
alpha.shape().DebugString()));
|
||||
const Tensor& l1 = ctx->input(2);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l1.shape()),
|
||||
errors::InvalidArgument("l1 regularization strength is not a scalar: ",
|
||||
l1.shape().DebugString()));
|
||||
const Tensor& l2 = ctx->input(3);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l2.shape()),
|
||||
errors::InvalidArgument("l2 regularization strength is not a scalar: ",
|
||||
l2.shape().DebugString()));
|
||||
|
||||
const Tensor& delta = ctx->input(4);
|
||||
OP_REQUIRES(
|
||||
ctx, var.shape().IsSameSize(delta.shape()),
|
||||
errors::InvalidArgument("var and delta do not have the same shape",
|
||||
var.shape().DebugString(), " ",
|
||||
delta.shape().DebugString()));
|
||||
|
||||
const Device& device = ctx->template eigen_device<Device>();
|
||||
functor::ApplyProximalGradientDescent<Device, T>()(
|
||||
device, var.flat<T>(), alpha.scalar<T>(), l1.scalar<T>(),
|
||||
l2.scalar<T>(), delta.flat<T>());
|
||||
|
||||
ctx->forward_ref_input_to_ref_output(0, 0);
|
||||
}
|
||||
|
||||
private:
|
||||
bool use_exclusive_lock_;
|
||||
};
|
||||
|
||||
#define REGISTER_KERNELS(D, T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("ApplyProximalGradientDescent") \
|
||||
.Device(DEVICE_##D) \
|
||||
.TypeConstraint<T>("T"), \
|
||||
ApplyProximalGradientDescentOp<D##Device, T>);
|
||||
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
// Note, this op works on cpu only.
|
||||
template <typename T, typename Tindex>
|
||||
class SparseApplyProximalGradientDescentOp : public OpKernel {
|
||||
public:
|
||||
explicit SparseApplyProximalGradientDescentOp(OpKernelConstruction* ctx)
|
||||
: OpKernel(ctx) {
|
||||
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
|
||||
auto locks = MaybeLockMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
|
||||
Tensor var = ctx->mutable_input(0, use_exclusive_lock_);
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsVectorOrHigher(var.shape()),
|
||||
errors::InvalidArgument("var must be at least 1 dimensional"));
|
||||
|
||||
const Tensor& lr = ctx->input(1);
|
||||
OP_REQUIRES(ctx, IsLegacyScalar(lr.shape()),
|
||||
errors::InvalidArgument("lr is not a scalar: ",
|
||||
lr.shape().DebugString()));
|
||||
const Tensor& l1 = ctx->input(2);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l1.shape()),
|
||||
errors::InvalidArgument("l1 regularization strength is not a scalar: ",
|
||||
l1.shape().DebugString()));
|
||||
const Tensor& l2 = ctx->input(3);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l2.shape()),
|
||||
errors::InvalidArgument("l2 regularization strength is not a scalar: ",
|
||||
l2.shape().DebugString()));
|
||||
|
||||
const Tensor& grad = ctx->input(4);
|
||||
const Tensor& indices = ctx->input(5);
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsVector(indices.shape()),
|
||||
errors::InvalidArgument("indices must be one-dimensional"));
|
||||
|
||||
int64 inner_dim = 1;
|
||||
for (int d = 1; d < var.dims(); d++) {
|
||||
OP_REQUIRES(ctx, var.dim_size(d) == grad.dim_size(d),
|
||||
errors::InvalidArgument(strings::StrCat(
|
||||
"var and grad must match in dimension ", d)));
|
||||
inner_dim *= grad.dim_size(d);
|
||||
}
|
||||
const Tindex N = indices.dim_size(0);
|
||||
OP_REQUIRES(
|
||||
ctx, grad.dim_size(0) == N,
|
||||
errors::InvalidArgument(
|
||||
"grad must be the same size as indices in the first dimension."));
|
||||
|
||||
if (N > 0) {
|
||||
if (inner_dim > 1) {
|
||||
const Tindex first_dim_size = var.dim_size(0);
|
||||
auto indices_vec = indices.vec<Tindex>();
|
||||
auto var_flat = var.flat_outer_dims<T>();
|
||||
auto grad_flat = grad.flat_outer_dims<T>();
|
||||
T lr_scalar = lr.scalar<T>()();
|
||||
T l1_scalar = l1.scalar<T>()();
|
||||
T l2_scalar = l2.scalar<T>()();
|
||||
|
||||
// TODO(xbing): extract the common logic for the Fobos update.
|
||||
for (Tindex i = 0; i < N; i++) {
|
||||
const Tindex index = internal::SubtleMustCopy(indices_vec(i));
|
||||
OP_REQUIRES(ctx, FastBoundsCheck(index, first_dim_size),
|
||||
errors::InvalidArgument(
|
||||
strings::StrCat("Index ", index, " at offset ", i,
|
||||
" in indices is out of range")));
|
||||
auto g = grad_flat.template chip<0>(i);
|
||||
auto v = var_flat.template chip<0>(index);
|
||||
// compute learning_rate for current step.
|
||||
auto learning_rate = v.constant(lr_scalar);
|
||||
auto prox_v = v;
|
||||
// v = w - g * learning_rate.
|
||||
prox_v -= g * learning_rate;
|
||||
if (l1_scalar > 0) {
|
||||
v = prox_v.abs() - learning_rate * prox_v.constant(l1_scalar);
|
||||
// compute sign(v) * max(|v|, 0)
|
||||
v = prox_v.sign() * v.cwiseMax(static_cast<T>(0.0));
|
||||
} else {
|
||||
v = prox_v;
|
||||
}
|
||||
if (l2_scalar > 0) {
|
||||
v /= (v.constant(1.0) + v.constant(l2_scalar) * learning_rate);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CHECK_EQ(1, inner_dim);
|
||||
auto indices_vec = indices.vec<Tindex>();
|
||||
auto var_flat = var.flat<T>();
|
||||
auto grad_flat = grad.flat<T>();
|
||||
T lr_scalar = lr.scalar<T>()();
|
||||
T l1_scalar = l1.scalar<T>()();
|
||||
T l2_scalar = l2.scalar<T>()();
|
||||
const Tindex first_dim_size = var_flat.size();
|
||||
|
||||
for (Tindex i = 0; i < N; i++) {
|
||||
const Tindex index = internal::SubtleMustCopy(indices_vec(i));
|
||||
OP_REQUIRES(ctx, FastBoundsCheck(index, first_dim_size),
|
||||
errors::InvalidArgument(
|
||||
strings::StrCat("Index ", index, " at offset ", i,
|
||||
" in indices is out of range")));
|
||||
const T& g = grad_flat(i);
|
||||
auto learning_rate = lr_scalar;
|
||||
auto prox_v = var_flat(index);
|
||||
prox_v -= learning_rate * g;
|
||||
if (l1_scalar > 0) {
|
||||
var_flat(index) = std::abs(prox_v) - learning_rate * l1_scalar;
|
||||
var_flat(index) =
|
||||
sgn(prox_v) * std::max(var_flat(index), static_cast<T>(0.0));
|
||||
} else {
|
||||
var_flat(index) = prox_v;
|
||||
}
|
||||
if (l2_scalar > 0) {
|
||||
var_flat(index) /= (1.0 + l2_scalar * learning_rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx->forward_ref_input_to_ref_output(0, 0);
|
||||
}
|
||||
|
||||
private:
|
||||
bool use_exclusive_lock_;
|
||||
};
|
||||
|
||||
#define REGISTER_KERNELS(T, Tindices) \
|
||||
REGISTER_KERNEL_BUILDER(Name("SparseApplyProximalGradientDescent") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T") \
|
||||
.TypeConstraint<Tindices>("Tindices"), \
|
||||
SparseApplyProximalGradientDescentOp<T, Tindices>);
|
||||
|
||||
REGISTER_KERNELS(Eigen::half, int32);
|
||||
REGISTER_KERNELS(Eigen::half, int64);
|
||||
REGISTER_KERNELS(float, int32);
|
||||
REGISTER_KERNELS(float, int64);
|
||||
REGISTER_KERNELS(double, int32);
|
||||
REGISTER_KERNELS(double, int64);
|
||||
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
template <typename Device, typename T>
|
||||
@ -568,10 +838,11 @@ typedef Eigen::GpuDevice GPUDevice;
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyAdagrad").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyAdagradOp<D##Device, T>);
|
||||
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
|
||||
|
||||
REGISTER_KERNELS(CPU, Eigen::half);
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Forward declarations of the functor specializations for GPU.
|
||||
@ -593,15 +864,80 @@ REGISTER_KERNELS(GPU, Eigen::half);
|
||||
REGISTER_KERNELS(GPU, float);
|
||||
REGISTER_KERNELS(GPU, double);
|
||||
#endif
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
template <typename Device, typename T>
|
||||
class ApplyProximalAdagradOp : public OpKernel {
|
||||
public:
|
||||
explicit ApplyProximalAdagradOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
|
||||
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
auto locks = MaybeLockMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
|
||||
Tensor var = ctx->mutable_input(0, use_exclusive_lock_);
|
||||
Tensor accum = ctx->mutable_input(1, use_exclusive_lock_);
|
||||
OP_REQUIRES(
|
||||
ctx, var.IsInitialized(),
|
||||
errors::FailedPrecondition(
|
||||
"Attempting to use uninitialized variables: ", def().input(0)));
|
||||
OP_REQUIRES(
|
||||
ctx, accum.IsInitialized(),
|
||||
errors::FailedPrecondition(
|
||||
"Attempting to use uninitialized variables: ", def().input(1)));
|
||||
OP_REQUIRES(
|
||||
ctx, var.shape().IsSameSize(accum.shape()),
|
||||
errors::InvalidArgument("var and accum do not have the same shape",
|
||||
var.shape().DebugString(), " ",
|
||||
accum.shape().DebugString()));
|
||||
const Tensor& lr = ctx->input(2);
|
||||
OP_REQUIRES(ctx, IsLegacyScalar(lr.shape()),
|
||||
errors::InvalidArgument("lr is not a scalar: ",
|
||||
lr.shape().DebugString()));
|
||||
const Tensor& l1 = ctx->input(3);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l1.shape()),
|
||||
errors::InvalidArgument("l1 regularization strength is not a scalar: ",
|
||||
l1.shape().DebugString()));
|
||||
const Tensor& l2 = ctx->input(4);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l2.shape()),
|
||||
errors::InvalidArgument("l2 regularization strength is not a scalar: ",
|
||||
l2.shape().DebugString()));
|
||||
|
||||
const Tensor& grad = ctx->input(5);
|
||||
OP_REQUIRES(
|
||||
ctx, var.shape().IsSameSize(grad.shape()),
|
||||
errors::InvalidArgument("var and grad do not have the same shape",
|
||||
var.shape().DebugString(), " ",
|
||||
grad.shape().DebugString()));
|
||||
|
||||
const Device& device = ctx->template eigen_device<Device>();
|
||||
functor::ApplyProximalAdagrad<Device, T>()(
|
||||
device, var.flat<T>(), accum.flat<T>(), lr.scalar<T>(), l1.scalar<T>(),
|
||||
l2.scalar<T>(), grad.flat<T>());
|
||||
|
||||
ctx->forward_ref_input_to_ref_output(0, 0);
|
||||
}
|
||||
|
||||
private:
|
||||
bool use_exclusive_lock_;
|
||||
};
|
||||
|
||||
typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
|
||||
#define REGISTER_KERNELS(D, T) \
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyProximalAdagrad").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyProximalAdagradOp<D##Device, T>);
|
||||
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
namespace {
|
||||
template <class T>
|
||||
inline T sgn(const T x) {
|
||||
T zero(0);
|
||||
T one(1);
|
||||
return (x == zero ? zero : (x < zero ? -one : one));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T FtrlCompute(const T& accum, const T& linear, const T& lr, const T& l1,
|
||||
@ -730,9 +1066,167 @@ class SparseApplyAdagradOp : public OpKernel {
|
||||
.TypeConstraint<T>("T") \
|
||||
.TypeConstraint<Tindices>("Tindices"), \
|
||||
SparseApplyAdagradOp<T, Tindices>);
|
||||
#define REGISTER_CPU_KERNELS(T) \
|
||||
REGISTER_KERNELS(T, int32); \
|
||||
REGISTER_KERNELS(T, int64);
|
||||
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
// Note, this op works on cpu only.
|
||||
template <typename T, typename Tindex>
|
||||
class SparseApplyProximalAdagradOp : public OpKernel {
|
||||
public:
|
||||
explicit SparseApplyProximalAdagradOp(OpKernelConstruction* ctx)
|
||||
: OpKernel(ctx) {
|
||||
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
|
||||
auto locks = MaybeLockMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
|
||||
Tensor var = ctx->mutable_input(0, use_exclusive_lock_);
|
||||
Tensor accum = ctx->mutable_input(1, use_exclusive_lock_);
|
||||
OP_REQUIRES(
|
||||
ctx, var.IsInitialized(),
|
||||
errors::FailedPrecondition(
|
||||
"Attempting to use uninitialized variables: ", def().input(0)));
|
||||
OP_REQUIRES(
|
||||
ctx, accum.IsInitialized(),
|
||||
errors::FailedPrecondition(
|
||||
"Attempting to use uninitialized variables: ", def().input(1)));
|
||||
OP_REQUIRES(
|
||||
ctx, var.shape().IsSameSize(accum.shape()),
|
||||
errors::InvalidArgument("var and accum do not have the same shape",
|
||||
var.shape().DebugString(), " ",
|
||||
accum.shape().DebugString()));
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsVectorOrHigher(var.shape()),
|
||||
errors::InvalidArgument("var must be at least 1 dimensional"));
|
||||
|
||||
const Tensor& lr = ctx->input(2);
|
||||
OP_REQUIRES(ctx, IsLegacyScalar(lr.shape()),
|
||||
errors::InvalidArgument("lr is not a scalar: ",
|
||||
lr.shape().DebugString()));
|
||||
const Tensor& l1 = ctx->input(3);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l1.shape()),
|
||||
errors::InvalidArgument("l1 regularization strength is not a scalar: ",
|
||||
l1.shape().DebugString()));
|
||||
const Tensor& l2 = ctx->input(4);
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsScalar(l2.shape()),
|
||||
errors::InvalidArgument("l2 regularization strength is not a scalar: ",
|
||||
l2.shape().DebugString()));
|
||||
|
||||
const Tensor& grad = ctx->input(5);
|
||||
const Tensor& indices = ctx->input(6);
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsVector(indices.shape()),
|
||||
errors::InvalidArgument("indices must be one-dimensional"));
|
||||
|
||||
int64 inner_dim = 1;
|
||||
for (int d = 1; d < var.dims(); d++) {
|
||||
OP_REQUIRES(ctx, var.dim_size(d) == grad.dim_size(d),
|
||||
errors::InvalidArgument(strings::StrCat(
|
||||
"var and grad must match in dimension ", d)));
|
||||
inner_dim *= grad.dim_size(d);
|
||||
}
|
||||
const Tindex N = indices.dim_size(0);
|
||||
OP_REQUIRES(
|
||||
ctx, grad.dim_size(0) == N,
|
||||
errors::InvalidArgument(
|
||||
"grad must be the same size as indices in the first dimension."));
|
||||
|
||||
if (N > 0) {
|
||||
if (inner_dim > 1) {
|
||||
const Tindex first_dim_size = var.dim_size(0);
|
||||
auto indices_vec = indices.vec<Tindex>();
|
||||
auto var_flat = var.flat_outer_dims<T>();
|
||||
auto accum_flat = accum.flat_outer_dims<T>();
|
||||
auto grad_flat = grad.flat_outer_dims<T>();
|
||||
T lr_scalar = lr.scalar<T>()();
|
||||
T l1_scalar = l1.scalar<T>()();
|
||||
T l2_scalar = l2.scalar<T>()();
|
||||
|
||||
for (Tindex i = 0; i < N; i++) {
|
||||
const Tindex index = internal::SubtleMustCopy(indices_vec(i));
|
||||
OP_REQUIRES(ctx, FastBoundsCheck(index, first_dim_size),
|
||||
errors::InvalidArgument(
|
||||
strings::StrCat("Index ", index, " at offset ", i,
|
||||
" in indices is out of range")));
|
||||
auto a = accum_flat.template chip<0>(index);
|
||||
auto g = grad_flat.template chip<0>(i);
|
||||
auto v = var_flat.template chip<0>(index);
|
||||
a += g.square();
|
||||
// compute learning_rate for current step.
|
||||
auto learning_rate = a.constant(lr_scalar) * a.rsqrt();
|
||||
auto prox_v = v;
|
||||
// v = w - g * learning_rate.
|
||||
prox_v -= g * learning_rate;
|
||||
if (l1_scalar > 0) {
|
||||
v = prox_v.abs() - learning_rate * prox_v.constant(l1_scalar);
|
||||
// compute sign(v) * max(|v|, 0)
|
||||
v = prox_v.sign() * v.cwiseMax(static_cast<T>(0.0));
|
||||
} else {
|
||||
v = prox_v;
|
||||
}
|
||||
if (l2_scalar > 0) {
|
||||
v /= (v.constant(1.0) + v.constant(l2_scalar) * learning_rate);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CHECK_EQ(1, inner_dim);
|
||||
auto indices_vec = indices.vec<Tindex>();
|
||||
auto var_flat = var.flat<T>();
|
||||
auto accum_flat = accum.flat<T>();
|
||||
auto grad_flat = grad.flat<T>();
|
||||
T lr_scalar = lr.scalar<T>()();
|
||||
T l1_scalar = l1.scalar<T>()();
|
||||
T l2_scalar = l2.scalar<T>()();
|
||||
const Tindex first_dim_size = accum_flat.size();
|
||||
|
||||
for (Tindex i = 0; i < N; i++) {
|
||||
const Tindex index = internal::SubtleMustCopy(indices_vec(i));
|
||||
OP_REQUIRES(ctx, FastBoundsCheck(index, first_dim_size),
|
||||
errors::InvalidArgument(
|
||||
strings::StrCat("Index ", index, " at offset ", i,
|
||||
" in indices is out of range")));
|
||||
T& a = accum_flat(index);
|
||||
const T& g = grad_flat(i);
|
||||
a += g * g;
|
||||
auto learning_rate = lr_scalar / std::sqrt(a);
|
||||
auto prox_v = var_flat(index);
|
||||
prox_v -= learning_rate * g;
|
||||
if (l1_scalar > 0) {
|
||||
var_flat(index) = std::abs(prox_v) - learning_rate * l1_scalar;
|
||||
var_flat(index) =
|
||||
sgn(prox_v) * std::max(var_flat(index), static_cast<T>(0.0));
|
||||
} else {
|
||||
var_flat(index) = prox_v;
|
||||
}
|
||||
if (l2_scalar > 0) {
|
||||
var_flat(index) /= (1.0 + l2_scalar * learning_rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx->forward_ref_input_to_ref_output(0, 0);
|
||||
}
|
||||
|
||||
private:
|
||||
bool use_exclusive_lock_;
|
||||
};
|
||||
|
||||
// Registers a DEVICE_CPU kernel for the "SparseApplyProximalAdagrad" op,
// constraining the "T" and "Tindices" type attributes to the macro arguments
// and binding them to SparseApplyProximalAdagradOp<T, Tindices>.
#define REGISTER_KERNELS(T, Tindices) \
|
||||
REGISTER_KERNEL_BUILDER(Name("SparseApplyProximalAdagrad") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T") \
|
||||
.TypeConstraint<Tindices>("Tindices"), \
|
||||
SparseApplyProximalAdagradOp<T, Tindices>);
|
||||
|
||||
REGISTER_KERNELS(Eigen::half, int32);
|
||||
REGISTER_KERNELS(Eigen::half, int64);
|
||||
REGISTER_KERNELS(float, int32);
|
||||
REGISTER_KERNELS(float, int64);
|
||||
REGISTER_KERNELS(double, int32);
|
||||
@ -822,10 +1316,13 @@ typedef Eigen::GpuDevice GPUDevice;
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyFtrl").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyFtrlOp<D##Device, T>);
|
||||
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
|
||||
|
||||
REGISTER_KERNELS(CPU, Eigen::half);
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
// Note, this op works on cpu only.
|
||||
@ -1002,13 +1499,15 @@ class SparseApplyFtrlOp : public OpKernel {
|
||||
.TypeConstraint<T>("T") \
|
||||
.TypeConstraint<Tindices>("Tindices"), \
|
||||
SparseApplyFtrlOp<CPUDevice, T, Tindices>);
|
||||
#define REGISTER_CPU_KERNELS(T) \
|
||||
REGISTER_KERNELS(T, int32); \
|
||||
REGISTER_KERNELS(T, int64);
|
||||
|
||||
REGISTER_KERNELS(Eigen::half, int32);
|
||||
REGISTER_KERNELS(Eigen::half, int64);
|
||||
REGISTER_KERNELS(float, int32);
|
||||
REGISTER_KERNELS(float, int64);
|
||||
REGISTER_KERNELS(double, int32);
|
||||
REGISTER_KERNELS(double, int64);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
template <typename Device, typename T>
|
||||
@ -1070,10 +1569,11 @@ typedef Eigen::GpuDevice GPUDevice;
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyMomentum").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyMomentumOp<D##Device, T>);
|
||||
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
|
||||
|
||||
REGISTER_KERNELS(CPU, Eigen::half);
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Forward declarations of the functor specializations for GPU.
|
||||
@ -1096,6 +1596,7 @@ REGISTER_KERNELS(GPU, Eigen::half);
|
||||
REGISTER_KERNELS(GPU, float);
|
||||
REGISTER_KERNELS(GPU, double);
|
||||
#endif
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
// Note, this op works on cpu only.
|
||||
@ -1129,7 +1630,7 @@ class SparseApplyMomentumOp : public OpKernel {
|
||||
|
||||
const Tensor& lr = ctx->input(2);
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()),
|
||||
errors::InvalidArgument("lr is not a scalar: ",
|
||||
errors::InvalidArgument("lr is not a scalar : ",
|
||||
lr.shape().DebugString()));
|
||||
const Tensor& grad = ctx->input(3);
|
||||
const Tensor& indices = ctx->input(4);
|
||||
@ -1188,13 +1689,15 @@ class SparseApplyMomentumOp : public OpKernel {
|
||||
.TypeConstraint<T>("T") \
|
||||
.TypeConstraint<Tindices>("Tindices"), \
|
||||
SparseApplyMomentumOp<T, Tindices>);
|
||||
#define REGISTER_CPU_KERNELS(T) \
|
||||
REGISTER_KERNELS(T, int32); \
|
||||
REGISTER_KERNELS(T, int64);
|
||||
|
||||
REGISTER_KERNELS(Eigen::half, int32);
|
||||
REGISTER_KERNELS(Eigen::half, int64);
|
||||
REGISTER_KERNELS(float, int32);
|
||||
REGISTER_KERNELS(float, int64);
|
||||
REGISTER_KERNELS(double, int32);
|
||||
REGISTER_KERNELS(double, int64);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
template <typename Device, typename T>
|
||||
@ -1237,7 +1740,7 @@ class ApplyAdamOp : public OpKernel {
|
||||
errors::InvalidArgument("beta2_power is not a scalar: ",
|
||||
beta2_power.shape().DebugString()));
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()),
|
||||
errors::InvalidArgument("lr is not a scalar: ",
|
||||
errors::InvalidArgument("lr is not a scalar : ",
|
||||
lr.shape().DebugString()));
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta1.shape()),
|
||||
errors::InvalidArgument("beta1 is not a scalar: ",
|
||||
@ -1285,10 +1788,11 @@ typedef Eigen::GpuDevice GPUDevice;
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyAdam").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyAdamOp<D##Device, T>);
|
||||
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
|
||||
|
||||
REGISTER_KERNELS(CPU, Eigen::half);
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Forward declarations of the functor specializations for GPU.
|
||||
@ -1316,6 +1820,7 @@ REGISTER_KERNELS(GPU, Eigen::half);
|
||||
REGISTER_KERNELS(GPU, float);
|
||||
REGISTER_KERNELS(GPU, double);
|
||||
#endif
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
template <typename Device, typename T>
|
||||
@ -1352,7 +1857,7 @@ class ApplyRMSPropOp : public OpKernel {
|
||||
const Tensor& grad = ctx->input(7);
|
||||
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()),
|
||||
errors::InvalidArgument("lr is not a scalar: ",
|
||||
errors::InvalidArgument("lr is not a scalar : ",
|
||||
lr.shape().DebugString()));
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(rho.shape()),
|
||||
errors::InvalidArgument("rho is not a scalar: ",
|
||||
@ -1400,10 +1905,11 @@ typedef Eigen::GpuDevice GPUDevice;
|
||||
REGISTER_KERNEL_BUILDER( \
|
||||
Name("ApplyRMSProp").Device(DEVICE_##D).TypeConstraint<T>("T"), \
|
||||
ApplyRMSPropOp<D##Device, T>);
|
||||
#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T);
|
||||
|
||||
REGISTER_KERNELS(CPU, Eigen::half);
|
||||
REGISTER_KERNELS(CPU, float);
|
||||
REGISTER_KERNELS(CPU, double);
|
||||
TF_CALL_half(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_float(REGISTER_CPU_KERNELS);
|
||||
TF_CALL_double(REGISTER_CPU_KERNELS);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Forward declarations of the functor specializations for GPU.
|
||||
@ -1428,6 +1934,7 @@ REGISTER_KERNELS(GPU, Eigen::half);
|
||||
REGISTER_KERNELS(GPU, float);
|
||||
REGISTER_KERNELS(GPU, double);
|
||||
#endif
|
||||
#undef REGISTER_CPU_KERNELS
|
||||
#undef REGISTER_KERNELS
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -44,6 +44,24 @@ struct ApplyAdadelta {
|
||||
typename TTypes<T>::ConstFlat grad);
|
||||
};
|
||||
|
||||
// Functor declaration templated on the device type and element type T.
// The call operator takes the device, a mutable flat view `var`, the scalars
// `lr`, `l1` and `l2`, and a read-only flat `grad`. Only the declaration is
// visible here; the body is supplied elsewhere.
// NOTE(review): presumably the FOBOS elastic-net proximal step that updates
// `var` in place -- confirm against the per-device definition.
template <typename Device, typename T>
|
||||
struct FobosElasticNet {
|
||||
void operator()(const Device& d, typename TTypes<T>::Flat var,
|
||||
typename TTypes<T>::ConstScalar lr,
|
||||
typename TTypes<T>::ConstScalar l1,
|
||||
typename TTypes<T>::ConstScalar l2,
|
||||
typename TTypes<T>::ConstFlat grad);
|
||||
};
|
||||
|
||||
// Functor declaration templated on the device type and element type T.
// The call operator takes the device, a mutable flat view `var`, the scalars
// `lr`, `l1` and `l2`, and a read-only flat `grad`. Only the declaration is
// visible here; the body is supplied elsewhere.
// NOTE(review): presumably backs the "ApplyProximalGradientDescent" op and
// updates `var` in place -- confirm against the per-device definition.
template <typename Device, typename T>
|
||||
struct ApplyProximalGradientDescent {
|
||||
void operator()(const Device& d, typename TTypes<T>::Flat var,
|
||||
typename TTypes<T>::ConstScalar lr,
|
||||
typename TTypes<T>::ConstScalar l1,
|
||||
typename TTypes<T>::ConstScalar l2,
|
||||
typename TTypes<T>::ConstFlat grad);
|
||||
};
|
||||
|
||||
template <typename Device, typename T>
|
||||
struct ApplyAdagrad {
|
||||
void operator()(const Device& d, typename TTypes<T>::Flat var,
|
||||
@ -52,6 +70,16 @@ struct ApplyAdagrad {
|
||||
typename TTypes<T>::ConstFlat grad);
|
||||
};
|
||||
|
||||
// Functor declaration templated on the device type and element type T.
// The call operator takes the device, mutable flat views `var` and `accum`,
// the scalars `lr`, `l1` and `l2`, and a read-only flat `grad`. Only the
// declaration is visible here; the body is supplied elsewhere.
// NOTE(review): presumably the dense counterpart of the sparse proximal
// Adagrad kernel, updating `var` and `accum` in place -- confirm against the
// per-device definition.
template <typename Device, typename T>
|
||||
struct ApplyProximalAdagrad {
|
||||
void operator()(const Device& d, typename TTypes<T>::Flat var,
|
||||
typename TTypes<T>::Flat accum,
|
||||
typename TTypes<T>::ConstScalar lr,
|
||||
typename TTypes<T>::ConstScalar l1,
|
||||
typename TTypes<T>::ConstScalar l2,
|
||||
typename TTypes<T>::ConstFlat grad);
|
||||
};
|
||||
|
||||
template <typename Device, typename T>
|
||||
struct ApplyFtrl {
|
||||
void operator()(const Device& d, typename TTypes<T>::Flat var,
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/kernels/xent_op.h"
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/framework/tensor.h"
|
||||
#include "tensorflow/core/framework/tensor_shape.h"
|
||||
|
||||
@ -86,18 +87,14 @@ struct XentFunctor<CPUDevice, T> {
|
||||
};
|
||||
} // namespace functor
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<Eigen::half>("T"),
|
||||
SoftmaxXentWithLogitsOp<CPUDevice, Eigen::half>);
|
||||
REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<float>("T"),
|
||||
SoftmaxXentWithLogitsOp<CPUDevice, float>);
|
||||
REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
|
||||
.Device(DEVICE_CPU)
|
||||
.TypeConstraint<double>("T"),
|
||||
SoftmaxXentWithLogitsOp<CPUDevice, double>);
|
||||
#define REGISTER_CPU(T) \
|
||||
REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits") \
|
||||
.Device(DEVICE_CPU) \
|
||||
.TypeConstraint<T>("T"), \
|
||||
SoftmaxXentWithLogitsOp<CPUDevice, T>);
|
||||
TF_CALL_half(REGISTER_CPU);
|
||||
TF_CALL_float(REGISTER_CPU);
|
||||
TF_CALL_double(REGISTER_CPU);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
|
||||
|
@ -1825,6 +1825,127 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "ApplyProximalAdagrad"
|
||||
input_arg {
|
||||
name: "var"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "accum"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "lr"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "ApplyProximalGradientDescent"
|
||||
input_arg {
|
||||
name: "var"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "alpha"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "delta"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "ApplyRMSProp"
|
||||
input_arg {
|
||||
@ -3011,6 +3132,63 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "AvgPool"
|
||||
input_arg {
|
||||
name: "value"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "strides"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "padding"
|
||||
type: "string"
|
||||
allowed_values {
|
||||
list {
|
||||
s: "SAME"
|
||||
s: "VALID"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "data_format"
|
||||
type: "string"
|
||||
default_value {
|
||||
s: "NHWC"
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
s: "NHWC"
|
||||
s: "NCHW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
type: DT_DOUBLE
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "AvgPool3D"
|
||||
input_arg {
|
||||
@ -3232,6 +3410,67 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "AvgPoolGrad"
|
||||
input_arg {
|
||||
name: "orig_input_shape"
|
||||
type: DT_INT32
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "strides"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "padding"
|
||||
type: "string"
|
||||
allowed_values {
|
||||
list {
|
||||
s: "SAME"
|
||||
s: "VALID"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "data_format"
|
||||
type: "string"
|
||||
default_value {
|
||||
s: "NHWC"
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
s: "NHWC"
|
||||
s: "NCHW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
type: DT_DOUBLE
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "BatchCholesky"
|
||||
input_arg {
|
||||
@ -11801,6 +12040,124 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPool"
|
||||
input_arg {
|
||||
name: "input"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "strides"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "padding"
|
||||
type: "string"
|
||||
allowed_values {
|
||||
list {
|
||||
s: "SAME"
|
||||
s: "VALID"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "data_format"
|
||||
type: "string"
|
||||
default_value {
|
||||
s: "NHWC"
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
s: "NHWC"
|
||||
s: "NCHW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPool"
|
||||
input_arg {
|
||||
name: "input"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "strides"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "padding"
|
||||
type: "string"
|
||||
allowed_values {
|
||||
list {
|
||||
s: "SAME"
|
||||
s: "VALID"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "data_format"
|
||||
type: "string"
|
||||
default_value {
|
||||
s: "NHWC"
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
s: "NHWC"
|
||||
s: "NCHW"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPool3D"
|
||||
input_arg {
|
||||
@ -12014,6 +12371,73 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPoolGrad"
|
||||
input_arg {
|
||||
name: "orig_input"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "orig_output"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "strides"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "padding"
|
||||
type: "string"
|
||||
allowed_values {
|
||||
list {
|
||||
s: "SAME"
|
||||
s: "VALID"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "data_format"
|
||||
type: "string"
|
||||
default_value {
|
||||
s: "NHWC"
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
s: "NHWC"
|
||||
s: "NCHW"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPoolGradWithArgmax"
|
||||
input_arg {
|
||||
@ -12065,6 +12489,70 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPoolGradWithArgmax"
|
||||
input_arg {
|
||||
name: "input"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "argmax"
|
||||
type_attr: "Targmax"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "strides"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "padding"
|
||||
type: "string"
|
||||
allowed_values {
|
||||
list {
|
||||
s: "SAME"
|
||||
s: "VALID"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "Targmax"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_INT32
|
||||
type: DT_INT64
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPoolWithArgmax"
|
||||
input_arg {
|
||||
@ -12115,6 +12603,69 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "MaxPoolWithArgmax"
|
||||
input_arg {
|
||||
name: "input"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "argmax"
|
||||
type_attr: "Targmax"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "strides"
|
||||
type: "list(int)"
|
||||
has_minimum: true
|
||||
minimum: 4
|
||||
}
|
||||
attr {
|
||||
name: "Targmax"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_INT64
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_INT32
|
||||
type: DT_INT64
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "padding"
|
||||
type: "string"
|
||||
allowed_values {
|
||||
list {
|
||||
s: "SAME"
|
||||
s: "VALID"
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "Maximum"
|
||||
input_arg {
|
||||
@ -20499,6 +21050,155 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "SparseApplyProximalAdagrad"
|
||||
input_arg {
|
||||
name: "var"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "accum"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "lr"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "indices"
|
||||
type_attr: "Tindices"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "Tindices"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_INT32
|
||||
type: DT_INT64
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "SparseApplyProximalGradientDescent"
|
||||
input_arg {
|
||||
name: "var"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "alpha"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "indices"
|
||||
type_attr: "Tindices"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "Tindices"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_INT32
|
||||
type: DT_INT64
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
}
|
||||
}
|
||||
op {
|
||||
name: "SparseConcat"
|
||||
input_arg {
|
||||
|
@ -154,22 +154,25 @@ Status MaxPoolGrad(const AttrSlice& attrs, FunctionDef* g) {
|
||||
// clang-format off
|
||||
*g = FDH::Define(
|
||||
// Arg defs
|
||||
{"input: float", "grad: float"},
|
||||
{"input: T", "grad: T"},
|
||||
// Ret val defs
|
||||
{"output: float"},
|
||||
{"output: T"},
|
||||
// Attr defs
|
||||
{"ksize: list(int) >= 4",
|
||||
{"T: {float, half} = DT_FLOAT",
|
||||
"ksize: list(int) >= 4",
|
||||
"strides: list(int) >= 4",
|
||||
GetPaddingAttrString()},
|
||||
// Nodes
|
||||
{
|
||||
// Invoke MaxPool again to recompute the outputs (removed by CSE?).
|
||||
{{"maxpool"}, "MaxPool", {"input"},
|
||||
/*Attrs=*/{{"ksize", "$ksize"},
|
||||
/*Attrs=*/{{"T", "$T"},
|
||||
{"ksize", "$ksize"},
|
||||
{"strides", "$strides"},
|
||||
{"padding", "$padding"}}},
|
||||
{{"output"}, "MaxPoolGrad", {"input", "maxpool", "grad"},
|
||||
/*Attrs=*/{{"ksize", "$ksize"},
|
||||
/*Attrs=*/{{"T", "$T"},
|
||||
{"ksize", "$ksize"},
|
||||
{"strides", "$strides"},
|
||||
{"padding", "$padding"}}}
|
||||
});
|
||||
|
@ -28,7 +28,7 @@ REGISTER_OP("AvgPool")
|
||||
.Attr("strides: list(int) >= 4")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Attr(GetConvnetDataFormatAttrString())
|
||||
.Attr("T: {float, double}")
|
||||
.Attr("T: {float, half, double}")
|
||||
.Doc(R"doc(
|
||||
Performs average pooling on the input.
|
||||
|
||||
@ -55,7 +55,7 @@ REGISTER_OP("AvgPoolGrad")
|
||||
.Attr("strides: list(int) >= 4")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Attr(GetConvnetDataFormatAttrString())
|
||||
.Attr("T: {float, double}")
|
||||
.Attr("T: {float, half, double}")
|
||||
.Doc(R"doc(
|
||||
Computes gradients of the average pooling function.
|
||||
|
||||
@ -642,12 +642,13 @@ output: The gradients for LRN.
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
REGISTER_OP("MaxPool")
|
||||
.Attr("T: {float, half} = DT_FLOAT")
|
||||
.Attr("ksize: list(int) >= 4")
|
||||
.Attr("strides: list(int) >= 4")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Attr(GetConvnetDataFormatAttrString())
|
||||
.Input("input: float")
|
||||
.Output("output: float")
|
||||
.Input("input: T")
|
||||
.Output("output: T")
|
||||
.Doc(R"doc(
|
||||
Performs max pooling on the input.
|
||||
|
||||
@ -669,10 +670,11 @@ REGISTER_OP("MaxPoolGrad")
|
||||
.Attr("strides: list(int) >= 4")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Attr(GetConvnetDataFormatAttrString())
|
||||
.Input("orig_input: float")
|
||||
.Input("orig_output: float")
|
||||
.Input("grad: float")
|
||||
.Output("output: float")
|
||||
.Input("orig_input: T")
|
||||
.Input("orig_output: T")
|
||||
.Input("grad: T")
|
||||
.Output("output: T")
|
||||
.Attr("T: {float, half} = DT_FLOAT")
|
||||
.Doc(R"doc(
|
||||
Computes gradients of the maxpooling function.
|
||||
|
||||
@ -696,9 +698,10 @@ REGISTER_OP("MaxPoolWithArgmax")
|
||||
.Attr("strides: list(int) >= 4")
|
||||
.Attr("Targmax: {int32, int64} = DT_INT64")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Input("input: float")
|
||||
.Output("output: float")
|
||||
.Input("input: T")
|
||||
.Output("output: T")
|
||||
.Output("argmax: Targmax")
|
||||
.Attr("T: {float, half} = DT_FLOAT")
|
||||
.Doc(R"doc(
|
||||
Performs max pooling on the input and outputs both max values and indices.
|
||||
|
||||
@ -720,10 +723,11 @@ REGISTER_OP("MaxPoolGradWithArgmax")
|
||||
.Attr("strides: list(int) >= 4")
|
||||
.Attr(GetPaddingAttrString())
|
||||
.Attr("Targmax: {int32, int64}")
|
||||
.Input("input: float")
|
||||
.Input("grad: float")
|
||||
.Input("input: T")
|
||||
.Input("grad: T")
|
||||
.Input("argmax: Targmax")
|
||||
.Output("output: float")
|
||||
.Output("output: T")
|
||||
.Attr("T: {float, half} = DT_FLOAT")
|
||||
.Doc(R"doc(
|
||||
Computes gradients of the maxpooling function.
|
||||
|
||||
|
@ -591,12 +591,12 @@ op {
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
description: "Scaling factor. Must be a scalar."
|
||||
description: "L1 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
description: "Scaling factor. Must be a scalar."
|
||||
description: "L2 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
@ -767,6 +767,146 @@ op {
|
||||
summary: "Update \'*var\' according to the momentum scheme."
|
||||
description: "accum = accum * momentum + grad\nvar -= lr * accum"
|
||||
}
|
||||
op {
|
||||
name: "ApplyProximalAdagrad"
|
||||
input_arg {
|
||||
name: "var"
|
||||
description: "Should be from a Variable()."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "accum"
|
||||
description: "Should be from a Variable()."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "lr"
|
||||
description: "Scaling factor. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
description: "L1 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
description: "L2 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
description: "The gradient."
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
description: "Same as \"var\"."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
|
||||
}
|
||||
summary: "Update \'*var\' and \'*accum\' according to FOBOS with Adagrad learning rate."
|
||||
description: "accum += grad * grad\nprox_v = var - lr * grad * (1 / sqrt(accum))\nvar = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}"
|
||||
}
|
||||
op {
|
||||
name: "ApplyProximalGradientDescent"
|
||||
input_arg {
|
||||
name: "var"
|
||||
description: "Should be from a Variable()."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "alpha"
|
||||
description: "Scaling factor. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
description: "L1 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
description: "L2 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "delta"
|
||||
description: "The change."
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
description: "Same as \"var\"."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
|
||||
}
|
||||
summary: "Update \'*var\' as FOBOS algorithm with fixed learning rate."
|
||||
description: "prox_v = var - alpha * delta\nvar = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}"
|
||||
}
|
||||
op {
|
||||
name: "ApplyRMSProp"
|
||||
input_arg {
|
||||
@ -1251,6 +1391,7 @@ op {
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
type: DT_DOUBLE
|
||||
}
|
||||
}
|
||||
@ -1447,6 +1588,7 @@ op {
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
type: DT_DOUBLE
|
||||
}
|
||||
}
|
||||
@ -6614,12 +6756,25 @@ op {
|
||||
input_arg {
|
||||
name: "input"
|
||||
description: "4-D input to pool over."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
description: "The max pooled output tensor."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
@ -6798,22 +6953,22 @@ op {
|
||||
input_arg {
|
||||
name: "orig_input"
|
||||
description: "The original input tensor."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "orig_output"
|
||||
description: "The original output tensor."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
description: "4-D. Gradients w.r.t. the output of `max_pool`."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
description: "Gradients w.r.t. the input to `max_pool`."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
@ -6854,6 +7009,19 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
summary: "Computes gradients of the maxpooling function."
|
||||
}
|
||||
op {
|
||||
@ -6861,12 +7029,12 @@ op {
|
||||
input_arg {
|
||||
name: "input"
|
||||
description: "The original input."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
description: "4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the\noutput of `max_pool`."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "argmax"
|
||||
@ -6876,7 +7044,7 @@ op {
|
||||
output_arg {
|
||||
name: "output"
|
||||
description: "Gradients w.r.t. the input of `max_pool`."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
attr {
|
||||
name: "ksize"
|
||||
@ -6913,6 +7081,19 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
summary: "Computes gradients of the maxpooling function."
|
||||
}
|
||||
op {
|
||||
@ -6920,12 +7101,12 @@ op {
|
||||
input_arg {
|
||||
name: "input"
|
||||
description: "4-D with shape `[batch, height, width, channels]`. Input to pool over."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "output"
|
||||
description: "The max pooled output tensor."
|
||||
type: DT_FLOAT
|
||||
type_attr: "T"
|
||||
}
|
||||
output_arg {
|
||||
name: "argmax"
|
||||
@ -6970,6 +7151,19 @@ op {
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_FLOAT
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
summary: "Performs max pooling on the input and outputs both max values and indices."
|
||||
description: "The indices in `argmax` are flattened, so that a maximum value at position\n`[b, y, x, c]` becomes flattened index\n`((b * height + y) * width + x) * channels + c`."
|
||||
}
|
||||
@ -11380,12 +11574,12 @@ op {
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
description: "Scaling factor. Must be a scalar."
|
||||
description: "L1 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
description: "Scaling factor. Must be a scalar."
|
||||
description: "L2 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
@ -11525,6 +11719,176 @@ op {
|
||||
summary: "Update relevant entries in \'*var\' and \'*accum\' according to the momentum scheme."
|
||||
description: "That is for rows we have grad for, we update var and accum as follows:\n\naccum = accum * momentum + grad\nvar -= lr * accum"
|
||||
}
|
||||
op {
|
||||
name: "SparseApplyProximalAdagrad"
|
||||
input_arg {
|
||||
name: "var"
|
||||
description: "Should be from a Variable()."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "accum"
|
||||
description: "Should be from a Variable()."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "lr"
|
||||
description: "Learning rate. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
description: "L1 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
description: "L2 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
description: "The gradient."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "indices"
|
||||
description: "A vector of indices into the first dimension of var and accum."
|
||||
type_attr: "Tindices"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
description: "Same as \"var\"."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "Tindices"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_INT32
|
||||
type: DT_INT64
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
description: "If True, updating of the var and accum tensors will be protected by\na lock; otherwise the behavior is undefined, but may exhibit less contention."
|
||||
}
|
||||
summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm."
|
||||
description: "That is for rows we have grad for, we update var and accum as follows:\naccum += grad * grad\nprox_v = var\nprox_v -= lr * grad * (1 / sqrt(accum))\nvar = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}"
|
||||
}
|
||||
op {
|
||||
name: "SparseApplyProximalGradientDescent"
|
||||
input_arg {
|
||||
name: "var"
|
||||
description: "Should be from a Variable()."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
input_arg {
|
||||
name: "alpha"
|
||||
description: "Scaling factor. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l1"
|
||||
description: "L1 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "l2"
|
||||
description: "L2 regularization. Must be a scalar."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "grad"
|
||||
description: "The gradient."
|
||||
type_attr: "T"
|
||||
}
|
||||
input_arg {
|
||||
name: "indices"
|
||||
description: "A vector of indices into the first dimension of var."
|
||||
type_attr: "Tindices"
|
||||
}
|
||||
output_arg {
|
||||
name: "out"
|
||||
description: "Same as \"var\"."
|
||||
type_attr: "T"
|
||||
is_ref: true
|
||||
}
|
||||
attr {
|
||||
name: "T"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_FLOAT
|
||||
type: DT_DOUBLE
|
||||
type: DT_INT64
|
||||
type: DT_INT32
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
type: DT_INT16
|
||||
type: DT_INT8
|
||||
type: DT_COMPLEX64
|
||||
type: DT_COMPLEX128
|
||||
type: DT_QINT8
|
||||
type: DT_QUINT8
|
||||
type: DT_QINT32
|
||||
type: DT_HALF
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "Tindices"
|
||||
type: "type"
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_INT32
|
||||
type: DT_INT64
|
||||
}
|
||||
}
|
||||
}
|
||||
attr {
|
||||
name: "use_locking"
|
||||
type: "bool"
|
||||
default_value {
|
||||
b: false
|
||||
}
|
||||
description: "If True, the subtraction will be protected by a lock;\notherwise the behavior is undefined, but may exhibit less contention."
|
||||
}
|
||||
summary: "Sparse update \'*var\' as FOBOS algorithm with fixed learning rate."
|
||||
description: "That is for rows we have grad for, we update var as follows:\nprox_v = var - alpha * grad\nvar = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}"
|
||||
}
|
||||
op {
|
||||
name: "SparseConcat"
|
||||
input_arg {
|
||||
|
@ -35,6 +35,59 @@ use_locking: If `True`, the subtraction will be protected by a lock;
|
||||
otherwise the behavior is undefined, but may exhibit less contention.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("ApplyProximalGradientDescent")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("alpha: T")
|
||||
.Input("l1: T")
|
||||
.Input("l2: T")
|
||||
.Input("delta: T")
|
||||
.Output("out: Ref(T)")
|
||||
.Attr("T: numbertype")
|
||||
.Attr("use_locking: bool = false")
|
||||
.Doc(R"doc(
|
||||
Update '*var' as FOBOS algorithm with fixed learning rate.
|
||||
prox_v = var - alpha * delta
|
||||
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
|
||||
|
||||
var: Should be from a Variable().
|
||||
alpha: Scaling factor. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
delta: The change.
|
||||
out: Same as "var".
|
||||
use_locking: If True, the subtraction will be protected by a lock;
|
||||
otherwise the behavior is undefined, but may exhibit less contention.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("SparseApplyProximalGradientDescent")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("alpha: T")
|
||||
.Input("l1: T")
|
||||
.Input("l2: T")
|
||||
.Input("grad: T")
|
||||
.Input("indices: Tindices")
|
||||
.Output("out: Ref(T)")
|
||||
.Attr("T: numbertype")
|
||||
.Attr("Tindices: {int32, int64}")
|
||||
.Attr("use_locking: bool = false")
|
||||
.Doc(R"doc(
|
||||
Sparse update '*var' as FOBOS algorithm with fixed learning rate.
|
||||
|
||||
That is for rows we have grad for, we update var as follows:
|
||||
prox_v = var - alpha * grad
|
||||
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
|
||||
|
||||
var: Should be from a Variable().
|
||||
alpha: Scaling factor. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
grad: The gradient.
|
||||
indices: A vector of indices into the first dimension of var.
|
||||
out: Same as "var".
|
||||
use_locking: If True, the subtraction will be protected by a lock;
|
||||
otherwise the behavior is undefined, but may exhibit less contention.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("ApplyAdadelta")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("accum: Ref(T)")
|
||||
@ -117,6 +170,33 @@ use_locking: If `True`, updating of the var and accum tensors will be protected
|
||||
contention.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("ApplyProximalAdagrad")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("accum: Ref(T)")
|
||||
.Input("lr: T")
|
||||
.Input("l1: T")
|
||||
.Input("l2: T")
|
||||
.Input("grad: T")
|
||||
.Output("out: Ref(T)")
|
||||
.Attr("T: numbertype")
|
||||
.Attr("use_locking: bool = false")
|
||||
.Doc(R"doc(
|
||||
Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
|
||||
accum += grad * grad
|
||||
prox_v = var - lr * grad * (1 / sqrt(accum))
|
||||
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
|
||||
|
||||
var: Should be from a Variable().
|
||||
accum: Should be from a Variable().
|
||||
grad: The gradient.
|
||||
lr: Scaling factor. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
out: Same as "var".
|
||||
use_locking: If True, updating of the var and accum tensors will be protected by
|
||||
a lock; otherwise the behavior is undefined, but may exhibit less contention.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("SparseApplyAdagrad")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("accum: Ref(T)")
|
||||
@ -145,6 +225,39 @@ use_locking: If `True`, updating of the var and accum tensors will be protected
|
||||
contention.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("SparseApplyProximalAdagrad")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("accum: Ref(T)")
|
||||
.Input("lr: T")
|
||||
.Input("l1: T")
|
||||
.Input("l2: T")
|
||||
.Input("grad: T")
|
||||
.Input("indices: Tindices")
|
||||
.Output("out: Ref(T)")
|
||||
.Attr("T: numbertype")
|
||||
.Attr("Tindices: {int32, int64}")
|
||||
.Attr("use_locking: bool = false")
|
||||
.Doc(R"doc(
|
||||
Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
|
||||
|
||||
That is for rows we have grad for, we update var and accum as follows:
|
||||
accum += grad * grad
|
||||
prox_v = var
|
||||
prox_v -= lr * grad * (1 / sqrt(accum))
|
||||
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
|
||||
|
||||
var: Should be from a Variable().
|
||||
accum: Should be from a Variable().
|
||||
lr: Learning rate. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
grad: The gradient.
|
||||
indices: A vector of indices into the first dimension of var and accum.
|
||||
out: Same as "var".
|
||||
use_locking: If True, updating of the var and accum tensors will be protected by
|
||||
a lock; otherwise the behavior is undefined, but may exhibit less contention.
|
||||
)doc");
|
||||
|
||||
REGISTER_OP("ApplyFtrl")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("accum: Ref(T)")
|
||||
@ -171,8 +284,8 @@ accum: Should be from a Variable().
|
||||
linear: Should be from a Variable().
|
||||
grad: The gradient.
|
||||
lr: Scaling factor. Must be a scalar.
|
||||
l1: Scaling factor. Must be a scalar.
|
||||
l2: Scaling factor. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
lr_power: Scaling factor. Must be a scalar.
|
||||
out: Same as "var".
|
||||
use_locking: If `True`, updating of the var and accum tensors will be protected
|
||||
@ -210,8 +323,8 @@ linear: Should be from a Variable().
|
||||
grad: The gradient.
|
||||
indices: A vector of indices into the first dimension of var and accum.
|
||||
lr: Scaling factor. Must be a scalar.
|
||||
l1: Scaling factor. Must be a scalar.
|
||||
l2: Scaling factor. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
lr_power: Scaling factor. Must be a scalar.
|
||||
out: Same as "var".
|
||||
use_locking: If `True`, updating of the var and accum tensors will be protected
|
||||
|
@ -69,9 +69,9 @@ class Stat {
|
||||
: static_cast<HighPrecisionValueType>(sum_) / count_;
|
||||
}
|
||||
|
||||
ValueType rms() const { return sqrt(squared_sum_ / count_); }
|
||||
|
||||
ValueType std_deviation() const { return all_same() ? 0 : rms() - avg(); }
|
||||
ValueType std_deviation() const {
|
||||
return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg());
|
||||
}
|
||||
|
||||
void OutputToStream(std::ostream* stream) const {
|
||||
if (empty()) {
|
||||
|
@ -18,35 +18,38 @@ from __future__ import print_function
|
||||
from sklearn import datasets
|
||||
from sklearn import metrics
|
||||
from sklearn.cross_validation import train_test_split
|
||||
import tensorflow as tf
|
||||
|
||||
from tensorflow.contrib import learn
|
||||
|
||||
|
||||
iris = datasets.load_iris()
|
||||
X_train, X_test, y_train, y_test = train_test_split(iris.data,
|
||||
iris.target,
|
||||
test_size=0.2,
|
||||
random_state=42)
|
||||
def main(unused_argv):
|
||||
iris = datasets.load_iris()
|
||||
x_train, x_test, y_train, y_test = train_test_split(
|
||||
iris.data, iris.target, test_size=0.2, random_state=42)
|
||||
|
||||
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train,
|
||||
test_size=0.2,
|
||||
random_state=42)
|
||||
val_monitor = learn.monitors.ValidationMonitor(X_val, y_val,
|
||||
early_stopping_rounds=200)
|
||||
x_train, x_val, y_train, y_val = train_test_split(
|
||||
x_train, y_train, test_size=0.2, random_state=42)
|
||||
val_monitor = learn.monitors.ValidationMonitor(
|
||||
x_val, y_val, early_stopping_rounds=200)
|
||||
|
||||
# classifier with early stopping on training data
|
||||
classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
|
||||
n_classes=3,
|
||||
model_dir='/tmp/iris_model/')
|
||||
classifier1.fit(X_train, y_train, steps=2000)
|
||||
score1 = metrics.accuracy_score(y_test, classifier1.predict(X_test))
|
||||
# classifier with early stopping on training data
|
||||
classifier1 = learn.TensorFlowDNNClassifier(
|
||||
hidden_units=[10, 20, 10], n_classes=3, model_dir='/tmp/iris_model/')
|
||||
classifier1.fit(x=x_train, y=y_train, steps=2000)
|
||||
score1 = metrics.accuracy_score(y_test, classifier1.predict(x_test))
|
||||
|
||||
# classifier with early stopping on validation data
|
||||
classifier2 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
|
||||
n_classes=3,
|
||||
model_dir='/tmp/iris_model_val/')
|
||||
classifier2.fit(X_train, y_train, val_monitor, steps=2000)
|
||||
score2 = metrics.accuracy_score(y_test, classifier2.predict(X_test))
|
||||
# classifier with early stopping on validation data, save frequently for
|
||||
# monitor to pick up new checkpoints.
|
||||
classifier2 = learn.TensorFlowDNNClassifier(
|
||||
hidden_units=[10, 20, 10], n_classes=3, model_dir='/tmp/iris_model_val/',
|
||||
config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
|
||||
classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
|
||||
score2 = metrics.accuracy_score(y_test, classifier2.predict(x_test))
|
||||
|
||||
# In many applications, the score is improved by using early stopping
|
||||
print(score2 > score1)
|
||||
# In many applications, the score is improved by using early stopping
|
||||
print(score2 > score1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.app.run()
|
||||
|
@ -164,7 +164,7 @@ def train():
|
||||
feed_dict=feed_dict(True),
|
||||
options=run_options,
|
||||
run_metadata=run_metadata)
|
||||
train_writer.add_run_metadata(run_metadata, 'step%d' % i)
|
||||
train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
|
||||
train_writer.add_summary(summary, i)
|
||||
print('Adding run metadata for', i)
|
||||
else: # Record a summary
|
||||
|
@ -216,7 +216,7 @@ This operation returns a 1-D integer tensor representing the shape of `input`.
|
||||
|
||||
For example:
|
||||
|
||||
```prettyprint
|
||||
```python
|
||||
# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
|
||||
shape(t) ==> [2, 2, 3]
|
||||
```
|
||||
@ -224,7 +224,7 @@ shape(t) ==> [2, 2, 3]
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`input`</b>: A `Tensor`.
|
||||
* <b>`input`</b>: A `Tensor` or `SparseTensor`.
|
||||
* <b>`name`</b>: A name for the operation (optional).
|
||||
|
||||
##### Returns:
|
||||
|
@ -796,11 +796,11 @@ A classifier for TensorFlow DNN models.
|
||||
|
||||
def input_fn_eval: # returns x, Y
|
||||
pass
|
||||
estimator.evaluate(input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
@ -1140,11 +1140,11 @@ A regressor for TensorFlow DNN models.
|
||||
|
||||
def input_fn_eval: # returns x, Y
|
||||
pass
|
||||
estimator.evaluate(input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
@ -2245,10 +2245,10 @@ Linear classifier model.
|
||||
...
|
||||
estimator.fit(input_fn=input_fn_train)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
@ -2579,10 +2579,10 @@ Linear regressor model.
|
||||
...
|
||||
estimator.fit(input_fn=input_fn_train)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have following features,
|
||||
otherwise there will be a KeyError:
|
||||
if `weight_column_name` is not None:
|
||||
key=weight_column_name, value=a `Tensor`
|
||||
@ -4238,50 +4238,33 @@ Perform various training, evaluation, and inference actions on a graph.
|
||||
### `class tf.contrib.learn.RunConfig` {#RunConfig}
|
||||
|
||||
This class specifies the specific configurations for the run.
|
||||
|
||||
Parameters:
|
||||
execution_mode: Runners use this flag to execute different tasks, like
|
||||
training vs evaluation. 'all' (the default) executes both training and
|
||||
eval.
|
||||
master: TensorFlow master. Empty string (the default) for local.
|
||||
task: Task id of the replica running the training (default: 0).
|
||||
num_ps_replicas: Number of parameter server tasks to use (default: 0).
|
||||
training_worker_session_startup_stagger_secs: Seconds to sleep between the
|
||||
startup of each worker task session (default: 5).
|
||||
training_worker_max_startup_secs: Max seconds to wait before starting any
|
||||
worker (default: 60).
|
||||
eval_delay_secs: Number of seconds between the beginning of each eval run.
|
||||
If one run takes more than this amount of time, the next run will start
|
||||
immediately once that run completes (default 60).
|
||||
eval_steps: Number of steps to run in each eval (default: 100).
|
||||
num_cores: Number of cores to be used (default: 4).
|
||||
verbose: Controls the verbosity, possible values:
|
||||
0: the algorithm and debug information is muted.
|
||||
1: trainer prints the progress.
|
||||
2: log device placement is printed.
|
||||
gpu_memory_fraction: Fraction of GPU memory used by the process on
|
||||
each GPU uniformly on the same machine.
|
||||
tf_random_seed: Random seed for TensorFlow initializers.
|
||||
Setting this value allows consistency between reruns.
|
||||
keep_checkpoint_max: The maximum number of recent checkpoint files to keep.
|
||||
As new files are created, older files are deleted.
|
||||
If None or 0, all checkpoint files are kept.
|
||||
Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
|
||||
keep_checkpoint_every_n_hours: Number of hours between each checkpoint
|
||||
to be saved. The default value of 10,000 hours effectively disables
|
||||
the feature.
|
||||
|
||||
Attributes:
|
||||
tf_master: Tensorflow master.
|
||||
tf_config: Tensorflow Session Config proto.
|
||||
tf_random_seed: Tensorflow random seed.
|
||||
keep_checkpoint_max: Maximum number of checkpoints to keep.
|
||||
keep_checkpoint_every_n_hours: Number of hours between each checkpoint.
|
||||
- - -
|
||||
|
||||
#### `tf.contrib.learn.RunConfig.__init__(execution_mode='all', master='', task=0, num_ps_replicas=0, training_worker_session_startup_stagger_secs=5, training_worker_max_startup_secs=60, eval_delay_secs=60, eval_steps=100, num_cores=4, verbose=1, gpu_memory_fraction=1, tf_random_seed=42, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000)` {#RunConfig.__init__}
|
||||
#### `tf.contrib.learn.RunConfig.__init__(master='', task=0, num_ps_replicas=0, num_cores=4, log_device_placement=False, gpu_memory_fraction=1, tf_random_seed=42, save_summary_steps=100, save_checkpoints_secs=60, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000)` {#RunConfig.__init__}
|
||||
|
||||
Constructor.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`master`</b>: TensorFlow master. Empty string (the default) for local.
|
||||
* <b>`task`</b>: Task id of the replica running the training (default: 0).
|
||||
* <b>`num_ps_replicas`</b>: Number of parameter server tasks to use (default: 0).
|
||||
* <b>`num_cores`</b>: Number of cores to be used (default: 4).
|
||||
* <b>`log_device_placement`</b>: Log the op placement to devices (default: False).
|
||||
* <b>`gpu_memory_fraction`</b>: Fraction of GPU memory used by the process on
|
||||
each GPU uniformly on the same machine.
|
||||
* <b>`tf_random_seed`</b>: Random seed for TensorFlow initializers.
|
||||
Setting this value allows consistency between reruns.
|
||||
* <b>`save_summary_steps`</b>: Save summaries every this many steps.
|
||||
* <b>`save_checkpoints_secs`</b>: Save checkpoints every this many seconds.
|
||||
* <b>`keep_checkpoint_max`</b>: The maximum number of recent checkpoint files to
|
||||
keep. As new files are created, older files are deleted. If None or 0,
|
||||
all checkpoint files are kept. Defaults to 5 (that is, the 5 most recent
|
||||
checkpoint files are kept.)
|
||||
* <b>`keep_checkpoint_every_n_hours`</b>: Number of hours between each checkpoint
|
||||
to be saved. The default value of 10,000 hours effectively disables
|
||||
the feature.
|
||||
|
||||
|
||||
|
||||
@ -4394,7 +4377,7 @@ Run `output_dict` tensors `n` times, with the same `feed_dict` each run.
|
||||
|
||||
- - -
|
||||
|
||||
### `tf.contrib.learn.train(graph, output_dir, train_op, loss_op, global_step_tensor=None, init_op=None, init_feed_dict=None, init_fn=None, log_every_steps=10, supervisor_is_chief=True, supervisor_master='', supervisor_save_model_secs=600, supervisor_save_summaries_steps=100, feed_fn=None, max_steps=None, fail_on_nan_loss=True, monitors=None)` {#train}
|
||||
### `tf.contrib.learn.train(graph, output_dir, train_op, loss_op, global_step_tensor=None, init_op=None, init_feed_dict=None, init_fn=None, log_every_steps=10, supervisor_is_chief=True, supervisor_master='', supervisor_save_model_secs=600, supervisor_save_summaries_steps=100, feed_fn=None, steps=None, fail_on_nan_loss=True, monitors=None)` {#train}
|
||||
|
||||
Train a model.
|
||||
|
||||
@ -4437,7 +4420,7 @@ program is terminated with exit code 1.
|
||||
`supervisor_save_summaries_steps` seconds when training.
|
||||
* <b>`feed_fn`</b>: A function that is called every iteration to produce a `feed_dict`
|
||||
passed to `session.run` calls. Optional.
|
||||
* <b>`max_steps`</b>: Train until `global_step_tensor` evaluates to this value.
|
||||
* <b>`steps`</b>: Train for this many additional steps (i.e. until the global step reaches its current value + `steps`).
|
||||
* <b>`fail_on_nan_loss`</b>: If true, raise `NanLossDuringTrainingError` if `loss_op`
|
||||
evaluates to `NaN`. If false, continue training as if nothing happened.
|
||||
* <b>`monitors`</b>: List of `BaseMonitor` subclass instances. Used for callbacks
|
||||
|
@ -18,10 +18,10 @@ Linear regressor model.
|
||||
...
|
||||
estimator.fit(input_fn=input_fn_train)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have the following features,
|
||||
otherwise there will be a KeyError:
|
||||
if `weight_column_name` is not None:
|
||||
key=weight_column_name, value=a `Tensor`
|
||||
|
@ -18,10 +18,10 @@ Linear classifier model.
|
||||
...
|
||||
estimator.fit(input_fn=input_fn_train)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have the following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
|
@ -1,4 +1,4 @@
|
||||
### `tf.contrib.learn.train(graph, output_dir, train_op, loss_op, global_step_tensor=None, init_op=None, init_feed_dict=None, init_fn=None, log_every_steps=10, supervisor_is_chief=True, supervisor_master='', supervisor_save_model_secs=600, supervisor_save_summaries_steps=100, feed_fn=None, max_steps=None, fail_on_nan_loss=True, monitors=None)` {#train}
|
||||
### `tf.contrib.learn.train(graph, output_dir, train_op, loss_op, global_step_tensor=None, init_op=None, init_feed_dict=None, init_fn=None, log_every_steps=10, supervisor_is_chief=True, supervisor_master='', supervisor_save_model_secs=600, supervisor_save_summaries_steps=100, feed_fn=None, steps=None, fail_on_nan_loss=True, monitors=None)` {#train}
|
||||
|
||||
Train a model.
|
||||
|
||||
@ -41,7 +41,7 @@ program is terminated with exit code 1.
|
||||
`supervisor_save_summaries_steps` seconds when training.
|
||||
* <b>`feed_fn`</b>: A function that is called every iteration to produce a `feed_dict`
|
||||
passed to `session.run` calls. Optional.
|
||||
* <b>`max_steps`</b>: Train until `global_step_tensor` evaluates to this value.
|
||||
* <b>`steps`</b>: Trains for this many steps (i.e., until the global step reaches the current global step + `steps`).
|
||||
* <b>`fail_on_nan_loss`</b>: If true, raise `NanLossDuringTrainingError` if `loss_op`
|
||||
evaluates to `NaN`. If false, continue training as if nothing happened.
|
||||
* <b>`monitors`</b>: List of `BaseMonitor` subclass instances. Used for callbacks
|
||||
|
@ -6,7 +6,7 @@ This operation returns a 1-D integer tensor representing the shape of `input`.
|
||||
|
||||
For example:
|
||||
|
||||
```prettyprint
|
||||
```python
|
||||
# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
|
||||
shape(t) ==> [2, 2, 3]
|
||||
```
|
||||
@ -14,7 +14,7 @@ shape(t) ==> [2, 2, 3]
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`input`</b>: A `Tensor`.
|
||||
* <b>`input`</b>: A `Tensor` or `SparseTensor`.
|
||||
* <b>`name`</b>: A name for the operation (optional).
|
||||
|
||||
##### Returns:
|
||||
|
@ -21,11 +21,11 @@ A classifier for TensorFlow DNN models.
|
||||
|
||||
def input_fn_eval: # returns x, Y
|
||||
pass
|
||||
estimator.evaluate(input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have the following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
|
@ -1,47 +1,30 @@
|
||||
This class specifies the specific configurations for the run.
|
||||
- - -
|
||||
|
||||
Parameters:
|
||||
execution_mode: Runners use this flag to execute different tasks, like
|
||||
training vs evaluation. 'all' (the default) executes both training and
|
||||
eval.
|
||||
master: TensorFlow master. Empty string (the default) for local.
|
||||
task: Task id of the replica running the training (default: 0).
|
||||
num_ps_replicas: Number of parameter server tasks to use (default: 0).
|
||||
training_worker_session_startup_stagger_secs: Seconds to sleep between the
|
||||
startup of each worker task session (default: 5).
|
||||
training_worker_max_startup_secs: Max seconds to wait before starting any
|
||||
worker (default: 60).
|
||||
eval_delay_secs: Number of seconds between the beginning of each eval run.
|
||||
If one run takes more than this amount of time, the next run will start
|
||||
immediately once that run completes (default 60).
|
||||
eval_steps: Number of steps to run in each eval (default: 100).
|
||||
num_cores: Number of cores to be used (default: 4).
|
||||
verbose: Controls the verbosity, possible values:
|
||||
0: the algorithm and debug information is muted.
|
||||
1: trainer prints the progress.
|
||||
2: log device placement is printed.
|
||||
gpu_memory_fraction: Fraction of GPU memory used by the process on
|
||||
#### `tf.contrib.learn.RunConfig.__init__(master='', task=0, num_ps_replicas=0, num_cores=4, log_device_placement=False, gpu_memory_fraction=1, tf_random_seed=42, save_summary_steps=100, save_checkpoints_secs=60, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000)` {#RunConfig.__init__}
|
||||
|
||||
Constructor.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`master`</b>: TensorFlow master. Empty string (the default) for local.
|
||||
* <b>`task`</b>: Task id of the replica running the training (default: 0).
|
||||
* <b>`num_ps_replicas`</b>: Number of parameter server tasks to use (default: 0).
|
||||
* <b>`num_cores`</b>: Number of cores to be used (default: 4).
|
||||
* <b>`log_device_placement`</b>: Log the op placement to devices (default: False).
|
||||
* <b>`gpu_memory_fraction`</b>: Fraction of GPU memory used by the process on
|
||||
each GPU uniformly on the same machine.
|
||||
tf_random_seed: Random seed for TensorFlow initializers.
|
||||
* <b>`tf_random_seed`</b>: Random seed for TensorFlow initializers.
|
||||
Setting this value allows consistency between reruns.
|
||||
keep_checkpoint_max: The maximum number of recent checkpoint files to keep.
|
||||
As new files are created, older files are deleted.
|
||||
If None or 0, all checkpoint files are kept.
|
||||
Defaults to 5 (that is, the 5 most recent checkpoint files are kept.)
|
||||
keep_checkpoint_every_n_hours: Number of hours between each checkpoint
|
||||
* <b>`save_summary_steps`</b>: Save summaries every this many steps.
|
||||
* <b>`save_checkpoints_secs`</b>: Save checkpoints every this many seconds.
|
||||
* <b>`keep_checkpoint_max`</b>: The maximum number of recent checkpoint files to
|
||||
keep. As new files are created, older files are deleted. If None or 0,
|
||||
all checkpoint files are kept. Defaults to 5 (that is, the 5 most recent
|
||||
checkpoint files are kept.)
|
||||
* <b>`keep_checkpoint_every_n_hours`</b>: Number of hours between each checkpoint
|
||||
to be saved. The default value of 10,000 hours effectively disables
|
||||
the feature.
|
||||
|
||||
Attributes:
|
||||
tf_master: Tensorflow master.
|
||||
tf_config: Tensorflow Session Config proto.
|
||||
tf_random_seed: Tensorflow random seed.
|
||||
keep_checkpoint_max: Maximum number of checkpoints to keep.
|
||||
keep_checkpoint_every_n_hours: Number of hours between each checkpoint.
|
||||
- - -
|
||||
|
||||
#### `tf.contrib.learn.RunConfig.__init__(execution_mode='all', master='', task=0, num_ps_replicas=0, training_worker_session_startup_stagger_secs=5, training_worker_max_startup_secs=60, eval_delay_secs=60, eval_steps=100, num_cores=4, verbose=1, gpu_memory_fraction=1, tf_random_seed=42, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000)` {#RunConfig.__init__}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -18,21 +18,28 @@ a probability distribution for each entry, see
|
||||
on `logits` internally for efficiency. Do not call this op with the
|
||||
output of `softmax`, as it will produce incorrect results.
|
||||
|
||||
`logits` must have the shape `[batch_size, num_classes]`
|
||||
and dtype `float32` or `float64`.
|
||||
|
||||
`labels` must have the shape `[batch_size]` and dtype `int32` or `int64`.
|
||||
A common use case is to have logits of shape `[batch_size, num_classes]` and
|
||||
labels of shape `[batch_size]`. But higher dimensions are supported.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`logits`</b>: Unscaled log probabilities.
|
||||
* <b>`labels`</b>: Each entry `labels[i]` must be an index in `[0, num_classes)`. Other
|
||||
values will result in a loss of 0, but incorrect gradient computations.
|
||||
* <b>`logits`</b>: Unscaled log probabilities of rank `r` and shape
|
||||
`[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
|
||||
* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
|
||||
`int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
|
||||
Other values will result in a loss of 0, but incorrect gradient
|
||||
computations.
|
||||
* <b>`name`</b>: A name for the operation (optional).
|
||||
|
||||
##### Returns:
|
||||
|
||||
A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
|
||||
softmax cross entropy loss.
|
||||
A `Tensor` of the same shape as `labels` and of the same type as `logits`
|
||||
with the softmax cross entropy loss.
|
||||
|
||||
##### Raises:
|
||||
|
||||
|
||||
* <b>`ValueError`</b>: If logits are scalars (need to have rank >= 1) or if the rank
|
||||
of the labels is not equal to the rank of the logits minus one.
|
||||
|
||||
|
@ -21,11 +21,11 @@ A regressor for TensorFlow DNN models.
|
||||
|
||||
def input_fn_eval: # returns x, Y
|
||||
pass
|
||||
estimator.evaluate(input_fn_eval)
|
||||
estimator.predict(x)
|
||||
estimator.evaluate(input_fn=input_fn_eval)
|
||||
estimator.predict(x=x)
|
||||
```
|
||||
|
||||
Input of `fit`, `train`, and `evaluate` should have following features,
|
||||
Input of `fit` and `evaluate` should have the following features,
|
||||
otherwise there will be a `KeyError`:
|
||||
if `weight_column_name` is not `None`, a feature with
|
||||
`key=weight_column_name` whose value is a `Tensor`.
|
||||
|
@ -9,7 +9,7 @@ The indices in `argmax` are flattened, so that a maximum value at position
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`input`</b>: A `Tensor` of type `float32`.
|
||||
* <b>`input`</b>: A `Tensor`. Must be one of the following types: `float32`, `half`.
|
||||
4-D with shape `[batch, height, width, channels]`. Input to pool over.
|
||||
* <b>`ksize`</b>: A list of `ints` that has length `>= 4`.
|
||||
The size of the window for each dimension of the input tensor.
|
||||
@ -25,6 +25,6 @@ The indices in `argmax` are flattened, so that a maximum value at position
|
||||
|
||||
A tuple of `Tensor` objects (output, argmax).
|
||||
|
||||
* <b>`output`</b>: A `Tensor` of type `float32`. The max pooled output tensor.
|
||||
* <b>`output`</b>: A `Tensor`. Has the same type as `input`. The max pooled output tensor.
|
||||
* <b>`argmax`</b>: A `Tensor` of type `Targmax`. 4-D. The flattened indices of the max values chosen for each output.
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
Masks elements of `IndexedSlices`.
|
||||
|
||||
Given an `IndexedSlices` instance `a`, returns another `IndexedSlices` that
|
||||
contains a subset of the slices of `a`. Only the slices at indices specified
|
||||
in `mask_indices` are returned.
|
||||
contains a subset of the slices of `a`. Only the slices at indices not
|
||||
specified in `mask_indices` are returned.
|
||||
|
||||
This is useful when you need to extract a subset of slices in an
|
||||
`IndexedSlices` object.
|
||||
@ -18,7 +18,7 @@ a.indices => [12, 26, 37, 45]
|
||||
tf.shape(a.values) => [4, 10]
|
||||
|
||||
# `b` will be the subset of `a` slices at its second and third indices, so
|
||||
# we want to mask of its first and last indices (which are at absolute
|
||||
# we want to mask its first and last indices (which are at absolute
|
||||
# indices 12, 45)
|
||||
b = tf.sparse_mask(a, [12, 45])
|
||||
|
||||
|
@ -690,7 +690,7 @@ The indices in `argmax` are flattened, so that a maximum value at position
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`input`</b>: A `Tensor` of type `float32`.
|
||||
* <b>`input`</b>: A `Tensor`. Must be one of the following types: `float32`, `half`.
|
||||
4-D with shape `[batch, height, width, channels]`. Input to pool over.
|
||||
* <b>`ksize`</b>: A list of `ints` that has length `>= 4`.
|
||||
The size of the window for each dimension of the input tensor.
|
||||
@ -706,7 +706,7 @@ The indices in `argmax` are flattened, so that a maximum value at position
|
||||
|
||||
A tuple of `Tensor` objects (output, argmax).
|
||||
|
||||
* <b>`output`</b>: A `Tensor` of type `float32`. The max pooled output tensor.
|
||||
* <b>`output`</b>: A `Tensor`. Has the same type as `input`. The max pooled output tensor.
|
||||
* <b>`argmax`</b>: A `Tensor` of type `Targmax`. 4-D. The flattened indices of the max values chosen for each output.
|
||||
|
||||
|
||||
@ -1244,23 +1244,30 @@ a probability distribution for each entry, see
|
||||
on `logits` internally for efficiency. Do not call this op with the
|
||||
output of `softmax`, as it will produce incorrect results.
|
||||
|
||||
`logits` must have the shape `[batch_size, num_classes]`
|
||||
and dtype `float32` or `float64`.
|
||||
|
||||
`labels` must have the shape `[batch_size]` and dtype `int32` or `int64`.
|
||||
A common use case is to have logits of shape `[batch_size, num_classes]` and
|
||||
labels of shape `[batch_size]`. But higher dimensions are supported.
|
||||
|
||||
##### Args:
|
||||
|
||||
|
||||
* <b>`logits`</b>: Unscaled log probabilities.
|
||||
* <b>`labels`</b>: Each entry `labels[i]` must be an index in `[0, num_classes)`. Other
|
||||
values will result in a loss of 0, but incorrect gradient computations.
|
||||
* <b>`logits`</b>: Unscaled log probabilities of rank `r` and shape
|
||||
`[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
|
||||
* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
|
||||
`int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
|
||||
Other values will result in a loss of 0, but incorrect gradient
|
||||
computations.
|
||||
* <b>`name`</b>: A name for the operation (optional).
|
||||
|
||||
##### Returns:
|
||||
|
||||
A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
|
||||
softmax cross entropy loss.
|
||||
A `Tensor` of the same shape as `labels` and of the same type as `logits`
|
||||
with the softmax cross entropy loss.
|
||||
|
||||
##### Raises:
|
||||
|
||||
|
||||
* <b>`ValueError`</b>: If logits are scalars (need to have rank >= 1) or if the rank
|
||||
of the labels is not equal to the rank of the logits minus one.
|
||||
|
||||
|
||||
- - -
|
||||
|
@ -1895,8 +1895,8 @@ Requires `updates.shape = indices.shape + ref.shape[1:]`.
|
||||
Masks elements of `IndexedSlices`.
|
||||
|
||||
Given an `IndexedSlices` instance `a`, returns another `IndexedSlices` that
|
||||
contains a subset of the slices of `a`. Only the slices at indices specified
|
||||
in `mask_indices` are returned.
|
||||
contains a subset of the slices of `a`. Only the slices at indices not
|
||||
specified in `mask_indices` are returned.
|
||||
|
||||
This is useful when you need to extract a subset of slices in an
|
||||
`IndexedSlices` object.
|
||||
@ -1910,7 +1910,7 @@ a.indices => [12, 26, 37, 45]
|
||||
tf.shape(a.values) => [4, 10]
|
||||
|
||||
# `b` will be the subset of `a` slices at its second and third indices, so
|
||||
# we want to mask of its first and last indices (which are at absolute
|
||||
# we want to mask its first and last indices (which are at absolute
|
||||
# indices 12, 45)
|
||||
b = tf.sparse_mask(a, [12, 45])
|
||||
|
||||
|
@ -10,7 +10,7 @@ There are three main methods of getting data into a TensorFlow program:
|
||||
|
||||
[TOC]
|
||||
|
||||
## Feeding
|
||||
## Feeding
|
||||
|
||||
TensorFlow's feed mechanism lets you inject data into any Tensor in a
|
||||
computation graph. A python computation can thus feed data directly into the
|
||||
@ -377,11 +377,11 @@ Again, the example queue will have some elements queued, so training will
|
||||
continue until those are exhausted. If the example queue is a
|
||||
[`RandomShuffleQueue`](../../api_docs/python/io_ops.md#RandomShuffleQueue), say
|
||||
because you are using `shuffle_batch` or `shuffle_batch_join`, it normally will
|
||||
avoid ever going having fewer than its `min_after_dequeue` attr elements
|
||||
buffered. However, once the queue is closed that restriction will be lifted and
|
||||
the queue will eventually empty. At that point the actual training threads,
|
||||
when they try and dequeue from example queue, will start getting `OutOfRange`
|
||||
errors and exiting. Once all the training threads are done,
|
||||
avoid ever having fewer than its `min_after_dequeue` attr elements buffered.
|
||||
However, once the queue is closed that restriction will be lifted and the queue
|
||||
will eventually empty. At that point the actual training threads, when they
|
||||
try to dequeue from the example queue, will start getting `OutOfRange` errors and
|
||||
exiting. Once all the training threads are done,
|
||||
[`tf.train.Coordinator.join`](../../api_docs/python/train.md#Coordinator.join)
|
||||
will return and you can exit cleanly.
|
||||
|
||||
|
@ -146,7 +146,7 @@ for step in xrange(1000000):
|
||||
# When done, ask the threads to stop.
|
||||
coord.request_stop()
|
||||
# And wait for them to actually do it.
|
||||
coord.join(threads)
|
||||
coord.join(enqueue_threads)
|
||||
```
|
||||
|
||||
## Handling Exceptions
|
||||
|
@ -178,9 +178,9 @@ https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/get_starte
|
||||
[bazel](https://github.com/bazelbuild/bazel)).
|
||||
|
||||
Next:
|
||||
```
|
||||
```bash
|
||||
cd tensorflow/models/rnn/ptb
|
||||
python ptb_word_lm --data_path=/tmp/simple-examples/data/ --model small
|
||||
python ptb_word_lm.py --data_path=/tmp/simple-examples/data/ --model small
|
||||
```
|
||||
|
||||
There are 3 supported model configurations in the tutorial code: "small",
|
||||
|
@ -48,9 +48,19 @@ EVAL_FREQUENCY = 100 # Number of steps between evaluations.
|
||||
|
||||
|
||||
tf.app.flags.DEFINE_boolean("self_test", False, "True if running a self test.")
|
||||
tf.app.flags.DEFINE_boolean('use_fp16', False,
|
||||
"Use half floats instead of full floats if True.")
|
||||
FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def data_type():
|
||||
"""Return the type of the activations, weights, and placeholder variables."""
|
||||
if FLAGS.use_fp16:
|
||||
return tf.float16
|
||||
else:
|
||||
return tf.float32
|
||||
|
||||
|
||||
def maybe_download(filename):
|
||||
"""Download the data from Yann's website, unless it's already here."""
|
||||
if not tf.gfile.Exists(WORK_DIRECTORY):
|
||||
@ -142,11 +152,11 @@ def main(argv=None): # pylint: disable=unused-argument
|
||||
# These placeholder nodes will be fed a batch of training data at each
|
||||
# training step using the {feed_dict} argument to the Run() call below.
|
||||
train_data_node = tf.placeholder(
|
||||
tf.float32,
|
||||
data_type(),
|
||||
shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
|
||||
train_labels_node = tf.placeholder(tf.int64, shape=(BATCH_SIZE,))
|
||||
eval_data = tf.placeholder(
|
||||
tf.float32,
|
||||
data_type(),
|
||||
shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
|
||||
|
||||
# The variables below hold all the trainable weights. They are passed an
|
||||
@ -155,24 +165,24 @@ def main(argv=None): # pylint: disable=unused-argument
|
||||
conv1_weights = tf.Variable(
|
||||
tf.truncated_normal([5, 5, NUM_CHANNELS, 32], # 5x5 filter, depth 32.
|
||||
stddev=0.1,
|
||||
seed=SEED))
|
||||
conv1_biases = tf.Variable(tf.zeros([32]))
|
||||
conv2_weights = tf.Variable(
|
||||
tf.truncated_normal([5, 5, 32, 64],
|
||||
stddev=0.1,
|
||||
seed=SEED))
|
||||
conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
|
||||
seed=SEED, dtype=data_type()))
|
||||
conv1_biases = tf.Variable(tf.zeros([32], dtype=data_type()))
|
||||
conv2_weights = tf.Variable(tf.truncated_normal(
|
||||
[5, 5, 32, 64], stddev=0.1,
|
||||
seed=SEED, dtype=data_type()))
|
||||
conv2_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=data_type()))
|
||||
fc1_weights = tf.Variable( # fully connected, depth 512.
|
||||
tf.truncated_normal(
|
||||
[IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
|
||||
stddev=0.1,
|
||||
seed=SEED))
|
||||
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
|
||||
fc2_weights = tf.Variable(
|
||||
tf.truncated_normal([512, NUM_LABELS],
|
||||
tf.truncated_normal([IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
|
||||
stddev=0.1,
|
||||
seed=SEED))
|
||||
fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))
|
||||
seed=SEED,
|
||||
dtype=data_type()))
|
||||
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=data_type()))
|
||||
fc2_weights = tf.Variable(tf.truncated_normal([512, NUM_LABELS],
|
||||
stddev=0.1,
|
||||
seed=SEED,
|
||||
dtype=data_type()))
|
||||
fc2_biases = tf.Variable(tf.constant(
|
||||
0.1, shape=[NUM_LABELS], dtype=data_type()))
|
||||
|
||||
# We will replicate the model structure for the training subgraph, as well
|
||||
# as the evaluation subgraphs, while sharing the trainable parameters.
|
||||
@ -230,7 +240,7 @@ def main(argv=None): # pylint: disable=unused-argument
|
||||
|
||||
# Optimizer: set up a variable that's incremented once per batch and
|
||||
# controls the learning rate decay.
|
||||
batch = tf.Variable(0)
|
||||
batch = tf.Variable(0, dtype=data_type())
|
||||
# Decay once per epoch, using an exponential schedule starting at 0.01.
|
||||
learning_rate = tf.train.exponential_decay(
|
||||
0.01, # Base learning rate.
|
||||
|
@ -99,6 +99,42 @@ def GetShrunkInceptionMaxPoolShapes(shrink=30):
|
||||
|
||||
class PoolingTest(tf.test.TestCase):
|
||||
|
||||
def _VerifyOneType(self, pool_func, input_sizes, ksize, strides, padding,
|
||||
data_format, data_type, expected, use_gpu):
|
||||
"""Verifies the output values of the pooling function.
|
||||
|
||||
Args:
|
||||
pool_func: Function to be called, co.MaxPool, co.AvgPool,
|
||||
or the Lua version.
|
||||
input_sizes: Input tensor dimensions.
|
||||
ksize: The kernel size dimensions
|
||||
strides: The stride dimensions
|
||||
padding: Padding type.
|
||||
data_format: The data format we use to run the pooling operation.
|
||||
data_type: The data type to use to run the pooling operation.
|
||||
expected: An array containing the expected operation outputs.
|
||||
use_gpu: Whether we are running on GPU.
|
||||
"""
|
||||
total_size = 1
|
||||
for s in input_sizes:
|
||||
total_size *= s
|
||||
# Initializes the input tensor with array containing incrementing
|
||||
# numbers from 1.
|
||||
x = [f * 1.0 for f in range(1, total_size + 1)]
|
||||
with self.test_session(use_gpu=use_gpu) as sess:
|
||||
t = tf.constant(x, shape=input_sizes, dtype=data_type)
|
||||
if data_format == "NCHW":
|
||||
t = NHWCToNCHW(t)
|
||||
ksize = NHWCToNCHW(ksize)
|
||||
strides = NHWCToNCHW(strides)
|
||||
t = pool_func(t, ksize=ksize, strides=strides, padding=padding,
|
||||
data_format=data_format)
|
||||
if data_format == "NCHW":
|
||||
t = NCHWToNHWC(t)
|
||||
actual = t.eval()
|
||||
self.assertAllCloseAccordingToType(expected, actual.flatten())
|
||||
self.assertShapeEqual(actual, t)
|
||||
|
||||
def _VerifyOneTest(self, pool_func, input_sizes, ksize, strides, padding,
|
||||
data_format, expected, use_gpu):
|
||||
"""Verifies the output values of the pooling function.
|
||||
@ -114,25 +150,12 @@ class PoolingTest(tf.test.TestCase):
|
||||
expected: An array containing the expected operation outputs.
|
||||
use_gpu: Whether we are running on GPU.
|
||||
"""
|
||||
total_size = 1
|
||||
for s in input_sizes:
|
||||
total_size *= s
|
||||
# Initializes the input tensor with array containing incrementing
|
||||
# numbers from 1.
|
||||
x = [f * 1.0 for f in range(1, total_size + 1)]
|
||||
with self.test_session(use_gpu=use_gpu) as sess:
|
||||
t = tf.constant(x, shape=input_sizes)
|
||||
if data_format == "NCHW":
|
||||
t = NHWCToNCHW(t)
|
||||
ksize = NHWCToNCHW(ksize)
|
||||
strides = NHWCToNCHW(strides)
|
||||
t = pool_func(t, ksize=ksize, strides=strides, padding=padding,
|
||||
data_format=data_format)
|
||||
if data_format == "NCHW":
|
||||
t = NCHWToNHWC(t)
|
||||
actual = t.eval()
|
||||
self.assertAllClose(expected, actual.flatten())
|
||||
self.assertShapeEqual(actual, t)
|
||||
self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding,
|
||||
data_format, tf.float32, expected, use_gpu)
|
||||
|
||||
if not use_gpu or test_util.CudaSupportsHalfMatMulAndConv():
|
||||
self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding,
|
||||
data_format, tf.float16, expected, use_gpu)
|
||||
|
||||
def _VerifyValues(self, pool_func, input_sizes, ksize, strides, padding,
|
||||
expected, use_gpu):
|
||||
@ -372,32 +395,40 @@ class PoolingTest(tf.test.TestCase):
|
||||
|
||||
def testKernelSmallerThanStrideValid(self):
|
||||
for use_gpu in [True, False]:
|
||||
self._VerifyValues(tf.nn.max_pool, input_sizes=[1, 7, 7, 1],
|
||||
ksize=[1, 2, 2, 1], strides=[1, 3, 3, 1],
|
||||
padding="VALID",
|
||||
expected=[9, 12, 30, 33],
|
||||
use_gpu=use_gpu)
|
||||
self._VerifyValues(tf.nn.max_pool,
|
||||
input_sizes=[1, 7, 7, 1],
|
||||
ksize=[1, 2, 2, 1],
|
||||
strides=[1, 3, 3, 1],
|
||||
padding="VALID",
|
||||
expected=[9, 12, 30, 33],
|
||||
use_gpu=use_gpu)
|
||||
|
||||
self._VerifyValues(tf.nn.avg_pool, input_sizes=[1, 7, 7, 1],
|
||||
ksize=[1, 2, 2, 1], strides=[1, 3, 3, 1],
|
||||
padding="VALID",
|
||||
expected=[5, 8, 26, 29],
|
||||
use_gpu=use_gpu)
|
||||
self._VerifyValues(tf.nn.avg_pool,
|
||||
input_sizes=[1, 7, 7, 1],
|
||||
ksize=[1, 2, 2, 1],
|
||||
strides=[1, 3, 3, 1],
|
||||
padding="VALID",
|
||||
expected=[5, 8, 26, 29],
|
||||
use_gpu=use_gpu)
|
||||
|
||||
def testKernelSmallerThanStrideSame(self):
|
||||
for use_gpu in [True, False]:
|
||||
for pool_func in [tf.nn.max_pool, tf.nn.avg_pool]:
|
||||
self._VerifyValues(pool_func, input_sizes=[1, 3, 3, 1],
|
||||
ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1],
|
||||
padding="SAME",
|
||||
expected=[1, 3, 7, 9],
|
||||
use_gpu=use_gpu)
|
||||
for pool_func in [tf.nn.max_pool, tf.nn.avg_pool]:
|
||||
self._VerifyValues(pool_func,
|
||||
input_sizes=[1, 3, 3, 1],
|
||||
ksize=[1, 1, 1, 1],
|
||||
strides=[1, 2, 2, 1],
|
||||
padding="SAME",
|
||||
expected=[1, 3, 7, 9],
|
||||
use_gpu=use_gpu)
|
||||
|
||||
self._VerifyValues(pool_func, input_sizes=[1, 4, 4, 1],
|
||||
ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1],
|
||||
padding="SAME",
|
||||
expected=[1, 3, 9, 11],
|
||||
use_gpu=use_gpu)
|
||||
self._VerifyValues(pool_func,
|
||||
input_sizes=[1, 4, 4, 1],
|
||||
ksize=[1, 1, 1, 1],
|
||||
strides=[1, 2, 2, 1],
|
||||
padding="SAME",
|
||||
expected=[1, 3, 9, 11],
|
||||
use_gpu=use_gpu)
|
||||
|
||||
def _testDepthwiseMaxPoolInvalidConfig(self, in_size, ksize, strides,
|
||||
error_msg, use_gpu=False):
|
||||
@ -425,43 +456,49 @@ class PoolingTest(tf.test.TestCase):
|
||||
# The following are tests that verify that the CPU and GPU implementations
|
||||
# produce the same results.
|
||||
def _CompareMaxPoolingFwd(self, input_shape, ksize, strides, padding):
|
||||
tensor_input = np.random.rand(*input_shape).astype(np.float32)
|
||||
with self.test_session(use_gpu=True):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
out_op, _ = tf.nn.max_pool_with_argmax(t, ksize, strides, padding)
|
||||
gpu_val = out_op.eval()
|
||||
with self.test_session(use_gpu=False):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
out_op = tf.nn.max_pool(t, ksize, strides, padding)
|
||||
cpu_val = out_op.eval()
|
||||
self.assertAllClose(cpu_val, gpu_val, rtol=1e-5, atol=1e-5)
|
||||
for dtype in np.float32, np.float16:
|
||||
tensor_input = np.random.rand(*input_shape).astype(dtype)
|
||||
with self.test_session(use_gpu=True):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
out_op, _ = tf.nn.max_pool_with_argmax(t, ksize, strides, padding)
|
||||
gpu_val = out_op.eval()
|
||||
with self.test_session(use_gpu=False):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
out_op = tf.nn.max_pool(t, ksize, strides, padding)
|
||||
cpu_val = out_op.eval()
|
||||
self.assertAllCloseAccordingToType(cpu_val, gpu_val)
|
||||
|
||||
def _CompareMaxPoolingBk(self, input_shape, output_shape, ksize, strides,
|
||||
padding):
|
||||
# Generate numbers in a narrow range, so that there are many duplicates
|
||||
# in the input.
|
||||
tensor_input = np.random.random_integers(0, 3,
|
||||
input_shape).astype(np.float32)
|
||||
tensor_output = np.random.rand(*output_shape).astype(np.float32)
|
||||
with self.test_session(use_gpu=True):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
_, argmax_op = tf.nn.max_pool_with_argmax(t, ksize, strides, padding)
|
||||
argmax = argmax_op.eval()
|
||||
grad_in = tf.constant(tensor_output, shape=output_shape)
|
||||
out_op = gen_nn_ops._max_pool_grad_with_argmax(t, grad_in, argmax,
|
||||
ksize, strides, padding)
|
||||
gpu_val = out_op.eval()
|
||||
self.assertShapeEqual(gpu_val, out_op)
|
||||
with self.test_session(use_gpu=False):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
out_op = tf.nn.max_pool(t, ksize, strides, padding)
|
||||
orig_out = out_op.eval()
|
||||
grad_in = tf.constant(tensor_output, shape=output_shape)
|
||||
out_op = gen_nn_ops._max_pool_grad(t, orig_out, grad_in, ksize,
|
||||
strides, padding)
|
||||
cpu_val = out_op.eval()
|
||||
self.assertShapeEqual(cpu_val, out_op)
|
||||
self.assertAllClose(cpu_val, gpu_val, rtol=1e-5, atol=1e-5)
|
||||
for dtype in np.float32, np.float16:
|
||||
# Generate numbers in a narrow range, so that there are many duplicates
|
||||
# in the input.
|
||||
tensor_input = np.random.random_integers(0, 3, input_shape).astype(dtype)
|
||||
tensor_output = np.random.rand(*output_shape).astype(dtype)
|
||||
with self.test_session(use_gpu=True):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
_, argmax_op = tf.nn.max_pool_with_argmax(t, ksize, strides, padding)
|
||||
argmax = argmax_op.eval()
|
||||
grad_in = tf.constant(tensor_output, shape=output_shape)
|
||||
out_op = gen_nn_ops._max_pool_grad_with_argmax(t, grad_in, argmax,
|
||||
ksize, strides, padding)
|
||||
gpu_val = out_op.eval()
|
||||
self.assertShapeEqual(gpu_val, out_op)
|
||||
with self.test_session(use_gpu=False):
|
||||
t = tf.constant(tensor_input, shape=input_shape)
|
||||
out_op = tf.nn.max_pool(t, ksize, strides, padding)
|
||||
orig_out = out_op.eval()
|
||||
grad_in = tf.constant(tensor_output, shape=output_shape)
|
||||
out_op = gen_nn_ops._max_pool_grad(t, orig_out, grad_in, ksize, strides,
|
||||
padding)
|
||||
cpu_val = out_op.eval()
|
||||
self.assertShapeEqual(cpu_val, out_op)
|
||||
if dtype == np.float16:
|
||||
# The CPU version accumulates its gradient on fp16, so it's less
|
||||
# accurate than the GPU version that does the accumulation on fp32
|
||||
self.assertAllClose(cpu_val, gpu_val, rtol=0.01, atol=0.01)
|
||||
else:
|
||||
self.assertAllClose(cpu_val, gpu_val)
|
||||
|
||||
def testMaxPoolingWithArgmax(self):
|
||||
# MaxPoolWithArgMax is implemented only on GPU.
|
||||
|
@ -30,6 +30,9 @@ from tensorflow.python.ops import sparse_ops
|
||||
class SparseXentTest(tf.test.TestCase):
|
||||
|
||||
def _npXent(self, features, labels):
|
||||
is_higher_dim = len(features.shape) > 2
|
||||
features = np.reshape(features, [-1, features.shape[-1]])
|
||||
labels = np.reshape(labels, [-1])
|
||||
batch_dim = 0
|
||||
class_dim = 1
|
||||
batch_size = features.shape[batch_dim]
|
||||
@ -40,14 +43,15 @@ class SparseXentTest(tf.test.TestCase):
|
||||
labels_mat[np.arange(batch_size), labels] = 1.0
|
||||
bp = (probs - labels_mat)
|
||||
l = -np.sum(labels_mat * np.log(probs + 1.0e-20), axis=1)
|
||||
return l, bp
|
||||
return l, bp, is_higher_dim
|
||||
|
||||
def _testXent(self, np_features, np_labels, use_gpu=False):
|
||||
np_loss, np_backprop = self._npXent(np_features, np_labels)
|
||||
np_loss, np_backprop, is_higher_dim = self._npXent(np_features, np_labels)
|
||||
with self.test_session(use_gpu=use_gpu) as sess:
|
||||
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
|
||||
np_features, np_labels)
|
||||
backprop = loss.op.outputs[1]
|
||||
backprop = (loss.op.inputs[0].op.outputs[1] if is_higher_dim
|
||||
else loss.op.outputs[1])
|
||||
tf_loss, tf_backprop = sess.run([loss, backprop])
|
||||
self.assertAllCloseAccordingToType(np_loss, tf_loss)
|
||||
self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
|
||||
@ -71,14 +75,6 @@ class SparseXentTest(tf.test.TestCase):
|
||||
self._testSingleClass(use_gpu=True)
|
||||
self._testSingleClass(use_gpu=False)
|
||||
|
||||
def testRankTooLarge(self):
|
||||
np_features = np.array(
|
||||
[[[1., 1., 1., 1.]], [[1., 2., 3., 4.]]]).astype(np.float32)
|
||||
np_labels = np.array([1, 2])
|
||||
self.assertRaisesRegexp(
|
||||
ValueError, "must have rank 2",
|
||||
tf.nn.sparse_softmax_cross_entropy_with_logits, np_features, np_labels)
|
||||
|
||||
def testNpXent(self):
|
||||
# We create 2 batches of logits for testing.
|
||||
# batch 0 is the boring uniform distribution: 1, 1, 1, 1, with target 3.
|
||||
@ -104,7 +100,7 @@ class SparseXentTest(tf.test.TestCase):
|
||||
# With a hard 1, the backprop is [0.032 - 1.0 = -0.968, 0.087, 0.237, 0.644]
|
||||
# The loss for this batch is [1.0 * -log(0.25), 1.0 * -log(0.032)]
|
||||
# = [1.3862, 3.4420]
|
||||
np_loss, np_backprop = self._npXent(np.array(features), np.array(labels))
|
||||
np_loss, np_backprop, _ = self._npXent(np.array(features), np.array(labels))
|
||||
self.assertAllClose(np.array([[0.25, 0.25, 0.25, -0.75],
|
||||
[-0.968, 0.087, 0.237, 0.6439]]),
|
||||
np_backprop,
|
||||
@ -114,15 +110,21 @@ class SparseXentTest(tf.test.TestCase):
|
||||
|
||||
def testShapeMismatch(self):
|
||||
with self.test_session():
|
||||
with self.assertRaises(ValueError):
|
||||
with self.assertRaisesRegexp(ValueError, ".*Rank mismatch:*"):
|
||||
tf.nn.sparse_softmax_cross_entropy_with_logits(
|
||||
[[0., 1.], [2., 3.]], [[0, 2]])
|
||||
[[0., 1.], [2., 3.], [2., 3.]], [[0, 2]])
|
||||
|
||||
def testNotMatrix(self):
|
||||
def testScalar(self):
|
||||
with self.test_session():
|
||||
with self.assertRaises(ValueError):
|
||||
with self.assertRaisesRegexp(ValueError, ".*Logits cannot be scalars*"):
|
||||
tf.nn.sparse_softmax_cross_entropy_with_logits(
|
||||
[0., 1., 2., 3.], [0, 2])
|
||||
tf.constant(1.0), tf.constant(0))
|
||||
|
||||
def testVector(self):
|
||||
with self.test_session():
|
||||
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
|
||||
tf.constant([1.0]), tf.constant(0))
|
||||
self.assertAllClose(0.0, loss.eval())
|
||||
|
||||
def testFloat(self):
|
||||
for label_dtype in np.int32, np.int64:
|
||||
@ -155,6 +157,31 @@ class SparseXentTest(tf.test.TestCase):
|
||||
print("cross entropy gradient err = ", err)
|
||||
self.assertLess(err, 5e-8)
|
||||
|
||||
def _testHighDim(self, use_gpu, features, labels):
|
||||
np_loss, np_backprop, _ = self._npXent(np.array(features), np.array(labels))
|
||||
# manually reshape loss
|
||||
np_loss = np.reshape(np_loss, np.array(labels).shape)
|
||||
with self.test_session(use_gpu=use_gpu) as sess:
|
||||
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
|
||||
features, labels)
|
||||
backprop = loss.op.inputs[0].op.outputs[1]
|
||||
tf_loss, tf_backprop = sess.run([loss, backprop])
|
||||
self.assertAllCloseAccordingToType(np_loss, tf_loss)
|
||||
self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
|
||||
|
||||
def testHighDim(self):
|
||||
features = [[[1., 1., 1., 1.]], [[1., 2., 3., 4.]]]
|
||||
labels = [[3], [0]]
|
||||
self._testHighDim(True, features, labels)
|
||||
self._testHighDim(False, features, labels)
|
||||
|
||||
def testHighDim2(self):
|
||||
features = [[[1., 1., 1., 1.], [2., 2., 2., 2.]],
|
||||
[[1., 2., 3., 4.], [5., 6., 7., 8.]]]
|
||||
labels = [[3, 2], [0, 3]]
|
||||
self._testHighDim(True, features, labels)
|
||||
self._testHighDim(False, features, labels)
|
||||
|
||||
|
||||
def _sparse_vs_dense_xent_benchmark_dense(labels, logits):
|
||||
labels = tf.identity(labels)
|
||||
|
@ -126,6 +126,7 @@ def shape(input, name=None):
|
||||
else:
|
||||
return gen_array_ops.shape(input, name=name)
|
||||
|
||||
|
||||
def rank(input, name=None):
|
||||
"""Returns the rank of a tensor.
|
||||
|
||||
@ -612,8 +613,8 @@ def sparse_mask(a, mask_indices, name=None):
|
||||
"""Masks elements of `IndexedSlices`.
|
||||
|
||||
Given an `IndexedSlices` instance `a`, returns another `IndexedSlices` that
|
||||
contains a subset of the slices of `a`. Only the slices at indices specified
|
||||
in `mask_indices` are returned.
|
||||
contains a subset of the slices of `a`. Only the slices at indices not
|
||||
specified in `mask_indices` are returned.
|
||||
|
||||
This is useful when you need to extract a subset of slices in an
|
||||
`IndexedSlices` object.
|
||||
@ -627,7 +628,7 @@ def sparse_mask(a, mask_indices, name=None):
|
||||
tf.shape(a.values) => [4, 10]
|
||||
|
||||
# `b` will be the subset of `a` slices at its second and third indices, so
|
||||
# we want to mask of its first and last indices (which are at absolute
|
||||
# we want to mask its first and last indices (which are at absolute
|
||||
# indices 12, 45)
|
||||
b = tf.sparse_mask(a, [12, 45])
|
||||
|
||||
|
@ -440,30 +440,65 @@ def sparse_softmax_cross_entropy_with_logits(logits, labels, name=None):
|
||||
on `logits` internally for efficiency. Do not call this op with the
|
||||
output of `softmax`, as it will produce incorrect results.
|
||||
|
||||
`logits` must have the shape `[batch_size, num_classes]`
|
||||
and dtype `float32` or `float64`.
|
||||
|
||||
`labels` must have the shape `[batch_size]` and dtype `int32` or `int64`.
|
||||
A common use case is to have logits of shape `[batch_size, num_classes]` and
|
||||
labels of shape `[batch_size]`. But higher dimensions are supported.
|
||||
|
||||
Args:
|
||||
logits: Unscaled log probabilities.
|
||||
labels: Each entry `labels[i]` must be an index in `[0, num_classes)`. Other
|
||||
values will result in a loss of 0, but incorrect gradient computations.
|
||||
logits: Unscaled log probabilities of rank `r` and shape
|
||||
`[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
|
||||
labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
|
||||
`int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
|
||||
Other values will result in a loss of 0, but incorrect gradient
|
||||
computations.
|
||||
name: A name for the operation (optional).
|
||||
|
||||
Returns:
|
||||
A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
|
||||
softmax cross entropy loss.
|
||||
A `Tensor` of the same shape as `labels` and of the same type as `logits`
|
||||
with the softmax cross entropy loss.
|
||||
|
||||
Raises:
|
||||
ValueError: If logits are scalars (need to have rank >= 1) or if the rank
|
||||
of the labels is not equal to the rank of the logits minus one.
|
||||
"""
|
||||
# TODO(pcmurray) Raise an error when the label is not an index in
|
||||
# [0, num_classes). Note: This could break users who call this with bad
|
||||
# labels, but disregard the bad results.
|
||||
|
||||
# The second output tensor contains the gradients. We use it in
|
||||
# _CrossEntropyGrad() in nn_grad but not here.
|
||||
cost, unused_backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
|
||||
logits, labels, name=name)
|
||||
return cost
|
||||
# Reshape logits to rank 2 and labels to rank 1 (when logits are not already rank 2).
|
||||
with ops.op_scope([labels, logits], name,
|
||||
"SparseSoftmaxCrossEntropyWithLogits"):
|
||||
labels = ops.convert_to_tensor(labels)
|
||||
logits = ops.convert_to_tensor(logits)
|
||||
|
||||
# Store label shape for result later.
|
||||
labels_static_shape = labels.get_shape()
|
||||
labels_shape = array_ops.shape(labels)
|
||||
if logits.get_shape().ndims is not None and logits.get_shape().ndims == 0:
|
||||
raise ValueError("Logits cannot be scalars - received shape %s.",
|
||||
logits.get_shape())
|
||||
if logits.get_shape().ndims is not None and (
|
||||
labels_static_shape.ndims is not None and
|
||||
labels_static_shape.ndims != logits.get_shape().ndims - 1):
|
||||
raise ValueError("Rank mismatch: Labels rank (received %s) should equal "
|
||||
"logits rank (received %s) - 1.",
|
||||
labels_static_shape.ndims, logits.get_shape().ndims)
|
||||
# Check if no reshapes are required.
|
||||
if logits.get_shape().ndims == 2:
|
||||
cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
|
||||
logits, labels, name=name)
|
||||
return cost
|
||||
# Reshape logits to 2 dim, labels to 1 dim.
|
||||
num_classes = array_ops.gather(array_ops.shape(logits),
|
||||
array_ops.rank(logits) - 1)
|
||||
logits = array_ops.reshape(logits, [-1, num_classes])
|
||||
labels = array_ops.reshape(labels, [-1])
|
||||
# The second output tensor contains the gradients. We use it in
|
||||
# _CrossEntropyGrad() in nn_grad but not here.
|
||||
cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
|
||||
logits, labels, name=name)
|
||||
cost = array_ops.reshape(cost, labels_shape)
|
||||
cost.set_shape(labels_static_shape)
|
||||
return cost
|
||||
|
||||
|
||||
@ops.RegisterShape("SparseSoftmaxCrossEntropyWithLogits")
|
||||
|
101
tensorflow/python/training/proximal_adagrad.py
Normal file
101
tensorflow/python/training/proximal_adagrad.py
Normal file
@ -0,0 +1,101 @@
|
||||
# Copyright 2015 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""ProximalAdagrad for TensorFlow."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.ops import constant_op
|
||||
from tensorflow.python.training import optimizer
|
||||
from tensorflow.python.training import training_ops
|
||||
|
||||
|
||||
class ProximalAdagradOptimizer(optimizer.Optimizer):
|
||||
# pylint: disable=line-too-long
|
||||
"""Optimizer that implements the Proximal Adagrad algorithm.
|
||||
|
||||
See this [paper](http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf).
|
||||
|
||||
@@__init__
|
||||
"""
|
||||
|
||||
def __init__(self, learning_rate, initial_accumulator_value=0.1,
|
||||
l1_regularization_strength=0.0, l2_regularization_strength=0.0,
|
||||
use_locking=False, name="ProximalAdagrad"):
|
||||
"""Construct a new ProximalAdagrad optimizer.
|
||||
|
||||
Args:
|
||||
learning_rate: A `Tensor` or a floating point value. The learning rate.
|
||||
initial_accumulator_value: A floating point value.
|
||||
Starting value for the accumulators, must be positive.
|
||||
l1_regularization_strength: A float value, must be greater than or
|
||||
equal to zero.
|
||||
l2_regularization_strength: A float value, must be greater than or
|
||||
equal to zero.
|
||||
use_locking: If `True` use locks for update operations.
|
||||
name: Optional name prefix for the operations created when applying
|
||||
gradients. Defaults to "ProximalAdagrad".
|
||||
|
||||
Raises:
|
||||
ValueError: If the `initial_accumulator_value` is invalid.
|
||||
"""
|
||||
if initial_accumulator_value <= 0.0:
|
||||
raise ValueError("initial_accumulator_value must be positive: %s" %
|
||||
initial_accumulator_value)
|
||||
super(ProximalAdagradOptimizer, self).__init__(use_locking, name)
|
||||
self._learning_rate = learning_rate
|
||||
self._initial_accumulator_value = initial_accumulator_value
|
||||
self._l1_regularization_strength = l1_regularization_strength
|
||||
self._l2_regularization_strength = l2_regularization_strength
|
||||
# Created in _prepare().
|
||||
self._l1_regularization_strength_tensor = None
|
||||
self._l2_regularization_strength_tensor = None
|
||||
self._learning_rate_tensor = None
|
||||
|
||||
def _create_slots(self, var_list):
|
||||
for v in var_list:
|
||||
with ops.colocate_with(v):
|
||||
val = constant_op.constant(self._initial_accumulator_value,
|
||||
shape=v.get_shape())
|
||||
self._get_or_make_slot(v, val, "accumulator", self._name)
|
||||
|
||||
def _prepare(self):
|
||||
self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
|
||||
name="learning_rate")
|
||||
self._l1_regularization_strength_tensor = ops.convert_to_tensor(
|
||||
self._l1_regularization_strength,
|
||||
name="l1_regularization_strength")
|
||||
self._l2_regularization_strength_tensor = ops.convert_to_tensor(
|
||||
self._l2_regularization_strength,
|
||||
name="l2_regularization_strength")
|
||||
|
||||
def _apply_dense(self, grad, var):
|
||||
acc = self.get_slot(var, "accumulator")
|
||||
return training_ops.apply_proximal_adagrad(
|
||||
var, acc, self._learning_rate_tensor,
|
||||
self._l1_regularization_strength_tensor,
|
||||
self._l2_regularization_strength_tensor,
|
||||
grad, use_locking=self._use_locking)
|
||||
|
||||
def _apply_sparse(self, grad, var):
|
||||
acc = self.get_slot(var, "accumulator")
|
||||
return training_ops.sparse_apply_proximal_adagrad(
|
||||
var, acc, self._learning_rate_tensor,
|
||||
self._l1_regularization_strength_tensor,
|
||||
self._l2_regularization_strength_tensor,
|
||||
grad.values, grad.indices,
|
||||
use_locking=self._use_locking)
|
205
tensorflow/python/training/proximal_adagrad_test.py
Normal file
205
tensorflow/python/training/proximal_adagrad_test.py
Normal file
@ -0,0 +1,205 @@
|
||||
# Copyright 2015 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Functional tests for Proximal Adagrad operations."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class ProximalAdagradOptimizerTest(tf.test.TestCase):
|
||||
|
||||
def testProximalAdagradwithoutRegularization(self):
|
||||
with self.test_session() as sess:
|
||||
var0 = tf.Variable([0.0, 0.0])
|
||||
var1 = tf.Variable([0.0, 0.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
opt = tf.train.ProximalAdagradOptimizer(3.0,
|
||||
initial_accumulator_value=0.1,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0)
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose([0.0, 0.0], v0_val)
|
||||
self.assertAllClose([0.0, 0.0], v1_val)
|
||||
|
||||
# Run 3 steps Proximal Adagrad.
|
||||
for _ in range(3):
|
||||
update.run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose(np.array([-2.60260963, -4.29698515]),
|
||||
v0_val)
|
||||
self.assertAllClose(np.array([-0.28432083, -0.56694895]),
|
||||
v1_val)
|
||||
|
||||
def testProximalAdagradwithoutRegularization2(self):
|
||||
with self.test_session() as sess:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
var1 = tf.Variable([4.0, 3.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
|
||||
opt = tf.train.ProximalAdagradOptimizer(3.0,
|
||||
initial_accumulator_value=0.1,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0)
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose([1.0, 2.0], v0_val)
|
||||
self.assertAllClose([4.0, 3.0], v1_val)
|
||||
|
||||
# Run 3 steps Proximal Adagrad.
|
||||
for _ in range(3):
|
||||
update.run()
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose(np.array([-1.60261, -2.296985]),
|
||||
v0_val)
|
||||
self.assertAllClose(np.array([3.715679, 2.433051]),
|
||||
v1_val)
|
||||
|
||||
def testProximalAdagradWithL1(self):
|
||||
with self.test_session() as sess:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
var1 = tf.Variable([4.0, 3.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
|
||||
opt = tf.train.ProximalAdagradOptimizer(3.0,
|
||||
initial_accumulator_value=0.1,
|
||||
l1_regularization_strength=0.001,
|
||||
l2_regularization_strength=0.0)
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose([1.0, 2.0], v0_val)
|
||||
self.assertAllClose([4.0, 3.0], v1_val)
|
||||
|
||||
# Run 10 steps Proximal Adagrad
|
||||
for _ in range(10):
|
||||
update.run()
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose(np.array([0.662907, 0.767398]),
|
||||
v0_val)
|
||||
self.assertAllClose(np.array([2.959304, 1.029232]),
|
||||
v1_val)
|
||||
|
||||
def testProximalAdagradWithL1_L2(self):
|
||||
with self.test_session() as sess:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
var1 = tf.Variable([4.0, 3.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
|
||||
opt = tf.train.ProximalAdagradOptimizer(3.0,
|
||||
initial_accumulator_value=0.1,
|
||||
l1_regularization_strength=0.001,
|
||||
l2_regularization_strength=2.0)
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose([1.0, 2.0], v0_val)
|
||||
self.assertAllClose([4.0, 3.0], v1_val)
|
||||
|
||||
# Run 10 steps Proximal Adagrad.
|
||||
for _ in range(10):
|
||||
update.run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose(np.array([0.043069, 0.080461]),
|
||||
v0_val)
|
||||
self.assertAllClose(np.array([0.004069, 0.008578]),
|
||||
v1_val)
|
||||
|
||||
def applyOptimizer(self, opt, steps=5, is_sparse=False):
|
||||
if is_sparse:
|
||||
var0 = tf.Variable([[1.0], [2.0]])
|
||||
var1 = tf.Variable([[3.0], [4.0]])
|
||||
grads0 = tf.IndexedSlices(tf.constant([0.1], shape=[1, 1]),
|
||||
tf.constant([0]),
|
||||
tf.constant([2, 1]))
|
||||
grads1 = tf.IndexedSlices(tf.constant([0.02], shape=[1, 1]),
|
||||
tf.constant([1]),
|
||||
tf.constant([2, 1]))
|
||||
else:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
var1 = tf.Variable([3.0, 4.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
sess = tf.get_default_session()
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
if is_sparse:
|
||||
self.assertAllClose([[1.0], [2.0]], v0_val)
|
||||
self.assertAllClose([[3.0], [4.0]], v1_val)
|
||||
else:
|
||||
self.assertAllClose([1.0, 2.0], v0_val)
|
||||
self.assertAllClose([3.0, 4.0], v1_val)
|
||||
|
||||
# Run ProximalAdagrad for a few steps
|
||||
for _ in range(steps):
|
||||
update.run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
return v0_val, v1_val
|
||||
|
||||
def testEquivAdagradwithoutRegularization(self):
|
||||
with self.test_session():
|
||||
val0, val1 = self.applyOptimizer(
|
||||
tf.train.ProximalAdagradOptimizer(3.0,
|
||||
initial_accumulator_value=0.1,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0))
|
||||
|
||||
with self.test_session():
|
||||
val2, val3 = self.applyOptimizer(
|
||||
tf.train.AdagradOptimizer(3.0, initial_accumulator_value=0.1))
|
||||
|
||||
self.assertAllClose(val0, val2)
|
||||
self.assertAllClose(val1, val3)
|
||||
|
||||
def testEquivSparseAdagradwithoutRegularization(self):
|
||||
with self.test_session():
|
||||
val0, val1 = self.applyOptimizer(
|
||||
tf.train.ProximalAdagradOptimizer(3.0,
|
||||
initial_accumulator_value=0.1,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0),
|
||||
is_sparse=True)
|
||||
|
||||
with self.test_session():
|
||||
val2, val3 = self.applyOptimizer(
|
||||
tf.train.AdagradOptimizer(3.0, initial_accumulator_value=0.1),
|
||||
is_sparse=True)
|
||||
|
||||
self.assertAllClose(val0, val2)
|
||||
self.assertAllClose(val1, val3)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.test.main()
|
81
tensorflow/python/training/proximal_gradient_descent.py
Normal file
81
tensorflow/python/training/proximal_gradient_descent.py
Normal file
@ -0,0 +1,81 @@
|
||||
# Copyright 2015 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""ProximalGradientDescent for TensorFlow."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.python.framework import ops
|
||||
# pylint: disable=unused-import
|
||||
from tensorflow.python.ops import math_ops
|
||||
# pylint: enable=unused-import
|
||||
from tensorflow.python.training import optimizer
|
||||
from tensorflow.python.training import training_ops
|
||||
|
||||
|
||||
class ProximalGradientDescentOptimizer(optimizer.Optimizer):
|
||||
# pylint: disable=line-too-long
|
||||
"""Optimizer that implements the proximal gradient descent algorithm.
|
||||
|
||||
See this [paper](http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf).
|
||||
|
||||
@@__init__
|
||||
"""
|
||||
|
||||
def __init__(self, learning_rate, l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0, use_locking=False,
|
||||
name="ProximalGradientDescent"):
|
||||
"""Construct a new proximal gradient descent optimizer.
|
||||
|
||||
Args:
|
||||
learning_rate: A Tensor or a floating point value. The learning
|
||||
rate to use.
|
||||
l1_regularization_strength: A float value, must be greater than or
|
||||
equal to zero.
|
||||
l2_regularization_strength: A float value, must be greater than or
|
||||
equal to zero.
|
||||
use_locking: If True use locks for update operations.
|
||||
name: Optional name prefix for the operations created when applying
|
||||
gradients. Defaults to "ProximalGradientDescent".
|
||||
"""
|
||||
super(ProximalGradientDescentOptimizer, self).__init__(use_locking, name)
|
||||
self._learning_rate = learning_rate
|
||||
self._l1_regularization_strength = l1_regularization_strength
|
||||
self._l2_regularization_strength = l2_regularization_strength
|
||||
self._l1_regularization_strength_tensor = None
|
||||
self._l2_regularization_strength_tensor = None
|
||||
|
||||
def _apply_dense(self, grad, var):
|
||||
return training_ops.apply_proximal_gradient_descent(
|
||||
var,
|
||||
self._learning_rate_tensor,
|
||||
self._l1_regularization_strength_tensor,
|
||||
self._l2_regularization_strength_tensor,
|
||||
grad,
|
||||
use_locking=self._use_locking).op
|
||||
|
||||
def _apply_sparse(self, grad, var):
|
||||
delta = ops.IndexedSlices(grad.values * self._learning_rate_tensor,
|
||||
grad.indices, grad.dense_shape)
|
||||
return var.scatter_sub(delta, use_locking=self._use_locking)
|
||||
|
||||
def _prepare(self):
|
||||
self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
|
||||
name="learning_rate")
|
||||
self._l1_regularization_strength_tensor = ops.convert_to_tensor(
|
||||
self._l1_regularization_strength, name="l1_regularization_strength")
|
||||
self._l2_regularization_strength_tensor = ops.convert_to_tensor(
|
||||
self._l2_regularization_strength, name="l2_regularization_strength")
|
178
tensorflow/python/training/proximal_gradient_descent_test.py
Normal file
178
tensorflow/python/training/proximal_gradient_descent_test.py
Normal file
@ -0,0 +1,178 @@
|
||||
# Copyright 2015 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Functional tests for Proximal Gradient Descent operations."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class ProximalGradientDescentOptimizerTest(tf.test.TestCase):
|
||||
|
||||
def testProximalGradientDescentwithoutRegularization(self):
|
||||
with self.test_session() as sess:
|
||||
var0 = tf.Variable([0.0, 0.0])
|
||||
var1 = tf.Variable([0.0, 0.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
opt = tf.train.ProximalGradientDescentOptimizer(
|
||||
3.0,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0)
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose([0.0, 0.0], v0_val)
|
||||
self.assertAllClose([0.0, 0.0], v1_val)
|
||||
|
||||
# Run 3 steps Proximal Gradient Descent.
|
||||
for _ in range(3):
|
||||
update.run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose(np.array([-0.9, -1.8]),
|
||||
v0_val)
|
||||
self.assertAllClose(np.array([-0.09, -0.18]),
|
||||
v1_val)
|
||||
|
||||
def testProximalGradientDescentwithoutRegularization2(self):
|
||||
with self.test_session() as sess:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
var1 = tf.Variable([4.0, 3.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
|
||||
opt = tf.train.ProximalGradientDescentOptimizer(
|
||||
3.0,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0)
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose([1.0, 2.0], v0_val)
|
||||
self.assertAllClose([4.0, 3.0], v1_val)
|
||||
|
||||
# Run 3 steps Proximal Gradient Descent
|
||||
for _ in range(3):
|
||||
update.run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose(np.array([0.1, 0.2]),
|
||||
v0_val)
|
||||
self.assertAllClose(np.array([3.91, 2.82]),
|
||||
v1_val)
|
||||
|
||||
def testProximalGradientDescentWithL1_L2(self):
|
||||
with self.test_session() as sess:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
var1 = tf.Variable([4.0, 3.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
|
||||
opt = tf.train.ProximalGradientDescentOptimizer(
|
||||
3.0,
|
||||
l1_regularization_strength=0.001,
|
||||
l2_regularization_strength=2.0)
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose([1.0, 2.0], v0_val)
|
||||
self.assertAllClose([4.0, 3.0], v1_val)
|
||||
|
||||
# Run 10 steps Proximal Gradient Descent
|
||||
for _ in range(10):
|
||||
update.run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
self.assertAllClose(np.array([0.037125, 0.074625]),
|
||||
v0_val)
|
||||
self.assertAllClose(np.array([0.003375, 0.007125]),
|
||||
v1_val)
|
||||
|
||||
def applyOptimizer(self, opt, steps=5, is_sparse=False):
|
||||
if is_sparse:
|
||||
var0 = tf.Variable([[1.0], [2.0]])
|
||||
var1 = tf.Variable([[3.0], [4.0]])
|
||||
grads0 = tf.IndexedSlices(tf.constant([0.1], shape=[1, 1]),
|
||||
tf.constant([0]),
|
||||
tf.constant([2, 1]))
|
||||
grads1 = tf.IndexedSlices(tf.constant([0.02], shape=[1, 1]),
|
||||
tf.constant([1]),
|
||||
tf.constant([2, 1]))
|
||||
else:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
var1 = tf.Variable([3.0, 4.0])
|
||||
grads0 = tf.constant([0.1, 0.2])
|
||||
grads1 = tf.constant([0.01, 0.02])
|
||||
|
||||
update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
|
||||
tf.initialize_all_variables().run()
|
||||
|
||||
sess = tf.get_default_session()
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
if is_sparse:
|
||||
self.assertAllClose([[1.0], [2.0]], v0_val)
|
||||
self.assertAllClose([[3.0], [4.0]], v1_val)
|
||||
else:
|
||||
self.assertAllClose([1.0, 2.0], v0_val)
|
||||
self.assertAllClose([3.0, 4.0], v1_val)
|
||||
|
||||
# Run the given optimizer for a few steps
|
||||
for _ in range(steps):
|
||||
update.run()
|
||||
|
||||
v0_val, v1_val = sess.run([var0, var1])
|
||||
return v0_val, v1_val
|
||||
|
||||
def testEquivSparseGradientDescentwithoutRegularizaion(self):
|
||||
with self.test_session():
|
||||
val0, val1 = self.applyOptimizer(
|
||||
tf.train.ProximalGradientDescentOptimizer(
|
||||
3.0,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0),
|
||||
is_sparse=True)
|
||||
|
||||
with self.test_session():
|
||||
val2, val3 = self.applyOptimizer(
|
||||
tf.train.GradientDescentOptimizer(3.0), is_sparse=True)
|
||||
|
||||
self.assertAllClose(val0, val2)
|
||||
self.assertAllClose(val1, val3)
|
||||
|
||||
def testEquivGradientDescentwithoutRegularizaion(self):
|
||||
with self.test_session():
|
||||
val0, val1 = self.applyOptimizer(
|
||||
tf.train.ProximalGradientDescentOptimizer(
|
||||
3.0,
|
||||
l1_regularization_strength=0.0,
|
||||
l2_regularization_strength=0.0))
|
||||
|
||||
with self.test_session():
|
||||
val2, val3 = self.applyOptimizer(
|
||||
tf.train.GradientDescentOptimizer(3.0))
|
||||
|
||||
self.assertAllClose(val0, val2)
|
||||
self.assertAllClose(val1, val3)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tf.test.main()
|
@ -151,6 +151,7 @@ from tensorflow.python.ops import state_ops
|
||||
|
||||
from tensorflow.python.training.adadelta import AdadeltaOptimizer
|
||||
from tensorflow.python.training.adagrad import AdagradOptimizer
|
||||
from tensorflow.python.training.proximal_adagrad import ProximalAdagradOptimizer
|
||||
from tensorflow.python.training.adam import AdamOptimizer
|
||||
from tensorflow.python.training.ftrl import FtrlOptimizer
|
||||
from tensorflow.python.training.momentum import MomentumOptimizer
|
||||
@ -158,6 +159,7 @@ from tensorflow.python.training.moving_averages import ExponentialMovingAverage
|
||||
from tensorflow.python.training.optimizer import Optimizer
|
||||
from tensorflow.python.training.rmsprop import RMSPropOptimizer
|
||||
from tensorflow.python.training.gradient_descent import GradientDescentOptimizer
|
||||
from tensorflow.python.training.proximal_gradient_descent import ProximalGradientDescentOptimizer
|
||||
from tensorflow.python.training.sync_replicas_optimizer import SyncReplicasOptimizer
|
||||
|
||||
# Utility classes for training.
|
||||
|
@ -69,6 +69,17 @@ def _ApplyAdagradShape(op):
|
||||
grad_shape = op.inputs[3].get_shape().merge_with(accum_shape)
|
||||
return [grad_shape]
|
||||
|
||||
@ops.RegisterShape("ApplyProximalAdagrad")
|
||||
def _ApplyProximalAdagradShape(op):
|
||||
"""Shape function for the ApplyProximalAdagrad op."""
|
||||
var_shape = op.inputs[0].get_shape()
|
||||
accum_shape = op.inputs[1].get_shape().merge_with(var_shape)
|
||||
_AssertInputIsScalar(op, 2) # lr
|
||||
_AssertInputIsScalar(op, 3) # l1
|
||||
_AssertInputIsScalar(op, 4) # l2
|
||||
grad_shape = op.inputs[5].get_shape().merge_with(accum_shape)
|
||||
return [grad_shape]
|
||||
|
||||
|
||||
@ops.RegisterShape("ApplyFtrl")
|
||||
def _ApplyFtrlShape(op):
|
||||
@ -133,6 +144,32 @@ def _ApplyGradientDescentShape(op):
|
||||
delta_shape = op.inputs[2].get_shape().merge_with(var_shape)
|
||||
return [delta_shape]
|
||||
|
||||
|
||||
@ops.RegisterShape("ApplyProximalGradientDescent")
|
||||
def _ApplyProximalGradientDescentShape(op):
|
||||
"""Shape function for the ApplyProximalGradientDescent op."""
|
||||
var_shape = op.inputs[0].get_shape()
|
||||
_AssertInputIsScalar(op, 1) # alpha
|
||||
_AssertInputIsScalar(op, 2) # l1
|
||||
_AssertInputIsScalar(op, 3) # l2
|
||||
delta_shape = op.inputs[4].get_shape().merge_with(var_shape)
|
||||
return [delta_shape]
|
||||
|
||||
|
||||
@ops.RegisterShape("SparseApplyProximalGradientDescent")
|
||||
def _SparseApplyProximalGradientDescentShape(op):
|
||||
"""Shape function for the SparseApplyProximalGradientDescent op."""
|
||||
var_shape = op.inputs[0].get_shape()
|
||||
_AssertInputIsScalar(op, 1) # lr
|
||||
_AssertInputIsScalar(op, 2) # l1
|
||||
_AssertInputIsScalar(op, 3) # l2
|
||||
grad_shape = op.inputs[4].get_shape().merge_with(
|
||||
tensor_shape.TensorShape([None]).concatenate(var_shape[1:]))
|
||||
unused_indices_shape = op.inputs[5].get_shape().merge_with(
|
||||
tensor_shape.vector(grad_shape[0]))
|
||||
return [var_shape]
|
||||
|
||||
|
||||
@ops.RegisterShape("SparseApplyAdadelta")
|
||||
def _SparseApplyAdadeltaShape(op):
|
||||
"""Shape function for the SparseApplyAdadelta op."""
|
||||
@ -148,6 +185,7 @@ def _SparseApplyAdadeltaShape(op):
|
||||
tensor_shape.vector(grad_shape[0]))
|
||||
return [accum_update_shape]
|
||||
|
||||
|
||||
@ops.RegisterShape("SparseApplyAdagrad")
|
||||
def _SparseApplyAdagradShape(op):
|
||||
"""Shape function for the SparseApplyAdagrad op."""
|
||||
@ -161,6 +199,21 @@ def _SparseApplyAdagradShape(op):
|
||||
return [accum_shape]
|
||||
|
||||
|
||||
@ops.RegisterShape("SparseApplyProximalAdagrad")
|
||||
def _SparseApplyProximalAdagradShape(op):
|
||||
"""Shape function for the SparseApplyProximalAdagrad op."""
|
||||
var_shape = op.inputs[0].get_shape()
|
||||
accum_shape = op.inputs[1].get_shape().merge_with(var_shape)
|
||||
_AssertInputIsScalar(op, 2) # lr
|
||||
_AssertInputIsScalar(op, 3) # l1
|
||||
_AssertInputIsScalar(op, 4) # l2
|
||||
grad_shape = op.inputs[5].get_shape().merge_with(
|
||||
tensor_shape.TensorShape([None]).concatenate(accum_shape[1:]))
|
||||
unused_indices_shape = op.inputs[6].get_shape().merge_with(
|
||||
tensor_shape.vector(grad_shape[0]))
|
||||
return [accum_shape]
|
||||
|
||||
|
||||
@ops.RegisterShape("SparseApplyFtrl")
|
||||
def _SparseApplyFtrlShape(op):
|
||||
"""Shape function for the SparseApplyFtrl op."""
|
||||
|
@ -1876,6 +1876,40 @@ bool CudnnSupport::DoPoolForward(
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoPoolForward(
|
||||
Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
DeviceMemory<Eigen::half>* output_data) {
|
||||
mutex_lock lock{dnn_handle_mutex_};
|
||||
auto status = dynload::cudnnSetStream(parent_, ToHandle(dnn_handle_),
|
||||
AsCUDAStreamValue(stream));
|
||||
if (status != CUDNN_STATUS_SUCCESS) {
|
||||
LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Alpha is the scaling factor for input.
|
||||
float alpha = 1.0;
|
||||
// Beta is the scaling factor for output.
|
||||
float beta = 0.0;
|
||||
|
||||
ScopedTensorDescriptor src_desc{parent_, input_dimensions, CUDNN_DATA_HALF};
|
||||
ScopedTensorDescriptor dest_desc{parent_, output_dimensions, CUDNN_DATA_HALF};
|
||||
ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions};
|
||||
status = dynload::cudnnPoolingForward(
|
||||
parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha,
|
||||
src_desc.handle(), input_data.opaque(), &beta, dest_desc.handle(),
|
||||
output_data->opaque());
|
||||
if (status != CUDNN_STATUS_SUCCESS) {
|
||||
LOG(ERROR) << "failed to enqueue forward pooling on stream: "
|
||||
<< ToString(status);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoPoolBackward(
|
||||
Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
@ -1914,6 +1948,43 @@ bool CudnnSupport::DoPoolBackward(
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoPoolBackward(
|
||||
Stream* stream, const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
const DeviceMemory<Eigen::half>& output_data,
|
||||
const DeviceMemory<Eigen::half>& input_diff_data,
|
||||
DeviceMemory<Eigen::half>* output_diff_data) {
|
||||
mutex_lock lock{dnn_handle_mutex_};
|
||||
auto status = dynload::cudnnSetStream(parent_, ToHandle(dnn_handle_),
|
||||
AsCUDAStreamValue(stream));
|
||||
if (status != CUDNN_STATUS_SUCCESS) {
|
||||
LOG(ERROR) << "failed to set stream for cudnn handle: " << ToString(status);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Alpha is the scaling factor for input.
|
||||
float alpha = 1.0;
|
||||
// Beta is the scaling factor for output.
|
||||
float beta = 0.0;
|
||||
|
||||
ScopedTensorDescriptor src_desc{parent_, input_dimensions, CUDNN_DATA_HALF};
|
||||
ScopedTensorDescriptor dest_desc{parent_, output_dimensions, CUDNN_DATA_HALF};
|
||||
ScopedPoolingDescriptor pooling_desc{parent_, pooling_dimensions};
|
||||
status = dynload::cudnnPoolingBackward(
|
||||
parent_, ToHandle(dnn_handle_), pooling_desc.handle(), &alpha,
|
||||
dest_desc.handle(), output_data.opaque(), dest_desc.handle(),
|
||||
input_diff_data.opaque(), src_desc.handle(), input_data.opaque(), &beta,
|
||||
src_desc.handle(), output_diff_data->opaque());
|
||||
if (status != CUDNN_STATUS_SUCCESS) {
|
||||
LOG(ERROR) << "failed to enqueue backward pooling on stream: "
|
||||
<< ToString(status);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CudnnSupport::DoNormalize(
|
||||
Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
|
||||
const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
|
||||
|
@ -201,6 +201,13 @@ class CudnnSupport : public dnn::DnnSupport {
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
DeviceMemory<float>* output_data) override;
|
||||
|
||||
bool DoPoolForward(Stream* stream,
|
||||
const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
DeviceMemory<Eigen::half>* output_data) override;
|
||||
|
||||
bool DoPoolBackward(Stream* stream,
|
||||
const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
@ -210,6 +217,15 @@ class CudnnSupport : public dnn::DnnSupport {
|
||||
const DeviceMemory<float>& input_diff_data,
|
||||
DeviceMemory<float>* output_diff_data) override;
|
||||
|
||||
bool DoPoolBackward(Stream* stream,
|
||||
const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
const DeviceMemory<Eigen::half>& output_data,
|
||||
const DeviceMemory<Eigen::half>& input_diff_data,
|
||||
DeviceMemory<Eigen::half>* output_diff_data) override;
|
||||
|
||||
bool DoNormalize(Stream* stream,
|
||||
const dnn::NormalizeDescriptor& normalize_descriptor,
|
||||
const DeviceMemory<float>& input_data,
|
||||
|
@ -1011,6 +1011,13 @@ class DnnSupport {
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
DeviceMemory<float>* output_data) = 0;
|
||||
|
||||
virtual bool DoPoolForward(Stream* stream,
|
||||
const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
DeviceMemory<Eigen::half>* output_data) = 0;
|
||||
|
||||
// Performs differentiation of the pooling operation.
|
||||
virtual bool DoPoolBackward(Stream* stream,
|
||||
const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
@ -1021,6 +1028,15 @@ class DnnSupport {
|
||||
const DeviceMemory<float>& input_diff_data,
|
||||
DeviceMemory<float>* output_diff_data) = 0;
|
||||
|
||||
virtual bool DoPoolBackward(Stream* stream,
|
||||
const dnn::PoolingDescriptor& pooling_dimensions,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::BatchDescriptor& output_dimensions,
|
||||
const DeviceMemory<Eigen::half>& output_data,
|
||||
const DeviceMemory<Eigen::half>& input_diff_data,
|
||||
DeviceMemory<Eigen::half>* output_diff_data) = 0;
|
||||
|
||||
// Applies local response normalization to the values from
|
||||
// input_data and writes the result to output_data. See comments on
|
||||
// NormalizeDescriptor for a description of local response
|
||||
|
@ -909,6 +909,30 @@ Stream &Stream::ThenPoolForward(
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenPoolForward(
|
||||
const dnn::PoolingDescriptor &pooling_dimensions,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::BatchDescriptor &output_dimensions,
|
||||
DeviceMemory<Eigen::half> *output_data) {
|
||||
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
|
||||
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data));
|
||||
|
||||
if (ok()) {
|
||||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
|
||||
input_data, output_dimensions,
|
||||
output_data));
|
||||
} else {
|
||||
SetError();
|
||||
LOG(WARNING)
|
||||
<< "attempting to perform DNN operation using StreamExecutor "
|
||||
"without DNN support";
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenPoolBackward(
|
||||
const dnn::PoolingDescriptor &pooling_dimensions,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
@ -936,6 +960,33 @@ Stream &Stream::ThenPoolBackward(
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenPoolBackward(
|
||||
const dnn::PoolingDescriptor &pooling_dimensions,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::BatchDescriptor &output_dimensions,
|
||||
const DeviceMemory<Eigen::half> &output_data,
|
||||
const DeviceMemory<Eigen::half> &input_diff_data,
|
||||
DeviceMemory<Eigen::half> *output_diff_data) {
|
||||
VLOG_CALL(PARAM(pooling_dimensions), PARAM(input_dimensions),
|
||||
PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
|
||||
PARAM(input_diff_data), PARAM(output_diff_data));
|
||||
|
||||
if (ok()) {
|
||||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
|
||||
input_data, output_dimensions, output_data,
|
||||
input_diff_data, output_diff_data));
|
||||
} else {
|
||||
SetError();
|
||||
LOG(WARNING)
|
||||
<< "attempting to perform DNN operation using StreamExecutor "
|
||||
"without DNN support";
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Stream &Stream::ThenNormalize(
|
||||
const dnn::NormalizeDescriptor &normalize_descriptor,
|
||||
const DeviceMemory<float> &input_data, DeviceMemory<float> *output_data) {
|
||||
|
@ -421,6 +421,12 @@ class Stream {
|
||||
const dnn::BatchDescriptor &output_dimensions,
|
||||
DeviceMemory<float> *output_data);
|
||||
|
||||
Stream &ThenPoolForward(const dnn::PoolingDescriptor &pooling_dimensions,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::BatchDescriptor &output_dimensions,
|
||||
DeviceMemory<Eigen::half> *output_data);
|
||||
|
||||
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
const DeviceMemory<float> &input_data,
|
||||
@ -429,6 +435,14 @@ class Stream {
|
||||
const DeviceMemory<float> &input_diff_data,
|
||||
DeviceMemory<float> *output_diff_data);
|
||||
|
||||
Stream &ThenPoolBackward(const dnn::PoolingDescriptor &pooling_dimensions,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::BatchDescriptor &output_dimensions,
|
||||
const DeviceMemory<Eigen::half> &output_data,
|
||||
const DeviceMemory<Eigen::half> &input_diff_data,
|
||||
DeviceMemory<Eigen::half> *output_diff_data);
|
||||
|
||||
Stream &ThenNormalize(const dnn::NormalizeDescriptor &normalize_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
DeviceMemory<float> *output_data);
|
||||
|
@ -6,8 +6,8 @@
|
||||
def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||
native.new_http_archive(
|
||||
name = "eigen_archive",
|
||||
url = "https://bitbucket.org/eigen/eigen/get/d02e6a705c30.tar.gz",
|
||||
sha256 = "532956172daa8aba87c750791ff89a5c38cdb07e2525afe17ecb4bef812d67cf",
|
||||
url = "https://bitbucket.org/eigen/eigen/get/0c0b79ecd74c.tar.gz",
|
||||
sha256 = "b4b5884b03bd4bae114d02b36e2435ad1504ed8e51431d16c876b6f6a365882b",
|
||||
build_file = path_prefix + "eigen.BUILD",
|
||||
)
|
||||
|
||||
|
2
third_party/eigen3/Eigen/Cholesky
vendored
2
third_party/eigen3/Eigen/Cholesky
vendored
@ -1 +1 @@
|
||||
#include "eigen-eigen-d02e6a705c30/Eigen/Cholesky"
|
||||
#include "eigen-eigen-0c0b79ecd74c/Eigen/Cholesky"
|
||||
|
2
third_party/eigen3/Eigen/Core
vendored
2
third_party/eigen3/Eigen/Core
vendored
@ -1 +1 @@
|
||||
#include "eigen-eigen-d02e6a705c30/Eigen/Core"
|
||||
#include "eigen-eigen-0c0b79ecd74c/Eigen/Core"
|
||||
|
2
third_party/eigen3/Eigen/Eigenvalues
vendored
2
third_party/eigen3/Eigen/Eigenvalues
vendored
@ -1 +1 @@
|
||||
#include "eigen-eigen-d02e6a705c30/Eigen/Eigenvalues"
|
||||
#include "eigen-eigen-0c0b79ecd74c/Eigen/Eigenvalues"
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user