Merge commit for internal changes

This commit is contained in:
Yifei Feng 2016-07-30 19:18:40 -07:00
commit 0a77ad6f3b
52 changed files with 4887 additions and 137 deletions

View File

@ -104,6 +104,7 @@ filegroup(
"//tensorflow/contrib/testing:all_files",
"//tensorflow/contrib/util:all_files",
"//tensorflow/core:all_files",
"//tensorflow/core/debug:all_files",
"//tensorflow/core/distributed_runtime:all_files",
"//tensorflow/core/distributed_runtime/rpc:all_files",
"//tensorflow/core/kernels:all_files",

View File

@ -38,7 +38,6 @@ namespace {
const char kFfmpegExecutable[] = "ffmpeg";
const int32 kDefaultProbeSize = 5000000; // 5MB
std::vector<string> FfmpegCommandLine(const string& input_filename,
const string& output_filename,
const string& input_format_id,
@ -63,6 +62,39 @@ std::vector<string> FfmpegCommandLine(const string& input_filename,
};
}
// Is a named binary installed and executable by the current process?
// Note that this is harder than it seems like it should be...
//
// Walks every directory listed in the PATH environment variable, resolves
// `<dir>/<binary_name>` to an absolute path and checks whether it is a regular
// file carrying an execute bit that applies to the effective user, the
// effective group, or everyone.
//
// Returns true as soon as one such file is found; returns false when PATH is
// unset or no candidate qualifies.
bool IsBinaryInstalled(const string& binary_name) {
  // getenv() returns NULL when the variable is not set; building a string
  // from a null pointer is undefined behavior, so bail out explicitly.
  const char* path_env = ::getenv("PATH");
  if (path_env == nullptr) {
    return false;
  }
  const string path = path_env;
  for (const string& dir : str_util::Split(path, ':')) {
    const string binary_path = io::JoinPath(dir, binary_name);
    char absolute_path[PATH_MAX + 1];
    // On failure realpath() leaves the buffer contents unspecified, so the
    // candidate must be skipped instead of being passed on to stat().
    if (::realpath(binary_path.c_str(), absolute_path) == nullptr) {
      continue;
    }
    struct stat statinfo;
    if (::stat(absolute_path, &statinfo) < 0) {
      continue;
    }
    if (!S_ISREG(statinfo.st_mode)) {
      continue;
    }

    // Is the current user able to execute the file?
    if (statinfo.st_uid == ::geteuid() && (statinfo.st_mode & S_IXUSR)) {
      return true;
    }
    // Is the current group able to execute the file? The file's *group* id
    // (st_gid), not its owner id, has to match the effective group id.
    if (statinfo.st_gid == ::getegid() && (statinfo.st_mode & S_IXGRP)) {
      return true;
    }
    // Is anyone able to execute the file?
    if (statinfo.st_mode & S_IXOTH) {
      return true;
    }
  }
  return false;
}
[[noreturn]] int ExecuteFfmpeg(const std::vector<string>& args) {
std::vector<char*> args_chars;
std::transform(args.begin(), args.end(), std::back_inserter(args_chars),
@ -191,6 +223,14 @@ Status ReadAudioFile(const string& filename,
FfmpegCommandLine(filename, output_filename, audio_format_id,
samples_per_second, channel_count);
// Unfortunately, it's impossible to differentiate an exec failure due to the
// binary being missing and an error from the binary's execution. Therefore,
// check to see if the binary *should* be available. If not, return an error
// that will be converted into a helpful error message by the TensorFlow op.
if (!IsBinaryInstalled(kFfmpegExecutable)) {
return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
}
// Execute ffmpeg and report errors.
pid_t child_pid = ::fork();
if (child_pid < 0) {
@ -202,7 +242,7 @@ Status ReadAudioFile(const string& filename,
int status_code;
::waitpid(child_pid, &status_code, 0);
if (status_code) {
return Status(error::Code::NOT_FOUND,
return Status(error::Code::UNKNOWN,
StrCat("FFmpeg execution failed: ", status_code));
}
*output_samples = ReadPcmFile(output_filename);

View File

@ -818,7 +818,7 @@ class DropoutTest(tf.test.TestCase):
with self.test_session():
images = np.random.uniform(size=(5, height, width, 3))
output = tf.contrib.layers.dropout(images)
self.assertEquals(output.op.name, 'Dropout/dropout/mul_1')
self.assertEquals(output.op.name, 'Dropout/dropout/mul')
output.get_shape().assert_is_compatible_with(
tf.convert_to_tensor(images).get_shape())
@ -828,7 +828,7 @@ class DropoutTest(tf.test.TestCase):
is_training = tf.constant(True)
images = tf.random_uniform((5, height, width, 3), seed=1)
output = tf.contrib.layers.dropout(images, is_training=is_training)
self.assertEquals(output.op.name, 'Dropout/dropout/mul_1')
self.assertEquals(output.op.name, 'Dropout/dropout/mul')
output.get_shape().assert_is_compatible_with(images.get_shape())
def testCreateDropoutWithConstantFalse(self):

View File

@ -22,6 +22,7 @@ import inspect
import six
from tensorflow.contrib import losses
from tensorflow.contrib import metrics as metrics_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@ -29,7 +30,6 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import logging_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
def regression_target(label_name=None,
@ -297,8 +297,17 @@ class _BinarySvmTargetColumn(_MultiClassTargetColumn):
"""_TargetColumn for binary classification using SVMs."""
def __init__(self, label_name, weight_column_name):
def loss_fn(logits, target):
check_shape_op = logging_ops.Assert(
math_ops.less_equal(array_ops.rank(target), 2),
["target's shape should be either [batch_size, 1] or [batch_size]"])
with ops.control_dependencies([check_shape_op]):
target = array_ops.reshape(
target, shape=[array_ops.shape(target)[0], 1])
return losses.hinge_loss(logits, target)
super(_BinarySvmTargetColumn, self).__init__(
loss_fn=_binary_hinge_loss,
loss_fn=loss_fn,
n_classes=2,
label_name=label_name,
weight_column_name=weight_column_name)
@ -331,22 +340,6 @@ def _log_loss_with_two_classes(logits, target):
return loss_vec
# TODO(sibyl-vie3Poto): Move this to contrib/losses/python/losses/loss_ops.py.
def _binary_hinge_loss(logits, target):
"""Method that returns the loss vector for binary hinge loss."""
check_shape_op = logging_ops.Assert(
math_ops.less_equal(
array_ops.rank(target), 2),
["target's shape should be either [batch_size, 1] or [batch_size]"])
with ops.control_dependencies([check_shape_op]):
target = array_ops.reshape(target, shape=[array_ops.shape(target)[0], 1])
# First need to convert binary labels to -1/1 labels (as floats).
all_ones = array_ops.ones_like(logits)
labels = math_ops.sub(2 * math_ops.to_float(target), all_ones)
loss_vec = nn_ops.relu(math_ops.sub(all_ones, math_ops.mul(labels, logits)))
return loss_vec
def _softmax_cross_entropy_loss(logits, target):
# sigmoid_cross_entropy_with_logits requires [batch_size, 1] target.
# Check that we got int32/int64 for classification.

View File

@ -61,13 +61,13 @@ class SVM(linear.LinearClassifier):
whose `value` is a `SparseTensor`.
- if `column` is a `RealValuedColumn, a feature with `key=column.name`
whose `value` is a `Tensor`.
- if `feauture_columns` is None, then `input` must contains only real
- if `feature_columns` is None, then `input` must contain only real
valued `Tensor`.
Parameters:
example_id_column: A string defining the feature column name representing
example ids. Used do initialize the underlying optimizer.
example ids. Used to initialize the underlying optimizer.
feature_columns: An iterable containing all the feature columns used by the
model. All items in the set should be instances of classes derived from
`FeatureColumn`.
@ -75,10 +75,12 @@ class SVM(linear.LinearClassifier):
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example.
model_dir: Directory to save model parameters, graph and etc. This can also
be used to load checkpoints from the directory into a estimator to continue
training a previously saved model.
l1_regularization: L1-regularization parameter
l2_regularization: L2-regularization parameter
be used to load checkpoints from the directory into an estimator to
continue training a previously saved model.
l1_regularization: L1-regularization parameter. Refers to global L1
regularization (across all examples).
l2_regularization: L2-regularization parameter. Refers to global L2
regularization (across all examples).
kernels: A list of kernels for the SVM. Currently, no kernels are supported.
Reserved for future use for non-linear SVMs
config: RunConfig object to configure the runtime settings.
@ -100,12 +102,13 @@ class SVM(linear.LinearClassifier):
symmetric_l1_regularization=l1_regularization,
symmetric_l2_regularization=l2_regularization)
super(SVM, self).__init__(model_dir=model_dir,
n_classes=2,
weight_column_name=weight_column_name,
feature_columns=feature_columns,
optimizer=optimizer,
config=config)
super(SVM, self).__init__(
model_dir=model_dir,
n_classes=2,
weight_column_name=weight_column_name,
feature_columns=feature_columns,
optimizer=optimizer,
config=config)
self._target_column = layers.binary_svm_target(
weight_column_name=weight_column_name)

View File

@ -106,6 +106,7 @@ weighted average over the individual prediction errors:
@@absolute_difference
@@add_loss
@@hinge_loss
@@cosine_distance
@@get_losses
@@get_regularization_losses

View File

@ -25,6 +25,7 @@ from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
__all__ = ["absolute_difference",
@ -33,6 +34,7 @@ __all__ = ["absolute_difference",
"get_losses",
"get_regularization_losses",
"get_total_loss",
"hinge_loss",
"log_loss",
"sigmoid_cross_entropy",
"softmax_cross_entropy",
@ -410,6 +412,31 @@ def log_loss(predictions, targets, weight=1.0, epsilon=1e-7, scope=None):
return _compute_weighted_loss(losses, weight)
def hinge_loss(logits, target, scope=None):
  """Computes the element-wise hinge loss between `logits` and `target`.

  Binary targets in {0, 1} are first mapped to {-1, +1}; the loss for each
  element is then `relu(1 - label * logit)`.

  Args:
    logits: The logits, a float tensor.
    target: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A `Tensor` of same shape as logits and target representing the loss values
      across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `target` don't match.
  """
  with ops.op_scope([logits, target], scope, "hinge_loss"):
    logits.get_shape().assert_is_compatible_with(target.get_shape())
    # Map the {0, 1} targets onto {-1, +1} float labels: label = 2 * t - 1.
    float_target = math_ops.to_float(target)
    ones = array_ops.ones_like(float_target)
    signed_labels = math_ops.sub(2 * float_target, ones)
    # Loss is the positive part of the margin violation 1 - label * logit.
    margin = math_ops.sub(ones, math_ops.mul(signed_labels, logits))
    return nn_ops.relu(margin)
def sum_of_squares(predictions, targets, weight=1.0, scope=None):
"""Adds a Sum-of-Squares loss to the training procedure.

View File

@ -499,6 +499,42 @@ class LogLossTest(tf.test.TestCase):
self.assertAlmostEqual(0.0, loss.eval(), 3)
class HingeLossTest(tf.test.TestCase):
  """Unit tests for `tf.contrib.losses.hinge_loss`."""

  def testIncompatibleShapes(self):
    # Rank-2 logits paired with rank-1 targets must be rejected.
    with self.test_session():
      column_logits = tf.constant([[-1.0], [2.1]])
      flat_target = tf.constant([0.0, 1.0])
      with self.assertRaises(ValueError):
        _ = tf.contrib.losses.hinge_loss(column_logits, flat_target).eval()

  def testAllOutsideMargin(self):
    # Every example is correctly classified with margin >= 1: zero loss.
    with self.test_session():
      scores = tf.constant([1.2, -1.4, -1.0, 2.1])
      truth = tf.constant([1.0, 0.0, 0.0, 1.0])
      expected = [0.0, 0.0, 0.0, 0.0]
      hinge = tf.contrib.losses.hinge_loss(scores, truth)
      self.assertAllClose(hinge.eval(), expected, atol=1e-3)

  def testSomeInsideMargin(self):
    with self.test_session():
      scores = tf.constant([[-0.7], [-1.4], [1.4], [0.6]])
      truth = tf.constant([[0.0], [0.0], [1.0], [1.0]])
      hinge = tf.contrib.losses.hinge_loss(scores, truth)
      # The first and last examples sit on the correct side of the hyperplane
      # but inside the margin, so each picks up a small loss.
      expected = [[0.3], [0.0], [0.0], [0.4]]
      self.assertAllClose(hinge.eval(), expected, atol=1e-3)

  def testSomeMisclassified(self):
    with self.test_session():
      scores = tf.constant([[[1.2], [0.4], [-1.0], [-1.1]]])
      truth = tf.constant([[[1.0], [0.0], [0.0], [1.0]]])
      hinge = tf.contrib.losses.hinge_loss(scores, truth)
      # The second and fourth examples fall on the wrong side of the
      # hyperplane and therefore incur a comparatively large loss.
      expected = [[[0.0], [1.4], [0.0], [2.1]]]
      self.assertAllClose(hinge.eval(), expected, atol=1e-3)
class SumOfSquaresLossTest(tf.test.TestCase):
def setUp(self):

View File

@ -9,10 +9,14 @@ exports_files(["LICENSE"])
package(default_visibility = ["//tensorflow:__subpackages__"])
load("//tensorflow:tensorflow.bzl", "cuda_py_tests")
load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
# Python sources of the contrib RNN package. The compiled LSTM op library is
# listed under `data` so that it is available next to the Python files.
py_library(
name = "rnn_py",
srcs = ["__init__.py"] + glob(["python/ops/*.py"]),
data = [
":python/ops/_lstm_ops.so",
],
srcs_version = "PY2AND3",
)
@ -27,6 +31,33 @@ cuda_py_tests(
],
)
# Python kernel test for the fused LSTM ops; depends on :rnn_py, which carries
# the custom op shared library as data.
cuda_py_tests(
name = "lstm_ops_test",
size = "small",
srcs = ["python/kernel_tests/lstm_ops_test.py"],
additional_deps = [
":rnn_py",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
)
# Shared library holding the LSTM op registrations and kernels.
# `kernels/lstm_ops.h` is listed in both `srcs` and `gpu_srcs` because the
# functor header is included by the CPU kernel file and by the .cu.cc file.
tf_custom_op_library(
name = "python/ops/_lstm_ops.so",
srcs = [
"kernels/lstm_ops.cc",
"kernels/lstm_ops.h",
"ops/lstm_ops.cc",
],
gpu_srcs = [
"kernels/lstm_ops_gpu.cu.cc",
"kernels/lstm_ops.h",
],
deps = [
"//tensorflow/core/kernels:eigen_helpers",
],
)
filegroup(
name = "all_files",
srcs = glob(

View File

@ -12,14 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops for representing statistical distributions.
"""Additional RNN operations and cells.
## This package provides classes for statistical distributions.
## This package provides additional contributed RNNCells.
### Fused RNNCells
@@LSTMFusedCell
### LSTM-like cells
@@CoupledInputForgetGateLSTMCell
@@TimeFreqLSTMCell
@@GridLSTMCell
### RNNCell wrappers
@@AttentionCellWrapper
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,wildcard-import, line-too-long
from tensorflow.contrib.rnn.python.ops.lstm_ops import *
from tensorflow.contrib.rnn.python.ops.rnn_cell import *

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,420 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_RNN_KERNELS_LSTM_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_RNN_KERNELS_LSTM_OPS_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/eigen_activations.h"
#include "tensorflow/core/platform/types.h"
namespace perftools {
namespace gputools {
class Stream;
} // end namespace gputools
} // end namespace perftools
namespace tensorflow {
class OpKernelContext;
namespace functor {
// Functor that overwrites every element of the flat tensor `t` with T(0),
// evaluating the assignment on device `d`.
template <typename Device, typename T>
struct TensorZero {
void operator()(const Device& d, typename TTypes<T>::Flat t) {
t.device(d) = t.constant(T(0));
}
};
// Functor that copies the flat tensor `src` into `dst` on device `d`.
template <typename Device, typename T>
struct TensorCopy {
void operator()(const Device& d, typename TTypes<T>::ConstFlat src,
typename TTypes<T>::Flat dst) {
dst.device(d) = src;
}
};
// Functor that writes the element-wise sum `a + b` into `c` on device `d`.
template <typename Device, typename T>
struct TensorAdd {
void operator()(const Device& d, typename TTypes<T>::ConstFlat a,
typename TTypes<T>::ConstFlat b, typename TTypes<T>::Flat c) {
c.device(d) = a + b;
}
};
// Functor that zeroes the rows of `m` belonging to batch entries whose
// sequence length is not greater than `time_idx`.
//
// `mask` is scratch output: mask[b] is set to (time_idx < seq_len[b]) and each
// row b of `m` is then multiplied by mask[b].
//
// NOTE(review): the template parameter T is not used; `mask` and `m` are
// hard-coded to float. Confirm whether that is intentional.
template <typename Device, typename T>
struct TensorZeroPadding {
void operator()(const Device& d, const int64 time_idx,
typename TTypes<int64>::ConstVec seq_len,
typename TTypes<float>::Vec mask,
typename TTypes<float>::Matrix m) {
// mask is shape [batch_size].
mask.device(d) = seq_len.constant(time_idx) < seq_len;
// m_shape is [batch_size, 1].
Eigen::array<Eigen::DenseIndex, 2> m_shape({m.dimensions()[0], 1});
// broadcast_shape is [1, units].
Eigen::array<Eigen::DenseIndex, 2> broadcast_shape({1, m.dimensions()[1]});
// m is shape [batch_size, units].
m.device(d) = m * mask.reshape(m_shape).broadcast(broadcast_shape);
}
};
// GEMM functor operating on raw pointers with explicit leading dimensions.
// Only declared here; the definition lives outside this header.
// NOTE(review): presumably forwards to a BLAS GEMM on `stream` -- confirm
// against the definition.
template <typename T>
struct TensorCuBlasGemm {
void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream,
bool transa, bool transb, uint64 m, uint64 n, uint64 k,
T alpha, const T* a, int lda, const T* b, int ldb, T beta,
T* c, int ldc);
};
template <typename Device, typename T, bool USE_CUBLAS>
struct TensorBlasGemm;
// GEMM specialization that delegates to TensorCuBlasGemm.
//
// m and n are taken from the dimensions of `c`; k is the contracted dimension
// of `a` (its row count when transa is set, its column count otherwise).
// Note that the operands are handed over swapped (`b` first, `a` second, with
// n and m exchanged as well).
// NOTE(review): presumably this is the usual trick for driving a column-major
// BLAS with row-major data (C^T = op(B)^T * op(A)^T) -- confirm.
// The device `d` is not used by this specialization.
template <typename Device, typename T>
struct TensorBlasGemm<Device, T, true /* USE_CUBLAS */> {
static void compute(OpKernelContext* ctx, perftools::gputools::Stream* stream,
const Device& d, bool transa, bool transb, T alpha,
typename TTypes<T>::ConstMatrix a,
typename TTypes<T>::ConstMatrix b, T beta,
typename TTypes<T>::Matrix c) {
int64 m = c.dimensions()[0];
int64 n = c.dimensions()[1];
int64 k = transa ? a.dimensions()[0] : a.dimensions()[1];
TensorCuBlasGemm<T>()(ctx, stream, transb, transa, n, m, k, alpha, b.data(),
transb ? k : n, a.data(), transa ? m : k, beta,
c.data(), n);
}
};
template <typename Device, typename T>
struct TensorBlasGemm<Device, T, false /* USE_CUBLAS */> {
static void compute(OpKernelContext* ctx, perftools::gputools::Stream* stream,
const Device& d, bool transa, bool transb, T alpha,
typename TTypes<T>::ConstMatrix a,
typename TTypes<T>::ConstMatrix b, T beta,
typename TTypes<T>::Matrix c) {
Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> contract_pairs;
contract_pairs[0] =
Eigen::IndexPair<Eigen::DenseIndex>(transa == false, transb == true);
if (alpha == T(1) && beta == T(0)) {
c.device(d) = a.contract(b, contract_pairs);
} else if (alpha == T(1) && beta == T(1)) {
c.device(d) += a.contract(b, contract_pairs);
} else {
c.device(d) = c.constant(alpha) * a.contract(b, contract_pairs) +
c.constant(beta) * c;
}
}
};
// Base class of the fused LSTM functors. Stores the problem sizes and exposes
// the slice offsets/extents used to address sub-blocks of two matrices:
//   * the gate matrix `icfo`, split column-wise into four blocks of width
//     cell_size in the order i, c, f, o;
//   * the concatenated matrix `xh`, whose first input_size columns are the x
//     block and whose following cell_size columns are the h block.
struct LSTMFusedCell {
LSTMFusedCell(const int batch_size, const int input_size, const int cell_size)
: batch_size_(batch_size),
input_size_(input_size),
cell_size_(cell_size) {}
// Start of the i block inside icfo.
inline Eigen::array<Eigen::DenseIndex, 2> icfo_i_offsets() const {
return {0, 0};
}
// Start of the c block inside icfo.
inline Eigen::array<Eigen::DenseIndex, 2> icfo_c_offsets() const {
return {0, cell_size_};
}
// Start of the f block inside icfo.
inline Eigen::array<Eigen::DenseIndex, 2> icfo_f_offsets() const {
return {0, cell_size_ * 2};
}
// Start of the o block inside icfo.
inline Eigen::array<Eigen::DenseIndex, 2> icfo_o_offsets() const {
return {0, cell_size_ * 3};
}
// Extent [batch_size, cell_size] shared by each of the four icfo blocks.
inline Eigen::array<Eigen::DenseIndex, 2> cell_extents() const {
return {batch_size_, cell_size_};
}
// Start of the x block inside xh.
inline Eigen::array<Eigen::DenseIndex, 2> xh_x_offsets() const {
return {0, 0};
}
// Extent [batch_size, input_size] of the x block inside xh.
inline Eigen::array<Eigen::DenseIndex, 2> xh_x_extents() const {
return {batch_size_, input_size_};
}
// Start of the h block inside xh (directly after the x columns).
inline Eigen::array<Eigen::DenseIndex, 2> xh_h_offsets() const {
return {0, input_size_};
}
// Extent [batch_size, cell_size] of the h block inside xh.
inline Eigen::array<Eigen::DenseIndex, 2> xh_h_extents() const {
return {batch_size_, cell_size_};
}
protected:
const int batch_size_;
const int input_size_;
const int cell_size_;
};
// Forward pass of a single LSTM step.
//
// Inputs:  x, cs_prev, h_prev, weight matrix w, bias b, and (only read when
//          use_peephole is true) the peephole vectors wci, wcf, wco.
// Scratch: xh (concatenation of x and h_prev) and icfo (pre-activation gates).
// Outputs: i, ci, f, cs, co, o and h.
//
// forget_bias is added to the forget gate before the sigmoid. The cell state
// is passed through Eigen::scalar_clip_op with cell_clip only when
// cell_clip > 0.
template <typename Device, typename T, bool USE_CUBLAS>
struct LSTMFusedCellFprop : public LSTMFusedCell {
LSTMFusedCellFprop(const int batch_size, const int input_size,
const int cell_size)
: LSTMFusedCell(batch_size, input_size, cell_size) {}
void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream,
const Device& d, const T forget_bias, const T cell_clip,
bool use_peephole, typename TTypes<T>::ConstMatrix x,
typename TTypes<T>::ConstMatrix cs_prev,
typename TTypes<T>::ConstMatrix h_prev,
typename TTypes<T>::ConstMatrix w,
typename TTypes<T>::ConstVec wci,
typename TTypes<T>::ConstVec wcf,
typename TTypes<T>::ConstVec wco,
typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,
typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,
typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,
typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,
typename TTypes<T>::Matrix icfo,
typename TTypes<T>::Matrix h) {
// Concat xh = [x, h].
xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x;
xh.slice(xh_h_offsets(), xh_h_extents()).device(d) = h_prev;
// icfo = xh * w + b (b is broadcast over the batch dimension).
typename TTypes<T>::ConstMatrix const_xh(xh.data(), xh.dimensions());
TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
ctx, stream, d, false, false, T(1), const_xh, w, T(0), icfo);
Eigen::array<Eigen::DenseIndex, 2> b_shape({1, b.dimensions()[0]});
Eigen::array<Eigen::DenseIndex, 2> broadcast_shape({batch_size_, 1});
icfo.device(d) += b.reshape(b_shape).broadcast(broadcast_shape);
// Shapes used to broadcast a [cell_size] peephole vector over the batch.
Eigen::array<Eigen::DenseIndex, 2> p_shape({1, cell_size_});
Eigen::array<Eigen::DenseIndex, 2> p_broadcast_shape({batch_size_, 1});
// Input gate.
if (use_peephole) {
auto i_peep = cs_prev * wci.reshape(p_shape).broadcast(p_broadcast_shape);
i.device(d) =
(icfo.slice(icfo_i_offsets(), cell_extents()) + i_peep).sigmoid();
} else {
i.device(d) = icfo.slice(icfo_i_offsets(), cell_extents()).sigmoid();
}
// Cell input.
ci.device(d) = icfo.slice(icfo_c_offsets(), cell_extents()).tanh();
// Forget gate (w/ bias).
if (use_peephole) {
auto f_peep = cs_prev * wcf.reshape(p_shape).broadcast(p_broadcast_shape);
f.device(d) = (icfo.slice(icfo_f_offsets(), cell_extents()) +
f.constant(forget_bias) + f_peep)
.sigmoid();
} else {
f.device(d) = (icfo.slice(icfo_f_offsets(), cell_extents()) +
f.constant(forget_bias))
.sigmoid();
}
// cs = ci .* i + f .* cs_prev
cs.device(d) = i * ci + f * cs_prev;
// Clipping is optional: a non-positive cell_clip disables it.
if (cell_clip > 0.0f) {
cs.device(d) =
cs.binaryExpr(cs.constant(cell_clip), Eigen::scalar_clip_op<T>());
}
// co = tanh(cs)
co.device(d) = cs.tanh();
// Output gate. Its peephole term uses the new cell state cs, not cs_prev.
if (use_peephole) {
auto o_peep = cs * wco.reshape(p_shape).broadcast(p_broadcast_shape);
o.device(d) =
(icfo.slice(icfo_o_offsets(), cell_extents()) + o_peep).sigmoid();
} else {
o.device(d) = icfo.slice(icfo_o_offsets(), cell_extents()).sigmoid();
}
// h = o .* co
h.device(d) = o * co;
}
};
// Backward pass of a single LSTM step.
//
// Given the forward activations (i, cs, f, o, ci, co) and the incoming
// gradients cs_grad / h_grad, fills the scratch gate gradients do_, dcs, dci,
// df, di, packs them into dicfo (block order i, c, f, o) and writes
// cs_prev_grad. When use_peephole is true it also writes wci_grad, wcf_grad
// and wco_grad (overwriting, not accumulating).
//
// The parameters x, h_prev, w and b are accepted but not read here; no weight
// or bias gradient is computed by this functor.
template <typename Device, typename T, bool USE_CUBLAS>
struct LSTMFusedCellBprop : public LSTMFusedCell {
LSTMFusedCellBprop(const int batch_size, const int input_size,
const int cell_size)
: LSTMFusedCell(batch_size, input_size, cell_size) {}
void operator()(
OpKernelContext* ctx, perftools::gputools::Stream* stream,
const Device& d, bool use_peephole, typename TTypes<T>::ConstMatrix x,
typename TTypes<T>::ConstMatrix cs_prev,
typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
typename TTypes<T>::ConstVec wco, typename TTypes<T>::ConstVec b,
typename TTypes<T>::ConstMatrix i, typename TTypes<T>::ConstMatrix cs,
typename TTypes<T>::ConstMatrix f, typename TTypes<T>::ConstMatrix o,
typename TTypes<T>::ConstMatrix ci, typename TTypes<T>::ConstMatrix co,
typename TTypes<T>::ConstMatrix cs_grad,
typename TTypes<T>::ConstMatrix h_grad, typename TTypes<T>::Matrix do_,
typename TTypes<T>::Matrix dcs, typename TTypes<T>::Matrix dci,
typename TTypes<T>::Matrix df, typename TTypes<T>::Matrix di,
typename TTypes<T>::Matrix dicfo, typename TTypes<T>::Matrix cs_prev_grad,
typename TTypes<T>::Vec wci_grad, typename TTypes<T>::Vec wcf_grad,
typename TTypes<T>::Vec wco_grad) {
// do[t] = sigm'(o[t]) .* dh[t] .* co[t]
do_.device(d) = o * (o.constant(T(1)) - o) * h_grad * co;
// dcs[t] = tanh'(cs[t]) .* dh[t] .* o[t] + cs_grad[t]
dcs.device(d) = (co.constant(T(1)) - co * co) * h_grad * o + cs_grad;
Eigen::array<Eigen::DenseIndex, 2> p_shape({1, cell_size_});
Eigen::array<Eigen::DenseIndex, 2> p_broadcast_shape({batch_size_, 1});
// With peepholes the output gate also depends on cs, adding do .* wco.
if (use_peephole) {
dcs.device(d) =
dcs + do_ * wco.reshape(p_shape).broadcast(p_broadcast_shape);
}
// dci[t] = tanh'(ci[t]) dcs[t] i[t]
dci.device(d) = (ci.constant(T(1)) - ci * ci) * dcs * i;
// df[t] = sigm'(f[t]) dcs[t] cs[t - 1]
df.device(d) = f * (f.constant(T(1)) - f) * dcs * cs_prev;
// di[t] = sigm'(i[t]) dcs[t] ci[t]
di.device(d) = i * (i.constant(T(1)) - i) * dcs * ci;
// Pack the four gate gradients into dicfo using the icfo block layout.
dicfo.slice(icfo_i_offsets(), cell_extents()).device(d) = di;
dicfo.slice(icfo_c_offsets(), cell_extents()).device(d) = dci;
dicfo.slice(icfo_f_offsets(), cell_extents()).device(d) = df;
dicfo.slice(icfo_o_offsets(), cell_extents()).device(d) = do_;
cs_prev_grad.device(d) = dcs * f;
// Peephole paths from cs_prev into the input and forget gates.
if (use_peephole) {
cs_prev_grad.device(d) =
cs_prev_grad +
di * wci.reshape(p_shape).broadcast(p_broadcast_shape) +
df * wcf.reshape(p_shape).broadcast(p_broadcast_shape);
}
// Peephole weight gradients: sum over the batch dimension (axis 0).
if (use_peephole) {
wci_grad.device(d) = (di * cs_prev).sum(Eigen::array<int, 1>({0}));
wcf_grad.device(d) = (df * cs_prev).sum(Eigen::array<int, 1>({0}));
wco_grad.device(d) = (do_ * cs).sum(Eigen::array<int, 1>({0}));
}
}
};
// Backward pass of one LSTM step that additionally produces the input,
// previous-output, weight and bias gradients.
//
// Gate gradients are computed exactly as in the single-cell backward functor
// and packed into dicfo. On top of that:
//   * xh_grad = dicfo * w^T, from which x_grad and h_prev_grad are sliced;
//   * w_grad, b_grad and (when use_peephole is true) wci_grad / wcf_grad /
//     wco_grad are ACCUMULATED into (GEMM with beta = 1, `+=`), not
//     overwritten.
// NOTE(review): the accumulation implies the caller initializes those
// gradient buffers (presumably to zero) before the first call -- confirm
// against the kernel.
// `xh` is scratch space rebuilt here as [x, h_prev]; `b` is not read.
template <typename Device, typename T, bool USE_CUBLAS>
struct FusedLSTMBprop : public LSTMFusedCell {
FusedLSTMBprop(const int batch_size, const int input_size,
const int cell_size)
: LSTMFusedCell(batch_size, input_size, cell_size) {}
void operator()(
OpKernelContext* ctx, perftools::gputools::Stream* stream,
const Device& d, bool use_peephole, typename TTypes<T>::ConstMatrix x,
typename TTypes<T>::ConstMatrix cs_prev,
typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
typename TTypes<T>::ConstVec wco, typename TTypes<T>::ConstVec b,
typename TTypes<T>::Matrix xh, typename TTypes<T>::ConstMatrix i,
typename TTypes<T>::ConstMatrix cs, typename TTypes<T>::ConstMatrix f,
typename TTypes<T>::ConstMatrix o, typename TTypes<T>::ConstMatrix ci,
typename TTypes<T>::ConstMatrix co,
typename TTypes<T>::ConstMatrix cs_grad,
typename TTypes<T>::ConstMatrix h_grad, typename TTypes<T>::Matrix do_,
typename TTypes<T>::Matrix dcs, typename TTypes<T>::Matrix dci,
typename TTypes<T>::Matrix df, typename TTypes<T>::Matrix di,
typename TTypes<T>::Matrix dicfo, typename TTypes<T>::Matrix cs_prev_grad,
typename TTypes<T>::Matrix h_prev_grad,
typename TTypes<T>::Matrix xh_grad, typename TTypes<T>::Matrix x_grad,
typename TTypes<T>::Matrix w_grad, typename TTypes<T>::Vec wci_grad,
typename TTypes<T>::Vec wcf_grad, typename TTypes<T>::Vec wco_grad,
typename TTypes<T>::Vec b_grad) {
// do[t] = sigm'(o[t]) .* dh[t] .* co[t]
do_.device(d) = o * (o.constant(T(1)) - o) * h_grad * co;
// dcs[t] = tanh'(cs[t]) .* dh[t] .* o[t] + cs_grad[t]
dcs.device(d) = (co.constant(T(1)) - co * co) * h_grad * o + cs_grad;
Eigen::array<Eigen::DenseIndex, 2> p_shape({1, cell_size_});
Eigen::array<Eigen::DenseIndex, 2> p_broadcast_shape({batch_size_, 1});
// With peepholes the output gate also depends on cs, adding do .* wco.
if (use_peephole) {
dcs.device(d) =
dcs + do_ * wco.reshape(p_shape).broadcast(p_broadcast_shape);
}
// dci[t] = tanh'(ci[t]) dcs[t] i[t]
dci.device(d) = (ci.constant(T(1)) - ci * ci) * dcs * i;
// df[t] = sigm'(f[t]) dcs[t] cs[t - 1]
df.device(d) = f * (f.constant(T(1)) - f) * dcs * cs_prev;
// di[t] = sigm'(i[t]) dcs[t] ci[t]
di.device(d) = i * (i.constant(T(1)) - i) * dcs * ci;
// Pack the four gate gradients into dicfo using the icfo block layout.
dicfo.slice(icfo_i_offsets(), cell_extents()).device(d) = di;
dicfo.slice(icfo_c_offsets(), cell_extents()).device(d) = dci;
dicfo.slice(icfo_f_offsets(), cell_extents()).device(d) = df;
dicfo.slice(icfo_o_offsets(), cell_extents()).device(d) = do_;
cs_prev_grad.device(d) = dcs * f;
// Peephole paths from cs_prev into the input and forget gates.
if (use_peephole) {
cs_prev_grad.device(d) =
cs_prev_grad +
di * wci.reshape(p_shape).broadcast(p_broadcast_shape) +
df * wcf.reshape(p_shape).broadcast(p_broadcast_shape);
}
// xh_grad = dicfo * w^T (overwrites xh_grad, beta = 0).
typename TTypes<T>::ConstMatrix const_dicfo(dicfo.data(),
dicfo.dimensions());
TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
ctx, stream, d, false, true, T(1), const_dicfo, w, T(0), xh_grad);
// Rebuild xh = [x, h_prev]; needed as the left operand of w_grad below.
xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x;
xh.slice(xh_h_offsets(), xh_h_extents()).device(d) = h_prev;
typename TTypes<T>::ConstMatrix const_xh(xh.data(), xh.dimensions());
// x_grad and h_prev_grad are the two column blocks of xh_grad.
x_grad.device(d) = xh_grad.slice(xh_x_offsets(), xh_x_extents());
h_prev_grad.device(d) = xh_grad.slice(xh_h_offsets(), xh_h_extents());
// w_grad += xh^T * dicfo (beta = 1, so this accumulates).
TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
ctx, stream, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad);
// b_grad += sum of dicfo over the batch dimension (axis 0).
b_grad.device(d) += dicfo.sum(Eigen::array<int, 1>({0}));
// Peephole weight gradients, also accumulated over calls.
if (use_peephole) {
wci_grad.device(d) += (di * cs_prev).sum(Eigen::array<int, 1>({0}));
wcf_grad.device(d) += (df * cs_prev).sum(Eigen::array<int, 1>({0}));
wco_grad.device(d) += (do_ * cs).sum(Eigen::array<int, 1>({0}));
}
}
};
} // namespace functor
} // namespace tensorflow
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_RNN_KERNELS_LSTM_OPS_H_

View File

@ -0,0 +1,41 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#include "tensorflow/contrib/rnn/kernels/lstm_ops.h"
namespace tensorflow {
namespace functor {
typedef Eigen::GpuDevice GPUDevice;
// Explicitly instantiates the functors from lstm_ops.h for GPUDevice and the
// element type T. The three LSTM functors are instantiated with
// USE_CUBLAS = true. Only float is instantiated; the double instantiation
// below is commented out.
#define DEFINE_GPU_SPECS(T) \
template struct TensorZero<GPUDevice, T>; \
template struct TensorCopy<GPUDevice, T>; \
template struct TensorAdd<GPUDevice, T>; \
template struct LSTMFusedCellFprop<GPUDevice, T, true>; \
template struct LSTMFusedCellBprop<GPUDevice, T, true>; \
template struct FusedLSTMBprop<GPUDevice, T, true>;
DEFINE_GPU_SPECS(float);
// DEFINE_GPU_SPECS(double);
#undef DEFINE_GPU_SPECS
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA

View File

@ -0,0 +1,180 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
namespace tensorflow {
// Op definition for a single fused LSTM step (forward). The op documentation
// below mirrors the computation performed by the forward functor in
// kernels/lstm_ops.h, including the optional peephole connections, the
// optional cell clipping and the [i, ci, f, o] column layout of the gate
// matrix.
REGISTER_OP("LSTMFusedCell")
    .Input("x: T")
    .Input("cs_prev: T")
    .Input("h_prev: T")
    .Input("w: T")
    .Input("wci: T")
    .Input("wcf: T")
    .Input("wco: T")
    .Input("b: T")
    .Output("i: T")
    .Output("cs: T")
    .Output("f: T")
    .Output("o: T")
    .Output("ci: T")
    .Output("co: T")
    .Output("h: T")
    .Attr("forget_bias: float = 1.0")
    .Attr("cell_clip: float = 3.0")
    .Attr("use_peephole: bool = false")
    .Attr("T: {float}")
    .Doc(R"doc(
Computes the LSTM cell forward propagation for 1 time step.

This implementation uses 1 weight matrix and 1 bias vector, and there is an
optional diagonal peephole connection.

This kernel op implements the following mathematical equations:

```python
xh = [x, h_prev]
[i, ci, f, o] = xh * w + b
f = f + forget_bias

if not use_peephole:
  wci = wcf = wco = 0

i = sigmoid(cs_prev .* wci + i)
f = sigmoid(cs_prev .* wcf + f)
ci = tanh(ci)

cs = ci .* i + cs_prev .* f
if cell_clip > 0:
  cs = clip(cs, cell_clip)

o = sigmoid(cs .* wco + o)
co = tanh(cs)
h = co .* o
```

forget_bias: The forget gate bias.
cell_clip: Value to clip the 'cs' value to. Clipping is only applied when this
  value is positive.
use_peephole: Whether to use peephole weights.
x: The input to the LSTM cell.
cs_prev: The previous cell state.
h_prev: The previous h state.
w: The weight matrix.
wci: The weight vector for the input gate peephole connection.
wcf: The weight vector for the forget gate peephole connection.
wco: The weight vector for the output gate peephole connection.
b: The bias vector.
i: The input gate.
cs: The cell state before the tanh.
f: The forget gate.
o: The output gate.
ci: The cell input.
co: The cell after the tanh.
h: The output h vector.
)doc");
// Op definition for the backward pass of a single fused LSTM step. The gate
// gradient output `dicfo` uses the same [i, ci, f, o] column layout as the
// forward gate matrix.
REGISTER_OP("LSTMFusedCellGrad")
    .Input("x: T")
    .Input("cs_prev: T")
    .Input("h_prev: T")
    .Input("w: T")
    .Input("wci: T")
    .Input("wcf: T")
    .Input("wco: T")
    .Input("b: T")
    .Input("i: T")
    .Input("cs: T")
    .Input("f: T")
    .Input("o: T")
    .Input("ci: T")
    .Input("co: T")
    .Input("cs_grad: T")
    .Input("h_grad: T")
    .Output("cs_prev_grad: T")
    .Output("dicfo: T")
    .Output("wci_grad: T")
    .Output("wcf_grad: T")
    .Output("wco_grad: T")
    .Attr("use_peephole: bool")
    .Attr("T: {float}")
    .Doc(R"doc(
Computes the LSTM cell backward propagation for 1 timestep.

This implementation is to be used in conjunction with LSTMFusedCell.

use_peephole: Whether the cell uses peephole connections.
x: The input to the LSTM cell.
cs_prev: The previous cell state.
h_prev: The previous h state.
w: The weight matrix.
wci: The weight vector for the input gate peephole connection.
wcf: The weight vector for the forget gate peephole connection.
wco: The weight vector for the output gate peephole connection.
b: The bias vector.
i: The input gate.
cs: The cell state before the tanh.
f: The forget gate.
o: The output gate.
ci: The cell input.
co: The cell after the tanh.
cs_grad: The incoming gradient of the cell state cs.
h_grad: The gradient of h vector.
cs_prev_grad: The gradient with respect to cs_prev.
dicfo: The derivative with respect to [i, ci, f, o].
wci_grad: The gradient for wci.
wcf_grad: The gradient for wcf.
wco_grad: The gradient for wco.
)doc");
// Op definition for the multi-step fused LSTM (forward).
// `x` and every output are lists of `max_len` tensors of type T, while the
// initial state (cs_prev, h_prev) and the parameters (w, wci, wcf, wco, b)
// are single tensors. The attributes forget_bias, cell_clip and use_peephole
// carry the same defaults as on the single-step LSTMFusedCell op.
// NOTE(review): the op doc string is empty, and the meaning of `seq_len_max`
// is not visible here -- presumably the largest sequence length in the
// batch; confirm against the kernel and document it.
REGISTER_OP("FusedLSTM")
.Input("seq_len_max: int64")
.Input("x: max_len * T")
.Input("cs_prev: T")
.Input("h_prev: T")
.Input("w: T")
.Input("wci: T")
.Input("wcf: T")
.Input("wco: T")
.Input("b: T")
.Output("i: max_len * T")
.Output("cs: max_len * T")
.Output("f: max_len * T")
.Output("o: max_len * T")
.Output("ci: max_len * T")
.Output("co: max_len * T")
.Output("h: max_len * T")
.Attr("max_len: int")
.Attr("forget_bias: float = 1.0")
.Attr("cell_clip: float = 3.0")
.Attr("use_peephole: bool = false")
.Attr("T: {float}")
.Doc(R"doc(
)doc");
// Op definition for the backward pass of the multi-step fused LSTM.
// Takes the forward inputs, the per-step forward outputs (lists of `max_len`
// tensors) and the per-step incoming gradients cs_grad / h_grad. Produces a
// per-step x_grad list plus single gradients for the initial state
// (cs_prev_grad, h_prev_grad) and for each parameter (w, wci, wcf, wco, b).
// Unlike the forward op, use_peephole has no default here.
// NOTE(review): the op doc string is empty; fill it in once the kernel
// semantics are confirmed.
REGISTER_OP("FusedLSTMGrad")
.Input("seq_len_max: int64")
.Input("x: max_len * T")
.Input("cs_prev: T")
.Input("h_prev: T")
.Input("w: T")
.Input("wci: T")
.Input("wcf: T")
.Input("wco: T")
.Input("b: T")
.Input("i: max_len * T")
.Input("cs: max_len * T")
.Input("f: max_len * T")
.Input("o: max_len * T")
.Input("ci: max_len * T")
.Input("co: max_len * T")
.Input("h: max_len * T")
.Input("cs_grad: max_len * T")
.Input("h_grad: max_len * T")
.Output("x_grad: max_len * T")
.Output("cs_prev_grad: T")
.Output("h_prev_grad: T")
.Output("w_grad: T")
.Output("wci_grad: T")
.Output("wcf_grad: T")
.Output("wco_grad: T")
.Output("b_grad: T")
.Attr("max_len: int")
.Attr("use_peephole: bool")
.Attr("T: {float}")
.Doc(R"doc(
)doc");
} // end namespace tensorflow

View File

@ -0,0 +1,290 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""LSTM Fused Cell ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn.python.ops import lstm_ops
fused_lstm = lstm_ops._fused_lstm # pylint: disable=protected-access
# Test case covering `tf.contrib.rnn.LSTMFusedCell` and the multi-step
# `fused_lstm` wrapper; most tests compare against the stock TensorFlow LSTM
# cells built with the same seeded initializer.
class LSTMFusedCellTest(tf.test.TestCase):
# Passed as `use_gpu` to every test session; the GPU subclass flips it to True.
_use_gpu = False
# Smoke test: the cell must work under `tf.nn.dynamic_rnn` when the first two
# dimensions of the input placeholder are statically unknown. There is no value
# assertion; the test passes if graph construction and the run do not raise.
def testNoneDimsWithDynamicRNN(self):
with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
batch_size = 4
num_steps = 5
input_dim = 6
cell_size = 7
cell = tf.contrib.rnn.LSTMFusedCell(cell_size)
# Only the last (feature) dimension is known statically.
x = tf.placeholder(tf.float32, shape=(None, None, input_dim))
output, _ = tf.nn.dynamic_rnn(cell, x, time_major=True, dtype=tf.float32)
sess.run(tf.initialize_all_variables())
feed = {}
# time_major=True, so the fed array is laid out (num_steps, batch_size, input_dim).
feed[x] = np.random.randn(num_steps, batch_size, input_dim)
sess.run(output, feed)
# Runs a two-layer MultiRNNCell of LSTMFusedCell(2) for one step with all
# variables initialized to 0.5 and checks the output and the four state tensors
# against hard-coded reference values.
def testLSTMFusedCell(self):
with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
# The zero tensors are placeholders in effect: their values are overridden
# through the feed dict (keyed by tensor name) in the run call below.
x = tf.zeros([1, 2])
m0 = tf.zeros([1, 2])
m1 = tf.zeros([1, 2])
m2 = tf.zeros([1, 2])
m3 = tf.zeros([1, 2])
# `[cell] * 2` repeats the same cell object for both layers.
g, ((out_m0, out_m1), (out_m2, out_m3)) = tf.nn.rnn_cell.MultiRNNCell(
[tf.contrib.rnn.LSTMFusedCell(2)] * 2,
state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
sess.run([tf.initialize_all_variables()])
res = sess.run([g, out_m0, out_m1, out_m2, out_m3],
{x.name: np.array([[1., 1.]]),
m0.name: 0.1 * np.ones([1, 2]),
m1.name: 0.1 * np.ones([1, 2]),
m2.name: 0.1 * np.ones([1, 2]),
m3.name: 0.1 * np.ones([1, 2])})
self.assertEqual(len(res), 5)
self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
# These numbers are from testBasicLSTMCell and only test c/h.
self.assertAllClose(res[1], [[0.68967271, 0.68967271]])
self.assertAllClose(res[2], [[0.44848421, 0.44848421]])
self.assertAllClose(res[3], [[0.39897051, 0.39897051]])
# The top layer's h state (res[4]) is expected to equal the cell output (res[0]).
self.assertAllClose(res[4], [[0.24024698, 0.24024698]])
# Checks that a two-layer stack of LSTMFusedCell produces the same one-step
# output and states as a two-layer stack of BasicLSTMCell when both are built
# with the same seeded initializer and fed the same values.
def testLSTMBasicToBlockCell(self):
with self.test_session(use_gpu=self._use_gpu) as sess:
x = tf.zeros([1, 2])
# Random input, shared by both runs; state values are fixed constants.
x_values = np.random.randn(1, 2)
m0_val = 0.1 * np.ones([1, 2])
m1_val = -0.1 * np.ones([1, 2])
m2_val = -0.2 * np.ones([1, 2])
m3_val = 0.2 * np.ones([1, 2])
initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
# Reference implementation, built in its own variable scope.
with tf.variable_scope("basic", initializer=initializer):
m0 = tf.zeros([1, 2])
m1 = tf.zeros([1, 2])
m2 = tf.zeros([1, 2])
m3 = tf.zeros([1, 2])
g, ((out_m0, out_m1), (out_m2, out_m3)) = tf.nn.rnn_cell.MultiRNNCell(
[tf.nn.rnn_cell.BasicLSTMCell(2, state_is_tuple=True)] * 2,
state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
sess.run([tf.initialize_all_variables()])
basic_res = sess.run([g, out_m0, out_m1, out_m2, out_m3],
{x.name: x_values,
m0.name: m0_val,
m1.name: m1_val,
m2.name: m2_val,
m3.name: m3_val})
# Fused implementation under test; the names m0..m3, g and out_* are rebound.
with tf.variable_scope("block", initializer=initializer):
m0 = tf.zeros([1, 2])
m1 = tf.zeros([1, 2])
m2 = tf.zeros([1, 2])
m3 = tf.zeros([1, 2])
g, ((out_m0, out_m1), (out_m2, out_m3)) = tf.nn.rnn_cell.MultiRNNCell(
[tf.contrib.rnn.LSTMFusedCell(2)] * 2,
state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
sess.run([tf.initialize_all_variables()])
block_res = sess.run([g, out_m0, out_m1, out_m2, out_m3],
{x.name: x_values,
m0.name: m0_val,
m1.name: m1_val,
m2.name: m2_val,
m3.name: m3_val})
self.assertEqual(len(basic_res), len(block_res))
for basic, block in zip(basic_res, block_res):
self.assertAllClose(basic, block)
# Peephole variant of the one-step comparison: a two-layer stack of
# LSTMFusedCell(use_peephole=True) must match a two-layer stack of
# LSTMCell(use_peepholes=True) built with the same seeded initializer.
def testLSTMBasicToBlockCellPeeping(self):
with self.test_session(use_gpu=self._use_gpu) as sess:
x = tf.zeros([1, 2])
# Random input, shared by both runs; state values are fixed constants.
x_values = np.random.randn(1, 2)
m0_val = 0.1 * np.ones([1, 2])
m1_val = -0.1 * np.ones([1, 2])
m2_val = -0.2 * np.ones([1, 2])
m3_val = 0.2 * np.ones([1, 2])
initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
# Reference implementation, built in its own variable scope.
with tf.variable_scope("basic", initializer=initializer):
m0 = tf.zeros([1, 2])
m1 = tf.zeros([1, 2])
m2 = tf.zeros([1, 2])
m3 = tf.zeros([1, 2])
g, ((out_m0, out_m1), (out_m2, out_m3)) = tf.nn.rnn_cell.MultiRNNCell(
[tf.nn.rnn_cell.LSTMCell(2,
use_peepholes=True,
state_is_tuple=True)] * 2,
state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
sess.run([tf.initialize_all_variables()])
basic_res = sess.run([g, out_m0, out_m1, out_m2, out_m3],
{x.name: x_values,
m0.name: m0_val,
m1.name: m1_val,
m2.name: m2_val,
m3.name: m3_val})
# Fused implementation under test; the names m0..m3, g and out_* are rebound.
with tf.variable_scope("block", initializer=initializer):
m0 = tf.zeros([1, 2])
m1 = tf.zeros([1, 2])
m2 = tf.zeros([1, 2])
m3 = tf.zeros([1, 2])
g, ((out_m0, out_m1), (out_m2, out_m3)) = tf.nn.rnn_cell.MultiRNNCell(
[tf.contrib.rnn.LSTMFusedCell(2, use_peephole=True)] * 2,
state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
sess.run([tf.initialize_all_variables()])
block_res = sess.run([g, out_m0, out_m1, out_m2, out_m3],
{x.name: x_values,
m0.name: m0_val,
m1.name: m1_val,
m2.name: m2_val,
m3.name: m3_val})
self.assertEqual(len(basic_res), len(block_res))
for basic, block in zip(basic_res, block_res):
self.assertAllClose(basic, block)
# Multi-step comparison without peepholes: the `fused_lstm` op must match
# `tf.nn.rnn` over a BasicLSTMCell in its outputs, in the gradients w.r.t. the
# inputs, and (with a looser tolerance) in the gradients w.r.t. the weights.
def testLSTMBasicToBlock(self):
with self.test_session(use_gpu=self._use_gpu) as sess:
batch_size = 2
input_size = 3
cell_size = 4
sequence_length = 5
# One random (batch_size, input_size) constant tensor per time step.
inputs = []
for _ in range(sequence_length):
inp = tf.convert_to_tensor(
np.random.randn(batch_size, input_size),
dtype=tf.float32)
inputs.append(inp)
initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
# Reference implementation.
with tf.variable_scope("basic", initializer=initializer):
cell = tf.nn.rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True)
outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)
sess.run([tf.initialize_all_variables()])
basic_outputs = sess.run(outputs)
basic_grads = sess.run(tf.gradients(outputs, inputs))
# Evaluated before the "block" variables exist in the graph.
basic_wgrads = sess.run(tf.gradients(outputs, tf.trainable_variables()))
# Fused implementation: weights are created by hand and passed to the op.
with tf.variable_scope("block", initializer=initializer):
w = tf.get_variable("w",
shape=[input_size + cell_size, cell_size * 4],
dtype=tf.float32)
b = tf.get_variable("b",
shape=[cell_size * 4],
dtype=tf.float32,
initializer=tf.zeros_initializer)
# Only the last of the seven returned lists (h) is used.
_, _, _, _, _, _, outputs = fused_lstm(
tf.convert_to_tensor(sequence_length,
dtype=tf.int64),
inputs,
w,
b,
cell_clip=0)
sess.run([tf.initialize_all_variables()])
block_outputs = sess.run(outputs)
block_grads = sess.run(tf.gradients(outputs, inputs))
block_wgrads = sess.run(tf.gradients(outputs, [w, b]))
self.assertAllClose(basic_outputs, block_outputs)
self.assertAllClose(basic_grads, block_grads)
# NOTE(review): pairing is positional and zip() stops at the shorter list;
# this assumes tf.trainable_variables() lined up with [w, b] -- confirm.
for basic, block in zip(basic_wgrads, block_wgrads):
self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
# Multi-step comparison with peepholes: `fused_lstm(use_peephole=True)` must
# match `tf.nn.rnn` over LSTMCell(use_peepholes=True) in its outputs, in the
# gradients w.r.t. the inputs, and (with a looser tolerance) in the gradients
# w.r.t. the weights.
def testLSTMBasicToBlockPeeping(self):
with self.test_session(use_gpu=self._use_gpu) as sess:
batch_size = 2
input_size = 3
cell_size = 4
sequence_length = 5
# One random (batch_size, input_size) constant tensor per time step.
inputs = []
for _ in range(sequence_length):
inp = tf.convert_to_tensor(
np.random.randn(batch_size, input_size),
dtype=tf.float32)
inputs.append(inp)
initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
# Reference implementation.
with tf.variable_scope("basic", initializer=initializer):
cell = tf.nn.rnn_cell.LSTMCell(cell_size,
use_peepholes=True,
state_is_tuple=True)
outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)
sess.run([tf.initialize_all_variables()])
basic_outputs = sess.run(outputs)
basic_grads = sess.run(tf.gradients(outputs, inputs))
# Evaluated before the "block" variables exist in the graph.
basic_wgrads = sess.run(tf.gradients(outputs, tf.trainable_variables()))
# Fused implementation: weights are created by hand and passed to the op.
with tf.variable_scope("block", initializer=initializer):
w = tf.get_variable("w",
shape=[input_size + cell_size, cell_size * 4],
dtype=tf.float32)
b = tf.get_variable("b",
shape=[cell_size * 4],
dtype=tf.float32,
initializer=tf.zeros_initializer)
wci = tf.get_variable("wci", shape=[cell_size], dtype=tf.float32)
wcf = tf.get_variable("wcf", shape=[cell_size], dtype=tf.float32)
wco = tf.get_variable("wco", shape=[cell_size], dtype=tf.float32)
# Only the last of the seven returned lists (h) is used.
_, _, _, _, _, _, outputs = fused_lstm(
tf.convert_to_tensor(sequence_length,
dtype=tf.int64),
inputs,
w,
b,
wci=wci,
wcf=wcf,
wco=wco,
cell_clip=0,
use_peephole=True)
sess.run([tf.initialize_all_variables()])
block_outputs = sess.run(outputs)
block_grads = sess.run(tf.gradients(outputs, inputs))
block_wgrads = sess.run(tf.gradients(outputs, [w, b, wci, wcf, wco]))
self.assertAllClose(basic_outputs, block_outputs)
self.assertAllClose(basic_grads, block_grads)
# NOTE(review): pairing is positional and zip() stops at the shorter list;
# this assumes the reference cell's tf.trainable_variables() order corresponds
# to [w, b, wci, wcf, wco], including the peephole order -- confirm.
for basic, block in zip(basic_wgrads, block_wgrads):
self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
# Re-runs every inherited test with sessions created using use_gpu=True.
class LSTMFusedCellGpuTest(LSTMFusedCellTest):
_use_gpu = True
if __name__ == "__main__":
tf.test.main()

View File

@ -0,0 +1,456 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""LSTM Fused Cell ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import load_library
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import rnn_cell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.platform import resource_loader
_lstm_ops_so = load_library.load_op_library(
resource_loader.get_path_to_datafile("_lstm_ops.so"))
assert _lstm_ops_so, "Could not load _lstm_ops.so."
# pylint: disable=invalid-name
def _lstm_fused_cell(x,
                     cs_prev,
                     h_prev,
                     w,
                     b,
                     wci=None,
                     wcf=None,
                     wco=None,
                     forget_bias=None,
                     cell_clip=None,
                     use_peephole=None,
                     name=None):
  r"""Runs one LSTM time step through the `lstm_fused_cell` kernel op.

  The op takes a single weight matrix, a single bias vector and three peephole
  weight vectors. When `wci` is not supplied, one float32 zero vector of
  length `cell_size` is substituted for all three peephole weights (`wcf` and
  `wco` are replaced too, even if they were passed).

  The computation without peephole connections is documented as:

  ```python
  xh = [x, h_prev]
  [i, f, ci, o] = xh * w + b
  f = f + forget_bias

  i = sigmoid(i)
  f = sigmoid(f)
  ci = tanh(ci)
  o = sigmoid(o)

  cs = ci .* i + cs_prev .* f
  co = tanh(cs)

  h = co .* o
  ```

  Args:
    x: A `Tensor`. The input to the LSTM cell.
    cs_prev: A `Tensor` with the same type as `x`. Its static shape must have
      rank 2 whenever `wci` is omitted, because `cell_size` is read from its
      second dimension.
    h_prev: A `Tensor` with the same type as `x`.
    w: A `Tensor` with the same type as `x`. The weight matrix.
    b: A `Tensor` with the same type as `x`. The bias vector.
    wci: A `Tensor` with the same type as `x`, or None for the zero default.
    wcf: A `Tensor` with the same type as `x`.
    wco: A `Tensor` with the same type as `x`.
    forget_bias: An optional `float`. Defaults to `1`. The forget gate bias.
    cell_clip: An optional `float`. Defaults to `3`.
    use_peephole: An optional `bool`. Defaults to `False`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (i, cs, f, o, ci, co, h).
    i: The input gate.
    cs: The cell state before the tanh.
    f: The forget gate.
    o: The output gate.
    ci: The cell input.
    co: The cell after the tanh.
    h: The output h vector.

  Raises:
    ValueError: If `wci` is None and `cell_size` cannot be determined
      statically from `cs_prev`.
  """
  if wci is None:
    num_units = cs_prev.get_shape().with_rank(2)[1].value
    if num_units is None:
      raise ValueError("cell_size from `cs_prev` should not be None.")
    # A single zero constant stands in for all three peephole vectors.
    zero_peephole = array_ops.constant(
        0, dtype=dtypes.float32, shape=[num_units])
    wci = wcf = wco = zero_peephole

  op_kwargs = dict(
      x=x,
      cs_prev=cs_prev,
      h_prev=h_prev,
      w=w,
      wci=wci,
      wco=wco,
      wcf=wcf,
      b=b,
      forget_bias=forget_bias,
      cell_clip=cell_clip,
      use_peephole=use_peephole,
      name=name)
  # pylint: disable=protected-access
  return _lstm_ops_so.lstm_fused_cell(**op_kwargs)
  # pylint: enable=protected-access
def _fused_lstm(seq_len_max,
                x,
                w,
                b,
                cs_prev=None,
                h_prev=None,
                wci=None,
                wcf=None,
                wco=None,
                forget_bias=None,
                cell_clip=None,
                use_peephole=None,
                name=None):
  r"""Runs the multi-step `fused_lstm` kernel op over a list of step inputs.

  Missing optional tensors are filled in before the op is called:
  `cs_prev` / `h_prev` default to a float32 zero tensor of shape
  `[batch_size, cell_size]`, and when `wci` is None one float32 zero vector of
  length `cell_size` is used for all three peephole weights (`wcf` and `wco`
  are replaced too, even if they were passed). `batch_size` is read from the
  static shape of `x[0]` and `cell_size` is a quarter of the length of `b`.

  Args:
    seq_len_max: A `Tensor` of type `int64`. Presumably the number of time
      steps to process -- confirm against the kernel.
    x: A list of at least 1 `Tensor` objects of the same type in: `float32`.
      Each must have static rank 2.
    w: A `Tensor`. Must have the same type as `x`.
    b: A `Tensor`. Must have the same type as `x`. Must have static rank 1 and
      a statically known length.
    cs_prev: A `Tensor`. Must have the same type as `x`.
    h_prev: A `Tensor`. Must have the same type as `x`.
    wci: A `Tensor`. Must have the same type as `x`.
    wcf: A `Tensor`. Must have the same type as `x`.
    wco: A `Tensor`. Must have the same type as `x`.
    forget_bias: An optional `float`. Defaults to `1`.
    cell_clip: An optional `float`. Defaults to `3`.
    use_peephole: An optional `bool`. Defaults to `False`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (i, cs, f, o, ci, co, h).
    i: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    cs: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    f: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    o: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    ci: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    co: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.
    h: A list with the same number of `Tensor` objects as `x` of `Tensor`
    objects of the same type as x.

  Raises:
    ValueError: If `b` does not have a valid shape.
  """
  batch_size = x[0].get_shape().with_rank(2)[0].value
  cell_size4 = b.get_shape().with_rank(1)[0].value
  if cell_size4 is None:
    raise ValueError("`b` shape must not be None.")
  # Floor division: this module enables `from __future__ import division`, so
  # `/` would yield a float, which is not a proper shape dimension.
  cell_size = cell_size4 // 4
  zero_state = None
  if cs_prev is None or h_prev is None:
    # Built once and shared by whichever of the two states is missing.
    zero_state = array_ops.constant(0,
                                    dtype=dtypes.float32,
                                    shape=[batch_size, cell_size])
  if cs_prev is None:
    cs_prev = zero_state
  if h_prev is None:
    h_prev = zero_state
  if wci is None:
    wci = array_ops.constant(0, dtype=dtypes.float32, shape=[cell_size])
    wco = wci
    wcf = wci
  # pylint: disable=protected-access
  return _lstm_ops_so.fused_lstm(seq_len_max=seq_len_max,
                                 x=x,
                                 cs_prev=cs_prev,
                                 h_prev=h_prev,
                                 w=w,
                                 wci=wci,
                                 wco=wco,
                                 wcf=wcf,
                                 b=b,
                                 forget_bias=forget_bias,
                                 cell_clip=cell_clip,
                                 name=name,
                                 use_peephole=use_peephole)
  # pylint: enable=protected-access
# pylint: enable=invalid-name
# NOTE(review): a shape function for "LSTMFusedCell" is also registered by the
# decorator right below; confirm that this `None` registration is intended as
# a default that the decorated function overrides rather than a duplicate.
ops.RegisterShape("LSTMFusedCell")(None)
# Names of the first two outputs of the LSTMFusedCellGrad op.
# NOTE(review): no use of this list is visible in this part of the file.
_lstm_fused_cell_grad_outputs = ["cs_prev_grad", "dicfo"]
@ops.RegisterShape("LSTMFusedCell")
def _LSTMFusedCellShape(op):
  """Shape function for LSTMFusedCell.

  Args:
    op: The `LSTMFusedCell` operation. Inputs 0 and 1 must have static rank 2.

  Returns:
    A tuple of seven `TensorShape([batch_size, cell_size])`, one for each of
    the outputs (i, cs, f, o, ci, co, h). `batch_size` is the first dimension
    of input 0 and `cell_size` the second dimension of input 1; either may be
    None when unknown.
  """
  num_rows = op.inputs[0].get_shape().with_rank(2)[0].value
  num_cols = op.inputs[1].get_shape().with_rank(2)[1].value
  num_outputs = 7
  # Build a separate TensorShape object per output.
  return tuple(tensor_shape.TensorShape([num_rows, num_cols])
               for _ in range(num_outputs))
@ops.RegisterGradient("LSTMFusedCell")
def _LSTMFusedCellGrad(op, *grad):
  """Gradient for LSTMFusedCell.

  Calls the `lstm_fused_cell_grad` kernel op for the gate and peephole
  gradients, then derives the gradients of `x`, `h_prev`, `w` and `b` from the
  returned `dicfo` tensor with regular TensorFlow ops.

  Args:
    op: The forward `LSTMFusedCell` operation.
    *grad: Gradients w.r.t. the seven forward outputs
      (i, cs, f, o, ci, co, h). Only the `cs` and `h` gradients are used.

  Returns:
    A tuple (x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wcf_grad,
    wco_grad, b_grad), one gradient per forward input.

  Raises:
    ValueError: If the second dimension of `x` or of `cs_prev` is not known
      statically.
  """
  # The peephole weights are unpacked in the same order in which they are
  # forwarded to the grad op and in which their gradients are returned below.
  # The previous unpacking order (wci, wco, wcf) disagreed with both, so the
  # wcf and wco tensors reached the grad op swapped.
  # NOTE(review): assumes LSTMFusedCell registers its inputs as
  # (x, cs_prev, h_prev, w, wci, wcf, wco, b), like FusedLSTM does -- confirm
  # against the REGISTER_OP.
  (x, cs_prev, h_prev, w, wci, wcf, wco, b) = op.inputs
  (i, cs, f, o, ci, co, _) = op.outputs
  (_, cs_grad, _, _, _, _, h_grad) = grad

  batch_size = x.get_shape().with_rank(2)[0].value
  if batch_size is None:
    # -1 as a slice size means "everything remaining in this dimension".
    batch_size = -1
  input_size = x.get_shape().with_rank(2)[1].value
  if input_size is None:
    raise ValueError("input_size from `x` should not be None.")
  cell_size = cs_prev.get_shape().with_rank(2)[1].value
  if cell_size is None:
    raise ValueError("cell_size from `cs_prev` should not be None.")

  (cs_prev_grad, dicfo, wci_grad, wcf_grad,
   wco_grad) = _lstm_ops_so.lstm_fused_cell_grad(
       x,
       cs_prev,
       h_prev,
       w,
       wci,
       wcf,
       wco,
       b,
       i,
       cs,
       f,
       o,
       ci,
       co,
       cs_grad,
       h_grad,
       use_peephole=op.get_attr("use_peephole"))

  # Backprop from dicfo to xh.
  xh_grad = math_ops.matmul(dicfo, w, transpose_b=True)

  # xh is the column-wise concatenation [x, h_prev]; split its gradient back
  # into the two parts.
  x_grad = array_ops.slice(xh_grad, (0, 0), (batch_size, input_size))
  x_grad.get_shape().merge_with(x.get_shape())

  h_prev_grad = array_ops.slice(xh_grad, (0, input_size),
                                (batch_size, cell_size))
  h_prev_grad.get_shape().merge_with(h_prev.get_shape())

  # Backprop from dicfo to w.
  xh = array_ops.concat(1, [x, h_prev])
  w_grad = math_ops.matmul(xh, dicfo, transpose_a=True)
  w_grad.get_shape().merge_with(w.get_shape())

  # Backprop from dicfo to b.
  b_grad = nn_ops.bias_add_grad(dicfo)
  b_grad.get_shape().merge_with(b.get_shape())

  return (x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wcf_grad,
          wco_grad, b_grad)
@ops.RegisterShape("LSTMFusedCellGrad")
def _LSTMFusedCellGradShape(op):
  """Shape function for LSTMFusedCellGrad.

  Args:
    op: The `LSTMFusedCellGrad` operation. Inputs 0 and 1 must have static
      rank 2.

  Returns:
    A list of five shapes, in op output order: `[batch_size, cell_size]` for
    cs_prev_grad, `[batch_size, 4 * cell_size]` for dicfo, and `[cell_size]`
    for each of the three peephole weight gradients.
  """
  num_rows = op.inputs[0].get_shape().with_rank(2)[0].value
  num_units = op.inputs[1].get_shape().with_rank(2)[1].value

  state_shape = tensor_shape.TensorShape([num_rows, num_units])
  gates_shape = tensor_shape.TensorShape([num_rows, num_units * 4])
  peephole_shapes = [tensor_shape.TensorShape([num_units]) for _ in range(3)]
  return [state_shape, gates_shape] + peephole_shapes
@ops.RegisterShape("FusedLSTM")
def _FusedLSTMShape(op):
  """Shape function for FusedLSTM.

  Args:
    op: The `FusedLSTM` operation. Input 1 (the first element of `x`) must
      have static rank 2 and the last input (`b`) static rank 1.

  Returns:
    A list of `7 * max_len` shapes `[batch_size, cell_size]`, one per tensor
    of the seven list-typed outputs. `cell_size` is a quarter of the length of
    `b`, or unknown (None) when that length is not known statically.
  """
  max_len = op.get_attr("max_len")

  x = op.inputs[1]
  b = op.inputs[-1]

  batch_size = x.get_shape().with_rank(2)[0].value
  cell_size4 = b.get_shape().with_rank(1)[0].value
  # Floor division keeps the dimension an int (this module enables true
  # division), and an unknown bias length must stay unknown instead of
  # raising a TypeError on `None / 4`.
  cell_size = None if cell_size4 is None else cell_size4 // 4

  return [tensor_shape.TensorShape([batch_size, cell_size])] * max_len * 7
@ops.RegisterGradient("FusedLSTM")
def _FusedLSTMGrad(op, *grad):
  """Gradient for FusedLSTM.

  Forwards the op's inputs, its outputs and the incoming `cs` / `h` gradients
  to the `fused_lstm_grad` kernel op.

  Args:
    op: The forward `FusedLSTM` operation.
    *grad: Gradients w.r.t. every forward output tensor, flattened in output
      order (i, cs, f, o, ci, co, h; `max_len` tensors each). Only the `cs`
      and `h` gradients are used.

  Returns:
    A list with one entry per forward input: None for `seq_len_max`, the
    `max_len` gradients of `x`, then the gradients of cs_prev, h_prev, w, wci,
    wcf, wco and b.
  """
  max_len = op.get_attr("max_len")

  # Forward inputs are registered as
  # (seq_len_max, x * max_len, cs_prev, h_prev, w, wci, wcf, wco, b).
  # The local names now follow that order; previously `wcf` and `wco` were
  # swapped everywhere, which was harmless only because every use was
  # positional. The tensor in each position is unchanged.
  seq_len_max = op.inputs[0]
  x = op.inputs[1:1 + max_len]
  cs_prev = op.inputs[-7]
  h_prev = op.inputs[-6]
  w = op.inputs[-5]
  wci = op.inputs[-4]
  wcf = op.inputs[-3]
  wco = op.inputs[-2]
  b = op.inputs[-1]

  # Each forward output is a list of max_len tensors, laid out back to back.
  i = op.outputs[0 * max_len:1 * max_len]
  cs = op.outputs[1 * max_len:2 * max_len]
  f = op.outputs[2 * max_len:3 * max_len]
  o = op.outputs[3 * max_len:4 * max_len]
  ci = op.outputs[4 * max_len:5 * max_len]
  co = op.outputs[5 * max_len:6 * max_len]
  h = op.outputs[6 * max_len:7 * max_len]

  # grad[k * max_len:(k + 1) * max_len] belongs to output k; cs is output 1.
  # NOTE(review): this slice takes the second-to-last block (the gradients of
  # `co`, output 5), not `cs` -- kept as is, confirm against the kernel.
  cs_grad = grad[-max_len * 2:-max_len]
  h_grad = grad[-max_len:]

  (x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wcf_grad, wco_grad,
   b_grad) = _lstm_ops_so.fused_lstm_grad(
       seq_len_max,
       x,
       cs_prev,
       h_prev,
       w,
       wci,
       wcf,
       wco,
       b,
       i,
       cs,
       f,
       o,
       ci,
       co,
       h,
       cs_grad,
       h_grad,
       use_peephole=op.get_attr("use_peephole"))

  # seq_len_max gets no gradient.
  return [None] + x_grad + [cs_prev_grad, h_prev_grad, w_grad, wci_grad,
                            wcf_grad, wco_grad, b_grad]
@ops.RegisterShape("FusedLSTMGrad")
def _FusedLSTMGradShape(op):
  """Shape function for FusedLSTMGrad.

  Every gradient output has the shape of the forward input it belongs to.

  Args:
    op: The `FusedLSTMGrad` operation.

  Returns:
    A list of shapes in op output order: the shape of the first `x` tensor
    repeated `max_len` times, followed by the shapes of cs_prev, h_prev, w,
    wci, wcf, wco and b.
  """
  max_len = op.get_attr("max_len")

  # Inputs are registered as
  # (seq_len_max, x * max_len, cs_prev, h_prev, w, wci, wcf, wco, b, ...).
  # The local names now follow that order (wcf and wco used to be swapped);
  # the shape placed in each output position is unchanged.
  x = op.inputs[1]
  cs_prev = op.inputs[1 + max_len]
  h_prev = op.inputs[2 + max_len]
  w = op.inputs[3 + max_len]
  wci = op.inputs[4 + max_len]
  wcf = op.inputs[5 + max_len]
  wco = op.inputs[6 + max_len]
  b = op.inputs[7 + max_len]

  x_shape = x.get_shape().with_rank(2)
  cs_prev_shape = cs_prev.get_shape().with_rank(2)
  h_prev_shape = h_prev.get_shape().with_rank(2)
  w_shape = w.get_shape().with_rank(2)
  wci_shape = wci.get_shape().with_rank(1)
  wcf_shape = wcf.get_shape().with_rank(1)
  wco_shape = wco.get_shape().with_rank(1)
  b_shape = b.get_shape().with_rank(1)

  return [x_shape] * max_len + [cs_prev_shape, h_prev_shape, w_shape, wci_shape,
                                wcf_shape, wco_shape, b_shape]
class LSTMFusedCell(rnn_cell.RNNCell):
  """LSTM recurrent network cell computed by a single fused op.

  The implementation is based on: http://arxiv.org/abs/1409.2329.

  `forget_bias` (default: 1) is added to the biases of the forget gate in
  order to reduce the scale of forgetting at the beginning of training.

  Unlike BasicLSTMCell, the whole step is delegated to one monolithic op,
  which is intended to be faster. The weight matrix and bias vector are meant
  to be compatible with BasicLSTMCell as long as the variable scope matches.
  """

  def __init__(self, num_units, forget_bias=1.0, use_peephole=False):
    """Stores the cell configuration.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      use_peephole: Whether to use peephole connections or not.
    """
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._use_peephole = use_peephole

  @property
  def state_size(self):
    # The state is the pair (cs, h), each of width num_units.
    return (self._num_units, self._num_units)

  @property
  def output_size(self):
    return self._num_units

  def __call__(self, x, states_prev, scope=None):
    """Runs one LSTM step.

    Args:
      x: Rank-2 input tensor whose second dimension is statically known.
      states_prev: Pair `(cs_prev, h_prev)` holding the previous state.
      scope: Optional variable scope; defaults to the class name.

    Returns:
      A pair `(h, (cs, h))`: the step output and the new state.

    Raises:
      ValueError: If the second dimension of `x` is not set, or if
        `states_prev` does not have exactly two elements.
    """
    scope_name = scope or type(self).__name__
    with vs.variable_scope(scope_name):
      input_shape = x.get_shape().with_rank(2)
      if not input_shape[1]:
        raise ValueError(
            "Expecting x_shape[1] to be sets: %s" % str(input_shape))
      if len(states_prev) != 2:
        raise ValueError("Expecting states_prev to be a tuple with length 2.")

      num_units = self._num_units
      input_size = input_shape[1]

      # One matrix holds the weights of all four gates, applied to [x, h_prev].
      w = vs.get_variable("W", [input_size + num_units, num_units * 4])
      bias_shape = [w.get_shape().with_rank(2)[1]]
      b = vs.get_variable("b", bias_shape,
                          initializer=init_ops.constant_initializer(0.0))
      # The peephole variables are created whether or not use_peephole is set.
      wci = vs.get_variable("wci", [num_units])
      wco = vs.get_variable("wco", [num_units])
      wcf = vs.get_variable("wcf", [num_units])

      cs_prev, h_prev = states_prev
      # Of the seven op outputs only the new cell state and h are needed.
      (_, cs, _, _, _, _, h) = _lstm_fused_cell(
          x,
          cs_prev,
          h_prev,
          w,
          b,
          wci=wci,
          wco=wco,
          wcf=wcf,
          forget_bias=self._forget_bias,
          use_peephole=self._use_peephole)

      return (h, (cs, h))

View File

@ -27,12 +27,6 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import rnn_cell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import reduce_sum
from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.ops.nn_ops import conv2d
from tensorflow.python.ops.nn_ops import softmax
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest
@ -104,7 +98,7 @@ class CoupledInputForgetGateLSTMCell(rnn_cell.RNNCell):
initializer=None, num_proj=None, proj_clip=None,
num_unit_shards=1, num_proj_shards=1,
forget_bias=1.0, state_is_tuple=False,
activation=tanh):
activation=math_ops.tanh):
"""Initialize the parameters for an LSTM cell.
Args:
@ -188,6 +182,8 @@ class CoupledInputForgetGateLSTMCell(rnn_cell.RNNCell):
ValueError: If input size cannot be inferred from inputs via
static shape inference.
"""
sigmoid = math_ops.sigmoid
num_proj = self._num_units if self._num_proj is None else self._num_proj
if self._state_is_tuple:
@ -322,6 +318,8 @@ class TimeFreqLSTMCell(rnn_cell.RNNCell):
ValueError: if an input_size was specified and the provided inputs have
a different dimension.
"""
sigmoid = math_ops.sigmoid
tanh = math_ops.tanh
freq_inputs = self._make_tf_features(inputs)
dtype = inputs.dtype
@ -489,6 +487,8 @@ class GridLSTMCell(rnn_cell.RNNCell):
ValueError: if an input_size was specified and the provided inputs have
a different dimension.
"""
sigmoid = math_ops.sigmoid
tanh = math_ops.tanh
freq_inputs = self._make_tf_features(inputs)
dtype = inputs.dtype
@ -771,6 +771,11 @@ class AttentionCellWrapper(rnn_cell.RNNCell):
return output, new_state
def _attention(self, query, attn_states):
conv2d = nn_ops.conv2d
reduce_sum = math_ops.reduce_sum
softmax = nn_ops.softmax
tanh = math_ops.tanh
with vs.variable_scope("Attention"):
k = vs.get_variable("AttnW", [1, 1, self._attn_size, self._attn_vec_size])
v = vs.get_variable("AttnV", [self._attn_vec_size])

View File

@ -570,6 +570,7 @@ filegroup(
name = "android_srcs",
srcs = [
":proto_text_srcs_all",
"//tensorflow/core/debug:android_srcs",
"//tensorflow/core/kernels:android_srcs",
"//tensorflow/core/platform/default/build_config:android_srcs",
"//tensorflow/core/util/ctc:android_srcs",
@ -580,8 +581,6 @@ filegroup(
"client/**/*.cc",
"common_runtime/**/*.h",
"common_runtime/**/*.cc",
"debug/**/*.h",
"debug/**/*.cc",
"framework/**/*.h",
"framework/**/*.cc",
"graph/**/*.h",
@ -1103,49 +1102,13 @@ tf_cuda_library(
linkstatic = 1,
deps = [
":core_cpu_internal",
":debug_graph_utils",
":framework",
":gpu_tracer",
":lib",
":lib_internal",
":proto_text",
":protos_all_cc",
],
alwayslink = 1,
)
tf_cuda_library(
name = "debug_gateway_internal",
srcs = ["debug/debug_gateway.cc"],
hdrs = ["debug/debug_gateway.h"],
copts = tf_copts(),
linkstatic = 1,
deps = [
":core_cpu_internal",
":direct_session_internal",
":framework",
":gpu_tracer",
":lib",
":lib_internal",
":proto_text",
":protos_all_cc",
],
alwayslink = 1,
)
tf_cuda_library(
name = "debug_graph_utils",
srcs = ["debug/debug_graph_utils.cc"],
hdrs = ["debug/debug_graph_utils.h"],
copts = tf_copts(),
linkstatic = 1,
deps = [
":core_cpu_internal",
":framework",
":lib",
":lib_internal",
":proto_text",
":protos_all_cc",
"//tensorflow/core/debug:debug_graph_utils",
],
alwayslink = 1,
)
@ -1604,35 +1567,6 @@ tf_cc_test(
],
)
tf_cc_test_gpu(
name = "debug/debug_gateway_test",
size = "small",
args = ["--heap_check=local"],
linkstatic = tf_kernel_tests_linkstatic(),
tags = tf_cuda_tests_tags() + ["nomac"],
deps = [
":all_kernels",
":core_cpu",
":core_cpu_internal",
":debug_gateway_internal",
":debug_graph_utils",
":direct_session",
":direct_session_internal",
":framework",
":framework_internal",
":gpu_runtime",
":lib",
":lib_internal",
":protos_all_cc",
":test",
":test_main",
":testlib",
"//tensorflow/cc:cc_ops",
"//tensorflow/core/kernels:debug_ops",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "common_runtime/direct_session_with_tracking_alloc_test",
size = "small",

157
tensorflow/core/debug/BUILD Normal file
View File

@ -0,0 +1,157 @@
# Description:
# TensorFlow Debugger (tfdbg).
#
# Public Android targets:
# filegroup ":android_srcs" - Debugger source files for Android.
package(
default_visibility = ["//tensorflow:internal"],
features = ["-parse_headers"],
)
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
load(
"//tensorflow:tensorflow.bzl",
"tf_copts",
"tf_cc_test",
"tf_cuda_library",
)
load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu")
# For platform specific build config
load(
"//tensorflow/core:platform/default/build_config.bzl",
"tf_kernel_tests_linkstatic",
)
load(
"//tensorflow/core:platform/default/build_config_root.bzl",
"tf_cuda_tests_tags",
)
# Library built from debug_gateway.cc / debug_gateway.h.
# alwayslink = 1 keeps the object code in dependents even when no symbol is
# referenced directly.
tf_cuda_library(
name = "debug_gateway_internal",
srcs = ["debug_gateway.cc"],
hdrs = ["debug_gateway.h"],
copts = tf_copts(),
linkstatic = 1,
deps = [
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:direct_session_internal",
"//tensorflow/core:framework",
"//tensorflow/core:gpu_tracer",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
],
alwayslink = 1,
)
# Library built from debug_graph_utils.cc / debug_graph_utils.h. The same two
# files are also exported for Android through the ":android_srcs" filegroup.
tf_cuda_library(
name = "debug_graph_utils",
srcs = ["debug_graph_utils.cc"],
hdrs = ["debug_graph_utils.h"],
copts = tf_copts(),
linkstatic = 1,
deps = [
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
],
alwayslink = 1,
)
# Library built from debug_io_utils.cc / debug_io_utils.h.
# NOTE(review): unlike debug_graph_utils, these sources are not listed in the
# ":android_srcs" filegroup -- confirm that this is intentional.
tf_cuda_library(
name = "debug_io_utils",
srcs = ["debug_io_utils.cc"],
hdrs = ["debug_io_utils.h"],
copts = tf_copts(),
linkstatic = 1,
deps = [
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
],
alwayslink = 1,
)
# GPU-enabled test for the debug gateway. It is run with --heap_check=local
# and carries the "nomac" tag in addition to the standard CUDA test tags.
tf_cc_test_gpu(
name = "debug_gateway_test",
size = "small",
args = ["--heap_check=local"],
linkstatic = tf_kernel_tests_linkstatic(),
tags = tf_cuda_tests_tags() + ["nomac"],
deps = [
":debug_gateway_internal",
":debug_graph_utils",
"//tensorflow/cc:cc_ops",
"//tensorflow/core:all_kernels",
"//tensorflow/core:core_cpu",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:direct_session",
"//tensorflow/core:direct_session_internal",
"//tensorflow/core:framework",
"//tensorflow/core:framework_internal",
"//tensorflow/core:gpu_runtime",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:debug_ops",
"//tensorflow/core/kernels:ops_util",
],
)
# CPU-only unit test for the ":debug_io_utils" library.
tf_cc_test(
name = "debug_io_utils_test",
size = "small",
linkstatic = tf_kernel_tests_linkstatic(),
deps = [
":debug_io_utils",
"//tensorflow/core:core_cpu",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
],
)
# Debugger source files for Android (publicly visible); currently only the
# debug_graph_utils sources.
filegroup(
name = "android_srcs",
srcs = [
"debug_graph_utils.cc",
"debug_graph_utils.h",
],
visibility = ["//visibility:public"],
)
# -----------------------------------------------------------------------------
# Google-internal targets. These must be at the end for syncrepo.
# Every file under this package except METADATA and OWNERS files, visible to
# all packages under //tensorflow.
filegroup(
name = "all_files",
srcs = glob(
["**/*"],
exclude = [
"**/METADATA",
"**/OWNERS",
],
),
visibility = ["//tensorflow:__subpackages__"],
)

View File

@ -36,6 +36,8 @@ Status DebugNodeInserter::InsertNodes(
// A map from tensor name (e.g., "node_a:0") to list of debug op names
// (e.g., {"DebugIdentity", "DebugNanCount"})
std::unordered_map<string, std::vector<string>> tensor_watches;
// A map from tensor name to debug_url.
std::unordered_map<string, std::vector<string>> tensor_watch_urls;
// Cache the proto content for fast lookup later
for (const DebugTensorWatch& watch : watches) {
@ -58,6 +60,12 @@ Status DebugNodeInserter::InsertNodes(
}
tensor_watches[tensor_name] = debug_ops;
std::vector<string> urls;
for (const string& url : watch.debug_urls()) {
urls.push_back(url);
}
tensor_watch_urls[tensor_name] = urls;
}
if (tensor_watches.empty()) {
@ -150,9 +158,9 @@ Status DebugNodeInserter::InsertNodes(
const string& debug_op_name = tensor_watches[tensor_name][i];
Node* debug_node;
Status debug_s =
CreateDebugNode(graph, device_type, copy_node->name(), src_dt,
tensor_name, i, debug_op_name, &debug_node);
Status debug_s = CreateDebugNode(
graph, device_type, copy_node->name(), src_dt, tensor_name,
tensor_watch_urls[tensor_name], i, debug_op_name, &debug_node);
if (!debug_s.ok()) {
return Status(
error::FAILED_PRECONDITION,
@ -267,17 +275,17 @@ Status DebugNodeInserter::CreateCopyNode(
Status DebugNodeInserter::CreateDebugNode(
Graph* graph, const DeviceType device_type,
const string& src_copy_node_name, const DataType src_dt,
const string& tensor_name, const int debug_op_num,
const string& debug_op_name, Node** debug_node) {
const string& tensor_name, const std::vector<string>& debug_urls,
const int debug_op_num, const string& debug_op_name, Node** debug_node) {
NodeDef node_def;
const KernelDef* kdef;
const string debug_node_name =
GetDebugNodeName(tensor_name, debug_op_num, debug_op_name);
// TODO(cais): Hook up with DebugTensorWatch proto
auto builder = NodeDefBuilder(debug_node_name, debug_op_name)
.Input(src_copy_node_name, 0, src_dt)
.Attr("tensor_name", tensor_name);
.Attr("tensor_name", tensor_name)
.Attr("debug_urls", debug_urls);
if (!builder.Finalize(&node_def).ok()) {
return Status(

View File

@ -94,6 +94,7 @@ class DebugNodeInserter {
const string& src_copy_node_name,
const DataType src_dt,
const string& tensor_name,
const std::vector<string>& debug_urls,
const int debug_op_num,
const string& debug_op_name, Node** debug_node);
};

View File

@ -0,0 +1,211 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/debug/debug_io_utils.h"
#include <vector>
#include "tensorflow/core/framework/summary.pb.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/util/event.pb.h"
namespace tensorflow {
namespace {
// Packages a tensor as the single Summary value of a freshly built Event.
//
// The Summary value is labelled "<tensor_name>:<debug_op>"; for instance,
// tensor_name "foo/node_a:0" combined with debug_op "DebugIdentity" gives
// "foo/node_a:0:DebugIdentity". The Event's wall_time field receives
// wall_time_us converted to double.
Event WrapTensorAsEvent(const string& tensor_name, const string& debug_op,
                        const Tensor& tensor, const uint64 wall_time_us) {
  Event wrapped;
  wrapped.set_wall_time(static_cast<double>(wall_time_us));

  Summary::Value* value = wrapped.mutable_summary()->add_value();
  value->set_node_name(strings::StrCat(tensor_name, ":", debug_op));

  auto* tensor_proto = value->mutable_tensor();
  if (tensor.dtype() != DT_STRING) {
    tensor.AsProtoTensorContent(tensor_proto);
  } else {
    // DT_STRING is written field-by-field so that tensor_util.MakeNdarray can
    // turn the TensorProto into a string-type numpy array; MakeNdarray does
    // not work with strings encoded by AsProtoTensorContent() in
    // tensor_content.
    tensor.AsProtoField(tensor_proto);
  }
  return wrapped;
}
} // namespace
// static
// URL prefix that selects the file-system dump target. PublishDebugTensor
// treats whatever follows this prefix as the dump root directory.
const char* const DebugIO::kFileURLScheme = "file://";
// static
// URL prefix that identifies a gRPC debug target.
const char* const DebugIO::kGrpcURLScheme = "grpc://";
// Publishes `tensor` to every URL in `debug_urls`.
//
// `tensor_name` is either "<node_name>" or "<node_name>:<output_slot>"; the
// slot defaults to 0 when omitted. URL schemes are matched
// case-insensitively.
//
// Returns:
//   OK               if every file:// URL was written successfully.
//   INVALID_ARGUMENT if `tensor_name` cannot be parsed.
//   UNIMPLEMENTED    as soon as a grpc:// URL is encountered.
//   UNAVAILABLE      as soon as a URL with an unrecognized scheme is
//                    encountered.
//   INTERNAL         if one or more file:// dumps failed; the message carries
//                    the failure count and the first underlying error.
Status DebugIO::PublishDebugTensor(const string& tensor_name,
                                   const string& debug_op, const Tensor& tensor,
                                   const uint64 wall_time_us,
                                   const gtl::ArraySlice<string>& debug_urls) {
  // Split the tensor_name into node name and output slot index.
  std::vector<string> name_items = str_util::Split(tensor_name, ':');
  string node_name;
  int32 output_slot = 0;
  if (name_items.size() == 2) {
    node_name = name_items[0];
    if (!strings::safe_strto32(name_items[1], &output_slot)) {
      return Status(error::INVALID_ARGUMENT,
                    strings::StrCat("Invalid string value for output_slot: \"",
                                    name_items[1], "\""));
    }
  } else if (name_items.size() == 1) {
    node_name = name_items[0];
  } else {
    return Status(
        error::INVALID_ARGUMENT,
        strings::StrCat("Failed to parse tensor name: \"", tensor_name, "\""));
  }

  int num_failed_urls = 0;
  // Message of the first failed dump, kept so the final Status explains why.
  string first_failure_message;
  for (const string& url : debug_urls) {
    // Lowercase once per URL; only the scheme comparison uses this copy, the
    // path itself is taken from the original `url`.
    const string lowercase_url = str_util::Lowercase(url);
    if (lowercase_url.find(kFileURLScheme) == 0) {
      const string dump_root_dir = url.substr(strlen(kFileURLScheme));
      Status s =
          DebugFileIO::DumpTensorToDir(node_name, output_slot, debug_op, tensor,
                                       wall_time_us, dump_root_dir, nullptr);
      if (!s.ok()) {
        if (num_failed_urls == 0) {
          first_failure_message = s.error_message();
        }
        num_failed_urls++;
      }
    } else if (lowercase_url.find(kGrpcURLScheme) == 0) {
      // TODO(cais): Implement PublishTensor with grpc urls.
      return Status(error::UNIMPLEMENTED,
                    strings::StrCat("Publishing to GRPC debug target is not ",
                                    "implemented yet"));
    } else {
      return Status(error::UNAVAILABLE,
                    strings::StrCat("Invalid debug target URL: ", url));
    }
  }

  if (num_failed_urls == 0) {
    return Status::OK();
  } else {
    return Status(
        error::INTERNAL,
        strings::StrCat("Publishing to ", num_failed_urls, " of ",
                        debug_urls.size(),
                        " debug target URLs failed; first error: ",
                        first_failure_message));
  }
}
// static
// Resolves the dump file location under dump_root_dir and writes the tensor
// there as an Event proto. When dump_file_path is non-null it receives the
// resolved path, and it is filled in before the write is attempted.
Status DebugFileIO::DumpTensorToDir(
    const string& node_name, const int32 output_slot, const string& debug_op,
    const Tensor& tensor, const uint64 wall_time_us,
    const string& dump_root_dir, string* dump_file_path) {
  const string target_path = GetDumpFilePath(dump_root_dir, node_name,
                                             output_slot, debug_op,
                                             wall_time_us);

  // Reporting the path back is optional; callers may pass nullptr.
  if (dump_file_path) {
    *dump_file_path = target_path;
  }

  Status dump_status = DumpTensorToEventFile(node_name, output_slot, debug_op,
                                             tensor, wall_time_us, target_path);
  return dump_status;
}
// static
// Builds the dump file path
//   <dump_root_dir>/<node_name>_<output_slot>_<debug_op>_<wall_time_us>
// Path separators already present in node_name are left as they are.
string DebugFileIO::GetDumpFilePath(const string& dump_root_dir,
                                    const string& node_name,
                                    const int32 output_slot,
                                    const string& debug_op,
                                    const uint64 wall_time_us) {
  const string relative_path = strings::StrCat(
      node_name, "_", output_slot, "_", debug_op, "_", wall_time_us);
  return io::JoinPath(dump_root_dir, relative_path);
}
// static
// Wraps `tensor` in an Event proto and writes the serialized proto to
// `file_path`, creating the containing directory first if needed.
//
// Returns:
//   FAILED_PRECONDITION if the containing directory cannot be created.
//   INTERNAL            if the Event proto cannot be serialized.
//   The Status of the failing file operation if opening, appending to, or
//   closing the file fails. These are returned to the caller instead of
//   aborting the process, so that a bad dump location cannot take the whole
//   program down.
Status DebugFileIO::DumpTensorToEventFile(
    const string& node_name, const int32 output_slot, const string& debug_op,
    const Tensor& tensor, const uint64 wall_time_us, const string& file_path) {
  Env* env(Env::Default());

  // Create the directory if necessary.
  string file_dir = io::Dirname(file_path).ToString();
  Status s = DebugFileIO::RecursiveCreateDir(env, file_dir);
  if (!s.ok()) {
    return Status(error::FAILED_PRECONDITION,
                  strings::StrCat("Failed to create directory ", file_dir,
                                  ", due to: ", s.error_message()));
  }

  const string tensor_name = strings::StrCat(node_name, ":", output_slot);
  Event event = WrapTensorAsEvent(tensor_name, debug_op, tensor, wall_time_us);

  string event_str;
  if (!event.SerializeToString(&event_str)) {
    return Status(error::INTERNAL,
                  strings::StrCat("Failed to serialize Event proto for ",
                                  tensor_name));
  }

  std::unique_ptr<WritableFile> f = nullptr;
  s = env->NewWritableFile(file_path, &f);
  if (!s.ok()) {
    return s;
  }
  s = f->Append(event_str);
  if (!s.ok()) {
    // The unique_ptr releases the file handle on this early return.
    return s;
  }
  return f->Close();
}
// static
// Creates `dir` together with any missing ancestor directories.
//
// Returns:
//   OK                  if `dir` exists as a directory when the call finishes
//                       (whether it was created here or already existed).
//   FAILED_PRECONDITION if an ancestor could not be created, or exists but is
//                       not a directory.
//   ABORTED             if `dir` still is not a directory after the creation
//                       attempt.
Status DebugFileIO::RecursiveCreateDir(Env* env, const string& dir) {
  if (env->FileExists(dir) && env->IsDirectory(dir).ok()) {
    // The path already exists as a directory. Return OK right away.
    return Status::OK();
  }

  string parent_dir = io::Dirname(dir).ToString();
  // Only inspect the parent when Dirname actually moved up one level. If it
  // yields an empty string or the same path again, there is nothing further
  // to create, and recursing on it would never terminate.
  const bool has_distinct_parent = !parent_dir.empty() && parent_dir != dir;
  if (has_distinct_parent) {
    if (!env->FileExists(parent_dir)) {
      // The parent path does not exist yet, create it first.
      Status s = RecursiveCreateDir(env, parent_dir);  // Recursive call
      if (!s.ok()) {
        return Status(error::FAILED_PRECONDITION,
                      strings::StrCat("Failed to create directory ", parent_dir,
                                      ", due to: ", s.error_message()));
      }
    } else if (!env->IsDirectory(parent_dir).ok()) {
      // The path exists, but it is a file.
      return Status(error::FAILED_PRECONDITION,
                    strings::StrCat("Failed to create directory ", parent_dir,
                                    " because the path exists as a file "));
    }
  }

  // The Status of CreateDir is deliberately not used: another thread or
  // process may create the directory concurrently, so success is judged by
  // whether the directory exists afterwards.
  env->CreateDir(dir);

  // Guard against potential race in creating directories by doing a check
  // after the CreateDir call.
  if (env->FileExists(dir) && env->IsDirectory(dir).ok()) {
    return Status::OK();
  } else {
    return Status(error::ABORTED,
                  strings::StrCat("Failed to create directory ", dir));
  }
}
} // namespace tensorflow

View File

@ -0,0 +1,107 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_DEBUG_IO_UTILS_H_
#define TENSORFLOW_DEBUG_IO_UTILS_H_
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/env.h"
namespace tensorflow {
// Entry point for publishing debug tensors to one or more debug target URLs.
class DebugIO {
public:
// Publish a tensor to a list of debug target URLs.
//
// Args:
// tensor_name: Name of the tensor being published: node_name followed by
// a colon, followed by the output slot index. E.g., "node_a:0".
// A bare node name without a slot is also accepted; the slot is then 0.
// debug_op: Name of the debug op, e.g., "DebugIdentity".
// tensor: The Tensor object being published.
// wall_time_us: Time stamp for the Tensor. Unit: microseconds (us).
// debug_urls: An array of debug target URLs, e.g.,
// "file:///foo/tfdbg_dump", "grpc://localhost:11011"
//
// Returns:
// OK if the tensor was written for every file:// URL.
// INVALID_ARGUMENT if tensor_name cannot be parsed.
// UNIMPLEMENTED if a grpc:// URL is encountered.
// UNAVAILABLE if a URL has an unrecognized scheme.
// INTERNAL if writing to one or more file:// URLs failed.
static Status PublishDebugTensor(const string& tensor_name,
const string& debug_op, const Tensor& tensor,
const uint64 wall_time_us,
const gtl::ArraySlice<string>& debug_urls);
private:
// URL scheme prefixes ("file://" and "grpc://") used to classify the entries
// of debug_urls.
static const char* const kFileURLScheme;
static const char* const kGrpcURLScheme;
};
// Helper class for debug ops: writes debug tensors to the file system as
// serialized Event protos.
class DebugFileIO {
public:
// Encapsulate the Tensor in an Event protobuf and write it to a directory.
// The actual path of the dump file will be a concatenation of
// dump_root_dir, node_name, output_slot, debug_op and wall_time_us.
//
// For example:
// let dump_root_dir = "/tmp/tfdbg_dump",
// node_name = "foo/bar",
// output_slot = 0,
// debug_op = DebugIdentity,
// and wall_time_us = 1467891234512345,
// the dump file will be generated at path:
// /tmp/tfdbg_dump/foo/bar_0_DebugIdentity_1467891234512345.
//
// Args:
// node_name: Name of the node from which the tensor is output.
// output_slot: Output slot index.
// debug_op: Name of the debug op, e.g., "DebugIdentity".
// tensor: The Tensor object to be dumped to file.
// wall_time_us: Wall time at which the Tensor is generated during graph
// execution. Unit: microseconds (us).
// dump_root_dir: Root directory for dumping the tensor.
// dump_file_path: Output pointer that receives the actual dump file path.
// May be nullptr, in which case the path is not reported back.
static Status DumpTensorToDir(const string& node_name,
const int32 output_slot, const string& debug_op,
const Tensor& tensor, const uint64 wall_time_us,
const string& dump_root_dir,
string* dump_file_path);
// Get the full path to the dump file.
//
// Args:
// dump_root_dir: The dump root directory, e.g., /tmp/tfdbg_dump
// node_name: Name of the node from which the dumped tensor is generated,
// e.g., foo/bar/node_a
// output_slot: Output slot index of the said node, e.g., 0.
// debug_op: Name of the debug op, e.g., DebugIdentity.
// wall_time_us: Time stamp of the dumped tensor, in microseconds (us).
static string GetDumpFilePath(const string& dump_root_dir,
const string& node_name,
const int32 output_slot, const string& debug_op,
const uint64 wall_time_us);
private:
// Encapsulate the Tensor in an Event protobuf and write it to file.
// The directory containing file_path is created first if it is missing.
static Status DumpTensorToEventFile(
const string& node_name, const int32 output_slot, const string& debug_op,
const Tensor& tensor, const uint64 wall_time_us, const string& file_path);
// Create dir and any missing parent directories.
// Implemented ad hoc here for now.
// TODO(cais): Replace with shared implementation once http://b/30497715 is
// fixed.
static Status RecursiveCreateDir(Env* env, const string& dir);
};
} // namespace tensorflow
#endif // TENSORFLOW_DEBUG_IO_UTILS_H_

View File

@ -0,0 +1,382 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/debug/debug_io_utils.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/lib/core/notification.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/util/event.pb.h"
namespace tensorflow {
namespace {
// Test fixture that provides a float tensor, a string tensor and a helper for
// reading a dumped Event proto back from disk.
class DebugIOUtilsTest : public ::testing::Test {
 public:
  // Sets env_ and builds the fixture tensors:
  //   tensor_a_: DT_FLOAT, shape [2, 2], values {5, 3, -1, 0}.
  //   tensor_b_: DT_STRING, shape [2], values {"corge", "garply"}.
  // Each test calls this explicitly before using the members.
  void Initialize() {
    env_ = Env::Default();

    tensor_a_.reset(new Tensor(DT_FLOAT, TensorShape({2, 2})));
    tensor_a_->flat<float>()(0) = 5.0;
    tensor_a_->flat<float>()(1) = 3.0;
    tensor_a_->flat<float>()(2) = -1.0;
    tensor_a_->flat<float>()(3) = 0.0;

    tensor_b_.reset(new Tensor(DT_STRING, TensorShape{2}));
    tensor_b_->flat<string>()(0) = "corge";
    tensor_b_->flat<string>()(1) = "garply";
  }

  // Reads the whole file at dump_file_path and parses it into *event.
  //
  // Returns the failing Status if the file size cannot be obtained or the
  // file cannot be opened or read, and DATA_LOSS if the bytes read do not
  // parse as an Event proto.
  Status ReadEventFromFile(const string& dump_file_path, Event* event) {
    string content;
    uint64 file_size = 0;

    Status s = env_->GetFileSize(dump_file_path, &file_size);
    if (!s.ok()) {
      return s;
    }

    content.resize(file_size);

    std::unique_ptr<RandomAccessFile> file;
    s = env_->NewRandomAccessFile(dump_file_path, &file);
    if (!s.ok()) {
      return s;
    }

    StringPiece result;
    s = file->Read(0, file_size, &result, &(content)[0]);
    if (!s.ok()) {
      return s;
    }

    // Parse the bytes that Read() reported through `result` instead of
    // assuming that the scratch buffer was the one that got filled.
    if (!event->ParseFromArray(result.data(),
                               static_cast<int>(result.size()))) {
      return Status(error::DATA_LOSS,
                    strings::StrCat("Failed to parse Event proto from ",
                                    dump_file_path));
    }
    return Status::OK();
  }

  Env* env_;
  std::unique_ptr<Tensor> tensor_a_;
  std::unique_ptr<Tensor> tensor_b_;
};
// Dumps the float fixture tensor under the test temp directory and checks
// that the Event read back from the dump file carries the same node name,
// shape and values.
TEST_F(DebugIOUtilsTest, DumpFloatTensorToFileSunnyDay) {
  Initialize();

  // The node name contains several directory levels that do not exist yet,
  // which exercises directory creation inside the dump call.
  const string kNodeName = "foo/bar/qux/tensor_a";
  const string kDebugOpName = "DebugIdentity";
  const int32 output_slot = 0;
  const string test_dir = testing::TmpDir();
  uint64 wall_time = env_->NowMicros();

  string dump_file_path;
  TF_ASSERT_OK(DebugFileIO::DumpTensorToDir(kNodeName, output_slot,
                                            kDebugOpName, *tensor_a_, wall_time,
                                            test_dir, &dump_file_path));

  // Load the dump file back as an Event proto.
  Event event;
  TF_ASSERT_OK(ReadEventFromFile(dump_file_path, &event));

  ASSERT_GE(wall_time, event.wall_time());
  ASSERT_EQ(1, event.summary().value().size());
  const string expected_node_name =
      strings::StrCat(kNodeName, ":", output_slot, ":", kDebugOpName);
  ASSERT_EQ(expected_node_name, event.summary().value(0).node_name());

  Tensor a_prime(DT_FLOAT);
  ASSERT_TRUE(a_prime.FromProto(event.summary().value(0).tensor()));

  // The round-tripped tensor must match the original in shape and contents.
  ASSERT_EQ(tensor_a_->shape(), a_prime.shape());
  for (int idx = 0; idx < a_prime.flat<float>().size(); ++idx) {
    ASSERT_EQ(tensor_a_->flat<float>()(idx), a_prime.flat<float>()(idx));
  }

  // Remove everything written under the temp directory.
  int64 undeleted_files = 0;
  int64 undeleted_dirs = 0;
  Status cleanup_status =
      env_->DeleteRecursively(test_dir, &undeleted_files, &undeleted_dirs);
  ASSERT_TRUE(cleanup_status.ok());
  ASSERT_EQ(0, undeleted_files);
  ASSERT_EQ(0, undeleted_dirs);
}
// Dumps the string fixture tensor under the test temp directory and checks
// that the Event read back from the dump file carries the same node name,
// shape and values.
TEST_F(DebugIOUtilsTest, DumpStringTensorToFileSunnyDay) {
  Initialize();

  const string kNodeName = "quux/grault/tensor_b";
  const string kDebugOpName = "DebugIdentity";
  const int32 output_slot = 1;
  const string test_dir = testing::TmpDir();
  uint64 wall_time = env_->NowMicros();

  string dump_file_name;
  ASSERT_TRUE(DebugFileIO::DumpTensorToDir(kNodeName, output_slot,
                                           kDebugOpName, *tensor_b_, wall_time,
                                           test_dir, &dump_file_name)
                  .ok());

  // Load the dump file back as an Event proto.
  Event event;
  TF_ASSERT_OK(ReadEventFromFile(dump_file_name, &event));

  ASSERT_GE(wall_time, event.wall_time());
  ASSERT_EQ(1, event.summary().value().size());
  const string expected_node_name =
      strings::StrCat(kNodeName, ":", output_slot, ":", kDebugOpName);
  ASSERT_EQ(expected_node_name, event.summary().value(0).node_name());

  Tensor b_prime(DT_STRING);
  ASSERT_TRUE(b_prime.FromProto(event.summary().value(0).tensor()));

  // The round-tripped tensor must match the original in shape and contents.
  ASSERT_EQ(tensor_b_->shape(), b_prime.shape());
  for (int idx = 0; idx < b_prime.flat<string>().size(); ++idx) {
    ASSERT_EQ(tensor_b_->flat<string>()(idx), b_prime.flat<string>()(idx));
  }

  // Remove everything written under the temp directory.
  int64 undeleted_files = 0;
  int64 undeleted_dirs = 0;
  Status cleanup_status =
      env_->DeleteRecursively(test_dir, &undeleted_files, &undeleted_dirs);
  ASSERT_TRUE(cleanup_status.ok());
  ASSERT_EQ(0, undeleted_files);
  ASSERT_EQ(0, undeleted_dirs);
}
// Verifies that dumping fails when a path component that has to be a
// directory already exists as a regular file.
TEST_F(DebugIOUtilsTest, DumpTensorToFileCannotCreateDirectory) {
  Initialize();

  // First, create the file at the path.
  const string test_dir = testing::TmpDir();
  const string txt_file_name = strings::StrCat(test_dir, "/baz");
  if (!env_->FileExists(test_dir)) {
    ASSERT_TRUE(env_->CreateDir(test_dir).ok());
  }
  ASSERT_FALSE(env_->FileExists(txt_file_name));

  std::unique_ptr<WritableFile> file;
  ASSERT_TRUE(env_->NewWritableFile(txt_file_name, &file).ok());
  // Check every write step so that a broken precondition is reported here
  // rather than showing up as a confusing failure further down.
  TF_ASSERT_OK(file->Append("text in baz"));
  TF_ASSERT_OK(file->Flush());
  TF_ASSERT_OK(file->Close());

  // Verify that the path exists and that it is a file, not a directory.
  ASSERT_TRUE(env_->FileExists(txt_file_name));
  ASSERT_FALSE(env_->IsDirectory(txt_file_name).ok());

  // Second, try to dump the tensor to a path that requires "baz" to be a
  // directory, which should lead to an error.
  const string kNodeName = "baz/tensor_a";
  const string kDebugOpName = "DebugIdentity";
  const int32 output_slot = 0;
  uint64 wall_time = env_->NowMicros();
  string dump_file_name;
  Status s = DebugFileIO::DumpTensorToDir(kNodeName, output_slot, kDebugOpName,
                                          *tensor_a_, wall_time, test_dir,
                                          &dump_file_name);
  ASSERT_FALSE(s.ok());

  // Tear down temporary file and directories.
  int64 undeleted_files = 0;
  int64 undeleted_dirs = 0;
  ASSERT_TRUE(
      env_->DeleteRecursively(test_dir, &undeleted_files, &undeleted_dirs)
          .ok());
  ASSERT_EQ(0, undeleted_files);
  ASSERT_EQ(0, undeleted_dirs);
}
// Publishes one tensor to several distinct file:// URLs in a single call and
// checks that each dump root received an identical, readable Event file.
TEST_F(DebugIOUtilsTest, PublishTensorToMultipleFileURLs) {
  Initialize();

  const int kNumDumpRoots = 3;
  const string kNodeName = "foo/bar/qux/tensor_a";
  const string kDebugOpName = "DebugIdentity";
  const int32 output_slot = 0;
  uint64 wall_time = env_->NowMicros();

  std::vector<string> dump_roots;
  std::vector<string> dump_file_paths;
  std::vector<string> urls;
  for (int i = 0; i < kNumDumpRoots; ++i) {
    string dump_root = strings::StrCat(testing::TmpDir(), "/", i);
    dump_roots.push_back(dump_root);
    dump_file_paths.push_back(DebugFileIO::GetDumpFilePath(
        dump_root, kNodeName, output_slot, kDebugOpName, wall_time));
    urls.push_back(strings::StrCat("file://", dump_root));
  }

  // Sanity check: the dump roots must be distinct from one another.
  for (int i = 1; i < kNumDumpRoots; ++i) {
    ASSERT_NE(dump_roots[0], dump_roots[i]);
  }

  const string tensor_name = strings::StrCat(kNodeName, ":", output_slot);
  const string debug_node_name =
      strings::StrCat(tensor_name, ":", kDebugOpName);

  Status s = DebugIO::PublishDebugTensor(tensor_name, kDebugOpName, *tensor_a_,
                                         wall_time, urls);
  ASSERT_TRUE(s.ok());

  // Read each dump file back into an Event proto and verify its contents.
  for (int i = 0; i < kNumDumpRoots; ++i) {
    Event event;
    TF_ASSERT_OK(ReadEventFromFile(dump_file_paths[i], &event));

    ASSERT_GE(wall_time, event.wall_time());
    ASSERT_EQ(1, event.summary().value().size());
    ASSERT_EQ(debug_node_name, event.summary().value(0).node_name());

    Tensor a_prime(DT_FLOAT);
    ASSERT_TRUE(a_prime.FromProto(event.summary().value(0).tensor()));

    // Verify tensor shape and value. The element index has its own name so
    // that it does not shadow the dump-root index `i`.
    ASSERT_EQ(tensor_a_->shape(), a_prime.shape());
    for (int j = 0; j < a_prime.flat<float>().size(); ++j) {
      ASSERT_EQ(tensor_a_->flat<float>()(j), a_prime.flat<float>()(j));
    }
  }

  // Tear down temporary file and directories.
  for (int i = 0; i < kNumDumpRoots; ++i) {
    int64 undeleted_files = 0;
    int64 undeleted_dirs = 0;
    ASSERT_TRUE(env_->DeleteRecursively(dump_roots[i], &undeleted_files,
                                        &undeleted_dirs)
                    .ok());
    ASSERT_EQ(0, undeleted_files);
    ASSERT_EQ(0, undeleted_dirs);
  }
}
// Publishes the same tensor from several threads at once to dump roots that
// share a common parent directory, to exercise concurrent creation of the
// shared part of the path.
TEST_F(DebugIOUtilsTest, PublishTensorConcurrentlyToPartiallyOverlappingPaths) {
  Initialize();

  const int kConcurrentPubs = 3;
  const string kNodeName = "tensor_a";
  const string kDebugOpName = "DebugIdentity";
  const int32 kOutputSlot = 0;

  std::unique_ptr<thread::ThreadPool> tp(
      new thread::ThreadPool(Env::Default(), "test", kConcurrentPubs));
  uint64 wall_time = env_->NowMicros();

  const string dump_root_base = testing::TmpDir();
  const string tensor_name = strings::StrCat(kNodeName, ":", kOutputSlot);
  const string debug_node_name =
      strings::StrCat(tensor_name, ":", kDebugOpName);

  mutex mu;
  std::vector<string> dump_roots GUARDED_BY(mu);
  std::vector<string> dump_file_paths GUARDED_BY(mu);
  int dump_count GUARDED_BY(mu) = 0;
  int done_count GUARDED_BY(mu) = 0;
  Notification all_done;

  auto fn = [this, &dump_count, &done_count, &mu, &dump_root_base, &dump_roots,
             &dump_file_paths, &wall_time, &tensor_name, &debug_node_name,
             &kNodeName, &kDebugOpName, &kConcurrentPubs, &all_done]() {
    // "grumpy" is the shared directory part of the path. The explicit
    // separator keeps every dump root inside dump_root_base, so the cleanup
    // at the end of the test removes them.
    string dump_root;
    string debug_url;
    {
      mutex_lock l(mu);
      dump_root =
          strings::StrCat(dump_root_base, "/grumpy/", "dump_", dump_count++);
      dump_roots.push_back(dump_root);
      dump_file_paths.push_back(DebugFileIO::GetDumpFilePath(
          dump_root, kNodeName, kOutputSlot, kDebugOpName, wall_time));
      debug_url = strings::StrCat("file://", dump_root);
    }

    std::vector<string> urls;
    urls.push_back(debug_url);
    Status s = DebugIO::PublishDebugTensor(tensor_name, kDebugOpName,
                                           *tensor_a_, wall_time, urls);
    // Non-fatal check: a fatal assertion would return from this closure
    // before done_count is updated, and the main thread would then wait for
    // the notification forever.
    TF_EXPECT_OK(s);

    {
      mutex_lock l(mu);
      done_count++;
      if (done_count == kConcurrentPubs) {
        all_done.Notify();
      }
    }
  };

  for (int i = 0; i < kConcurrentPubs; ++i) {
    tp->Schedule(fn);
  }

  // Wait for all dumping calls to finish.
  all_done.WaitForNotification();
  tp.reset();

  {
    mutex_lock l(mu);

    for (int i = 1; i < kConcurrentPubs; ++i) {
      ASSERT_NE(dump_roots[0], dump_roots[i]);
    }

    // Read each dump file back into an Event proto and verify its contents.
    for (int i = 0; i < kConcurrentPubs; ++i) {
      Event event;
      TF_ASSERT_OK(ReadEventFromFile(dump_file_paths[i], &event));

      ASSERT_GE(wall_time, event.wall_time());
      ASSERT_EQ(1, event.summary().value().size());
      ASSERT_EQ(debug_node_name, event.summary().value(0).node_name());

      Tensor a_prime(DT_FLOAT);
      ASSERT_TRUE(a_prime.FromProto(event.summary().value(0).tensor()));

      // Verify tensor shape and value. The element index has its own name so
      // that it does not shadow the publisher index `i`.
      ASSERT_EQ(tensor_a_->shape(), a_prime.shape());
      for (int j = 0; j < a_prime.flat<float>().size(); ++j) {
        ASSERT_EQ(tensor_a_->flat<float>()(j), a_prime.flat<float>()(j));
      }
    }

    // Tear down temporary file and directories.
    int64 undeleted_files = 0;
    int64 undeleted_dirs = 0;
    ASSERT_TRUE(env_->DeleteRecursively(dump_root_base, &undeleted_files,
                                        &undeleted_dirs)
                    .ok());
    ASSERT_EQ(0, undeleted_files);
    ASSERT_EQ(0, undeleted_dirs);
  }
}
} // namespace
} // namespace tensorflow

View File

@ -423,6 +423,7 @@ tf_kernel_libraries(
"//tensorflow/core:lib_internal",
"//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/debug:debug_io_utils",
"//third_party/eigen3",
],
)

View File

@ -17,6 +17,7 @@ limitations under the License.
#define TENSORFLOW_KERNELS_DEBUG_OP_H_
#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
#include "tensorflow/core/debug/debug_io_utils.h"
#include "tensorflow/core/framework/device_base.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_util.h"
@ -73,10 +74,16 @@ class DebugIdentityOp : public OpKernel {
public:
explicit DebugIdentityOp(OpKernelConstruction* context) : OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("tensor_name", &tensor_name_));
// TODO(cais): Add debug_url
OP_REQUIRES_OK(context, context->GetAttr("debug_urls", &debug_urls_));
}
// Forwards input 0 unchanged to output 0. When at least one debug URL is
// configured, the input is first published to those URLs under the
// "DebugIdentity" debug op name, time-stamped with the current wall time in
// microseconds. The Status returned by the publish call is not inspected.
void Compute(OpKernelContext* context) override {
  const Tensor& input = context->input(0);
  const bool has_debug_targets = !debug_urls_.empty();
  if (has_debug_targets) {
    DebugIO::PublishDebugTensor(tensor_name_, "DebugIdentity", input,
                                Env::Default()->NowMicros(), debug_urls_);
  }
  context->set_output(0, input);
}
@ -84,6 +91,7 @@ class DebugIdentityOp : public OpKernel {
private:
string tensor_name_;
std::vector<string> debug_urls_;
};
// NaN-counter op for debugging.
@ -92,6 +100,7 @@ class DebugNanCountOp : public OpKernel {
public:
explicit DebugNanCountOp(OpKernelConstruction* context) : OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("tensor_name", &tensor_name_));
OP_REQUIRES_OK(context, context->GetAttr("debug_urls", &debug_urls_));
}
void Compute(OpKernelContext* context) override {
@ -120,6 +129,7 @@ class DebugNanCountOp : public OpKernel {
private:
string tensor_name_;
std::vector<string> debug_urls_;
};
// TODO(cais): Add DebugInfinityCount

View File

@ -13,6 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <dirent.h>
#include <string.h>
#include <fstream>
#include <vector>
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"
@ -22,20 +27,32 @@ limitations under the License.
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/util/event.pb.h"
namespace tensorflow {
namespace {
class DebugIdentityOpTest : public OpsTestBase {
protected:
Status Init(DataType input_type) {
Status Init(DataType input_type, const std::vector<string> debug_urls) {
env_ = Env::Default();
TF_CHECK_OK(NodeDefBuilder("op", "DebugIdentity")
.Input(FakeInput(input_type))
.Attr("tensor_name", "FakeTensor:0")
.Attr("debug_urls", debug_urls)
.Finalize(node_def()));
return InitOp();
}
Status Init(DataType input_type) {
std::vector<string> empty_debug_urls;
return Init(input_type, empty_debug_urls);
}
Env* env_;
};
TEST_F(DebugIdentityOpTest, Int32Success_6) {
@ -48,6 +65,80 @@ TEST_F(DebugIdentityOpTest, Int32Success_6) {
test::ExpectTensorEqual<int32>(expected, *GetOutput(0));
}
// Runs DebugIdentity with several file:// debug URLs and verifies both the
// identity output and the single dump file written under each dump root.
TEST_F(DebugIdentityOpTest, Int32Success_6_FileURLs) {
  const int kNumDumpDirs = 3;
  const string tmp_dir = testing::TmpDir();

  std::vector<string> dump_roots;
  std::vector<string> debug_urls;
  for (int i = 0; i < kNumDumpDirs; ++i) {
    const string dump_root = strings::StrCat(tmp_dir, "_", i);
    dump_roots.push_back(dump_root);
    debug_urls.push_back(strings::StrCat("file://", dump_root));
  }

  uint64 wall_time = Env::Default()->NowMicros();

  TF_ASSERT_OK(Init(DT_INT32, debug_urls));
  AddInputFromArray<int32>(TensorShape({6}), {1, 2, 3, 4, 5, 6});
  TF_ASSERT_OK(RunOpKernel());

  Tensor expected(allocator(), DT_INT32, TensorShape({6}));
  test::FillValues<int32>(&expected, {1, 2, 3, 4, 5, 6});
  // Verify the identity output
  test::ExpectTensorEqual<int32>(expected, *GetOutput(0));

  for (int i = 0; i < kNumDumpDirs; ++i) {
    ASSERT_TRUE(env_->FileExists(dump_roots[i]));
    ASSERT_TRUE(env_->IsDirectory(dump_roots[i]).ok());

    // List the directory first and close the handle right away, so that a
    // failing assertion below cannot leak it.
    DIR* dir = opendir(dump_roots[i].c_str());
    ASSERT_TRUE(dir != nullptr);
    std::vector<string> dump_file_names;
    struct dirent* ent;
    while ((ent = readdir(dir)) != nullptr) {
      if (strcmp(ent->d_name, ".") && strcmp(ent->d_name, "..")) {
        dump_file_names.push_back(ent->d_name);
      }
    }
    closedir(dir);

    for (const string& dump_file_name : dump_file_names) {
      // Try reading the file into a Event proto.
      const string dump_file_path =
          strings::StrCat(dump_roots[i], "/", dump_file_name);
      std::fstream ifs(dump_file_path, std::ios::in | std::ios::binary);
      Event event;
      const bool parsed = event.ParseFromIstream(&ifs);
      ifs.close();
      ASSERT_TRUE(parsed);

      ASSERT_GE(event.wall_time(), wall_time);
      ASSERT_EQ(1, event.summary().value().size());
      ASSERT_EQ(strings::StrCat("FakeTensor", ":", 0, ":", "DebugIdentity"),
                event.summary().value(0).node_name());

      Tensor tensor_prime(DT_INT32);
      ASSERT_TRUE(tensor_prime.FromProto(event.summary().value(0).tensor()));

      // Verify tensor shape and value from the dump file.
      ASSERT_EQ(TensorShape({6}), tensor_prime.shape());
      for (int j = 0; j < 6; ++j) {
        ASSERT_EQ(j + 1, tensor_prime.flat<int32>()(j));
      }
    }

    // Exactly one dump file is expected per dump root.
    ASSERT_EQ(1, dump_file_names.size());

    // Remove temporary dump directory and file.
    int64 undeleted_files = 0;
    int64 undeleted_dirs = 0;
    ASSERT_TRUE(env_->DeleteRecursively(dump_roots[i], &undeleted_files,
                                        &undeleted_dirs)
                    .ok());
    ASSERT_EQ(0, undeleted_files);
    ASSERT_EQ(0, undeleted_dirs);
  }
}
TEST_F(DebugIdentityOpTest, Int32Success_2_3) {
TF_ASSERT_OK(Init(DT_INT32));
AddInputFromArray<int32>(TensorShape({2, 3}), {1, 2, 3, 4, 5, 6});
@ -66,8 +157,6 @@ TEST_F(DebugIdentityOpTest, StringSuccess) {
test::ExpectTensorEqual<string>(expected, *GetOutput(0));
}
TEST_F(DebugIdentityOpTest, RefInputError) { TF_ASSERT_OK(Init(DT_INT32_REF)); }
// Tests for DebugNanCountOp
class DebugNanCountOpTest : public OpsTestBase {
protected:

View File

@ -253,6 +253,8 @@ class ExpandDimsOp : public OpKernel {
" and output shape ", output_shape.DebugString()));
}
}
bool IsExpensive() override { return false; }
};
REGISTER_KERNEL_BUILDER(Name("ExpandDims").Device(DEVICE_CPU).HostMemory("dim"),
ExpandDimsOp);
@ -342,6 +344,8 @@ class SqueezeOp : public OpKernel {
}
}
bool IsExpensive() override { return false; }
private:
std::unordered_set<int32> squeeze_dims_;
};

View File

@ -2985,6 +2985,7 @@ REGISTER_OP("DebugIdentity")
.Output("output: T")
.Attr("T: type")
.Attr("tensor_name: string = ''")
.Attr("debug_urls: list(string) = []")
.Doc(R"doc(
Debug Identity Op.
@ -2993,6 +2994,8 @@ Provides an identity mapping of the non-Ref type input tensor for debugging.
input: Input tensor, non-Reference type.
output: Output tensor that equals the input tensor.
tensor_name: Name of the input tensor.
debug_urls: List of URLs to debug targets, e.g.,
file:///foo/tfdbg_dump, grpc://localhost:11011
)doc");
REGISTER_OP("DebugNanCount")
@ -3000,6 +3003,7 @@ REGISTER_OP("DebugNanCount")
.Output("output: int64") // The debug signal (nan count) is int64
.Attr("T: type")
.Attr("tensor_name: string = ''")
.Attr("debug_urls: list(string) = []")
.Doc(R"doc(
Debug NaN Value Counter Op
@ -3008,6 +3012,8 @@ Counts number of NaNs in the input tensor, for debugging.
input: Input tensor, non-Reference type.
output: An integer output tensor that is the number of NaNs in the input.
tensor_name: Name of the input tensor.
debug_urls: List of URLs to debug targets, e.g.,
file:///foo/tfdbg_dump, grpc://localhost:11011
)doc");
} // namespace tensorflow

View File

@ -7876,6 +7876,36 @@ op {
}
}
}
op {
name: "DebugIdentity"
input_arg {
name: "input"
type_attr: "T"
}
output_arg {
name: "output"
type_attr: "T"
}
attr {
name: "T"
type: "type"
}
attr {
name: "tensor_name"
type: "string"
default_value {
s: ""
}
}
attr {
name: "debug_urls"
type: "list(string)"
default_value {
list {
}
}
}
}
op {
name: "DebugNanCount"
input_arg {
@ -7898,6 +7928,36 @@ op {
}
}
}
op {
name: "DebugNanCount"
input_arg {
name: "input"
type_attr: "T"
}
output_arg {
name: "output"
type: DT_INT64
}
attr {
name: "T"
type: "type"
}
attr {
name: "tensor_name"
type: "string"
default_value {
s: ""
}
}
attr {
name: "debug_urls"
type: "list(string)"
default_value {
list {
}
}
}
}
op {
name: "DecodeCSV"
input_arg {

View File

@ -4278,6 +4278,15 @@ op {
}
description: "Name of the input tensor."
}
attr {
name: "debug_urls"
type: "list(string)"
default_value {
list {
}
}
description: "List of URLs to debug targets, e.g.,\nfile:///foo/tfdbg_dump, grpc://localhost:11011"
}
summary: "Debug Identity Op."
description: "Provides an identity mapping of the non-Ref type input tensor for debugging."
}
@ -4305,6 +4314,15 @@ op {
}
description: "Name of the input tensor."
}
attr {
name: "debug_urls"
type: "list(string)"
default_value {
list {
}
}
description: "List of URLs to debug targets, e.g.,\nfile:///foo/tfdbg_dump, grpc://localhost:11011"
}
summary: "Debug NaN Value Counter Op"
description: "Counts number of NaNs in the input tensor, for debugging."
}

View File

@ -662,13 +662,19 @@ keep_dims: If true, retain reduced dimensions with length 1.
output: `R-K`-D. The reduced Tensor.
)doc");
#define SPARSE_DENSE_CWISE_SIGNATURE() \
Input("sp_indices: int64") \
.Input("sp_values: T") \
.Input("sp_shape: int64") \
.Input("dense: T") \
.Output("output: T") \
.Attr("T: numbertype")
// Shared op signature for the SparseDenseCwise* ops: the three components of
// a SparseTensor (indices, values, shape) plus a dense operand, producing one
// output value per sparse entry.
//
// Shape function: the output is a vector whose length is dimension 0 of
// sp_indices (input 0), which is required to be rank 2. The rank check is
// applied to sp_indices rather than to sp_values (input 1), because the
// values input is 1-D and would fail a rank-2 requirement.
#define SPARSE_DENSE_CWISE_SIGNATURE()                           \
  Input("sp_indices: int64")                                     \
      .Input("sp_values: T")                                     \
      .Input("sp_shape: int64")                                  \
      .Input("dense: T")                                         \
      .Output("output: T")                                       \
      .Attr("T: numbertype")                                     \
      .SetShapeFn([](InferenceContext* c) {                      \
        const Shape* input;                                      \
        TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); \
        c->set_output(0, c->Vector(c->Dim(input, 0)));           \
        return Status::OK();                                     \
      })
REGISTER_OP("SparseDenseCwiseMul").SPARSE_DENSE_CWISE_SIGNATURE().Doc(R"doc(
Component-wise multiplies a SparseTensor by a dense Tensor.
@ -722,6 +728,8 @@ dense: `R`-D. The dense Tensor operand.
output: 1-D. The `N` values that are operated on.
)doc");
#undef SPARSE_DENSE_CWISE_SIGNATURE
REGISTER_OP("SparseSoftmax")
.Input("sp_indices: int64")
.Input("sp_values: T")

View File

@ -180,7 +180,7 @@ message ConfigProto {
int64 operation_timeout_in_ms = 11;
};
// EXPERIMENTAL. Option for watching a node
// EXPERIMENTAL. Option for watching a node.
message DebugTensorWatch {
// Name of the node to watch.
string node_name = 1;
@ -196,6 +196,12 @@ message DebugTensorWatch {
// One or more than one probes on a tensor.
// e.g., {"DebugIdentity", "DebugNanCount"}
repeated string debug_ops = 3;
// URL(s) for debug target(s).
// E.g., "file:///foo/tfdbg_dump", "grpc://localhost:11011"
// Each debug op listed in debug_ops will publish its output tensor (debug
// signal) to all URLs in debug_urls.
repeated string debug_urls = 4;
}
// EXPERIMENTAL. Options for a single Run() call.

View File

@ -140,6 +140,31 @@ Notice that the function adds the given losses to the regularization losses.
* <b>`ValueError`</b>: if `losses` is not iterable.
- - -
### `tf.contrib.losses.hinge_loss(logits, target, scope=None)` {#hinge_loss}
Method that returns the loss tensor for hinge loss.
##### Args:
* <b>`logits`</b>: The logits, a float tensor.
* <b>`target`</b>: The ground truth output tensor. Its shape should match the shape of
logits. The values of the tensor are expected to be 0.0 or 1.0.
* <b>`scope`</b>: The scope for the operations performed in computing the loss.
##### Returns:
A `Tensor` of same shape as logits and target representing the loss values
across the batch.
##### Raises:
* <b>`ValueError`</b>: If the shapes of `logits` and `target` don't match.
- - -
### `tf.contrib.losses.log_loss(predictions, targets, weight=1.0, epsilon=1e-07, scope=None)` {#log_loss}

View File

@ -0,0 +1,409 @@
<!-- This file is machine generated: DO NOT EDIT! -->
# RNN (contrib)
[TOC]
Additional RNN operations and cells.
## This package provides additional contributed RNNCells.
### Fused RNNCells
- - -
### `class tf.contrib.rnn.LSTMFusedCell` {#LSTMFusedCell}
Basic LSTM recurrent network cell.
The implementation is based on: http://arxiv.org/abs/1409.2329.
We add forget_bias (default: 1) to the biases of the forget gate in order to
reduce the scale of forgetting in the beginning of the training.
Unlike BasicLSTMCell, this is a monolithic op and should be much faster. The
weight and bias matrices should be compatible as long as the variable scope
matches.
- - -
#### `tf.contrib.rnn.LSTMFusedCell.__init__(num_units, forget_bias=1.0, use_peephole=False)` {#LSTMFusedCell.__init__}
Initialize the basic LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell.
* <b>`forget_bias`</b>: float, The bias added to forget gates (see above).
* <b>`use_peephole`</b>: Whether to use peephole connections or not.
- - -
#### `tf.contrib.rnn.LSTMFusedCell.output_size` {#LSTMFusedCell.output_size}
- - -
#### `tf.contrib.rnn.LSTMFusedCell.state_size` {#LSTMFusedCell.state_size}
- - -
#### `tf.contrib.rnn.LSTMFusedCell.zero_state(batch_size, dtype)` {#LSTMFusedCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.
### LSTM-like cells
- - -
### `class tf.contrib.rnn.CoupledInputForgetGateLSTMCell` {#CoupledInputForgetGateLSTMCell}
Long short-term memory unit (LSTM) recurrent network cell.
The default non-peephole implementation is based on:
http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
S. Hochreiter and J. Schmidhuber.
"Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.
The peephole implementation is based on:
https://research.google.com/pubs/archive/43905.pdf
Hasim Sak, Andrew Senior, and Francoise Beaufays.
"Long short-term memory recurrent neural network architectures for
large scale acoustic modeling." INTERSPEECH, 2014.
The coupling of input and forget gate is based on:
http://arxiv.org/pdf/1503.04069.pdf
Greff et al. "LSTM: A Search Space Odyssey"
The class uses optional peep-hole connections, and an optional projection
layer.
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.__init__(num_units, use_peepholes=False, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=1, num_proj_shards=1, forget_bias=1.0, state_is_tuple=False, activation=tanh)` {#CoupledInputForgetGateLSTMCell.__init__}
Initialize the parameters for an LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell
* <b>`use_peepholes`</b>: bool, set True to enable diagonal/peephole connections.
* <b>`initializer`</b>: (optional) The initializer to use for the weight and
projection matrices.
* <b>`num_proj`</b>: (optional) int, The output dimensionality for the projection
matrices. If None, no projection is performed.
* <b>`proj_clip`</b>: (optional) A float value. If `num_proj > 0` and `proj_clip` is
provided, then the projected values are clipped elementwise to within
`[-proj_clip, proj_clip]`.
* <b>`num_unit_shards`</b>: How to split the weight matrix. If >1, the weight
matrix is stored across num_unit_shards.
* <b>`num_proj_shards`</b>: How to split the projection matrix. If >1, the
projection matrix is stored across num_proj_shards.
* <b>`forget_bias`</b>: Biases of the forget gate are initialized by default to 1
in order to reduce the scale of forgetting at the beginning of
the training.
* <b>`state_is_tuple`</b>: If True, accepted and returned states are 2-tuples of
the `c_state` and `m_state`. By default (False), they are concatenated
along the column axis. This default behavior will soon be deprecated.
* <b>`activation`</b>: Activation function of the inner states.
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.output_size` {#CoupledInputForgetGateLSTMCell.output_size}
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.state_size` {#CoupledInputForgetGateLSTMCell.state_size}
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.zero_state(batch_size, dtype)` {#CoupledInputForgetGateLSTMCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.
- - -
### `class tf.contrib.rnn.TimeFreqLSTMCell` {#TimeFreqLSTMCell}
Time-Frequency Long short-term memory unit (LSTM) recurrent network cell.
This implementation is based on:
Tara N. Sainath and Bo Li
"Modeling Time-Frequency Patterns with LSTM vs. Convolutional Architectures
for LVCSR Tasks." submitted to INTERSPEECH, 2016.
It uses peep-hole connections and optional cell clipping.
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.__init__(num_units, use_peepholes=False, cell_clip=None, initializer=None, num_unit_shards=1, forget_bias=1.0, feature_size=None, frequency_skip=None)` {#TimeFreqLSTMCell.__init__}
Initialize the parameters for an LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell
* <b>`use_peepholes`</b>: bool, set True to enable diagonal/peephole connections.
* <b>`cell_clip`</b>: (optional) A float value, if provided the cell state is clipped
by this value prior to the cell output activation.
* <b>`initializer`</b>: (optional) The initializer to use for the weight and
projection matrices.
* <b>`num_unit_shards`</b>: int, How to split the weight matrix. If >1, the weight
matrix is stored across num_unit_shards.
* <b>`forget_bias`</b>: float, Biases of the forget gate are initialized by default
to 1 in order to reduce the scale of forgetting at the beginning
of the training.
* <b>`feature_size`</b>: int, The size of the input feature the LSTM spans over.
* <b>`frequency_skip`</b>: int, The amount the LSTM filter is shifted by in
frequency.
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.output_size` {#TimeFreqLSTMCell.output_size}
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.state_size` {#TimeFreqLSTMCell.state_size}
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.zero_state(batch_size, dtype)` {#TimeFreqLSTMCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.
- - -
### `class tf.contrib.rnn.GridLSTMCell` {#GridLSTMCell}
Grid Long short-term memory unit (LSTM) recurrent network cell.
The default is based on:
Nal Kalchbrenner, Ivo Danihelka and Alex Graves
"Grid Long Short-Term Memory," Proc. ICLR 2016.
http://arxiv.org/abs/1507.01526
When peephole connections are used, the implementation is based on:
Tara N. Sainath and Bo Li
"Modeling Time-Frequency Patterns with LSTM vs. Convolutional Architectures
for LVCSR Tasks." submitted to INTERSPEECH, 2016.
The code uses optional peephole connections, shared_weights and cell clipping.
- - -
#### `tf.contrib.rnn.GridLSTMCell.__init__(num_units, use_peepholes=False, share_time_frequency_weights=False, cell_clip=None, initializer=None, num_unit_shards=1, forget_bias=1.0, feature_size=None, frequency_skip=None)` {#GridLSTMCell.__init__}
Initialize the parameters for an LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell
* <b>`use_peepholes`</b>: bool, default False. Set True to enable diagonal/peephole
connections.
* <b>`share_time_frequency_weights`</b>: bool, default False. Set True to enable
shared cell weights between time and frequency LSTMs.
* <b>`cell_clip`</b>: (optional) A float value, if provided the cell state is clipped
by this value prior to the cell output activation.
* <b>`initializer`</b>: (optional) The initializer to use for the weight and
projection matrices.
* <b>`num_unit_shards`</b>: int, How to split the weight matrix. If >1, the weight
matrix is stored across num_unit_shards.
* <b>`forget_bias`</b>: float, Biases of the forget gate are initialized by default
to 1 in order to reduce the scale of forgetting at the beginning
of the training.
* <b>`feature_size`</b>: int, The size of the input feature the LSTM spans over.
* <b>`frequency_skip`</b>: int, The amount the LSTM filter is shifted by in
frequency.
- - -
#### `tf.contrib.rnn.GridLSTMCell.output_size` {#GridLSTMCell.output_size}
- - -
#### `tf.contrib.rnn.GridLSTMCell.state_size` {#GridLSTMCell.state_size}
- - -
#### `tf.contrib.rnn.GridLSTMCell.zero_state(batch_size, dtype)` {#GridLSTMCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.
### RNNCell wrappers
- - -
### `class tf.contrib.rnn.AttentionCellWrapper` {#AttentionCellWrapper}
Basic attention cell wrapper.
Implementation based on https://arxiv.org/pdf/1601.06733.pdf.
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.__init__(cell, attn_length, attn_size=None, attn_vec_size=None, input_size=None, state_is_tuple=False)` {#AttentionCellWrapper.__init__}
Create a cell with attention.
##### Args:
* <b>`cell`</b>: an RNNCell, an attention is added to it.
* <b>`attn_length`</b>: integer, the size of an attention window.
* <b>`attn_size`</b>: integer, the size of an attention vector. Equal to
cell.output_size by default.
* <b>`attn_vec_size`</b>: integer, the number of convolutional features calculated
on attention state and a size of the hidden layer built from
base cell state. Equal to attn_size by default.
* <b>`input_size`</b>: integer, the size of a hidden linear layer,
built from inputs and attention. Derived from the input tensor
by default.
* <b>`state_is_tuple`</b>: If True, accepted and returned states are n-tuples, where
`n = len(cells)`. By default (False), the states are all
concatenated along the column axis.
##### Raises:
* <b>`TypeError`</b>: if cell is not an RNNCell.
* <b>`ValueError`</b>: if cell returns a state tuple but the flag
`state_is_tuple` is `False` or if attn_length is zero or less.
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.output_size` {#AttentionCellWrapper.output_size}
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.state_size` {#AttentionCellWrapper.state_size}
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.zero_state(batch_size, dtype)` {#AttentionCellWrapper.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.

View File

@ -1105,7 +1105,10 @@ DEPRECATED: Use outputs.
### `class tf.Tensor` {#Tensor}
Represents a value produced by an `Operation`.
Represents one of the outputs of an `Operation`.
*Note:* the `Tensor` class will be replaced by `Output` in the future.
Currently these two are aliases for each other.
A `Tensor` is a symbolic handle to one of the outputs of an
`Operation`. It does not hold the values of that operation's output,

View File

@ -0,0 +1,70 @@
Basic attention cell wrapper.
Implementation based on https://arxiv.org/pdf/1601.06733.pdf.
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.__init__(cell, attn_length, attn_size=None, attn_vec_size=None, input_size=None, state_is_tuple=False)` {#AttentionCellWrapper.__init__}
Create a cell with attention.
##### Args:
* <b>`cell`</b>: an RNNCell, an attention is added to it.
* <b>`attn_length`</b>: integer, the size of an attention window.
* <b>`attn_size`</b>: integer, the size of an attention vector. Equal to
cell.output_size by default.
* <b>`attn_vec_size`</b>: integer, the number of convolutional features calculated
on attention state and a size of the hidden layer built from
base cell state. Equal to attn_size by default.
* <b>`input_size`</b>: integer, the size of a hidden linear layer,
built from inputs and attention. Derived from the input tensor
by default.
* <b>`state_is_tuple`</b>: If True, accepted and returned states are n-tuples, where
`n = len(cells)`. By default (False), the states are all
concatenated along the column axis.
##### Raises:
* <b>`TypeError`</b>: if cell is not an RNNCell.
* <b>`ValueError`</b>: if cell returns a state tuple but the flag
`state_is_tuple` is `False` or if attn_length is zero or less.
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.output_size` {#AttentionCellWrapper.output_size}
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.state_size` {#AttentionCellWrapper.state_size}
- - -
#### `tf.contrib.rnn.AttentionCellWrapper.zero_state(batch_size, dtype)` {#AttentionCellWrapper.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.

View File

@ -1,4 +1,7 @@
Represents a value produced by an `Operation`.
Represents one of the outputs of an `Operation`.
*Note:* the `Tensor` class will be replaced by `Output` in the future.
Currently these two are aliases for each other.
A `Tensor` is a symbolic handle to one of the outputs of an
`Operation`. It does not hold the values of that operation's output,

View File

@ -0,0 +1,22 @@
### `tf.contrib.losses.hinge_loss(logits, target, scope=None)` {#hinge_loss}
Method that returns the loss tensor for hinge loss.
##### Args:
* <b>`logits`</b>: The logits, a float tensor.
* <b>`target`</b>: The ground truth output tensor. Its shape should match the shape of
logits. The values of the tensor are expected to be 0.0 or 1.0.
* <b>`scope`</b>: The scope for the operations performed in computing the loss.
##### Returns:
A `Tensor` of same shape as logits and target representing the loss values
across the batch.
##### Raises:
* <b>`ValueError`</b>: If the shapes of `logits` and `target` don't match.

View File

@ -0,0 +1,77 @@
Grid Long short-term memory unit (LSTM) recurrent network cell.
The default is based on:
Nal Kalchbrenner, Ivo Danihelka and Alex Graves
"Grid Long Short-Term Memory," Proc. ICLR 2016.
http://arxiv.org/abs/1507.01526
When peephole connections are used, the implementation is based on:
Tara N. Sainath and Bo Li
"Modeling Time-Frequency Patterns with LSTM vs. Convolutional Architectures
for LVCSR Tasks." submitted to INTERSPEECH, 2016.
The code uses optional peephole connections, shared_weights and cell clipping.
- - -
#### `tf.contrib.rnn.GridLSTMCell.__init__(num_units, use_peepholes=False, share_time_frequency_weights=False, cell_clip=None, initializer=None, num_unit_shards=1, forget_bias=1.0, feature_size=None, frequency_skip=None)` {#GridLSTMCell.__init__}
Initialize the parameters for an LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell
* <b>`use_peepholes`</b>: bool, default False. Set True to enable diagonal/peephole
connections.
* <b>`share_time_frequency_weights`</b>: bool, default False. Set True to enable
shared cell weights between time and frequency LSTMs.
* <b>`cell_clip`</b>: (optional) A float value, if provided the cell state is clipped
by this value prior to the cell output activation.
* <b>`initializer`</b>: (optional) The initializer to use for the weight and
projection matrices.
* <b>`num_unit_shards`</b>: int, How to split the weight matrix. If >1, the weight
matrix is stored across num_unit_shards.
* <b>`forget_bias`</b>: float, Biases of the forget gate are initialized by default
to 1 in order to reduce the scale of forgetting at the beginning
of the training.
* <b>`feature_size`</b>: int, The size of the input feature the LSTM spans over.
* <b>`frequency_skip`</b>: int, The amount the LSTM filter is shifted by in
frequency.
- - -
#### `tf.contrib.rnn.GridLSTMCell.output_size` {#GridLSTMCell.output_size}
- - -
#### `tf.contrib.rnn.GridLSTMCell.state_size` {#GridLSTMCell.state_size}
- - -
#### `tf.contrib.rnn.GridLSTMCell.zero_state(batch_size, dtype)` {#GridLSTMCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.

View File

@ -0,0 +1,93 @@
Long short-term memory unit (LSTM) recurrent network cell.
The default non-peephole implementation is based on:
http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
S. Hochreiter and J. Schmidhuber.
"Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.
The peephole implementation is based on:
https://research.google.com/pubs/archive/43905.pdf
Hasim Sak, Andrew Senior, and Francoise Beaufays.
"Long short-term memory recurrent neural network architectures for
large scale acoustic modeling." INTERSPEECH, 2014.
The coupling of input and forget gate is based on:
http://arxiv.org/pdf/1503.04069.pdf
Greff et al. "LSTM: A Search Space Odyssey"
The class uses optional peep-hole connections, and an optional projection
layer.
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.__init__(num_units, use_peepholes=False, initializer=None, num_proj=None, proj_clip=None, num_unit_shards=1, num_proj_shards=1, forget_bias=1.0, state_is_tuple=False, activation=tanh)` {#CoupledInputForgetGateLSTMCell.__init__}
Initialize the parameters for an LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell
* <b>`use_peepholes`</b>: bool, set True to enable diagonal/peephole connections.
* <b>`initializer`</b>: (optional) The initializer to use for the weight and
projection matrices.
* <b>`num_proj`</b>: (optional) int, The output dimensionality for the projection
matrices. If None, no projection is performed.
* <b>`proj_clip`</b>: (optional) A float value. If `num_proj > 0` and `proj_clip` is
provided, then the projected values are clipped elementwise to within
`[-proj_clip, proj_clip]`.
* <b>`num_unit_shards`</b>: How to split the weight matrix. If >1, the weight
matrix is stored across num_unit_shards.
* <b>`num_proj_shards`</b>: How to split the projection matrix. If >1, the
projection matrix is stored across num_proj_shards.
* <b>`forget_bias`</b>: Biases of the forget gate are initialized by default to 1
in order to reduce the scale of forgetting at the beginning of
the training.
* <b>`state_is_tuple`</b>: If True, accepted and returned states are 2-tuples of
the `c_state` and `m_state`. By default (False), they are concatenated
along the column axis. This default behavior will soon be deprecated.
* <b>`activation`</b>: Activation function of the inner states.
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.output_size` {#CoupledInputForgetGateLSTMCell.output_size}
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.state_size` {#CoupledInputForgetGateLSTMCell.state_size}
- - -
#### `tf.contrib.rnn.CoupledInputForgetGateLSTMCell.zero_state(batch_size, dtype)` {#CoupledInputForgetGateLSTMCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.

View File

@ -0,0 +1,70 @@
Time-Frequency Long short-term memory unit (LSTM) recurrent network cell.
This implementation is based on:
Tara N. Sainath and Bo Li
"Modeling Time-Frequency Patterns with LSTM vs. Convolutional Architectures
for LVCSR Tasks." submitted to INTERSPEECH, 2016.
It uses peep-hole connections and optional cell clipping.
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.__init__(num_units, use_peepholes=False, cell_clip=None, initializer=None, num_unit_shards=1, forget_bias=1.0, feature_size=None, frequency_skip=None)` {#TimeFreqLSTMCell.__init__}
Initialize the parameters for an LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell
* <b>`use_peepholes`</b>: bool, set True to enable diagonal/peephole connections.
* <b>`cell_clip`</b>: (optional) A float value, if provided the cell state is clipped
by this value prior to the cell output activation.
* <b>`initializer`</b>: (optional) The initializer to use for the weight and
projection matrices.
* <b>`num_unit_shards`</b>: int, How to split the weight matrix. If >1, the weight
matrix is stored across num_unit_shards.
* <b>`forget_bias`</b>: float, Biases of the forget gate are initialized by default
to 1 in order to reduce the scale of forgetting at the beginning
of the training.
* <b>`feature_size`</b>: int, The size of the input feature the LSTM spans over.
* <b>`frequency_skip`</b>: int, The amount the LSTM filter is shifted by in
frequency.
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.output_size` {#TimeFreqLSTMCell.output_size}
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.state_size` {#TimeFreqLSTMCell.state_size}
- - -
#### `tf.contrib.rnn.TimeFreqLSTMCell.zero_state(batch_size, dtype)` {#TimeFreqLSTMCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.

View File

@ -0,0 +1,60 @@
Basic LSTM recurrent network cell.
The implementation is based on: http://arxiv.org/abs/1409.2329.
We add forget_bias (default: 1) to the biases of the forget gate in order to
reduce the scale of forgetting in the beginning of the training.
Unlike BasicLSTMCell, this is a monolithic op and should be much faster. The
weight and bias matrices should be compatible as long as the variable scope
matches.
- - -
#### `tf.contrib.rnn.LSTMFusedCell.__init__(num_units, forget_bias=1.0, use_peephole=False)` {#LSTMFusedCell.__init__}
Initialize the basic LSTM cell.
##### Args:
* <b>`num_units`</b>: int, The number of units in the LSTM cell.
* <b>`forget_bias`</b>: float, The bias added to forget gates (see above).
* <b>`use_peephole`</b>: Whether to use peephole connections or not.
- - -
#### `tf.contrib.rnn.LSTMFusedCell.output_size` {#LSTMFusedCell.output_size}
- - -
#### `tf.contrib.rnn.LSTMFusedCell.state_size` {#LSTMFusedCell.state_size}
- - -
#### `tf.contrib.rnn.LSTMFusedCell.zero_state(batch_size, dtype)` {#LSTMFusedCell.zero_state}
Return zero-filled state tensor(s).
##### Args:
* <b>`batch_size`</b>: int, float, or unit Tensor representing the batch size.
* <b>`dtype`</b>: the data type to use for the state.
##### Returns:
If `state_size` is an int or TensorShape, then the return value is a
`N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
If `state_size` is a nested list or tuple, then the return value is
a nested list or tuple (of the same structure) of `2-D` tensors with
the shapes `[batch_size x s]` for each s in `state_size`.

View File

@ -745,12 +745,20 @@
* [`get_losses`](../../api_docs/python/contrib.losses.md#get_losses)
* [`get_regularization_losses`](../../api_docs/python/contrib.losses.md#get_regularization_losses)
* [`get_total_loss`](../../api_docs/python/contrib.losses.md#get_total_loss)
* [`hinge_loss`](../../api_docs/python/contrib.losses.md#hinge_loss)
* [`log_loss`](../../api_docs/python/contrib.losses.md#log_loss)
* [`sigmoid_cross_entropy`](../../api_docs/python/contrib.losses.md#sigmoid_cross_entropy)
* [`softmax_cross_entropy`](../../api_docs/python/contrib.losses.md#softmax_cross_entropy)
* [`sum_of_pairwise_squares`](../../api_docs/python/contrib.losses.md#sum_of_pairwise_squares)
* [`sum_of_squares`](../../api_docs/python/contrib.losses.md#sum_of_squares)
* **[RNN (contrib)](../../api_docs/python/contrib.rnn.md)**:
* [`AttentionCellWrapper`](../../api_docs/python/contrib.rnn.md#AttentionCellWrapper)
* [`CoupledInputForgetGateLSTMCell`](../../api_docs/python/contrib.rnn.md#CoupledInputForgetGateLSTMCell)
* [`GridLSTMCell`](../../api_docs/python/contrib.rnn.md#GridLSTMCell)
* [`LSTMFusedCell`](../../api_docs/python/contrib.rnn.md#LSTMFusedCell)
* [`TimeFreqLSTMCell`](../../api_docs/python/contrib.rnn.md#TimeFreqLSTMCell)
* **[Metrics (contrib)](../../api_docs/python/contrib.metrics.md)**:
* [`accuracy`](../../api_docs/python/contrib.metrics.md#accuracy)
* [`aggregate_metric_map`](../../api_docs/python/contrib.metrics.md#aggregate_metric_map)

View File

@ -1182,6 +1182,18 @@ py_test(
],
)
# Small Python test target built from debug/session_debug_test.py, which
# tests the debugger functionalities in tf.Session.
py_test(
name = "session_debug_test",
size = "small",
srcs = ["debug/session_debug_test.py"],
srcs_version = "PY2AND3",
deps = [
":framework",
":framework_test_lib",
":session",
],
)
cuda_py_test(
name = "timeline_test",
size = "small",

View File

@ -0,0 +1,298 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for debugger functionalities in tf.Session."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import glob
import os
import shutil
import tempfile
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.util import event_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import tensor_util
from tensorflow.python.framework import test_util
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest
class SessionDebugTest(test_util.TensorFlowTestCase):
def setUp(self):
  """Creates a temporary directory used as the debug dump root."""
  dump_dir = tempfile.mkdtemp()
  self.dump_root_ = dump_dir
def tearDown(self):
  """Deletes the temporary dump directory along with everything in it."""
  dump_dir = self.dump_root_
  shutil.rmtree(dump_dir)
def _addDebugTensorWatch(self,
run_opts,
node_name,
output_slot,
debug_op="DebugIdentity",
debug_urls=None):
watch_opts = run_opts.debug_tensor_watch_opts
# Add debug tensor watch for u.
watch = watch_opts.add()
watch.node_name = node_name
watch.output_slot = 0
watch.debug_ops.append(debug_op)
if debug_urls:
for debug_url in debug_urls:
watch.debug_urls.append(debug_url)
def _verifyTensorDumpFile(self, dump_file, expected_tensor_name, debug_op,
                          wall_time_lower_bound, expected_tensor_val):
  """Helper method: Verify a Tensor debug dump file and its content.

  Parses the file as a serialized Event proto and checks its wall time, the
  node name of its first summary value, and the tensor stored in that value.

  Args:
    dump_file: Path to the dump file.
    expected_tensor_name: Expected name of the tensor, e.g., node_a:0.
    debug_op: Name of the debug Op, e.g., DebugIdentity.
    wall_time_lower_bound: Lower bound of the wall time.
    expected_tensor_val: Expected tensor value, as a numpy array.
  """
  self.assertTrue(os.path.isfile(dump_file))

  event = event_pb2.Event()
  # Use a context manager so the file handle is closed even if parsing fails.
  with open(dump_file, "rb") as f:
    event.ParseFromString(f.read())

  wall_time = event.wall_time
  debug_node_name = event.summary.value[0].node_name
  tensor_value = tensor_util.MakeNdarray(event.summary.value[0].tensor)

  self.assertGreater(wall_time, wall_time_lower_bound)
  self.assertEqual("%s:%s" % (expected_tensor_name, debug_op), debug_node_name)

  # String tensors are compared through their str() form; everything else is
  # compared numerically.
  if expected_tensor_val.dtype.type is np.string_:
    self.assertEqual(str(expected_tensor_val), str(tensor_value))
  else:
    self.assertAllClose(expected_tensor_val, tensor_value)
def testDumpToFileOverlaoppinpParentDir(self):
  """Dumps two watched tensors whose node names share a parent directory."""
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    v_init_val = np.array([[2.0], [-1.0]])

    # Both node names sit under "testDumpToFile/", i.e., they share a parent
    # directory. This tests concurrent, non-racing directory creation.
    u_name = "testDumpToFile/u"
    v_name = "testDumpToFile/v"

    u = variables.Variable(
        constant_op.constant(u_init_val, shape=[2, 2]), name=u_name)
    v = variables.Variable(
        constant_op.constant(v_init_val, shape=[2, 1]), name=v_name)
    w = math_ops.matmul(u, v, name="testDumpToFile/matmul")

    u.initializer.run()
    v.initializer.run()

    # (variable name, expected dumped value) for every watched tensor.
    watched = [(u_name, u_init_val), (v_name, v_init_val)]

    run_options = config_pb2.RunOptions()
    debug_url = "file://%s" % self.dump_root_

    # Watch output slot 0 of the "read" node of each variable.
    for var_name, _ in watched:
      self._addDebugTensorWatch(
          run_options, "%s/read" % var_name, 0, debug_urls=[debug_url])

    run_metadata = config_pb2.RunMetadata()

    # Invoke Session.run().
    sess.run(w, options=run_options, run_metadata=run_metadata)

    # Each watched variable must have produced exactly one dump file.
    for var_name, init_val in watched:
      dump_dir = os.path.join(self.dump_root_, var_name)
      dump_files = os.listdir(dump_dir)
      self.assertEqual(1, len(dump_files))
      self.assertTrue(dump_files[0].startswith("read_0_"))

      self._verifyTensorDumpFile(
          os.path.join(dump_dir, dump_files[0]), "%s/read:0" % var_name,
          "DebugIdentity", 0, init_val)
def testDumpStringTensorsToFileSystem(self):
  """Dumps two watched string tensors and checks the dumped contents."""
  with session.Session() as sess:
    str1_name = "str1"
    str2_name = "str2"
    str1_init_val = np.array(b"abc")
    str2_init_val = np.array(b"def")

    str1_init = constant_op.constant(str1_init_val)
    str2_init = constant_op.constant(str2_init_val)
    str1 = variables.Variable(str1_init, name=str1_name)
    str2 = variables.Variable(str2_init, name=str2_name)

    # Concatenate str1 and str2
    str_concat = math_ops.add(str1, str2, name="str_concat")

    str1.initializer.run()
    str2.initializer.run()

    # (variable name, expected dumped value) for every watched tensor.
    watched = [(str1_name, str1_init_val), (str2_name, str2_init_val)]

    run_options = config_pb2.RunOptions()
    debug_url = "file://%s" % self.dump_root_

    # Watch output slot 0 of the "read" node of str1 and of str2.
    for var_name, _ in watched:
      self._addDebugTensorWatch(
          run_options, "%s/read" % var_name, 0, debug_urls=[debug_url])

    run_metadata = config_pb2.RunMetadata()

    # Invoke Session.run().
    sess.run(str_concat, options=run_options, run_metadata=run_metadata)

    # Each watched string variable must have produced exactly one dump file.
    for var_name, init_val in watched:
      dump_dir = os.path.join(self.dump_root_, var_name)
      dump_files = os.listdir(dump_dir)
      self.assertEqual(1, len(dump_files))
      self.assertTrue(dump_files[0].startswith("read_0_"))

      self._verifyTensorDumpFile(
          os.path.join(dump_dir, dump_files[0]), "%s/read:0" % var_name,
          "DebugIdentity", 0, init_val)
def testDumpToFileWhileLoop(self):
  """Watches tensors used by a while loop and verifies the dump files.

  Variable "u" is incremented by variable "v" `num_iter` times inside a
  while loop. Watches are placed on "u", on the "read" node of "v" and on
  "while/Identity". The test checks the loop result, the final value of "u"
  and the dump files found under the dump root.
  """
  with session.Session() as sess:
    num_iter = 10

    # "u" is the Variable being updated in the loop.
    u_name = "testDumpToFileWhileLoop/u"
    u_namespace = u_name.split("/")[0]

    u_init_val = np.array(11.0)
    u_init = constant_op.constant(u_init_val)
    u = variables.Variable(u_init, name=u_name)

    # "v" is the increment.
    v_name = "testDumpToFileWhileLoop/v"
    v_namespace = v_name.split("/")[0]

    v_init_val = np.array(2.0)
    v_init = constant_op.constant(v_init_val)
    v = variables.Variable(v_init, name=v_name)

    u.initializer.run()
    v.initializer.run()

    i = constant_op.constant(0, name="testDumpToFileWhileLoop/i")

    def cond(i):
      return math_ops.less(i, num_iter)

    def body(i):
      new_u = state_ops.assign_add(u, v)

      new_i = math_ops.add(i, 1)
      # Attach the update of u as a dependency of the returned counter.
      op = control_flow_ops.group(new_u)
      new_i = control_flow_ops.with_dependencies([op], new_i)
      return [new_i]

    loop = control_flow_ops.while_loop(cond, body, [i], parallel_iterations=1)

    # Create RunOptions for debug-watching tensors
    run_options = config_pb2.RunOptions()
    debug_url = "file://%s" % self.dump_root_

    # Add debug tensor watch for u.
    self._addDebugTensorWatch(run_options, u_name, 0, debug_urls=[debug_url])
    # Add debug tensor watch for v.
    self._addDebugTensorWatch(
        run_options, "%s/read" % v_name, 0, debug_urls=[debug_url])
    # Add debug tensor watch for while/Identity.
    self._addDebugTensorWatch(
        run_options, "while/Identity", 0, debug_urls=[debug_url])

    run_metadata = config_pb2.RunMetadata()
    r = sess.run(loop, options=run_options, run_metadata=run_metadata)

    self.assertEqual(num_iter, r)

    u_val_final = sess.run(u)
    self.assertAllClose(u_init_val + num_iter * v_init_val, u_val_final)

    # Verify dump files
    self.assertTrue(os.path.isdir(self.dump_root_))

    self.assertTrue(os.path.isdir(os.path.join(self.dump_root_, u_namespace)))
    self.assertTrue(
        os.path.isdir(os.path.join(self.dump_root_, v_namespace, "v")))

    # Verify the dump file for tensor "u".
    dump_files = glob.glob(
        os.path.join(self.dump_root_, u_namespace, "u_0_*"))
    self.assertEqual(1, len(dump_files))
    # glob.glob() returns matches that already carry the directory prefix of
    # the pattern, so the match is used as-is instead of being joined onto
    # the dump root a second time.
    self._verifyTensorDumpFile(dump_files[0], "%s:0" % u_name,
                               "DebugIdentity", 0, u_init_val)

    # Verify the dump file for tensor "v".
    dump_files = os.listdir(os.path.join(self.dump_root_, v_name))
    self.assertEqual(1, len(dump_files))
    self.assertTrue(dump_files[0].startswith("read_0_"))

    dump_file = os.path.join(self.dump_root_, v_name, dump_files[0])
    self._verifyTensorDumpFile(dump_file, "%s/read:0" % v_name,
                               "DebugIdentity", 0, v_init_val)

    # Verify the dump files for tensor while/Identity
    while_identity_dump_files = sorted(
        os.listdir(os.path.join(self.dump_root_, "while")))
    self.assertEqual(num_iter, len(while_identity_dump_files))

    # Verify the content of the individual dump files: the k-th file in
    # sorted order is expected to hold the value k.
    for k, dump_file_name in enumerate(while_identity_dump_files):
      dump_file_path = os.path.join(self.dump_root_, "while", dump_file_name)
      self._verifyTensorDumpFile(dump_file_path, "while/Identity:0",
                                 "DebugIdentity", 0, np.array(k))
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
  googletest.main()

View File

@ -72,6 +72,7 @@ from tensorflow.python.framework.device import DeviceSpec
from tensorflow.python.framework.ops import Graph
from tensorflow.python.framework.ops import Operation
from tensorflow.python.framework.ops import Tensor
from tensorflow.python.framework.ops import Output
from tensorflow.python.framework.ops import SparseTensor
from tensorflow.python.framework.ops import SparseTensorValue
from tensorflow.python.framework.ops import IndexedSlices

View File

@ -65,6 +65,7 @@ def get_module_to_name():
tf.contrib.learn.monitors: (
"tf.contrib.learn.monitors"),
tf.contrib.losses: "tf.contrib.losses",
tf.contrib.rnn: "tf.contrib.rnn",
tf.contrib.metrics: "tf.contrib.metrics",
tf.contrib.util: "tf.contrib.util",
}
@ -171,6 +172,7 @@ def all_libraries(module_to_name, members, documented):
library("contrib.learn.monitors", "Monitors (contrib)",
tf.contrib.learn.monitors),
library("contrib.losses", "Losses (contrib)", tf.contrib.losses),
library("contrib.rnn", "RNN (contrib)", tf.contrib.rnn),
library("contrib.metrics", "Metrics (contrib)", tf.contrib.metrics),
library("contrib.util", "Utilities (contrib)", tf.contrib.util),
library("contrib.copy_graph", "Copying Graph Elements (contrib)",

View File

@ -185,7 +185,10 @@ def register_dense_tensor_like_type(tensor_type):
class Tensor(object):
"""Represents a value produced by an `Operation`.
"""Represents one of the outputs of an `Operation`.
*Note:* the `Tensor` class will be replaced by `Output` in the future.
Currently these two are aliases for each other.
A `Tensor` is a symbolic handle to one of the outputs of an
`Operation`. It does not hold the values of that operation's output,
@ -556,6 +559,10 @@ class Tensor(object):
return _eval_using_default_session(self, feed_dict, self.graph, session)
# TODO(josh11b): Switch everyone from "Tensor" to "Output" to match C++ API.
Output = Tensor
def _TensorTensorConversionFunction(t, dtype=None, name=None, as_ref=False):
_ = name, as_ref
if dtype and not dtype.is_compatible_with(t.dtype):

View File

@ -846,8 +846,7 @@ def per_image_whitening(image):
stddev = math_ops.sqrt(variance)
# Apply a minimum normalization that protects us against uniform images.
min_stddev = math_ops.inv(
math_ops.sqrt(math_ops.cast(num_pixels, dtypes.float32)))
min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
pixel_value_scale = math_ops.maximum(stddev, min_stddev)
pixel_value_offset = image_mean

View File

@ -161,7 +161,7 @@ def _SegmentMeanGrad(op, grad):
array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)])
ones = array_ops.fill(ones_shape,
constant_op.constant(1, dtype=grad.dtype))
scaled_grad = grad * math_ops.inv(math_ops.segment_sum(ones, op.inputs[1]))
scaled_grad = math_ops.div(grad, math_ops.segment_sum(ones, op.inputs[1]))
return array_ops.gather(scaled_grad, op.inputs[1]), None

View File

@ -1125,7 +1125,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):
dtype=x.dtype)
# 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
binary_tensor = math_ops.floor(random_tensor)
ret = x * math_ops.inv(keep_prob) * binary_tensor
ret = math_ops.div(x, keep_prob) * binary_tensor
ret.set_shape(x.get_shape())
return ret