Allow is_initialized and initializer to be called on MirroredVariables and TowerLocalVariables.

PiperOrigin-RevId: 203520287
This commit is contained in:
Anjali Sridhar 2018-07-06 13:50:29 -07:00 committed by Yifei Feng
parent d64754c5c7
commit 90fc5e3819
84 changed files with 10517 additions and 239 deletions

View File

@ -40,23 +40,7 @@ namespace tensorflow {
XlaCompilationCache::XlaCompilationCache(xla::LocalClient* client,
DeviceType device_type)
: client_(client), device_type_(std::move(device_type)) {}
XlaCompilationCache::~XlaCompilationCache() {
// Ensure any use of our programs have completed by waiting for all stream
// executors to complete.
for (auto* executor : client_->backend().stream_executors()) {
bool ok = executor->SynchronizeAllActivity();
if (!ok) {
LOG(ERROR) << "Error synchronizing activity while waiting for all "
"programs to complete";
}
}
// TODO(b/110813685): Think about the program ownership model. Programs are
// currently owned by the compilation cache which means we must wait for
// program completion in the destructor. There are multiple compilation caches
// around, which complicates things a little. Perhaps having programs be
// shared_ptrs (an invasive change) would make the model easier to reason
// about?
}
XlaCompilationCache::~XlaCompilationCache() = default;
string XlaCompilationCache::DebugString() {
return "XLA JIT compilation cache";

View File

@ -67,53 +67,36 @@ Status XlaTransferManager::TransferLiteralToDevice(
xla::Shape xla_shape;
TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(),
host_tensor.shape(), &xla_shape));
// Create a reference to hold onto host_tensor until after the literal has
// been transferred. Also make sure the literal exists until the function
// asynchronously completes, as it will be wrapped in an xla::LiteralSlice.
TensorReference ref(host_tensor);
auto literal = std::make_shared<xla::BorrowingLiteral>(
xla::BorrowingLiteral literal(
static_cast<const char*>(DMAHelper::base(&host_tensor)), xla_shape);
const xla::ShapedBuffer& shaped_buffer =
XlaTensor::FromTensor(device_tensor)->shaped_buffer();
VLOG(1) << "Transfer to device as literal: " << literal->ToString() << " "
VLOG(1) << "Transfer to device as literal: " << literal.ToString() << " "
<< shaped_buffer.ToString();
TF_RETURN_IF_ERROR(transfer_manager_->TransferLiteralToDeviceAsync(
stream_, *literal, shaped_buffer));
// Unref the host tensor, and capture the literal shared_ptr too so it goes
// out of scope when the lambda completes.
stream_->ThenDoHostCallback([ref, literal]() { ref.Unref(); });
return Status::OK();
return transfer_manager_->TransferLiteralToDevice(stream_, literal,
shaped_buffer);
}
void XlaTransferManager::TransferLiteralFromDevice(
Tensor* host_tensor, const Tensor& device_tensor,
const StatusCallback& done) const {
Status XlaTransferManager::TransferLiteralFromDevice(
Tensor* host_tensor, const Tensor& device_tensor) const {
const xla::ShapedBuffer& shaped_buffer =
XlaTensor::FromTensor(&device_tensor)->shaped_buffer();
TensorReference ref(device_tensor);
transfer_manager_->TransferLiteralFromDevice(
stream_, shaped_buffer,
[=, &shaped_buffer](
xla::StatusOr<std::unique_ptr<xla::Literal> > literal_or) {
ref.Unref();
done([&]() -> Status {
TF_ASSIGN_OR_RETURN(auto literal, std::move(literal_or));
VLOG(1) << "Transfer from device as literal: " << literal->ToString()
<< " " << shaped_buffer.ToString();
TF_ASSIGN_OR_RETURN(
std::unique_ptr<xla::Literal> literal,
transfer_manager_->TransferLiteralFromDevice(stream_, shaped_buffer));
VLOG(1) << "Transfer from device as literal: " << literal->ToString() << " "
<< shaped_buffer.ToString();
Tensor tensor;
TF_RETURN_IF_ERROR(
LiteralToHostTensor(*literal, host_tensor->dtype(), &tensor));
// Reshape the tensor back to its declared shape.
Status status;
if (!host_tensor->CopyFrom(tensor, device_tensor.shape())) {
status = errors::Internal(
return errors::Internal(
"Tensor::CopyFrom failed when copying from XLA device to CPU");
}
return status;
}());
});
return Status::OK();
}
void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
@ -136,7 +119,6 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
XlaTensor* xla_tensor = XlaTensor::FromTensor(device_tensor);
CHECK(xla_tensor);
Status status;
xla::StatusOr<TensorShape> shape_or_status = shape_representation_fn_(
device_tensor->shape(), device_tensor->dtype());
if (!shape_or_status.ok()) {
@ -145,14 +127,16 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
}
TensorShape shape = shape_or_status.ValueOrDie();
if (!xla_tensor->has_shaped_buffer()) {
status = xla_tensor->AllocateShapedBuffer(
Status s = xla_tensor->AllocateShapedBuffer(
device_tensor->dtype(), shape, client_,
stream_->parent()->device_ordinal());
if (!status.ok()) {
return done(status);
if (!s.ok()) {
done(s);
return;
}
}
Status status;
if (transfer_as_literal_) {
Tensor reshaped_cpu_tensor;
if (!reshaped_cpu_tensor.CopyFrom(*cpu_tensor, shape)) {
@ -205,8 +189,7 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
Status status;
if (transfer_as_literal_) {
TransferLiteralFromDevice(cpu_tensor, *device_tensor, done);
return;
status = TransferLiteralFromDevice(cpu_tensor, *device_tensor);
} else {
stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes);
// TODO(hpucha): Make this asynchronous.
@ -216,8 +199,9 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
"Failed to complete data transfer on stream %p: %s", stream_,
block_status.error_message().c_str());
}
done(status);
}
done(status);
return;
}
@ -228,8 +212,8 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
void XlaTransferManager::CopyDeviceTensorToDevice(const Tensor& src_tensor,
Tensor* dst_tensor,
const StatusCallback& done) {
// Perform memory allocation now, and enqueue the device-to-device transfer.
Status status = [&]() -> Status {
// TODO(phawkins): replace this code with an asynchronous implementation.
auto body = [&]() {
if (src_tensor.NumElements() == 0) {
return Status::OK();
}
@ -245,20 +229,21 @@ void XlaTransferManager::CopyDeviceTensorToDevice(const Tensor& src_tensor,
xla_dst->AllocateShapedBuffer(src_tensor.dtype(), shape, client_,
stream_->parent()->device_ordinal()));
}
auto from_iter = xla_src->shaped_buffer().buffers().begin();
auto to_iter = xla_dst->shaped_buffer().buffers().begin();
for (auto end_iter = xla_src->shaped_buffer().buffers().end();
from_iter != end_iter; ++from_iter, ++to_iter) {
stream_->ThenMemcpyD2D(&to_iter->second, from_iter->second,
to_iter->second.size());
TF_RETURN_IF_ERROR(
xla_dst->shaped_buffer().buffers().ForEachMutableElementWithStatus(
[&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) {
const se::DeviceMemoryBase& from_buffer =
xla_src->shaped_buffer().buffers().element(index);
CHECK_EQ(buffer->size(), from_buffer.size());
if (!stream_->parent()->SynchronousMemcpy(buffer, from_buffer,
buffer->size())) {
return errors::Internal("Device to device memcpy failed");
}
return Status::OK();
}();
if (!status.ok()) {
return done(status);
} else {
stream_->ThenDoHostCallback([=]() { done(Status::OK()); });
}
}));
return Status::OK();
};
done(body());
}
XlaDeviceContext::XlaDeviceContext(

View File

@ -64,9 +64,8 @@ class XlaTransferManager {
private:
Status TransferLiteralToDevice(const Tensor& host_tensor,
Tensor* device_tensor) const;
void TransferLiteralFromDevice(Tensor* host_tensor,
const Tensor& device_tensor,
const StatusCallback& done) const;
Status TransferLiteralFromDevice(Tensor* host_tensor,
const Tensor& device_tensor) const;
// Stream obtained from a Device, used to transfer tensors between
// CPU and device.

View File

@ -82,18 +82,7 @@ StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStreamWrapper(
StatusOr<ScopedShapedBuffer> return_value =
ExecuteOnStream(run_options, arguments, profile_ptr.get());
if (!return_value.status().ok()) {
if (profile != nullptr) {
// Ensure the ThenStartTimer call has completed before we destroy timer.
// We already have a failure status to return, so just log this if it
// fails.
Status status = stream->BlockHostUntilDone();
if (!status.ok()) {
LOG(ERROR) << "Failed to BlockHostUntilDone: " << status;
}
}
return return_value.status();
}
TF_RETURN_IF_ERROR(return_value.status());
if (profile != nullptr) {
VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";

View File

@ -180,12 +180,8 @@ StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers(
TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
CreateExecutable(std::move(module), run_hlo_passes));
TF_ASSIGN_OR_RETURN(
ScopedShapedBuffer retval,
executable->ExecuteOnStreamWrapper(&service_run_options,
/*profile=*/profile, arguments));
TF_RETURN_IF_ERROR(stream.BlockHostUntilDone());
return std::move(retval);
return executable->ExecuteOnStreamWrapper(&service_run_options,
/*profile=*/profile, arguments);
}
StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers(
@ -313,7 +309,6 @@ StatusOr<std::vector<std::unique_ptr<Literal>>> HloRunner::ExecuteReplicated(
std::vector<std::unique_ptr<Literal>> exec_results;
for (int64 i = 0; i < options.num_replicas; ++i) {
TF_RETURN_IF_ERROR(streams[i]->BlockHostUntilDone());
TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> literal,
backend().transfer_manager()->TransferLiteralFromDevice(
streams[i].get(), results[i]));

View File

@ -772,10 +772,6 @@ XLA_TEST_F(LocalClientExecuteTest, CompileExecutable) {
ScopedShapedBuffer result =
executable->Run({&x_array}, DefaultExecutableRunOptions())
.ConsumeValueOrDie();
ASSERT_IS_OK(local_client_->mutable_backend()
->BorrowStream(0)
.ValueOrDie()
->BlockHostUntilDone());
LiteralTestUtil::ExpectR1Near<float>(
{2.0f, 4.0f, 6.0f}, *ShapedBufferToLiteral(result), error_spec_);

View File

@ -189,19 +189,7 @@ StatusOr<ScopedShapedBuffer> LocalClientTestBase::ExecuteLocally(
TF_ASSIGN_OR_RETURN(
std::unique_ptr<LocalExecutable> executable,
local_client_->Compile(computation, argument_layouts, build_options));
TF_ASSIGN_OR_RETURN(auto ret, executable->Run(arguments, run_options));
auto device_ordinal =
build_options.device_ordinal() == -1 ? 0 : build_options.device_ordinal();
auto* stream = run_options.stream();
if (!stream) {
stream = local_client_->mutable_backend()
->BorrowStream(device_ordinal)
.ValueOrDie()
.get();
}
TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
return std::move(ret);
return executable->Run(arguments, run_options);
}
} // namespace xla

View File

@ -168,7 +168,6 @@ void ExecuteAndFetchProfile(string* profile_output, LocalClient* client,
auto execution_result,
executable->ExecuteOnStream(&run_options, {&lhs_arg, &rhs_arg},
&hlo_execution_profile));
TF_ASSERT_OK(stream_ptr->BlockHostUntilDone());
(void)execution_result;
*profile_output =

View File

@ -922,5 +922,49 @@ class MirroredVariableUpdateTest(test.TestCase):
self.assertEquals(4.5, self.evaluate(mirrored_var))
class MirroredAndTowerLocalVariableInitializerTest(test.TestCase):
  """Tests `is_initialized()` and `initializer` on distributed variables.

  Each test creates a variable under a two-device `MirroredStrategy`, checks
  that it reports as uninitialized, runs its `initializer`, and then checks
  that it reports as initialized.
  """

  config = config_pb2.ConfigProto()
  config.allow_soft_placement = True

  def testAssignMirroredVarInitializer(self):
    # This test is not eager compatible since in eager variables are initialized
    # upon construction instead of once the initialization op is run.
    with context.graph_mode():
      def var_fn():
        v = variable_scope.variable(1.0, name="foo")
        return v

      dist = mirrored_strategy.MirroredStrategy(
          ["/device:GPU:0", "/device:CPU:0"])

      with dist.scope():
        mirrored_var = dist.call_for_each_tower(var_fn)
        self.assertIsInstance(mirrored_var, values.MirroredVariable)
        self.assertFalse(self.evaluate(mirrored_var.is_initialized()))
        self.evaluate(mirrored_var.initializer)
        self.assertTrue(self.evaluate(mirrored_var.is_initialized()))

  def testAssignTowerLocalVarInitializer(self):
    # This test is not eager compatible since in eager variables are initialized
    # upon construction instead of once the initialization op is run.
    with context.graph_mode():
      def model_fn():
        tower_context = distribute_lib.get_tower_context()
        with tower_context.tower_local_var_scope(
            variable_scope.VariableAggregation.SUM):
          v_sum = variable_scope.variable(1.0)
        # assertIsInstance reports the offending type on failure, unlike
        # assertTrue(isinstance(...)), and matches the mirrored-variable test.
        self.assertIsInstance(v_sum, values.TowerLocalVariable)
        return v_sum

      dist = mirrored_strategy.MirroredStrategy(
          ["/device:GPU:0", "/device:CPU:0"])

      with dist.scope():
        tower_local_var = dist.call_for_each_tower(model_fn)
        self.assertIsInstance(tower_local_var, values.TowerLocalVariable)
        self.assertFalse(self.evaluate(tower_local_var.is_initialized()))
        self.evaluate(tower_local_var.initializer)
        self.assertTrue(self.evaluate(tower_local_var.is_initialized()))
if __name__ == "__main__":
test.main()

View File

@ -297,6 +297,12 @@ class MirroredVariable(DistributedVariable, Mirrored,
for v in six.itervalues(index):
v._mirrored_container = weakref.ref(self) # pylint: disable=protected-access
self._primary_var = primary_var
# tf.keras keeps track of variables initialized using this attribute. When
# tf.keras gets the default session, it initializes all uninitialized vars.
# We need to make _keras_initialized a member of MirroredVariable because
# without this it will use `__getattr__` which will delegate to a component
# variable.
self._keras_initialized = False
self._aggregation = aggregation
super(MirroredVariable, self).__init__(index)
@ -348,6 +354,28 @@ class MirroredVariable(DistributedVariable, Mirrored,
def assign(self, *args, **kwargs):
return self._assign_func(f=state_ops.assign, *args, **kwargs)
def is_initialized(self, name=None):
  """Builds a check that every component of this mirrored variable is set.

  Args:
    name: Optional name applied to the final `logical_and` op.

  Returns:
    The `logical_and` of `is_initialized()` over all values in `self._index`.
  """
  # `self._index.values()` may be a `dict_values` view (this happens when
  # tf.keras models are run through `model_to_estimator`), which supports
  # neither indexing nor slicing, so copy it into a list first.
  components = list(self._index.values())
  first, middle, last = components[0], components[1:-1], components[-1]

  all_initialized = first.is_initialized()
  for component in middle:
    all_initialized = math_ops.logical_and(
        all_initialized, component.is_initialized())

  # The last component is combined outside the loop so that the user-supplied
  # `name` is attached to the final `logical_and`, which is the op returned
  # as this variable's `is_initialized` op.
  return math_ops.logical_and(
      all_initialized, last.is_initialized(), name=name)
@property
def initializer(self):
  """Single grouped op covering the `initializer` of every component value."""
  component_initializers = [
      component.initializer for component in self._index.values()
  ]
  return control_flow_ops.group(component_initializers)
@property
def aggregation(self):
return self._aggregation
@ -435,6 +463,12 @@ class TowerLocalVariable(DistributedVariable, PerDevice,
def __init__(self, index, primary_var, aggregation):
self._primary_var = primary_var
self._aggregation = aggregation
# tf.keras keeps track of variables initialized using this attribute. When
# tf.keras gets the default session, it initializes all uninitialized vars.
# We need to make _keras_initialized a member of TowerLocalVariable because
# without this it will use `__getattr__` which will delegate to a component
# variable.
self._keras_initialized = False
super(TowerLocalVariable, self).__init__(index)
def assign_sub(self, *args, **kwargs):
@ -449,6 +483,28 @@ class TowerLocalVariable(DistributedVariable, PerDevice,
_assert_tower_context()
return self.get().assign(*args, **kwargs)
def is_initialized(self, name=None):
  """Builds a check that every component of this tower local variable is set.

  Args:
    name: Optional name applied to the final `logical_and` op.

  Returns:
    The `logical_and` of `is_initialized()` over all values in `self._index`.
  """
  # Materialize the values as a list: under `model_to_estimator` with
  # tf.keras models, `self._index.values()` is a `dict_values` view rather
  # than a `list`, and the indexing/slicing below needs a real list.
  per_device_vars = list(self._index.values())
  head = per_device_vars[0]
  inner = per_device_vars[1:-1]
  tail = per_device_vars[-1]

  combined = head.is_initialized()
  for var in inner:
    combined = math_ops.logical_and(combined, var.is_initialized())

  # Fold in the final component separately so the caller's `name` ends up on
  # the last `logical_and`, which serves as the `is_initialized` op for the
  # whole tower local variable.
  return math_ops.logical_and(combined, tail.is_initialized(), name=name)
@property
def initializer(self):
  """Single grouped op covering the `initializer` of every per-device value."""
  per_device_initializers = [
      var.initializer for var in self._index.values()
  ]
  return control_flow_ops.group(per_device_initializers)
@property
def aggregation(self):
return self._aggregation

View File

@ -0,0 +1,429 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "U9i2Dsh-ziXr"
},
"source": [
"# An introduction to TensorFlow\n",
"\n",
"This is an introductory tutorial for using TensorFlow. It will cover:\n",
"\n",
"* Importing required packages\n",
"* Creating and using Tensors\n",
"* Using GPU acceleration\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "z1JcS5iBXMRO"
},
"source": [
"## Import TensorFlow\n",
"\n",
"To get started, import the `tensorflow` module and enable eager execution.\n",
"Eager execution enables a more interactive frontend to TensorFlow, the details of which we will discuss much later."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"cellView": "code",
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
}
},
"colab_type": "code",
"id": "RlIWhyeLoYnG"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"tf.enable_eager_execution()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "H9UySOPLXdaw"
},
"source": [
"## Tensors\n",
"\n",
"A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `Tensor` objects have a data type and a shape. Additionally, Tensors can reside in accelerator (like GPU) memory. TensorFlow offers a rich library of operations ([tf.add](https://www.tensorflow.org/api_docs/python/tf/add), [tf.matmul](https://www.tensorflow.org/api_docs/python/tf/matmul), [tf.linalg.inv](https://www.tensorflow.org/api_docs/python/tf/linalg/inv) etc.) that consume and produce Tensors. These operations automatically convert native Python types. For example:\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"cellView": "code",
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 125
},
"colab_type": "code",
"executionInfo": {
"elapsed": 320,
"status": "ok",
"timestamp": 1526420535530,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "ngUe237Wt48W",
"outputId": "b1a1cd60-4eb3-443d-cd6b-68406390784e"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tf.Tensor(3, shape=(), dtype=int32)\n",
"tf.Tensor([4 6], shape=(2,), dtype=int32)\n",
"tf.Tensor(25, shape=(), dtype=int32)\n",
"tf.Tensor(6, shape=(), dtype=int32)\n",
"tf.Tensor(aGVsbG8gd29ybGQ, shape=(), dtype=string)\n",
"tf.Tensor(13, shape=(), dtype=int32)\n"
]
}
],
"source": [
"print(tf.add(1, 2))\n",
"print(tf.add([1, 2], [3, 4]))\n",
"print(tf.square(5))\n",
"print(tf.reduce_sum([1, 2, 3]))\n",
"print(tf.encode_base64(\"hello world\"))\n",
"\n",
"# Operator overloading is also supported\n",
"print(tf.square(2) + tf.square(3))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "IDY4WsYRhP81"
},
"source": [
"Each Tensor has a shape and a datatype"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 53
},
"colab_type": "code",
"executionInfo": {
"elapsed": 215,
"status": "ok",
"timestamp": 1526420538162,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "srYWH1MdJNG7",
"outputId": "5e4ac41c-5115-4e50-eba0-42e249c16561"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1, 2)\n",
"\u003cdtype: 'int32'\u003e\n"
]
}
],
"source": [
"x = tf.matmul([[1]], [[2, 3]])\n",
"print(x.shape)\n",
"print(x.dtype)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "eBPw8e8vrsom"
},
"source": [
"The most obvious differences between NumPy arrays and TensorFlow Tensors are:\n",
"\n",
"1. Tensors can be backed by accelerator memory (like GPU, TPU).\n",
"2. Tensors are immutable."
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "Dwi1tdW3JBw6"
},
"source": [
"### NumPy Compatibility\n",
"\n",
"Conversion between TensorFlow Tensors and NumPy ndarrays is quite simple as:\n",
"* TensorFlow operations automatically convert NumPy ndarrays to Tensors.\n",
"* NumPy operations automatically convert Tensors to NumPy ndarrays.\n",
"\n",
"Tensors can be explicitly converted to NumPy ndarrays by invoking the `.numpy()` method on them.\n",
"These conversions are typically cheap as the array and Tensor share the underlying memory representation if possible. However, sharing the underlying representation isn't always possible since the Tensor may be hosted in GPU memory while NumPy arrays are always backed by host memory, and the conversion will thus involve a copy from GPU to host memory."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 251
},
"colab_type": "code",
"executionInfo": {
"elapsed": 238,
"status": "ok",
"timestamp": 1526420540562,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "lCUWzso6mbqR",
"outputId": "fd0a22bc-8249-49dd-fcbd-63161cc47e46"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TensorFlow operations convert numpy arrays to Tensors automatically\n",
"tf.Tensor(\n",
"[[ 42. 42. 42.]\n",
" [ 42. 42. 42.]\n",
" [ 42. 42. 42.]], shape=(3, 3), dtype=float64)\n",
"And NumPy operations convert Tensors to numpy arrays automatically\n",
"[[ 43. 43. 43.]\n",
" [ 43. 43. 43.]\n",
" [ 43. 43. 43.]]\n",
"The .numpy() method explicitly converts a Tensor to a numpy array\n",
"[[ 42. 42. 42.]\n",
" [ 42. 42. 42.]\n",
" [ 42. 42. 42.]]\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"ndarray = np.ones([3, 3])\n",
"\n",
"print(\"TensorFlow operations convert numpy arrays to Tensors automatically\")\n",
"tensor = tf.multiply(ndarray, 42)\n",
"print(tensor)\n",
"\n",
"\n",
"print(\"And NumPy operations convert Tensors to numpy arrays automatically\")\n",
"print(np.add(tensor, 1))\n",
"\n",
"print(\"The .numpy() method explicitly converts a Tensor to a numpy array\")\n",
"print(tensor.numpy())"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "PBNP8yTRfu_X"
},
"source": [
"## GPU acceleration\n",
"\n",
"Many TensorFlow operations can be accelerated by using the GPU for computation. Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation (and copies the tensor between CPU and GPU memory if necessary). Tensors produced by an operation are typically backed by the memory of the device on which the operation executed. For example:"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"cellView": "code",
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 53
},
"colab_type": "code",
"executionInfo": {
"elapsed": 340,
"status": "ok",
"timestamp": 1526420543562,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "3Twf_Rw-gQFM",
"outputId": "2239ae2b-adf3-4895-b1f3-464cf5361d1b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is there a GPU available: False\n",
"Is the Tensor on GPU #0: False\n"
]
}
],
"source": [
"x = tf.random_uniform([3, 3])\n",
"\n",
"print(\"Is there a GPU available: \"),\n",
"print(tf.test.is_gpu_available())\n",
"\n",
"print(\"Is the Tensor on GPU #0: \"),\n",
"print(x.device.endswith('GPU:0'))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "vpgYzgVXW2Ud"
},
"source": [
"### Device Names\n",
"\n",
"The `Tensor.device` property provides a fully qualified string name of the device hosting the contents of the Tensor. This name encodes a bunch of details, such as an identifier of the network address of the host on which this program is executing and the device within that host. This is required for distributed execution of TensorFlow programs, but we'll skip that for now. The string will end with `GPU:\u003cN\u003e` if the tensor is placed on the `N`-th GPU on the host."
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "ZWZQCimzuqyP"
},
"source": [
"\n",
"\n",
"### Explicit Device Placement\n",
"\n",
"The term \"placement\" in TensorFlow refers to how individual operations are assigned (placed on) a device for execution. As mentioned above, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation, and copies Tensors to that device if needed. However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager. For example:"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 53
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1762,
"status": "ok",
"timestamp": 1526420547562,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "RjkNZTuauy-Q",
"outputId": "2e613293-ccac-4db2-b793-8ceb5b5adcfd"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"On CPU:\n",
"10 loops, best of 3: 35.8 ms per loop\n"
]
}
],
"source": [
"def time_matmul(x):\n",
" %timeit tf.matmul(x, x)\n",
"\n",
"# Force execution on CPU\n",
"print(\"On CPU:\")\n",
"with tf.device(\"CPU:0\"):\n",
" x = tf.random_uniform([1000, 1000])\n",
" assert x.device.endswith(\"CPU:0\")\n",
" time_matmul(x)\n",
"\n",
"# Force execution on GPU #0 if available\n",
"if tf.test.is_gpu_available():\n",
" with tf.device(\"GPU:0\"): # Or GPU:1 for the 2nd GPU, GPU:2 for the 3rd etc.\n",
" x = tf.random_uniform([1000, 1000])\n",
" assert x.device.endswith(\"GPU:0\")\n",
" time_matmul(x)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "YEOJTNiOvnpQ"
},
"source": [
"## Next Steps\n",
"\n",
"In this tutorial we covered the most fundamental concepts in TensorFlow - `Tensor`s, operations, and devices.\n",
"In [the next tutorial](https://github.com/tensorflow/models/tree/master/official/contrib/eager/python/examples/notebooks/2_gradients.ipynb) we will cover automatic differentiation - a building block required for training many machine learning models like neural networks."
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"default_view": {},
"name": "TensorFlow: An introduction",
"provenance": [],
"version": "0.3.2",
"views": {}
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,209 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "U9i2Dsh-ziXr"
},
"source": [
"# Eager Execution Tutorial: Importing Data\n",
"\n",
"This notebook demonstrates the use of the [`tf.data.Dataset` API](https://www.tensorflow.org/guide/datasets) to build pipelines to feed data to your program. It covers:\n",
"\n",
"* Creating a `Dataset`.\n",
"* Iteration over a `Dataset` with eager execution enabled.\n",
"\n",
"We recommend using the `Dataset`s API for building performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops.\n",
"\n",
"If you're familiar with TensorFlow graphs, the API for constructing the `Dataset` object remains exactly the same when eager execution is enabled, but the process of iterating over elements of the dataset is slightly simpler.\n",
"You can use Python iteration over the `tf.data.Dataset` object and do not need to explicitly create a `tf.data.Iterator` object.\n",
"As a result, the discussion on iterators in the [TensorFlow Guide](https://www.tensorflow.org/guide/datasets) is not relevant when eager execution is enabled."
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "z1JcS5iBXMRO"
},
"source": [
"# Setup: Enable eager execution\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"cellView": "code",
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
}
},
"colab_type": "code",
"id": "RlIWhyeLoYnG"
},
"outputs": [],
"source": [
"# Import TensorFlow.\n",
"import tensorflow as tf\n",
"\n",
"# Enable eager execution\n",
"tf.enable_eager_execution()"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "H9UySOPLXdaw"
},
"source": [
"# Step 1: Create a source `Dataset`\n",
"\n",
"Create a _source_ dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices) or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [TensorFlow Guide](https://www.tensorflow.org/guide/datasets#reading_input_data) for more information."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"cellView": "code",
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
}
},
"colab_type": "code",
"id": "WPTUfGq6kJ5w"
},
"outputs": [],
"source": [
"ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])\n",
"\n",
"# Create a CSV file\n",
"import tempfile\n",
"_, filename = tempfile.mkstemp()\n",
"with open(filename, 'w') as f:\n",
" f.write(\"\"\"Line 1\n",
"Line 2\n",
"Line 3\n",
" \"\"\")\n",
"ds_file = tf.data.TextLineDataset(filename)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "twBfWd5xyu_d"
},
"source": [
"# Step 2: Apply transformations\n",
"\n",
"Use transformation functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) etc. to apply transformations to the records of the dataset. See the [API documentation for `tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) for details."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"cellView": "code",
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
}
},
"colab_type": "code",
"id": "ngUe237Wt48W"
},
"outputs": [],
"source": [
"ds_tensors = ds_tensors.map(tf.square).shuffle(2).batch(2)\n",
"ds_file = ds_file.batch(2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "IDY4WsYRhP81"
},
"source": [
"# Step 3: Iterate\n",
"\n",
"When eager execution is enabled `Dataset` objects support iteration.\n",
"If you're familiar with the use of `Dataset`s in TensorFlow graphs, note that there is no need for calls to `Dataset.make_one_shot_iterator()` or `get_next()`."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"base_uri": "https://localhost:8080/",
"height": 153
},
"colab_type": "code",
"executionInfo": {
"elapsed": 388,
"status": "ok",
"timestamp": 1525154629129,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "lCUWzso6mbqR",
"outputId": "8e4b0298-d27d-4ac7-e26a-ef94af0594ec"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Elements of ds_tensors:\n",
"tf.Tensor([1 9], shape=(2,), dtype=int32)\n",
"tf.Tensor([16 25], shape=(2,), dtype=int32)\n",
"tf.Tensor([ 4 36], shape=(2,), dtype=int32)\n",
"\n",
"Elements in ds_file:\n",
"tf.Tensor(['Line 1' 'Line 2'], shape=(2,), dtype=string)\n",
"tf.Tensor(['Line 3' ' '], shape=(2,), dtype=string)\n"
]
}
],
"source": [
"print('Elements of ds_tensors:')\n",
"for x in ds_tensors:\n",
" print(x)\n",
"\n",
"print('\\nElements in ds_file:')\n",
"for x in ds_file:\n",
" print(x)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"default_view": {},
"name": "Eager Execution Tutorial: Importing Data",
"provenance": [],
"version": "0.3.2",
"views": {}
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,551 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
}
},
"colab_type": "code",
"id": "pwX7Fii1rwsJ"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"tf.enable_eager_execution()\n",
"tfe = tf.contrib.eager\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "UEu3q4jmpKVT"
},
"source": [
"# High level API\n",
"\n",
"We recommend using `tf.keras` as a high-level API for building neural networks. That said, most TensorFlow APIs are usable with eager execution.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "zSFfVVjkrrsI"
},
"source": [
"## Layers: common sets of useful operations\n",
"\n",
"Most of the time when writing code for machine learning models you want to operate at a higher level of abstraction than individual operations and manipulation of individual variables.\n",
"\n",
"Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers.\n",
"\n",
"TensorFlow includes the full [Keras](https://keras.io) API in the tf.keras package, and the Keras layers are very useful when building your own models.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
}
},
"colab_type": "code",
"id": "8PyXlPl-4TzQ"
},
"outputs": [],
"source": [
"# In the tf.keras.layers package, layers are objects. To construct a layer,\n",
"# simply construct the object. Most layers take as a first argument the number\n",
"# of output dimensions / channels.\n",
"layer = tf.keras.layers.Dense(100)\n",
"# The number of input dimensions is often unnecessary, as it can be inferred\n",
"# the first time the layer is used, but it can be provided if you want to \n",
"# specify it manually, which is useful in some complex models.\n",
"layer = tf.keras.layers.Dense(10, input_shape=(None, 5))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "Fn69xxPO5Psr"
},
"source": [
"The full list of pre-existing layers can be seen in [the documentation](https://www.tensorflow.org/api_docs/python/tf/keras/layers). It includes Dense (a fully-connected layer),\n",
"Conv2D, LSTM, BatchNormalization, Dropout, and many others."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 244,
"status": "ok",
"timestamp": 1527783641557,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "E3XKNknP5Mhb",
"outputId": "c5d52434-d980-4488-efa7-5660819d0207"
},
"outputs": [
{
"data": {
"text/plain": [
"\u003ctf.Tensor: id=30, shape=(10, 10), dtype=float32, numpy=\n",
"array([[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)\u003e"
]
},
"execution_count": 3,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# To use a layer, simply call it.\n",
"layer(tf.zeros([10, 5]))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 221
},
"colab_type": "code",
"executionInfo": {
"elapsed": 320,
"status": "ok",
"timestamp": 1527783642457,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "Wt_Nsv-L5t2s",
"outputId": "f0d96dce-0128-4080-bfe2-0ee6fbc0ad90"
},
"outputs": [
{
"data": {
"text/plain": [
"[\u003ctf.Variable 'dense_1/kernel:0' shape=(5, 10) dtype=float32, numpy=\n",
" array([[ 0.43788117, -0.62099844, -0.30525017, -0.59352523, 0.1783089 ,\n",
" 0.47078604, -0.23620895, -0.30482283, 0.01366901, -0.1288507 ],\n",
" [ 0.18407935, -0.56550485, 0.54180616, -0.42254075, 0.3702994 ,\n",
" 0.36705834, -0.29678228, 0.36660975, 0.36717761, 0.46269661],\n",
" [ 0.1709305 , -0.11529458, 0.32710236, 0.46300393, -0.62802851,\n",
" 0.51641601, 0.39624029, 0.26918125, -0.25196898, 0.21353298],\n",
" [ 0.35752094, 0.44161648, 0.61500639, -0.12653333, 0.41629118,\n",
" 0.36193585, 0.066082 , -0.59253877, 0.47318751, 0.17115968],\n",
" [-0.22554061, -0.17727301, 0.5525015 , 0.3678053 , -0.00454676,\n",
" 0.24066836, -0.53640735, 0.13792562, -0.10727292, 0.59708995]], dtype=float32)\u003e,\n",
" \u003ctf.Variable 'dense_1/bias:0' shape=(10,) dtype=float32, numpy=array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)\u003e]"
]
},
"execution_count": 4,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# Layers have many useful methods. For example, you can inspect all variables\n",
"# in a layer by calling layer.variables. In this case a fully-connected layer\n",
"# will have variables for weights and biases.\n",
"layer.variables"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 221
},
"colab_type": "code",
"executionInfo": {
"elapsed": 226,
"status": "ok",
"timestamp": 1527783643252,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "6ilvKjz8_4MQ",
"outputId": "f647fced-c2d7-41a3-c237-242036784665"
},
"outputs": [
{
"data": {
"text/plain": [
"(\u003ctf.Variable 'dense_1/kernel:0' shape=(5, 10) dtype=float32, numpy=\n",
" array([[ 0.43788117, -0.62099844, -0.30525017, -0.59352523, 0.1783089 ,\n",
" 0.47078604, -0.23620895, -0.30482283, 0.01366901, -0.1288507 ],\n",
" [ 0.18407935, -0.56550485, 0.54180616, -0.42254075, 0.3702994 ,\n",
" 0.36705834, -0.29678228, 0.36660975, 0.36717761, 0.46269661],\n",
" [ 0.1709305 , -0.11529458, 0.32710236, 0.46300393, -0.62802851,\n",
" 0.51641601, 0.39624029, 0.26918125, -0.25196898, 0.21353298],\n",
" [ 0.35752094, 0.44161648, 0.61500639, -0.12653333, 0.41629118,\n",
" 0.36193585, 0.066082 , -0.59253877, 0.47318751, 0.17115968],\n",
" [-0.22554061, -0.17727301, 0.5525015 , 0.3678053 , -0.00454676,\n",
" 0.24066836, -0.53640735, 0.13792562, -0.10727292, 0.59708995]], dtype=float32)\u003e,\n",
" \u003ctf.Variable 'dense_1/bias:0' shape=(10,) dtype=float32, numpy=array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)\u003e)"
]
},
"execution_count": 5,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"# The variables are also accessible through nice accessors\n",
"layer.kernel, layer.bias"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "O0kDbE54-5VS"
},
"source": [
"## Implementing custom layers\n",
"The best way to implement your own layer is to extend the `tf.keras.layers.Layer` class and implement:\n",
" * `__init__` , where you can do all input-independent initialization\n",
" * `build`, where you know the shapes of the input tensors and can do the rest of the initialization\n",
" * `call`, where you do the forward computation\n",
"\n",
"Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes required to create the variables will need to be explicitly specified."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 391
},
"colab_type": "code",
"executionInfo": {
"elapsed": 251,
"status": "ok",
"timestamp": 1527783661512,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "5Byl3n1k5kIy",
"outputId": "6e7f9285-649a-4132-82ce-73ea92f15862"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tf.Tensor(\n",
"[[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]], shape=(10, 10), dtype=float32)\n",
"[\u003ctf.Variable 'my_dense_layer_1/kernel:0' shape=(5, 10) dtype=float32, numpy=\n",
"array([[-0.4011991 , 0.22458655, -0.33237562, -0.25117266, 0.33528614,\n",
" -0.01392961, 0.58580834, -0.16346583, 0.28465688, -0.47191954],\n",
" [-0.52922136, 0.22416979, -0.58209574, -0.60914612, 0.05226624,\n",
" -0.18325993, 0.5591442 , -0.24718609, 0.37148207, 0.40475875],\n",
" [ 0.16912812, -0.47618777, -0.38989353, 0.30105609, -0.08085585,\n",
" 0.44758242, 0.545829 , 0.51421839, 0.11063248, 0.20159996],\n",
" [ 0.34073615, -0.59835428, 0.06498981, -0.44489855, -0.34302285,\n",
" 0.20969599, 0.35527444, -0.03173476, -0.22227573, 0.09303057],\n",
" [ 0.41764337, -0.06435019, -0.52509922, -0.39957345, 0.56811184,\n",
" 0.23481232, -0.61666459, 0.31144124, -0.11532354, -0.42421889]], dtype=float32)\u003e]\n"
]
}
],
"source": [
"class MyDenseLayer(tf.keras.layers.Layer):\n",
" def __init__(self, num_outputs):\n",
" super(MyDenseLayer, self).__init__()\n",
" self.num_outputs = num_outputs\n",
" \n",
" def build(self, input_shape):\n",
" self.kernel = self.add_variable(\"kernel\", \n",
" shape=[input_shape[-1].value, \n",
" self.num_outputs])\n",
" \n",
" def call(self, input):\n",
" return tf.matmul(input, self.kernel)\n",
" \n",
"layer = MyDenseLayer(10)\n",
"print(layer(tf.zeros([10, 5])))\n",
"print(layer.variables)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "tk8E2vY0-z4Z"
},
"source": [
"Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`.\n",
"\n",
"Overall code is easier to read and maintain if it uses standard layers whenever possible, as other readers will be familiar with the behavior of standard layers. If you want to use a layer which is not present in tf.keras.layers or tf.contrib.layers, consider filing a [github issue](http://github.com/tensorflow/tensorflow/issues/new) or, even better, sending us a pull request!"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "Qhg4KlbKrs3G"
},
"source": [
"## Models: composing layers\n",
"\n",
"Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut.\n",
"\n",
"The main class used when creating a layer-like thing which contains other layers is tf.keras.Model. Implementing one is done by inheriting from tf.keras.Model."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"height": 190
},
"colab_type": "code",
"executionInfo": {
"elapsed": 420,
"status": "ok",
"timestamp": 1527783698512,
"user": {
"displayName": "",
"photoUrl": "",
"userId": ""
},
"user_tz": 420
},
"id": "N30DTXiRASlb",
"outputId": "a8b23a8e-5cf9-4bbf-f93b-6c763d74e2b3"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tf.Tensor(\n",
"[[[[ 0. 0. 0.]\n",
" [ 0. 0. 0.]\n",
" [ 0. 0. 0.]]\n",
"\n",
" [[ 0. 0. 0.]\n",
" [ 0. 0. 0.]\n",
" [ 0. 0. 0.]]]], shape=(1, 2, 3, 3), dtype=float32)\n",
"['resnet_identity_block_1/conv2d_3/kernel:0', 'resnet_identity_block_1/conv2d_3/bias:0', 'resnet_identity_block_1/batch_normalization_3/gamma:0', 'resnet_identity_block_1/batch_normalization_3/beta:0', 'resnet_identity_block_1/conv2d_4/kernel:0', 'resnet_identity_block_1/conv2d_4/bias:0', 'resnet_identity_block_1/batch_normalization_4/gamma:0', 'resnet_identity_block_1/batch_normalization_4/beta:0', 'resnet_identity_block_1/conv2d_5/kernel:0', 'resnet_identity_block_1/conv2d_5/bias:0', 'resnet_identity_block_1/batch_normalization_5/gamma:0', 'resnet_identity_block_1/batch_normalization_5/beta:0', 'resnet_identity_block_1/batch_normalization_3/moving_mean:0', 'resnet_identity_block_1/batch_normalization_3/moving_variance:0', 'resnet_identity_block_1/batch_normalization_4/moving_mean:0', 'resnet_identity_block_1/batch_normalization_4/moving_variance:0', 'resnet_identity_block_1/batch_normalization_5/moving_mean:0', 'resnet_identity_block_1/batch_normalization_5/moving_variance:0']\n"
]
}
],
"source": [
"class ResnetIdentityBlock(tf.keras.Model):\n",
" def __init__(self, kernel_size, filters):\n",
" super(ResnetIdentityBlock, self).__init__(name='')\n",
" filters1, filters2, filters3 = filters\n",
"\n",
" self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1))\n",
" self.bn2a = tf.keras.layers.BatchNormalization()\n",
"\n",
" self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same')\n",
" self.bn2b = tf.keras.layers.BatchNormalization()\n",
"\n",
" self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1))\n",
" self.bn2c = tf.keras.layers.BatchNormalization()\n",
"\n",
" def call(self, input_tensor, training=False):\n",
" x = self.conv2a(input_tensor)\n",
" x = self.bn2a(x, training=training)\n",
" x = tf.nn.relu(x)\n",
"\n",
" x = self.conv2b(x)\n",
" x = self.bn2b(x, training=training)\n",
" x = tf.nn.relu(x)\n",
"\n",
" x = self.conv2c(x)\n",
" x = self.bn2c(x, training=training)\n",
"\n",
" x += input_tensor\n",
" return tf.nn.relu(x)\n",
"\n",
" \n",
"block = ResnetIdentityBlock(1, [1, 2, 3])\n",
"print(block(tf.zeros([1, 2, 3, 3])))\n",
"print([x.name for x in block.variables])"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "wYfucVw65PMj"
},
"source": [
"Much of the time, however, models which compose many layers simply call one layer after the other. This can be done in very little code using tf.keras.Sequential"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {
"autoexec": {
"startup": false,
"wait_interval": 0
},
"base_uri": "https://localhost:8080/",
"height": 153
},
"colab_type": "code",
"executionInfo": {
"elapsed": 361,
"status": "ok",
"timestamp": 1526674830777,
"user": {
"displayName": "Alexandre Passos",
"photoUrl": "//lh4.googleusercontent.com/-kmTTWXEgAPw/AAAAAAAAAAI/AAAAAAAAAC0/q_DoOzKGwds/s50-c-k-no/photo.jpg",
"userId": "108023195365833072773"
},
"user_tz": 420
},
"id": "L9frk7Ur4uvJ",
"outputId": "882e9076-b6d9-4380-bb1e-7c6b57d54c39"
},
"outputs": [
{
"data": {
"text/plain": [
"\u003ctf.Tensor: id=1423, shape=(1, 2, 3, 3), dtype=float32, numpy=\n",
"array([[[[0., 0., 0.],\n",
" [0., 0., 0.],\n",
" [0., 0., 0.]],\n",
"\n",
" [[0., 0., 0.],\n",
" [0., 0., 0.],\n",
" [0., 0., 0.]]]], dtype=float32)\u003e"
]
},
"execution_count": 26,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
" my_seq = tf.keras.Sequential([tf.keras.layers.Conv2D(1, (1, 1)),\n",
" tf.keras.layers.BatchNormalization(),\n",
" tf.keras.layers.Conv2D(2, 1, \n",
" padding='same'),\n",
" tf.keras.layers.BatchNormalization(),\n",
" tf.keras.layers.Conv2D(3, (1, 1)),\n",
" tf.keras.layers.BatchNormalization()])\n",
"my_seq(tf.zeros([1, 2, 3, 3]))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "c5YwYcnuK-wc"
},
"source": [
"# Next steps\n",
"\n",
"Now you can go back to the previous notebook and adapt the linear regression example to use layers and models to be better structured."
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"default_view": {},
"name": "4 - High level API - TensorFlow Eager.ipynb",
"provenance": [],
"version": "0.3.2",
"views": {}
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,44 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
namespace tensorflow {
// Registers the "KafkaDataset" op.
//
// Inputs (in order): `topics`, `servers` and `group` are strings, `eof` is a
// bool and `timeout` is an int64. The single output `handle` is a variant.
// The op is marked stateful and uses shape_inference::ScalarShape as its
// shape function. The text passed to .Doc() is the op's user-facing
// documentation; it is part of the registration, not a comment.
REGISTER_OP("KafkaDataset")
.Input("topics: string")
.Input("servers: string")
.Input("group: string")
.Input("eof: bool")
.Input("timeout: int64")
.Output("handle: variant")
.SetIsStateful()
.SetShapeFn(shape_inference::ScalarShape)
.Doc(R"doc(
Creates a dataset that emits the messages of one or more Kafka topics.
topics: A `tf.string` tensor containing one or more subscriptions,
in the format of [topic:partition:offset:length],
by default length is -1 for unlimited.
servers: A list of bootstrap servers.
group: The consumer group id.
eof: If True, the kafka reader will stop on EOF.
timeout: The timeout value for the Kafka Consumer to wait
(in millisecond).
)doc");
} // namespace tensorflow

View File

@ -0,0 +1,178 @@
# Performance Benchmark numbers
This document contains the performance benchmark numbers for running a few well
known models on some Android and iOS devices.
The benchmark numbers were generated by running the [TFLite benchmark
binary](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark)
on Android and running the [iOS benchmark
app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios)
on iOS.
# Android benchmarks
When running Android benchmarks, the CPU affinity is set to use big cores on the
device to reduce variance (see
[details](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark#reducing-variance-between-runs-on-android)).
Models are assumed to have been downloaded from the link, unzipped and pushed to
`/data/local/tmp/tflite_models` folder. The benchmark binary is built according
to instructions listed
[here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark#on-android)
and is assumed to have been pushed to `/data/local/tmp`.
The following command was used to run the benchmark:
```
adb shell taskset ${CPU_MASK} /data/local/tmp/benchmark_model \
--num_threads=1 \
--graph=/data/local/tmp/tflite_models/${GRAPH} \
--warmup_runs=1 \
--num_runs=50 \
--use_nnapi=false
```
where `${GRAPH}` is the name of the model and `${CPU_MASK}` is the CPU affinity
chosen according to the following table:
Device | CPU_MASK |
-------| ----------
Pixel 2 | f0 |
Pixel xl | 0c |
<table>
<thead>
<tr>
<th>Model Name</th>
<th>Device </th>
<th>Mean inference time (std dev)</th>
</tr>
</thead>
<tr>
<td rowspan = 2>
<a href="http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz">Mobilenet_1.0_224(float)</a>
</td>
<td>Pixel 2 </td>
<td>166.5 ms (2.6 ms)</td>
</tr>
<tr>
<td>Pixel xl </td>
<td>122.9 ms (1.8 ms) </td>
</tr>
<tr>
<td rowspan = 2>
<a href="http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224_quant.tgz">Mobilenet_1.0_224 (quant)</a>
</td>
<td>Pixel 2 </td>
<td>69.5 ms (0.9 ms)</td>
</tr>
<tr>
<td>Pixel xl </td>
<td>78.9 ms (2.2 ms) </td>
</tr>
<tr>
<td rowspan = 2>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz">NASNet mobile</a>
</td>
<td>Pixel 2 </td>
<td>273.8 ms (3.5 ms)</td>
</tr>
<tr>
<td>Pixel xl </td>
<td>210.8 ms (4.2 ms)</td>
</tr>
<tr>
<td rowspan = 2>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz">SqueezeNet</a>
</td>
<td>Pixel 2 </td>
<td>234.0 ms (2.1 ms)</td>
</tr>
<tr>
<td>Pixel xl </td>
<td>158.0 ms (2.1 ms)</td>
</tr>
<tr>
<td rowspan = 2>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz">Inception_ResNet_V2</a>
</td>
<td>Pixel 2 </td>
<td>2846.0 ms (15.0 ms)</td>
</tr>
<tr>
<td>Pixel xl </td>
<td>1973.0 ms (15.0 ms) </td>
</tr>
<tr>
<td rowspan = 2>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz">Inception_V4</a>
</td>
<td>Pixel 2 </td>
<td>3180.0 ms (11.7 ms)</td>
</tr>
<tr>
<td>Pixel xl </td>
<td>2262.0 ms (21.0 ms) </td>
</tr>
</table>
# iOS benchmarks
For running iOS benchmarks, the [benchmark
app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios)
was modified to include the appropriate model and `benchmark_params.json` was
modified to set `num_threads` to 1.
<table>
<thead>
<tr>
<th>Model Name</th>
<th>Device </th>
<th>Mean inference time (std dev)</th>
</tr>
</thead>
<tr>
<td>
<a href="http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz">Mobilenet_1.0_224(float)</a>
</td>
<td>iPhone 8 </td>
<td>32.2 ms (0.8 ms)</td>
</tr>
<tr>
<td>
<a href="http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224_quant.tgz">Mobilenet_1.0_224 (quant)</a>
</td>
<td>iPhone 8 </td>
<td>24.4 ms (0.8 ms)</td>
</tr>
<tr>
<td>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz">NASNet mobile</a>
</td>
<td>iPhone 8 </td>
<td>60.3 ms (0.6 ms)</td>
</tr>
<tr>
<td>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz">SqueezeNet</a>
</td>
<td>iPhone 8 </td>
<td>44.3 ms (0.7 ms)</td>
</tr>
<tr>
<td>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz">Inception_ResNet_V2</a>
</td>
<td>iPhone 8</td>
<td>562.4 ms (18.2 ms)</td>
</tr>
<tr>
<td>
<a href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz">Inception_V4</a>
</td>
<td>iPhone 8 </td>
<td>661.0 ms (29.2 ms)</td>
</tr>
</table>

View File

@ -0,0 +1,163 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Inter-process communication using MPI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.framework import errors
from tensorflow.python.framework import load_library
from tensorflow.python.framework import ops
from tensorflow.python.platform import resource_loader
from tensorflow.python.platform import tf_logging as logging
def _load_library(name, op_list=None):
  """Loads a shared library of custom ops and optionally checks its contents.

  Args:
    name: The name of the .so file to load.
    op_list: Names of the operators the library is expected to provide. When
      None (or empty) the contents of the .so file are not verified.

  Returns:
    The loaded library module, or None when loading raises
    `errors.NotFoundError` (a warning is logged in that case).

  Raises:
    NameError: if one of the required ops is missing from the library.
  """
  try:
    library_path = resource_loader.get_path_to_datafile(name)
    library = load_library.load_op_library(library_path)
    for wanted_op in (op_list or []):
      # Scan the ops exported by the library for an exact name match.
      if not any(op.name == wanted_op for op in library.OP_LIST.op):
        raise NameError('Could not find operator %s in dynamic library %s' %
                        (wanted_op, name))
    return library
  except errors.NotFoundError:
    logging.warning('%s file could not be loaded.', name)
    return None
# Handle to the MPI collectives op library, loaded once at import time. The
# listed op names are verified to exist in the library. If the .so file cannot
# be found, _load_library only logs a warning, so MPI_LIB is None in that case.
MPI_LIB = _load_library(
    'mpi_collectives.so',
    ['MPISize', 'MPIRank', 'MPILocalRank', 'MPIAllgather', 'MPIAllreduce'])
def size(name=None):
  """An op which returns the number of MPI processes.

  This is equivalent to running `MPI_Comm_size(MPI_COMM_WORLD, ...)` to get the
  size of the global communicator.

  Args:
    name: Optional name forwarded to the underlying `mpi_size` op.

  Returns:
    An integer scalar containing the number of MPI processes.
  """
  return MPI_LIB.mpi_size(name=name)


# The process count has no gradient.
ops.NotDifferentiable('MPISize')
def rank(name=None):
  """An op which returns the MPI rank of the calling process.

  This is equivalent to running `MPI_Comm_rank(MPI_COMM_WORLD, ...)` to get the
  rank of the current process in the global communicator.

  Args:
    name: Optional name forwarded to the underlying `mpi_rank` op.

  Returns:
    An integer scalar with the MPI rank of the calling process.
  """
  return MPI_LIB.mpi_rank(name=name)


# The rank has no gradient.
ops.NotDifferentiable('MPIRank')
def init(name=None):
  """An op which initializes MPI on the device on which it is run.

  All future MPI ops must be run on the same device that the `init` op was run
  on.

  Args:
    name: Optional name forwarded to the underlying `mpi_init` op.

  Returns:
    Whatever the underlying `mpi_init` op returns.
  """
  # NOTE(review): 'MPIInit' is not among the op names verified when MPI_LIB is
  # loaded -- confirm the library always provides it.
  return MPI_LIB.mpi_init(name=name)


# Initialization has no gradient.
ops.NotDifferentiable('MPIInit')
def local_rank(name=None):
  """An op which returns the local MPI rank of the calling process, within the
  node that it is running on. For example, if there are seven processes running
  on a node, their local ranks will be zero through six, inclusive.

  This is equivalent to running `MPI_Comm_rank(...)` on a new communicator
  which only includes processes on the same node.

  Args:
    name: Optional name forwarded to the underlying `mpi_local_rank` op.

  Returns:
    An integer scalar with the local MPI rank of the calling process.
  """
  return MPI_LIB.mpi_local_rank(name=name)


# The local rank has no gradient.
ops.NotDifferentiable('MPILocalRank')
def _allreduce(tensor, name=None):
  """An op which sums an input tensor over all the MPI processes.

  The reduction operation is keyed by the name of the op. The tensor type and
  shape must be the same on all MPI processes for a given name. The reduction
  will not start until all processes are ready to send and receive the tensor.

  Args:
    tensor: The tensor to sum across processes.
    name: Optional name forwarded to the underlying `mpi_allreduce` op; it is
      also the key that identifies the reduction across processes.

  Returns:
    A tensor of the same shape and type as `tensor`, summed across all
    processes.
  """
  return MPI_LIB.mpi_allreduce(tensor, name=name)


# No gradient is registered for the raw allreduce op here.
ops.NotDifferentiable('MPIAllreduce')
def allgather(tensor, name=None):
  """Concatenates `tensor` with the matching tensors on all other MPI processes.

  Concatenation happens along the first dimension, so the tensors on the
  different processes must agree in rank and in every dimension except the
  first, which is allowed to differ.

  Args:
    tensor: The local tensor to contribute to the gather.
    name: Optional name for the gather op; defaults to 'allgather'. The
      preliminary size exchange is named '<name>_sizing'.

  Returns:
    A tensor of the same type as `tensor`, concatenated on dimension zero
    across all processes. The shape is identical to the input shape, except for
    the first dimension, which may be greater and is the sum of all first
    dimensions of the tensors in different MPI processes.
  """
  # A scalar (0-D) zero passed as the second argument marks the first gather as
  # the one that collects the per-process sizes.
  sizing_marker = tf.constant(0, dtype=tf.int64, name='size_flag_const')
  # Size of this process's first dimension, kept as a 1-element int64 tensor.
  full_shape = tf.shape(tensor, out_type=tf.int64)
  local_first_dim = tf.slice(full_shape, [0], [1], name='size_slice')

  if name is None:
    name = 'allgather'

  # Step 1: exchange first-dimension sizes. Step 2: gather the actual data.
  gathered_sizes = MPI_LIB.mpi_allgather(
      local_first_dim, sizing_marker, name='{}_sizing'.format(name))
  return MPI_LIB.mpi_allgather(tensor, gathered_sizes, name=name)


ops.NotDifferentiable('MPIAllgather')

View File

@ -0,0 +1,80 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifdef TENSORFLOW_USE_MPI
#define EIGEN_USE_THREADS
#include "tensorflow/contrib/mpi_collectives/ring.h"
namespace tensorflow {
namespace contrib {
namespace mpi {
// Device tag used by the CPU instantiations and specializations in this file.
using CPUDevice = Eigen::ThreadPoolDevice;
// Explicit instantiation declarations for the type-mapping helpers: they tell
// the compiler not to instantiate these functions implicitly in this
// translation unit.
// NOTE(review): presumably the definitions come from another translation unit
// -- confirm that one is always linked in, including in CPU-only builds.
extern template MPI_Datatype MPIType<float>();
extern template MPI_Datatype MPIType<int>();
extern template MPI_Datatype MPIType<long long>();
extern template DataType TensorFlowDataType<float>();
extern template DataType TensorFlowDataType<int>();
extern template DataType TensorFlowDataType<long long>();
// Explicitly instantiate RingAllreduce on the CPU for each element type
// handled here: int, long long and float.
template Status RingAllreduce<CPUDevice, int>(OpKernelContext*, const Tensor*,
Tensor*, Tensor*);
template Status RingAllreduce<CPUDevice, long long>(OpKernelContext*,
const Tensor*, Tensor*,
Tensor*);
template Status RingAllreduce<CPUDevice, float>(OpKernelContext*, const Tensor*,
Tensor*, Tensor*);
// Explicitly instantiate RingAllgather on the CPU for the same element types.
template Status RingAllgather<CPUDevice, int>(OpKernelContext*, const Tensor*,
const std::vector<size_t>&,
Tensor*);
template Status RingAllgather<CPUDevice, long long>(OpKernelContext*,
const Tensor*,
const std::vector<size_t>&,
Tensor*);
template Status RingAllgather<CPUDevice, float>(OpKernelContext*, const Tensor*,
const std::vector<size_t>&,
Tensor*);
// CPU specialization of CopyTensorData: copies `size` bytes from `src` to
// `dst` using a straightforward std::memcpy. As with any memcpy, the source
// and destination ranges must not overlap.
template <>
void CopyTensorData<CPUDevice>(void* dst, void* src, size_t size) {
std::memcpy(dst, src, size);
};
// Accumulate values on a CPU.
//
// GENERATE_ACCUMULATE(type) defines the CPUDevice specialization of
// AccumulateTensorData for `type`: it adds each of the `size` elements of
// `src` into the corresponding element of `dst` (dst[i] += src[i]).
//
// The loop index is a size_t, matching the type of `size`. A narrower
// unsigned counter would wrap around before reaching `size` for buffers with
// more than UINT_MAX elements and the loop would never terminate.
#define GENERATE_ACCUMULATE(type)                                    \
  template <>                                                        \
  void AccumulateTensorData<CPUDevice, type>(type * dst, type * src, \
                                             size_t size) {          \
    for (size_t i = 0; i < size; ++i) {                              \
      dst[i] += src[i];                                              \
    }                                                                \
  }
GENERATE_ACCUMULATE(int);
GENERATE_ACCUMULATE(long long);
GENERATE_ACCUMULATE(float);
#undef GENERATE_ACCUMULATE
} // namespace mpi
} // namespace contrib
} // namespace tensorflow
#endif // TENSORFLOW_USE_MPI

View File

@ -0,0 +1,117 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifdef TENSORFLOW_USE_MPI
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#include "tensorflow/contrib/mpi_collectives/ring.h"
namespace tensorflow {
namespace contrib {
namespace mpi {
// Alias for the Eigen thread-pool (CPU) device type.
using CPUDevice = Eigen::ThreadPoolDevice;
// MPIType<T>() returns the MPI datatype constant that corresponds to the C++
// element type T. Specializations are provided for float, int and long long.
template <>
MPI_Datatype MPIType<float>() {
return MPI_FLOAT;
};
template <>
MPI_Datatype MPIType<int>() {
return MPI_INT;
};
template <>
MPI_Datatype MPIType<long long>() {
return MPI_LONG_LONG;
};
// TensorFlowDataType<T>() returns the TensorFlow DataType enum value that
// corresponds to the C++ element type T: float -> DT_FLOAT, int -> DT_INT32,
// long long -> DT_INT64.
template <>
DataType TensorFlowDataType<float>() {
return DT_FLOAT;
};
template <>
DataType TensorFlowDataType<int>() {
return DT_INT32;
};
template <>
DataType TensorFlowDataType<long long>() {
return DT_INT64;
};
// Generate all necessary specializations for RingAllreduce.
// These are explicit instantiations of the RingAllreduce template for
// GPUDevice, one per supported element type (int, long long, float), so that
// the instantiated code is emitted in this translation unit.
template Status RingAllreduce<GPUDevice, int>(OpKernelContext*, const Tensor*,
Tensor*, Tensor*);
template Status RingAllreduce<GPUDevice, long long>(OpKernelContext*,
const Tensor*, Tensor*,
Tensor*);
template Status RingAllreduce<GPUDevice, float>(OpKernelContext*, const Tensor*,
Tensor*, Tensor*);
// Generate all necessary specializations for RingAllgather.
// These are explicit instantiations of the RingAllgather template for
// GPUDevice, one per supported element type (int, long long, float), so that
// the instantiated code is emitted in this translation unit.
template Status RingAllgather<GPUDevice, int>(OpKernelContext*, const Tensor*,
const std::vector<size_t>&,
Tensor*);
template Status RingAllgather<GPUDevice, long long>(OpKernelContext*,
const Tensor*,
const std::vector<size_t>&,
Tensor*);
template Status RingAllgather<GPUDevice, float>(OpKernelContext*, const Tensor*,
const std::vector<size_t>&,
Tensor*);
// Synchronously copy data on the GPU, using a different stream than the default
// and than TensorFlow to avoid synchronizing on operations unrelated to the
// allreduce.
//
// Copies `size` bytes from `src` to `dst`; both pointers are treated as device
// memory (cudaMemcpyDeviceToDevice). The copy is enqueued on the stream
// returned by CudaStreamForMPI() and the function blocks until that stream has
// drained.
//
// NOTE(review): the return codes of cudaMemcpyAsync and cudaStreamSynchronize
// are discarded, so a failed copy is not reported to the caller.
template <>
void CopyTensorData<GPUDevice>(void* dst, void* src, size_t size) {
auto stream = CudaStreamForMPI();
cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, stream);
// Block until the copy enqueued above has completed.
cudaStreamSynchronize(stream);
};
// GPU kernel that adds `in` into `out` element by element: out[k] += in[k]
// for every k in [0, N).
//
// Each thread starts at its global thread index and advances by the total
// number of launched threads (a grid-stride loop), so any launch
// configuration covers all N elements.
template <typename T>
__global__ void elemwise_accum(T* out, const T* in, const size_t N) {
  // Total number of threads in the launch; the distance between two
  // consecutive elements handled by the same thread.
  const size_t stride = blockDim.x * gridDim.x;
  size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
  while (idx < N) {
    out[idx] += in[idx];
    idx += stride;
  }
}
// Synchronously accumulate tensors on the GPU, using a different stream than
// the default and than TensorFlow to avoid synchronizing on operations
// unrelated to the allreduce.
//
// Each generated specialization launches elemwise_accum<type> with a fixed
// configuration of 32 blocks of 256 threads on the stream returned by
// CudaStreamForMPI(), then blocks until that stream has drained. `size` is an
// element count.
//
// NOTE(review): neither the kernel launch nor cudaStreamSynchronize is checked
// for errors, so a failed accumulation is not reported to the caller.
#define GENERATE_ACCUMULATE(type) \
template <> \
void AccumulateTensorData<GPUDevice, type>(type * dst, type * src, \
size_t size) { \
auto stream = CudaStreamForMPI(); \
elemwise_accum<type><<<32, 256, 0, stream>>>(dst, src, size); \
cudaStreamSynchronize(stream); \
};
// One specialization per supported element type.
GENERATE_ACCUMULATE(int);
GENERATE_ACCUMULATE(long long);
GENERATE_ACCUMULATE(float);
#undef GENERATE_ACCUMULATE
} // namespace mpi
} // namespace contrib
} // namespace tensorflow
#endif // GOOGLE_CUDA
#endif // TENSORFLOW_USE_MPI

View File

@ -0,0 +1,327 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_MPI_H_
#define TENSORFLOW_CONTRIB_MPI_H_
#ifdef TENSORFLOW_USE_MPI
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#if GOOGLE_CUDA
#include "cuda_runtime.h"
#endif
// Needed to avoid header issues with C++-supporting MPI implementations
#define OMPI_SKIP_MPICXX
#include "third_party/mpi/mpi.h"
#define TAG_TENSOR 12
namespace tensorflow {
namespace contrib {
namespace mpi {
using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;
// Convert from templated types to values we can pass to MPI.
// Only declared here; each supported element type T needs an explicit
// specialization defined in a source file.
template <typename T>
MPI_Datatype MPIType();
// Convert from templated types to TensorFlow data types.
// Only declared here; each supported element type T needs an explicit
// specialization defined in a source file.
template <typename T>
DataType TensorFlowDataType();
// Evaluates MPI_STATUS once and, if it is not MPI_SUCCESS, returns
// errors::Unknown from the enclosing function. The enclosing function must
// therefore have a return type constructible from that value.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to a
// single statement: `if (c) MPI_REQUIRES_OK(x); else ...` then parses as
// intended instead of mis-binding or orphaning the `else`. Invoke it with a
// trailing semicolon.
#define MPI_REQUIRES_OK(MPI_STATUS)                                 \
  do {                                                              \
    if ((MPI_STATUS) != MPI_SUCCESS) {                              \
      return errors::Unknown("MPI operation failed unexpectedly."); \
    }                                                               \
  } while (0)
// Copy data from one tensor to another tensor.
// This uses a custom CUDA stream on GPU, which is necessary to overlap the
// backpropagation computations with the allreduce.
//
// `size` is the number of bytes to copy from `source` to `destination`.
// Only declared here; one explicit specialization per Device is expected in a
// source file.
template <typename Device>
void CopyTensorData(void* destination, void* source, size_t size);
// Add a tensor into another tensor, accumulating in place.
// This uses a custom CUDA stream on GPU, which is necessary to overlap the
// backpropagation computations with the allreduce.
//
// `size` is the number of elements of type T (not bytes) to add from `source`
// into `destination`. Only declared here; one explicit specialization per
// (Device, T) pair is expected in a source file.
template <typename Device, typename T>
void AccumulateTensorData(T* destination, T* source, size_t size);
// We need to get the right stream for doing CUDA memory transfers and
// operations, which is possibly different from the standard TensorFlow stream.
// Only declared when building with GOOGLE_CUDA; the definition lives outside
// this header.
#if GOOGLE_CUDA
cudaStream_t CudaStreamForMPI();
#endif
/* Perform a ring allreduce on the data. Allocate the necessary output tensor
* and store it in the output parameter.
*
* Assumes that all MPI processes are doing an allreduce of the same tensor,
* with the same dimensions.
*
* A ring allreduce is a bandwidth-optimal way to do an allreduce. To do the
* allreduce, the nodes involved are arranged in a ring:
*
* .--0--.
* / \
* 3 1
* \ /
* *--2--*
*
* Each node always sends to the next clockwise node in the ring, and receives
* from the previous one.
*
* The allreduce is done in two parts: a scatter-reduce and an allgather. In
* the scatter reduce, a reduction is done, so that each node ends up with a
* chunk of the final output tensor which has contributions from all other
* nodes. In the allgather, those chunks are distributed among all the nodes,
* so that all nodes have the entire output tensor.
*
* Both of these operations are done by dividing the input tensor into N
* evenly sized chunks (where N is the number of nodes in the ring).
*
* The scatter-reduce is done in N-1 steps. In the ith step, node j will send
* the (j - i)th chunk and receive the (j - i - 1)th chunk, adding it in to
* its existing data for that chunk. For example, in the first iteration with
* the ring depicted above, you will have the following transfers:
*
* Segment 0: Node 0 --> Node 1
* Segment 1: Node 1 --> Node 2
* Segment 2: Node 2 --> Node 3
* Segment 3: Node 3 --> Node 0
*
* In the second iteration, you'll have the following transfers:
*
* Segment 0: Node 1 --> Node 2
* Segment 1: Node 2 --> Node 3
* Segment 2: Node 3 --> Node 0
* Segment 3: Node 0 --> Node 1
*
* After this iteration, Node 2 has 3 of the four contributions to Segment 0.
* The last iteration has the following transfers:
*
* Segment 0: Node 2 --> Node 3
* Segment 1: Node 3 --> Node 0
* Segment 2: Node 0 --> Node 1
* Segment 3: Node 1 --> Node 2
*
* After this iteration, Node 3 has the fully accumulated Segment 0; Node 0
* has the fully accumulated Segment 1; and so on. The scatter-reduce is
* complete.
*
* Next, the allgather distributes these fully accumulated chunks across all
* nodes. Communication proceeds in the same ring, once again in N-1 steps. At
* the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i).
* For example, at the first iteration, the following transfers will occur:
*
* Segment 0: Node 3 --> Node 0
* Segment 1: Node 0 --> Node 1
* Segment 2: Node 1 --> Node 2
* Segment 3: Node 2 --> Node 3
*
* After the first iteration, Node 0 will have a fully accumulated Segment 0
* (from Node 3) and Segment 1. In the next iteration, Node 0 will send its
* just-received Segment 0 onward to Node 1, and receive Segment 3 from Node 3.
* After this has continued for N - 1 iterations, all nodes will have the
* fully accumulated tensor.
*
* Each node will do (N-1) sends for the scatter-reduce and (N-1) sends for the
* allgather. Each send will contain K / N bytes, if there are K bytes in the
* original tensor on every node. Thus, each node sends and receives 2K(N - 1)/N
* bytes of data, and the performance of the allreduce (assuming no latency in
* connections) is constrained by the slowest interconnect between the nodes.
*
*/
// Template parameters: `Device` selects the CopyTensorData /
// AccumulateTensorData specializations used for local copies and reductions;
// `T` is the element type and selects the MPI datatype through MPIType<T>().
//
// Parameters:
//   context: not referenced in the body of this function.
//   input:   tensor whose elements are reduced across all ranks.
//   temp:    scratch tensor; every scatter-reduce step receives one incoming
//            segment into it. NOTE(review): presumably it must hold at least
//            the largest segment (ceil(NumElements / n) elements) -- confirm
//            that callers allocate it that way.
//   output:  first overwritten with a copy of `input`, then reduced in place.
//            NOTE(review): the copy length is output's byte size, so input and
//            output are assumed to have the same byte size -- confirm.
//
// Returns Status::OK() on success. Any failing MPI call makes MPI_REQUIRES_OK
// return errors::Unknown immediately.
template <typename Device, typename T>
Status RingAllreduce(OpKernelContext* context, const Tensor* input,
Tensor* temp, Tensor* output) {
// Acquire MPI size and rank
int n, r;
MPI_REQUIRES_OK(MPI_Comm_size(MPI_COMM_WORLD, &n));
MPI_REQUIRES_OK(MPI_Comm_rank(MPI_COMM_WORLD, &r));
// Seed the output buffer with this rank's own contribution; all further
// reduction happens in place inside `buffer`.
T* buffer = (T*)output->tensor_data().data();
CopyTensorData<Device>((void*)buffer, (void*)input->tensor_data().data(),
output->tensor_data().size());
// Calculate segment sizes and segment ends
const size_t elements_to_reduce = input->NumElements();
const size_t segment_size = elements_to_reduce / n;
std::vector<size_t> segment_sizes(n, segment_size);
// Spread the remainder over the first `residual` segments, one extra element
// each, so the segment sizes differ by at most one.
const size_t residual = elements_to_reduce % n;
for (size_t i = 0; i < residual; ++i) {
segment_sizes[i]++;
}
// segment_starts[i] is the element offset of segment i within `buffer`.
std::vector<size_t> segment_starts(n);
segment_starts[0] = 0;
for (size_t i = 1; i < segment_starts.size(); ++i) {
segment_starts[i] = segment_starts[i - 1] + segment_sizes[i - 1];
}
// Sanity check that the segments tile the whole tensor. This is a plain
// assert, so it is compiled out when NDEBUG is defined.
assert(segment_starts[n - 1] + segment_sizes[n - 1] == elements_to_reduce);
T* segment_recv = (T*)temp->tensor_data().data();
// Receive from your left neighbor with wrap-around
const size_t recv_from = ((r - 1) + n) % n;
// Send to your right neighbor with wrap-around
const size_t send_to = (r + 1) % n;
MPI_Status recv_status;
MPI_Request recv_req;
// Now start ring. At every step, for every rank, we iterate through
// segments with wraparound and send and recv from our neighbors and reduce
// locally. At the i'th iteration, rank r, sends segment (r-i) and receives
// segment (r-i-1).
for (int i = 0; i < n - 1; i++) {
const size_t send_seg_id = ((r - i) + n) % n;
const size_t recv_seg_id = ((r - i - 1) + n) % n;
T* segment_send = &(buffer[segment_starts[send_seg_id]]);
// Post the non-blocking receive before the blocking send so that the send
// to the right neighbor always has a matching receive already posted there.
MPI_REQUIRES_OK(MPI_Irecv(segment_recv, segment_sizes[recv_seg_id],
MPIType<T>(), recv_from, TAG_TENSOR,
MPI_COMM_WORLD, &recv_req));
MPI_REQUIRES_OK(MPI_Send(segment_send, segment_sizes[send_seg_id],
MPIType<T>(), send_to, TAG_TENSOR,
MPI_COMM_WORLD));
T* segment_update = &(buffer[segment_starts[recv_seg_id]]);
// Wait for recv to complete before reduction
MPI_REQUIRES_OK(MPI_Wait(&recv_req, &recv_status));
const size_t recv_seg_size = segment_sizes[recv_seg_id];
// Fold the received partial sums into this rank's copy of the same segment.
AccumulateTensorData<Device, T>(segment_update, segment_recv,
recv_seg_size);
}
// Now start pipelined ring allgather. At every step, for every rank, we
// iterate through segments with wraparound and send and recv from our
// neighbors. At the i'th iteration, rank r, sends segment (r-i+1) and
// receives segment (r-i).
// NOTE(review): `i` is a size_t here while `r` and `n` are ints, so the
// segment-id expressions below are evaluated in unsigned arithmetic. The
// results are still correct because the mathematically exact value of each
// parenthesized sum is non-negative, but the code relies on that.
for (size_t i = 0; i < n - 1; ++i) {
const size_t send_seg_id = ((r - i + 1) + n) % n;
const size_t recv_seg_id = ((r - i) + n) % n;
// Segment to send - at every iteration we send segment (r-i+1)
T* segment_send = &(buffer[segment_starts[send_seg_id]]);
// Segment to recv - at every iteration we receive segment (r-i)
// (This local shadows the outer `segment_recv` scratch pointer: in the
// allgather phase data is received directly into its final place.)
T* segment_recv = &(buffer[segment_starts[recv_seg_id]]);
MPI_REQUIRES_OK(MPI_Sendrecv(
segment_send, segment_sizes[send_seg_id], MPIType<T>(), send_to,
TAG_TENSOR, segment_recv, segment_sizes[recv_seg_id], MPIType<T>(),
recv_from, TAG_TENSOR, MPI_COMM_WORLD, &recv_status));
}
return Status::OK();
}
// Perform a ring allgather on a Tensor. Other ranks may allgather with a
// tensor which differs in the first dimension only; all other dimensions must
// be the same.
//
// For more information on the ring allgather, read the documentation for the
// ring allreduce, which includes a ring allgather.
//
// Template parameters: `Device` selects the CopyTensorData specialization used
// for the local copy; `T` is the element type and selects the MPI datatype
// through MPIType<T>().
//
// Parameters:
//   context: not referenced in the body of this function.
//   input:   this rank's contribution.
//   sizes:   sizes[k] is the first-dimension size contributed by rank k; it
//            is asserted to have one entry per rank and sizes[r] must equal
//            input's first dimension.
//   output:  receives every rank's rows, laid out in rank order.
//            NOTE(review): presumably pre-allocated by the caller with
//            sum(sizes) rows -- nothing here checks its size; confirm.
//
// Returns Status::OK() on success. Any failing MPI call makes MPI_REQUIRES_OK
// return errors::Unknown immediately.
template <typename Device, typename T>
Status RingAllgather(OpKernelContext* context, const Tensor* input,
const std::vector<size_t>& sizes, Tensor* output) {
// Acquire MPI size and rank
int n, r;
MPI_REQUIRES_OK(MPI_Comm_size(MPI_COMM_WORLD, &n));
MPI_REQUIRES_OK(MPI_Comm_rank(MPI_COMM_WORLD, &r));
// Plain asserts: these checks are compiled out when NDEBUG is defined.
assert(sizes.size() == n);
assert(input->dim_size(0) == sizes[r]);
// Compute number of elements in every "row". We can't compute number of
// elements in every chunks, because those chunks are variable length.
size_t elements_per_row = 1;
for (int i = 1; i < input->shape().dims(); i++) {
elements_per_row *= input->dim_size(i);
}
// Copy data from input tensor to correct place in output tensor.
// segment_starts[k] is the element offset in the output at which rank k's
// rows begin.
std::vector<size_t> segment_starts(n);
segment_starts[0] = 0;
for (int i = 1; i < n; i++) {
segment_starts[i] = segment_starts[i - 1] + elements_per_row * sizes[i - 1];
}
size_t offset = segment_starts[r];
// Copy data to the right offset for this rank.
// `offset` is in elements (pointer arithmetic on T*), while the length passed
// to CopyTensorData is in bytes.
T* buffer = (T*)output->tensor_data().data();
CopyTensorData<Device>((void*)(buffer + offset),
(void*)input->tensor_data().data(),
elements_per_row * sizes[r] * sizeof(T));
// Receive from your left neighbor with wrap-around
const size_t recv_from = ((r - 1) + n) % n;
// Send to your right neighbor with wrap-around
const size_t send_to = (r + 1) % n;
// Perform a ring allgather. At every step, for every rank, we iterate
// through segments with wraparound and send and recv from our neighbors.
// At the i'th iteration, rank r, sends segment (r-i) and receives segment
// (r-1-i).
// NOTE(review): `i` is a size_t while `r` and `n` are ints, so the segment-id
// expressions below are evaluated in unsigned arithmetic. The results are
// still correct because the exact value of each parenthesized sum is
// non-negative, but the code relies on that.
MPI_Status recv_status;
for (size_t i = 0; i < n - 1; ++i) {
const size_t send_seg_id = ((r - i) + n) % n;
const size_t recv_seg_id = ((r - i - 1) + n) % n;
// Segment to send - at every iteration we send segment (r-i)
size_t offset_send = segment_starts[send_seg_id];
size_t rows_send = sizes[send_seg_id];
T* segment_send = &(buffer[offset_send]);
// Segment to recv - at every iteration we receive segment (r-1-i)
size_t offset_recv = segment_starts[recv_seg_id];
size_t rows_recv = sizes[recv_seg_id];
T* segment_recv = &(buffer[offset_recv]);
// Counts are given in elements: rows times elements per row.
MPI_REQUIRES_OK(MPI_Sendrecv(
segment_send, elements_per_row * rows_send, MPIType<T>(), send_to,
TAG_TENSOR, segment_recv, elements_per_row * rows_recv, MPIType<T>(),
recv_from, TAG_TENSOR, MPI_COMM_WORLD, &recv_status));
}
return Status::OK();
}
} // namespace mpi
} // namespace contrib
} // namespace tensorflow
#endif // TENSORFLOW_USE_MPI
// The include-guard macro is intentionally left defined: undefining it here
// would let a second inclusion of this header redefine every template above.
#endif  // TENSORFLOW_CONTRIB_MPI_H_

View File

@ -0,0 +1,136 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/kernels/trt_calib_op.h"
#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h"
#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/stream_executor.h"
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#include "cuda/include/cuda_runtime_api.h"
#include "tensorrt/include/NvInfer.h"
namespace tensorflow {
namespace tensorrt {
// Constructs the kernel by reading the "segment_nodes", "input_names" and
// "resource_name" node attributes into the corresponding members. A failed
// GetAttr is reported on `context` through OP_REQUIRES_OK.
TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("segment_nodes", &segment_nodes_));
OP_REQUIRES_OK(context, context->GetAttr("input_names", &input_names_));
OP_REQUIRES_OK(context, context->GetAttr("resource_name", &resource_name_));
};
// Expands to a `case dt:` label that returns the address of tensor X's flat
// data, typed according to the C++ type TensorFlow associates with `dt`.
// The third parameter Y is not referenced by the expansion.
#define TYPECASE(dt, X, Y) \
case dt: { \
return (void*)X->flat<tensorflow::EnumToDataType<dt>::Type>().data(); \
}
// Returns the address of the first element of `tensor_ptr`'s data for the
// supported dtypes (DT_FLOAT, DT_HALF, DT_INT8). Any other dtype is reported
// with LOG(FATAL); the `return nullptr` after it only exists to give the
// default branch a return statement.
void* GetTensorAddress(const Tensor* tensor_ptr) {
auto tensor_type = tensor_ptr->dtype();
switch (tensor_type) {
// `dest_ptr` is not declared in this function; that is harmless because
// TYPECASE never expands its third argument.
TYPECASE(tensorflow::DT_FLOAT, tensor_ptr, dest_ptr);
TYPECASE(tensorflow::DT_HALF, tensor_ptr, dest_ptr);
TYPECASE(tensorflow::DT_INT8, tensor_ptr, dest_ptr);
default: {
LOG(FATAL) << "Unsupported Data type "
<< tensorflow::DataTypeString(tensor_type);
return nullptr;
}
}
}
// Feeds one batch of inputs to the INT8 calibrator and forwards every input
// unchanged to the output with the same index.
//
// On the first call for a calibration resource (its calibrator_ is still
// null) this also allocates one persistent device tensor per input, creates
// the TRTInt8Calibrator over those buffers and starts a thread that builds
// the CUDA engine with that calibrator attached.
//
// Errors from the resource lookup or from allocate_persistent are set on
// `ctx` and the function returns early.
void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) {
// TODO(aaroey): make sure ctx->resource_mgr() is used in future PR.
auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance();
auto res_mgr = trt_rm->getManager("TRTCalibOps");
tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr;
// resource_name_ is used both as the container and as the resource name.
// NOTE(review): if this Lookup hands the caller a reference, as
// ResourceMgr::Lookup usually does, nothing in this function releases it --
// confirm whether an Unref is needed.
auto status = res_mgr->Lookup(resource_name_, resource_name_, &calib_res);
if (!status.ok()) {
ctx->SetStatus(status);
return;
}
int num_inputs = ctx->num_inputs();
// first run instantiate calibrator
// NOTE(review): this null check is not guarded by any lock visible here;
// presumably Compute is never run concurrently for the same resource --
// confirm.
if (calib_res->calibrator_ == nullptr) {
dev_tensors_.resize(num_inputs);
// The batch size is taken from the first dimension of input 0, so at least
// one input is assumed.
int batch_size = ctx->input(0).dim_size(0);
VLOG(1) << " Constructing calibrator";
for (int i = 0; i < num_inputs; i++) {
// allocate workspace on device for inputs
const tensorflow::Tensor& t = ctx->input(i);
OP_REQUIRES_OK(ctx,
ctx->allocate_persistent(t.dtype(), t.shape(),
&dev_tensors_.at(i), nullptr));
const auto device_tensor = dev_tensors_.at(i).AccessTensor(ctx);
CHECK_EQ(t.TotalBytes(), device_tensor->TotalBytes());
void* device_address = GetTensorAddress(device_tensor);
// Record (address, byte size) of the persistent buffer under the input's
// name; input_names_ must have an entry for every input.
device_buffers_.emplace(input_names_.at(i),
std::pair<void*, size_t>(
device_address, device_tensor->TotalBytes()));
}
// The calibrator is stored as a raw pointer on the resource; it is not
// deleted in this function.
calib_res->calibrator_ =
new TRTInt8Calibrator(device_buffers_, batch_size, resource_name_);
string label(resource_name_);
// The thread captures the raw calib_res pointer and a copy of the label. It
// is neither joined nor detached here; the std::thread object is stored in
// calib_res->thr_.
calib_res->thr_ = new std::thread([calib_res, label]() {
VLOG(1) << "Starting calibration thread, Calibration Resource @ "
<< calib_res;
calib_res->builder_->setInt8Calibrator(calib_res->calibrator_);
calib_res->builder_->setInt8Mode(true);
calib_res->engine_ = calib_res->builder_->buildCudaEngine(
*calib_res->network_); // will loop until we terminate calibrator
VLOG(1) << "Calibration loop terminated " << label;
});
VLOG(1) << "initialized calibrator resource";
} // calibrator initialized
// Pass input data to calibrator
// Maps each input name to the address of the current batch's data.
std::unordered_map<string, void*> input_data;
for (int i = 0; i < num_inputs; i++) {
const Tensor& t = ctx->input(i);
void* data_address = GetTensorAddress(&t);
const auto device_tensor = dev_tensors_.at(i).AccessTensor(ctx);
CHECK_EQ(t.TotalBytes(),
device_tensor->TotalBytes()); // use the tensor so FW keeps it
input_data.emplace(input_names_.at(i), data_address);
// Forward the input tensor unchanged as output i.
ctx->set_output(i, t);
}
VLOG(2) << "Filled map for sending";
// copied from cuda_kernel_helper since it seems only valid in *.cu.cc files
const cudaStream_t* stream = CHECK_NOTNULL(
reinterpret_cast<const cudaStream_t*>(ctx->op_device_context()
->stream()
->implementation()
->CudaStreamMemberHack()));
// Hand the batch to the calibrator together with the op's CUDA stream.
calib_res->calibrator_->setBatch(input_data, *stream);
VLOG(2) << "Passed calibration data";
// TODO(aaroey): make sure we wait for the completion of calibration on the
// last batch in future PR.
};
// TYPECASE is only needed by GetTensorAddress; keep it from leaking further.
#undef TYPECASE
// Register TRTCalibOp as the GPU kernel for the "TRTCalibOp" op.
REGISTER_KERNEL_BUILDER(Name("TRTCalibOp").Device(DEVICE_GPU), TRTCalibOp);
} // namespace tensorrt
} // namespace tensorflow
#endif
#endif

View File

@ -0,0 +1,52 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H
#define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/platform/types.h"
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
namespace tensorflow {
namespace tensorrt {
// Op kernel that takes part in TensorRT INT8 calibration.
// TODO(sami): Convert this to async kernel!
class TRTCalibOp : public OpKernel {
public:
// Builds the kernel from the node's attributes.
explicit TRTCalibOp(OpKernelConstruction* context);
// Runs the op for one batch of inputs.
void Compute(OpKernelContext* context) override;
private:
// Name of the calibration resource this kernel works with.
string resource_name_;
// Names of the nodes in the segment.
std::vector<string> segment_nodes_;
// Name associated with each input, by input index.
std::vector<string> input_names_;
// NOTE(review): no use of this member is visible from this header -- confirm
// whether it is still needed.
std::vector<tensorflow::TensorShape> shapes_;
// Per-name (device address, size in bytes) of a device buffer.
std::unordered_map<string, std::pair<void*, size_t>> device_buffers_;
// Persistent tensors owned by the kernel, one slot per input.
std::vector<tensorflow::PersistentTensor> dev_tensors_;
};
} // namespace tensorrt
} // namespace tensorflow
#endif
#endif
#endif // TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H

View File

@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
namespace tensorflow {
// Registers the "TRTCalibOp" op. It takes a list of tensors whose element
// types are restricted to int8, float16 and float32 (attr InT) and produces an
// output list with the same types. The shape function gives every output the
// shape of the input with the same index.
REGISTER_OP("TRTCalibOp")
.Attr("segment_nodes: list(string)") // names of the ops in segment
.Attr("segment_output_names: list(string)") // names of the output ops in
// segment
.Attr("input_names: list(string)") // names of the inputs for
// passing into tensorrt
.Attr("resource_name: string")
.Attr("InT: list({int8, float16, float32})")
.Input("in_tensor: InT")
.Output("out_tensor: InT")
.SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) {
// Output i has exactly the shape of input i.
for (int i = 0; i < c->num_inputs(); i++) {
c->set_output(i, c->input(i));
}
return Status::OK();
});
} // namespace tensorflow

View File

@ -0,0 +1,187 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SGDR learning rate decay function."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops, control_flow_ops
def sgdr_decay(learning_rate, global_step, initial_period_steps,
               t_mul=2.0, m_mul=1.0, name=None):
  """Cosine-annealed learning rate with warm restarts (SGDR).

  Follows "SGDR: Stochastic Gradient Descent with Warm Restarts" by
  Ilya Loshchilov & Frank Hutter (ICLR 2017),
  https://arxiv.org/pdf/1608.03983.pdf

  Training is split into consecutive periods. Within a period the learning
  rate is annealed along half a cosine wave:

  ```python
  peak * 0.5 * (1 + cos(x_val * pi))  # x_val in [0, 1) is the position
                                      # within the current period
  ```

  so it starts at `peak` (`x_val=0`, `cos(0)=1`) and approaches 0 towards the
  end of the period (`x_val -> 1`, `cos(pi)=-1`). The minimum learning rate
  is fixed at 0 for simplicity; the paper allows any positive minimum.

  Period `i` (counting from 0):
    * lasts `initial_period_steps * (t_mul ** i)` steps, i.e. every period is
      `t_mul` times longer than the previous one;
    * starts from the peak learning rate `learning_rate * (m_mul ** i)`.

  Periods are measured in minibatch updates, not in epochs as in the paper.
  To express a period in epochs, multiply by the number of updates per epoch.
  For example, with a minibatch size of 100 and 10000 training examples, a
  first period of 5 epochs is `initial_period_steps = 5 * 10000/100 = 500`.

  When no validation set is available, SGDR suggests the following
  recommended solution: during the initial period (i=0) every new solution,
  afterwards (i>0) the solution obtained at the end of each period.

  Example: anneal from 0.1 over the first 10000 steps, then restart with
  periods that double in length and peaks that halve:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  starter_learning_rate = 0.1
  learning_rate = sgdr_decay(starter_learning_rate, global_step,
                             initial_period_steps=10000, t_mul=2, m_mul=0.5)
  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )

  # Step  | 0   | 1000  | 5000 | 9000  | 9999 | 10000 | 11000  |
  # LR    | 0.1 | 0.097 | 0.05 | 0.002 | 0.00 | 0.05  | 0.0496 |

  # Step  | 20000 | 29000  | 29999 | 30000 |
  # LR    | 0.025 | 0.0003 | 0.00  | 0.025 |
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number. The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation. Must not be negative.
    initial_period_steps: Length of the first period, in minibatch updates.
    t_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
      Must be positive. Growth factor of the period length:
      period `i` lasts `initial_period_steps * (t_mul^i)` steps.
      Defaults to 2.0.
    m_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
      Must be positive. Decay factor of the peak learning rate:
      period `i` starts at `learning_rate * (m_mul^i)`. Defaults to 1.0.
    name: Optional name for the operation; "SGDRDecay" is used when it is
      None.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.
    The learning rate for a provided global_step.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
  if global_step is None:
    raise ValueError("global_step is required for sgdr_decay.")
  with ops.name_scope(name, "SGDRDecay",
                      [learning_rate, global_step,
                       initial_period_steps, t_mul, m_mul]) as name:
    learning_rate = ops.convert_to_tensor(learning_rate,
                                          name="initial_learning_rate")
    # Everything below is computed in the dtype of the learning rate.
    dtype = learning_rate.dtype
    step = math_ops.cast(global_step, dtype)
    first_period = math_ops.cast(initial_period_steps, dtype)
    period_growth = math_ops.cast(t_mul, dtype)
    peak_decay = math_ops.cast(m_mul, dtype)

    one = math_ops.cast(constant_op.constant(1.0), dtype)
    half = math_ops.cast(constant_op.constant(0.5), dtype)
    pi = math_ops.cast(constant_op.constant(math.pi), dtype)

    # Training progress measured in units of the first period.
    progress = math_ops.div(step, first_period)

    def _equal_periods():
      """All periods have the same length (t_mul == 1)."""
      # The integer part counts completed periods, the fractional part is the
      # position inside the current one.
      restart_idx = math_ops.floor(progress)
      return restart_idx, progress - restart_idx

    def _growing_periods():
      """Period lengths form a geometric series (t_mul != 1)."""
      # After i restarts the elapsed progress is
      #   1 + t_mul + ... + t_mul^(i-1) = (1 - t_mul^i) / (1 - t_mul).
      # The number of completed restarts is the largest integer i for which
      # that sum does not exceed `progress`; solving for i gives
      #   i <= log(1 - progress * (1 - t_mul)) / log(t_mul)
      # (only the natural logarithm is available, hence the quotient).
      restart_idx = math_ops.floor(
          math_ops.log(one - progress * (one - period_growth)) /
          math_ops.log(period_growth))
      # Progress consumed by all periods before the current one.
      elapsed = (one - period_growth ** restart_idx) / (one - period_growth)
      # Position inside the current period, normalized by its length.
      position = (progress - elapsed) / period_growth ** restart_idx
      return restart_idx, position

    restart_idx, position = control_flow_ops.cond(
        math_ops.equal(period_growth, one),
        _equal_periods,
        _growing_periods)

    # With m_mul < 1 every restart begins from a smaller peak: the peak of the
    # i-th restart is scaled by m_mul ** i.
    peak_lr = learning_rate * (peak_decay ** restart_idx)

    return math_ops.multiply(half * peak_lr,
                             (math_ops.cos(position * pi) + one), name=name)

View File

@ -0,0 +1,145 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional test for sgdr learning rate decay."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from sgdr_learning_rate_decay import sgdr_decay
from tensorflow.python.platform import googletest
from tensorflow.python.framework import test_util
from tensorflow.python.framework import dtypes
from tensorflow import placeholder
class SGDRDecayTest(test_util.TensorFlowTestCase):
  """Checks sgdr_decay against a reference schedule and known anchor values."""

  def get_original_values(self, lr, t_e, mult_factor, iter_per_epoch, epochs):
    """Returns the per-step learning rates of the original implementation.

    This mirrors the schedule computed in
    https://github.com/loshchil/SGDR/blob/master/SGDR_WRNs.py

    Args:
      lr: Maximum learning rate.
      t_e: Length of the first period, in epochs.
      mult_factor: Factor by which the period grows after each restart.
      iter_per_epoch: Number of iterations in one epoch.
      epochs: Number of epochs to simulate.

    Returns:
      A list with one learning rate per simulated iteration.
    """
    phase_offset = math.pi / 2.0
    phase = 0
    next_restart_epoch = t_e
    schedule = []
    current_lr = lr

    for epoch in range(epochs):
      for _ in range(iter_per_epoch):
        # In the original approach the training function is executed here.
        schedule.append(current_lr)
        dt = 2.0 * math.pi / float(2.0 * t_e)
        phase = phase + float(dt) / iter_per_epoch
        if phase >= math.pi:
          phase = phase - math.pi
        cur_t = phase_offset + phase
        # lr_min = 0, lr_max = lr
        current_lr = lr * (1.0 + math.sin(cur_t)) / 2.0

      if (epoch + 1) == next_restart_epoch:
        # Time to restart: resetting the phase puts the rate back at lr_max.
        current_lr = lr
        phase = 0
        # Stretch the period and note the epoch of the next restart.
        t_e = t_e * mult_factor
        next_restart_epoch = next_restart_epoch + t_e

    return schedule

  def get_sgdr_values(self, lr, initial_period_steps, t_mul, iters):
    """Returns sgdr_decay evaluated at steps 0 .. iters-1 as a list."""
    with self.test_session():
      step = placeholder(dtypes.int32)
      decay = sgdr_decay(lr, step, initial_period_steps, t_mul)
      return [decay.eval(feed_dict={step: i}) for i in range(iters)]

  def testCompareToOriginal(self):
    """The TensorFlow schedule matches the original implementation
    (https://github.com/loshchil/SGDR/blob/master/SGDR_WRNs.py)."""
    with self.test_session():
      lr = 10.0
      init_steps = 2
      t_mul = 3
      iters = 10
      epochs = 50

      expected_values = self.get_original_values(
          lr, init_steps, t_mul, iters, epochs)
      actual_values = self.get_sgdr_values(
          lr, init_steps*iters, t_mul, iters*epochs)

      for expected, actual in zip(expected_values, actual_values):
        self.assertAllClose(expected, actual)

  def testMDecay(self):
    """m_mul scales the learning rate at the start of the first, second,
    third and fourth period."""
    with self.test_session():
      step = placeholder(dtypes.int32)

      lr = 0.1
      t_e = 10
      t_mul = 3
      m_mul = 0.9

      decay = sgdr_decay(lr, step, t_e, t_mul, m_mul)

      # (first step of a period, learning rate expected there)
      period_starts = [
          (0, lr),
          (t_e, lr * m_mul),
          (t_e + t_e*t_mul, lr * m_mul**2),
          (t_e + t_e*t_mul + t_e * (t_mul**2), lr * (m_mul**3)),
      ]
      for test_step, expected in period_starts:
        self.assertAllClose(decay.eval(feed_dict={step: test_step}),
                            expected)

  def testCos(self):
    """Learning rate at the beginning, in the middle and at the end of a
    period when all periods have the same length."""
    with self.test_session():
      step = placeholder(dtypes.int32)
      lr = 0.2
      t_e = 1000
      t_mul = 1

      decay = sgdr_decay(lr, step, t_e, t_mul)

      # (step, learning rate expected there)
      anchors = [
          (0, lr),
          (t_e//2, lr/2),
          (t_e, lr),
          (t_e*3//2, lr/2),
      ]
      for test_step, expected in anchors:
        self.assertAllClose(decay.eval(feed_dict={step: test_step}), expected)
# Hand control to the googletest runner when executed as a script.
if __name__ == "__main__":
  googletest.main()

View File

@ -150,7 +150,6 @@ load(
"//third_party/mkl:build_defs.bzl",
"if_mkl",
)
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
exports_files(["ops/ops.pbtxt"])

View File

@ -13,16 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"
// Unfortunately we can't add the #include, since it breaks compilation for
// non-GPU targets. This only breaks in clang, because it's more strict for
// template code and CudaAtomicMax is used in template context.
#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
#define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
// This file requires the following include because it uses CudaAtomicMax:
// #include "tensorflow/core/util/cuda_kernel_helper.h"

View File

@ -0,0 +1,113 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/s3/s3_crypto.h"
#include <openssl/hmac.h>
#include <openssl/sha.h>
#include <aws/core/utils/crypto/HashResult.h>
#include <aws/s3/S3Client.h>
namespace tensorflow {
// HMAC-SHA256 implementation of the AWS SDK's HMAC interface, backed by
// OpenSSL.
class S3Sha256HMACOpenSSLImpl : public Aws::Utils::Crypto::HMAC {
 public:
  S3Sha256HMACOpenSSLImpl() {}

  virtual ~S3Sha256HMACOpenSSLImpl() = default;

  // Computes the HMAC-SHA256 of `toSign`, keyed with `secret`.
  //
  // Returns a HashResult wrapping the SHA256_DIGEST_LENGTH-byte digest. If
  // OpenSSL reports a failure, a default-constructed HashResult is returned
  // instead of a buffer with unspecified contents.
  // NOTE(review): a default-constructed HashResult is expected to read as
  // "not successful" to the SDK -- confirm against the AWS SDK version in use.
  virtual Aws::Utils::Crypto::HashResult Calculate(
      const Aws::Utils::ByteBuffer& toSign,
      const Aws::Utils::ByteBuffer& secret) override {
    unsigned int length = SHA256_DIGEST_LENGTH;
    Aws::Utils::ByteBuffer digest(length);

    // Use the one-shot ::HMAC() entry point rather than a stack-allocated
    // HMAC_CTX: the context type is opaque (and HMAC_CTX_init/cleanup are
    // gone) from OpenSSL 1.1.0 on, and the one-shot call gives a single
    // return value to check. The leading `::` is required because the
    // unqualified name HMAC refers to the base class inside this scope.
    const unsigned char* mac =
        ::HMAC(EVP_sha256(), secret.GetUnderlyingData(),
               static_cast<int>(secret.GetLength()),
               toSign.GetUnderlyingData(), toSign.GetLength(),
               digest.GetUnderlyingData(), &length);
    if (mac == nullptr) {
      return Aws::Utils::Crypto::HashResult();
    }

    return Aws::Utils::Crypto::HashResult(std::move(digest));
  }
};
class S3Sha256OpenSSLImpl : public Aws::Utils::Crypto::Hash {
public:
S3Sha256OpenSSLImpl() {}
virtual ~S3Sha256OpenSSLImpl() = default;
virtual Aws::Utils::Crypto::HashResult Calculate(
const Aws::String& str) override {
SHA256_CTX sha256;
SHA256_Init(&sha256);
SHA256_Update(&sha256, str.data(), str.size());
Aws::Utils::ByteBuffer hash(SHA256_DIGEST_LENGTH);
SHA256_Final(hash.GetUnderlyingData(), &sha256);
return Aws::Utils::Crypto::HashResult(std::move(hash));
}
virtual Aws::Utils::Crypto::HashResult Calculate(
Aws::IStream& stream) override {
SHA256_CTX sha256;
SHA256_Init(&sha256);
auto currentPos = stream.tellg();
if (currentPos == std::streampos(std::streamoff(-1))) {
currentPos = 0;
stream.clear();
}
stream.seekg(0, stream.beg);
char streamBuffer
[Aws::Utils::Crypto::Hash::INTERNAL_HASH_STREAM_BUFFER_SIZE];
while (stream.good()) {
stream.read(streamBuffer,
Aws::Utils::Crypto::Hash::INTERNAL_HASH_STREAM_BUFFER_SIZE);
auto bytesRead = stream.gcount();
if (bytesRead > 0) {
SHA256_Update(&sha256, streamBuffer, static_cast<size_t>(bytesRead));
}
}
stream.clear();
stream.seekg(currentPos, stream.beg);
Aws::Utils::ByteBuffer hash(SHA256_DIGEST_LENGTH);
SHA256_Final(hash.GetUnderlyingData(), &sha256);
return Aws::Utils::Crypto::HashResult(std::move(hash));
}
};
std::shared_ptr<Aws::Utils::Crypto::Hash>
S3SHA256Factory::CreateImplementation() const {
return Aws::MakeShared<S3Sha256OpenSSLImpl>(S3CryptoAllocationTag);
}
std::shared_ptr<Aws::Utils::Crypto::HMAC>
S3SHA256HmacFactory::CreateImplementation() const {
return Aws::MakeShared<S3Sha256HMACOpenSSLImpl>(S3CryptoAllocationTag);
}
} // namespace tensorflow

View File

@ -0,0 +1,35 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <aws/core/Aws.h>
#include <aws/core/utils/crypto/Factories.h>
#include <aws/core/utils/crypto/HMAC.h>
#include <aws/core/utils/crypto/Hash.h>
namespace tensorflow {
// Allocation tag handed to Aws::MakeShared when the S3 crypto implementations
// are created. Both the characters and the pointer are const: this lives in a
// header, so every translation unit gets its own copy, and a const object
// neither can be re-pointed by accident nor triggers unused-variable warnings
// in files that include the header without using the tag.
static const char* const S3CryptoAllocationTag = "S3CryptoAllocation";
// Factory for SHA-256 hash objects, exposed through the AWS SDK's HashFactory
// interface.
// NOTE(review): presumably installed with the SDK's crypto factory hooks so
// that S3 requests use this implementation -- confirm at the registration site.
class S3SHA256Factory : public Aws::Utils::Crypto::HashFactory {
public:
// Returns a newly created Hash implementation; defined out of line.
std::shared_ptr<Aws::Utils::Crypto::Hash> CreateImplementation()
const override;
};
// Factory for HMAC-SHA256 objects, exposed through the AWS SDK's HMACFactory
// interface.
// NOTE(review): presumably installed with the SDK's crypto factory hooks so
// that request signing uses this implementation -- confirm at the
// registration site.
class S3SHA256HmacFactory : public Aws::Utils::Crypto::HMACFactory {
public:
// Returns a newly created HMAC implementation; defined out of line.
std::shared_ptr<Aws::Utils::Crypto::HMAC> CreateImplementation()
const override;
};
} // namespace tensorflow

View File

@ -0,0 +1,28 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/test_benchmark.h"
namespace tensorflow {
// Benchmark body: executes one VLOG(1) statement per requested iteration.
// NOTE(review): per the name, this is meant to measure the cost of a VLOG
// statement whose level is not enabled -- confirm the run configuration.
static void BM_DisabledVlog(int iters) {
  int remaining = iters;
  while (remaining > 0) {
    VLOG(1) << "Testing VLOG(1)!";
    --remaining;
  }
}
BENCHMARK(BM_DisabledVlog);
} // namespace tensorflow

View File

@ -0,0 +1,117 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Test that popens a child process with the VLOG-ing environment variable set
// for the logging framework, and observes VLOG_IS_ON and VLOG macro output.
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/platform.h"
#include "tensorflow/core/platform/test.h"
#include <string.h>
namespace tensorflow {
namespace {
// Drives both halves of the vmodule test.
//
// Child mode (`do_vlog` is true): prints to stderr which of VLOG levels 8, 7
// and 6 are on, emits VLOG(8)/VLOG(7)/VLOG(6) and a LOG(INFO) line, and
// returns.
//
// Parent mode (`do_vlog` is false): re-runs the binary `argv0` through popen()
// with "do_vlog" as its argument and the per-module VLOG configuration set
// (the TF_CPP_VMODULE environment variable, or --vmodule on PLATFORM_GOOGLE),
// captures the child's stdout and stderr, and checks that levels 7 and 6 were
// active while level 8 was not.
//
// Returns EXIT_SUCCESS if everything matched, EXIT_FAILURE otherwise.
int RealMain(const char* argv0, bool do_vlog) {
  if (do_vlog) {
#if !defined(PLATFORM_GOOGLE)
    // Note, we only test this when !defined(PLATFORM_GOOGLE) because
    // VmoduleActivated doesn't exist in that implementation.
    //
    // Also, we call this internal API to simulate what would happen if
    // differently-named translation units attempted to VLOG, so we don't need
    // to create dummy translation unit files.
    bool ok = internal::LogMessage::VmoduleActivated("vmodule_test.cc", 7) &&
              internal::LogMessage::VmoduleActivated("shoobadooba.h", 3);
    if (!ok) {
      fprintf(stderr, "vmodule activated levels not as expected.\n");
      return EXIT_FAILURE;
    }
#endif

    // Print info on which VLOG levels are activated.
    fprintf(stderr, "VLOG_IS_ON(8)? %d\n", VLOG_IS_ON(8));
    fprintf(stderr, "VLOG_IS_ON(7)? %d\n", VLOG_IS_ON(7));
    fprintf(stderr, "VLOG_IS_ON(6)? %d\n", VLOG_IS_ON(6));

    // Do some VLOG-ing.
    VLOG(8) << "VLOG(8)";
    VLOG(7) << "VLOG(7)";
    VLOG(6) << "VLOG(6)";
    LOG(INFO) << "INFO";

    return EXIT_SUCCESS;
  }

  // Popen the child process.
  std::string command = std::string(argv0);
#if defined(PLATFORM_GOOGLE)
  command = command + " do_vlog --vmodule=vmodule_test=7 --alsologtostderr";
#else
  command =
      "TF_CPP_VMODULE=vmodule_test=7,shoobadooba=3 " + command + " do_vlog";
#endif
  // Fold the child's stderr into the pipe so the VLOG output can be inspected.
  command += " 2>&1";
  fprintf(stderr, "Running: \"%s\"\n", command.c_str());
  FILE* f = popen(command.c_str(), "r");
  if (f == nullptr) {
    fprintf(stderr, "Failed to popen child: %s\n", strerror(errno));
    return EXIT_FAILURE;
  }

  // Read data from the child's stdout.
  constexpr int kBufferSizeBytes = 4096;
  char buffer[kBufferSizeBytes];
  // One byte is held back for the terminating NUL written below.
  size_t result = fread(buffer, sizeof(buffer[0]), kBufferSizeBytes - 1, f);
  if (result == 0) {
    fprintf(stderr, "Failed to read from child stdout: %zu %s\n", result,
            strerror(errno));
    // Report first (pclose may overwrite errno), then close the pipe so the
    // stream and the child process are not leaked on this error path.
    pclose(f);
    return EXIT_FAILURE;
  }
  buffer[result] = '\0';
  int status = pclose(f);
  if (status == -1) {
    fprintf(stderr, "Failed to close popen child: %s\n", strerror(errno));
    return EXIT_FAILURE;
  }

  // Check output is as expected.
  const char kExpected[] =
      "VLOG_IS_ON(8)? 0\nVLOG_IS_ON(7)? 1\nVLOG_IS_ON(6)? 1\n";
  if (strstr(buffer, kExpected) == nullptr) {
    fprintf(stderr, "error: unexpected output from child: \"%.*s\"\n",
            kBufferSizeBytes, buffer);
    return EXIT_FAILURE;
  }
  // Levels 7 and 6 must have produced output; level 8 must not have.
  bool ok = strstr(buffer, "VLOG(7)\n") != nullptr &&
            strstr(buffer, "VLOG(6)\n") != nullptr &&
            strstr(buffer, "VLOG(8)\n") == nullptr;
  if (!ok) {
    fprintf(stderr, "error: VLOG output not as expected: \"%.*s\"\n",
            kBufferSizeBytes, buffer);
    return EXIT_FAILURE;
  }

  // Success!
  return EXIT_SUCCESS;
}
} // namespace
} // namespace tensorflow
// Initializes googletest, then dispatches to RealMain. The process acts as
// the VLOG-ing child when its first argument is exactly "do_vlog"; otherwise
// it acts as the parent that spawns and checks the child.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);

  bool child_mode = false;
  if (argc >= 2) {
    child_mode = (strcmp(argv[1], "do_vlog") == 0);
  }
  return tensorflow::RealMain(argv[0], child_mode);
}

View File

@ -0,0 +1,249 @@
project_path: /_project.yaml
book_path: /_book.yaml
description: <!--no description-->
landing_page:
show_side_navs: True
rows:
- description: >
<h1 class="hide-from-toc">Get Started with TensorFlow</h1>
<p>
TensorFlow is an open-source machine learning library for research and
production. TensorFlow offers APIs for beginners and experts to develop
for desktop, mobile, web, and cloud. See the sections below to get
started.
</p>
items:
- custom_html: >
<style>
.tfo-button-primary {
background-color: #fca851;
}
.tfo-button-primary:hover {
background-color: #ef6c02;
}
a.colab-button {
display: inline-block;
background: rgba(255, 255, 255, 0.75);
padding: 4px 8px;
border-radius: 4px;
font-size: 11px!important;
text-decoration: none;
color:#aaa;border: none;
font-weight: 300;
border: solid 1px rgba(0, 0, 0, 0.08);
border-bottom-color: rgba(0, 0, 0, 0.15);
text-transform: uppercase;
line-height: 16px
}
a.colab-button:hover {
color: #666;
background: white;
border-color: rgba(0, 0, 0, 0.2);
}
a.colab-button span {
background-image: url("/images/colab_logo_button.svg");
background-repeat:no-repeat;background-size:20px;
background-position-y:2px;display:inline-block;
padding-left:24px;border-radius:4px;
text-decoration:none;
}
/* adjust code block for smaller screens */
@media screen and (max-width: 1000px) {
.tfo-landing-row-item-code-block {
flex-direction: column !important;
}
.tfo-landing-row-item-code-block > .devsite-landing-row-item-code {
/*display: none;*/
width: 100%;
}
}
@media screen and (max-width: 720px) {
.tfo-landing-row-item-code-block {
display: none;
}
}
</style>
<div class="devsite-landing-row-item-description">
<h3 class="hide-from-toc">Learn and use ML</h3>
<div class="devsite-landing-row-item-description-content">
<p>
The high-level Keras API provides building blocks to create and
train deep learning models. Start with these beginner-friendly
notebook examples, then read the
<a href="/guide/keras">TensorFlow Keras guide</a>.
</p>
<ol style="padding-left:20px;">
<li><a href="/get_started/basic_classification">Basic classification</a></li>
<li><a href="/get_started/basic_text_classification">Text classification</a></li>
<li><a href="/get_started/basic_regression">Regression</a></li>
<li><a href="/get_started/overfit_and_underfit">Overfitting and underfitting</a></li>
<li><a href="/get_started/save_and_restore_models">Save and load</a></li>
</ol>
</div>
<div class="devsite-landing-row-item-buttons" style="margin-top:0;">
<a class="button button-primary tfo-button-primary" href="/guide/keras">Read the Keras guide</a>
</div>
</div>
- classname: tfo-landing-row-item-code-block
code_block: |
<pre class="prettyprint">
import tensorflow as tf
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)
</pre>
{% dynamic if request.tld != 'cn' %}
<a class="colab-button" target="_blank" href="https://colab.sandbox.google.com/github/tensorflow/models/blob/master/samples/core/get_started/_index.ipynb">Run in a <span>Notebook</span></a>
{% dynamic endif %}
- items:
- custom_html: >
<div class="devsite-landing-row-item-description" style="border-right: 2px solid #eee;">
<h3 class="hide-from-toc">Research and experimentation</h3>
<div class="devsite-landing-row-item-description-content">
<p>
Eager execution provides an imperative, define-by-run interface for advanced operations. Write custom layers, forward passes, and training loops with autodifferentiation. Start with
these notebooks, then read the <a href="/guide/eager">eager execution guide</a>.
</p>
<ol style="padding-left:20px;">
<li>
{% dynamic if request.tld == 'cn' %}
<a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb" class="external">Eager execution basics</a>
{% dynamic else %}
<a href="https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb" class="external">Eager execution basics</a>
{% dynamic endif %}
</li>
<li>
{% dynamic if request.tld == 'cn' %}
<a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb" class="external">Automatic differentiation and gradient tapes</a>
{% dynamic else %}
<a href="https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb" class="external">Automatic differentiation and gradient tapes</a>
{% dynamic endif %}
</li>
<li>
{% dynamic if request.tld == 'cn' %}
<a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/3_training_models.ipynb" class="external">Variables, models, and training</a>
{% dynamic else %}
<a href="https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/3_training_models.ipynb" class="external">Variables, models, and training</a>
{% dynamic endif %}
</li>
<li>
{% dynamic if request.tld == 'cn' %}
<a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb" class="external">Custom layers</a>
{% dynamic else %}
<a href="https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/notebooks/4_high_level.ipynb" class="external">Custom layers</a>
{% dynamic endif %}
</li>
<li><a href="/get_started/eager">Custom training walkthrough</a></li>
<li>
{% dynamic if request.tld == 'cn' %}
<a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb" class="external">Example: Neural machine translation w/ attention</a>
{% dynamic else %}
<a href="https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb" class="external">Example: Neural machine translation w/ attention</a>
{% dynamic endif %}
</li>
</ol>
</div>
<div class="devsite-landing-row-item-buttons">
<a class="button button-primary tfo-button-primary" href="/guide/eager">Read the eager execution guide</a>
</div>
</div>
- custom_html: >
<div class="devsite-landing-row-item-description">
<h3 class="hide-from-toc">ML at production scale</h3>
<div class="devsite-landing-row-item-description-content">
<p>
Estimators can train large models on multiple machines in a
production environment. Try the examples below and read the
<a href="/guide/estimators">Estimators guide</a>.
</p>
<ol style="padding-left: 20px;">
<li><a href="/tutorials/text_classification_with_tf_hub">How to build a simple text classifier with TF-Hub</a></li>
<li><a href="https://github.com/tensorflow/models/tree/master/official/boosted_trees">Classifying Higgs boson processes</a></li>
<li><a href="/tutorials/wide_and_deep">Wide and deep learning using estimators</a></li>
</ol>
</div>
<div class="devsite-landing-row-item-buttons">
<a class="button button-primary tfo-button-primary" href="/guide/estimators">Read the Estimators guide</a>
</div>
</div>
- description: >
<h2 class="hide-from-toc">Google Colab&#58; An easy way to learn and use TensorFlow</h2>
<p>
<a href="https://colab.sandbox.google.com/notebooks/welcome.ipynb" class="external">Colaboratory</a>
is a Google research project created to help disseminate machine learning
education and research. It's a Jupyter notebook environment that requires
no setup to use and runs entirely in the cloud.
<a href="https://medium.com/tensorflow/colab-an-easy-way-to-learn-and-use-tensorflow-d74d1686e309" class="external">Read the blog post</a>.
</p>
- description: >
<h2 class="hide-from-toc">Build your first ML app</h2>
<p>Create and deploy TensorFlow models on web and mobile.</p>
background: grey
items:
- custom_html: >
<div class="devsite-landing-row-item-description" style="background: #fff; padding:32px;">
<a href="https://js.tensorflow.org">
<h3 class="hide-from-toc">Web developers</h3>
</a>
<div class="devsite-landing-row-item-description-content">
TensorFlow.js is a WebGL-accelerated JavaScript library to train and
deploy ML models in the browser and for Node.js.
</div>
</div>
- custom_html: >
<div class="devsite-landing-row-item-description" style="background: #fff; padding:32px;">
<a href="/mobile/tflite/">
<h3 class="hide-from-toc">Mobile developers</h3>
</a>
<div class="devsite-landing-row-item-description-content">
TensorFlow Lite is a lightweight solution for mobile and embedded devices.
</div>
</div>
- description: >
<h2 class="hide-from-toc">Videos and updates</h2>
<p>
Subscribe to the TensorFlow
<a href="https://www.youtube.com/tensorflow" class="external">YouTube channel</a>
and <a href="https://blog.tensorflow.org" class="external">blog</a> for
the latest videos and updates.
</p>
items:
- description: >
<h3 class="hide-from-toc">Get started with TensorFlow's High-Level APIs</h3>
youtube_id: tjsHSIG8I08
buttons:
- label: Watch the video
path: https://www.youtube.com/watch?v=tjsHSIG8I08
- description: >
<h3 class="hide-from-toc">Eager execution</h3>
youtube_id: T8AW0fKP0Hs
background: grey
buttons:
- label: Watch the video
path: https://www.youtube.com/watch?v=T8AW0fKP0Hs
- description: >
<h3 class="hide-from-toc">tf.data: Fast, flexible, and easy-to-use input pipelines</h3>
youtube_id: uIcqeP7MFH0
buttons:
- label: Watch the video
path: https://www.youtube.com/watch?v=uIcqeP7MFH0

View File

@ -0,0 +1,3 @@
# Basic Classification
[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/master/samples/core/get_started/basic_classification.ipynb)

View File

@ -0,0 +1,3 @@
# Basic Regression
[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/master/samples/core/get_started/basic_regression.ipynb)

View File

@ -0,0 +1,3 @@
# Basic Text Classification
[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/master/samples/core/get_started/basic_text_classification.ipynb)

View File

@ -0,0 +1,3 @@
# Custom Training Walkthrough
[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.9.0/samples/core/get_started/eager.ipynb)

View File

@ -0,0 +1,29 @@
# Get Started
If you are new to machine learning, we recommend taking the following online
course prior to diving into TensorFlow documentation:
* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/),
which introduces machine learning concepts and encourages experimentation
with existing TensorFlow code.
TensorFlow is a tool for machine learning. While it contains a wide range of
functionality, TensorFlow is mainly designed for deep neural network models.
The easiest way to get started with TensorFlow is by using Eager Execution.
* @{$get_started/eager} is for anyone new to machine learning or TensorFlow.
TensorFlow provides many APIs. The remainder of this section focuses on the
Estimator API, which provides scalable, high-performance models. See the
@{$estimators} guide.
For more advanced users:
* The @{$low_level_intro$Low Level Introduction} demonstrates how to use
TensorFlow outside of the Estimator framework, for debugging and
experimentation.
* The @{$guide$Programmer's Guide} details major
TensorFlow components.
* The @{$tutorials$Tutorials} provide walkthroughs of a variety of
TensorFlow models.

View File

@ -0,0 +1,10 @@
### Learn and use ML
basic_classification.md: Basic classification
basic_text_classification.md: Text classification
basic_regression.md: Regression
overfit_and_underfit.md
save_and_restore_models.md
next_steps.md
### Research and experimentation
eager.md

View File

@ -0,0 +1,36 @@
# Next steps
## Learn more about TensorFlow
* The [TensorFlow Guide](/guide) includes usage guides for the
high-level APIs, as well as advanced TensorFlow operations.
* [Premade Estimators](/guide/premade_estimators) are designed to
get results out of the box. Use TensorFlow without building your own models.
* [TensorFlow.js](https://js.tensorflow.org/) allows web developers to train and
deploy ML models in the browser and using Node.js.
* [TFLite](/mobile/tflite) allows mobile developers to do inference efficiently
on mobile devices.
* [TensorFlow Serving](/serving) is an open-source project that can put
TensorFlow models in production quickly.
* The [ecosystem](/ecosystem) contains more projects, including
[Magenta](https://magenta.tensorflow.org/), [TFX](/tfx),
[Swift for TensorFlow](https://github.com/tensorflow/swift), and more.
## Learn more about machine learning
Recommended resources include:
* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/),
a course from Google that introduces machine learning concepts.
* [CS 20: Tensorflow for Deep Learning Research](http://web.stanford.edu/class/cs20si/),
notes from an intro course from Stanford.
* [CS231n: Convolutional Neural Networks for Visual Recognition](http://cs231n.stanford.edu/),
a course that teaches how convolutional networks work.
* [Machine Learning Recipes](https://www.youtube.com/watch?v=cKxRvEZd3Mw&list=PLOU2XLYxmsIIuiBfYad6rFYQU_jL2ryal),
a video series that introduces basic machine learning concepts with few prerequisites.
* [Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python),
a book by Francois Chollet about the Keras API, as well as an excellent hands-on intro to Deep Learning.
* [Hands-on Machine Learning with Scikit-Learn and TensorFlow](https://github.com/ageron/handson-ml),
a book by Aurélien Géron that is a clear getting-started guide to data science and deep learning.
* [Deep Learning](https://www.deeplearningbook.org/), a book by Ian Goodfellow et al.
that provides a technical dive into learning machine learning.

View File

@ -0,0 +1,3 @@
# Overfitting and Underfitting
[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/master/samples/core/get_started/overfit_and_underfit.ipynb)

View File

@ -0,0 +1,3 @@
# Save and Restore Models
[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/master/samples/core/get_started/save_and_restore_models.ipynb)

View File

@ -0,0 +1,631 @@
# Simple Audio Recognition
This tutorial will show you how to build a basic speech recognition network that
recognizes ten different words. It's important to know that real speech and
audio recognition systems are much more complex, but like MNIST for images, it
should give you a basic understanding of the techniques involved. Once you've
completed this tutorial, you'll have a model that tries to classify a one second
audio clip as either silence, an unknown word, "yes", "no", "up", "down",
"left", "right", "on", "off", "stop", or "go". You'll also be able to take this
model and run it in an Android application.
## Preparation
You should make sure you have TensorFlow installed, and since the script
downloads over 1GB of training data, you'll need a good internet connection and
enough free space on your machine. The training process itself can take several
hours, so make sure you have a machine available for that long.
## Training
To begin the training process, go to the TensorFlow source tree and run:
```bash
python tensorflow/examples/speech_commands/train.py
```
The script will start off by downloading the [Speech Commands
dataset](https://storage.cloud.google.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz),
which consists of over 105,000 WAVE audio files of people saying thirty
different words. This data was collected by Google and released under a CC BY
license, and you can help improve it by [contributing five minutes of your own
voice](https://aiyprojects.withgoogle.com/open_speech_recording). The archive is
over 2GB, so this part may take a while, but you should see progress logs, and
once it's been downloaded once you won't need to do this step again. You can
find more information about this dataset in this
[Speech Commands paper](https://arxiv.org/abs/1804.03209).
Once the downloading has completed, you'll see logging information that looks
like this:
```
I0730 16:53:44.766740 55030 train.py:176] Training from step: 1
I0730 16:53:47.289078 55030 train.py:217] Step #1: rate 0.001000, accuracy 7.0%, cross entropy 2.611571
```
This shows that the initialization process is done and the training loop has
begun. You'll see that it outputs information for every training step. Here's a
breakdown of what it means:
`Step #1` shows that we're on the first step of the training loop. In this case
there are going to be 18,000 steps in total, so you can look at the step number
to get an idea of how close it is to finishing.
`rate 0.001000` is the learning rate that's controlling the speed of the
network's weight updates. Early on this is a comparatively high number (0.001),
but for later training cycles it will be reduced 10x, to 0.0001.
`accuracy 7.0%` is how many classes were correctly predicted on this
training step. This value will often fluctuate a lot, but should increase on
average as training progresses. The model outputs an array of numbers, one for
each label, and each number is the predicted likelihood of the input being that
class. The predicted label is picked by choosing the entry with the highest
score. The scores are always between zero and one, with higher values
representing more confidence in the result.
`cross entropy 2.611571` is the result of the loss function that we're using to
guide the training process. This is a score that's obtained by comparing the
vector of scores from the current training run to the correct labels, and this
should trend downwards during training.
After a hundred steps, you should see a line like this:
`I0730 16:54:41.813438 55030 train.py:252] Saving to
"/tmp/speech_commands_train/conv.ckpt-100"`
This is saving out the current trained weights to a checkpoint file. If your
training script gets interrupted, you can look for the last saved checkpoint and
then restart the script with
`--start_checkpoint=/tmp/speech_commands_train/conv.ckpt-100` as a command line
argument to start from that point.
## Confusion Matrix
After four hundred steps, this information will be logged:
```
I0730 16:57:38.073667 55030 train.py:243] Confusion Matrix:
[[258 0 0 0 0 0 0 0 0 0 0 0]
[ 7 6 26 94 7 49 1 15 40 2 0 11]
[ 10 1 107 80 13 22 0 13 10 1 0 4]
[ 1 3 16 163 6 48 0 5 10 1 0 17]
[ 15 1 17 114 55 13 0 9 22 5 0 9]
[ 1 1 6 97 3 87 1 12 46 0 0 10]
[ 8 6 86 84 13 24 1 9 9 1 0 6]
[ 9 3 32 112 9 26 1 36 19 0 0 9]
[ 8 2 12 94 9 52 0 6 72 0 0 2]
[ 16 1 39 74 29 42 0 6 37 9 0 3]
[ 15 6 17 71 50 37 0 6 32 2 1 9]
[ 11 1 6 151 5 42 0 8 16 0 0 20]]
```
The first section is a [confusion
matrix](https://www.tensorflow.org/api_docs/python/tf/confusion_matrix). To
understand what it means, you first need to know the labels being used, which in
this case are "_silence_", "_unknown_", "yes", "no", "up", "down", "left",
"right", "on", "off", "stop", and "go". Each column represents a set of samples
that were predicted to be each label, so the first column represents all the
clips that were predicted to be silence, the second all those that were
predicted to be unknown words, the third "yes", and so on.
Each row represents clips by their correct, ground truth labels. The first row
is all the clips that were silence, the second clips that were unknown words,
the third "yes", etc.
This matrix can be more useful than just a single accuracy score because it
gives a good summary of what mistakes the network is making. In this example you
can see that all of the entries in the first row are zero, apart from the
initial one. Because the first row is all the clips that are actually silence,
this means that none of them were mistakenly labeled as words, so we have no
false negatives for silence. This shows the network is already getting pretty
good at distinguishing silence from words.
If we look down the first column though, we see a lot of non-zero values. The
column represents all the clips that were predicted to be silence, so positive
numbers outside of the first cell are errors. This means that some clips of real
spoken words are actually being predicted to be silence, so we do have quite a
few false positives.
A perfect model would produce a confusion matrix where all of the entries were
zero apart from a diagonal line through the center. Spotting deviations from
that pattern can help you figure out how the model is most easily confused, and
once you've identified the problems you can address them by adding more data or
cleaning up categories.
## Validation
After the confusion matrix, you should see a line like this:
`I0730 16:57:38.073777 55030 train.py:245] Step 400: Validation accuracy = 26.3%
(N=3093)`
It's good practice to separate your data set into three categories. The largest
(in this case roughly 80% of the data) is used for training the network, a
smaller set (10% here, known as "validation") is reserved for evaluation of the
accuracy during training, and another set (the last 10%, "testing") is used to
evaluate the accuracy once after the training is complete.
The reason for this split is that there's always a danger that networks will
start memorizing their inputs during training. By keeping the validation set
separate, you can ensure that the model works with data it's never seen before.
The testing set is an additional safeguard to make sure that you haven't just
been tweaking your model in a way that happens to work for both the training and
validation sets, but not a broader range of inputs.
The training script automatically separates the data set into these three
categories, and the logging line above shows the accuracy of the model when run on
the validation set. Ideally, this should stick fairly close to the training
accuracy. If the training accuracy increases but the validation doesn't, that's
a sign that overfitting is occurring, and your model is only learning things
about the training clips, not broader patterns that generalize.
## Tensorboard
A good way to visualize how the training is progressing is using Tensorboard. By
default, the script saves out events to /tmp/retrain_logs, and you can load
these by running:
`tensorboard --logdir /tmp/retrain_logs`
Then navigate to [http://localhost:6006](http://localhost:6006) in your browser,
and you'll see charts and graphs showing your model's progress.
<div style="width:50%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://storage.googleapis.com/download.tensorflow.org/example_images/speech_commands_tensorflow.png"/>
</div>
## Training Finished
After a few hours of training (depending on your machine's speed), the script
should have completed all 18,000 steps. It will print out a final confusion
matrix, along with an accuracy score, all run on the testing set. With the
default settings, you should see an accuracy of between 85% and 90%.
Because audio recognition is particularly useful on mobile devices, next we'll
export it to a compact format that's easy to work with on those platforms. To do
that, run this command line:
```
python tensorflow/examples/speech_commands/freeze.py \
--start_checkpoint=/tmp/speech_commands_train/conv.ckpt-18000 \
--output_file=/tmp/my_frozen_graph.pb
```
Once the frozen model has been created, you can test it with the `label_wav.py`
script, like this:
```
python tensorflow/examples/speech_commands/label_wav.py \
--graph=/tmp/my_frozen_graph.pb \
--labels=/tmp/speech_commands_train/conv_labels.txt \
--wav=/tmp/speech_dataset/left/a5d485dc_nohash_0.wav
```
This should print out three labels:
```
left (score = 0.81477)
right (score = 0.14139)
_unknown_ (score = 0.03808)
```
Hopefully "left" is the top score since that's the correct label, but since the
training is random it may not be for the first file you try. Experiment with some
of the other .wav files in that same folder to see how well it does.
The scores are between zero and one, and higher values mean the model is more
confident in its prediction.
## Running the Model in an Android App
The easiest way to see how this model works in a real application is to download
[the prebuilt Android demo
applications](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#prebuilt-components)
and install them on your phone. You'll see 'TF Speech' appear in your app list,
and opening it will show you the same list of action words we've just trained
our model on, starting with "Yes" and "No". Once you've given the app permission
to use the microphone, you should be able to try saying those words and see them
highlighted in the UI when the model recognizes one of them.
You can also build this application yourself, since it's open source and
[available as part of the TensorFlow repository on
github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#building-in-android-studio-using-the-tensorflow-aar-from-jcenter).
By default it downloads [a pretrained model from
tensorflow.org](http://download.tensorflow.org/models/speech_commands_v0.02.zip),
but you can easily [replace it with a model you've trained
yourself](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install-model-files-optional).
If you do this, you'll need to make sure that the constants in [the main
SpeechActivity Java source
file](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java)
like `SAMPLE_RATE` and `SAMPLE_DURATION` match any changes you've made to the
defaults while training. You'll also see that there's a [Java version of the
RecognizeCommands
module](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java)
that's very similar to the C++ version in this tutorial. If you've tweaked
parameters for that, you can also update them in SpeechActivity to get the same
results as in your server testing.
The demo app updates its UI list of results automatically based on the labels
text file you copy into assets alongside your frozen graph, which means you can
easily try out different models without needing to make any code changes. You
will need to update `LABEL_FILENAME` and `MODEL_FILENAME` to point to the files
you've added if you change the paths though.
## How does this Model Work?
The architecture used in this tutorial is based on some of those described in the paper
[Convolutional Neural Networks for Small-footprint Keyword
Spotting](http://www.isca-speech.org/archive/interspeech_2015/papers/i15_1478.pdf).
It was chosen because it's comparatively simple, quick to train, and easy to
understand, rather than being state of the art. There are lots of different
approaches to building neural network models to work with audio, including
[recurrent networks](https://svds.com/tensorflow-rnn-tutorial/) or [dilated
(atrous)
convolutions](https://deepmind.com/blog/wavenet-generative-model-raw-audio/).
This tutorial is based on the kind of convolutional network that will feel very
familiar to anyone who's worked with image recognition. That may seem surprising
at first though, since audio is inherently a one-dimensional continuous signal
across time, not a 2D spatial problem.
We solve that issue by defining a window of time we believe our spoken words
should fit into, and converting the audio signal in that window into an image.
This is done by grouping the incoming audio samples into short segments, just a
few milliseconds long, and calculating the strength of the frequencies across a
set of bands. Each set of frequency strengths from a segment is treated as a
vector of numbers, and those vectors are arranged in time order to form a
two-dimensional array. This array of values can then be treated like a
single-channel image, and is known as a
[spectrogram](https://en.wikipedia.org/wiki/Spectrogram). If you want to view
what kind of image an audio sample produces, you can run the `wav_to_spectrogram`
tool:
```
bazel run tensorflow/examples/wav_to_spectrogram:wav_to_spectrogram -- \
--input_wav=/tmp/speech_dataset/happy/ab00c4b2_nohash_0.wav \
--output_image=/tmp/spectrogram.png
```
If you open up `/tmp/spectrogram.png` you should see something like this:
<div style="width:50%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://storage.googleapis.com/download.tensorflow.org/example_images/spectrogram.png"/>
</div>
Because of TensorFlow's memory order, time in this image is increasing from top
to bottom, with frequencies going from left to right, unlike the usual
convention for spectrograms where time is left to right. You should be able to
see a couple of distinct parts, with the first syllable "Ha" distinct from
"ppy".
Because the human ear is more sensitive to some frequencies than others, it's
been traditional in speech recognition to do further processing to this
representation to turn it into a set of [Mel-Frequency Cepstral
Coefficients](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum), or MFCCs
for short. This is also a two-dimensional, one-channel representation so it can
be treated like an image too. If you're targeting general sounds rather than
speech you may find you can skip this step and operate directly on the
spectrograms.
The image that's produced by these processing steps is then fed into a
multi-layer convolutional neural network, with a fully-connected layer followed
by a softmax at the end. You can see the definition of this portion in
[tensorflow/examples/speech_commands/models.py](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/models.py).
## Streaming Accuracy
Most audio recognition applications need to run on a continuous stream of audio,
rather than on individual clips. A typical way to use a model in this
environment is to apply it repeatedly at different offsets in time and average
the results over a short window to produce a smoothed prediction. If you think
of the input as an image, it's continuously scrolling along the time axis. The
words we want to recognize can start at any time, so we need to take a series of
snapshots to have a chance of having an alignment that captures most of the
utterance in the time window we feed into the model. If we sample at a high
enough rate, then we have a good chance of capturing the word in multiple
windows, so averaging the results improves the overall confidence of the
prediction.
For an example of how you can use your model on streaming data, you can look at
[test_streaming_accuracy.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/test_streaming_accuracy.cc).
This uses the
[RecognizeCommands](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/recognize_commands.h)
class to run through a long-form input audio, try to spot words, and compare
those predictions against a ground truth list of labels and times. This makes it
a good example of applying a model to a stream of audio signals over time.
You'll need a long audio file to test it against, along with labels showing
where each word was spoken. If you don't want to record one yourself, you can
generate some synthetic test data using the `generate_streaming_test_wav`
utility. By default this will create a ten minute .wav file with words roughly
every three seconds, and a text file containing the ground truth of when each
word was spoken. These words are pulled from the test portion of your current
dataset, mixed in with background noise. To run it, use:
```
bazel run tensorflow/examples/speech_commands:generate_streaming_test_wav
```
This will save a .wav file to `/tmp/speech_commands_train/streaming_test.wav`,
and a text file listing the labels to
`/tmp/speech_commands_train/streaming_test_labels.txt`. You can then run
accuracy testing with:
```
bazel run tensorflow/examples/speech_commands:test_streaming_accuracy -- \
--graph=/tmp/my_frozen_graph.pb \
--labels=/tmp/speech_commands_train/conv_labels.txt \
--wav=/tmp/speech_commands_train/streaming_test.wav \
--ground_truth=/tmp/speech_commands_train/streaming_test_labels.txt \
--verbose
```
This will output information about the number of words correctly matched, how
many were given the wrong labels, and how many times the model triggered when
there was no real word spoken. There are various parameters that control how the
signal averaging works, including `--average_window_ms` which sets the length of
time to average results over, `--clip_stride_ms` which is the time between
applications of the model, `--suppression_ms` which stops subsequent word
detections from triggering for a certain time after an initial one is found, and
`--detection_threshold`, which controls how high the average score must be
before it's considered a solid result.
You'll see that the streaming accuracy outputs three numbers, rather than just
the one metric used in training. This is because different applications have
varying requirements, with some being able to tolerate frequent incorrect
results as long as real words are found (high recall), while others are very focused
on ensuring the predicted labels are highly likely to be correct even if some
aren't detected (high precision). The numbers from the tool give you an idea of
how your model will perform in an application, and you can try tweaking the
signal averaging parameters to tune it to give the kind of performance you want.
To understand what the right parameters are for your application, you can look
at generating an [ROC
curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) to help
you understand the tradeoffs.
## RecognizeCommands
The streaming accuracy tool uses a simple decoder contained in a small C++ class
called
[RecognizeCommands](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/recognize_commands.h).
This class is fed the output of running the TensorFlow model over time, it
averages the signals, and returns information about a label when it has enough
evidence to think that a recognized word has been found. The implementation is
fairly small, just keeping track of the last few predictions and averaging them,
so it's easy to port to other platforms and languages as needed. For example,
it's convenient to do something similar at the Java level on Android, or Python
on the Raspberry Pi. As long as these implementations share the same logic, you
can tune the parameters that control the averaging using the streaming test
tool, and then transfer them over to your application to get similar results.
## Advanced Training
The defaults for the training script are designed to produce good end to end
results in a comparatively small file, but there are a lot of options you can
change to customize the results for your own requirements.
### Custom Training Data
By default the script will download the [Speech Commands
dataset](https://download.tensorflow.org/data/speech_commands_v0.01.tgz), but
you can also supply your own training data. To train on your own data, you
should make sure that you have at least several hundred recordings of each sound
you would like to recognize, and arrange them into folders by class. For
example, if you were trying to recognize dog barks from cat miaows, you would
create a root folder called `animal_sounds`, and then within that two
sub-folders called `bark` and `miaow`. You would then organize your audio files
into the appropriate folders.
To point the script to your new audio files, you'll need to set `--data_url=` to
disable downloading of the Speech Commands dataset, and
`--data_dir=/your/data/folder/` to find the files you've just created.
The files themselves should be 16-bit little-endian PCM-encoded WAVE format. The
sample rate defaults to 16,000, but as long as all your audio is consistently
the same rate (the script doesn't support resampling) you can change this with
the `--sample_rate` argument. The clips should also all be roughly the same
duration. The default expected duration is one second, but you can set this with
the `--clip_duration_ms` flag. If you have clips with variable amounts of
silence at the start, you can look at word alignment tools to standardize them
([here's a quick and dirty approach you can use
too](https://petewarden.com/2017/07/17/a-quick-hack-to-align-single-word-audio-recordings/)).
One issue to watch out for is that you may have very similar repetitions of the
same sounds in your dataset, and these can give misleading metrics if they're
spread across your training, validation, and test sets. For example, the Speech
Commands set has people repeating the same word multiple times. Each one of
those repetitions is likely to be pretty close to the others, so if training was
overfitting and memorizing one, it could perform unrealistically well when it
saw a very similar copy in the test set. To avoid this danger, Speech Commands
tries to ensure that all clips featuring the same word spoken by a single person
are put into the same partition. Clips are assigned to training, test, or
validation sets based on a hash of their filename, to ensure that the
assignments remain steady even as new clips are added and avoid any training
samples migrating into the other sets. To make sure that all of a given speaker's
words are in the same bucket, [the hashing
function](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/input_data.py)
ignores anything in a filename after '_nohash_' when calculating the
assignments. This means that if you have file names like `pete_nohash_0.wav` and
`pete_nohash_1.wav`, they're guaranteed to be in the same set.
### Unknown Class
It's likely that your application will hear sounds that aren't in your training
set, and you'll want the model to indicate that it doesn't recognize the noise
in those cases. To help the network learn what sounds to ignore, you need to
provide some clips of audio that don't belong to any of your classes. To do this, you'd
create `quack`, `oink`, and `moo` subfolders and populate them with noises from
other animals your users might encounter. The `--wanted_words` argument to the
script defines which classes you care about; all the others mentioned in
subfolder names will be used to populate an `_unknown_` class during training.
The Speech Commands dataset has twenty words in its unknown classes, including
the digits zero through nine and random names like "Sheila".
By default 10% of the training examples are picked from the unknown classes, but
you can control this with the `--unknown_percentage` flag. Increasing this will
make the model less likely to mistake unknown words for wanted ones, but making
it too large can backfire as the model might decide it's safest to categorize
all words as unknown!
### Background Noise
Real applications have to recognize audio even when there are other irrelevant
sounds happening in the environment. To build a model that's robust to this kind
of interference, we need to train against recorded audio with similar
properties. The files in the Speech Commands dataset were captured on a variety
of devices by users in many different environments, not in a studio, so that
helps add some realism to the training. To add even more, you can mix in random
segments of environmental audio to the training inputs. In the Speech Commands
set there's a special folder called `_background_noise_` which contains
minute-long WAVE files with white noise and recordings of machinery and everyday
household activity.
Small snippets of these files are chosen at random and mixed at a low volume
into clips during training. The loudness is also chosen randomly, and controlled
by the `--background_volume` argument as a proportion where 0 is silence, and 1
is full volume. Not all clips have background added, so the
`--background_frequency` flag controls what proportion have them mixed in.
Your own application might operate in its own environment with different
background noise patterns than these defaults, so you can supply your own audio
clips in the `_background_noise_` folder. These should be the same sample rate
as your main dataset, but much longer in duration so that a good set of random
segments can be selected from them.
### Silence
In most cases the sounds you care about will be intermittent and so it's
important to know when there's no matching audio. To support this, there's a
special `_silence_` label that indicates when the model detects nothing
interesting. Because there's never complete silence in real environments, we
actually have to supply examples with quiet and irrelevant audio. For this, we
reuse the `_background_noise_` folder that's also mixed in to real clips,
pulling short sections of the audio data and feeding those in with the ground
truth class of `_silence_`. By default 10% of the training data is supplied like
this, but the `--silence_percentage` flag can be used to control the proportion. As
with unknown words, setting this higher can weight the model results in favor of
true positives for silence, at the expense of false negatives for words, but too
large a proportion can cause it to fall into the trap of always guessing
silence.
### Time Shifting
Adding in background noise is one way of distorting the training data in a
realistic way to effectively increase the size of the dataset, and so increase
overall accuracy, and time shifting is another. This involves a random offset in
time of the training sample data, so that a small part of the start or end is
cut off and the opposite section is padded with zeroes. This mimics the natural
variations in starting time in the training data, and is controlled with the
`--time_shift_ms` flag, which defaults to 100ms. Increasing this value will
provide more variation, but at the risk of cutting off important parts of the
audio. A related way of augmenting the data with realistic distortions is by
using [time stretching and pitch
scaling](https://en.wikipedia.org/wiki/Audio_time_stretching_and_pitch_scaling),
but that's outside the scope of this tutorial.
## Customizing the Model
The default model used for this script is pretty large, taking over 800 million
FLOPs for each inference and using 940,000 weight parameters. This runs at
usable speeds on desktop machines or modern phones, but it involves too many
calculations to run at interactive speeds on devices with more limited
resources. To support these use cases, there are a couple of alternatives
available:
**low_latency_conv**
Based on the 'cnn-one-fstride4' topology described in the [Convolutional
Neural Networks for Small-footprint Keyword Spotting
paper](http://www.isca-speech.org/archive/interspeech_2015/papers/i15_1478.pdf).
The accuracy is slightly lower than 'conv' but the number of weight parameters
is about the same, and it only needs 11 million FLOPs to run one prediction,
making it much faster.
To use this model, you specify `--model_architecture=low_latency_conv` on
the command line. You'll also need to update the training rates and the number
of steps, so the full command will look like:
```
python tensorflow/examples/speech_commands/train.py \
--model_architecture=low_latency_conv \
--how_many_training_steps=20000,6000 \
--learning_rate=0.01,0.001
```
This asks the script to train with a learning rate of 0.01 for 20,000 steps, and
then do a fine-tuning pass of 6,000 steps with a 10x smaller rate.
**low_latency_svdf**
Based on the topology presented in the [Compressing Deep Neural Networks using a
Rank-Constrained Topology paper](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43813.pdf).
The accuracy is also lower than 'conv' but it only uses about 750 thousand
parameters, and most significantly, it allows for an optimized execution at
test time (i.e. when you will actually use it in your application), resulting
in 750 thousand FLOPs.
To use this model, you specify `--model_architecture=low_latency_svdf` on
the command line, and update the training rates and the number
of steps, so the full command will look like:
```
python tensorflow/examples/speech_commands/train.py \
--model_architecture=low_latency_svdf \
--how_many_training_steps=100000,35000 \
--learning_rate=0.01,0.005
```
Note that despite requiring a larger number of steps than the previous two
topologies, the reduced number of computations means that training should take
about the same time, and at the end reach an accuracy of around 85%.
You can also further tune the topology fairly easily for computation and
accuracy by changing these parameters in the SVDF layer:
* rank - The rank of the approximation (higher typically better, but results in
more computation).
* num_units - Similar to other layer types, specifies the number of nodes in
the layer (more nodes better quality, and more computation).
Regarding runtime, since the layer allows optimizations by caching some of the
internal neural network activations, you need to make sure to use a consistent
stride (e.g. 'clip_stride_ms' flag) both when you freeze the graph, and when
executing the model in streaming mode (e.g. test_streaming_accuracy.cc).
**Other parameters to customize**
If you want to experiment with customizing models, a good place to start is by
tweaking the spectrogram creation parameters. This has the effect of altering
the size of the input image to the model, and the creation code in
[models.py](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/models.py)
will adjust the number of computations and weights automatically to fit with
different dimensions. If you make the input smaller, the model will need fewer
computations to process it, so it can be a great way to trade off some accuracy
for improved latency. The `--window_stride_ms` flag controls how far apart each
frequency analysis sample is from the previous. If you increase this value, then
fewer samples will be taken for a given duration, and the time axis of the input
will shrink. The `--dct_coefficient_count` flag controls how many buckets are
used for the frequency counting, so reducing this will shrink the input in the
other dimension. The `--window_size_ms` argument doesn't affect the size, but
does control how wide the area used to calculate the frequencies is for each
sample. Reducing the duration of the training samples, controlled by
`--clip_duration_ms`, can also help if the sounds you're looking for are short,
since that also reduces the time dimension of the input. You'll need to make
sure that all your training data contains the right audio in the initial portion
of the clip though.
If you have an entirely different model in mind for your problem, you may find
that you can plug it into
[models.py](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/models.py)
and have the rest of the script handle all of the preprocessing and training
mechanics. You would add a new clause to `create_model`, looking for the name of
your architecture and then calling a model creation function. This function is
given the size of the spectrogram input, along with other model information, and
is expected to create TensorFlow ops to read that in and produce an output
prediction vector, and a placeholder to control the dropout rate. The rest of
the script will handle integrating this model into a larger graph doing the
input calculations and applying softmax and a loss function to train it.
One common problem when you're adjusting models and training hyper-parameters is
that not-a-number values can creep in, thanks to numerical precision issues. In
general you can solve these by reducing the magnitude of things like learning
rates and weight initialization functions, but if they're persistent you can
enable the `--check_nans` flag to track down the source of the errors. This will
insert check ops between most regular operations in TensorFlow, and abort the
training process with a useful error message when they're encountered.

View File

@ -0,0 +1,452 @@
# Convolutional Neural Networks
> **NOTE:** This tutorial is intended for *advanced* users of TensorFlow
and assumes expertise and experience in machine learning.
## Overview
CIFAR-10 classification is a common benchmark problem in machine learning. The
problem is to classify RGB 32x32 pixel images across 10 categories:
```
airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck.
```
For more details refer to the [CIFAR-10 page](https://www.cs.toronto.edu/~kriz/cifar.html)
and a [Tech Report](https://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf)
by Alex Krizhevsky.
### Goals
The goal of this tutorial is to build a relatively small [convolutional neural
network](https://en.wikipedia.org/wiki/Convolutional_neural_network) (CNN) for
recognizing images. In the process, this tutorial:
1. Highlights a canonical organization for network architecture,
training and evaluation.
2. Provides a template for constructing larger and more sophisticated models.
The reason CIFAR-10 was selected was that it is complex enough to exercise
much of TensorFlow's ability to scale to large models. At the same time,
the model is small enough to train fast, which is ideal for trying out
new ideas and experimenting with new techniques.
### Highlights of the Tutorial
The CIFAR-10 tutorial demonstrates several important constructs for
designing larger and more sophisticated models in TensorFlow:
* Core mathematical components including @{tf.nn.conv2d$convolution}
([wiki](https://en.wikipedia.org/wiki/Convolution)),
@{tf.nn.relu$rectified linear activations}
([wiki](https://en.wikipedia.org/wiki/Rectifier_(neural_networks))),
@{tf.nn.max_pool$max pooling}
([wiki](https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer))
and @{tf.nn.local_response_normalization$local response normalization}
(Chapter 3.3 in
[AlexNet paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)).
* @{$summaries_and_tensorboard$Visualization}
of network activities during training, including input images,
losses and distributions of activations and gradients.
* Routines for calculating the
@{tf.train.ExponentialMovingAverage$moving average}
of learned parameters and using these averages
during evaluation to boost predictive performance.
* Implementation of a
@{tf.train.exponential_decay$learning rate schedule}
that systematically decrements over time.
* Prefetching @{tf.train.shuffle_batch$queues}
for input
data to isolate the model from disk latency and expensive image pre-processing.
We also provide a [multi-GPU version](#training-a-model-using-multiple-gpu-cards)
of the model which demonstrates:
* Configuring a model to train across multiple GPU cards in parallel.
* Sharing and updating variables among multiple GPUs.
We hope that this tutorial provides a launch point for building larger CNNs for
vision tasks on TensorFlow.
### Model Architecture
The model in this CIFAR-10 tutorial is a multi-layer architecture consisting of
alternating convolutions and nonlinearities. These layers are followed by fully
connected layers leading into a softmax classifier. The model follows the
architecture described by
[Alex Krizhevsky](https://code.google.com/p/cuda-convnet/), with a few
differences in the top few layers.
This model achieves a peak performance of about 86% accuracy within a few hours
of training time on a GPU. Please see [below](#evaluating-a-model) and the code
for details. It consists of 1,068,298 learnable parameters and requires about
19.5M multiply-add operations to compute inference on a single image.
## Code Organization
The code for this tutorial resides in
[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/).
File | Purpose
--- | ---
[`cifar10_input.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_input.py) | Reads the native CIFAR-10 binary file format.
[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model.
[`cifar10_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU.
[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs.
[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model.
## CIFAR-10 Model
The CIFAR-10 network is largely contained in
[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py).
The complete training
graph contains roughly 765 operations. We find that we can make the code most
reusable by constructing the graph with the following modules:
1. [**Model inputs:**](#model-inputs) `inputs()` and `distorted_inputs()` add
operations that read and preprocess CIFAR images for evaluation and training,
respectively.
1. [**Model prediction:**](#model-prediction) `inference()`
adds operations that perform inference, i.e. classification, on supplied images.
1. [**Model training:**](#model-training) `loss()` and `train()`
add operations that compute the loss,
gradients, variable updates and visualization summaries.
### Model Inputs
The input part of the model is built by the functions `inputs()` and
`distorted_inputs()` which read images from the CIFAR-10 binary data files.
These files contain fixed byte length records, so we use
@{tf.FixedLengthRecordReader}.
See @{$reading_data#reading-from-files$Reading Data} to
learn more about how the `Reader` class works.
The images are processed as follows:
* They are cropped to 24 x 24 pixels, centrally for evaluation or
@{tf.random_crop$randomly} for training.
* They are @{tf.image.per_image_standardization$approximately whitened}
to make the model insensitive to dynamic range.
For training, we additionally apply a series of random distortions to
artificially increase the data set size:
* @{tf.image.random_flip_left_right$Randomly flip} the image from left to right.
* Randomly distort the @{tf.image.random_brightness$image brightness}.
* Randomly distort the @{tf.image.random_contrast$image contrast}.
Please see the @{$python/image$Images} page for the list of
available distortions. We also attach an
@{tf.summary.image} to the images
so that we may visualize them in @{$summaries_and_tensorboard$TensorBoard}.
This is a good practice to verify that inputs are built correctly.
<div style="width:50%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:70%" src="https://www.tensorflow.org/images/cifar_image_summary.png">
</div>
Reading images from disk and distorting them can use a non-trivial amount of
processing time. To prevent these operations from slowing down training, we run
them inside 16 separate threads which continuously fill a TensorFlow
@{tf.train.shuffle_batch$queue}.
### Model Prediction
The prediction part of the model is constructed by the `inference()` function
which adds operations to compute the *logits* of the predictions. That part of
the model is organized as follows:
Layer Name | Description
--- | ---
`conv1` | @{tf.nn.conv2d$convolution} and @{tf.nn.relu$rectified linear} activation.
`pool1` | @{tf.nn.max_pool$max pooling}.
`norm1` | @{tf.nn.local_response_normalization$local response normalization}.
`conv2` | @{tf.nn.conv2d$convolution} and @{tf.nn.relu$rectified linear} activation.
`norm2` | @{tf.nn.local_response_normalization$local response normalization}.
`pool2` | @{tf.nn.max_pool$max pooling}.
`local3` | @{$python/nn$fully connected layer with rectified linear activation}.
`local4` | @{$python/nn$fully connected layer with rectified linear activation}.
`softmax_linear` | linear transformation to produce logits.
Here is a graph generated from TensorBoard describing the inference operation:
<div style="width:15%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/cifar_graph.png">
</div>
> **EXERCISE**: The outputs of `inference` are un-normalized logits. Try editing
the network architecture to return normalized predictions using
@{tf.nn.softmax}.
The `inputs()` and `inference()` functions provide all the components
necessary to perform an evaluation of a model. We now shift our focus towards
building operations for training a model.
> **EXERCISE:** The model architecture in `inference()` differs slightly from
the CIFAR-10 model specified in
[cuda-convnet](https://code.google.com/p/cuda-convnet/). In particular, the top
layers of Alex's original model are locally connected and not fully connected.
Try editing the architecture to exactly reproduce the locally connected
architecture in the top layer.
### Model Training
The usual method for training a network to perform N-way classification is
[multinomial logistic regression](https://en.wikipedia.org/wiki/Multinomial_logistic_regression),
a.k.a. *softmax regression*. Softmax regression applies a
@{tf.nn.softmax$softmax} nonlinearity to the
output of the network and calculates the
@{tf.nn.sparse_softmax_cross_entropy_with_logits$cross-entropy}
between the normalized predictions and the label index.
For regularization, we also apply the usual
@{tf.nn.l2_loss$weight decay} losses to all learned
variables. The objective function for the model is the sum of the cross entropy
loss and all these weight decay terms, as returned by the `loss()` function.
We visualize it in TensorBoard with a @{tf.summary.scalar}:
![CIFAR-10 Loss](https://www.tensorflow.org/images/cifar_loss.png "CIFAR-10 Total Loss")
We train the model using the standard
[gradient descent](https://en.wikipedia.org/wiki/Gradient_descent)
algorithm (see @{$python/train$Training} for other methods)
with a learning rate that
@{tf.train.exponential_decay$exponentially decays}
over time.
![CIFAR-10 Learning Rate Decay](https://www.tensorflow.org/images/cifar_lr_decay.png "CIFAR-10 Learning Rate Decay")
The `train()` function adds the operations needed to minimize the objective by
calculating the gradient and updating the learned variables (see
@{tf.train.GradientDescentOptimizer}
for details). It returns an operation that executes all the calculations
needed to train and update the model for one batch of images.
## Launching and Training the Model
We have built the model, let's now launch it and run the training operation with
the script `cifar10_train.py`.
```shell
python cifar10_train.py
```
> **NOTE:** The first time you run any target in the CIFAR-10 tutorial,
the CIFAR-10 dataset is automatically downloaded. The data set is ~160MB
so you may want to grab a quick cup of coffee for your first run.
You should see the output:
```shell
Filling queue with 20000 CIFAR images before starting to train. This will take a few minutes.
2015-11-04 11:45:45.927302: step 0, loss = 4.68 (2.0 examples/sec; 64.221 sec/batch)
2015-11-04 11:45:49.133065: step 10, loss = 4.66 (533.8 examples/sec; 0.240 sec/batch)
2015-11-04 11:45:51.397710: step 20, loss = 4.64 (597.4 examples/sec; 0.214 sec/batch)
2015-11-04 11:45:54.446850: step 30, loss = 4.62 (391.0 examples/sec; 0.327 sec/batch)
2015-11-04 11:45:57.152676: step 40, loss = 4.61 (430.2 examples/sec; 0.298 sec/batch)
2015-11-04 11:46:00.437717: step 50, loss = 4.59 (406.4 examples/sec; 0.315 sec/batch)
...
```
The script reports the total loss every 10 steps as well as the speed at which
the last batch of data was processed. A few comments:
* The first batch of data can be inordinately slow (e.g. several minutes) as the
preprocessing threads fill up the shuffling queue with 20,000 processed CIFAR
images.
* The reported loss is the average loss of the most recent batch. Remember that
this loss is the sum of the cross entropy and all weight decay terms.
* Keep an eye on the processing speed of a batch. The numbers shown above were
obtained on a Tesla K40c. If you are running on a CPU, expect slower performance.
> **EXERCISE:** When experimenting, it is sometimes annoying that the first
training step can take so long. Try decreasing the number of images that
initially fill up the queue. Search for `min_fraction_of_examples_in_queue`
in `cifar10_input.py`.
`cifar10_train.py` periodically @{tf.train.Saver$saves}
all model parameters in
@{$guide/saved_model$checkpoint files}
but it does *not* evaluate the model. The checkpoint file
will be used by `cifar10_eval.py` to measure the predictive
performance (see [Evaluating a Model](#evaluating-a-model) below).
If you followed the previous steps, then you have now started training
a CIFAR-10 model. [Congratulations!](https://www.youtube.com/watch?v=9bZkp7q19f0)
The terminal text returned from `cifar10_train.py` provides minimal insight into
how the model is training. We want more insight into the model during training:
* Is the loss *really* decreasing or is that just noise?
* Is the model being provided appropriate images?
* Are the gradients, activations and weights reasonable?
* What is the learning rate currently at?
@{$summaries_and_tensorboard$TensorBoard} provides this
functionality, displaying data exported periodically from `cifar10_train.py` via
a
@{tf.summary.FileWriter}.
For instance, we can watch how the distribution of activations and degree of
sparsity in `local3` features evolve during training:
<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px; display: flex; flex-direction: row">
<img style="flex-grow:1; flex-shrink:1;" src="https://www.tensorflow.org/images/cifar_sparsity.png">
<img style="flex-grow:1; flex-shrink:1;" src="https://www.tensorflow.org/images/cifar_activations.png">
</div>
Individual loss functions, as well as the total loss, are particularly
interesting to track over time. However, the loss exhibits a considerable amount
of noise due to the small batch size employed by training. In practice we find
it extremely useful to visualize their moving averages in addition to their raw
values. See how the scripts use
@{tf.train.ExponentialMovingAverage}
for this purpose.
## Evaluating a Model
Let us now evaluate how well the trained model performs on a hold-out data set.
The model is evaluated by the script `cifar10_eval.py`. It constructs the model
with the `inference()` function and uses all 10,000 images in the evaluation set
of CIFAR-10. It calculates the *precision at 1:* how often the top prediction
matches the true label of the image.
To monitor how the model improves during training, the evaluation script runs
periodically on the latest checkpoint files created by `cifar10_train.py`.
```shell
python cifar10_eval.py
```
> Be careful not to run the evaluation and training binary on the same GPU or
else you might run out of memory. Consider running the evaluation on
a separate GPU if available or suspending the training binary while running
the evaluation on the same GPU.
You should see the output:
```shell
2015-11-06 08:30:44.391206: precision @ 1 = 0.860
...
```
The script merely returns the precision @ 1 periodically -- in this case
it returned 86% accuracy. `cifar10_eval.py` also
exports summaries that may be visualized in TensorBoard. These summaries
provide additional insight into the model during evaluation.
The training script calculates the
@{tf.train.ExponentialMovingAverage$moving average}
version of all learned variables. The evaluation script substitutes
all learned model parameters with the moving average version. This
substitution boosts model performance at evaluation time.
> **EXERCISE:** Employing averaged parameters may boost predictive performance
by about 3% as measured by precision @ 1. Edit `cifar10_eval.py` to not employ
the averaged parameters for the model and verify that the predictive performance
drops.
## Training a Model Using Multiple GPU Cards
Modern workstations may contain multiple GPUs for scientific computation.
TensorFlow can leverage this environment to run the training operation
concurrently across multiple cards.
Training a model in a parallel, distributed fashion requires
coordinating training processes. For what follows we term *model replica*
to be one copy of a model training on a subset of data.
Naively employing asynchronous updates of model parameters
leads to sub-optimal training performance
because an individual model replica might be trained on a stale
copy of the model parameters. Conversely, employing fully synchronous
updates will be as slow as the slowest model replica.
In a workstation with multiple GPU cards, each GPU will have similar speed
and contain enough memory to run an entire CIFAR-10 model. Thus, we opt to
design our training system in the following manner:
* Place an individual model replica on each GPU.
* Update model parameters synchronously by waiting for all GPUs to finish
processing a batch of data.
Here is a diagram of this model:
<div style="width:40%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/Parallelism.png">
</div>
Note that each GPU computes inference as well as the gradients for a unique
batch of data. This setup effectively permits dividing up a larger batch
of data across the GPUs.
This setup requires that all GPUs share the model parameters. A well-known
fact is that transferring data to and from GPUs is quite slow. For this
reason, we decide to store and update all model parameters on the CPU (see
green box). A fresh set of model parameters is transferred to the GPU
when a new batch of data is processed by all GPUs.
The GPUs are synchronized in operation. All gradients are accumulated from
the GPUs and averaged (see green box). The model parameters are updated with
the gradients averaged across all model replicas.
### Placing Variables and Operations on Devices
Placing operations and variables on devices requires some special
abstractions.
The first abstraction we require is a function for computing inference and
gradients for a single model replica. In the code we term this abstraction
a "tower". We must set two attributes for each tower:
* A unique name for all operations within a tower.
@{tf.name_scope} provides
this unique name by prepending a scope. For instance, all operations in
the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`.
* A preferred hardware device to run the operation within a tower.
@{tf.device} specifies this. For
instance, all operations in the first tower reside within `device('/device:GPU:0')`
scope indicating that they should be run on the first GPU.
All variables are pinned to the CPU and accessed via
@{tf.get_variable}
in order to share them in a multi-GPU version.
See the how-to on @{$variables$Sharing Variables}.
### Launching and Training the Model on Multiple GPU cards
If you have several GPU cards installed on your machine you can use them to
train the model faster with the `cifar10_multi_gpu_train.py` script. This
version of the training script parallelizes the model across multiple GPU cards.
```shell
python cifar10_multi_gpu_train.py --num_gpus=2
```
Note that the number of GPU cards used defaults to 1. Additionally, if only 1
GPU is available on your machine, all computations will be placed on it, even if
you ask for more.
> **EXERCISE:** The default setting for `cifar10_train.py` is to
run on a batch size of 128. Try running `cifar10_multi_gpu_train.py` on 2 GPUs
with a batch size of 64 and compare the training speed.
## Next Steps
[Congratulations!](https://www.youtube.com/watch?v=9bZkp7q19f0) You have
completed the CIFAR-10 tutorial.
If you are now interested in developing and training your own image
classification system, we recommend forking this tutorial and replacing
components to address your image classification problem.
> **EXERCISE:** Download the
[Street View House Numbers (SVHN)](http://ufldl.stanford.edu/housenumbers/) data set.
Fork the CIFAR-10 tutorial and swap in the SVHN as the input data. Try adapting
the network architecture to improve predictive performance.

View File

@ -0,0 +1,456 @@
# Image Recognition
Our brains make vision seem easy. It doesn't take any effort for humans to
tell apart a lion and a jaguar, read a sign, or recognize a human's face.
But these are actually hard problems to solve with a computer: they only
seem easy because our brains are incredibly good at understanding images.
In the last few years, the field of machine learning has made tremendous
progress on addressing these difficult problems. In particular, we've
found that a kind of model called a deep
[convolutional neural network](https://colah.github.io/posts/2014-07-Conv-Nets-Modular/)
can achieve reasonable performance on hard visual recognition tasks --
matching or exceeding human performance in some domains.
Researchers have demonstrated steady progress
in computer vision by validating their work against
[ImageNet](http://www.image-net.org) -- an academic benchmark for computer vision.
Successive models continue to show improvements, each time achieving
a new state-of-the-art result:
[QuocNet], [AlexNet], [Inception (GoogLeNet)], [BN-Inception-v2].
Researchers both internal and external to Google have published papers describing all
these models but the results are still hard to reproduce.
We're now taking the next step by releasing code for running image recognition
on our latest model, [Inception-v3].
[QuocNet]: https://static.googleusercontent.com/media/research.google.com/en//archive/unsupervised_icml2012.pdf
[AlexNet]: https://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
[Inception (GoogLeNet)]: https://arxiv.org/abs/1409.4842
[BN-Inception-v2]: https://arxiv.org/abs/1502.03167
[Inception-v3]: https://arxiv.org/abs/1512.00567
Inception-v3 is trained for the [ImageNet] Large Visual Recognition Challenge
using the data from 2012. This is a standard task in computer vision,
where models try to classify entire
images into [1000 classes], like "Zebra", "Dalmatian", and "Dishwasher".
For example, here are the results from [AlexNet] classifying some images:
<div style="width:50%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/AlexClassification.png">
</div>
To compare models, we examine how often the model fails to predict the
correct answer as one of its top 5 guesses -- termed "top-5 error rate".
[AlexNet] achieved a top-5 error rate of 15.3% on the 2012
validation data set; [Inception (GoogLeNet)] achieved 6.67%;
[BN-Inception-v2] achieved 4.9%; [Inception-v3] reaches 3.46%.
> How well do humans do on ImageNet Challenge? There's a [blog post] by
Andrej Karpathy who attempted to measure his own performance. He reached
5.1% top-5 error rate.
[ImageNet]: http://image-net.org/
[1000 classes]: http://image-net.org/challenges/LSVRC/2014/browse-synsets
[blog post]: https://karpathy.github.io/2014/09/02/what-i-learned-from-competing-against-a-convnet-on-imagenet/
This tutorial will teach you how to use [Inception-v3]. You'll learn how to
classify images into [1000 classes] in Python or C++. We'll also discuss how to
extract higher level features from this model which may be reused for other
vision tasks.
We're excited to see what the community will do with this model.
## Usage with Python API
`classify_image.py` downloads the trained model from `tensorflow.org`
when the program is run for the first time. You'll need about 200 MB of free space
available on your hard disk.
Start by cloning the [TensorFlow models repo](https://github.com/tensorflow/models) from GitHub. Run the following commands:
cd models/tutorials/image/imagenet
python classify_image.py
The above command will classify a supplied image of a panda bear.
<div style="width:15%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/cropped_panda.jpg">
</div>
If the model runs correctly, the script will produce the following output:
giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca (score = 0.88493)
indri, indris, Indri indri, Indri brevicaudatus (score = 0.00878)
lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens (score = 0.00317)
custard apple (score = 0.00149)
earthstar (score = 0.00127)
If you wish to supply other JPEG images, you may do so by editing
the `--image_file` argument.
> If you download the model data to a different directory, you
will need to point `--model_dir` to the directory used.
## Usage with the C++ API
You can run the same [Inception-v3] model in C++ for use in production
environments. You can download the archive containing the GraphDef that defines
the model like this (running from the root directory of the TensorFlow
repository):
```bash
curl -L "https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz" |
tar -C tensorflow/examples/label_image/data -xz
```
Next, we need to compile the C++ binary that includes the code to load and run the graph.
If you've followed
@{$install_sources$the instructions to download the source installation of TensorFlow}
for your platform, you should be able to build the example by
running this command from your shell terminal:
```bash
bazel build tensorflow/examples/label_image/...
```
That should create a binary executable that you can then run like this:
```bash
bazel-bin/tensorflow/examples/label_image/label_image
```
This uses the default example image that ships with the framework, and should
output something similar to this:
```
I tensorflow/examples/label_image/main.cc:206] military uniform (653): 0.834306
I tensorflow/examples/label_image/main.cc:206] mortarboard (668): 0.0218692
I tensorflow/examples/label_image/main.cc:206] academic gown (401): 0.0103579
I tensorflow/examples/label_image/main.cc:206] pickelhaube (716): 0.00800814
I tensorflow/examples/label_image/main.cc:206] bulletproof vest (466): 0.00535088
```
In this case, we're using the default image of
[Admiral Grace Hopper](https://en.wikipedia.org/wiki/Grace_Hopper), and you can
see that the network correctly identifies that she's wearing a military uniform, with a high
score of 0.8.
<div style="width:45%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/grace_hopper.jpg">
</div>
Next, try it out on your own images by supplying the `--image=` argument, e.g.
```bash
bazel-bin/tensorflow/examples/label_image/label_image --image=my_image.png
```
If you look inside the [`tensorflow/examples/label_image/main.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc)
file, you can find out
how it works. We hope this code will help you integrate TensorFlow into
your own applications, so we will walk step by step through the main functions:
The command line flags control where the files are loaded from, and properties of the input images.
The model expects to get square 299x299 RGB images, so those are the `input_width`
and `input_height` flags. We also need to scale the pixel values from integers that
are between 0 and 255 to the floating point values that the graph operates on.
We control the scaling with the `input_mean` and `input_std` flags: we first subtract
`input_mean` from each pixel value, then divide it by `input_std`.
These values probably look somewhat magical, but they are just defined by the
original model author based on what he/she wanted to use as input images for
training. If you have a graph that you've trained yourself, you'll just need
to adjust the values to match whatever you used during your training process.
You can see how they're applied to an image in the
[`ReadTensorFromImageFile()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L88)
function.
```C++
// Given an image file name, read in the data, try to decode it as an image,
// resize it to the requested size, and then scale the values as desired.
Status ReadTensorFromImageFile(string file_name, const int input_height,
const int input_width, const float input_mean,
const float input_std,
std::vector<Tensor>* out_tensors) {
tensorflow::GraphDefBuilder b;
```
We start by creating a `GraphDefBuilder`, which is an object we can use to
specify a model to run or load.
```C++
string input_name = "file_reader";
string output_name = "normalized";
tensorflow::Node* file_reader =
tensorflow::ops::ReadFile(tensorflow::ops::Const(file_name, b.opts()),
b.opts().WithName(input_name));
```
We then start creating nodes for the small model we want to run
to load, resize, and scale the pixel values to get the result the main model
expects as its input. The first node we create is just a `Const` op that holds a
tensor with the file name of the image we want to load. That's then passed as the
first input to the `ReadFile` op. You might notice we're passing `b.opts()` as the last
argument to all the op creation functions. The argument ensures that the node is added to
the model definition held in the `GraphDefBuilder`. We also name the `ReadFile`
operator by making the `WithName()` call to `b.opts()`. This gives a name to the node,
which isn't strictly necessary since an automatic name will be assigned if you don't
do this, but it does make debugging a bit easier.
```C++
// Now try to figure out what kind of file it is and decode it.
const int wanted_channels = 3;
tensorflow::Node* image_reader;
if (tensorflow::StringPiece(file_name).ends_with(".png")) {
image_reader = tensorflow::ops::DecodePng(
file_reader,
b.opts().WithAttr("channels", wanted_channels).WithName("png_reader"));
} else {
// Assume if it's not a PNG then it must be a JPEG.
image_reader = tensorflow::ops::DecodeJpeg(
file_reader,
b.opts().WithAttr("channels", wanted_channels).WithName("jpeg_reader"));
}
// Now cast the image data to float so we can do normal math on it.
tensorflow::Node* float_caster = tensorflow::ops::Cast(
image_reader, tensorflow::DT_FLOAT, b.opts().WithName("float_caster"));
// The convention for image ops in TensorFlow is that all images are expected
// to be in batches, so that they're four-dimensional arrays with indices of
// [batch, height, width, channel]. Because we only have a single image, we
// have to add a batch dimension of 1 to the start with ExpandDims().
tensorflow::Node* dims_expander = tensorflow::ops::ExpandDims(
float_caster, tensorflow::ops::Const(0, b.opts()), b.opts());
// Bilinearly resize the image to fit the required dimensions.
tensorflow::Node* resized = tensorflow::ops::ResizeBilinear(
dims_expander, tensorflow::ops::Const({input_height, input_width},
b.opts().WithName("size")),
b.opts());
// Subtract the mean and divide by the scale.
tensorflow::ops::Div(
tensorflow::ops::Sub(
resized, tensorflow::ops::Const({input_mean}, b.opts()), b.opts()),
tensorflow::ops::Const({input_std}, b.opts()),
b.opts().WithName(output_name));
```
We then keep adding more nodes, to decode the file data as an image, to cast the
integers into floating point values, to resize it, and then finally to run the
subtraction and division operations on the pixel values.
```C++
// This runs the GraphDef network definition that we've just constructed, and
// returns the results in the output tensor.
tensorflow::GraphDef graph;
TF_RETURN_IF_ERROR(b.ToGraphDef(&graph));
```
At the end of this we have
a model definition stored in the `b` variable, which we turn into a full graph
definition with the `ToGraphDef()` function.
```C++
std::unique_ptr<tensorflow::Session> session(
tensorflow::NewSession(tensorflow::SessionOptions()));
TF_RETURN_IF_ERROR(session->Create(graph));
TF_RETURN_IF_ERROR(session->Run({}, {output_name}, {}, out_tensors));
return Status::OK();
```
Then we create a @{tf.Session}
object, which is the interface to actually running the graph, and run it,
specifying which node we want to get the output from, and where to put the
output data.
This gives us a vector of `Tensor` objects, which in this case we know will only be a
single object long. You can think of a `Tensor` as a multi-dimensional array in this
context, and it holds a 299 pixel high, 299 pixel wide, 3 channel image as float
values. If you have your own image-processing framework in your product already, you
should be able to use that instead, as long as you apply the same transformations
before you feed images into the main graph.
This is a simple example of creating a small TensorFlow graph dynamically in C++,
but for the pre-trained Inception model we want to load a much larger definition from
a file. You can see how we do that in the `LoadGraph()` function.
```C++
// Reads a model graph definition from disk, and creates a session object you
// can use to run it.
Status LoadGraph(string graph_file_name,
std::unique_ptr<tensorflow::Session>* session) {
tensorflow::GraphDef graph_def;
Status load_graph_status =
ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
if (!load_graph_status.ok()) {
return tensorflow::errors::NotFound("Failed to load compute graph at '",
graph_file_name, "'");
}
```
If you've looked through the image loading code, a lot of the terms should seem familiar. Rather than
using a `GraphDefBuilder` to produce a `GraphDef` object, we load a protobuf file that
directly contains the `GraphDef`.
```C++
session->reset(tensorflow::NewSession(tensorflow::SessionOptions()));
Status session_create_status = (*session)->Create(graph_def);
if (!session_create_status.ok()) {
return session_create_status;
}
return Status::OK();
}
```
Then we create a Session object from that `GraphDef` and
pass it back to the caller so that they can run it at a later time.
The `GetTopLabels()` function is a lot like the image loading, except that in this case
we want to take the results of running the main graph, and turn it into a sorted list
of the highest-scoring labels. Just like the image loader, it creates a
`GraphDefBuilder`, adds a couple of nodes to it, and then runs the short graph to get a
pair of output tensors. In this case they represent the sorted scores and index
positions of the highest results.
```C++
// Analyzes the output of the Inception graph to retrieve the highest scores and
// their positions in the tensor, which correspond to categories.
Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels,
Tensor* indices, Tensor* scores) {
tensorflow::GraphDefBuilder b;
string output_name = "top_k";
tensorflow::ops::TopK(tensorflow::ops::Const(outputs[0], b.opts()),
how_many_labels, b.opts().WithName(output_name));
// This runs the GraphDef network definition that we've just constructed, and
// returns the results in the output tensors.
tensorflow::GraphDef graph;
TF_RETURN_IF_ERROR(b.ToGraphDef(&graph));
std::unique_ptr<tensorflow::Session> session(
tensorflow::NewSession(tensorflow::SessionOptions()));
TF_RETURN_IF_ERROR(session->Create(graph));
// The TopK node returns two outputs, the scores and their original indices,
// so we have to append :0 and :1 to specify them both.
std::vector<Tensor> out_tensors;
TF_RETURN_IF_ERROR(session->Run({}, {output_name + ":0", output_name + ":1"},
{}, &out_tensors));
*scores = out_tensors[0];
*indices = out_tensors[1];
return Status::OK();
```
The `PrintTopLabels()` function takes those sorted results, and prints them out in a
friendly way. The `CheckTopLabel()` function is very similar, but just makes sure that
the top label is the one we expect, for debugging purposes.
At the end, [`main()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L252)
ties together all of these calls.
```C++
int main(int argc, char* argv[]) {
// We need to call this to set up global state for TensorFlow.
tensorflow::port::InitMain(argv[0], &argc, &argv);
Status s = tensorflow::ParseCommandLineFlags(&argc, argv);
if (!s.ok()) {
LOG(ERROR) << "Error parsing command line flags: " << s.ToString();
return -1;
}
// First we load and initialize the model.
std::unique_ptr<tensorflow::Session> session;
string graph_path = tensorflow::io::JoinPath(FLAGS_root_dir, FLAGS_graph);
Status load_graph_status = LoadGraph(graph_path, &session);
if (!load_graph_status.ok()) {
LOG(ERROR) << load_graph_status;
return -1;
}
```
We load the main graph.
```C++
// Get the image from disk as a float array of numbers, resized and normalized
// to the specifications the main graph expects.
std::vector<Tensor> resized_tensors;
string image_path = tensorflow::io::JoinPath(FLAGS_root_dir, FLAGS_image);
Status read_tensor_status = ReadTensorFromImageFile(
image_path, FLAGS_input_height, FLAGS_input_width, FLAGS_input_mean,
FLAGS_input_std, &resized_tensors);
if (!read_tensor_status.ok()) {
LOG(ERROR) << read_tensor_status;
return -1;
}
const Tensor& resized_tensor = resized_tensors[0];
```
Load, resize, and process the input image.
```C++
// Actually run the image through the model.
std::vector<Tensor> outputs;
Status run_status = session->Run({{FLAGS_input_layer, resized_tensor}},
{FLAGS_output_layer}, {}, &outputs);
if (!run_status.ok()) {
LOG(ERROR) << "Running model failed: " << run_status;
return -1;
}
```
Here we run the loaded graph with the image as an input.
```C++
// This is for automated testing to make sure we get the expected result with
// the default settings. We know that label 866 (military uniform) should be
// the top label for the Admiral Hopper image.
if (FLAGS_self_test) {
bool expected_matches;
Status check_status = CheckTopLabel(outputs, 866, &expected_matches);
if (!check_status.ok()) {
LOG(ERROR) << "Running check failed: " << check_status;
return -1;
}
if (!expected_matches) {
LOG(ERROR) << "Self-test failed!";
return -1;
}
}
```
For testing purposes we can check to make sure we get the output we expect here.
```C++
// Do something interesting with the results we've generated.
Status print_status = PrintTopLabels(outputs, FLAGS_labels);
```
Finally we print the labels we found.
```C++
if (!print_status.ok()) {
LOG(ERROR) << "Running print failed: " << print_status;
return -1;
}
```
The error handling here is using TensorFlow's `Status`
object, which is very convenient because it lets you know whether any error has
occurred with the `ok()` checker, and then can be printed out to give a readable error
message.
In this case we are demonstrating object recognition, but you should be able to
use very similar code on other models you've found or trained yourself, across
all
sorts of domains. We hope this small example gives you some ideas on how to use
TensorFlow within your own products.
> **EXERCISE**: Transfer learning is the idea that, if you know how to solve a task well, you
should be able to transfer some of that understanding to solving related
problems. One way to perform transfer learning is to remove the final
classification layer of the network and extract
the [next-to-last layer of the CNN](https://arxiv.org/abs/1310.1531), in this case a 2048 dimensional vector.
There's a guide to doing this @{$image_retraining$in the how-to section}.
## Resources for Learning More
To learn about neural networks in general, Michael Nielsen's
[free online book](http://neuralnetworksanddeeplearning.com/chap1.html)
is an excellent resource. For convolutional neural networks in particular,
Chris Olah has some
[nice blog posts](https://colah.github.io/posts/2014-07-Conv-Nets-Modular/),
and Michael Nielsen's book has a
[great chapter](http://neuralnetworksanddeeplearning.com/chap6.html)
covering them.
To find out more about implementing convolutional neural networks, you can jump
to the TensorFlow @{$deep_cnn$deep convolutional networks tutorial},
or start a bit more gently with our @{$layers$MNIST starter tutorial}.
Finally, if you want to get up to speed on research in this area, you can
read the papers referenced in this tutorial and the recent work that builds on them.

View File

@ -0,0 +1,4 @@
# How to Retrain Inception's Final Layer for New Categories
**NOTE: This tutorial has moved to**
https://github.com/tensorflow/hub/tree/master/docs/tutorials/image_retraining.md

View File

@ -80,21 +80,21 @@ for details. It consists of 1,068,298 learnable parameters and requires about
## Code Organization
The code for this tutorial resides in
[`models/tutorials/image/cifar10/`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/).
[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/).
File | Purpose
--- | ---
[`cifar10_input.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_input.py) | Reads the native CIFAR-10 binary file format.
[`cifar10.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model.
[`cifar10_train.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU.
[`cifar10_multi_gpu_train.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs.
[`cifar10_eval.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model.
[`cifar10_input.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_input.py) | Reads the native CIFAR-10 binary file format.
[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model.
[`cifar10_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU.
[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs.
[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model.
## CIFAR-10 Model
The CIFAR-10 network is largely contained in
[`cifar10.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10.py).
[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py).
The complete training
graph contains roughly 765 operations. We find that we can make the code most
reusable by constructing the graph with the following modules:

View File

@ -0,0 +1,59 @@
# Tutorials
This section contains tutorials demonstrating how to do specific tasks
in TensorFlow. If you are new to TensorFlow, we recommend reading
[Get Started with TensorFlow](/get_started/).
## Images
These tutorials cover different aspects of image recognition:
* @{$layers$MNIST}, which introduces convolutional neural networks (CNNs) and
demonstrates how to build a CNN in TensorFlow.
* @{$image_recognition}, which introduces the field of image recognition and
uses a pre-trained model (Inception) for recognizing images.
* @{$image_retraining}, which has a wonderfully self-explanatory title.
* @{$deep_cnn}, which demonstrates how to build a small CNN for recognizing
images. This tutorial is aimed at advanced TensorFlow users.
## Sequences
These tutorials focus on machine learning problems dealing with sequence data.
* @{$recurrent}, which demonstrates how to use a
recurrent neural network to predict the next word in a sentence.
* @{$seq2seq}, which demonstrates how to use a
sequence-to-sequence model to translate text from English to French.
* @{$recurrent_quickdraw}, which
builds a classification model for drawings, directly from the sequence of
pen strokes.
* @{$audio_recognition}, which shows how to
build a basic speech recognition network.
## Data representation
These tutorials demonstrate various data representations that can be used in
TensorFlow.
* @{$wide}, which uses
@{tf.feature_column$feature columns} to feed a variety of data types
to a linear model to solve a classification problem.
* @{$wide_and_deep}, which builds on the
above linear model tutorial, adding a deep feed-forward neural network
component and a DNN-compatible data representation.
* @{$word2vec}, which demonstrates how to
create an embedding for words.
* @{$kernel_methods},
which shows how to improve the quality of a linear model by using explicit
kernel mappings.
## Non Machine Learning
Although TensorFlow specializes in machine learning, the core of TensorFlow is
a powerful numeric computation system which you can also use to solve other
kinds of math problems. For example:
* @{$mandelbrot}
* @{$pdes}

View File

@ -0,0 +1,304 @@
# Improving Linear Models Using Explicit Kernel Methods
Note: This document uses a deprecated version of @{tf.estimator},
which has a @{tf.contrib.learn.Estimator$different interface}.
It also uses other `contrib` methods whose
@{$version_compat#not_covered$API may not be stable}.
In this tutorial, we demonstrate how combining (explicit) kernel methods with
linear models can drastically increase the latter's quality of predictions
without significantly increasing training and inference times. Unlike dual
kernel methods, explicit (primal) kernel methods scale well with the size of the
training dataset both in terms of training/inference times and in terms of
memory requirements.
**Intended audience:** Even though we provide a high-level overview of concepts
related to explicit kernel methods, this tutorial primarily targets readers who
already have at least basic knowledge of kernel methods and Support Vector
Machines (SVMs). If you are new to kernel methods, refer to either of the
following sources for an introduction:
* If you have a strong mathematical background:
[Kernel Methods in Machine Learning](https://arxiv.org/pdf/math/0701907.pdf)
* [Kernel method wikipedia page](https://en.wikipedia.org/wiki/Kernel_method)
Currently, TensorFlow supports explicit kernel mappings for dense features only;
TensorFlow will provide support for sparse features in a later release.
This tutorial uses [tf.contrib.learn](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn)
(TensorFlow's high-level Machine Learning API) Estimators for our ML models.
If you are not familiar with this API, [tf.estimator Quickstart](https://www.tensorflow.org/get_started/estimator)
is a good place to start. We will use the MNIST dataset. The tutorial consists
of the following steps:
* Load and prepare MNIST data for classification.
* Construct a simple linear model, train it, and evaluate it on the eval data.
* Replace the linear model with a kernelized linear model, re-train, and
re-evaluate.
## Load and prepare MNIST data for classification
Run the following utility command to load the MNIST dataset:
```python
data = tf.contrib.learn.datasets.mnist.load_mnist()
```
The preceding method loads the entire MNIST dataset (containing 70K samples) and
splits it into train, validation, and test data with 55K, 5K, and 10K samples
respectively. Each split contains one numpy array for images (with shape
[sample_size, 784]) and one for labels (with shape [sample_size, 1]). In this
tutorial, we only use the train and validation splits to train and evaluate our
models respectively.
In order to feed data to a `tf.contrib.learn Estimator`, it is helpful to convert
it to Tensors. For this, we will use an `input function` which adds Ops to the
TensorFlow graph that, when executed, create mini-batches of Tensors to be used
downstream. For more background on input functions, check
@{$premade_estimators#create_input_functions$this section on input functions}.
In this example, we will use the `tf.train.shuffle_batch` Op which, besides
converting numpy arrays to Tensors, allows us to specify the batch_size and
whether to randomize the input every time the input_fn Ops are executed
(randomization typically expedites convergence during training). The full code
for loading and preparing the data is shown in the snippet below. In this
example, we use mini-batches of size 256 for training and the entire sample
(5K entries) for evaluation. Feel free to experiment with different batch sizes.
```python
import numpy as np
import tensorflow as tf
def get_input_fn(dataset_split, batch_size, capacity=10000, min_after_dequeue=3000):
def _input_fn():
images_batch, labels_batch = tf.train.shuffle_batch(
tensors=[dataset_split.images, dataset_split.labels.astype(np.int32)],
batch_size=batch_size,
capacity=capacity,
min_after_dequeue=min_after_dequeue,
enqueue_many=True,
num_threads=4)
features_map = {'images': images_batch}
return features_map, labels_batch
return _input_fn
data = tf.contrib.learn.datasets.mnist.load_mnist()
train_input_fn = get_input_fn(data.train, batch_size=256)
eval_input_fn = get_input_fn(data.validation, batch_size=5000)
```
## Training a simple linear model
We can now train a linear model over the MNIST dataset. We will use the
@{tf.contrib.learn.LinearClassifier} estimator with 10 classes representing the
10 digits. The input features form a 784-dimensional dense vector which can
be specified as follows:
```python
image_column = tf.contrib.layers.real_valued_column('images', dimension=784)
```
The full code for constructing, training and evaluating a LinearClassifier
estimator is as follows:
```python
import time
# Specify the feature(s) to be used by the estimator.
image_column = tf.contrib.layers.real_valued_column('images', dimension=784)
estimator = tf.contrib.learn.LinearClassifier(feature_columns=[image_column], n_classes=10)
# Train.
start = time.time()
estimator.fit(input_fn=train_input_fn, steps=2000)
end = time.time()
print('Elapsed time: {} seconds'.format(end - start))
# Evaluate and report metrics.
eval_metrics = estimator.evaluate(input_fn=eval_input_fn, steps=1)
print(eval_metrics)
```
The following table summarizes the results on the eval data.
metric | value
:------------ | :------------
loss | 0.25 to 0.30
accuracy | 92.5%
training time | ~25 seconds on my machine
Note: Metrics will vary depending on various factors.
In addition to experimenting with the (training) batch size and the number of
training steps, there are a couple of other parameters that can be tuned as well.
For instance, you can change the optimization method used to minimize the loss
by explicitly selecting another optimizer from the collection of
[available optimizers](https://www.tensorflow.org/code/tensorflow/python/training).
As an example, the following code constructs a LinearClassifier estimator that
uses the Follow-The-Regularized-Leader (FTRL) optimization strategy with a
specific learning rate and L2-regularization.
```python
optimizer = tf.train.FtrlOptimizer(learning_rate=5.0, l2_regularization_strength=1.0)
estimator = tf.contrib.learn.LinearClassifier(
feature_columns=[image_column], n_classes=10, optimizer=optimizer)
```
Regardless of the values of the parameters, the maximum accuracy a linear model
can achieve on this dataset caps at around **93%**.
## Using explicit kernel mappings with the linear model
The relatively high error (~7%) of the linear model over MNIST indicates that
the input data is not linearly separable. We will use explicit kernel mappings
to reduce the classification error.
**Intuition:** The high-level idea is to use a non-linear map to transform the
input space to another feature space (of possibly higher dimension) where the
(transformed) features are (almost) linearly separable and then apply a linear
model on the mapped features. This is shown in the following figure:
<div style="text-align:center">
<img src="https://www.tensorflow.org/versions/master/images/kernel_mapping.png" />
</div>
### Technical details
In this example we will use **Random Fourier Features**, introduced in the
["Random Features for Large-Scale Kernel Machines"](https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)
paper by Rahimi and Recht, to map the input data. Random Fourier Features map a
vector \\(\mathbf{x} \in \mathbb{R}^d\\) to \\(\mathbf{x'} \in \mathbb{R}^D\\)
via the following mapping:
$$
RFFM(\cdot): \mathbb{R}^d \to \mathbb{R}^D, \quad
RFFM(\mathbf{x}) = \cos(\mathbf{\Omega} \cdot \mathbf{x}+ \mathbf{b})
$$
where \\(\mathbf{\Omega} \in \mathbb{R}^{D \times d}\\),
\\(\mathbf{x} \in \mathbb{R}^d,\\) \\(\mathbf{b} \in \mathbb{R}^D\\) and the
cosine is applied element-wise.
In this example, the entries of \\(\mathbf{\Omega}\\) and \\(\mathbf{b}\\) are
sampled from distributions such that the mapping satisfies the following
property:
$$
RFFM(\mathbf{x})^T \cdot RFFM(\mathbf{y}) \approx
e^{-\frac{\|\mathbf{x} - \mathbf{y}\|^2}{2 \sigma^2}}
$$
The right-hand-side quantity of the expression above is known as the RBF (or
Gaussian) kernel function. This function is one of the most widely used kernel
functions in Machine Learning and implicitly measures similarity in a different,
much higher dimensional space than the original one. See
[Radial basis function kernel](https://en.wikipedia.org/wiki/Radial_basis_function_kernel)
for more details.
### Kernel classifier
@{tf.contrib.kernel_methods.KernelLinearClassifier} is a pre-packaged
`tf.contrib.learn` estimator that combines the power of explicit kernel mappings
with linear models. Its constructor is almost identical to that of the
LinearClassifier estimator with the additional option to specify a list of
explicit kernel mappings to be applied to each feature the classifier uses. The
following code snippet demonstrates how to replace LinearClassifier with
KernelLinearClassifier.
```python
# Specify the feature(s) to be used by the estimator. This is identical to the
# code used for the LinearClassifier.
image_column = tf.contrib.layers.real_valued_column('images', dimension=784)
optimizer = tf.train.FtrlOptimizer(
learning_rate=50.0, l2_regularization_strength=0.001)
kernel_mapper = tf.contrib.kernel_methods.RandomFourierFeatureMapper(
input_dim=784, output_dim=2000, stddev=5.0, name='rffm')
kernel_mappers = {image_column: [kernel_mapper]}
estimator = tf.contrib.kernel_methods.KernelLinearClassifier(
n_classes=10, optimizer=optimizer, kernel_mappers=kernel_mappers)
# Train.
start = time.time()
estimator.fit(input_fn=train_input_fn, steps=2000)
end = time.time()
print('Elapsed time: {} seconds'.format(end - start))
# Evaluate and report metrics.
eval_metrics = estimator.evaluate(input_fn=eval_input_fn, steps=1)
print(eval_metrics)
```
The only additional parameter passed to `KernelLinearClassifier` is a dictionary
from feature_columns to a list of kernel mappings to be applied to the
corresponding feature column. The following lines instruct the classifier to
first map the initial 784-dimensional images to 2000-dimensional vectors using
random Fourier features and then learn a linear model on the transformed
vectors:
```python
kernel_mapper = tf.contrib.kernel_methods.RandomFourierFeatureMapper(
input_dim=784, output_dim=2000, stddev=5.0, name='rffm')
kernel_mappers = {image_column: [kernel_mapper]}
estimator = tf.contrib.kernel_methods.KernelLinearClassifier(
n_classes=10, optimizer=optimizer, kernel_mappers=kernel_mappers)
```
Notice the `stddev` parameter. This is the standard deviation (\\(\sigma\\)) of
the approximated RBF kernel and controls the similarity measure used in
classification. `stddev` is typically determined via hyperparameter tuning.
The results of running the preceding code are summarized in the following table.
We can further increase the accuracy by increasing the output dimension of the
mapping and tuning the standard deviation.
metric | value
:------------ | :------------
loss | 0.10
accuracy | 97%
training time | ~35 seconds on my machine
### stddev
The classification quality is very sensitive to the value of stddev. The
following table shows the accuracy of the classifier on the eval data for
different values of stddev. The optimal value is stddev=5.0. Notice how too
small or too high stddev values can dramatically decrease the accuracy of the
classification.
stddev | eval accuracy
:----- | :------------
1.0 | 0.1362
2.0 | 0.4764
4.0 | 0.9654
5.0 | 0.9766
8.0 | 0.9714
16.0 | 0.8878
### Output dimension
Intuitively, the larger the output dimension of the mapping, the closer the
inner product of two mapped vectors approximates the kernel, which typically
translates to better classification accuracy. Another way to think about this is
that the output dimension equals the number of weights of the linear model; the
larger this dimension, the larger the "degrees of freedom" of the model.
However, after a certain threshold, higher output dimensions increase the
accuracy by very little, while making training take more time. This is shown in
the following two figures, which depict the eval accuracy as a function of the
output dimension and the training time, respectively.
![image](https://www.tensorflow.org/versions/master/images/acc_vs_outdim.png)
![image](https://www.tensorflow.org/versions/master/images/acc-vs-trn_time.png)
## Summary
Explicit kernel mappings combine the predictive power of nonlinear models with
the scalability of linear models. Unlike traditional dual kernel methods,
explicit kernel methods can scale to millions or hundreds of millions of
samples. When using explicit kernel mappings, consider the following tips:
* Random Fourier Features can be particularly effective for datasets with dense
features.
* The parameters of the kernel mapping are often data-dependent. Model quality
can be very sensitive to these parameters. Use hyperparameter tuning to find the
optimal values.
* If you have multiple numerical features, concatenate them into a single
multi-dimensional feature and apply the kernel mapping to the concatenated
vector.

View File

@ -0,0 +1,694 @@
# A Guide to TF Layers: Building a Convolutional Neural Network
The TensorFlow @{tf.layers$`layers` module} provides a high-level API that makes
it easy to construct a neural network. It provides methods that facilitate the
creation of dense (fully connected) layers and convolutional layers, adding
activation functions, and applying dropout regularization. In this tutorial,
you'll learn how to use `layers` to build a convolutional neural network model
to recognize the handwritten digits in the MNIST data set.
![handwritten digits 0–9 from the MNIST data set](https://www.tensorflow.org/images/mnist_0-9.png)
**The [MNIST dataset](http://yann.lecun.com/exdb/mnist/) comprises 60,000
training examples and 10,000 test examples of the handwritten digits 0–9,
formatted as 28x28-pixel monochrome images.**
## Getting Started
Let's set up the skeleton for our TensorFlow program. Create a file called
`cnn_mnist.py`, and add the following code:
```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Imports
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
# Our application logic will be added here
if __name__ == "__main__":
tf.app.run()
```
As you work through the tutorial, you'll add code to construct, train, and
evaluate the convolutional neural network. The complete, final code can be
[found here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/layers/cnn_mnist.py).
## Intro to Convolutional Neural Networks
Convolutional neural networks (CNNs) are the current state-of-the-art model
architecture for image classification tasks. CNNs apply a series of filters to
the raw pixel data of an image to extract and learn higher-level features, which
the model can then use for classification. CNNs contain three components:
* **Convolutional layers**, which apply a specified number of convolution
filters to the image. For each subregion, the layer performs a set of
mathematical operations to produce a single value in the output feature map.
Convolutional layers then typically apply a
[ReLU activation function](https://en.wikipedia.org/wiki/Rectifier_\(neural_networks\)) to
the output to introduce nonlinearities into the model.
* **Pooling layers**, which
[downsample the image data](https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer)
extracted by the convolutional layers to reduce the dimensionality of the
feature map in order to decrease processing time. A commonly used pooling
algorithm is max pooling, which extracts subregions of the feature map
(e.g., 2x2-pixel tiles), keeps their maximum value, and discards all other
values.
* **Dense (fully connected) layers**, which perform classification on the
features extracted by the convolutional layers and downsampled by the
pooling layers. In a dense layer, every node in the layer is connected to
every node in the preceding layer.
Typically, a CNN is composed of a stack of convolutional modules that perform
feature extraction. Each module consists of a convolutional layer followed by a
pooling layer. The last convolutional module is followed by one or more dense
layers that perform classification. The final dense layer in a CNN contains a
single node for each target class in the model (all the possible classes the
model may predict), with a
[softmax](https://en.wikipedia.org/wiki/Softmax_function) activation function to
generate a value between 0 and 1 for each node (the sum of all these softmax values
is equal to 1). We can interpret the softmax values for a given image as
relative measurements of how likely it is that the image falls into each target
class.
> Note: For a more comprehensive walkthrough of CNN architecture, see Stanford
> University's <a href="https://cs231n.github.io/convolutional-networks/">
> Convolutional Neural Networks for Visual Recognition course materials</a>.
## Building the CNN MNIST Classifier {#building_the_cnn_mnist_classifier}
Let's build a model to classify the images in the MNIST dataset using the
following CNN architecture:
1. **Convolutional Layer #1**: Applies 32 5x5 filters (extracting 5x5-pixel
subregions), with ReLU activation function
2. **Pooling Layer #1**: Performs max pooling with a 2x2 filter and stride of 2
(which specifies that pooled regions do not overlap)
3. **Convolutional Layer #2**: Applies 64 5x5 filters, with ReLU activation
function
4. **Pooling Layer #2**: Again, performs max pooling with a 2x2 filter and
stride of 2
5. **Dense Layer #1**: 1,024 neurons, with dropout regularization rate of 0.4
(probability of 0.4 that any given element will be dropped during training)
6. **Dense Layer #2 (Logits Layer)**: 10 neurons, one for each digit target
class (0–9).
The `tf.layers` module contains methods to create each of the three layer types
above:
* `conv2d()`. Constructs a two-dimensional convolutional layer. Takes number
of filters, filter kernel size, padding, and activation function as
arguments.
* `max_pooling2d()`. Constructs a two-dimensional pooling layer using the
max-pooling algorithm. Takes pooling filter size and stride as arguments.
* `dense()`. Constructs a dense layer. Takes number of neurons and activation
function as arguments.
Each of these methods accepts a tensor as input and returns a transformed tensor
as output. This makes it easy to connect one layer to another: just take the
output from one layer-creation method and supply it as input to another.
Open `cnn_mnist.py` and add the following `cnn_model_fn` function, which
conforms to the interface expected by TensorFlow's Estimator API (more on this
later in [Create the Estimator](#create-the-estimator)). `cnn_model_fn` takes
MNIST feature data, labels, and
@{tf.estimator.ModeKeys$model mode} (`TRAIN`, `EVAL`, `PREDICT`) as arguments;
configures the CNN; and returns predictions, loss, and a training operation:
```python
def cnn_model_fn(features, labels, mode):
"""Model function for CNN."""
# Input Layer
input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
# Convolutional Layer #1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
# Pooling Layer #1
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
# Convolutional Layer #2 and Pooling Layer #2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
# Dense Layer
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
dropout = tf.layers.dropout(
inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
# Logits Layer
logits = tf.layers.dense(inputs=dropout, units=10)
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Calculate Loss (for both TRAIN and EVAL modes)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
# Configure the Training Op (for TRAIN mode)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(
loss=loss,
global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode)
eval_metric_ops = {
"accuracy": tf.metrics.accuracy(
labels=labels, predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(
mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
```
The following sections (with headings corresponding to each code block above)
dive deeper into the `tf.layers` code used to create each layer, as well as how
to calculate loss, configure the training op, and generate predictions. If
you're already experienced with CNNs and @{$custom_estimators$TensorFlow `Estimator`s},
and find the above code intuitive, you may want to skim these sections or just
skip ahead to ["Training and Evaluating the CNN MNIST Classifier"](#train_eval_mnist).
### Input Layer
The methods in the `layers` module for creating convolutional and pooling layers
for two-dimensional image data expect input tensors to have a shape of
<code>[<em>batch_size</em>, <em>image_height</em>, <em>image_width</em>,
<em>channels</em>]</code> by default. This behavior can be changed using the
<code><em>data_format</em></code> parameter. These values are defined as follows:
* _`batch_size`_. Size of the subset of examples to use when performing
gradient descent during training.
* _`image_height`_. Height of the example images.
* _`image_width`_. Width of the example images.
* _`channels`_. Number of color channels in the example images. For color
images, the number of channels is 3 (red, green, blue). For monochrome
images, there is just 1 channel (black).
* _`data_format`_. A string, one of `channels_last` (default) or `channels_first`.
`channels_last` corresponds to inputs with shape
`(batch, ..., channels)` while `channels_first` corresponds to
inputs with shape `(batch, channels, ...)`.
Here, our MNIST dataset is composed of monochrome 28x28 pixel images, so the
desired shape for our input layer is <code>[<em>batch_size</em>, 28, 28,
1]</code>.
To convert our input feature map (`features`) to this shape, we can perform the
following `reshape` operation:
```python
input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
```
Note that we've indicated `-1` for batch size, which specifies that this
dimension should be dynamically computed based on the number of input values in
`features["x"]`, holding the size of all other dimensions constant. This allows
us to treat `batch_size` as a hyperparameter that we can tune. For example, if
we feed examples into our model in batches of 5, `features["x"]` will contain
3,920 values (one value for each pixel in each image), and `input_layer` will
have a shape of `[5, 28, 28, 1]`. Similarly, if we feed examples in batches of
100, `features["x"]` will contain 78,400 values, and `input_layer` will have a
shape of `[100, 28, 28, 1]`.
### Convolutional Layer #1
In our first convolutional layer, we want to apply 32 5x5 filters to the input
layer, with a ReLU activation function. We can use the `conv2d()` method in the
`layers` module to create this layer as follows:
```python
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
```
The `inputs` argument specifies our input tensor, which must have the shape
<code>[<em>batch_size</em>, <em>image_height</em>, <em>image_width</em>,
<em>channels</em>]</code>. Here, we're connecting our first convolutional layer
to `input_layer`, which has the shape <code>[<em>batch_size</em>, 28, 28,
1]</code>.
> Note: <code>conv2d()</code> will instead accept a shape of
> <code>[<em>batch_size</em>, <em>channels</em>, <em>image_height</em>, <em>image_width</em>]</code> when passed the argument
> <code>data_format=channels_first</code>.
The `filters` argument specifies the number of filters to apply (here, 32), and
`kernel_size` specifies the dimensions of the filters as <code>[<em>height</em>,
<em>width</em>]</code> (here, <code>[5, 5]</code>).
<p class="tip"><b>TIP:</b> If filter height and width have the same value, you can instead specify a
single integer for <code>kernel_size</code>—e.g., <code>kernel_size=5</code>.</p>
The `padding` argument specifies one of two enumerated values
(case-insensitive): `valid` (default value) or `same`. To specify that the
output tensor should have the same height and width values as the input tensor,
we set `padding=same` here, which instructs TensorFlow to add 0 values to the
edges of the input tensor to preserve height and width of 28. (Without padding,
a 5x5 convolution over a 28x28 tensor will produce a 24x24 tensor, as there are
24x24 locations to extract a 5x5 tile from a 28x28 grid.)
The `activation` argument specifies the activation function to apply to the
output of the convolution. Here, we specify ReLU activation with
@{tf.nn.relu}.
Our output tensor produced by `conv2d()` has a shape of
<code>[<em>batch_size</em>, 28, 28, 32]</code>: the same height and width
dimensions as the input, but now with 32 channels holding the output from each
of the filters.
### Pooling Layer #1
Next, we connect our first pooling layer to the convolutional layer we just
created. We can use the `max_pooling2d()` method in `layers` to construct a
layer that performs max pooling with a 2x2 filter and stride of 2:
```python
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
```
Again, `inputs` specifies the input tensor, with a shape of
<code>[<em>batch_size</em>, <em>image_height</em>, <em>image_width</em>,
<em>channels</em>]</code>. Here, our input tensor is `conv1`, the output from
the first convolutional layer, which has a shape of <code>[<em>batch_size</em>,
28, 28, 32]</code>.
> Note: As with <code>conv2d()</code>, <code>max_pooling2d()</code> will instead
> accept a shape of <code>[<em>batch_size</em>, <em>channels</em>,
> <em>image_height</em>, <em>image_width</em>]</code> when passed the argument
> <code>data_format=channels_first</code>.
The `pool_size` argument specifies the size of the max pooling filter as
<code>[<em>height</em>, <em>width</em>]</code> (here, `[2, 2]`). If both
dimensions have the same value, you can instead specify a single integer (e.g.,
`pool_size=2`).
The `strides` argument specifies the size of the stride. Here, we set a stride
of 2, which indicates that the subregions extracted by the filter should be
separated by 2 pixels in both the height and width dimensions (for a 2x2 filter,
this means that none of the regions extracted will overlap). If you want to set
different stride values for height and width, you can instead specify a tuple or
list (e.g., `strides=[3, 6]`).
Our output tensor produced by `max_pooling2d()` (`pool1`) has a shape of
<code>[<em>batch_size</em>, 14, 14, 32]</code>: the 2x2 filter reduces height and width by 50% each.
### Convolutional Layer #2 and Pooling Layer #2
We can connect a second convolutional and pooling layer to our CNN using
`conv2d()` and `max_pooling2d()` as before. For convolutional layer #2, we
configure 64 5x5 filters with ReLU activation, and for pooling layer #2, we use
the same specs as pooling layer #1 (a 2x2 max pooling filter with stride of 2):
```python
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5, 5],
padding="same",
activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
```
Note that convolutional layer #2 takes the output tensor of our first pooling
layer (`pool1`) as input, and produces the tensor `conv2` as output. `conv2`
has a shape of <code>[<em>batch_size</em>, 14, 14, 64]</code>, the same height and width as `pool1` (due to `padding="same"`), and 64 channels for the 64
filters applied.
Pooling layer #2 takes `conv2` as input, producing `pool2` as output. `pool2`
has shape <code>[<em>batch_size</em>, 7, 7, 64]</code> (50% reduction of height and width from `conv2`).
### Dense Layer
Next, we want to add a dense layer (with 1,024 neurons and ReLU activation) to
our CNN to perform classification on the features extracted by the
convolution/pooling layers. Before we connect the layer, however, we'll flatten
our feature map (`pool2`) to shape <code>[<em>batch_size</em>,
<em>features</em>]</code>, so that our tensor has only two dimensions:
```python
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
```
In the `reshape()` operation above, the `-1` signifies that the *`batch_size`*
dimension will be dynamically calculated based on the number of examples in our
input data. Each example has 7 (`pool2` height) * 7 (`pool2` width) * 64
(`pool2` channels) features, so we want the `features` dimension to have a value
of 7 * 7 * 64 (3136 in total). The output tensor, `pool2_flat`, has shape
<code>[<em>batch_size</em>, 3136]</code>.
Now, we can use the `dense()` method in `layers` to connect our dense layer as
follows:
```python
dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
```
The `inputs` argument specifies the input tensor: our flattened feature map,
`pool2_flat`. The `units` argument specifies the number of neurons in the dense
layer (1,024). The `activation` argument takes the activation function; again,
we'll use `tf.nn.relu` to add ReLU activation.
To help improve the results of our model, we also apply dropout regularization
to our dense layer, using the `dropout` method in `layers`:
```python
dropout = tf.layers.dropout(
inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
```
Again, `inputs` specifies the input tensor, which is the output tensor from our
dense layer (`dense`).
The `rate` argument specifies the dropout rate; here, we use `0.4`, which means
40% of the elements will be randomly dropped out during training.
The `training` argument takes a boolean specifying whether or not the model is
currently being run in training mode; dropout will only be performed if
`training` is `True`. Here, we check if the `mode` passed to our model function
`cnn_model_fn` is `TRAIN` mode.
Our output tensor `dropout` has shape <code>[<em>batch_size</em>, 1024]</code>.
### Logits Layer
The final layer in our neural network is the logits layer, which will return the
raw values for our predictions. We create a dense layer with 10 neurons (one for
each target class 0–9), with linear activation (the default):
```python
logits = tf.layers.dense(inputs=dropout, units=10)
```
Our final output tensor of the CNN, `logits`, has shape
<code>[<em>batch_size</em>, 10]</code>.
### Generate Predictions {#generate_predictions}
The logits layer of our model returns our predictions as raw values in a
<code>[<em>batch_size</em>, 10]</code>-dimensional tensor. Let's convert these
raw values into two different formats that our model function can return:
* The **predicted class** for each example: a digit from 0–9.
* The **probabilities** for each possible target class for each example: the
probability that the example is a 0, is a 1, is a 2, etc.
For a given example, our predicted class is the element in the corresponding row
of the logits tensor with the highest raw value. We can find the index of this
element using the @{tf.argmax}
function:
```python
tf.argmax(input=logits, axis=1)
```
The `input` argument specifies the tensor from which to extract maximum
values—here `logits`. The `axis` argument specifies the axis of the `input`
tensor along which to find the greatest value. Here, we want to find the largest
value along the dimension with index of 1, which corresponds to our predictions
(recall that our logits tensor has shape <code>[<em>batch_size</em>,
10]</code>).
We can derive probabilities from our logits layer by applying softmax activation
using @{tf.nn.softmax}:
```python
tf.nn.softmax(logits, name="softmax_tensor")
```
> Note: We use the `name` argument to explicitly name this operation
> `softmax_tensor`, so we can reference it later. (We'll set up logging for the
> softmax values in ["Set Up a Logging Hook"](#set_up_a_logging_hook)).
We compile our predictions in a dict, and return an `EstimatorSpec` object:
```python
predictions = {
"classes": tf.argmax(input=logits, axis=1),
"probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
```
### Calculate Loss {#calculating-loss}
For both training and evaluation, we need to define a
[loss function](https://en.wikipedia.org/wiki/Loss_function)
that measures how closely the model's predictions match the target classes. For
multiclass classification problems like MNIST,
[cross entropy](https://en.wikipedia.org/wiki/Cross_entropy) is typically used
as the loss metric. The following code calculates cross entropy when the model
runs in either `TRAIN` or `EVAL` mode:
```python
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
```
Let's take a closer look at what's happening above.
Our `labels` tensor contains a list of prediction indices for our examples, e.g. `[1,
9, ...]`. `logits` contains the linear outputs of our last layer.
`tf.losses.sparse_softmax_cross_entropy` calculates the softmax cross-entropy
(aka: categorical cross-entropy, negative log-likelihood) from these two inputs
in an efficient, numerically stable way.
### Configure the Training Op
In the previous section, we defined loss for our CNN as the softmax
cross-entropy of the logits layer and our labels. Let's configure our model to
optimize this loss value during training. We'll use a learning rate of 0.001 and
[stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent)
as the optimization algorithm:
```python
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(
loss=loss,
global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
```
> Note: For a more in-depth look at configuring training ops for Estimator model
> functions, see @{$custom_estimators#defining-the-training-op-for-the-model$"Defining the training op for the model"}
> in the @{$custom_estimators$"Creating Estimators in tf.estimator"} tutorial.
### Add evaluation metrics
To add an accuracy metric to our model, we define an `eval_metric_ops` dict in
EVAL mode as follows:
```python
eval_metric_ops = {
"accuracy": tf.metrics.accuracy(
labels=labels, predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(
mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
```
<a id="train_eval_mnist"></a>
## Training and Evaluating the CNN MNIST Classifier
We've coded our MNIST CNN model function; now we're ready to train and evaluate
it.
### Load Training and Test Data
First, let's load our training and test data. Add a `main()` function to
`cnn_mnist.py` with the following code:
```python
def main(unused_argv):
# Load training and eval data
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
train_data = mnist.train.images # Returns np.array
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_data = mnist.test.images # Returns np.array
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)
```
We store the training feature data (the raw pixel values for 55,000 images of
hand-drawn digits) and training labels (the corresponding value from 0–9 for
each image) as [numpy
arrays](https://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html)
in `train_data` and `train_labels`, respectively. Similarly, we store the
evaluation feature data (10,000 images) and evaluation labels in `eval_data`
and `eval_labels`, respectively.
### Create the Estimator {#create-the-estimator}
Next, let's create an `Estimator` (a TensorFlow class for performing high-level
model training, evaluation, and inference) for our model. Add the following code
to `main()`:
```python
# Create the Estimator
mnist_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")
```
The `model_fn` argument specifies the model function to use for training,
evaluation, and prediction; we pass it the `cnn_model_fn` we created in
["Building the CNN MNIST Classifier."](#building-the-cnn-mnist-classifier) The
`model_dir` argument specifies the directory where model data (checkpoints) will
be saved (here, we specify the temp directory `/tmp/mnist_convnet_model`, but
feel free to change to another directory of your choice).
> Note: For an in-depth walkthrough of the TensorFlow `Estimator` API, see the
> tutorial @{$custom_estimators$"Creating Estimators in tf.estimator."}
### Set Up a Logging Hook {#set_up_a_logging_hook}
Since CNNs can take a while to train, let's set up some logging so we can track
progress during training. We can use TensorFlow's @{tf.train.SessionRunHook} to create a
@{tf.train.LoggingTensorHook}
that will log the probability values from the softmax layer of our CNN. Add the
following to `main()`:
```python
# Set up logging for predictions
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
```
We store a dict of the tensors we want to log in `tensors_to_log`. Each key is a
label of our choice that will be printed in the log output, and the
corresponding value is the name of a `Tensor` in the TensorFlow graph. Here, our
`probabilities` can be found in `softmax_tensor`, the name we gave our softmax
operation earlier when we generated the probabilities in `cnn_model_fn`.
> Note: If you don't explicitly assign a name to an operation via the `name`
> argument, TensorFlow will assign a default name. A couple of easy ways to
> discover the names applied to operations are to visualize your graph on
> @{$graph_viz$TensorBoard} or to enable the
> @{$guide/debugger$TensorFlow Debugger (tfdbg)}.
Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the
`tensors` argument. We set `every_n_iter=50`, which specifies that probabilities
should be logged after every 50 steps of training.
### Train the Model
Now we're ready to train our model, which we can do by creating `train_input_fn`
and calling `train()` on `mnist_classifier`. Add the following to `main()`:
```python
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": train_data},
y=train_labels,
batch_size=100,
num_epochs=None,
shuffle=True)
mnist_classifier.train(
input_fn=train_input_fn,
steps=20000,
hooks=[logging_hook])
```
In the `numpy_input_fn` call, we pass the training feature data and labels to
`x` (as a dict) and `y`, respectively. We set a `batch_size` of `100` (which
means that the model will train on minibatches of 100 examples at each step).
`num_epochs=None` means that the model will train until the specified number of
steps is reached. We also set `shuffle=True` to shuffle the training data.
In the `train` call, we set `steps=20000`
(which means the model will train for 20,000 steps total). We pass our
`logging_hook` to the `hooks` argument, so that it will be triggered during
training.
### Evaluate the Model
Once training is complete, we want to evaluate our model to determine its
accuracy on the MNIST test set. We call the `evaluate` method, which evaluates
the metrics we specified in the `eval_metric_ops` argument in the `model_fn`.
Add the following to `main()`:
```python
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": eval_data},
y=eval_labels,
num_epochs=1,
shuffle=False)
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
```
To create `eval_input_fn`, we set `num_epochs=1`, so that the model evaluates
the metrics over one epoch of data and returns the result. We also set
`shuffle=False` to iterate through the data sequentially.
### Run the Model
We've coded the CNN model function, `Estimator`, and the training/evaluation
logic; now let's see the results. Run `cnn_mnist.py`.
> Note: Training CNNs is quite computationally intensive. Estimated completion
> time of `cnn_mnist.py` will vary depending on your processor, but will likely
> be upwards of 1 hour on CPU. To train more quickly, you can decrease the
> number of `steps` passed to `train()`, but note that this will affect accuracy.
As the model trains, you'll see log output like the following:
```python
INFO:tensorflow:loss = 2.36026, step = 1
INFO:tensorflow:probabilities = [[ 0.07722801 0.08618255 0.09256398, ...]]
...
INFO:tensorflow:loss = 2.13119, step = 101
INFO:tensorflow:global_step/sec: 5.44132
...
INFO:tensorflow:Loss for final step: 0.553216.
INFO:tensorflow:Restored model from /tmp/mnist_convnet_model
INFO:tensorflow:Eval steps [0,inf) for training step 20000.
INFO:tensorflow:Input iterator is exhausted.
INFO:tensorflow:Saving evaluation summary for step 20000: accuracy = 0.9733, loss = 0.0902271
{'loss': 0.090227105, 'global_step': 20000, 'accuracy': 0.97329998}
```
Here, we've achieved an accuracy of 97.3% on our test data set.
## Additional Resources
To learn more about TensorFlow Estimators and CNNs in TensorFlow, see the
following resources:
* @{$custom_estimators$Creating Estimators in tf.estimator}
provides an introduction to the TensorFlow Estimator API. It walks through
configuring an Estimator, writing a model function, calculating loss, and
defining a training op.
* @{$deep_cnn} walks through how to build a MNIST CNN classification model
*without estimators* using lower-level TensorFlow operations.

View File

@ -0,0 +1,24 @@
index.md
### Images
layers.md: MNIST
image_recognition.md: Image Recognition
/hub/tutorials/image_retraining.md: Image Retraining
deep_cnn.md
### Sequences
/hub/tutorials/text_classification_with_tf_hub: Text Classification
recurrent.md
seq2seq.md: Neural Machine Translation
recurrent_quickdraw.md: Drawing Classification
audio_recognition.md
### Data Representation
wide.md: Linear Models
wide_and_deep.md: Wide & Deep Learning
word2vec.md
kernel_methods.md: Kernel Methods
### Non-ML
mandelbrot.md
pdes.md

View File

@ -0,0 +1,237 @@
# Large-scale Linear Models with TensorFlow
@{tf.estimator$Estimators} provide (among other things) a rich set of tools for
working with linear models in TensorFlow. This document provides an overview of
those tools. It explains:
* What a linear model is.
* Why you might want to use a linear model.
* How Estimators make it easy to build linear models in TensorFlow.
* How you can use Estimators to combine linear models with
deep learning to get the advantages of both.
Read this overview to decide whether the Estimator's linear model tools might
be useful to you. Then do the @{$wide$Linear Models tutorial} to
give it a try. This overview uses code samples from the tutorial, but the
tutorial walks through the code in greater detail.
To understand this overview it will help to have some familiarity
with basic machine learning concepts, and also with
@{$premade_estimators$Estimators}.
[TOC]
## What is a linear model?
A **linear model** uses a single weighted sum of features to make a prediction.
For example, if you have [data](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names)
on age, years of education, and weekly hours of
work for a population, a model can learn weights for each of those numbers so that
their weighted sum estimates a person's salary. You can also use linear models
for classification.
Some linear models transform the weighted sum into a more convenient form. For
example, [**logistic regression**](https://developers.google.com/machine-learning/glossary/#logistic_regression) plugs the weighted sum into the logistic
function to turn the output into a value between 0 and 1. But you still just
have one weight for each input feature.
## Why would you want to use a linear model?
Why would you want to use so simple a model when recent research has
demonstrated the power of more complex neural networks with many layers?
Linear models:
* train quickly, compared to deep neural nets.
* can work well on very large feature sets.
* can be trained with algorithms that don't require a lot of fiddling
with learning rates, etc.
* can be interpreted and debugged more easily than neural nets.
You can examine the weights assigned to each feature to figure out what's
having the biggest impact on a prediction.
* provide an excellent starting point for learning about machine learning.
* are widely used in industry.
## How do Estimators help you build linear models?
You can build a linear model from scratch in TensorFlow without the help of a
special API. But Estimators provide some tools that make it easier to build
effective large-scale linear models.
### Feature columns and transformations
Much of the work of designing a linear model consists of transforming raw data
into suitable input features. TensorFlow uses the `FeatureColumn` abstraction to
enable these transformations.
A `FeatureColumn` represents a single feature in your data. A `FeatureColumn`
may represent a quantity like 'height', or it may represent a category like
'eye_color' where the value is drawn from a set of discrete possibilities like
{'blue', 'brown', 'green'}.
In the case of both *continuous features* like 'height' and *categorical
features* like 'eye_color', a single value in the data might get transformed
into a sequence of numbers before it is input into the model. The
`FeatureColumn` abstraction lets you manipulate the feature as a single
semantic unit in spite of this fact. You can specify transformations and
select features to include without dealing with specific indices in the
tensors you feed into the model.
#### Sparse columns
Categorical features in linear models are typically translated into a sparse
vector in which each possible value has a corresponding index or id. For
example, if there are only three possible eye colors you can represent
'eye_color' as a length 3 vector: 'brown' would become [1, 0, 0], 'blue' would
become [0, 1, 0] and 'green' would become [0, 0, 1]. These vectors are called
"sparse" because they may be very long, with many zeros, when the set of
possible values is very large (such as all English words).
While you don't need to use categorical columns to use the linear model tools
provided by Estimators, one of the strengths of linear models is their ability
to deal with large sparse vectors. Sparse features are a primary use case for
the linear model tools provided by Estimators.
##### Encoding sparse columns
`FeatureColumn` handles the conversion of categorical values into vectors
automatically, with code like this:
```python
eye_color = tf.feature_column.categorical_column_with_vocabulary_list(
"eye_color", vocabulary_list=["blue", "brown", "green"])
```
where `eye_color` is the name of a column in your source data.
You can also generate `FeatureColumn`s for categorical features for which you
don't know all possible values. For this case you would use
`categorical_column_with_hash_bucket()`, which uses a hash function to assign
indices to feature values.
```python
education = tf.feature_column.categorical_column_with_hash_bucket(
"education", hash_bucket_size=1000)
```
##### Feature Crosses
Because linear models assign independent weights to separate features, they
can't learn the relative importance of specific combinations of feature
values. If you have a feature 'favorite_sport' and a feature 'home_city' and
you're trying to predict whether a person likes to wear red, your linear model
won't be able to learn that baseball fans from St. Louis especially like to
wear red.
You can get around this limitation by creating a new feature
'favorite_sport_x_home_city'. The value of this feature for a given person is
just the concatenation of the values of the two source features:
'baseball_x_stlouis', for example. This sort of combination feature is called
a *feature cross*.
The `crossed_column()` method makes it easy to set up feature crosses:
```python
sport_x_city = tf.feature_column.crossed_column(
["sport", "city"], hash_bucket_size=int(1e4))
```
#### Continuous columns
You can specify a continuous feature like so:
```python
age = tf.feature_column.numeric_column("age")
```
Although, as a single real number, a continuous feature can often be input
directly into the model, TensorFlow offers useful transformations for this sort
of column as well.
##### Bucketization
*Bucketization* turns a continuous column into a categorical column. This
transformation lets you use continuous features in feature crosses, or learn
cases where specific value ranges have particular importance.
Bucketization divides the range of possible values into subranges called
buckets:
```python
age_buckets = tf.feature_column.bucketized_column(
age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
```
The bucket into which a value falls becomes the categorical label for
that value.
#### Input function
`FeatureColumn`s provide a specification for the input data for your model,
indicating how to represent and transform the data. But they do not provide
the data itself. You provide the data through an input function.
The input function must return a dictionary of tensors. Each key corresponds to
the name of a `FeatureColumn`. Each key's value is a tensor containing the
values of that feature for all data instances. See
@{$premade_estimators#input_fn} for a
more comprehensive look at input functions, and `input_fn` in the
[linear models tutorial code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py)
for an example implementation of an input function.
The input function is passed to the `train()` and `evaluate()` calls that
initiate training and testing, as described in the next section.
### Linear estimators
TensorFlow estimator classes provide a unified training and evaluation harness
for regression and classification models. They take care of the details of the
training and evaluation loops and allow the user to focus on model inputs and
architecture.
To build a linear estimator, you can use either the
`tf.estimator.LinearClassifier` estimator or the
`tf.estimator.LinearRegressor` estimator, for classification and
regression respectively.
As with all TensorFlow estimators, to run the estimator you just:
1. Instantiate the estimator class. For the two linear estimator classes,
you pass a list of `FeatureColumn`s to the constructor.
2. Call the estimator's `train()` method to train it.
3. Call the estimator's `evaluate()` method to see how it does.
For example:
```python
e = tf.estimator.LinearClassifier(
feature_columns=[
native_country, education, occupation, workclass, marital_status,
race, age_buckets, education_x_occupation,
age_buckets_x_race_x_occupation],
model_dir=YOUR_MODEL_DIRECTORY)
e.train(input_fn=input_fn_train, steps=200)
# Evaluate for one step (one pass through the test data).
results = e.evaluate(input_fn=input_fn_test)
# Print the stats for the evaluation.
for key in sorted(results):
print("%s: %s" % (key, results[key]))
```
### Wide and deep learning
The `tf.estimator` module also provides an estimator class that lets you jointly
train a linear model and a deep neural network. This novel approach combines the
ability of linear models to "memorize" key features with the generalization
ability of neural nets. Use `tf.estimator.DNNLinearCombinedClassifier` to
create this sort of "wide and deep" model:
```python
e = tf.estimator.DNNLinearCombinedClassifier(
model_dir=YOUR_MODEL_DIR,
linear_feature_columns=wide_columns,
dnn_feature_columns=deep_columns,
dnn_hidden_units=[100, 50])
```
For more information, see the @{$wide_and_deep$Wide and Deep Learning tutorial}.

View File

@ -0,0 +1,116 @@
# Mandelbrot Set
Visualizing the [Mandelbrot set](https://en.wikipedia.org/wiki/Mandelbrot_set)
doesn't have anything to do with machine learning, but it makes for a fun
example of how one can use TensorFlow for general mathematics. This is
actually a pretty naive implementation of the visualization, but it makes the
point. (We may end up providing a more elaborate implementation down the line
to produce more truly beautiful images.)
## Basic Setup
We'll need a few imports to get started.
```python
# Import libraries for simulation
import tensorflow as tf
import numpy as np
# Imports for visualization
import PIL.Image
from io import BytesIO
from IPython.display import Image, display
```
Now we'll define a function to actually display the image once we have
iteration counts.
```python
def DisplayFractal(a, fmt='jpeg'):
"""Display an array of iteration counts as a
colorful picture of a fractal."""
a_cyclic = (6.28*a/20.0).reshape(list(a.shape)+[1])
img = np.concatenate([10+20*np.cos(a_cyclic),
30+50*np.sin(a_cyclic),
155-80*np.cos(a_cyclic)], 2)
img[a==a.max()] = 0
a = img
a = np.uint8(np.clip(a, 0, 255))
f = BytesIO()
PIL.Image.fromarray(a).save(f, fmt)
display(Image(data=f.getvalue()))
```
## Session and Variable Initialization
For playing around like this, we often use an interactive session, but a regular
session would work as well.
```python
sess = tf.InteractiveSession()
```
It's handy that we can freely mix NumPy and TensorFlow.
```python
# Use NumPy to create a 2D array of complex numbers
Y, X = np.mgrid[-1.3:1.3:0.005, -2:1:0.005]
Z = X+1j*Y
```
Now we define and initialize TensorFlow tensors.
```python
xs = tf.constant(Z.astype(np.complex64))
zs = tf.Variable(xs)
ns = tf.Variable(tf.zeros_like(xs, tf.float32))
```
TensorFlow requires that you explicitly initialize variables before using them.
```python
tf.global_variables_initializer().run()
```
## Defining and Running the Computation
Now we specify more of the computation...
```python
# Compute the new values of z: z^2 + x
zs_ = zs*zs + xs
# Have we diverged with this new value?
not_diverged = tf.abs(zs_) < 4
# Operation to update the zs and the iteration count.
#
# Note: We keep computing zs after they diverge! This
# is very wasteful! There are better, if a little
# less simple, ways to do this.
#
step = tf.group(
zs.assign(zs_),
ns.assign_add(tf.cast(not_diverged, tf.float32))
)
```
... and run it for a couple hundred steps
```python
for i in range(200): step.run()
```
Let's see what we've got.
```python
DisplayFractal(ns.eval())
```
![jpeg](https://www.tensorflow.org/images/mandelbrot_output.jpg)
Not bad!

View File

@ -0,0 +1,141 @@
# Partial Differential Equations
TensorFlow isn't just for machine learning. Here we give a (somewhat
pedestrian) example of using TensorFlow for simulating the behavior of a
[partial differential equation](
https://en.wikipedia.org/wiki/Partial_differential_equation).
We'll simulate the surface of a square pond as a few raindrops land on it.
## Basic Setup
A few imports we'll need.
```python
#Import libraries for simulation
import tensorflow as tf
import numpy as np
#Imports for visualization
import PIL.Image
from io import BytesIO
from IPython.display import clear_output, Image, display
```
A function for displaying the state of the pond's surface as an image.
```python
def DisplayArray(a, fmt='jpeg', rng=[0,1]):
"""Display an array as a picture."""
a = (a - rng[0])/float(rng[1] - rng[0])*255
a = np.uint8(np.clip(a, 0, 255))
f = BytesIO()
PIL.Image.fromarray(a).save(f, fmt)
clear_output(wait = True)
display(Image(data=f.getvalue()))
```
Here we start an interactive TensorFlow session for convenience in playing
around. A regular session would work as well if we were doing this in an
executable .py file.
```python
sess = tf.InteractiveSession()
```
## Computational Convenience Functions
```python
def make_kernel(a):
"""Transform a 2D array into a convolution kernel"""
a = np.asarray(a)
a = a.reshape(list(a.shape) + [1,1])
return tf.constant(a, dtype=1)
def simple_conv(x, k):
"""A simplified 2D convolution operation"""
x = tf.expand_dims(tf.expand_dims(x, 0), -1)
y = tf.nn.depthwise_conv2d(x, k, [1, 1, 1, 1], padding='SAME')
return y[0, :, :, 0]
def laplace(x):
"""Compute the 2D laplacian of an array"""
laplace_k = make_kernel([[0.5, 1.0, 0.5],
[1.0, -6., 1.0],
[0.5, 1.0, 0.5]])
return simple_conv(x, laplace_k)
```
## Define the PDE
Our pond is a perfect 500 x 500 square, as is the case for most ponds found in
nature.
```python
N = 500
```
Here we create our pond and hit it with some rain drops.
```python
# Initial Conditions -- some rain drops hit a pond
# Set everything to zero
u_init = np.zeros([N, N], dtype=np.float32)
ut_init = np.zeros([N, N], dtype=np.float32)
# Some rain drops hit a pond at random points
for n in range(40):
a,b = np.random.randint(0, N, 2)
u_init[a,b] = np.random.uniform()
DisplayArray(u_init, rng=[-0.1, 0.1])
```
![jpeg](https://www.tensorflow.org/images/pde_output_1.jpg)
Now let's specify the details of the differential equation.
```python
# Parameters:
# eps -- time resolution
# damping -- wave damping
eps = tf.placeholder(tf.float32, shape=())
damping = tf.placeholder(tf.float32, shape=())
# Create variables for simulation state
U = tf.Variable(u_init)
Ut = tf.Variable(ut_init)
# Discretized PDE update rules
U_ = U + eps * Ut
Ut_ = Ut + eps * (laplace(U) - damping * Ut)
# Operation to update the state
step = tf.group(
U.assign(U_),
Ut.assign(Ut_))
```
## Run The Simulation
This is where it gets fun -- running time forward with a simple for loop.
```python
# Initialize state to initial conditions
tf.global_variables_initializer().run()
# Run 1000 steps of PDE
for i in range(1000):
# Step simulation
step.run({eps: 0.03, damping: 0.04})
DisplayArray(U.eval(), rng=[-0.1, 0.1])
```
![jpeg](https://www.tensorflow.org/images/pde_output_2.jpg)
Look! Ripples!

View File

@ -0,0 +1,232 @@
# Recurrent Neural Networks
## Introduction
Take a look at [this great article](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)
for an introduction to recurrent neural networks and LSTMs in particular.
## Language Modeling
In this tutorial we will show how to train a recurrent neural network on
the challenging task of language modeling. The goal of the problem is to fit a
probabilistic model which assigns probabilities to sentences. It does so by
predicting next words in a text given a history of previous words. For this
purpose we will use the [Penn Tree Bank](https://catalog.ldc.upenn.edu/ldc99t42)
(PTB) dataset, which is a popular benchmark for measuring the quality of these
models, whilst being small and relatively fast to train.
Language modeling is key to many interesting problems such as speech
recognition, machine translation, or image captioning. It is also fun --
take a look [here](https://karpathy.github.io/2015/05/21/rnn-effectiveness/).
For the purpose of this tutorial, we will reproduce the results from
[Zaremba et al., 2014](https://arxiv.org/abs/1409.2329)
([pdf](https://arxiv.org/pdf/1409.2329.pdf)), which achieves very good quality
on the PTB dataset.
## Tutorial Files
This tutorial references the following files from `models/tutorials/rnn/ptb` in the [TensorFlow models repo](https://github.com/tensorflow/models):
File | Purpose
--- | ---
`ptb_word_lm.py` | The code to train a language model on the PTB dataset.
`reader.py` | The code to read the dataset.
## Download and Prepare the Data
The data required for this tutorial is in the `data/` directory of the
[PTB dataset from Tomas Mikolov's webpage](http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz).
The dataset is already preprocessed and contains 10,000 distinct words overall,
including the end-of-sentence marker and a special symbol (\<unk\>) for rare
words. In `reader.py`, we convert each word to a unique integer identifier,
in order to make it easy for the neural network to process the data.
## The Model
### LSTM
The core of the model consists of an LSTM cell that processes one word at a
time and computes probabilities of the possible values for the next word in the
sentence. The memory state of the network is initialized with a vector of zeros
and gets updated after reading each word. For computational reasons, we will
process data in mini-batches of size `batch_size`. In this example, it is
important to note that `current_batch_of_words` does not correspond to a
"sentence" of words. Every word in a batch should correspond to a time t.
TensorFlow will automatically sum the gradients of each batch for you.
For example:
```
t=0 t=1 t=2 t=3 t=4
[The, brown, fox, is, quick]
[The, red, fox, jumped, high]
words_in_dataset[0] = [The, The]
words_in_dataset[1] = [brown, red]
words_in_dataset[2] = [fox, fox]
words_in_dataset[3] = [is, jumped]
words_in_dataset[4] = [quick, high]
batch_size = 2, time_steps = 5
```
The basic pseudocode is as follows:
```python
words_in_dataset = tf.placeholder(tf.float32, [time_steps, batch_size, num_features])
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
# Initial state of the LSTM memory.
hidden_state = tf.zeros([batch_size, lstm.state_size])
current_state = tf.zeros([batch_size, lstm.state_size])
state = hidden_state, current_state
probabilities = []
loss = 0.0
for current_batch_of_words in words_in_dataset:
# The value of state is updated after processing each batch of words.
output, state = lstm(current_batch_of_words, state)
# The LSTM output can be used to make next word predictions
logits = tf.matmul(output, softmax_w) + softmax_b
probabilities.append(tf.nn.softmax(logits))
loss += loss_function(probabilities, target_words)
```
### Truncated Backpropagation
By design, the output of a recurrent neural network (RNN) depends on arbitrarily
distant inputs. Unfortunately, this makes backpropagation computation difficult.
In order to make the learning process tractable, it is common practice to create
an "unrolled" version of the network, which contains a fixed number
(`num_steps`) of LSTM inputs and outputs. The model is then trained on this
finite approximation of the RNN. This can be implemented by feeding inputs of
length `num_steps` at a time and performing a backward pass after each
such input block.
Here is a simplified block of code for creating a graph which performs
truncated backpropagation:
```python
# Placeholder for the inputs in a given iteration.
words = tf.placeholder(tf.int32, [batch_size, num_steps])
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
# Initial state of the LSTM memory.
initial_state = state = tf.zeros([batch_size, lstm.state_size])
for i in range(num_steps):
# The value of state is updated after processing each batch of words.
output, state = lstm(words[:, i], state)
# The rest of the code.
# ...
final_state = state
```
And this is how to implement an iteration over the whole dataset:
```python
# A numpy array holding the state of LSTM after each batch of words.
numpy_state = initial_state.eval()
total_loss = 0.0
for current_batch_of_words in words_in_dataset:
numpy_state, current_loss = session.run([final_state, loss],
# Initialize the LSTM state from the previous iteration.
feed_dict={initial_state: numpy_state, words: current_batch_of_words})
total_loss += current_loss
```
### Inputs
The word IDs will be embedded into a dense representation (see the
@{$word2vec$Vector Representations Tutorial}) before feeding to
the LSTM. This allows the model to efficiently represent the knowledge about
particular words. It is also easy to write:
```python
# embedding_matrix is a tensor of shape [vocabulary_size, embedding size]
word_embeddings = tf.nn.embedding_lookup(embedding_matrix, word_ids)
```
The embedding matrix will be initialized randomly and the model will learn to
differentiate the meaning of words just by looking at the data.
### Loss Function
We want to minimize the average negative log probability of the target words:
$$ \text{loss} = -\frac{1}{N}\sum_{i=1}^{N} \ln p_{\text{target}_i} $$
It is not very difficult to implement but the function
`sequence_loss_by_example` is already available, so we can just use it here.
The typical measure reported in the papers is average per-word perplexity (often
just called perplexity), which is equal to
$$e^{-\frac{1}{N}\sum_{i=1}^{N} \ln p_{\text{target}_i}} = e^{\text{loss}} $$
and we will monitor its value throughout the training process.
### Stacking multiple LSTMs
To give the model more expressive power, we can add multiple layers of LSTMs
to process the data. The output of the first layer will become the input of
the second and so on.
We have a class called `MultiRNNCell` that makes the implementation seamless:
```python
def lstm_cell():
  """Create a new `BasicLSTMCell` built from `lstm_size`."""
  cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
  return cell
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
[lstm_cell() for _ in range(number_of_layers)])
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
for i in range(num_steps):
# The value of state is updated after processing each batch of words.
output, state = stacked_lstm(words[:, i], state)
# The rest of the code.
# ...
final_state = state
```
## Run the Code
Before running the code, download the PTB dataset, as discussed at the beginning
of this tutorial. Then, extract the PTB dataset underneath your home directory
as follows:
```bsh
tar xvfz simple-examples.tgz -C $HOME
```
_(Note: On Windows, you may need to use
[other tools](https://wiki.haskell.org/How_to_unpack_a_tar_file_in_Windows).)_
Now, clone the [TensorFlow models repo](https://github.com/tensorflow/models)
from GitHub. Run the following commands:
```bsh
cd models/tutorials/rnn/ptb
python ptb_word_lm.py --data_path=$HOME/simple-examples/data/ --model=small
```
There are 3 supported model configurations in the tutorial code: "small",
"medium" and "large". The difference between them is in size of the LSTMs and
the set of hyperparameters used for training.
The larger the model, the better results it should get. The `small` model should
be able to reach perplexity below 120 on the test set and the `large` one below
80, though it might take several hours to train.
## What Next?
There are several tricks that we haven't mentioned that make the model better,
including:
* decreasing learning rate schedule,
* dropout between the LSTM layers.
Study the code and modify it to improve the model even further.

View File

@ -0,0 +1,411 @@
# Recurrent Neural Networks for Drawing Classification
[Quick, Draw!]: http://quickdraw.withgoogle.com
[Quick, Draw!] is a game where a player is challenged to draw a number of
objects and see if a computer can recognize the drawing.
The recognition in [Quick, Draw!] is performed by a classifier that takes the
user input, given as a sequence of strokes of points in x and y, and recognizes
the object category that the user tried to draw.
In this tutorial we'll show how to build an RNN-based recognizer for this
problem. The model will use a combination of convolutional layers, LSTM layers,
and a softmax output layer to classify the drawings:
<center> ![RNN model structure](../images/quickdraw_model.png) </center>
The figure above shows the structure of the model that we will build in this
tutorial. The input is a drawing that is encoded as a sequence of strokes of
points in x, y, and n, where n indicates whether the point is the first point
in a new stroke.
Then, a series of 1-dimensional convolutions is applied. Then LSTM layers are
applied and the sum of the outputs of all LSTM steps is fed into a softmax layer
to make a classification decision among the classes of drawings that we know.
This tutorial uses the data from actual [Quick, Draw!] games [that is publicly
available](https://quickdraw.withgoogle.com/data). This dataset consists of 50M
drawings in 345 categories.
## Run the tutorial code
To try the code for this tutorial:
1. @{$install$Install TensorFlow} if you haven't already.
1. Download the [tutorial code](https://github.com/tensorflow/models/tree/master/tutorials/rnn/quickdraw/train_model.py).
1. [Download the data](#download-the-data) in `TFRecord` format from
[here](http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz) and unzip it. More details about [how to
obtain the original Quick, Draw!
data](#optional_download_the_full_quick_draw_data) and [how to convert that
to `TFRecord` files](#optional_converting_the_data) are available below.
1. Execute the tutorial code with the following command to train the RNN-based
model described in this tutorial. Make sure to adjust the paths to point to
the unzipped data from the download in step 3.
```shell
python train_model.py \
--training_data=rnn_tutorial_data/training.tfrecord-?????-of-????? \
--eval_data=rnn_tutorial_data/eval.tfrecord-?????-of-????? \
--classes_file=rnn_tutorial_data/training.tfrecord.classes
```
## Tutorial details
### Download the data
We make the data that we use in this tutorial available as `TFRecord` files
containing `TFExamples`. You can download the data from here:
http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz
Alternatively you can download the original data in `ndjson` format from the
Google cloud and convert it to the `TFRecord` files containing `TFExamples`
yourself as described in the next section.
### Optional: Download the full Quick Draw Data
The full [Quick, Draw!](https://quickdraw.withgoogle.com)
[dataset](https://quickdraw.withgoogle.com/data) is available on Google Cloud
Storage as [ndjson](http://ndjson.org/) files separated by category. You can
[browse the list of files in Cloud
Console](https://console.cloud.google.com/storage/quickdraw_dataset).
To download the data we recommend using
[gsutil](https://cloud.google.com/storage/docs/gsutil_install#install) to
download the entire dataset. Note that the original .ndjson files require
downloading ~22GB.
Then use the following command to check that your gsutil installation works and
that you can access the data bucket:
```shell
gsutil ls -r "gs://quickdraw_dataset/full/simplified/*"
```
which will output a long list of files like the following:
```shell
gs://quickdraw_dataset/full/simplified/The Eiffel Tower.ndjson
gs://quickdraw_dataset/full/simplified/The Great Wall of China.ndjson
gs://quickdraw_dataset/full/simplified/The Mona Lisa.ndjson
gs://quickdraw_dataset/full/simplified/aircraft carrier.ndjson
...
```
Then create a folder and download the dataset there.
```shell
mkdir rnn_tutorial_data
cd rnn_tutorial_data
gsutil -m cp "gs://quickdraw_dataset/full/simplified/*" .
```
This download will take a while and download a bit more than 23GB of data.
### Optional: Converting the data
To convert the `ndjson` files to
@{$python/python_io#TFRecords_Format_Details$TFRecord} files containing
[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
protos run the following command.
```shell
python create_dataset.py --ndjson_path rnn_tutorial_data \
--output_path rnn_tutorial_data
```
This will store the data in 10 shards of
@{$python/python_io#TFRecords_Format_Details$TFRecord} files with 10000 items
per class for the training data and 1000 items per class as eval data.
This conversion process is described in more detail in the following.
The original QuickDraw data is formatted as `ndjson` files where each line
contains a JSON object like the following:
```json
{"word":"cat",
"countrycode":"VE",
"timestamp":"2017-03-02 23:25:10.07453 UTC",
"recognized":true,
"key_id":"5201136883597312",
"drawing":[
[
[130,113,99,109,76,64,55,48,48,51,59,86,133,154,170,203,214,217,215,208,186,176,162,157,132],
[72,40,27,79,82,88,100,120,134,152,165,184,189,186,179,152,131,114,100,89,76,0,31,65,70]
],[
[76,28,7],
[136,128,128]
],[
[76,23,0],
[160,164,175]
],[
[87,52,37],
[175,191,204]
],[
[174,220,246,251],
[134,132,136,139]
],[
[175,255],
[147,168]
],[
[171,208,215],
[164,198,210]
],[
[130,110,108,111,130,139,139,119],
[129,134,137,144,148,144,136,130]
],[
[107,106],
[96,113]
]
]
}
```
For our purpose of building a classifier we only care about the fields "`word`"
and "`drawing`". While parsing the ndjson files, we process them line by line
using a function that converts the strokes from the `drawing` field into a
tensor of size `[number of points, 3]` containing the differences of consecutive
points. This function also returns the class name as a string.
```python
def parse_line(ndjson_line):
  """Parse an ndjson line and return ink (as np array) and classname.

  Args:
    ndjson_line: One line of a Quick, Draw! `ndjson` file: a JSON object with
      at least the fields "word" and "drawing". Each stroke in "drawing" is a
      pair `[xs, ys]` of equally long coordinate lists.

  Returns:
    A tuple `(np_ink, class_name)`. `np_ink` is a float32 array of shape
    `[total_points - 1, 3]`. Columns 0 and 1 hold the x/y differences between
    consecutive points after size normalization; column 2 is 1 when the point
    the delta leads to is the last point of its stroke, and 0 otherwise.
    `class_name` is the value of the "word" field.
  """
  sample = json.loads(ndjson_line)
  class_name = sample["word"]
  inkarray = sample["drawing"]
  total_points = sum(len(stroke[0]) for stroke in inkarray)
  np_ink = np.zeros((total_points, 3), dtype=np.float32)
  current_t = 0
  for stroke in inkarray:
    for i in [0, 1]:
      np_ink[current_t:(current_t + len(stroke[0])), i] = stroke[i]
    current_t += len(stroke[0])
    np_ink[current_t - 1, 2] = 1  # stroke_end
  # Preprocessing.
  # 1. Size normalization: rescale x and y independently to the range [0, 1].
  lower = np.min(np_ink[:, 0:2], axis=0)
  upper = np.max(np_ink[:, 0:2], axis=0)
  scale = upper - lower
  scale[scale == 0] = 1  # Avoid dividing by zero for degenerate drawings.
  np_ink[:, 0:2] = (np_ink[:, 0:2] - lower) / scale
  # 2. Compute deltas. Only the coordinate columns are differenced; the
  # stroke-end column must be kept so the result still has 3 channels.
  deltas = np_ink[1:, 0:2] - np_ink[0:-1, 0:2]
  np_ink = np_ink[1:, :]
  np_ink[:, 0:2] = deltas
  return np_ink, class_name
```
Since we want the data to be shuffled for writing we read from each of the
category files in random order and write to a random shard.
For the training data we read the first 10000 items for each class and for the
eval data we read the next 1000 items for each class.
This data is then reformatted into a tensor of shape `[num_training_samples,
max_length, 3]`. Then we determine the bounding box of the original drawing in
screen coordinates and normalize the size such that the drawing has unit height.
<center> ![Size normalization](../images/quickdraw_sizenormalization.png) </center>
Finally, we compute the differences between consecutive points and store these
as a `VarLenFeature` in a
[tensorflow.Example](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
under the key `ink`. In addition we store the `class_index` as a single entry
`FixedLenFeature` and the `shape` of the `ink` as a `FixedLenFeature` of
length 2.
### Defining the model
To define the model we create a new `Estimator`. If you want to read more about
estimators, we recommend @{$custom_estimators$this tutorial}.
To build the model, we:
1. reshape the input back into the original shape - where the mini batch is
padded to the maximal length of its contents. In addition to the ink data we
also have the lengths for each example and the target class. This happens in
the function [`_get_input_tensors`](#-get-input-tensors).
1. pass the input through to a series of convolution layers in
[`_add_conv_layers`](#-add-conv-layers).
1. pass the output of the convolutions into a series of bidirectional LSTM
layers in [`_add_rnn_layers`](#-add-rnn-layers). At the end of that, the
outputs for each time step are summed up to have a compact, fixed length
embedding of the input.
1. classify this embedding using a softmax layer in
[`_add_fc_layers`](#-add-fc-layers).
In code this looks like:
```python
inks, lengths, targets = _get_input_tensors(features, targets)
convolved = _add_conv_layers(inks)
final_state = _add_rnn_layers(convolved, lengths)
logits =_add_fc_layers(final_state)
```
### _get_input_tensors
To obtain the input features we first obtain the shape from the features dict
and then create a 1D tensor of size `[batch_size]` containing the lengths of the
input sequences. The ink is stored as a SparseTensor in the features dict which
we convert into a dense tensor and then reshape to be `[batch_size, ?, 3]`. And
finally, if targets were passed in we make sure they are stored as a 1D tensor
of size `[batch_size]`.
In code this looks like this:
```python
shapes = features["shape"]
lengths = tf.squeeze(
tf.slice(shapes, begin=[0, 0], size=[params["batch_size"], 1]))
inks = tf.reshape(
tf.sparse_tensor_to_dense(features["ink"]),
[params["batch_size"], -1, 3])
if targets is not None:
targets = tf.squeeze(targets)
```
### _add_conv_layers
The desired number of convolution layers and the lengths of the filters are
configured through the parameters `num_conv` and `conv_len` in the `params`
dict.
The input is a sequence where each point has dimensionality 3. We are going to
use 1D convolutions where we treat the 3 input features as channels. That means
that the input is a `[batch_size, length, 3]` tensor and the output will be a
`[batch_size, length, number_of_filters]` tensor.
```python
convolved = inks
for i in range(len(params.num_conv)):
convolved_input = convolved
if params.batch_norm:
convolved_input = tf.layers.batch_normalization(
convolved_input,
training=(mode == tf.estimator.ModeKeys.TRAIN))
# Add dropout layer if enabled and not first convolution layer.
if i > 0 and params.dropout:
convolved_input = tf.layers.dropout(
convolved_input,
rate=params.dropout,
training=(mode == tf.estimator.ModeKeys.TRAIN))
convolved = tf.layers.conv1d(
convolved_input,
filters=params.num_conv[i],
kernel_size=params.conv_len[i],
activation=None,
strides=1,
padding="same",
name="conv1d_%d" % i)
return convolved, lengths
```
### _add_rnn_layers
We pass the output from the convolutions into bidirectional LSTM layers for
which we use a helper function from contrib.
```python
outputs, _, _ = contrib_rnn.stack_bidirectional_dynamic_rnn(
cells_fw=[cell(params.num_nodes) for _ in range(params.num_layers)],
cells_bw=[cell(params.num_nodes) for _ in range(params.num_layers)],
inputs=convolved,
sequence_length=lengths,
dtype=tf.float32,
scope="rnn_classification")
```
See the code for more details and how to use `CUDA` accelerated implementations.
To create a compact, fixed-length embedding, we sum up the output of the LSTMs.
We first zero out the regions of the batch where the sequences have no data.
```python
mask = tf.tile(
tf.expand_dims(tf.sequence_mask(lengths, tf.shape(outputs)[1]), 2),
[1, 1, tf.shape(outputs)[2]])
zero_outside = tf.where(mask, outputs, tf.zeros_like(outputs))
outputs = tf.reduce_sum(zero_outside, axis=1)
```
### _add_fc_layers
The embedding of the input is passed into a fully connected layer which we then
use as a softmax layer.
```python
tf.layers.dense(final_state, params.num_classes)
```
### Loss, predictions, and optimizer
Finally, we need to add a loss, a training op, and predictions to create the
`ModelFn`:
```python
cross_entropy = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=targets, logits=logits))
# Add the optimizer.
train_op = tf.contrib.layers.optimize_loss(
loss=cross_entropy,
global_step=tf.train.get_global_step(),
learning_rate=params.learning_rate,
optimizer="Adam",
# some gradient clipping stabilizes training in the beginning.
clip_gradients=params.gradient_clipping_norm,
summaries=["learning_rate", "loss", "gradients", "gradient_norm"])
predictions = tf.argmax(logits, axis=1)
return model_fn_lib.ModelFnOps(
mode=mode,
predictions={"logits": logits,
"predictions": predictions},
loss=cross_entropy,
train_op=train_op,
eval_metric_ops={"accuracy": tf.metrics.accuracy(targets, predictions)})
```
### Training and evaluating the model
To train and evaluate the model we can rely on the functionalities of the
`Estimator` APIs and easily run training and evaluation with the `Experiment`
APIs:
```python
estimator = tf.estimator.Estimator(
model_fn=model_fn,
model_dir=output_dir,
config=config,
params=model_params)
# Train the model.
tf.contrib.learn.Experiment(
estimator=estimator,
train_input_fn=get_input_fn(
mode=tf.contrib.learn.ModeKeys.TRAIN,
tfrecord_pattern=FLAGS.training_data,
batch_size=FLAGS.batch_size),
train_steps=FLAGS.steps,
eval_input_fn=get_input_fn(
mode=tf.contrib.learn.ModeKeys.EVAL,
tfrecord_pattern=FLAGS.eval_data,
batch_size=FLAGS.batch_size),
min_eval_frequency=1000)
```
Note that this tutorial is just a quick example on a relatively small dataset to
get you familiar with the APIs of recurrent neural networks and estimators. Such
models can be even more powerful if you try them on a large dataset.
When training the model for 1M steps you can expect to get an accuracy of
approximately 70% on the top-1 candidate. Note that this accuracy is
sufficient to build the quickdraw game because, given the game dynamics,
the user will be able to adjust their drawing until it is ready. Also, the game
does not use the top-1 candidate only but accepts a drawing as correct if the
target category shows up with a score better than a fixed threshold.

View File

@ -23,7 +23,7 @@ straight in, feel free to look at the minimalistic implementation in
This basic example contains the code needed to download some data, train on it a
bit and visualize the result. Once you get comfortable with reading and running
the basic version, you can graduate to
[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py)
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py)
which is a more serious implementation that showcases some more advanced
TensorFlow principles about how to efficiently use threads to move data into a
text model, how to checkpoint during training, etc.
@ -341,7 +341,7 @@ t-SNE.
Et voila! As expected, words that are similar end up clustering nearby each
other. For a more heavyweight implementation of word2vec that showcases more of
the advanced features of TensorFlow, see the implementation in
[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py).
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
## Evaluating Embeddings: Analogical Reasoning
@ -357,7 +357,7 @@ Download the dataset for this task from
To see how we do this evaluation, have a look at the `build_eval_graph()` and
`eval()` functions in
[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py).
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
The choice of hyperparameters can strongly influence the accuracy on this task.
To achieve state-of-the-art performance on this task requires training over a
@ -385,13 +385,13 @@ your model is seriously bottlenecked on input data, you may want to implement a
custom data reader for your problem, as described in
@{$new_data_formats$New Data Formats}. For the case of Skip-Gram
modeling, we've actually already done this for you as an example in
[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py).
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
If your model is no longer I/O bound but you want still more performance, you
can take things further by writing your own TensorFlow Ops, as described in
@{$adding_an_op$Adding a New Op}. Again we've provided an
example of this for the Skip-Gram case
[models/tutorials/embedding/word2vec_optimized.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec_optimized.py).
[models/tutorials/embedding/word2vec_optimized.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec_optimized.py).
Feel free to benchmark these against each other to measure performance
improvements at each stage.

View File

@ -0,0 +1,5 @@
# Sequence-to-Sequence Models
Please check out the
[tensorflow neural machine translation tutorial](https://github.com/tensorflow/nmt)
for building sequence-to-sequence models with the latest TensorFlow API.

View File

@ -0,0 +1,461 @@
# TensorFlow Linear Model Tutorial
In this tutorial, we will use the tf.estimator API in TensorFlow to solve a
binary classification problem: Given census data about a person such as age,
education, marital status, and occupation (the features), we will try to predict
whether or not the person earns more than 50,000 dollars a year (the target
label). We will train a **logistic regression** model, and given an individual's
information our model will output a number between 0 and 1, which can be
interpreted as the probability that the individual has an annual income of over
50,000 dollars.
## Setup
To try the code for this tutorial:
1. @{$install$Install TensorFlow} if you haven't already.
2. Download [the tutorial code](https://github.com/tensorflow/models/tree/master/official/wide_deep/).
3. Execute the data download script we provide to you:
$ python data_download.py
4. Execute the tutorial code with the following command to train the linear
model described in this tutorial:
$ python wide_deep.py --model_type=wide
Read on to find out how this code builds its linear model.
## Reading The Census Data
The dataset we'll be using is the
[Census Income Dataset](https://archive.ics.uci.edu/ml/datasets/Census+Income).
We have provided
[data_download.py](https://github.com/tensorflow/models/tree/master/official/wide_deep/data_download.py)
which downloads the data and performs some additional cleanup.
Since the task is a binary classification problem, we'll construct a label
column named "label" whose value is 1 if the income is over 50K, and 0
otherwise. For reference, see `input_fn` in
[wide_deep.py](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py).
Next, let's take a look at the dataframe and see which columns we can use to
predict the target label. The columns can be grouped into two types—categorical
and continuous columns:
* A column is called **categorical** if its value can only be one of the
categories in a finite set. For example, the relationship status of a person
(wife, husband, unmarried, etc.) or the education level (high school,
college, etc.) are categorical columns.
* A column is called **continuous** if its value can be any numerical value in
a continuous range. For example, the capital gain of a person (e.g. $14,084)
is a continuous column.
Here's a list of columns available in the Census Income dataset:
| Column Name | Type | Description |
| -------------- | ----------- | --------------------------------- |
| age | Continuous | The age of the individual |
| workclass | Categorical | The type of employer the |
: : : individual has (government, :
: : : military, private, etc.). :
| fnlwgt | Continuous | The number of people the census |
: : : takers believe that observation :
: : : represents (sample weight). Final :
: : : weight will not be used. :
| education | Categorical | The highest level of education |
: : : achieved for that individual. :
| education_num | Continuous | The highest level of education in |
: : : numerical form. :
| marital_status | Categorical | Marital status of the individual. |
| occupation | Categorical | The occupation of the individual. |
| relationship | Categorical | Wife, Own-child, Husband, |
: : : Not-in-family, Other-relative, :
: : : Unmarried. :
| race | Categorical | Amer-Indian-Eskimo, Asian-Pac- |
: : : Islander, Black, White, Other. :
| gender | Categorical | Female, Male. |
| capital_gain | Continuous | Capital gains recorded. |
| capital_loss | Continuous | Capital Losses recorded. |
| hours_per_week | Continuous | Hours worked per week. |
| native_country | Categorical | Country of origin of the |
: : : individual. :
| income_bracket | Categorical | ">50K" or "<=50K", meaning |
: : : whether the person makes more :
: : : than $50,000 annually. :
## Converting Data into Tensors
When building a tf.estimator model, the input data is specified by means of an
Input Builder function. This builder function will not be called until it is
later passed to tf.estimator.Estimator methods such as `train` and `evaluate`.
The purpose of this function is to construct the input data, which is
represented in the form of @{tf.Tensor}s or @{tf.SparseTensor}s.
In more detail, the input builder function returns the following as a pair:
1. `features`: A dict from feature column names to `Tensors` or
`SparseTensors`.
2. `labels`: A `Tensor` containing the label column.
The keys of the `features` will be used to construct columns in the next
section. Because we want to call the `train` and `evaluate` methods with
different data, we define a method that returns an input function based on the
given data. Note that the returned input function will be called while
constructing the TensorFlow graph, not while running the graph. What it is
returning is a representation of the input data as the fundamental unit of
TensorFlow computations, a `Tensor` (or `SparseTensor`).
Each continuous column in the train or test data will be converted into a
`Tensor`, which in general is a good format to represent dense data. For
categorical data, we must represent the data as a `SparseTensor`. This data
format is good for representing sparse data. Our `input_fn` uses the `tf.data`
API, which makes it easy to apply transformations to our dataset:
```python
def input_fn(data_file, num_epochs, shuffle, batch_size):
  """Build the `(features, labels)` input tensors for the Estimator.

  Args:
    data_file: Path of the CSV file to read; it must already exist.
    num_epochs: Passed to `Dataset.repeat`.
    shuffle: If true, shuffle the raw lines before they are parsed.
    batch_size: Passed to `Dataset.batch`.

  Returns:
    A pair `(features, labels)` taken from a one-shot iterator: `features` is
    a dict keyed by the names in `_CSV_COLUMNS` (minus 'income_bracket') and
    `labels` is the result of comparing 'income_bracket' with '>50K'.
  """
  assert tf.gfile.Exists(data_file), (
      '%s not found. Please make sure you have either run data_download.py or '
      'set both arguments --train_data and --test_data.' % data_file)
  def parse_csv(value):
    # Decode one CSV line into named column tensors plus a boolean label.
    print('Parsing', data_file)
    fields = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS)
    by_name = dict(zip(_CSV_COLUMNS, fields))
    income = by_name.pop('income_bracket')
    return by_name, tf.equal(income, '>50K')
  # Read the file line by line with the Dataset API.
  lines = tf.data.TextLineDataset(data_file)
  if shuffle:
    lines = lines.shuffle(buffer_size=_SHUFFLE_BUFFER)
  examples = lines.map(parse_csv, num_parallel_calls=5)
  # Repeating happens only after shuffling so that separate epochs do not
  # blend together.
  batches = examples.repeat(num_epochs).batch(batch_size)
  features, labels = batches.make_one_shot_iterator().get_next()
  return features, labels
```
## Selecting and Engineering Features for the Model
Selecting and crafting the right set of feature columns is key to learning an
effective model. A **feature column** can be either one of the raw columns in
the original dataframe (let's call them **base feature columns**), or any new
columns created based on some transformations defined over one or multiple base
columns (let's call them **derived feature columns**). Basically, "feature
column" is an abstract concept of any raw or derived variable that can be used
to predict the target label.
### Base Categorical Feature Columns
To define a feature column for a categorical feature, we can create a
`CategoricalColumn` using the tf.feature_column API. If you know the set of all
possible feature values of a column and there are only a few of them, you can
use `categorical_column_with_vocabulary_list`. Each key in the list will get
assigned an auto-incremental ID starting from 0. For example, for the
`relationship` column we can assign the feature string "Husband" to an integer
ID of 0 and "Not-in-family" to 1, etc., by doing:
```python
relationship = tf.feature_column.categorical_column_with_vocabulary_list(
'relationship', [
'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
'Other-relative'])
```
What if we don't know the set of possible values in advance? Not a problem. We
can use `categorical_column_with_hash_bucket` instead:
```python
occupation = tf.feature_column.categorical_column_with_hash_bucket(
'occupation', hash_bucket_size=1000)
```
What will happen is that each possible value in the feature column `occupation`
will be hashed to an integer ID as we encounter them in training. See an example
illustration below:
ID | Feature
--- | -------------
... |
9 | `"Machine-op-inspct"`
... |
103 | `"Farming-fishing"`
... |
375 | `"Protective-serv"`
... |
No matter which way we choose to define a `CategoricalColumn`, each feature string
will be mapped into an integer ID by looking up a fixed mapping or by hashing.
Note that hashing collisions are possible, but may not significantly impact the
model quality. Under the hood, the `LinearModel` class is responsible for
managing the mapping and creating `tf.Variable` to store the model parameters
(also known as model weights) for each feature ID. The model parameters will be
learned through the model training process we'll go through later.
We'll use a similar trick to define the other categorical features:
```python
education = tf.feature_column.categorical_column_with_vocabulary_list(
'education', [
'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
'5th-6th', '10th', '1st-4th', 'Preschool', '12th'])
marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
'marital_status', [
'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])
relationship = tf.feature_column.categorical_column_with_vocabulary_list(
'relationship', [
'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
'Other-relative'])
workclass = tf.feature_column.categorical_column_with_vocabulary_list(
'workclass', [
'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])
# To show an example of hashing:
occupation = tf.feature_column.categorical_column_with_hash_bucket(
'occupation', hash_bucket_size=1000)
```
### Base Continuous Feature Columns
Similarly, we can define a `NumericColumn` for each continuous feature column
that we want to use in the model:
```python
age = tf.feature_column.numeric_column('age')
education_num = tf.feature_column.numeric_column('education_num')
capital_gain = tf.feature_column.numeric_column('capital_gain')
capital_loss = tf.feature_column.numeric_column('capital_loss')
hours_per_week = tf.feature_column.numeric_column('hours_per_week')
```
### Making Continuous Features Categorical through Bucketization
Sometimes the relationship between a continuous feature and the label is not
linear. As a hypothetical example, a person's income may grow with age in the
early stage of one's career, then the growth may slow at some point, and finally
the income decreases after retirement. In this scenario, using the raw `age` as
a real-valued feature column might not be a good choice because the model can
only learn one of the three cases:
1. Income always increases at some rate as age grows (positive correlation),
1. Income always decreases at some rate as age grows (negative correlation), or
1. Income stays the same no matter at what age (no correlation)
If we want to learn the fine-grained correlation between income and each age
group separately, we can leverage **bucketization**. Bucketization is a process
of dividing the entire range of a continuous feature into a set of consecutive
bins/buckets, and then converting the original numerical feature into a bucket
ID (as a categorical feature) depending on which bucket that value falls into.
So, we can define a `bucketized_column` over `age` as:
```python
age_buckets = tf.feature_column.bucketized_column(
age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
```
where the `boundaries` is a list of bucket boundaries. In this case, there are
10 boundaries, resulting in 11 age group buckets (from age 17 and below, 18-24,
25-29, ..., to 65 and over).
### Intersecting Multiple Columns with CrossedColumn
Using each base feature column separately may not be enough to explain the data.
For example, the correlation between education and the label (earning > 50,000
dollars) may be different for different occupations. Therefore, if we only learn
a single model weight for `education="Bachelors"` and `education="Masters"`, we
won't be able to capture every single education-occupation combination (e.g.
distinguishing between `education="Bachelors" AND occupation="Exec-managerial"`
and `education="Bachelors" AND occupation="Craft-repair"`). To learn the
differences between different feature combinations, we can add **crossed feature
columns** to the model.
```python
education_x_occupation = tf.feature_column.crossed_column(
['education', 'occupation'], hash_bucket_size=1000)
```
We can also create a `CrossedColumn` over more than two columns. Each
constituent column can be a base feature column that is categorical
(`CategoricalColumn`), a bucketized real-valued feature column
(`BucketizedColumn`), or even another `CrossedColumn`. Here's an example:
```python
age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(
[age_buckets, 'education', 'occupation'], hash_bucket_size=1000)
```
## Defining The Logistic Regression Model
After processing the input data and defining all the feature columns, we're now
ready to put them all together and build a Logistic Regression model. In the
previous section we've seen several types of base and derived feature columns,
including:
* `CategoricalColumn`
* `NumericColumn`
* `BucketizedColumn`
* `CrossedColumn`
All of these are subclasses of the abstract `FeatureColumn` class, and can be
added to the `feature_columns` field of a model:
```python
base_columns = [
education, marital_status, relationship, workclass, occupation,
age_buckets,
]
crossed_columns = [
tf.feature_column.crossed_column(
['education', 'occupation'], hash_bucket_size=1000),
tf.feature_column.crossed_column(
[age_buckets, 'education', 'occupation'], hash_bucket_size=1000),
]
model_dir = tempfile.mkdtemp()
model = tf.estimator.LinearClassifier(
model_dir=model_dir, feature_columns=base_columns + crossed_columns)
```
The model also automatically learns a bias term, which controls the prediction
one would make without observing any features (see the section "How Logistic
Regression Works" for more explanations). The learned model files will be stored
in `model_dir`.
## Training and Evaluating Our Model
After adding all the features to the model, now let's look at how to actually
train the model. Training a model is just a single command using the
tf.estimator API:
```python
model.train(input_fn=lambda: input_fn(train_data, num_epochs, True, batch_size))
```
After the model is trained, we can evaluate how good our model is at predicting
the labels of the holdout data:
```python
results = model.evaluate(input_fn=lambda: input_fn(
test_data, 1, False, batch_size))
for key in sorted(results):
print('%s: %s' % (key, results[key]))
```
The first line of the final output should be something like
`accuracy: 0.83557522`, which means the accuracy is 83.6%. Feel free to try more
features and transformations and see if you can do even better!
After the model is evaluated, we can use it to predict whether an individual has
an annual income of over 50,000 dollars, given that individual's information:
```python
pred_iter = model.predict(input_fn=lambda: input_fn(FLAGS.test_data, 1, False, 1))
for pred in pred_iter:
print(pred['classes'])
```
The prediction output will look like `[b'1']` or `[b'0']`, indicating whether or not the corresponding individual has an annual income of over 50,000 dollars.
If you'd like to see a working end-to-end example, you can download our
[example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py)
and set the `model_type` flag to `wide`.
## Adding Regularization to Prevent Overfitting
Regularization is a technique used to avoid **overfitting**. Overfitting happens
when your model does well on the data it is trained on, but worse on test data
that the model has not seen before, such as live traffic. Overfitting generally
occurs when a model is excessively complex, such as having too many parameters
relative to the amount of observed training data. Regularization allows you
to control your model's complexity and makes the model more generalizable to
unseen data.
In the Linear Model library, you can add L1 and L2 regularizations to the model
as:
```python
model = tf.estimator.LinearClassifier(
model_dir=model_dir, feature_columns=base_columns + crossed_columns,
optimizer=tf.train.FtrlOptimizer(
learning_rate=0.1,
l1_regularization_strength=1.0,
l2_regularization_strength=1.0))
```
One important difference between L1 and L2 regularization is that L1
regularization tends to make model weights stay at zero, creating sparser
models, whereas L2 regularization also tries to make the model weights closer to
zero but not necessarily zero. Therefore, if you increase the strength of L1
regularization, you will have a smaller model size because many of the model
weights will be zero. This is often desirable when the feature space is very
large but sparse, and when there are resource constraints that prevent you from
serving a model that is too large.
In practice, you should try various combinations of L1 and L2 regularization
strengths and find the parameters that best control overfitting and give you a
desirable model size.
## How Logistic Regression Works
Finally, let's take a minute to talk about what the Logistic Regression model
actually looks like in case you're not already familiar with it. We'll denote
the label as \\(Y\\), and the set of observed features as a feature vector
\\(\mathbf{x}=[x_1, x_2, ..., x_d]\\). We define \\(Y=1\\) if an individual
earned > 50,000 dollars and \\(Y=0\\) otherwise. In Logistic Regression, the
probability of the label being positive (\\(Y=1\\)) given the features
\\(\mathbf{x}\\) is given as:
$$ P(Y=1|\mathbf{x}) = \frac{1}{1+\exp(-(\mathbf{w}^T\mathbf{x}+b))}$$
where \\(\mathbf{w}=[w_1, w_2, ..., w_d]\\) are the model weights for the
features \\(\mathbf{x}=[x_1, x_2, ..., x_d]\\). \\(b\\) is a constant that is
often called the **bias** of the model. The equation consists of two parts—a
linear model and a logistic function:
* **Linear Model**: First, we can see that \\(\mathbf{w}^T\mathbf{x}+b = b +
w_1x_1 + ... +w_dx_d\\) is a linear model where the output is a linear
function of the input features \\(\mathbf{x}\\). The bias \\(b\\) is the
prediction one would make without observing any features. The model weight
\\(w_i\\) reflects how the feature \\(x_i\\) is correlated with the positive
label. If \\(x_i\\) is positively correlated with the positive label, the
weight \\(w_i\\) increases, and the probability \\(P(Y=1|\mathbf{x})\\) will
be closer to 1. On the other hand, if \\(x_i\\) is negatively correlated
with the positive label, then the weight \\(w_i\\) decreases and the
probability \\(P(Y=1|\mathbf{x})\\) will be closer to 0.
* **Logistic Function**: Second, we can see that there's a logistic function
(also known as the sigmoid function) \\(S(t) = 1/(1+\exp(-t))\\) being
applied to the linear model. The logistic function is used to convert the
output of the linear model \\(\mathbf{w}^T\mathbf{x}+b\\) from any real
number into the range of \\([0, 1]\\), which can be interpreted as a
probability.
Model training is an optimization problem: The goal is to find a set of model
weights (i.e. model parameters) to minimize a **loss function** defined over the
training data, such as logistic loss for Logistic Regression models. The loss
function measures the discrepancy between the ground-truth label and the model's
prediction. If the prediction is very close to the ground-truth label, the loss
value will be low; if the prediction is very far from the label, then the loss
value will be high.
## Learn Deeper
If you're interested in learning more, check out our
@{$wide_and_deep$Wide & Deep Learning Tutorial} where we'll show you how to
combine the strengths of linear models and deep neural networks by jointly
training them using the tf.estimator API.

View File

@ -0,0 +1,243 @@
# TensorFlow Wide & Deep Learning Tutorial
In the previous @{$wide$TensorFlow Linear Model Tutorial}, we trained a logistic
regression model to predict the probability that the individual has an annual
income of over 50,000 dollars using the
[Census Income Dataset](https://archive.ics.uci.edu/ml/datasets/Census+Income).
TensorFlow is great for training deep neural networks too, and you might be
wondering which one you should choose—well, why not both? Would it be possible to
combine the strengths of both in one model?
In this tutorial, we'll introduce how to use the tf.estimator API to jointly
train a wide linear model and a deep feed-forward neural network. This approach
combines the strengths of memorization and generalization. It's useful for
generic large-scale regression and classification problems with sparse input
features (e.g., categorical features with a large number of possible feature
values). If you're interested in learning more about how Wide & Deep Learning
works, please check out our [research paper](https://arxiv.org/abs/1606.07792).
![Wide & Deep Spectrum of Models](https://www.tensorflow.org/images/wide_n_deep.svg "Wide & Deep")
The figure above shows a comparison of a wide model (logistic regression with
sparse features and transformations), a deep model (feed-forward neural network
with an embedding layer and several hidden layers), and a Wide & Deep model
(joint training of both). At a high level, there are only 3 steps to configure a
wide, deep, or Wide & Deep model using the tf.estimator API:
1. Select features for the wide part: Choose the sparse base columns and
crossed columns you want to use.
1. Select features for the deep part: Choose the continuous columns, the
embedding dimension for each categorical column, and the hidden layer sizes.
1. Put them all together in a Wide & Deep model
(`DNNLinearCombinedClassifier`).
And that's it! Let's go through a simple example.
## Setup
To try the code for this tutorial:
1. @{$install$Install TensorFlow} if you haven't already.
2. Download [the tutorial code](https://github.com/tensorflow/models/tree/master/official/wide_deep/).
3. Execute the data download script we provide to you:
$ python data_download.py
4. Execute the tutorial code with the following command to train the wide and
deep model described in this tutorial:
$ python wide_deep.py
Read on to find out how this code builds its model.
## Define Base Feature Columns
First, let's define the base categorical and continuous feature columns that
we'll use. These base columns will be the building blocks used by both the wide
part and the deep part of the model.
```python
import tensorflow as tf
# Continuous columns
age = tf.feature_column.numeric_column('age')
education_num = tf.feature_column.numeric_column('education_num')
capital_gain = tf.feature_column.numeric_column('capital_gain')
capital_loss = tf.feature_column.numeric_column('capital_loss')
hours_per_week = tf.feature_column.numeric_column('hours_per_week')
education = tf.feature_column.categorical_column_with_vocabulary_list(
'education', [
'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
'5th-6th', '10th', '1st-4th', 'Preschool', '12th'])
marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
'marital_status', [
'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])
relationship = tf.feature_column.categorical_column_with_vocabulary_list(
'relationship', [
'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
'Other-relative'])
workclass = tf.feature_column.categorical_column_with_vocabulary_list(
'workclass', [
'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])
# To show an example of hashing:
occupation = tf.feature_column.categorical_column_with_hash_bucket(
'occupation', hash_bucket_size=1000)
# Transformations.
age_buckets = tf.feature_column.bucketized_column(
age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
```
## The Wide Model: Linear Model with Crossed Feature Columns
The wide model is a linear model with a wide set of sparse and crossed feature
columns:
```python
base_columns = [
education, marital_status, relationship, workclass, occupation,
age_buckets,
]
crossed_columns = [
tf.feature_column.crossed_column(
['education', 'occupation'], hash_bucket_size=1000),
tf.feature_column.crossed_column(
[age_buckets, 'education', 'occupation'], hash_bucket_size=1000),
]
```
You can also see the @{$wide$TensorFlow Linear Model Tutorial} for more details.
Wide models with crossed feature columns can memorize sparse interactions
between features effectively. That being said, one limitation of crossed feature
columns is that they do not generalize to feature combinations that have not
appeared in the training data. Let's add a deep model with embeddings to fix
that.
## The Deep Model: Neural Network with Embeddings
The deep model is a feed-forward neural network, as shown in the previous
figure. Each of the sparse, high-dimensional categorical features is first
converted into a low-dimensional and dense real-valued vector, often referred to
as an embedding vector. These low-dimensional dense embedding vectors are
concatenated with the continuous features, and then fed into the hidden layers
of a neural network in the forward pass. The embedding values are initialized
randomly, and are trained along with all other model parameters to minimize the
training loss. If you're interested in learning more about embeddings, check out
the TensorFlow tutorial on @{$word2vec$Vector Representations of Words} or
[Word embedding](https://en.wikipedia.org/wiki/Word_embedding) on Wikipedia.
Another way to represent categorical columns to feed into a neural network is
via a one-hot or multi-hot representation. This is often appropriate for
categorical columns with only a few possible values. As an example of a one-hot
representation, for the relationship column, `"Husband"` can be represented as
[1, 0, 0, 0, 0, 0], and `"Not-in-family"` as [0, 1, 0, 0, 0, 0], etc. This is a
fixed representation, whereas embeddings are more flexible and calculated at
training time.
We'll configure the embeddings for the categorical columns using
`embedding_column`, and concatenate them with the continuous columns.
We also use `indicator_column` to create multi-hot representations of some
categorical columns.
```python
deep_columns = [
age,
education_num,
capital_gain,
capital_loss,
hours_per_week,
tf.feature_column.indicator_column(workclass),
tf.feature_column.indicator_column(education),
tf.feature_column.indicator_column(marital_status),
tf.feature_column.indicator_column(relationship),
# To show an example of embedding
tf.feature_column.embedding_column(occupation, dimension=8),
]
```
The higher the `dimension` of the embedding is, the more degrees of freedom the
model will have to learn the representations of the features. For simplicity, we
set the dimension to 8 for all feature columns here. Empirically, a more
informed decision for the number of dimensions is to start with a value on the
order of \\(\log_2(n)\\) or \\(k\sqrt[4]n\\), where \\(n\\) is the number of
unique features in a feature column and \\(k\\) is a small constant (usually
smaller than 10).
Through dense embeddings, deep models can generalize better and make predictions
on feature pairs that were previously unseen in the training data. However, it
is difficult to learn effective low-dimensional representations for feature
columns when the underlying interaction matrix between two feature columns is
sparse and high-rank. In such cases, the interaction between most feature pairs
should be zero except a few, but dense embeddings will lead to nonzero
predictions for all feature pairs, and thus can over-generalize. On the other
hand, linear models with crossed features can memorize these “exception rules”
effectively with fewer model parameters.
Now, let's see how to jointly train wide and deep models and allow them to
complement each other's strengths and weaknesses.
## Combining Wide and Deep Models into One
The wide models and deep models are combined by summing up their final output
log odds as the prediction, then feeding the prediction to a logistic loss
function. All the graph definition and variable allocations have already been
handled for you under the hood, so you simply need to create a
`DNNLinearCombinedClassifier`:
```python
model = tf.estimator.DNNLinearCombinedClassifier(
model_dir='/tmp/census_model',
linear_feature_columns=base_columns + crossed_columns,
dnn_feature_columns=deep_columns,
dnn_hidden_units=[100, 50])
```
## Training and Evaluating The Model
Before we train the model, let's read in the Census dataset as we did in the
@{$wide$TensorFlow Linear Model tutorial}. See `data_download.py` as well as
`input_fn` within
[`wide_deep.py`](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py).
After reading in the data, you can train and evaluate the model:
```python
# Train and evaluate the model every `FLAGS.epochs_per_eval` epochs.
for n in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
model.train(input_fn=lambda: input_fn(
FLAGS.train_data, FLAGS.epochs_per_eval, True, FLAGS.batch_size))
results = model.evaluate(input_fn=lambda: input_fn(
FLAGS.test_data, 1, False, FLAGS.batch_size))
# Display evaluation metrics
print('Results at epoch', (n + 1) * FLAGS.epochs_per_eval)
print('-' * 30)
for key in sorted(results):
print('%s: %s' % (key, results[key]))
```
The final output accuracy should be somewhere around 85.5%. If you'd like to
see a working end-to-end example, you can download our
[example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py).
Note that this tutorial is just a quick example on a small dataset to get you
familiar with the API. Wide & Deep Learning will be even more powerful if you
try it on a large dataset with many sparse feature columns that have a large
number of possible feature values. Again, feel free to take a look at our
[research paper](https://arxiv.org/abs/1606.07792) for more ideas about how to
apply Wide & Deep Learning in real-world large-scale machine learning problems.

View File

@ -0,0 +1,405 @@
# Vector Representations of Words
In this tutorial we look at the word2vec model by
[Mikolov et al.](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf)
This model is used for learning vector representations of words, called "word
embeddings".
## Highlights
This tutorial is meant to highlight the interesting, substantive parts of
building a word2vec model in TensorFlow.
* We start by giving the motivation for why we would want to
represent words as vectors.
* We look at the intuition behind the model and how it is trained
(with a splash of math for good measure).
* We also show a simple implementation of the model in TensorFlow.
* Finally, we look at ways to make the naive version scale better.
We walk through the code later during the tutorial, but if you'd prefer to dive
straight in, feel free to look at the minimalistic implementation in
[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/word2vec/word2vec_basic.py).
This basic example contains the code needed to download some data, train on it a
bit and visualize the result. Once you get comfortable with reading and running
the basic version, you can graduate to
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py)
which is a more serious implementation that showcases some more advanced
TensorFlow principles about how to efficiently use threads to move data into a
text model, how to checkpoint during training, etc.
But first, let's look at why we would want to learn word embeddings in the first
place. Feel free to skip this section if you're an Embedding Pro and you'd just
like to get your hands dirty with the details.
## Motivation: Why Learn Word Embeddings?
Image and audio processing systems work with rich, high-dimensional datasets
encoded as vectors of the individual raw pixel-intensities for image data, or
e.g. power spectral density coefficients for audio data. For tasks like object
or speech recognition we know that all the information required to successfully
perform the task is encoded in the data (because humans can perform these tasks
from the raw data). However, natural language processing systems traditionally
treat words as discrete atomic symbols, and therefore 'cat' may be represented
as `Id537` and 'dog' as `Id143`. These encodings are arbitrary, and provide
no useful information to the system regarding the relationships that may exist
between the individual symbols. This means that the model can leverage
very little of what it has learned about 'cats' when it is processing data about
'dogs' (such as that they are both animals, four-legged, pets, etc.). Representing
words as unique, discrete ids furthermore leads to data sparsity, and usually
means that we may need more data in order to successfully train statistical
models. Using vector representations can overcome some of these obstacles.
<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/audio-image-text.png" alt>
</div>
[Vector space models](https://en.wikipedia.org/wiki/Vector_space_model) (VSMs)
represent (embed) words in a continuous vector space where semantically
similar words are mapped to nearby points ('are embedded near each other').
VSMs have a long, rich history in NLP, but all methods depend in some way or
another on the
[Distributional Hypothesis](https://en.wikipedia.org/wiki/Distributional_semantics#Distributional_Hypothesis),
which states that words that appear in the same contexts share
semantic meaning. The different approaches that leverage this principle can be
divided into two categories: *count-based methods* (e.g.
[Latent Semantic Analysis](https://en.wikipedia.org/wiki/Latent_semantic_analysis)),
and *predictive methods* (e.g.
[neural probabilistic language models](http://www.scholarpedia.org/article/Neural_net_language_models)).
This distinction is elaborated in much more detail by
[Baroni et al.](http://clic.cimec.unitn.it/marco/publications/acl2014/baroni-etal-countpredict-acl2014.pdf),
but in a nutshell: Count-based methods compute the statistics of
how often some word co-occurs with its neighbor words in a large text corpus,
and then map these count-statistics down to a small, dense vector for each word.
Predictive models directly try to predict a word from its neighbors in terms of
learned small, dense *embedding vectors* (considered parameters of the
model).
Word2vec is a particularly computationally-efficient predictive model for
learning word embeddings from raw text. It comes in two flavors, the Continuous
Bag-of-Words model (CBOW) and the Skip-Gram model (Sections 3.1 and 3.2 in [Mikolov et al.](https://arxiv.org/pdf/1301.3781.pdf)). Algorithmically, these
models are similar, except that CBOW predicts target words (e.g. 'mat') from
source context words ('the cat sits on the'), while the skip-gram does the
inverse and predicts source context-words from the target words. This inversion
might seem like an arbitrary choice, but statistically it has the effect that
CBOW smoothes over a lot of the distributional information (by treating an
entire context as one observation). For the most part, this turns out to be a
useful thing for smaller datasets. However, skip-gram treats each context-target
pair as a new observation, and this tends to do better when we have larger
datasets. We will focus on the skip-gram model in the rest of this tutorial.
## Scaling up with Noise-Contrastive Training
Neural probabilistic language models are traditionally trained using the
[maximum likelihood](https://en.wikipedia.org/wiki/Maximum_likelihood) (ML)
principle to maximize the probability of the next word \\(w_t\\) (for "target")
given the previous words \\(h\\) (for "history") in terms of a
[*softmax* function](https://en.wikipedia.org/wiki/Softmax_function),
$$
\begin{align}
P(w_t | h) &= \text{softmax}(\text{score}(w_t, h)) \\
&= \frac{\exp \{ \text{score}(w_t, h) \} }
{\sum_\text{Word w' in Vocab} \exp \{ \text{score}(w', h) \} }
\end{align}
$$
where \\(\text{score}(w_t, h)\\) computes the compatibility of word \\(w_t\\)
with the context \\(h\\) (a dot product is commonly used). We train this model
by maximizing its [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function)
on the training set, i.e. by maximizing
$$
\begin{align}
J_\text{ML} &= \log P(w_t | h) \\
&= \text{score}(w_t, h) -
\log \left( \sum_\text{Word w' in Vocab} \exp \{ \text{score}(w', h) \} \right).
\end{align}
$$
This yields a properly normalized probabilistic model for language modeling.
However, this is very expensive, because we need to compute and normalize each
probability using the score for all other \\(V\\) words \\(w'\\) in the current
context \\(h\\), *at every training step*.
<div style="width:60%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/softmax-nplm.png" alt>
</div>
On the other hand, for feature learning in word2vec we do not need a full
probabilistic model. The CBOW and skip-gram models are instead trained using a
binary classification objective ([logistic regression](https://en.wikipedia.org/wiki/Logistic_regression))
to discriminate the real target words \\(w_t\\) from \\(k\\) imaginary (noise) words \\(\tilde w\\), in the
same context. We illustrate this below for a CBOW model. For skip-gram the
direction is simply inverted.
<div style="width:60%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/nce-nplm.png" alt>
</div>
Mathematically, the objective (for each example) is to maximize
$$J_\text{NEG} = \log Q_\theta(D=1 |w_t, h) +
k \mathop{\mathbb{E}}_{\tilde w \sim P_\text{noise}}
\left[ \log Q_\theta(D = 0 |\tilde w, h) \right]$$
where \\(Q_\theta(D=1 | w, h)\\) is the binary logistic regression probability
under the model of seeing the word \\(w\\) in the context \\(h\\) in the dataset
\\(D\\), calculated in terms of the learned embedding vectors \\(\theta\\). In
practice we approximate the expectation by drawing \\(k\\) contrastive words
from the noise distribution (i.e. we compute a
[Monte Carlo average](https://en.wikipedia.org/wiki/Monte_Carlo_integration)).
This objective is maximized when the model assigns high probabilities
to the real words, and low probabilities to noise words. Technically, this is
called
[Negative Sampling](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf),
and there is good mathematical motivation for using this loss function:
The updates it proposes approximate the updates of the softmax function in the
limit. But computationally it is especially appealing because computing the
loss function now scales only with the number of *noise words* that we
select (\\(k\\)), and not *all words* in the vocabulary (\\(V\\)). This makes it
much faster to train. We will actually make use of the very similar
[noise-contrastive estimation (NCE)](https://papers.nips.cc/paper/5165-learning-word-embeddings-efficiently-with-noise-contrastive-estimation.pdf)
loss, for which TensorFlow has a handy helper function `tf.nn.nce_loss()`.
Let's get an intuitive feel for how this would work in practice!
## The Skip-gram Model
As an example, let's consider the dataset
`the quick brown fox jumped over the lazy dog`
We first form a dataset of words and the contexts in which they appear. We
could define 'context' in any way that makes sense, and in fact people have
looked at syntactic contexts (i.e. the syntactic dependents of the current
target word, see e.g.
[Levy et al.](https://levyomer.files.wordpress.com/2014/04/dependency-based-word-embeddings-acl-2014.pdf)),
words-to-the-left of the target, words-to-the-right of the target, etc. For now,
let's stick to the vanilla definition and define 'context' as the window
of words to the left and to the right of a target word. Using a window
size of 1, we then have the dataset
`([the, brown], quick), ([quick, fox], brown), ([brown, jumped], fox), ...`
of `(context, target)` pairs. Recall that skip-gram inverts contexts and
targets, and tries to predict each context word from its target word, so the
task becomes to predict 'the' and 'brown' from 'quick', 'quick' and 'fox' from
'brown', etc. Therefore our dataset becomes
`(quick, the), (quick, brown), (brown, quick), (brown, fox), ...`
of `(input, output)` pairs. The objective function is defined over the entire
dataset, but we typically optimize this with
[stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent)
(SGD) using one example at a time (or a 'minibatch' of `batch_size` examples,
where typically `16 <= batch_size <= 512`). So let's look at one step of
this process.
Let's imagine at training step \\(t\\) we observe the first training case above,
where the goal is to predict `the` from `quick`. We select `num_noise` number
of noisy (contrastive) examples by drawing from some noise distribution,
typically the unigram distribution, \\(P(w)\\). For simplicity let's say
`num_noise=1` and we select `sheep` as a noisy example. Next we compute the
loss for this pair of observed and noisy examples, i.e. the objective at time
step \\(t\\) becomes
$$J^{(t)}_\text{NEG} = \log Q_\theta(D=1 | \text{the, quick}) +
\log(Q_\theta(D=0 | \text{sheep, quick}))$$
The goal is to make an update to the embedding parameters \\(\theta\\) to improve
(in this case, maximize) this objective function. We do this by deriving the
gradient of the loss with respect to the embedding parameters \\(\theta\\), i.e.
\\(\frac{\partial}{\partial \theta} J_\text{NEG}\\) (luckily TensorFlow provides
easy helper functions for doing this!). We then perform an update to the
embeddings by taking a small step in the direction of the gradient. When this
process is repeated over the entire training set, this has the effect of
'moving' the embedding vectors around for each word until the model is
successful at discriminating real words from noise words.
We can visualize the learned vectors by projecting them down to 2 dimensions
using for instance something like the
[t-SNE dimensionality reduction technique](https://lvdmaaten.github.io/tsne/).
When we inspect these visualizations it becomes apparent that the vectors
capture some general, and in fact quite useful, semantic information about
words and their relationships to one another. It was very interesting when we
first discovered that certain directions in the induced vector space specialize
towards certain semantic relationships, e.g. *male-female*, *verb tense* and
even *country-capital* relationships between words, as illustrated in the figure
below (see also for example
[Mikolov et al., 2013](https://www.aclweb.org/anthology/N13-1090)).
<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/linear-relationships.png" alt>
</div>
This explains why these vectors are also useful as features for many canonical
NLP prediction tasks, such as part-of-speech tagging or named entity recognition
(see for example the original work by
[Collobert et al., 2011](https://arxiv.org/abs/1103.0398)
([pdf](https://arxiv.org/pdf/1103.0398.pdf)), or follow-up work by
[Turian et al., 2010](https://www.aclweb.org/anthology/P10-1040)).
But for now, let's just use them to draw pretty pictures!
## Building the Graph
This is all about embeddings, so let's define our embedding matrix.
This is just a big random matrix to start. We'll initialize the values to be
uniformly distributed in the range `[-1, 1)`.
```python
embeddings = tf.Variable(
tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
```
The noise-contrastive estimation loss is defined in terms of a logistic regression
model. For this, we need to define the weights and biases for each word in the
vocabulary (also called the `output weights` as opposed to the `input
embeddings`). So let's define that.
```python
nce_weights = tf.Variable(
tf.truncated_normal([vocabulary_size, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))
nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
```
Now that we have the parameters in place, we can define our skip-gram model
graph. For simplicity, let's suppose we've already integerized our text corpus
with a vocabulary so that each word is represented as an integer (see
[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/word2vec/word2vec_basic.py)
for the details). The skip-gram model takes two inputs. One is a batch full of
integers representing the source context words, the other is for the target
words. Let's create placeholder nodes for these inputs, so that we can feed in
data later.
```python
# Placeholders for inputs
train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
```
Now what we need to do is look up the vector for each of the source words in
the batch. TensorFlow has handy helpers that make this easy.
```python
embed = tf.nn.embedding_lookup(embeddings, train_inputs)
```
Ok, now that we have the embeddings for each word, we'd like to try to predict
the target word using the noise-contrastive training objective.
```python
# Compute the NCE loss, using a sample of the negative labels each time.
loss = tf.reduce_mean(
tf.nn.nce_loss(weights=nce_weights,
biases=nce_biases,
labels=train_labels,
inputs=embed,
num_sampled=num_sampled,
num_classes=vocabulary_size))
```
Now that we have a loss node, we need to add the nodes required to compute
gradients and update the parameters, etc. For this we will use stochastic
gradient descent, and TensorFlow has handy helpers to make this easy as well.
```python
# We use the SGD optimizer.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0).minimize(loss)
```
## Training the Model
Training the model is then as simple as using a `feed_dict` to push data into
the placeholders and calling
@{tf.Session.run} with this new data
in a loop.
```python
for inputs, labels in generate_batch(...):
feed_dict = {train_inputs: inputs, train_labels: labels}
_, cur_loss = session.run([optimizer, loss], feed_dict=feed_dict)
```
See the full example code in
[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/word2vec/word2vec_basic.py).
## Visualizing the Learned Embeddings
After training has finished we can visualize the learned embeddings using
t-SNE.
<div style="width:100%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/tsne.png" alt>
</div>
Et voila! As expected, words that are similar end up clustering near each
other. For a more heavyweight implementation of word2vec that showcases more of
the advanced features of TensorFlow, see the implementation in
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
## Evaluating Embeddings: Analogical Reasoning
Embeddings are useful for a wide variety of prediction tasks in NLP. Short of
training a full-blown part-of-speech model or named-entity model, one simple way
to evaluate embeddings is to directly use them to predict syntactic and semantic
relationships like `king is to queen as father is to ?`. This is called
*analogical reasoning* and the task was introduced by
[Mikolov and colleagues](https://www.aclweb.org/anthology/N13-1090).
Download the dataset for this task from
[download.tensorflow.org](http://download.tensorflow.org/data/questions-words.txt).
To see how we do this evaluation, have a look at the `build_eval_graph()` and
`eval()` functions in
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
The choice of hyperparameters can strongly influence the accuracy on this task.
Achieving state-of-the-art performance on this task requires training over a
very large dataset, carefully tuning the hyperparameters and making use of
tricks like subsampling the data, which is out of the scope of this tutorial.
## Optimizing the Implementation
Our vanilla implementation showcases the flexibility of TensorFlow. For
example, changing the training objective is as simple as swapping out the call
to `tf.nn.nce_loss()` for an off-the-shelf alternative such as
`tf.nn.sampled_softmax_loss()`. If you have a new idea for a loss function, you
can manually write an expression for the new objective in TensorFlow and let
the optimizer compute its derivatives. This flexibility is invaluable in the
exploratory phase of machine learning model development, where we are trying
out several different ideas and iterating quickly.
Once you have a model structure you're satisfied with, it may be worth
optimizing your implementation to run more efficiently (and cover more data in
less time). For example, the naive code we used in this tutorial would suffer
compromised speed because we use Python for reading and feeding data items --
each of which requires very little work on the TensorFlow back-end. If you find
your model is seriously bottlenecked on input data, you may want to implement a
custom data reader for your problem, as described in
@{$new_data_formats$New Data Formats}. For the case of Skip-Gram
modeling, we've actually already done this for you as an example in
[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
If your model is no longer I/O bound but you want still more performance, you
can take things further by writing your own TensorFlow Ops, as described in
@{$adding_an_op$Adding a New Op}. Again we've provided an
example of this for the Skip-Gram case in
[models/tutorials/embedding/word2vec_optimized.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec_optimized.py).
Feel free to benchmark these against each other to measure performance
improvements at each stage.
## Conclusion
In this tutorial we covered the word2vec model, a computationally efficient
model for learning word embeddings. We motivated why embeddings are useful,
discussed efficient training techniques and showed how to implement all of this
in TensorFlow. Overall, we hope that this has showcased how TensorFlow affords
you the flexibility you need for early experimentation, and the control you
later need for bespoke optimized implementation.

View File

@ -16,9 +16,9 @@ limitations under the License.
#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_
#define TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_
#include <string>
#include <list>
#include <map>
#include <string>
#include <utility>
namespace tensorflow {

View File

@ -19,10 +19,10 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/core/framework/op_def.pb.h"
#include "tensorflow/core/framework/api_def.pb.h"
#include "tensorflow/core/framework/op_def.pb.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/java/src/gen/cc/op_specs.h"
namespace tensorflow {

View File

@ -14,9 +14,9 @@ limitations under the License.
==============================================================================*/
#include <map>
#include <vector>
#include <string>
#include <utility>
#include <vector>
#include "re2/re2.h"
#include "tensorflow/core/framework/op.h"
@ -50,7 +50,7 @@ class TypeResolver {
// For example, if the argument's datatype is DT_STRING, this method will
// return "java.lang.String", so the argument can become "Operand<String>"
// in the Ops API
Type TypeOf(const OpDef_ArgDef& arg_def, bool *iterable_out);
Type TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out);
// Returns types of an input attribute
//
@ -62,7 +62,7 @@ class TypeResolver {
// <java.lang.Float, float>, so the attribute can be used as a "Float" object
// in the Ops API and casted to a "float" when passing through the JNI layer.
std::pair<Type, Type> TypesOf(const OpDef_AttrDef& attr_def,
bool *iterable_out);
bool* iterable_out);
// Returns true if the type of this attribute has already been resolved
bool IsAttributeVisited(const string& attr_name) {
@ -89,8 +89,7 @@ class TypeResolver {
}
};
Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def,
bool* iterable_out) {
Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) {
*iterable_out = false;
if (!arg_def.number_attr().empty()) {
// when number_attr is set, argument has to be a list of tensors
@ -219,21 +218,17 @@ string SnakeToCamelCase(const string& str, bool upper = false) {
return result;
}
bool FindAndCut(re2::StringPiece* input, const RE2& expr,
re2::StringPiece* before_match, re2::StringPiece* ret_match = nullptr) {
re2::StringPiece match;
if (!expr.Match(*input, 0, input->size(), RE2::UNANCHORED, &match, 1)) {
return false;
}
before_match->set(input->data(), match.begin() - input->begin());
input->remove_prefix(match.end() - before_match->begin());
if (ret_match != nullptr) {
*ret_match = match;
}
bool FindAndCut(string* input, const RE2& expr, string* before_match,
string* ret_match = nullptr) {
string match;
if (!RE2::PartialMatch(*input, expr, &match)) return false;
*before_match = input->substr(0, input->find(match));
*input = input->substr(before_match->size() + match.size());
if (ret_match != nullptr) *ret_match = match;
return true;
}
string ParseDocumentation(re2::StringPiece input) {
string ParseDocumentation(const string& inp) {
std::stringstream javadoc_text;
// TODO(karllessard) This is a very minimalist utility method for converting
@ -246,20 +241,20 @@ string ParseDocumentation(re2::StringPiece input) {
markups_subexpr.push_back("`+"); // inlined code and code blocks
markups_subexpr.push_back("\\*{1,2}\\b"); // text emphasis
markups_subexpr.push_back("\\["); // hyperlinks
const RE2 markup_expr(str_util::Join(markups_subexpr, "|"));
const RE2 markup_expr("(" + str_util::Join(markups_subexpr, "|") + ")");
bool in_list = false;
string input = inp;
while (true) {
re2::StringPiece text;
re2::StringPiece markup;
string text, markup;
if (!FindAndCut(&input, markup_expr, &text, &markup)) {
javadoc_text << input;
break; // end of loop
}
javadoc_text << text;
if (markup.starts_with("\n")) {
if (str_util::StartsWith(markup, "\n")) {
javadoc_text << "\n";
if (markup.contains("*")) {
if (str_util::StrContains(markup, "*")) {
// new list item
javadoc_text << (in_list ? "</li>\n" : "<ul>\n") << "<li>\n";
in_list = true;
@ -267,18 +262,18 @@ string ParseDocumentation(re2::StringPiece input) {
// end of list
javadoc_text << "</li>\n</ul>\n";
in_list = false;
} else if (!input.starts_with("```")) {
} else if (!str_util::StartsWith(input, "```")) {
// new paragraph (not required if a <pre> block follows)
javadoc_text << "<p>\n";
}
} else if (markup.starts_with("```")) {
} else if (str_util::StartsWith(markup, "```")) {
// code blocks
if (FindAndCut(&input, "```\\s*\n*", &text)) {
if (FindAndCut(&input, "(```\\s*\n*)", &text)) {
javadoc_text << "<pre>{@code\n" << text << "}</pre>\n";
} else {
javadoc_text << markup;
}
} else if (markup.starts_with("`")) {
} else if (str_util::StartsWith("(" + markup + ")", "`")) {
// inlined code
if (FindAndCut(&input, markup, &text)) {
javadoc_text << "{@code " << text << "}";
@ -287,26 +282,28 @@ string ParseDocumentation(re2::StringPiece input) {
}
} else if (markup == "**") {
// text emphasis (strong)
if (FindAndCut(&input, "\\b\\*{2}", &text)) {
if (FindAndCut(&input, "(\\b\\*{2})", &text)) {
javadoc_text << "<b>" << ParseDocumentation(text) << "</b>";
} else {
javadoc_text << markup;
}
} else if (markup == "*") {
// text emphasis (normal)
if (FindAndCut(&input, "\\b\\*{1}", &text)) {
if (FindAndCut(&input, "(\\b\\*{1})", &text)) {
javadoc_text << "<i>" << ParseDocumentation(text) << "</i>";
} else {
javadoc_text << markup;
}
} else if (markup.starts_with("[")) {
} else if (str_util::StartsWith(markup, "[")) {
// hyperlinks
string label;
string link;
if (RE2::Consume(&input, "([^\\[]+)\\]\\((http.+)\\)", &label, &link)) {
if (RE2::PartialMatch(input, "([^\\[]+)\\]\\((http.+)\\)", &label,
&link) &&
str_util::StartsWith(input, label + link)) {
input = input.substr(label.size() + link.size());
javadoc_text << "<a href=\"" << link << "\">"
<< ParseDocumentation(label)
<< "</a>";
<< ParseDocumentation(label) << "</a>";
} else {
javadoc_text << markup;
}
@ -319,53 +316,52 @@ string ParseDocumentation(re2::StringPiece input) {
}
ArgumentSpec CreateInput(const OpDef_ArgDef& input_def,
const ApiDef::Arg& input_api_def, TypeResolver* type_resolver) {
const ApiDef::Arg& input_api_def,
TypeResolver* type_resolver) {
bool iterable = false;
Type type = type_resolver->TypeOf(input_def, &iterable);
Type var_type = Type::Interface("Operand", "org.tensorflow")
.add_parameter(type);
Type var_type =
Type::Interface("Operand", "org.tensorflow").add_parameter(type);
if (iterable) {
var_type = Type::IterableOf(var_type);
}
return ArgumentSpec(input_api_def.name(),
return ArgumentSpec(
input_api_def.name(),
Variable::Create(SnakeToCamelCase(input_api_def.rename_to()), var_type),
type,
ParseDocumentation(input_api_def.description()),
iterable);
type, ParseDocumentation(input_api_def.description()), iterable);
}
AttributeSpec CreateAttribute(const OpDef_AttrDef& attr_def,
const ApiDef::Attr& attr_api_def, TypeResolver* type_resolver) {
const ApiDef::Attr& attr_api_def,
TypeResolver* type_resolver) {
bool iterable = false;
std::pair<Type, Type> types = type_resolver->TypesOf(attr_def, &iterable);
Type var_type = types.first.kind() == Type::GENERIC ?
Type::Class("Class").add_parameter(types.first) : types.first;
Type var_type = types.first.kind() == Type::GENERIC
? Type::Class("Class").add_parameter(types.first)
: types.first;
if (iterable) {
var_type = Type::ListOf(var_type);
}
return AttributeSpec(attr_api_def.name(),
return AttributeSpec(
attr_api_def.name(),
Variable::Create(SnakeToCamelCase(attr_api_def.rename_to()), var_type),
types.first,
types.second,
ParseDocumentation(attr_api_def.description()),
iterable,
attr_api_def.has_default_value());
types.first, types.second, ParseDocumentation(attr_api_def.description()),
iterable, attr_api_def.has_default_value());
}
ArgumentSpec CreateOutput(const OpDef_ArgDef& output_def,
const ApiDef::Arg& output_api, TypeResolver* type_resolver) {
const ApiDef::Arg& output_api,
TypeResolver* type_resolver) {
bool iterable = false;
Type type = type_resolver->TypeOf(output_def, &iterable);
Type var_type = Type::Class("Output", "org.tensorflow")
.add_parameter(type);
Type var_type = Type::Class("Output", "org.tensorflow").add_parameter(type);
if (iterable) {
var_type = Type::ListOf(var_type);
}
return ArgumentSpec(output_api.name(),
return ArgumentSpec(
output_api.name(),
Variable::Create(SnakeToCamelCase(output_api.rename_to()), var_type),
type,
ParseDocumentation(output_api.description()),
iterable);
type, ParseDocumentation(output_api.description()), iterable);
}
EndpointSpec CreateEndpoint(const OpDef& op_def, const ApiDef& api_def,
@ -377,11 +373,10 @@ EndpointSpec CreateEndpoint(const OpDef& op_def, const ApiDef& api_def,
package = name_tokens.at(0);
name = name_tokens.at(1);
} else {
package = kDefaultEndpointPackage;
package = "core"; // generate unclassified ops in the 'core' package
name = name_tokens.at(0);
}
return EndpointSpec(package,
name,
return EndpointSpec(package, name,
Javadoc::Create(ParseDocumentation(api_def.summary()))
.details(ParseDocumentation(api_def.description())));
}
@ -389,8 +384,7 @@ EndpointSpec CreateEndpoint(const OpDef& op_def, const ApiDef& api_def,
} // namespace
OpSpec OpSpec::Create(const OpDef& op_def, const ApiDef& api_def) {
OpSpec op(api_def.graph_op_name(),
api_def.visibility() == ApiDef::HIDDEN,
OpSpec op(api_def.graph_op_name(), api_def.visibility() == ApiDef::HIDDEN,
op_def.deprecation().explanation());
TypeResolver type_resolver(op_def);
for (const string& next_input_name : api_def.arg_order()) {
@ -406,8 +400,8 @@ OpSpec OpSpec::Create(const OpDef& op_def, const ApiDef& api_def) {
// do not parse attributes already visited, they have probably been inferred
// before as an input argument type
if (!type_resolver.IsAttributeVisited(op_def.attr(i).name())) {
AttributeSpec attr = CreateAttribute(op_def.attr(i), api_def.attr(i),
&type_resolver);
AttributeSpec attr =
CreateAttribute(op_def.attr(i), api_def.attr(i), &type_resolver);
// attributes with a default value are optional
if (attr.has_default_value() && attr.type().kind() != Type::GENERIC) {
op.optional_attributes_.push_back(attr);
@ -417,8 +411,8 @@ OpSpec OpSpec::Create(const OpDef& op_def, const ApiDef& api_def) {
}
}
for (int i = 0; i < op_def.output_arg().size(); ++i) {
op.outputs_.push_back(CreateOutput(op_def.output_arg(i), api_def.out_arg(i),
&type_resolver));
op.outputs_.push_back(
CreateOutput(op_def.output_arg(i), api_def.out_arg(i), &type_resolver));
}
for (const auto& endpoint_def : api_def.endpoint()) {
op.endpoints_.push_back(CreateEndpoint(op_def, api_def, endpoint_def));

View File

@ -19,9 +19,9 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/core/framework/op_def.pb.h"
#include "tensorflow/core/framework/api_def.pb.h"
#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/op_def.pb.h"
#include "tensorflow/java/src/gen/cc/java_defs.h"
namespace tensorflow {
@ -39,8 +39,7 @@ class EndpointSpec {
// TODO(annarev): hardcode depcreated to false until deprecated is possible
EndpointSpec(const string& package, const string& name,
const Javadoc& javadoc)
: package_(package), name_(name), javadoc_(javadoc),
deprecated_(false) {}
: package_(package), name_(name), javadoc_(javadoc), deprecated_(false) {}
const string& package() const { return package_; }
const string& name() const { return name_; }
@ -63,10 +62,13 @@ class ArgumentSpec {
// type: the tensor type of this argument
// description: a description of this argument, in javadoc
// iterable: true if this argument is a list
ArgumentSpec(const string& op_def_name, const Variable& var,
const Type& type, const string& description, bool iterable)
: op_def_name_(op_def_name), var_(var), type_(type),
description_(description), iterable_(iterable) {}
ArgumentSpec(const string& op_def_name, const Variable& var, const Type& type,
const string& description, bool iterable)
: op_def_name_(op_def_name),
var_(var),
type_(type),
description_(description),
iterable_(iterable) {}
const string& op_def_name() const { return op_def_name_; }
const Variable& var() const { return var_; }
@ -94,11 +96,16 @@ class AttributeSpec {
// iterable: true if this attribute is a list
// has_default_value: true if this attribute has a default value if not set
AttributeSpec(const string& op_def_name, const Variable& var,
const Type& type, const Type& jni_type, const string& description,
bool iterable, bool has_default_value)
: op_def_name_(op_def_name), var_(var), type_(type),
description_(description), iterable_(iterable),
jni_type_(jni_type), has_default_value_(has_default_value) {}
const Type& type, const Type& jni_type,
const string& description, bool iterable,
bool has_default_value)
: op_def_name_(op_def_name),
var_(var),
type_(type),
description_(description),
iterable_(iterable),
jni_type_(jni_type),
has_default_value_(has_default_value) {}
const string& op_def_name() const { return op_def_name_; }
const Variable& var() const { return var_; }
@ -148,7 +155,8 @@ class OpSpec {
// deprecation_explanation: message to show if all endpoints are deprecated
explicit OpSpec(const string& graph_op_name, bool hidden,
const string& deprecation_explanation)
: graph_op_name_(graph_op_name), hidden_(hidden),
: graph_op_name_(graph_op_name),
hidden_(hidden),
deprecation_explanation_(deprecation_explanation) {}
const string graph_op_name_;

View File

@ -0,0 +1,48 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
package org.tensorflow;
/**
* Interface implemented by operands of a TensorFlow operation.
*
* <p>Example usage:
*
* <pre>{@code
* // The "decodeJpeg" operation can be used as input to the "cast" operation
* Input decodeJpeg = ops.image().decodeJpeg(...);
* ops.math().cast(decodeJpeg, DataType.FLOAT);
*
* // The output "y" of the "unique" operation can be used as input to the "cast" operation
* Output y = ops.array().unique(...).y();
* ops.math().cast(y, DataType.FLOAT);
*
* // The "split" operation can be used as input list to the "concat" operation
* Iterable<? extends Input> split = ops.array().split(...);
* ops.array().concat(0, split);
* }</pre>
*
* @param <T> the type parameter of the {@link Output} handle returned by {@link #asOutput()}
*/
public interface Input<T> {
/**
* Returns the symbolic handle of a tensor.
*
* <p>Inputs to TensorFlow operations are outputs of another TensorFlow operation. This method is
* used to obtain a symbolic handle that represents the computation of the input.
*
* @return a symbolic handle that represents the computation of this input
* @see OperationBuilder#addInput(Output)
*/
Output<T> asOutput();
}

View File

@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// GENERATED FILE. To update, edit tftypes.pl instead.
package org.tensorflow.types;
import org.tensorflow.DataType;
/** Type class that represents a boolean. */
public class TFBool implements TFType {
  // Registers this class in Types.typeCodes under DataType.BOOL, then stores
  // the value `false` for it in Types.scalars.
  static {
    Types.typeCodes.put(TFBool.class, DataType.BOOL);
    Types.scalars.put(TFBool.class, false);
  }

  // The only constructor is private, so code outside this class cannot instantiate it.
  private TFBool() {}
}

View File

@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// GENERATED FILE. To update, edit tftypes.pl instead.
package org.tensorflow.types;
import org.tensorflow.DataType;
/** Type class that represents a 64-bit double precision floating point number. */
public class TFDouble implements TFType {
  // Registers this class in Types.typeCodes under DataType.DOUBLE, then stores
  // the value `0.0` for it in Types.scalars.
  static {
    Types.typeCodes.put(TFDouble.class, DataType.DOUBLE);
    Types.scalars.put(TFDouble.class, 0.0);
  }

  // The only constructor is private, so code outside this class cannot instantiate it.
  private TFDouble() {}
}

View File

@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// GENERATED FILE. To update, edit tftypes.pl instead.
package org.tensorflow.types;
import org.tensorflow.DataType;
/** Type class that represents a 32-bit single precision floating point number. */
public class TFFloat implements TFType {
  // Registers this class in Types.typeCodes under DataType.FLOAT, then stores
  // the value `0f` for it in Types.scalars.
  static {
    Types.typeCodes.put(TFFloat.class, DataType.FLOAT);
    Types.scalars.put(TFFloat.class, 0f);
  }

  // The only constructor is private, so code outside this class cannot instantiate it.
  private TFFloat() {}
}

View File

@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// GENERATED FILE. To update, edit tftypes.pl instead.
package org.tensorflow.types;
import org.tensorflow.DataType;
/** Type class that represents a 32-bit signed integer. */
public class TFInt32 implements TFType {
  // Registers this class in Types.typeCodes under DataType.INT32, then stores
  // the value `0` for it in Types.scalars.
  static {
    Types.typeCodes.put(TFInt32.class, DataType.INT32);
    Types.scalars.put(TFInt32.class, 0);
  }

  // The only constructor is private, so code outside this class cannot instantiate it.
  private TFInt32() {}
}

View File

@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// GENERATED FILE. To update, edit tftypes.pl instead.
package org.tensorflow.types;
import org.tensorflow.DataType;
/** Represents a 64-bit signed integer. */
public class TFInt64 implements TFType {
  // Never instantiated: the class serves only as a type tag (TFInt64.class).
  private TFInt64() {}

  // Runs once, when this class is initialized: records the DataType code
  // and then the scalar zero value for this type in the shared Types tables.
  static {
    Types.typeCodes.put(TFInt64.class, DataType.INT64);
    Types.scalars.put(TFInt64.class, 0L);
  }
}

View File

@ -0,0 +1,27 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// GENERATED FILE. To update, edit tftypes.pl instead.
package org.tensorflow.types;
import org.tensorflow.DataType;
/**
 * Represents an arbitrary sequence of bytes.
 *
 * <p>Unlike the numeric type classes, this class registers only a DataType code and no scalar
 * zero value.
 */
public class TFString implements TFType {
  // Never instantiated: the class serves only as a type tag (TFString.class).
  private TFString() {}
  // Runs once, when this class is initialized: records the DataType code for this type.
  static {
    Types.typeCodes.put(TFString.class, DataType.STRING);
  }
}

View File

@ -0,0 +1,20 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
package org.tensorflow.types;
/**
 * A marker interface for classes representing TensorFlow types.
 *
 * <p>The interface declares no members. It exists so that APIs taking a type class can bound
 * their parameter as {@code Class<? extends TFType>}.
 */
public interface TFType {}

View File

@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// GENERATED FILE. To update, edit tftypes.pl instead.
package org.tensorflow.types;
import org.tensorflow.DataType;
/** Represents an 8-bit unsigned integer. */
public class TFUInt8 implements TFType {
  // Never instantiated: the class serves only as a type tag (TFUInt8.class).
  private TFUInt8() {}

  // Runs once, when this class is initialized: records the DataType code
  // and then the scalar zero value for this type in the shared Types tables.
  static {
    Types.typeCodes.put(TFUInt8.class, DataType.UINT8);
    Types.scalars.put(TFUInt8.class, (byte)0);
  }
}

View File

@ -0,0 +1,52 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
package org.tensorflow.types;
import java.util.HashMap;
import java.util.Map;
import org.tensorflow.DataType;
/**
 * Utility class for managing the representation of TensorFlow types as Java
 * types. For each TensorFlow type (e.g., int32), there is a corresponding Java
 * type (e.g., TFInt32) that represents it at compile time and a corresponding
 * class object (e.g., TFInt32.class) that represents it at run time. There is
 * also an enumeration value in DataType that can be used to represent the
 * type, though that should rarely be required.
 *
 * <p>The lookup tables below are filled in by the static initializers of the individual type
 * classes, so both lookup methods first make sure the queried class has been initialized.
 */
public class Types {
  private Types() {} // not instantiable

  // Populated by the static initializer of each type class.
  // NOTE(review): plain HashMap; concurrent first-time initialization of two type classes on
  // different threads would write to it without synchronization -- confirm whether that matters.
  static final Map<Class<?>, DataType> typeCodes = new HashMap<>();

  /**
   * Returns the DataType value corresponding to a TensorFlow type class.
   *
   * @param c the class object of a TensorFlow type, e.g. {@code TFInt32.class}
   * @return the DataType registered for {@code c}
   * @throws IllegalArgumentException if no DataType has been registered for {@code c}
   */
  public static DataType dataType(Class<? extends TFType> c) {
    ensureInitialized(c);
    DataType dtype = typeCodes.get(c);
    if (dtype == null) {
      throw new IllegalArgumentException("" + c + " is not a TensorFlow type.");
    }
    return dtype;
  }

  // Populated by the static initializer of each type class that has a zero value.
  static final Map<Class<?>, Object> scalars = new HashMap<>();

  /** Returns the zero value of type described by {@code c}, or null if
   * the type (e.g., string) is not numeric and therefore has no zero value.
   */
  public static Object zeroValue(Class<? extends TFType> c) {
    ensureInitialized(c);
    return scalars.get(c);
  }

  /**
   * Forces static initialization of {@code c} so that its registration code has run.
   *
   * <p>Evaluating a class literal such as {@code TFInt32.class} does not initialize the class,
   * and the type classes have private constructors and no static members for callers to touch,
   * so without this step a lookup can happen before the type has registered itself.
   */
  private static void ensureInitialized(Class<?> c) {
    if (c == null) {
      // Keep the previous behavior for null: the map lookups simply find nothing.
      return;
    }
    try {
      Class.forName(c.getName(), true, c.getClassLoader());
    } catch (ClassNotFoundException e) {
      // The class object already exists, so this is not expected; fall through and let the
      // caller's lookup report the type as unknown.
    }
  }
}

View File

@ -867,6 +867,19 @@ class ResourceVariable(variables.Variable):
__array_priority__ = 100
def is_initialized(self, name=None):
  """Builds an op reporting whether this resource variable is initialized.

  The result is a boolean scalar that tells whether the tensor has been
  initialized.

  Args:
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `bool`.
  """
  initialized = gen_resource_variable_ops.var_is_initialized_op(
      self.handle, name)
  return initialized
def assign_sub(self, delta, use_locking=None, name=None, read_value=True):
"""Subtracts a value from this variable.

View File

@ -19,7 +19,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import gen_resource_variable_ops
@ -124,9 +123,7 @@ def is_variable_initialized(ref, name=None):
if ref.dtype._is_ref_dtype:
return gen_state_ops.is_variable_initialized(ref=ref, name=name)
# Handle resource variables.
if context.executing_eagerly() or ref.op.type == "VarHandleOp":
return gen_resource_variable_ops.var_is_initialized_op(ref.handle,
name=name)
return ref.is_initialized(name=name)
@tf_export("assign_sub")

View File

@ -95,7 +95,7 @@ bool HostExecutor::MemcpyDeviceToDevice(Stream *stream,
// the nature of the HostExecutor) memcpy on the stream (HostStream)
// associated with the HostExecutor.
AsHostStream(stream)->EnqueueTask(
[src_mem, dst_mem, size]() { memcpy(dst_mem, src_mem, size); });
[src_mem, dst_mem, size]() { memcpy(src_mem, dst_mem, size); });
return true;
}

View File

@ -0,0 +1,502 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Upgrader for Python scripts according to an API change specification."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ast
import collections
import os
import shutil
import sys
import tempfile
import traceback
class APIChangeSpec(object):
  """This class defines the transformations that need to happen.

  This class must provide the following fields:

  * `function_keyword_renames`: maps function names to a map of old -> new
    argument names
  * `function_renames`: maps function names to new function names
  * `change_to_function`: a set of function names that have changed (for
    notifications); a bare reference to such a name gets "()" appended
  * `function_reorders`: maps functions whose argument order has changed to the
    list of arguments in the new order; positional arguments are turned into
    keyword arguments using these names
  * `function_handle`: maps function names to custom handlers for the function;
    each handler is called with the file edit recorder and the call node

  For an example, see `TFAPIChangeSpec`.
  """
class _FileEditTuple(
collections.namedtuple("_FileEditTuple",
["comment", "line", "start", "old", "new"])):
"""Each edit that is recorded by a _FileEditRecorder.
Fields:
comment: A description of the edit and why it was made.
line: The line number in the file where the edit occurs (1-indexed).
start: The line number in the file where the edit occurs (0-indexed).
old: text string to remove (this must match what was in file).
new: text string to add in place of `old`.
"""
__slots__ = ()
class _FileEditRecorder(object):
"""Record changes that need to be done to the file."""
def __init__(self, filename):
# all edits are lists of chars
self._filename = filename
self._line_to_edit = collections.defaultdict(list)
self._errors = []
def process(self, text):
"""Process a list of strings, each corresponding to the recorded changes.
Args:
text: A list of lines of text (assumed to contain newlines)
Returns:
A tuple of the modified text and a textual description of what is done.
Raises:
ValueError: if substitution source location does not have expected text.
"""
change_report = ""
# Iterate of each line
for line, edits in self._line_to_edit.items():
offset = 0
# sort by column so that edits are processed in order in order to make
# indexing adjustments cumulative for changes that change the string
# length
edits.sort(key=lambda x: x.start)
# Extract each line to a list of characters, because mutable lists
# are editable, unlike immutable strings.
char_array = list(text[line - 1])
# Record a description of the change
change_report += "%r Line %d\n" % (self._filename, line)
change_report += "-" * 80 + "\n\n"
for e in edits:
change_report += "%s\n" % e.comment
change_report += "\n Old: %s" % (text[line - 1])
# Make underscore buffers for underlining where in the line the edit was
change_list = [" "] * len(text[line - 1])
change_list_new = [" "] * len(text[line - 1])
# Iterate for each edit
for e in edits:
# Create effective start, end by accounting for change in length due
# to previous edits
start_eff = e.start + offset
end_eff = start_eff + len(e.old)
# Make sure the edit is changing what it should be changing
old_actual = "".join(char_array[start_eff:end_eff])
if old_actual != e.old:
raise ValueError("Expected text %r but got %r" %
("".join(e.old), "".join(old_actual)))
# Make the edit
char_array[start_eff:end_eff] = list(e.new)
# Create the underline highlighting of the before and after
change_list[e.start:e.start + len(e.old)] = "~" * len(e.old)
change_list_new[start_eff:end_eff] = "~" * len(e.new)
# Keep track of how to generate effective ranges
offset += len(e.new) - len(e.old)
# Finish the report comment
change_report += " %s\n" % "".join(change_list)
text[line - 1] = "".join(char_array)
change_report += " New: %s" % (text[line - 1])
change_report += " %s\n\n" % "".join(change_list_new)
return "".join(text), change_report, self._errors
def add(self, comment, line, start, old, new, error=None):
"""Add a new change that is needed.
Args:
comment: A description of what was changed
line: Line number (1 indexed)
start: Column offset (0 indexed)
old: old text
new: new text
error: this "edit" is something that cannot be fixed automatically
Returns:
None
"""
self._line_to_edit[line].append(
_FileEditTuple(comment, line, start, old, new))
if error:
self._errors.append("%s:%d: %s" % (self._filename, line, error))
class _ASTCallVisitor(ast.NodeVisitor):
"""AST Visitor that processes function calls.
Updates function calls from old API version to new API version using a given
change spec.
"""
def __init__(self, filename, lines, api_change_spec):
self._filename = filename
self._file_edit = _FileEditRecorder(filename)
self._lines = lines
self._api_change_spec = api_change_spec
def process(self, lines):
return self._file_edit.process(lines)
def generic_visit(self, node):
ast.NodeVisitor.generic_visit(self, node)
def _rename_functions(self, node, full_name):
function_renames = self._api_change_spec.function_renames
try:
new_name = function_renames[full_name]
self._file_edit.add("Renamed function %r to %r" % (full_name, new_name),
node.lineno, node.col_offset, full_name, new_name)
except KeyError:
pass
def _get_attribute_full_path(self, node):
"""Traverse an attribute to generate a full name e.g. tf.foo.bar.
Args:
node: A Node of type Attribute.
Returns:
a '.'-delimited full-name or None if the tree was not a simple form.
i.e. `foo()+b).bar` returns None, while `a.b.c` would return "a.b.c".
"""
curr = node
items = []
while not isinstance(curr, ast.Name):
if not isinstance(curr, ast.Attribute):
return None
items.append(curr.attr)
curr = curr.value
items.append(curr.id)
return ".".join(reversed(items))
def _find_true_position(self, node):
"""Return correct line number and column offset for a given node.
This is necessary mainly because ListComp's location reporting reports
the next token after the list comprehension list opening.
Args:
node: Node for which we wish to know the lineno and col_offset
"""
import re
find_open = re.compile("^\s*(\\[).*$")
find_string_chars = re.compile("['\"]")
if isinstance(node, ast.ListComp):
# Strangely, ast.ListComp returns the col_offset of the first token
# after the '[' token which appears to be a bug. Workaround by
# explicitly finding the real start of the list comprehension.
line = node.lineno
col = node.col_offset
# loop over lines
while 1:
# Reverse the text to and regular expression search for whitespace
text = self._lines[line - 1]
reversed_preceding_text = text[:col][::-1]
# First find if a [ can be found with only whitespace between it and
# col.
m = find_open.match(reversed_preceding_text)
if m:
new_col_offset = col - m.start(1) - 1
return line, new_col_offset
else:
if (reversed_preceding_text == "" or
reversed_preceding_text.isspace()):
line = line - 1
prev_line = self._lines[line - 1]
# TODO(aselle):
# this is poor comment detection, but it is good enough for
# cases where the comment does not contain string literal starting/
# ending characters. If ast gave us start and end locations of the
# ast nodes rather than just start, we could use string literal
# node ranges to filter out spurious #'s that appear in string
# literals.
comment_start = prev_line.find("#")
if comment_start == -1:
col = len(prev_line) - 1
elif find_string_chars.search(prev_line[comment_start:]) is None:
col = comment_start
else:
return None, None
else:
return None, None
# Most other nodes return proper locations (with notably does not), but
# it is not possible to use that in an argument.
return node.lineno, node.col_offset
def visit_Call(self, node): # pylint: disable=invalid-name
"""Handle visiting a call node in the AST.
Args:
node: Current Node
"""
# Find a simple attribute name path e.g. "tf.foo.bar"
full_name = self._get_attribute_full_path(node.func)
# Make sure the func is marked as being part of a call
node.func.is_function_for_call = True
if full_name:
# Call special handlers
function_handles = self._api_change_spec.function_handle
if full_name in function_handles:
function_handles[full_name](self._file_edit, node)
# Examine any non-keyword argument and make it into a keyword argument
# if reordering required.
function_reorders = self._api_change_spec.function_reorders
function_keyword_renames = (
self._api_change_spec.function_keyword_renames)
if full_name in function_reorders:
reordered = function_reorders[full_name]
for idx, arg in enumerate(node.args):
lineno, col_offset = self._find_true_position(arg)
if lineno is None or col_offset is None:
self._file_edit.add(
"Failed to add keyword %r to reordered function %r" %
(reordered[idx], full_name),
arg.lineno,
arg.col_offset,
"",
"",
error="A necessary keyword argument failed to be inserted.")
else:
keyword_arg = reordered[idx]
if (full_name in function_keyword_renames and
keyword_arg in function_keyword_renames[full_name]):
keyword_arg = function_keyword_renames[full_name][keyword_arg]
self._file_edit.add("Added keyword %r to reordered function %r" %
(reordered[idx], full_name), lineno, col_offset,
"", keyword_arg + "=")
# Examine each keyword argument and convert it to the final renamed form
renamed_keywords = ({} if full_name not in function_keyword_renames else
function_keyword_renames[full_name])
for keyword in node.keywords:
argkey = keyword.arg
argval = keyword.value
if argkey in renamed_keywords:
argval_lineno, argval_col_offset = self._find_true_position(argval)
if argval_lineno is not None and argval_col_offset is not None:
# TODO(aselle): We should scan backward to find the start of the
# keyword key. Unfortunately ast does not give you the location of
# keyword keys, so we are forced to infer it from the keyword arg
# value.
key_start = argval_col_offset - len(argkey) - 1
key_end = key_start + len(argkey) + 1
if (self._lines[argval_lineno - 1][key_start:key_end] == argkey +
"="):
self._file_edit.add("Renamed keyword argument from %r to %r" %
(argkey,
renamed_keywords[argkey]), argval_lineno,
argval_col_offset - len(argkey) - 1,
argkey + "=", renamed_keywords[argkey] + "=")
continue
self._file_edit.add(
"Failed to rename keyword argument from %r to %r" %
(argkey, renamed_keywords[argkey]),
argval.lineno,
argval.col_offset - len(argkey) - 1,
"",
"",
error="Failed to find keyword lexographically. Fix manually.")
ast.NodeVisitor.generic_visit(self, node)
def visit_Attribute(self, node): # pylint: disable=invalid-name
"""Handle bare Attributes i.e. [tf.foo, tf.bar].
Args:
node: Node that is of type ast.Attribute
"""
full_name = self._get_attribute_full_path(node)
if full_name:
self._rename_functions(node, full_name)
if full_name in self._api_change_spec.change_to_function:
if not hasattr(node, "is_function_for_call"):
new_text = full_name + "()"
self._file_edit.add("Changed %r to %r" % (full_name, new_text),
node.lineno, node.col_offset, full_name, new_text)
ast.NodeVisitor.generic_visit(self, node)
class ASTCodeUpgrader(object):
  """Handles upgrading a set of Python files using a given API change spec."""

  def __init__(self, api_change_spec):
    if not isinstance(api_change_spec, APIChangeSpec):
      raise TypeError("Must pass APIChangeSpec to ASTCodeUpgrader, got %s" %
                      type(api_change_spec))
    self._api_change_spec = api_change_spec

  def process_file(self, in_filename, out_filename):
    """Process the given python file for incompatible changes.

    Args:
      in_filename: filename to parse
      out_filename: output file to write to
    Returns:
      A tuple representing number of files processed, log of actions, errors
    """

    # Write to a temporary file, just in case we are doing an in-place modify.
    with open(in_filename, "r") as in_file, \
        tempfile.NamedTemporaryFile("w", delete=False) as temp_file:
      ret = self.process_opened_file(in_filename, in_file, out_filename,
                                     temp_file)

    shutil.move(temp_file.name, out_filename)
    return ret

  # Broad exceptions are required here because ast throws whatever it wants.
  # pylint: disable=broad-except
  def process_opened_file(self, in_filename, in_file, out_filename, out_file):
    """Process the given python file for incompatible changes.

    This function is split out to facilitate StringIO testing from
    tf_upgrade_test.py.

    If the input cannot be parsed, the failure is described in the log and the
    input is written to `out_file` unchanged.

    Args:
      in_filename: filename to parse
      in_file: opened file (or StringIO)
      out_filename: output file to write to
      out_file: opened file (or StringIO)
    Returns:
      A tuple representing number of files processed, log of actions, errors
    """
    process_errors = []
    text = "-" * 80 + "\n"
    text += "Processing file %r\n outputting to %r\n" % (in_filename,
                                                         out_filename)
    text += "-" * 80 + "\n\n"

    parsed_ast = None
    lines = in_file.readlines()
    try:
      parsed_ast = ast.parse("".join(lines))
    except Exception:
      text += "Failed to parse %r\n\n" % in_filename
      text += traceback.format_exc()
    if parsed_ast:
      visitor = _ASTCallVisitor(in_filename, lines, self._api_change_spec)
      visitor.visit(parsed_ast)
      out_text, new_text, process_errors = visitor.process(lines)
      text += new_text
      if out_file:
        out_file.write(out_text)
    elif out_file:
      # Pass unparsable source through untouched. Writing nothing would make
      # process_file replace the destination (possibly the input itself) with
      # an empty file.
      out_file.write("".join(lines))
    text += "\n"
    return 1, text, process_errors
  # pylint: enable=broad-except

  def process_tree(self, root_directory, output_root_directory,
                   copy_other_files):
    """Processes upgrades on an entire tree of python files.

    Results are written to a separate output tree, which must not exist yet.

    Note that only Python files are processed. If you have custom code in
    other languages, you will need to manually upgrade those.

    Args:
      root_directory: Directory to walk and process.
      output_root_directory: Directory to use as base.
      copy_other_files: Copy files that are not touched by this converter.

    Returns:
      A tuple of files processed, the report string for all files, and errors
    """

    # make sure output directory doesn't exist
    if output_root_directory and os.path.exists(output_root_directory):
      print("Output directory %r must not already exist." %
            (output_root_directory))
      sys.exit(1)

    # make sure output directory does not overlap with root_directory
    norm_root = os.path.split(os.path.normpath(root_directory))
    norm_output = os.path.split(os.path.normpath(output_root_directory))
    if norm_root == norm_output:
      # Arguments are given in the order the message names them.
      print("Output directory %r same as input directory %r" %
            (output_root_directory, root_directory))
      sys.exit(1)

    # Collect list of files to process (we do this to correctly handle if the
    # user puts the output directory in some sub directory of the input dir)
    files_to_process = []
    files_to_copy = []
    for dir_name, _, file_list in os.walk(root_directory):
      py_files = [f for f in file_list if f.endswith(".py")]
      copy_files = [f for f in file_list if not f.endswith(".py")]
      for filename in py_files:
        fullpath = os.path.join(dir_name, filename)
        fullpath_output = os.path.join(output_root_directory,
                                       os.path.relpath(fullpath,
                                                       root_directory))
        files_to_process.append((fullpath, fullpath_output))
      if copy_other_files:
        for filename in copy_files:
          fullpath = os.path.join(dir_name, filename)
          fullpath_output = os.path.join(output_root_directory,
                                         os.path.relpath(
                                             fullpath, root_directory))
          files_to_copy.append((fullpath, fullpath_output))

    file_count = 0
    tree_errors = []
    report = ""
    report += ("=" * 80) + "\n"
    report += "Input tree: %r\n" % root_directory
    report += ("=" * 80) + "\n"

    for input_path, output_path in files_to_process:
      output_directory = os.path.dirname(output_path)
      if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
      file_count += 1
      _, l_report, l_errors = self.process_file(input_path, output_path)
      tree_errors += l_errors
      report += l_report
    for input_path, output_path in files_to_copy:
      output_directory = os.path.dirname(output_path)
      if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
      shutil.copy(input_path, output_path)
    return file_count, report, tree_errors

View File

@ -0,0 +1,83 @@
FROM tensorflow/tensorflow:latest-devel
LABEL maintainer="Clayne Robison<clayne.b.robison@intel.com>"
# These arguments are parameterized. Use --build-arg to override.
ARG TF_BRANCH=r1.9
ARG WHL_DIR=/whl
RUN apt-get update && apt-get install -y --no-install-recommends \
golang \
vim \
emacs \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN pip --no-cache-dir install --upgrade \
pip setuptools
RUN pip --no-cache-dir install wheel
# Download and build TensorFlow.
WORKDIR /
RUN rm -rf tensorflow && \
git clone https://github.com/tensorflow/tensorflow.git && \
cd tensorflow && \
git checkout ${TF_BRANCH}
WORKDIR /tensorflow
# Configure the build for CPU with MKL by accepting default build options and
# setting library locations
ENV CI_BUILD_PYTHON=python \
LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
PYTHON_BIN_PATH=/usr/bin/python \
PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
CC_OPT_FLAGS='-march=native' \
TF_NEED_JEMALLOC=0 \
TF_NEED_GCP=1 \
TF_NEED_CUDA=0 \
TF_NEED_HDFS=0 \
TF_NEED_S3=1 \
TF_NEED_OPENCL=0 \
TF_NEED_GDR=0 \
TF_ENABLE_XLA=0 \
TF_NEED_VERBS=0 \
TF_NEED_MPI=0
RUN ./configure
# Build and Install TensorFlow.
# The 'mkl' option builds with Intel(R) Math Kernel Library (MKL), which detects
# the platform it is currently running on and takes appropriately optimized
# paths. The -march=native option is for code that is not in MKL, and assumes
# this container will be run on the same architecture on which it is built.
RUN LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
bazel build --config=mkl \
--config="opt" \
--copt="-march=broadwell" \
--copt="-O3" \
//tensorflow/tools/pip_package:build_pip_package && \
mkdir ${WHL_DIR} && \
bazel-bin/tensorflow/tools/pip_package/build_pip_package ${WHL_DIR}
# Clean up Bazel cache when done, but leave the whl.
# This will upgrade the default Tensorflow version with the Intel MKL version
RUN pip --no-cache-dir install --upgrade ${WHL_DIR}/tensorflow-*.whl && \
rm -rf /root/.cache
WORKDIR /root
#add welcome message with instructions
RUN echo '[ ! -z "$TERM" -a -r /etc/motd ] && cat /etc/issue && cat /etc/motd' \
>> /etc/bash.bashrc \
; echo "\
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\
| \n\
| Docker container running Ubuntu \n\
| with TensorFlow ${TF_BRANCH} optimized for CPU \n\
| with Intel(R) MKL \n\
| \n\
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||\n\
\n "\
> /etc/motd

View File

@ -0,0 +1,115 @@
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
LABEL maintainer="Gunhan Gulsoy <gunan@google.com>"
# It is possible to override these for releases.
ARG TF_BRANCH=master
ARG BAZEL_VERSION=0.5.4
ARG TF_AVAILABLE_CPUS=32
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
golang \
libcurl3-dev \
libfreetype6-dev \
libpng12-dev \
libzmq3-dev \
pkg-config \
python-dev \
python-pip \
rsync \
software-properties-common \
unzip \
zip \
zlib1g-dev \
openjdk-8-jdk \
openjdk-8-jre-headless \
wget \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN pip --no-cache-dir install --upgrade \
pip setuptools
RUN pip --no-cache-dir install \
ipykernel \
jupyter \
matplotlib \
numpy \
scipy \
sklearn \
pandas \
wheel \
&& \
python -m ipykernel.kernelspec
# Set up our notebook config.
COPY jupyter_notebook_config.py /root/.jupyter/
# Jupyter has issues with being run directly:
# https://github.com/ipython/ipython/issues/7062
# We just add a little wrapper script.
COPY run_jupyter.sh /
# Set up Bazel.
# Running bazel inside a `docker build` command causes trouble, cf:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Similarly, we need to workaround sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
>>/etc/bazel.bazelrc
WORKDIR /
RUN mkdir /bazel && \
cd /bazel && \
wget --quiet https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
wget --quiet https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \
chmod +x bazel-*.sh && \
./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
# Download and build TensorFlow.
WORKDIR /
RUN git clone https://github.com/tensorflow/tensorflow.git && \
cd tensorflow && \
git checkout ${TF_BRANCH}
WORKDIR /tensorflow
# Configure the build for our CUDA configuration.
ENV CI_BUILD_PYTHON=python \
LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:${LD_LIBRARY_PATH} \
CUDNN_INSTALL_PATH=/usr/lib/x86_64-linux-gnu \
PYTHON_BIN_PATH=/usr/bin/python \
PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
TF_NEED_CUDA=1 \
TF_CUDA_VERSION=9.0 \
TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1,7.0 \
TF_CUDNN_VERSION=7
RUN ./configure
# Build and Install TensorFlow.
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
bazel build -c opt \
--config=cuda \
--cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
--jobs=${TF_AVAILABLE_CPUS} \
tensorflow/tools/pip_package:build_pip_package && \
mkdir /pip_pkg && \
bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \
pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
rm -rf /pip_pkg && \
rm -rf /root/.cache
# Clean up pip wheel and Bazel cache when done.
WORKDIR /root
# TensorBoard
EXPOSE 6006
# IPython
EXPOSE 8888

16
third_party/codegen.BUILD vendored Normal file
View File

@ -0,0 +1,16 @@
# -*- mode: python; -*-
#
# Description:
# Extension to ast that allows ast -> python code generation.
package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # New BSD
exports_files(["LICENSE"])
py_library(
name = "com_github_andreif_codegen",
srcs = glob(["codegen.py"]),
srcs_version = "PY2AND3",
)

201
third_party/mkl/LICENSE vendored Normal file
View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

23
third_party/nanopb.BUILD vendored Normal file
View File

@ -0,0 +1,23 @@
# Description:
# Nanopb, a tiny ANSI C protobuf implementation for use on embedded devices.
licenses(["notice"]) # zlib license
# Make the upstream license file referenceable from other packages.
exports_files(["LICENSE.txt"])
# C library target for nanopb: the common, decode and encode sources together
# with their headers, visible to every package.
cc_library(
name = "nanopb",
srcs = [
"pb_common.c",
"pb_decode.c",
"pb_encode.c",
],
hdrs = [
"pb.h",
"pb_common.h",
"pb_decode.h",
"pb_encode.h",
],
# Adds this package's directory to the include path of dependents.
# NOTE(review): presumably so the headers can be included by bare name
# (e.g. "pb.h") -- confirm against the targets that depend on this.
includes = ["."],
visibility = ["//visibility:public"],
)