Running our linter on a lot of files.
Change: 112920860
This commit is contained in:
parent 87af568b0b
commit 9b70316263
Changed files:

WORKSPACE
configure
eigen.BUILD
tensorflow/
  cc/ops
  core/
    common_runtime/
      direct_session.cc, direct_session_test.cc, executor.cc, function.cc, function.h
      gpu/
        gpu_allocator_retry.h, gpu_bfc_allocator.cc, gpu_event_mgr_test.cc, gpu_region_allocator.cc, gpu_stream_util.cc, gpu_util.h, pool_allocator.cc, process_state.cc, process_state.h
      kernel_benchmark_testlib.h, local_device.h, session_factory.h, simple_placer.cc
    framework/
      attr_value_util.h, function.proto, function_test.cc, node_def_util.h, op.h, op_compatibility_test.cc, op_def_builder.h, op_kernel.h, reader_interface.h, rendezvous.h, tensor.proto, tensor_slice.h, tensor_testutil.h
    kernels/
      adjust_contrast_op.h, check_numerics_op.cc, serialize_sparse_op.cc, sparse_concat_op.cc, sparse_reorder_op.cc, sparse_to_dense_op.cc
    ops, platform, public, util
  examples/
    tutorials/mnist, tutorials/word2vec, udacity
  g3doc/
    api_docs, get_started, how_tos, resources, tutorials
  models/
    embedding, image, rnn
  python/
    client, framework, kernel_tests, lib/core, ops, summary/impl, training
WORKSPACE
@@ -21,8 +21,8 @@ new_http_archive(
 
 new_http_archive(
   name = "eigen_archive",
-  url = "https://bitbucket.org/eigen/eigen/get/fb2fa05.tar.gz",
-  sha256 = "8aacd8065d52528af1a22d6b72925dbb6b9fb8f25e46769481dd06d3edf63bbd",
+  url = "https://bitbucket.org/eigen/eigen/get/c8e5d09.tar.gz",
+  sha256 = "be61d1ce686e950d9a3a61b2aa83562ba4582ce1813e475869146e0a05915857",
   build_file = "eigen.BUILD",
 )
 

configure
@@ -92,7 +92,7 @@ while true; do
 done
 
 cat > third_party/gpus/cuda/cuda.config <<EOF
-# CUDA_TOOLKIT_PATH refers to the CUDA toolkit. Tensorflow requries Cuda 7.0
+# CUDA_TOOLKIT_PATH refers to the CUDA toolkit. Tensorflow requires Cuda 7.0
 # at the moment.
 CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
 
eigen.BUILD
@@ -1,6 +1,6 @@
 package(default_visibility = ["//visibility:public"])
 
-archive_dir = "eigen-eigen-fb2fa0527077"
+archive_dir = "eigen-eigen-c8e5d094f3a9"
 
 cc_library(
     name = "eigen",

@@ -169,7 +169,7 @@ void WriteCCOp(const OpDef& op_def, WritableFile* h, WritableFile* cc) {
       R"comment(  .WithName(StringPiece): Set the Node's name
   .WithDevice(StringPiece): Set the Node's requested device
   .WithControlInput(Node*) / .WithControlInputs({Node*, ...}):
-    Add control depencies on the specified Node(s).
+    Add control dependencies on the specified Node(s).
 
 Returns a pointer to the created Node)comment");
@@ -94,7 +94,7 @@ string GetRendezvousKey(const string& tensor_name,
 //
 // 2) Recv nodes always complete immediately: The inputs are sent into
 //    the local rendezvous before we start the executor, so the
-//    corresonding recvs will not block.
+//    corresponding recvs will not block.
 //
 // Based on these assumptions, we can use the same thread pool for
 // both "non-blocking" and "blocking" OpKernels on Android.
@@ -543,7 +543,7 @@ Status DirectSession::CreateGraphs(
     }
   }
 
-  for (auto partition : partitions) {
+  for (auto&& partition : partitions) {
     const string& partition_name = partition.first;
 
     GraphDef* graph_def = &partition.second;
@@ -95,7 +95,7 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetwork) {
   ASSERT_OK(s);
 
   ASSERT_EQ(1, outputs.size());
-  // The first output should be initiailzed and have the correct
+  // The first output should be initialized and have the correct
   // output.
   auto mat = outputs[0].matrix<float>();
   ASSERT_TRUE(outputs[0].IsInitialized());
@@ -383,7 +383,7 @@ Status ExecutorImpl::InferAllocAttr(
   return s;
 }
 
-// The state associated with one invokation of ExecutorImpl::Run.
+// The state associated with one invocation of ExecutorImpl::Run.
// ExecutorState dispatches nodes when they become ready and keeps
 // track of how many predecessors of a node have not done (pending_).
 class ExecutorState {
@@ -479,7 +479,7 @@ Status FunctionLibraryRuntimeImpl::InstantiateSymbolicGradient(
   const auto& func = f->func();
   const FunctionDef* fdef = lib_def_->Find(func.name());
   if (fdef == nullptr) {
-    // f is a primitve op.
+    // f is a primitive op.
     gradient::Creator creator;
     TF_RETURN_IF_ERROR(gradient::GetOpGradientCreator(func.name(), &creator));
     if (creator == nullptr) {
@@ -1169,7 +1169,7 @@ class SymbolicGradientHelper {
 
   // 'ready' keeps track of nodes that have been completely
   // backpropped. Initially, for every output y of the function f, we
-  // add dy as an input of the the gradient function.
+  // add dy as an input of the gradient function.
   std::deque<Node*> ready_;
 
   // Makes a copy of fbody_ in gbody_.
@@ -90,7 +90,7 @@ bool RemoveListArrayConverter(Graph* g);
 // multiple times by calling ExpandInlineFunctions a few times.
 bool ExpandInlineFunctions(FunctionLibraryRuntime* lib, Graph* graph);
 
-// Applies graph rewrite optimzation such as inlining, dead code
+// Applies graph rewrite optimization such as inlining, dead code
 // removal, etc.
 //
 // **g is a graph constructed based on the runtime library 'lib'.
@@ -32,7 +32,7 @@ class GPUAllocatorRetry {
  // then wait up to 'max_millis_to_wait' milliseconds, retrying each
  // time a call to DeallocateRaw() is detected, until either a good
  // pointer is returned or the deadline is exhausted.  If the
- // deadline is exahusted, try one more time with 'verbose_failure'
+ // deadline is exhausted, try one more time with 'verbose_failure'
  // set to true.  The value returned is either the first good pointer
  // obtained from 'alloc_func' or nullptr.
  void* AllocateRaw(std::function<void*(size_t alignment, size_t num_bytes,
@@ -441,7 +441,7 @@ void GPUBFCAllocator::DumpMemoryLog(size_t num_bytes) {
    }
  }
 
- // Next show the the chunks that are in use, and also summarize their
+ // Next show the chunks that are in use, and also summarize their
  // number by size.
  std::map<size_t, int> in_use_by_size;
  for (auto& it : ptr_to_chunk_map_) {
@@ -198,7 +198,7 @@ TEST(EventMgr, StreamSwitchingFlushesImmediately) {
  EXPECT_GT(initial_live_bytes, live_tensor_bytes);
 }
 
-TEST(EventMgr, ManySmallTensorsSeperateCallsFlushed) {
+TEST(EventMgr, ManySmallTensorsSeparateCallsFlushed) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
@@ -280,7 +280,7 @@ void GPURegionAllocator::CheckForMemoryLeaks() {
 }
 
 // Since there's no merging of chunks once allocated, we want to
-// maximize their reusablity (which argues for fewer, larger sizes),
+// maximize their reusability (which argues for fewer, larger sizes),
 // while minimizing waste (which argues for tight-fitting sizes).
 //
 // The smallest unit of allocation is 256 bytes.
@@ -61,7 +61,7 @@ Status AssignStreams(const Graph* graph, const AssignStreamsOpts& opts,
    }
  }
 }
-// We perform stream assigmnent assuming a large number of
+// We perform stream assignment assuming a large number of
 // stream IDs and then map these down to the required number of streams
 // using simple round-robin.
 // Stream Assignment strategy:
@@ -34,7 +34,7 @@ class GPUUtil {
  // "tensor" is GPU-local. "dev" is the hosting GPU.
  // "device_context" should be the context of the GPU "_Send" op
  // which provides the Tensor.
-  // Sets all necessasry fields of "proto" by transferring value
+  // Sets all necessary fields of "proto" by transferring value
  // bytes from GPU to CPU RAM. "is_dead" indicates that the
  // tensor is dead with an uninit value.
  static void SetProtoFromGPU(const Tensor& tensor, Device* dev,
@@ -47,7 +47,7 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
 PoolAllocator::~PoolAllocator() { Clear(); }
 
 namespace {
-// Pools contain Chunks allocatated from the underlying Allocator.
+// Pools contain Chunks allocated from the underlying Allocator.
 // Chunk alignment is always on kPoolAlignment boundaries.  Each Chunk
 // begins with a descriptor (ChunkPrefix) that gives its size and a
 // pointer to itself.  The pointer returned to the user is just past
@@ -56,7 +56,7 @@ namespace {
 // pointer and also re-write the ChunkPrefix.chunk_ptr value
 // immediately before it.  This way the Chunk address and size can be
 // recovered from the returned user pointer, regardless of alignment.
-// Note that this deferencing of the pointers means that we cannot
+// Note that this dereferencing of the pointers means that we cannot
 // handle GPU memory, only CPU memory.
 struct ChunkPrefix {
  size_t num_bytes;
@@ -47,7 +47,7 @@ const bool FLAGS_brain_gpu_region_allocator_reset_to_nan = false;
 const bool FLAGS_brain_gpu_use_bfc_allocator = true;
 
 // If true, record attributes of memory allocations and
-// dyanmically check for appropriate use of registered memory.
+// dynamically check for appropriate use of registered memory.
 // Should only be true for debugging or diagnosis of
 // performance issues.
 bool FLAGS_brain_gpu_record_mem_types = false;
@@ -67,7 +67,7 @@ class ProcessState {
  MemDesc PtrType(const void* ptr);
 
  // Returns the one CPUAllocator used for the given numa_node.
-  // TEMPORY: ignores numa_node.
+  // TEMPORARY: ignores numa_node.
  Allocator* GetCPUAllocator(int numa_node);
 
  // Returns the one GPU allocator used for the indexed GPU.
@@ -80,7 +80,7 @@ class ProcessState {
  // used on that first call is used.
  //
  // "Allocator type" describes the type of algorithm to use for the
-  // underlying allocator.  REQURES: Must be a valid type (see
+  // underlying allocator.  REQUIRES: Must be a valid type (see
  // config.proto for the list of supported strings.).
  //
  // REQUIRES: gpu_id must be a valid ordinal for a GPU available in the
@@ -98,7 +98,7 @@ class ProcessState {
  // interface to be used for network device memory registration.
  // "bus_id" is platform-specific.  On many platforms it
  // should be 0.  On machines with multiple PCIe buses, it should be
-  // the index of one of the PCIe buses.  If the the bus_id is invalid,
+  // the index of one of the PCIe buses.  If the bus_id is invalid,
  // results are undefined.
  typedef std::function<void(void*, size_t)> AllocVisitor;
  void AddGPUAllocVisitor(int bus_id, AllocVisitor visitor);
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tensorflow {
 
 class Device;
-class SessionOptions;
+struct SessionOptions;
 
 namespace test {
 
@@ -22,7 +22,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-class SessionOptions;
+struct SessionOptions;
 
 // This class is shared by ThreadPoolDevice and GPUDevice and
 // initializes a shared Eigen compute device used by both.  This
@@ -25,7 +25,7 @@ limitations under the License.
 namespace tensorflow {
 
 class Session;
-class SessionOptions;
+struct SessionOptions;
 
 class SessionFactory {
  public:
@@ -37,7 +37,7 @@ namespace {
 // types in 'supported_device_types' and returns the *first* subset of devices
 // that match.
 //
-// For example, if suported_device_types contains {GPU, CPU} and
+// For example, if supported_device_types contains {GPU, CPU} and
 // 'devices' contains CPU and GPU devices, the returned vector will
 // include *only* GPU devices, since that is higher in the priority
 // order in 'supported_device_types'.
@@ -36,7 +36,7 @@ string SummarizeAttrValue(const AttrValue& attr_value);
 // Generates an error if attr_value doesn't have the indicated attr type.
 Status AttrValueHasType(const AttrValue& attr_value, StringPiece type);
 
-// Converts a text proto value from "text" into the the field of *out
+// Converts a text proto value from "text" into the field of *out
 // indicated by "type" (e.g. from the type field of an AttrDef).
 // Examples:
 // * If type:"int" and text:"-14", then *out is set to "i: -14"
@@ -61,7 +61,7 @@ message FunctionDef {
    // 'attr' maps names defined by 'func's attr defs to attr values.
    // attr values may have placeholders which are substituted
    // recursively by concrete values when this node is instantiated.
-    // These placeholdes must name an attr listed in the FunctionDef's
+    // These placeholders must name an attr listed in the FunctionDef's
    // signature.
    map<string, AttrValue> attr = 5;
  }
@@ -296,8 +296,8 @@ REGISTER_OP("Cond")
 output = Cond(input) ? then_branch(input) : else_branch(input)
 
 cond: A function takes 'input' and returns a scalar.
-then_branch: A funcion takes 'input' and returns 'output'.
-else_branch: A funcion takes 'input' and returns 'output'.
+then_branch: A function takes 'input' and returns 'output'.
+else_branch: A function takes 'input' and returns 'output'.
 )doc");
 
 TEST(TFunc, Body_Array_List_Converter) {
@@ -146,7 +146,7 @@ Status ValidateNodeDef(const NodeDef& node_def, const OpDef& op_def);
 
 // Computes the mapping from input/output argument name to the
 // corresponding input/output index range.  For example,
-// input "foo" coresponds to input indices
+// input "foo" corresponds to input indices
 //   [ (*inputs)["foo"].first, (*inputs)["foo"].second ).
 typedef std::unordered_map<string, std::pair<int, int>> NameRangeMap;
 Status NameRangesForNode(const NodeDef& node_def, const OpDef& op_def,
@@ -72,7 +72,7 @@ class OpRegistry : public OpRegistryInterface {
  const OpDef* LookUp(const string& op_type_name,
                      Status* status) const override;
 
-  // Fills *ops with all registered OpDefss (except those with names
+  // Fills *ops with all registered OpDefs (except those with names
  // starting with '_' if include_internal == false).
  void Export(bool include_internal, OpList* ops) const;
 
@@ -53,7 +53,7 @@ class OpCompatibilityTest : public OpsTestBase {
    ASSERT_OK(InOutTypesForNode(*node_def(), old_op_def, &old_in_types,
                                &old_out_types));
 
-    // This should be all that is needed to get compatiblity.
+    // This should be all that is needed to get compatibility.
    const OpDef* new_op_def = RegisteredOpDef();
    AddDefaultsToNodeDef(*new_op_def, node_def());
 
@@ -93,7 +93,7 @@ class OpCompatibilityTest : public OpsTestBase {
    ASSERT_OK(InOutTypesForNode(*node_def(), old_op_def, &old_in_types,
                                &old_out_types));
 
-    // This should be all that is needed to get compatiblity.
+    // This should be all that is needed to get compatibility.
    const OpDef* new_op_def = RegisteredOpDef();
    AddDefaultsToNodeDef(*new_op_def, node_def());
 
@@ -117,7 +117,7 @@ class OpCompatibilityTest : public OpsTestBase {
    ASSERT_OK(InOutTypesForNode(*node_def(), old_op_def, &old_in_types,
                                &old_out_types));
 
-    // This should be all that is needed to get compatiblity.
+    // This should be all that is needed to get compatibility.
    const OpDef* new_op_def = RegisteredOpDef();
    AddDefaultsToNodeDef(*new_op_def, node_def());
 
@@ -64,7 +64,7 @@ class OpDefBuilder {
  // Perhaps by linking the type of the tensor to a type attr?
  OpDefBuilder& Attr(StringPiece spec);
 
-  // Adds an input or ouput to this OpDefBuilder (and returns *this).
+  // Adds an input or output to this OpDefBuilder (and returns *this).
  // The spec has form "<name>:<type-expr>" or "<name>:Ref(<type-expr>)"
  // where <name> matches regexp [a-z][a-z0-9_]* and <type-expr> can be:
  // * For a single tensor: <type>
@@ -578,7 +578,7 @@ class OpKernelContext {
  Status mutable_input(const string& name, Tensor* tensor, bool lock_held);
 
  // Returns the named list-valued mutable input in "list", as defined
-  // in the OpDef.  If the named intput is not list-valued, returns a
+  // in the OpDef.  If the named input is not list-valued, returns a
  // one-element list. Must be used to access Ref inputs.  The values
  // stored in the Tensor buffer may be modified, and modifications
  // will be visible to other Ops reading the same ref tensor.
@@ -50,7 +50,7 @@ class ReaderInterface : public ResourceBase {
 public:
  // Read a single record into *key / *value.  May get more work from
  // *queue if the current work is complete.  Sets the status on
-  // *context with an OutOfRange Status if the the current work is
+  // *context with an OutOfRange Status if the current work is
  // complete and the queue is done (closed and empty).
  // This method may block.
  virtual void Read(QueueInterface* queue, string* key, string* value,
@@ -44,7 +44,7 @@ class Rendezvous : public core::RefCounted {
    AllocatorAttributes alloc_attrs;
  };
 
-  // Constructs a rendezvouz key for the tensor of "name" sent from
+  // Constructs a rendezvous key for the tensor of "name" sent from
  // "src_device" to "dst_device". The tensor is generated in the frame
  // and iteration specified by "frame_iter".
  static string CreateKey(const string& src_device, uint64 src_incarnation,
@@ -24,8 +24,8 @@ message TensorProto {
  // to represent a constant Tensor with a single value.
  int32 version_number = 3;
 
-  // Serialized content from TensorBase::Serialize() This representation can be
-  // used for all tensor types.
+  // Serialized content from Tensor::AsProtoTensorContent(). This representation
+  // can be used for all tensor types.
  bytes tensor_content = 4;
 
  // Type specific representations that make it easy to create tensor protos in
@@ -38,7 +38,7 @@ class TensorSlice {
  // -- from just a dimension (in this case it will create a full slice)
  // -- from an array of pairs of integers.
  // -- from a TensorSliceProto protocol buffer
-  // -- from a string format of "start,lenth:start,length..." where each
+  // -- from a string format of "start,length:start,length..." where each
  //    "start,length" pair represents the slice on one dimension.  We allow a
  //    special "-" that means "everything for this dimension".  One such example
  //    is:  0,10:-:14,1:-:-
@@ -84,7 +84,7 @@ template <typename T>
 void ExpectTensorEqual(const Tensor& x, const Tensor& y);
 
 // Expects "x" and "y" are tensors of the same type, same shape, and
-// approxmiate equal values, each within "abs_err".
+// approximate equal values, each within "abs_err".
 template <typename T>
 void ExpectTensorNear(const Tensor& x, const Tensor& y, const T& abs_err);
 
@@ -38,14 +38,11 @@ struct AdjustContrast {
    Eigen::array<int, 4> scalar_broadcast{{batch, height, width, channels}};
 #if !defined(EIGEN_HAS_INDEX_LIST)
    Eigen::array<int, 2> reduction_axis{{1, 2}};
-    Eigen::array<int, 4> scalar{{1, 1, 1, 1}};
    Eigen::array<int, 4> broadcast_dims{{1, height, width, 1}};
    Eigen::Tensor<int, 4>::Dimensions reshape_dims{{batch, 1, 1, channels}};
 #else
    Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >
        reduction_axis;
-    Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<1>,
-                     Eigen::type2index<1>, Eigen::type2index<1> > scalar;
    Eigen::IndexList<Eigen::type2index<1>, int, int, Eigen::type2index<1> >
        broadcast_dims;
    broadcast_dims.set(1, height);
@@ -55,6 +52,7 @@ struct AdjustContrast {
    reshape_dims.set(0, batch);
    reshape_dims.set(3, channels);
 #endif
+    Eigen::Sizes<1, 1, 1, 1> scalar;
    float num_reduced_coeffs = height * width;
    mean_values.device(d) =
        (input.template cast<float>().sum(reduction_axis).eval() /
@@ -88,16 +86,12 @@ struct AdjustContrastv2 {
    Eigen::array<int, 4> scalar_broadcast{{batch, height, width, channels}};
 #if !defined(EIGEN_HAS_INDEX_LIST)
    Eigen::array<int, 2> reduction_axis{{0, 1}};
-    Eigen::array<int, 4> scalar{{1, 1, 1, 1}};
    Eigen::array<int, 4> broadcast_dims{{1, height, width, 1}};
    Eigen::Tensor<int, 4>::Dimensions reshape_dims{{batch, 1, 1, channels}};
    Eigen::array<int, 4> reduced_dims_first{{1, 2, 0, 3}};
 #else
    Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1> >
        reduction_axis;
-    Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<1>,
-                     Eigen::type2index<1>, Eigen::type2index<1> >
-        scalar;
    Eigen::IndexList<Eigen::type2index<1>, int, int, Eigen::type2index<1> >
        broadcast_dims;
    broadcast_dims.set(1, height);
@@ -110,6 +104,7 @@ struct AdjustContrastv2 {
                     Eigen::type2index<0>, Eigen::type2index<3> >
        reduced_dims_first;
 #endif
+    Eigen::Sizes<1, 1, 1, 1> scalar;
    float num_reduced_coeffs = height * width;
    output.device(d) =
        (input.shuffle(reduced_dims_first).sum(reduction_axis).eval() /
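For readers following the kernel hunks above: a rough NumPy sketch of the contrast-adjustment pattern they implement (mean over height/width, then scale each pixel's distance from that mean). The function name and `factor` semantics here are illustrative assumptions, not the kernel's exact contract:

```python
import numpy as np

def adjust_contrast_v2(images, factor):
    """Rough NumPy analogue of the AdjustContrastv2 pattern above.

    images: float array of shape [batch, height, width, channels].
    """
    # Mean over the height and width axes, kept with size-1 dims so it
    # broadcasts back over the image, like the Eigen reshape/broadcast
    # dance in the hunk above.
    mean = images.mean(axis=(1, 2), keepdims=True)
    return (images - mean) * factor + mean

batch = np.random.rand(2, 4, 4, 3).astype(np.float32)
print(adjust_contrast_v2(batch, 1.5).shape)  # (2, 4, 4, 3)
```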
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <math.h>
 #include <algorithm>
 #include <numeric>
 
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/types.h"
@@ -16,6 +16,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <algorithm>
 #include <numeric>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -16,6 +16,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <algorithm>
 #include <numeric>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -16,6 +16,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include <algorithm>
 #include <numeric>
 #include <unordered_map>
 #include <utility>
 
@@ -20,6 +20,7 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
 #include <numeric>
 #include <sstream>
 #include <string>
 #include <unordered_map>
@@ -44,7 +44,7 @@ then, g is
  dL/dy1, dL/dy2, ..., dL/dy_M),
 
 where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the
-loss function). dL/dx_i is the the partial derivative of L with respect
+loss function). dL/dx_i is the partial derivative of L with respect
 to x_i.
 
 (Needs some math expert to say the comment above better.)
@@ -146,7 +146,7 @@ REGISTER_OP("ShardedFilename")
    .Input("num_shards: int32")
    .Output("filename: string")
    .Doc(R"doc(
-Generate a sharded filename. The filename is printf formated as
+Generate a sharded filename. The filename is printf formatted as
  %s-%05d-of-%05d, basename, shard, num_shards.
 )doc");
 
@@ -485,7 +485,7 @@ Status MinMaxGradHelper(const string& op, const AttrSlice& attrs,
      // Attr defs
      {{"T: {float, double}"}},
      {
-        // keep_dims because we need to do x == y, which requries x
+        // keep_dims because we need to do x == y, which requires x
        // and y are broadcastable.
        {{"y"}, op, {"x", "i"}, {{"T", "$T"}, {"keep_dims", true}}},
        {{"mask"}, "Equal", {"x", "y"}, {{"T", "$T"}}},
@@ -168,7 +168,7 @@ performs the following:
 
 1. Flattens the filter to a 2-D matrix with shape
   `[filter_height * filter_width * in_channels, output_channels]`.
-2. Extracts image patches from the the input tensor to form a *virtual*
+2. Extracts image patches from the input tensor to form a *virtual*
   tensor of shape `[batch, out_height, out_width,
   filter_height * filter_width * in_channels]`.
 3. For each patch, right-multiplies the filter matrix and the image patch
@@ -1712,7 +1712,7 @@ op {
    }
  }
  summary: "Computes a 2-D convolution given 4-D `input` and `filter` tensors."
-  description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, out_channels]`, this op\nperforms the following:\n\n1. Flattens the filter to a 2-D matrix with shape\n   `[filter_height * filter_width * in_channels, output_channels]`.\n2. Extracts image patches from the the input tensor to form a *virtual*\n   tensor of shape `[batch, out_height, out_width,\n   filter_height * filter_width * in_channels]`.\n3. For each patch, right-multiplies the filter matrix and the image patch\n   vector.\n\nIn detail,\n\n    output[b, i, j, k] =\n        sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *\n                        filter[di, dj, q, k]\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
+  description: "Given an input tensor of shape `[batch, in_height, in_width, in_channels]`\nand a filter / kernel tensor of shape\n`[filter_height, filter_width, in_channels, out_channels]`, this op\nperforms the following:\n\n1. Flattens the filter to a 2-D matrix with shape\n   `[filter_height * filter_width * in_channels, output_channels]`.\n2. Extracts image patches from the input tensor to form a *virtual*\n   tensor of shape `[batch, out_height, out_width,\n   filter_height * filter_width * in_channels]`.\n3. For each patch, right-multiplies the filter matrix and the image patch\n   vector.\n\nIn detail,\n\n    output[b, i, j, k] =\n        sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] *\n                        filter[di, dj, q, k]\n\nMust have `strides[0] = strides[3] = 1`.  For the most common case of the same\nhorizontal and vertices strides, `strides = [1, stride, stride, 1]`."
 }
 op {
  name: "Conv2DBackpropFilter"
@@ -4782,7 +4782,7 @@ op {
  attr {
    name: "num_negative_samples"
    type: "int"
-    description: "Number of negative samples per exaple."
+    description: "Number of negative samples per example."
  }
  summary: "Training via negative sampling."
 }
@@ -5029,7 +5029,7 @@ op {
  attr {
    name: "dense_shapes"
    type: "list(shape)"
-    description: "A list of Ndense shapes; the shapes of data in each Feature\ngiven in dense_keys.\nThe number of elements in the Feature corresponding to dense_key[j]\nmust always equal dense_shapes[j].NumEntries().\nIf dense_shapes[j] == (D0, D1, ..., DN) then the the shape of output\nTensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):\nThe dense outputs are just the inputs row-stacked by batch."
+    description: "A list of Ndense shapes; the shapes of data in each Feature\ngiven in dense_keys.\nThe number of elements in the Feature corresponding to dense_key[j]\nmust always equal dense_shapes[j].NumEntries().\nIf dense_shapes[j] == (D0, D1, ..., DN) then the shape of output\nTensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):\nThe dense outputs are just the inputs row-stacked by batch."
    has_minimum: true
  }
  summary: "Transforms a vector of brain.Example protos (as strings) into typed tensors."
@@ -7480,7 +7480,7 @@ op {
    name: "filename"
    type: DT_STRING
  }
-  summary: "Generate a sharded filename. The filename is printf formated as"
+  summary: "Generate a sharded filename. The filename is printf formatted as"
  description: "  %s-%05d-of-%05d, basename, shard, num_shards."
 }
 op {
@@ -8852,7 +8852,7 @@ op {
  attr {
    name: "f"
    type: "func"
-    description: "The function we want to compute the gradient for.\n\nThe function \'f\' must be a numerical function which takes N inputs and\nproduces M outputs. Its gradient function \'g\', which is computed by\nthis SymbolicGradient op is a function taking N + M inputs and\nproduces N outputs.\n\nI.e. if we have\n   (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),\nthen, g is\n   (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,\n                                      dL/dy1, dL/dy2, ..., dL/dy_M),\n\nwhere L is a scalar-value function of (x1, x2, ..., xN) (e.g., the\nloss function). dL/dx_i is the the partial derivative of L with respect\nto x_i.\n\n(Needs some math expert to say the comment above better.)"
+    description: "The function we want to compute the gradient for.\n\nThe function \'f\' must be a numerical function which takes N inputs and\nproduces M outputs. Its gradient function \'g\', which is computed by\nthis SymbolicGradient op is a function taking N + M inputs and\nproduces N outputs.\n\nI.e. if we have\n   (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),\nthen, g is\n   (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,\n                                      dL/dy1, dL/dy2, ..., dL/dy_M),\n\nwhere L is a scalar-value function of (x1, x2, ..., xN) (e.g., the\nloss function). dL/dx_i is the partial derivative of L with respect\nto x_i.\n\n(Needs some math expert to say the comment above better.)"
  }
  summary: "Computes the gradient function for function f via backpropagation."
 }
@@ -71,7 +71,7 @@ dense_shapes: A list of Ndense shapes; the shapes of data in each Feature
  given in dense_keys.
  The number of elements in the Feature corresponding to dense_key[j]
  must always equal dense_shapes[j].NumEntries().
-  If dense_shapes[j] == (D0, D1, ..., DN) then the the shape of output
+  If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output
  Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):
  The dense outputs are just the inputs row-stacked by batch.
 sparse_keys: A list of Nsparse string Tensors (scalars).
@@ -37,7 +37,7 @@ class Tracing {
 public:
  // This enumeration contains the identifiers of all TensorFlow
  // threadscape events and code regions.  Threadscape assigns its
-  // own identiers at runtime when we register our events and we
+  // own identifiers at runtime when we register our events and we
  // cannot know in advance what IDs it will choose.  The "RecordEvent"
  // method and "ScopedActivity" use these event IDs for consistency
  // and remap them to threadscape IDs at runtime.  This enum is limited
@@ -30,7 +30,7 @@ limitations under the License.
 // * Objects are always passed around as pointers to opaque structs
 //   and these structs are allocated/deallocated via the API.
 // * TF_Status holds error information.  It is an object type
-//   and threfore is passed around as a pointer to an opaque
+//   and therefore is passed around as a pointer to an opaque
 //   struct as mentioned above.
 // * Every call that has a TF_Status* argument clears it on success
 //   and fills it with error info on failure.
@@ -212,7 +212,7 @@ class SparseTensor {
    }
  }
 
-  // Helper for Split() that retuerns the shape given a slice index.
+  // Helper for Split() that returns the shape given a slice index.
  static inline int GetSliceShape(const int slice_index, const int split_size,
                                  const int residual) {
    CHECK_GT(split_size, 0);
@@ -62,8 +62,8 @@ bool TensorSliceSet::Query(const TensorSlice& slice, float* data) const {
    }
    return true;
  } else {
-    // We didn't find any exact match but there is still a posibility that
-    // mutliple existing slices can be patched together to output the slice.
+    // We didn't find any exact match but there is still a possibility that
+    // multiple existing slices can be patched together to output the slice.
    // We figure this out by computing the intersection of each of the existing
    // slices with the query slice, and check if the union of all these
    // intersections cover the entire slice.  We rely on the fact that the
@@ -119,7 +119,7 @@ bool TensorSliceSet::QueryMeta(
    results->emplace_back(std::make_pair(info->slice, info->tag));
    return true;
  } else {
-    // We didn't find any exact match but there is still a posibility that
+    // We didn't find any exact match but there is still a possibility that
    // multiple existing slices can be patched together to output the slice.
    // We figure this out by computing the intersection of each of the existing
    // slices with the query slice, and check if the union of all these
@@ -57,7 +57,7 @@ class TensorSliceSet {
 
  // Query about a new slice: checks if we have data for "slice" and if we have
  // the data and "data" is not nullptr, fill "data" with the slice data.  The
-  // caller needs to make sure "data" point to a large eough buffer.
+  // caller needs to make sure "data" point to a large enough buffer.
  // TODO(yangke): avoid unnecessary copying by using a core::RefCounted
  // pointer.
  bool Query(const TensorSlice& slice, float* data) const;
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// The utility to check whether we have Cudnn depenedency.
+// The utility to check whether we have Cudnn dependency.
 
 #ifndef TENSORFLOW_UTIL_USE_CUDNN_H_
 #define TENSORFLOW_UTIL_USE_CUDNN_H_
@@ -46,7 +46,7 @@ flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
 
 
 def placeholder_inputs(batch_size):
-  """Generate placeholder variables to represent the the input tensors.
+  """Generate placeholder variables to represent the input tensors.
 
  These placeholders are used as inputs by the rest of the model building
  code and will be fed from the downloaded data in the .run() loop, below.
@@ -94,7 +94,7 @@ def loss(logits, labels):
  Returns:
    loss: Loss tensor of type float.
  """
-  # Convert from sparse integer labels in the range [0, NUM_CLASSSES)
+  # Convert from sparse integer labels in the range [0, NUM_CLASSES)
  # to 1-hot dense float vectors (that is we will have batch_size vectors,
  # each with NUM_CLASSES values, all of which are 0.0 except there will
  # be a 1.0 in the entry corresponding to the label).
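The comment fixed above describes converting sparse integer labels to one-hot dense vectors. A minimal NumPy sketch of that conversion (plain Python, not the tutorial's actual TF ops):

```python
import numpy as np

NUM_CLASSES = 10

def to_one_hot(labels):
    """Convert integer labels in [0, NUM_CLASSES) to 1-hot float vectors."""
    one_hot = np.zeros((len(labels), NUM_CLASSES), dtype=np.float32)
    one_hot[np.arange(len(labels)), labels] = 1.0
    return one_hot

print(to_one_hot(np.array([3, 0, 9])))  # 1.0 at columns 3, 0, and 9
```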
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-"""A very simple MNIST classifer.
+"""A very simple MNIST classifier.
 
 See extensive documentation at
 http://tensorflow.org/tutorials/mnist/beginners/index.md
@@ -13,12 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 
-"""A very simple MNIST classifer, modified to display data in TensorBoard.
+"""A very simple MNIST classifier, modified to display data in TensorBoard.
 
 See extensive documentation for the original model at
 http://tensorflow.org/tutorials/mnist/beginners/index.md
 
-See documentaion on the TensorBoard specific pieces at
+See documentation on the TensorBoard specific pieces at
 http://tensorflow.org/how_tos/summaries_and_tensorboard/index.md
 
 If you modify this file, please update the exerpt in
@@ -53,7 +53,7 @@ def main(_):
  W = tf.Variable(tf.zeros([784, 10]), name='weights')
  b = tf.Variable(tf.zeros([10], name='bias'))
 
-  # use a name scope to organize nodes in the graph visualizer
+  # Use a name scope to organize nodes in the graph visualizer
  with tf.name_scope('Wx_b'):
    y = tf.nn.softmax(tf.matmul(x, W) + b)
 
@@ -85,7 +85,7 @@ def main(_):
  # Train the model, and feed in test data and record summaries every 10 steps
 
  for i in range(FLAGS.max_steps):
-    if i % 10 == 0:  # Record summary data, and the accuracy
+    if i % 10 == 0:  # Record summary data and the accuracy
      if FLAGS.fake_data:
        batch_xs, batch_ys = mnist.train.next_batch(
            100, fake_data=FLAGS.fake_data)
@@ -73,7 +73,7 @@ def build_dataset(words):
      index = dictionary[word]
    else:
      index = 0  # dictionary['UNK']
-      unk_count = unk_count + 1
+      unk_count += 1
    data.append(index)
  count[0][1] = unk_count
  reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
@@ -87,7 +87,7 @@ print('Sample data', data[:10])
 data_index = 0
 
 
-# Step 4: Function to generate a training batch for the skip-gram model.
+# Step 3: Function to generate a training batch for the skip-gram model.
 def generate_batch(batch_size, num_skips, skip_window):
  global data_index
  assert batch_size % num_skips == 0
@@ -117,7 +117,7 @@ for i in range(8):
  print(batch[i], '->', labels[i, 0])
  print(reverse_dictionary[batch[i]], '->', reverse_dictionary[labels[i, 0]])
 
-# Step 5: Build and train a skip-gram model.
+# Step 4: Build and train a skip-gram model.
 
 batch_size = 128
 embedding_size = 128  # Dimension of the embedding vector.
@@ -172,7 +172,7 @@ with graph.as_default():
  similarity = tf.matmul(
      valid_embeddings, normalized_embeddings, transpose_b=True)
 
-# Step 6: Begin training
+# Step 5: Begin training.
 num_steps = 100001
 
 with tf.Session(graph=graph) as session:
@@ -193,12 +193,12 @@ with tf.Session(graph=graph) as session:
 
    if step % 2000 == 0:
      if step > 0:
-        average_loss = average_loss / 2000
+        average_loss /= 2000
      # The average loss is an estimate of the loss over the last 2000 batches.
      print("Average loss at step ", step, ": ", average_loss)
      average_loss = 0
 
-    # note that this is expensive (~20% slowdown if computed every 500 steps)
+    # Note that this is expensive (~20% slowdown if computed every 500 steps)
    if step % 10000 == 0:
      sim = similarity.eval()
      for i in xrange(valid_size):
@@ -212,7 +212,7 @@ with tf.Session(graph=graph) as session:
      print(log_str)
  final_embeddings = normalized_embeddings.eval()
 
-# Step 7: Visualize the embeddings.
+# Step 6: Visualize the embeddings.
 
 def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
  assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
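The renumbered Step 3 above builds (center, context) training pairs for the skip-gram model. A simplified pure-Python sketch of the idea, over a tiny hypothetical corpus of word ids (an illustration, not the tutorial's `generate_batch`):

```python
def skipgram_pairs(data, skip_window):
    """Yield (center, context) pairs within +/- skip_window of each position."""
    pairs = []
    for i, center in enumerate(data):
        lo = max(0, i - skip_window)
        hi = min(len(data), i + skip_window + 1)
        for j in range(lo, hi):
            if j != i:  # skip the center word itself
                pairs.append((center, data[j]))
    return pairs

corpus = [0, 1, 2, 3, 4]  # word ids
print(skipgram_pairs(corpus, skip_window=1))
# [(0, 1), (1, 0), (1, 2), (2, 1), (2, 3), (3, 2), (3, 4), (4, 3)]
```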
@@ -123,7 +123,7 @@
    "outputId": "ef6c790c-2513-4b09-962e-27c79390c762"
   },
   "cell_type": "code",
-  "input": "num_classes = 10\n\ndef extract(filename):\n tar = tarfile.open(filename)\n tar.extractall()\n tar.close()\n root = os.path.splitext(os.path.splitext(filename)[0])[0] # remove .tar.gz\n data_folders = [os.path.join(root, d) for d in sorted(os.listdir(root))]\n if len(data_folders) != num_classes:\n raise Exception(\n 'Expected %d folders, one per class. Found %d instead.' % (\n num_folders, len(data_folders)))\n print data_folders\n return data_folders\n \ntrain_folders = extract(train_filename)\ntest_folders = extract(test_filename)",
+  "input": "num_classes = 10\n\ndef extract(filename):\n tar = tarfile.open(filename)\n tar.extractall()\n tar.close()\n root = os.path.splitext(os.path.splitext(filename)[0])[0] # remove .tar.gz\n data_folders = [os.path.join(root, d) for d in sorted(os.listdir(root))]\n if len(data_folders) != num_classes:\n raise Exception(\n 'Expected %d folders, one per class. Found %d instead.' % (\n num_classes, len(data_folders)))\n print data_folders\n return data_folders\n \ntrain_folders = extract(train_filename)\ntest_folders = extract(test_filename)",
   "language": "python",
   "outputs": [
    {
@@ -393,4 +393,4 @@
  },
  "nbformat": 3,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}
@@ -218,7 +218,7 @@
    "colab_type": "text"
   },
   "cell_type": "markdown",
-  "source": "---\nProblem 1\n---------\n\nThe convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.\n\n---"
+  "source": "---\nProblem 1\n---------\n\nThe convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.\n\n---"
  },
 {
  "metadata": {
@@ -239,4 +239,4 @@
  },
  "nbformat": 3,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}
@@ -6,7 +6,7 @@ and the easiest to use, but the C++ API may offer some performance advantages
 in graph execution, and supports deployment to small devices such as Android.
 
 Over time, we hope that the TensorFlow community will develop front ends for
-languages like Go, Java, JavaScript, Lua R, and perhaps others. With
+languages like Go, Java, JavaScript, Lua, R, and perhaps others. With
 [SWIG](http://swig.org), it's relatively easy to develop a TensorFlow interface
 for your favorite language.
 
@@ -917,7 +917,7 @@ c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
 d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
 e = tf.matmul(c, d)
 
-# Construct a `Session` to execut the graph.
+# Construct a `Session` to execute the graph.
 sess = tf.Session()
 
 # Execute the graph and store the value that `e` represents in `result`.
@@ -243,7 +243,7 @@ as `'SAME'` or `'VALID'`, the output size and the padding pixels are computed.
 For the `'SAME'` padding, the output height and width are computed as:
 
    out_height = ceil(float(in_height) / float(strides[1]))
-    out_width  = ceil(float(in_width) / float(stides[2]))
+    out_width  = ceil(float(in_width) / float(strides[2]))
 
 and the padding on the top and left are computed as:
 
@@ -265,7 +265,7 @@ same number of pixels on both sides.
 For the `'VALID`' padding, the output height and width are computed as:
 
    out_height = ceil(float(in_height - filter_height + 1) / float(strides[1]))
-    out_width  = ceil(float(in_width - filter_width + 1) / float(stides[2]))
+    out_width  = ceil(float(in_width - filter_width + 1) / float(strides[2]))
 
 and the padding values are always zero. The output is then computed as
 
|
||||
You can find out the directory with the following command:
|
||||
|
||||
```bash
|
||||
$ python -c 'import site; print("\n".join(site.getsitepackages()))'
|
||||
$ python -c 'import os; import inspect; import tensorflow; print(os.path.dirname(inspect.getfile(tensorflow)))'
|
||||
```
|
||||
|
||||
The simple demo model for classifying handwritten digits from the MNIST dataset
|
||||
@ -310,7 +310,7 @@ depends on.
|
||||
#### Install Bazel
|
||||
|
||||
Follow instructions [here](http://bazel.io/docs/install.html) to install the
|
||||
dependencies for Bazel. Then download the latest stable bazel version using the
|
||||
dependencies for bazel. Then download the latest stable bazel version using the
|
||||
[installer for your system](https://github.com/bazelbuild/bazel/releases) and
|
||||
run the installer as mentioned there:
|
||||
|
||||
@ -374,6 +374,7 @@ toolkit is installed in `/usr/local/cuda`:
|
||||
tar xvzf cudnn-6.5-linux-x64-v2.tgz
|
||||
sudo cp cudnn-6.5-linux-x64-v2/cudnn.h /usr/local/cuda/include
|
||||
sudo cp cudnn-6.5-linux-x64-v2/libcudnn* /usr/local/cuda/lib64
|
||||
sudo chmod a+r /usr/local/cuda/lib64/libcudnn*
|
||||
```
|
||||
|
||||
##### Configure TensorFlow's canonical view of Cuda libraries
|
||||
@ -450,7 +451,7 @@ Configuration finished
|
||||
##### Known issues
|
||||
|
||||
* Although it is possible to build both Cuda and non-Cuda configs under the same
|
||||
source tree, we recommend to run "bazel clean" when switching between these two
|
||||
source tree, we recommend to run `bazel clean` when switching between these two
|
||||
configs in the same source tree.
|
||||
|
||||
* You have to run configure before running bazel build. Otherwise, the build
|
||||
@ -469,7 +470,7 @@ case, be sure to install its dependency [PCRE](from www.pcre.org) and not PCRE2.
|
||||
#### Dependencies
|
||||
|
||||
Follow instructions [here](http://bazel.io/docs/install.html) to install the
|
||||
dependencies for Bazel. You can then use homebrew to install bazel and SWIG:
|
||||
dependencies for bazel. You can then use homebrew to install bazel and SWIG:
|
||||
|
||||
```bash
|
||||
$ brew install bazel swig
|
||||
@ -506,6 +507,8 @@ Do you wish to build TensorFlow with GPU support? [y/N]
|
||||
|
||||
### Create the pip package and install
|
||||
|
||||
When building from source, you will still build a pip package and install that.
|
||||
|
||||
```bash
|
||||
$ bazel build -c opt //tensorflow/tools/pip_package:build_pip_package
|
||||
|
||||
@ -518,6 +521,29 @@ $ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
|
||||
$ pip install /tmp/tensorflow_pkg/tensorflow-0.6.0-cp27-none-linux_x86_64.whl
|
||||
```
|
||||
|
||||
## Setting up TensorFlow for Development
|
||||
|
||||
If you're working on TensorFlow itself, it is useful to be able to test your
|
||||
changes in an interactive python shell without having to reinstall TensorFlow.
|
||||
|
||||
To set up TensorFlow such that all files are linked (instead of copied) from the
|
||||
system directories, run the following commands inside the TensorFlow root
|
||||
directory:
|
||||
|
||||
```bash
|
||||
bazel build -c opt //tensorflow/tools/pip_package:build_pip_package
|
||||
mkdir _python_build
|
||||
cd _python_build
|
||||
ln -s ../bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/* .
|
||||
ln -s ../tensorflow/tools/pip_package/* .
|
||||
python setup.py develop
|
||||
```
|
||||
|
||||
Note that this setup still requires you to rebuild the
|
||||
`//tensorflow/tools/pip_package:build_pip_package` target every time you change
|
||||
a C++ file; add, delete, or move any python file; or if you change bazel build
|
||||
rules.
|
||||
|
||||
## Train your first TensorFlow neural net model
|
||||
|
||||
Starting from the root of your source tree, run:
|
||||
@ -546,29 +572,6 @@ Validation error: 7.0%
|
||||
...
|
||||
```
|
||||
|
||||
## Setting up TensorFlow for Development
|
||||
|
||||
If you're working on TensorFlow itself, it is useful to be able to test your
|
||||
changes in an interactive python shell without having to reinstall TensorFlow.
|
||||
|
||||
To set up TensorFlow such that all files are linked (instead of copied) from the
|
||||
system directories, run the following commands inside the TensorFlow root
|
||||
directory:
|
||||
|
||||
```bash
|
||||
bazel build -c opt //tensorflow/tools/pip_package:build_pip_package
|
||||
mkdir _python_build
|
||||
cd _python_build
|
||||
ln -s ../bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/* .
|
||||
ln -s ../tensorflow/tools/pip_package/* .
|
||||
python setup.py develop
|
||||
```
|
||||
|
||||
Note that this setup still requires you to rebuild the
|
||||
`//tensorflow/tools/pip_package:build_pip_package` target every time you change
|
||||
a C++ file; add, delete, or move any python file; or if you change bazel build
|
||||
rules.
|
||||
|
||||
## Common Problems
|
||||
|
||||
### GPU-related issues
|
||||
@ -579,7 +582,7 @@ If you encounter the following when trying to run a TensorFlow program:
|
||||
ImportError: libcudart.so.7.0: cannot open shared object file: No such file or directory
|
||||
```
|
||||
|
||||
Make sure you followed the the GPU installation [instructions](#optional-install-cuda-gpus-on-linux).
|
||||
Make sure you followed the GPU installation [instructions](#optional-install-cuda-gpus-on-linux).
|
||||
|
||||
### Pip installation issues
|
||||
|
||||
|
@@ -99,7 +99,7 @@ key, value = reader.read(filename_queue)
 record_defaults = [[1], [1], [1], [1], [1]]
 col1, col2, col3, col4, col5 = tf.decode_csv(
    value, record_defaults=record_defaults)
-features = tf.concat(0, [col1, col2, col3, col4])
+features = tf.pack([col1, col2, col3, col4])
 
 with tf.Session() as sess:
  # Start populating the filename queue.
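The fix above replaces `tf.concat` with `tf.pack`: the decoded CSV columns are scalars, so they must be stacked into a new rank-1 tensor rather than concatenated along an existing axis. A NumPy analogy of the distinction (`np.stack` vs `np.concatenate`; an illustration, not the tutorial's TF code):

```python
import numpy as np

# Four scalar "columns", like the decode_csv outputs above.
col1, col2, col3, col4 = (np.float32(v) for v in (1, 2, 3, 4))

# Stacking scalars creates a new axis and yields a vector of shape (4,):
features = np.stack([col1, col2, col3, col4])
print(features.shape)  # (4,)

# Concatenation joins along an *existing* axis, so it needs inputs of
# rank >= 1 -- which is why concat was the wrong tool for scalars here.
vectors = [np.atleast_1d(c) for c in (col1, col2, col3, col4)]
print(np.concatenate(vectors).shape)  # (4,)
```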
@@ -85,7 +85,7 @@ def MyLoop(coord):
 coord = Coordinator()
 
 # Create 10 threads that run 'MyLoop()'
-threads = [threading.Thread(target=MyLoop, args=(coord)) for i in xrange(10)]
+threads = [threading.Thread(target=MyLoop, args=(coord,)) for i in xrange(10)]
 
 # Start the threads and wait for all of them to stop.
 for t in threads: t.start()
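The fix is the trailing comma: `args=(coord)` is just `coord` wrapped in grouping parentheses, while `args=(coord,)` is a one-element tuple, which is what `threading.Thread` expects. A runnable illustration:

```python
import threading

def my_loop(coord):
    print('got', coord)

coord = object()  # stand-in for the tutorial's Coordinator

# (coord) is NOT a tuple -- the parentheses are just grouping, so the
# thread would fail when it tries to unpack args with target(*args).
print(type((coord)))   # <class 'object'>
print(type((coord,)))  # <class 'tuple'>

t = threading.Thread(target=my_loop, args=(coord,))
t.start()
t.join()
```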
@@ -40,7 +40,7 @@ Rank | Shape | Dimension number | Example
 1 | [D0] | 1-D | A 1-D tensor with shape [5].
 2 | [D0, D1] | 2-D | A 2-D tensor with shape [3, 4].
 3 | [D0, D1, D2] | 3-D | A 3-D tensor with shape [1, 4, 3].
-n | [D0, D1, ... Dn] | n-D | A tensor with shape [D0, D1, ... Dn].
+n | [D0, D1, ... Dn-1] | n-D | A tensor with shape [D0, D1, ... Dn-1].
 
 Shapes can be represented via Python lists / tuples of ints, or with the
 [`TensorShape` class](../api_docs/python/framework.md#TensorShape).
@@ -64,4 +64,4 @@ Data type | Python type | Description
 `DT_COMPLEX64` | `tf.complex64` | Complex number made of two 32 bits floating points: real and imaginary parts.
 `DT_QINT8` | `tf.qint8` | 8 bits signed integer used in quantized Ops.
 `DT_QINT32` | `tf.qint32` | 32 bits signed integer used in quantized Ops.
-`DT_QUINT8` | `tf.quint8` | 8 bits unsigned integer used in quantized Ops.
\ No newline at end of file
+`DT_QUINT8` | `tf.quint8` | 8 bits unsigned integer used in quantized Ops.
@@ -390,7 +390,7 @@ summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
 ```
 
 Lastly, the events file will be updated with new summary values every time the
-`summary_op` is run and the ouput passed to the writer's `add_summary()`
+`summary_op` is run and the output passed to the writer's `add_summary()`
 function.
 
 ```python
@@ -10,7 +10,7 @@ for an introduction to recurrent neural networks and LSTMs in particular.
 
 In this tutorial we will show how to train a recurrent neural network on
 a challenging task of language modeling. The goal of the problem is to fit a
-probabilistic model which assigns probablities to sentences. It does so by
+probabilistic model which assigns probabilities to sentences. It does so by
 predicting next words in a text given a history of previous words. For this
 purpose we will use the Penn Tree Bank (PTB) dataset, which is a popular
 benchmark for measuring quality of these models, whilst being small and
@@ -80,7 +80,7 @@ of unrolled steps.
 This is easy to implement by feeding inputs of length `num_steps` at a time and
 doing backward pass after each iteration.
 
-A simplifed version of the code for the graph creation for truncated
+A simplified version of the code for the graph creation for truncated
 backpropagation:
 
 ```python
@@ -129,7 +129,7 @@ word_embeddings = tf.nn.embedding_lookup(embedding_matrix, word_ids)
 The embedding matrix will be initialized randomly and the model will learn to
 differentiate the meaning of words just by looking at the data.
 
-### Loss Fuction
+### Loss Function
 
 We want to minimize the average negative log probability of the target words:
 
@@ -50,7 +50,7 @@ This basic architecture is depicted below.
 Each box in the picture above represents a cell of the RNN, most commonly
 a GRU cell or an LSTM cell (see the [RNN Tutorial](../../tutorials/recurrent/index.md)
 for an explanation of those). Encoder and decoder can share weights or,
-as is more common, use a different set of parameters. Mutli-layer cells
+as is more common, use a different set of parameters. Multi-layer cells
 have been successfully used in sequence-to-sequence models too, e.g. for
 translation [Sutskever et al., 2014](http://arxiv.org/abs/1409.3215).
 
@@ -203,7 +203,7 @@ sentence with a special PAD symbol. Then we'd need only one seq2seq model,
 for the padded lengths. But on shorter sentence our model would be inefficient,
 encoding and decoding many PAD symbols that are useless.
 
-As a compromise between contructing a graph for every pair of lengths and
+As a compromise between constructing a graph for every pair of lengths and
 padding to a single length, we use a number of *buckets* and pad each sentence
 to the length of the bucket above it. In `translate.py` we use the following
 default buckets.
|
||||
int32 label_limit_ GUARDED_BY(mu_);
|
||||
|
||||
// {example_pos_, label_pos_} is the cursor for the next example.
|
||||
// example_pos_ wrapps around at the end of corpus_. For each
|
||||
// example_pos_ wraps around at the end of corpus_. For each
|
||||
// example, we randomly generate [label_pos_, label_limit) for
|
||||
// labels.
|
||||
void NextExample(int32* example, int32* label) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
|
||||
|
@ -65,7 +65,7 @@ w_out: output word embedding.
|
||||
examples: A vector of word ids.
|
||||
labels: A vector of word ids.
|
||||
vocab_count: Count of words in the vocabulary.
|
||||
num_negative_samples: Number of negative samples per exaple.
|
||||
num_negative_samples: Number of negative samples per example.
|
||||
)doc");
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
@ -164,7 +164,7 @@ def time_tensorflow_run(session, target, info_string):
|
||||
|
||||
Args:
|
||||
session: the TensorFlow session to run the computation under.
|
||||
target: the targe Tensor that is passed to the session's run() function.
|
||||
target: the target Tensor that is passed to the session's run() function.
|
||||
info_string: a string summarizing this run, to be printed with the stats.
|
||||
|
||||
Returns:
|
||||
|
@ -230,7 +230,7 @@ def inference(images):
|
||||
weights = _variable_with_weight_decay('weights', shape=[dim, 384],
|
||||
stddev=0.04, wd=0.004)
|
||||
biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
|
||||
local3 = tf.nn.relu_layer(reshape, weights, biases, name=scope.name)
|
||||
local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
|
||||
_activation_summary(local3)
|
||||
|
||||
# local4
|
||||
@ -238,7 +238,7 @@ def inference(images):
|
||||
weights = _variable_with_weight_decay('weights', shape=[384, 192],
|
||||
stddev=0.04, wd=0.004)
|
||||
biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
|
||||
local4 = tf.nn.relu_layer(local3, weights, biases, name=scope.name)
|
||||
local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
|
||||
_activation_summary(local4)
|
||||
|
||||
# softmax, i.e. softmax(WX + b)
|
||||
@ -247,7 +247,7 @@ def inference(images):
|
||||
stddev=1/192.0, wd=0.0)
|
||||
biases = _variable_on_cpu('biases', [NUM_CLASSES],
|
||||
tf.constant_initializer(0.0))
|
||||
softmax_linear = tf.nn.xw_plus_b(local4, weights, biases, name=scope.name)
|
||||
softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
|
||||
_activation_summary(softmax_linear)
|
||||
|
||||
return softmax_linear
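The three rewrites above are behavior-preserving: the fused helpers are just
compositions of primitives, i.e. tf.nn.relu_layer(x, w, b) computes
tf.nn.relu(tf.matmul(x, w) + b), and tf.nn.xw_plus_b(x, w, b) computes
tf.matmul(x, w) + b. A hedged equivalence check with toy, made-up values:

    import tensorflow as tf

    x = tf.constant([[1.0, 2.0]])        # [batch=1, in=2]
    w = tf.constant([[3.0], [4.0]])      # [in=2, out=1]
    b = tf.constant([0.5])               # [out=1]
    fused = tf.nn.xw_plus_b(x, w, b)
    spelled_out = tf.matmul(x, w) + b    # the same computation, spelled out
    # Both evaluate to [[11.5]].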
@ -301,7 +301,7 @@ def _add_loss_summaries(total_loss):
  losses = tf.get_collection('losses')
  loss_averages_op = loss_averages.apply(losses + [total_loss])

  # Attach a scalar summmary to all individual losses and the total loss; do the
  # Attach a scalar summary to all individual losses and the total loss; do the
  # same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Name each loss as '(raw)' and name the moving average version of the loss
@ -384,5 +384,5 @@ def maybe_download_and_extract():
                                             reporthook=_progress)
    print()
    statinfo = os.stat(filepath)
    print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)
@ -95,7 +95,7 @@ def tower_loss(scope):
  loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
  loss_averages_op = loss_averages.apply(losses + [total_loss])

  # Attach a scalar summmary to all individual losses and the total loss; do the
  # Attach a scalar summary to all individual losses and the total loss; do the
  # same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
@ -129,9 +129,9 @@ class PTBModel(object):
        states.append(state)

      output = tf.reshape(tf.concat(1, outputs), [-1, size])
      logits = tf.nn.xw_plus_b(output,
                               tf.get_variable("softmax_w", [size, vocab_size]),
                               tf.get_variable("softmax_b", [vocab_size]))
      softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
      softmax_b = tf.get_variable("softmax_b", [vocab_size])
      logits = tf.matmul(output, softmax_w) + softmax_b
      loss = seq2seq.sequence_loss_by_example([logits],
                                              [tf.reshape(self._targets, [-1])],
                                              [tf.ones([batch_size * num_steps])],
@ -43,8 +43,7 @@ def _build_vocab(filename):
  data = _read_words(filename)

  counter = collections.Counter(data)
  count_pairs = sorted(counter.items(),
                       key=lambda (word, count): (-count, word))
  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))

  words, _ = list(zip(*count_pairs))
  word_to_id = dict(zip(words, range(len(words))))
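The lambda rewrite above is a Python 3 compatibility fix: tuple parameter
unpacking in function signatures, e.g. lambda (word, count): ..., was removed
by PEP 3113 and is a SyntaxError under Python 3. Indexing the pair explicitly
works in both versions. A small sketch of the same sort key with toy data:

    import collections

    counter = collections.Counter(["the", "cat", "the"])
    # Sort by descending count, then lexicographically by word.
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
    # count_pairs == [('the', 2), ('cat', 1)]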
@ -255,7 +255,7 @@ def prepare_wmt_data(data_dir, en_vocabulary_size, fr_vocabulary_size):
      (3) path to the token-ids for English development data-set,
      (4) path to the token-ids for French development data-set,
      (5) path to the English vocabulary file,
      (6) path to the French vocabluary file.
      (6) path to the French vocabulary file.
  """
  # Get wmt data to the specified directory.
  train_path = get_wmt_enfr_train_set(data_dir)
@ -141,9 +141,10 @@ class Seq2SeqModel(object):
      # If we use output projection, we need to project outputs for decoding.
      if output_projection is not None:
        for b in xrange(len(buckets)):
          self.outputs[b] = [tf.nn.xw_plus_b(output, output_projection[0],
                                             output_projection[1])
                             for output in self.outputs[b]]
          self.outputs[b] = [
              tf.matmul(output, output_projection[0]) + output_projection[1]
              for output in self.outputs[b]
          ]
    else:
      self.outputs, self.losses = seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
@ -184,7 +185,7 @@ class Seq2SeqModel(object):
      average perplexity, and the outputs.

    Raises:
      ValueError: if length of enconder_inputs, decoder_inputs, or
      ValueError: if length of encoder_inputs, decoder_inputs, or
        target_weights disagrees with bucket size for the specified bucket_id.
    """
    # Check if the sizes match.
@ -53,6 +53,17 @@ class SessionInterface(object):
    raise NotImplementedError('Run')


def _get_indexed_slices_value_from_fetches(fetched_vals):
  return ops.IndexedSlicesValue(fetched_vals[0], fetched_vals[1],
                                fetched_vals[2]
                                if len(fetched_vals) == 3 else None)


def _get_feeds_for_indexed_slices(feed, feed_val):
  return list(zip([feed.values, feed.indices] if feed.dense_shape is None else
                  [feed.values, feed.indices, feed.dense_shape], feed_val))
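These two helpers plug into the session's fetch/feed expansion table shown in
the next hunk: a fetched IndexedSlices is expanded into two or three plain
tensor fetches, and the fetched arrays are contracted back into an
IndexedSlicesValue (dense_shape is None when it was not set). A hedged sketch
of the contract they implement; values_array, indices_array and ind are
hypothetical stand-ins, not names from this commit:

    # Two fetched arrays means the IndexedSlices had no dense_shape.
    fetched_vals = [values_array, indices_array]
    ind_val = _get_indexed_slices_value_from_fetches(fetched_vals)
    # ind_val.values and ind_val.indices are set; ind_val.dense_shape is None.

    # Feeding goes the other way: pair each component tensor with its value.
    feeds = _get_feeds_for_indexed_slices(ind, (values_array, indices_array))
    # feeds == [(ind.values, values_array), (ind.indices, indices_array)]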


class BaseSession(SessionInterface):
  """A class for interacting with a TensorFlow computation.

@ -221,6 +232,14 @@ class BaseSession(SessionInterface):
           lambda fetched_vals: ops.SparseTensorValue(*fetched_vals)),
       lambda feed, feed_val: list(zip(
           [feed.indices, feed.values, feed.shape], feed_val))),
      # IndexedSlices are fetched as IndexedSlicesValues. They can be fed
      # IndexedSlicesValues or normal tuples.
      (ops.IndexedSlices,
       lambda fetch: (
           [fetch.values, fetch.indices] if fetch.dense_shape is None
           else [fetch.values, fetch.indices, fetch.dense_shape],
           _get_indexed_slices_value_from_fetches),
       _get_feeds_for_indexed_slices),
      # The default catches all types and performs no expansions.
      (object,
       lambda fetch: ([fetch], lambda fetched_vals: fetched_vals[0]),
@ -248,6 +248,144 @@ class SessionTest(test_util.TensorFlowTestCase):
      self.assertAllEqual(sp2_out.values, values)
      self.assertAllEqual(sp2_out.shape, shape)

  def testFetchIndexedSlices(self):
    with session.Session() as s:
      indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)
      values = np.array([1.0, 2.0]).astype(np.float32)
      dense_shape = np.array([7, 9, 2]).astype(np.int64)
      ind = ops.IndexedSlices(
          constant_op.constant(values), constant_op.constant(indices),
          constant_op.constant(dense_shape))
      # Single fetch, use as tuple
      ind_out = s.run(ind)
      values_out, indices_out, dense_shape_out = ind_out
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # Single fetch, use as IndexedSlicesValue
      ind_out = s.run(ind)
      self.assertAllEqual(ind_out.values, values)
      self.assertAllEqual(ind_out.indices, indices)
      self.assertAllEqual(ind_out.dense_shape, dense_shape)
      # Tuple fetch, use as tuple
      values_out, indices_out, dense_shape_out = s.run(ind)
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # List fetch, use as tuple
      (values_out, indices_out, dense_shape_out), = s.run([ind])
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # List fetch, use as IndexedSlicesValue
      ind_out, = s.run([ind])
      self.assertAllEqual(ind_out.values, values)
      self.assertAllEqual(ind_out.indices, indices)
      self.assertAllEqual(ind_out.dense_shape, dense_shape)

  def testFeedIndexedSlices(self):
    with session.Session() as s:
      values = np.array([1.0, 2.0]).astype(np.float32)
      indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)
      dense_shape = np.array([7, 9, 2]).astype(np.int64)
      ind = ops.IndexedSlices(
          array_ops.placeholder(dtype=np.float32,
                                shape=(2,)),
          array_ops.placeholder(dtype=np.int64,
                                shape=(2, 3)),
          array_ops.placeholder(dtype=np.int64,
                                shape=(3,)),)
      ind_values = array_ops.identity(ind.values)
      ind_indices = array_ops.identity(ind.indices)
      ind_dense_shape = array_ops.identity(ind.dense_shape)
      ind2 = ops.IndexedSlices(ind_values, ind_indices, ind_dense_shape)
      # Feed with tuple
      values_out, indices_out, dense_shape_out = s.run(
          [ind_values, ind_indices, ind_dense_shape],
          {ind: (values, indices, dense_shape)})
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # Feed with IndexedSlicesValue
      values_out, indices_out, dense_shape_out = s.run(
          [ind_values, ind_indices, ind_dense_shape],
          {ind: ops.IndexedSlicesValue(values, indices, dense_shape)})
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # Feed with IndexedSlicesValue, fetch IndexedSlicesValue
      ind2_out = s.run(ind2, {ind: ops.IndexedSlicesValue(values, indices,
                                                          dense_shape)})
      self.assertAllEqual(ind2_out.values, values)
      self.assertAllEqual(ind2_out.indices, indices)
      self.assertAllEqual(ind2_out.dense_shape, dense_shape)

  def testFetchIndexedSlicesWithoutDenseShape(self):
    with session.Session() as s:
      indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)
      values = np.array([1.0, 2.0]).astype(np.float32)
      dense_shape = None
      ind = ops.IndexedSlices(
          constant_op.constant(values), constant_op.constant(indices), None)
      # Single fetch, use as tuple
      ind_out = s.run(ind)
      values_out, indices_out, dense_shape_out = ind_out
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # Single fetch, use as IndexedSlicesValue
      ind_out = s.run(ind)
      self.assertAllEqual(ind_out.values, values)
      self.assertAllEqual(ind_out.indices, indices)
      self.assertAllEqual(ind_out.dense_shape, dense_shape)
      # Tuple fetch, use as tuple
      values_out, indices_out, dense_shape_out = s.run(ind)
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # List fetch, use as tuple
      (values_out, indices_out, dense_shape_out), = s.run([ind])
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      self.assertAllEqual(dense_shape_out, dense_shape)
      # List fetch, use as IndexedSlicesValue
      ind_out, = s.run([ind])
      self.assertAllEqual(ind_out.values, values)
      self.assertAllEqual(ind_out.indices, indices)
      self.assertAllEqual(ind_out.dense_shape, dense_shape)

  def testFeedIndexedSlicesWithoutDenseShape(self):
    with session.Session() as s:
      values = np.array([1.0, 2.0]).astype(np.float32)
      indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)
      dense_shape = None
      ind = ops.IndexedSlices(
          array_ops.placeholder(dtype=np.float32,
                                shape=(2,)),
          array_ops.placeholder(dtype=np.int64,
                                shape=(2, 3)),
          None)
      ind_values = array_ops.identity(ind.values)
      ind_indices = array_ops.identity(ind.indices)
      ind2 = ops.IndexedSlices(ind_values, ind_indices)
      # Feed with tuple
      values_out, indices_out = s.run(
          [ind_values, ind_indices], {ind: (values, indices)})
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      # Feed with IndexedSlicesValue
      values_out, indices_out = s.run(
          [ind_values, ind_indices],
          {ind: ops.IndexedSlicesValue(values, indices, dense_shape)})
      self.assertAllEqual(values_out, values)
      self.assertAllEqual(indices_out, indices)
      # Feed with IndexedSlicesValue, fetch IndexedSlicesValue
      ind2_out = s.run(ind2, {ind: ops.IndexedSlicesValue(values, indices,
                                                          dense_shape)})
      self.assertAllEqual(ind2_out.values, values)
      self.assertAllEqual(ind2_out.indices, indices)
      self.assertAllEqual(ind2_out.dense_shape, dense_shape)

  def testExtendWithStatelessOperations(self):
    with session.Session() as s:
      a = constant_op.constant(1.0, shape=[1, 2])
@ -204,7 +204,11 @@ tensorflow::ImportNumpy();
// The wrapped function TF_GetOpList returns a TF_Buffer pointer. This typemap
// creates a Python string from the TF_Buffer and returns it.
%typemap(out) TF_Buffer TF_GetOpList {
%#if PY_MAJOR_VERSION < 3
  $result = PyString_FromStringAndSize(
%#else
  $result = PyUnicode_FromStringAndSize(
%#endif
      reinterpret_cast<const char*>($1.data), $1.length);
}

@ -121,7 +121,7 @@ def collect_members(module_to_name):
        if len(fullname) == len(other_fullname):
          raise RuntimeError("Can't decide whether to use %s or %s for %s: "
                             "both full names have length %d" %
                             (fullname, other_fullname, len(fullname)))
                             (fullname, other_fullname, name, len(fullname)))
        if len(fullname) > len(other_fullname):
          continue  # Use the shorter full name
      members[name] = fullname, member
@ -143,7 +143,7 @@ class Tensor(object):
  d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
  e = tf.matmul(c, d)

  # Construct a `Session` to execut the graph.
  # Construct a `Session` to execute the graph.
  sess = tf.Session()

  # Execute the graph and store the value that `e` represents in `result`.
@ -784,6 +784,10 @@ class IndexedSlices(object):
            (", dense_shape=%s" % self._dense_shape) if self._dense_shape else "")


IndexedSlicesValue = collections.namedtuple(
    "IndexedSlicesValue", ["values", "indices", "dense_shape"])


class SparseTensor(object):
  """Represents a sparse tensor.

@ -1427,26 +1427,49 @@ class ControlFlowTest(tf.test.TestCase):
      self.assertIs(None, m.get_shape().ndims)
      self.assertEqual([], index.get_shape())

      # All inputs known but different.
      # All inputs known with different ranks.
      p1 = tf.placeholder(tf.float32, shape=[1, 2])
      p2 = tf.placeholder(tf.float32, shape=[1, 2, 3])
      m, index = control_flow_ops.merge([p1, p2])
      self.assertIs(None, m.get_shape().ndims)
      self.assertEqual([], index.get_shape())

      # All inputs known with some dimensions different.
      p1 = tf.placeholder(tf.float32, shape=[1, 2])
      p2 = tf.placeholder(tf.float32, shape=[2, 1])
      m, index = control_flow_ops.merge([p1, p2])
      self.assertIs(None, m.get_shape().ndims)
      self.assertEqual([None, None], m.get_shape().as_list())
      self.assertEqual([], index.get_shape())

      # All inputs known but same.
      p1 = tf.placeholder(tf.float32, shape=[1, 2])
      p2 = tf.placeholder(tf.float32, shape=[None, 2])
      m, index = control_flow_ops.merge([p1, p2])
      self.assertEqual([None, 2], m.get_shape().as_list())
      self.assertEqual([], index.get_shape())

      p1 = tf.placeholder(tf.float32, shape=[1, 2])
      p2 = tf.placeholder(tf.float32, shape=[2, 2])
      m, index = control_flow_ops.merge([p1, p2])
      self.assertEqual([None, 2], m.get_shape().as_list())
      self.assertEqual([], index.get_shape())

      # All inputs known with same dimensions.
      p1 = tf.placeholder(tf.float32, shape=[1, 2])
      p2 = tf.placeholder(tf.float32, shape=[1, 2])
      m, index = control_flow_ops.merge([p1, p2])
      self.assertEqual([1, 2], m.get_shape())
      self.assertEqual([1, 2], m.get_shape().as_list())
      self.assertEqual([], index.get_shape())

      # Possibly the same but not guaranteed.
      p1 = tf.placeholder(tf.float32, shape=[1, 2])
      p2 = tf.placeholder(tf.float32)
      p2.set_shape([None, 2])
      p1 = tf.placeholder(tf.float32, shape=[None, 2])
      p2 = tf.placeholder(tf.float32, shape=[None, 2])
      m, index = control_flow_ops.merge([p1, p2])
      self.assertIs(None, m.get_shape().ndims)
      self.assertEqual([None, 2], m.get_shape().as_list())
      self.assertEqual([], index.get_shape())

      p1 = tf.placeholder(tf.float32, shape=[None, None])
      p2 = tf.placeholder(tf.float32, shape=[None, None])
      m, index = control_flow_ops.merge([p1, p2])
      self.assertEqual([None, None], m.get_shape().as_list())
      self.assertEqual([], index.get_shape())

  def testRefSelect(self):
@ -106,7 +106,7 @@ class UnaryOpTest(tf.test.TestCase):
    def func(x):
      try:
        return fn(x)
      except ValueError, e:
      except ValueError as e:
        if "domain error" in e.message:
          return np.inf * np.ones_like(x)
        else:
@ -37,7 +37,7 @@ class Plus1RNNCell(tf.nn.rnn_cell.RNNCell):
  def state_size(self):
    return 5

  def __call__(self, input_, state):
  def __call__(self, input_, state, scope=None):
    return (input_ + 1, state + 1)


@ -516,30 +516,47 @@ class BidirectionalRNNTest(tf.test.TestCase):
    self._seed = 23489
    np.random.seed(self._seed)

  def _testBidirectionalRNN(self, use_gpu):
  def _createBidirectionalRNN(self, use_gpu, use_shape, use_sequence_length):
    num_units = 3
    input_size = 5
    batch_size = 2
    max_length = 8

    initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)
    sequence_length = tf.placeholder(tf.int64) if use_sequence_length else None
    cell_fw = tf.nn.rnn_cell.LSTMCell(num_units,
                                      input_size,
                                      initializer=initializer)
    cell_bw = tf.nn.rnn_cell.LSTMCell(num_units,
                                      input_size,
                                      initializer=initializer)
    inputs = max_length * [
        tf.placeholder(tf.float32,
                       shape=(batch_size, input_size) if use_shape else None)
    ]
    outputs = tf.nn.bidirectional_rnn(cell_fw,
                                      cell_bw,
                                      inputs,
                                      dtype=tf.float32,
                                      sequence_length=sequence_length)
    self.assertEqual(len(outputs), len(inputs))
    for out in outputs:
      if use_sequence_length:
        # Merging with the zero state makes the dimensions None.
        self.assertEqual(out.get_shape().as_list(), [None, None])
      else:
        self.assertEqual(out.get_shape().as_list(), [batch_size if use_shape
                                                     else None, 2 * num_units])

    input_value = np.random.randn(batch_size, input_size)

    return input_value, inputs, outputs, sequence_length

  def _testBidirectionalRNN(self, use_gpu, use_shape):
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
      initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)
      sequence_length = tf.placeholder(tf.int64)
      cell_fw = tf.nn.rnn_cell.LSTMCell(
          num_units, input_size, initializer=initializer)
      cell_bw = tf.nn.rnn_cell.LSTMCell(
          num_units, input_size, initializer=initializer)
      inputs = max_length * [
          tf.placeholder(tf.float32, shape=(batch_size, input_size))]
      outputs = tf.nn.bidirectional_rnn(
          cell_fw, cell_bw, inputs, dtype=tf.float32,
          sequence_length=sequence_length)

      self.assertEqual(len(outputs), len(inputs))
      for out in outputs:
        self.assertEqual(out.get_shape().as_list(), [batch_size, 2 * num_units])

      input_value, inputs, outputs, sequence_length = (
          self._createBidirectionalRNN(use_gpu, use_shape, True))
      tf.initialize_all_variables().run()
      input_value = np.random.randn(batch_size, input_size)
      # Run with pre-specified sequence length of 2, 3
      out = sess.run(outputs, feed_dict={inputs[0]: input_value,
                                         sequence_length: [2, 3]})
@ -575,10 +592,46 @@ class BidirectionalRNNTest(tf.test.TestCase):
      self.assertEqual(out[2][1][1], out[0][1][4])
      self.assertEqual(out[2][1][2], out[0][1][5])

  def testBidirectionalRNN(self):
    self._testBidirectionalRNN(use_gpu=False)
    self._testBidirectionalRNN(use_gpu=True)
  def _testBidirectionalRNNWithoutSequenceLength(self, use_gpu, use_shape):
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
      input_value, inputs, outputs, _ = self._createBidirectionalRNN(
          use_gpu, use_shape, False)
      tf.initialize_all_variables().run()
      out = sess.run(outputs, feed_dict={inputs[0]: input_value})

      # Since the forward and backward LSTM cells were initialized with the
      # same parameters, the forward and backward output has to be the same,
      # but reversed in time. The format is output[time][batch][depth], and
      # due to depth concatenation (as num_units=3 for both RNNs):
      # - forward output:  out[][][depth] for 0 <= depth < 3
      # - backward output: out[][][depth] for 3 <= depth < 6
      #
      # Both sequences in batch are length=8.  Check that the time=i
      # forward output is equal to time=8-1-i backward output
      for i in xrange(8):
        self.assertEqual(out[i][0][0], out[8 - 1 - i][0][3])
        self.assertEqual(out[i][0][1], out[8 - 1 - i][0][4])
        self.assertEqual(out[i][0][2], out[8 - 1 - i][0][5])
      for i in xrange(8):
        self.assertEqual(out[i][1][0], out[8 - 1 - i][1][3])
        self.assertEqual(out[i][1][1], out[8 - 1 - i][1][4])
        self.assertEqual(out[i][1][2], out[8 - 1 - i][1][5])

  def testBidirectionalRNN(self):
    self._testBidirectionalRNN(use_gpu=False, use_shape=False)
    self._testBidirectionalRNN(use_gpu=True, use_shape=False)
    self._testBidirectionalRNN(use_gpu=False, use_shape=True)
    self._testBidirectionalRNN(use_gpu=True, use_shape=True)

  def testBidirectionalRNNWithoutSequenceLength(self):
    self._testBidirectionalRNNWithoutSequenceLength(use_gpu=False,
                                                    use_shape=False)
    self._testBidirectionalRNNWithoutSequenceLength(use_gpu=True,
                                                    use_shape=False)
    self._testBidirectionalRNNWithoutSequenceLength(use_gpu=False,
                                                    use_shape=True)
    self._testBidirectionalRNNWithoutSequenceLength(use_gpu=True,
                                                    use_shape=True)

if __name__ == "__main__":
  tf.test.main()
@ -24,6 +24,14 @@ limitations under the License.
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/port.h"

// Return type of import_array() changed between Python 2 and 3
// NUMPY_IMPORT_ARRAY_RETVAL is NULL for Python 3
#if PY_MAJOR_VERSION >= 3
#define NUMPY_IMPORT_ARRAY_RETURN_TYPE int
#else
#define NUMPY_IMPORT_ARRAY_RETURN_TYPE void
#endif

namespace tensorflow {
namespace {

@ -39,7 +47,7 @@ PyObject* GetPyTrampoline() {
}

// Module initialization (mainly import numpy) if needed.
void InitIfNeeded() {
NUMPY_IMPORT_ARRAY_RETURN_TYPE InitIfNeeded() {
  mutex_lock l(mu);
  if (!initialized) {
    PyGILState_STATE py_threadstate;
@ -192,10 +192,19 @@ def learned_unigram_candidate_sampler(true_classes, num_true, num_sampled,
                                      seed2=seed2, name=name)


def fixed_unigram_candidate_sampler(true_classes, num_true, num_sampled, unique,
                                    range_max, vocab_file='', distortion=1.0,
                                    num_reserved_ids=0, num_shards=1, shard=0,
                                    unigrams=[], seed=None, name=None):
def fixed_unigram_candidate_sampler(true_classes,
                                    num_true,
                                    num_sampled,
                                    unique,
                                    range_max,
                                    vocab_file='',
                                    distortion=1.0,
                                    num_reserved_ids=0,
                                    num_shards=1,
                                    shard=0,
                                    unigrams=(),
                                    seed=None,
                                    name=None):
  """Samples a set of classes using the provided (fixed) base distribution.

  This operation randomly samples a tensor of sampled classes
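Besides the one-argument-per-line reformat, note the real fix hiding in this
signature: the mutable default unigrams=[] became the immutable unigrams=().
A mutable default is created once at def time and shared across calls, so any
in-place mutation leaks between callers. A hedged illustration of the pitfall
with toy functions, not TensorFlow code:

    def bad(item, acc=[]):    # one shared list for every call
      acc.append(item)
      return acc

    def good(item, acc=()):   # immutable default; build a fresh list instead
      return list(acc) + [item]

    bad(1); bad(2)            # second call returns [1, 2] -- state leaked
    good(1); good(2)          # each call returns a fresh one-element list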
@ -1908,9 +1908,10 @@ def _MergeShape(op):
  first output that is one of those inputs, and a second scalar
  output.

  This function conservatively assumes that if any of its inputs is
  not fully defined, the output shape is unknown. If all of the inputs
  have the exact same known shape, the output must have that shape.
  If all input shapes are known and have the same rank, the output
  shape must have that rank, otherwise the output shape is unknown.
  Each output dimension is specified only if that dimension in all
  inputs is the same.

  Args:
    op: A Merge Operation.
@ -1919,16 +1920,20 @@ def _MergeShape(op):
    A single-element list containing the Shape of the Merge op.

  """
  first_input_shape = op.inputs[0].get_shape()
  if first_input_shape.is_fully_defined():
  output_shape = op.inputs[0].get_shape()
  if output_shape.dims is None:
    return [tensor_shape.unknown_shape(), tensor_shape.scalar()]
  else:
    for input_ in op.inputs[1:]:
      input_shape = input_.get_shape()
      if (not input_shape.is_fully_defined()
          or not input_shape.is_compatible_with(first_input_shape)):
      if input_shape.dims is None or input_shape.ndims != output_shape.ndims:
        return [tensor_shape.unknown_shape(), tensor_shape.scalar()]
    return [first_input_shape, tensor_shape.scalar()]
  else:
    return [tensor_shape.unknown_shape(), tensor_shape.scalar()]
      else:
        output_shape = tensor_shape.TensorShape(
            [input_dim.value if input_dim.value == output_dim.value else None
             for input_dim, output_dim in zip(input_shape.dims,
                                              output_shape.dims)])
    return [output_shape, tensor_shape.scalar()]


ops.RegisterShape("RefMerge")(_MergeShape)
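The net effect of the new _MergeShape is that agreement is now computed per
dimension rather than all-or-nothing. A hedged walk-through of the cases,
written as annotated pseudocode with shapes as Python lists and None meaning
an unknown dimension (these mirror the ControlFlowTest cases above):

    # merge([1, 2], [1, 2, 3])  -> unknown shape   (different ranks)
    # merge([1, 2], [2, 2])     -> [None, 2]       (one dimension disagrees)
    # merge([1, 2], [2, 1])     -> [None, None]    (both dimensions disagree)
    # merge([1, 2], [None, 2])  -> [None, 2]       (unknown dim stays unknown)
    # merge([1, 2], [1, 2])     -> [1, 2]          (identical shapes preserved)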
@ -655,7 +655,7 @@ def reduce_sum(input_tensor, reduction_indices=None, keep_dims=False,

  Args:
    input_tensor: The tensor to reduce. Should have numeric type.
    reduction_indices: The dimensions to reduce. If `None` (the defaut),
    reduction_indices: The dimensions to reduce. If `None` (the default),
      reduces all dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    name: A name for the operation (optional).
@ -692,7 +692,7 @@ def reduce_mean(input_tensor, reduction_indices=None, keep_dims=False,

  Args:
    input_tensor: The tensor to reduce. Should have numeric type.
    reduction_indices: The dimensions to reduce. If `None` (the defaut),
    reduction_indices: The dimensions to reduce. If `None` (the default),
      reduces all dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    name: A name for the operation (optional).
@ -719,7 +719,7 @@ def reduce_prod(input_tensor, reduction_indices=None, keep_dims=False,

  Args:
    input_tensor: The tensor to reduce. Should have numeric type.
    reduction_indices: The dimensions to reduce. If `None` (the defaut),
    reduction_indices: The dimensions to reduce. If `None` (the default),
      reduces all dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    name: A name for the operation (optional).
@ -746,7 +746,7 @@ def reduce_min(input_tensor, reduction_indices=None, keep_dims=False,

  Args:
    input_tensor: The tensor to reduce. Should have numeric type.
    reduction_indices: The dimensions to reduce. If `None` (the defaut),
    reduction_indices: The dimensions to reduce. If `None` (the default),
      reduces all dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    name: A name for the operation (optional).
@ -773,7 +773,7 @@ def reduce_max(input_tensor, reduction_indices=None, keep_dims=False,

  Args:
    input_tensor: The tensor to reduce. Should have numeric type.
    reduction_indices: The dimensions to reduce. If `None` (the defaut),
    reduction_indices: The dimensions to reduce. If `None` (the default),
      reduces all dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    name: A name for the operation (optional).
@ -810,7 +810,7 @@ def reduce_all(input_tensor, reduction_indices=None, keep_dims=False,

  Args:
    input_tensor: The boolean tensor to reduce.
    reduction_indices: The dimensions to reduce. If `None` (the defaut),
    reduction_indices: The dimensions to reduce. If `None` (the default),
      reduces all dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    name: A name for the operation (optional).
@ -847,7 +847,7 @@ def reduce_any(input_tensor, reduction_indices=None, keep_dims=False,

  Args:
    input_tensor: The boolean tensor to reduce.
    reduction_indices: The dimensions to reduce. If `None` (the defaut),
    reduction_indices: The dimensions to reduce. If `None` (the default),
      reduces all dimensions.
    keep_dims: If true, retains reduced dimensions with length 1.
    name: A name for the operation (optional).
@ -55,7 +55,7 @@ strided according to the `strides` argument. `strides = [1, 1, 1, 1]` applies
the filter to a patch at every offset, `strides = [1, 2, 2, 1]` applies the
filter to every other image patch in each dimension, etc.

Ignoring channels for the moment, and assume that the the 4-D `input` has shape
Ignoring channels for the moment, and assume that the 4-D `input` has shape
`[batch, in_height, in_width, ...]` and the 4-D `filter` has shape
`[filter_height, filter_width, ...]`, then the spatial semantics of the
convolution ops are as follows: first, according to the padding scheme chosen
@ -63,7 +63,7 @@ as `'SAME'` or `'VALID'`, the output size and the padding pixels are computed.
For the `'SAME'` padding, the output height and width are computed as:

    out_height = ceil(float(in_height) / float(strides[1]))
    out_width = ceil(float(in_width) / float(stides[2]))
    out_width = ceil(float(in_width) / float(strides[2]))

and the padding on the top and left are computed as:

@ -85,7 +85,7 @@ same number of pixels on both sides.
For the `'VALID`' padding, the output height and width are computed as:

    out_height = ceil(float(in_height - filter_height + 1) / float(strides[1]))
    out_width = ceil(float(in_width - filter_width + 1) / float(stides[2]))
    out_width = ceil(float(in_width - filter_width + 1) / float(strides[2]))

and the padding values are always zero. The output is then computed as

@ -167,8 +167,12 @@ def _reverse_seq(input_seq, lengths):
  if lengths is None:
    return list(reversed(input_seq))

  for input_ in input_seq:
    input_.set_shape(input_.get_shape().with_rank(2))

  # Join into (time, batch_size, depth)
  s_joined = array_ops.pack(input_seq)

  # Reverse along dimension 0
  s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
@ -227,11 +231,13 @@ def bidirectional_rnn(cell_fw, cell_bw, inputs,
  name = scope or "BiRNN"
  # Forward direction
  with vs.variable_scope(name + "_FW"):
    output_fw, _ = rnn(cell_fw, inputs, initial_state_fw, dtype)
    output_fw, _ = rnn(cell_fw, inputs, initial_state_fw, dtype,
                       sequence_length)

  # Backward direction
  with vs.variable_scope(name + "_BW"):
    tmp, _ = rnn(
        cell_bw, _reverse_seq(inputs, sequence_length), initial_state_bw, dtype)
    tmp, _ = rnn(cell_bw, _reverse_seq(inputs, sequence_length),
                 initial_state_bw, dtype, sequence_length)
  output_bw = _reverse_seq(tmp, sequence_length)
  # Concat each of the forward/backward outputs
  outputs = [array_ops.concat(1, [fw, bw])
@ -144,7 +144,7 @@ class GRUCell(RNNCell):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
      with vs.variable_scope("Gates"):  # Reset gate and update gate.
        # We start with bias of 1.0 to not reset and not udpate.
        # We start with bias of 1.0 to not reset and not update.
        r, u = array_ops.split(1, 2, linear([inputs, state],
                                            2 * self._num_units, True, 1.0))
        r, u = sigmoid(r), sigmoid(u)
@ -211,7 +211,7 @@ def _get_sharded_variable(name, shape, initializer, dtype, num_shards):


def _matmul_with_sharded_variable(tensor, sharded_tensor):
  """Multiply tensor with each tensor in sharded_tensor and column-concat."""
  """Multiply tensor with each tensor in sharded_tensor, column-concatenated."""
  return array_ops.concat(1, [math_ops.matmul(tensor, shard)
                              for shard in sharded_tensor])
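The docstring tweak aside, this helper relies on a block-matrix identity:
multiplying by column-partitioned shards and concatenating the results equals
multiplying by the concatenated matrix, i.e. x @ [W1 | W2] == [x @ W1 | x @ W2].
A hedged numpy check of that identity with arbitrary shapes:

    import numpy as np

    x = np.random.randn(4, 6)
    w1, w2 = np.random.randn(6, 3), np.random.randn(6, 5)
    # Multiply each shard, then column-concatenate the results.
    sharded = np.concatenate([x.dot(w1), x.dot(w2)], axis=1)
    # Equivalent to one multiply against the column-concatenated matrix.
    assert np.allclose(sharded, x.dot(np.concatenate([w1, w2], axis=1)))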
@ -364,7 +364,7 @@ class OutputProjectionWrapper(RNNCell):

  Note: in many cases it may be more efficient to not use this wrapper,
  but instead concatenate the whole sequence of your outputs in time,
  do the projection on this batch-concated sequence, then split it
  do the projection on this batch-concatenated sequence, then split it
  if needed or directly feed into a softmax.
  """
@ -412,7 +412,7 @@ class InputProjectionWrapper(RNNCell):

  Note: in many cases it may be more efficient to not use this wrapper,
  but instead concatenate the whole sequence of your inputs in time,
  do the projection on this batch-concated sequence, then split it.
  do the projection on this batch-concatenated sequence, then split it.
  """

  def __init__(self, cell, input_size):
@ -501,7 +501,7 @@ class DropoutWrapper(RNNCell):
  def state_size(self):
    return self._cell.state_size

  def __call__(self, inputs, state):
  def __call__(self, inputs, state, scope=None):
    """Run the cell with the declared dropouts."""
    if (not isinstance(self._input_keep_prob, float) or
        self._input_keep_prob < 1):
@ -518,7 +518,7 @@ class EmbeddingWrapper(RNNCell):

  Note: in many cases it may be more efficient to not use this wrapper,
  but instead concatenate the whole sequence of your inputs in time,
  do the embedding on this batch-concated sequence, then split it and
  do the embedding on this batch-concatenated sequence, then split it and
  feed into your RNN.
  """

@ -214,7 +214,7 @@ class _ReservoirBucket(object):
    self.items = list(filter(filterFn, self.items))
    size_diff = size_before - len(self.items)

    # Estimate a correction the the number of items seen
    # Estimate a correction to the number of items seen
    prop_remaining = len(self.items) / float(
        size_before) if size_before > 0 else 0
    self._num_items_seen = int(round(self._num_items_seen * prop_remaining))
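The corrected comment documents a proportional estimate: if a filter keeps
only a fraction of the stored sample, scale the seen-items counter by the
same fraction. A hedged numeric example of that arithmetic, with made-up
counts:

    # Suppose 100 items were seen and 40 are stored; a filter keeps 10 of 40.
    num_items_seen = 100
    prop_remaining = 10 / float(40)                               # 0.25
    num_items_seen = int(round(num_items_seen * prop_remaining))  # 25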
@ -27,6 +27,8 @@ from tensorflow.python.training import training_ops
class AdagradOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adagrad algorithm.

  (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)

  See http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf.

  @@__init__
@ -31,6 +31,8 @@ from tensorflow.python.training import training_ops
class AdamOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adam algorithm.

  (http://arxiv.org/pdf/1412.6980v7.pdf).

  See http://arxiv.org/pdf/1412.6980v7.pdf.

  @@__init__
@ -215,7 +215,7 @@ class Coordinator(object):
    """Wait till the Coordinator is told to stop.

    Args:
      timeout: float. Sleep for up to that many seconds waiting for
      timeout: Float. Sleep for up to that many seconds waiting for
        should_stop() to become True.

    Returns:
@ -229,7 +229,7 @@ class Coordinator(object):
    Blocks until all `threads` have terminated or `request_stop()` is called.

    After the threads stop, if an `exc_info` was passed to `request_stop`, that
    exception is re-reaised.
    exception is re-raised.

    Grace period handling: When `request_stop()` is called, threads are given
    'stop_grace_period_secs' seconds to terminate. If any of them is still
@ -289,7 +289,7 @@ class LooperThread(threading.Thread):
    """Create a LooperThread.

    Args:
      coord: a Coordinator.
      coord: A Coordinator.
      timer_interval_secs: Time boundaries at which to call Run(), or None
        if it should be called back to back.
      target: Optional callable object that will be executed in the thread.
@ -249,7 +249,7 @@ class FtrlOptimizer(optimizer.Optimizer):
      gradients. Defaults to "Ftrl".

    Raises:
      ValueError: if one of the arguments is invalid.
      ValueError: If one of the arguments is invalid.
    """
    super(FtrlOptimizer, self).__init__(use_locking, name)

@ -170,7 +170,7 @@ class FtrlOptimizerTest(tf.test.TestCase):
      v0_val, v1_val = sess.run([var0, var1])
      return v0_val, v1_val

    # When variables are intialized with Zero, FTRL-Proximal has two properties:
    # When variables are initialized with Zero, FTRL-Proximal has two properties:
    # 1. Without L1&L2 but with fixed learning rate, FTRL-Proximal is identical
    # with GradientDescent.
    # 2. Without L1&L2 but with adaptive learning rate, FTRL-Proximal is identical
Some files were not shown because too many files have changed in this diff.