minor spelling tweaks
parent b41fbcbf85
commit e664420b79
@@ -509,10 +509,10 @@ RecursiveCompilabilityChecker::OperationFilter CreateOperationFilter(
 auto it = uncompilable_nodes->find(function_identifier);
 if (it == uncompilable_nodes->end()) {
 std::vector<RecursiveCompilabilityChecker::UncompilableNodeInfo>
-uncompileable_node_info{std::move(node_info)};
+uncompilable_node_info{std::move(node_info)};
 uncompilable_nodes->emplace(
 std::move(function_identifier),
-std::make_pair(function, std::move(uncompileable_node_info)));
+std::make_pair(function, std::move(uncompilable_node_info)));
 } else {
 it->second.second.emplace_back(std::move(node_info));
 }
@@ -96,7 +96,7 @@ limitations under the License.
 // Symbolic > NonSymbolic. The lattice has height = 2 so two iterations are
 // sufficient to converge.
 //
-// We first do an optimisitc analysis and, if it does not converge, we then fall
+// We first do an optimistic analysis and, if it does not converge, we then fall
 // back to a pessimistic analysis. The optimistic analysis assigns the same
 // symbolic predicate to all the merge nodes whose preceding enter nodes have
 // the same frame name on the first iteration. On the second iteration, if all
@@ -1255,7 +1255,7 @@ Status DeadnessAnalysisImpl::GetFrameBasedTopologicalOrder(
 } else if (IsRootExit(node)) {
 ++num_exits_for_frame[cf.frame_name];
 }
-// Edge NextIteration->Merge is counted before starting the traveral to
+// Edge NextIteration->Merge is counted before starting the traversal to
 // break the backedges.
 if (IsMerge(node)) {
 for (const Edge* e : node->in_edges()) {
@@ -1458,7 +1458,7 @@ Status DeadnessAnalysisImpl::PopulateFrame(absl::Span<Node* const> topo,
 for (Node* n : topo) {
 // The nodes added to should_revisit in the previous loop need to be
-// revisited now. Reprocesing these initial nodes may add *their* consumers
+// revisited now. Reprocessing these initial nodes may add *their* consumers
 // to should_revisit, and these newly added nodes will also be processed by
 // this very same loop. Since we're traversing the graph in topological
 // order (producers before consumers) and HandleNode(n) can only ever add
@@ -95,7 +95,7 @@ extern const char* const kXlaNumResourceArgsAttr;
 extern const char* const kXlaHasReferenceVarsAttr;
 // Sorts each node's control inputs by their names. This guarantees that for two
-// structually equivalent GraphDefs, we get the same traversal ordering on
+// structurally equivalent GraphDefs, we get the same traversal ordering on
 // node's control input fields.
 // TODO(hpucha): Move the utilities to a more appropriate place.
 void SortControlInputs(GraphDef* gdef);
@@ -72,7 +72,7 @@ extern const char kXlaLiftedArgOutsideCompilationAttrName[];
 // Attribute indicating that this is an IdentityN node receiving inputs for a
 // outside compilation Placeholder node (the original outside compilation node
-// is moved out of TPU comutation, and we left a Placeholder node there).
+// is moved out of TPU computation, and we left a Placeholder node there).
 // Attribute value will be a string, which is the outside compilation cluster
 // name for the outside compilation Placeholder node.
 extern const char kXlaOutsideCompilationInputsAttrName[];
@@ -941,7 +941,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest,
 // "const0"
 // "identity0" = "const0" (outside compilation cluster "0")
 // "identity1" = "const0" "^identity0" (outside compilation cluster "1",
-// control depdent on cluster "0")
+// control dependent on cluster "0")
 // "identity2" = "identity1"
 FunctionDefLibrary fdl;
 {
@@ -123,7 +123,7 @@ class GraphCycles {
 absl::Span<const int32> Successors(int32 node) const;
 absl::Span<const int32> Predecessors(int32 node) const;
-// Return a copy of the sucessors set. This is needed for code using the
+// Return a copy of the successors set. This is needed for code using the
 // collection while modifying the GraphCycles.
 std::vector<int32> SuccessorsCopy(int32 node) const;
 // Return a copy of the predecessors set. This is needed for code using the
@@ -1366,7 +1366,7 @@ Status MarkForCompilationPassImpl::Run() {
 void MarkForCompilationPassImpl::DumpPostClusteringGraphs() {
 DumpGraphToFile("mark_for_compilation", *graph_, flib_def_);
-// We also dump out an annoated version of the TF graph where the nodes
+// We also dump out an annotated version of the TF graph where the nodes
 // names are prefixed with the cluster names. This can help visualizing the
 // clustering decisions on TensorBoard.
 Graph new_graph(graph_->op_registry());
@@ -187,7 +187,7 @@ impl::NodeMatcherProperties Op(string op);
 // Matches a node with assigned device `assigned_device`.
 impl::NodeMatcherProperties AssignedDevice(string assigned_device);
-// Matches a node with a boolean typed attrbute named `name` and with value
+// Matches a node with a boolean typed attribute named `name` and with value
 // `value`.
 template <typename ValueTy>
 impl::NodeMatcherProperties Attr(const string& name, ValueTy value) {
@@ -125,7 +125,7 @@ TEST(NodeMatchers, CheckControlDependence) {
 "is any node");
 }
-TEST(NodeMatchers, ConstVaulue) {
+TEST(NodeMatchers, ConstValue) {
 Scope root = Scope::NewRootScope().ExitOnError();
 Output placeholder =
 ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT);
@@ -110,7 +110,7 @@ Merges the outputs from the PartitionedCall node and the _XlaRun node.
 Unlike the TensorFlow Merge op, which requires inputs of some types to be
 placed on the host, the _XlaMerge op can merge inputs of all types when
 placed on the device. This prevents the need for copy operations, in
-particluar when an XLA cluster has int32 outputs. The _XlaMerge up does not
+particular when an XLA cluster has int32 outputs. The _XlaMerge up does not
 have a value_index output that identifies the chosen input.
 )");
@@ -262,7 +262,7 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
 << xla_tensor->shaped_buffer().ToString();
 // For devices don't allow sync on completion, the device execution is
 // deferred. We check the execution stream status here to avoid wrong
-// results from a failed stream being propogated to following
+// results from a failed stream being propagated to following
 // host-side ops.
 if (!device_allows_sync_on_completion) {
 done_status.Update(xla_tensor->RefreshStatusOfStreams());
@@ -222,7 +222,7 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def,
 // using xla::ComputationDataHandle, which is just a symbolic handle that
 // xla::ComputationBuilder assigns. How does this handle gets assigned for
 // constant arguments? Even constant arguments get an _Arg node in the graph
-// instatiated for Function compilation. The tf2xla kernel for constant _Arg
+// instantiated for Function compilation. The tf2xla kernel for constant _Arg
 // nodes takes the constant value, converts it to XlaLiteral, and feeds it
 // to xla::ComputationBuilder.ConstantLiteral, which returns the handle. This
 // constant XlaLiteral is included in the HLO graph, and subsequently, in
@@ -84,7 +84,7 @@ VariableInfo::~VariableInfo() {
 }
 }
-// Returns a vector of VaribleInfo instances for the resource variable inputs to
+// Returns a vector of VariableInfo instances for the resource variable inputs to
 // the kernel with context `ctx`. The input indices for the resource variable
 // inputs are in `variable_indices`.
 static Status GetVariableInfosFromCtxInputs(
@@ -416,7 +416,7 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func,
 if (res->hasOneUse()) {
 if (auto next_stats = llvm::dyn_cast<quant::StatisticsOp>(
 *res->getUsers().begin())) {
-// quantization parameters can be propgated to next_stats
+// quantization parameters can be propagated to next_stats
 redundant_stats_ops.insert(next_stats);
 // add next_stats to the work list so propagation can
 // continue.
@@ -342,14 +342,14 @@ ElementsAttr Quantize(Attribute real_value, Type tensor_type);
 // parameters in this type is based on the min and max element of the
 // attribute. When the elements in the `attr` are not in floating-point, or
 // the value range isn't straddling zero, an empty type is returned. The min/max
-// are ajusted to be symmetric if `symmetric` flag is set to True. And
+// are adjusted to be symmetric if `symmetric` flag is set to True. And
 // `symmetric` can only be set to true when it is signed and narrow_range.
 Type GetUniformQuantizedTypeForWeight(ElementsAttr attr, bool symmetric,
 unsigned num_bits, bool is_sign,
 bool narrow_range);
 // Returns the per channel quantized type for an element attribute.
-// `quant_dim` defines the quantization axis. The channel min/max are ajusted
+// `quant_dim` defines the quantization axis. The channel min/max are adjusted
 // to be symmetric if `symmetric` flag is set to True. And `symmetric` can only
 // be set to true when it is signed and narrow_range.
 Type GetUniformQuantizedPerAxisTypeForWeight(ElementsAttr attr, int quant_dim,
@@ -413,13 +413,13 @@ void PreprocessTopoSortGraph(
 }
 operation_to_in_degrees->try_emplace(&op, input_ops.size());
 for (auto* input_op : input_ops) {
-auto preceeding_op_it = operation_to_outputs->find(input_op);
-if (preceeding_op_it == operation_to_outputs->end()) {
+auto preceding_op_it = operation_to_outputs->find(input_op);
+if (preceding_op_it == operation_to_outputs->end()) {
 auto result = operation_to_outputs->try_emplace(
 input_op, llvm::DenseSet<Operation*>());
-preceeding_op_it = result.first;
+preceding_op_it = result.first;
 }
-preceeding_op_it->second.insert(&op);
+preceding_op_it->second.insert(&op);
 }
 }
 }
@@ -394,14 +394,14 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern<AffineOpType> {
 // w * (x ' c) + b => (w ' c) x + b
 // so we have to update the weight.
 bool is_mul = llvm::isa<MulOp>(binary_op);
-auto new_fitler =
+auto new_filter =
 filter_cst.mapValues(filter_type.getElementType(), [&](APFloat it) {
 return (is_mul ? it * cst_value : it / cst_value).bitcastToAPInt();
 });
 // We recreate the constant op in case it is shared by the other ops. This
 // might increase the model size.
 auto new_filter_op = rewriter.create<ConstOp>(
-fc_op.getLoc(), filter->getType(), new_fitler);
+fc_op.getLoc(), filter->getType(), new_filter);
 fc_op.setOperand(0, binary_op->getOperand(0));
 if (fc_op.filter() != filter) {
 // This filter goes through quantize and dequantize ops. Then we just
@@ -132,8 +132,8 @@ class FoldIfOp : public OpRewritePattern<TF::IfOp> {
 // Erases functions from the given candidates that are not referenced by any of
 // the ops in the module.
-static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) {
-if (candiate_funcs.empty()) return;
+static void EraseDeadFuncs(const FuncSet& candidate_funcs, ModuleOp module) {
+if (candidate_funcs.empty()) return;
 SymbolTable manager(module);
@@ -149,7 +149,7 @@ static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) {
 }
 });
-for (FuncOp func : candiate_funcs) {
+for (FuncOp func : candidate_funcs) {
 if (!in_use_funcs.count(func)) manager.erase(func);
 }
 }
@@ -132,7 +132,7 @@ struct InsertTFLQuantOpsAfterTFFakeQuantOp
 int quant_dim = -1;
 if (PerAxis) {
-// This is a special case that the quant_dim is the last dimentions.
+// This is a special case that the quant_dim is the last dimensions.
 quant_dim = res->getType().template cast<ShapedType>().getRank() - 1;
 }
 // Use the min/max from the operands and the num_bits and narrow_range
@@ -98,7 +98,7 @@ Value* SliceRankedTensor(OpBuilder* builder, Value* input,
 ArrayRef<int64_t> size_values,
 mlir::Location location) {
 // If the size of the tensor to be sliced from the input overflows
-// the input tensor's dimenions, return 0-valued tensor of the requested
+// the input tensor's dimensions, return 0-valued tensor of the requested
 // shape.
 ArrayRef<int64_t> input_shape = GetRankedTensorShape(input);
 for (int i = 0; i < input_shape.size(); i++) {
@@ -122,7 +122,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) {
 std::get<1>(operand_and_result));
 }
 } else if (auto replicate = llvm::dyn_cast<tf_device::ReplicateOp>(op)) {
-// The nested block for RepliateOp is handled separately in side-effect
+// The nested block for ReplicateOp is handled separately in side-effect
 // analysis. Inside that block, we can still treat its block arguments as
 // different resources.
 for (auto arg : replicate.GetBody().getArguments()) {
@@ -305,7 +305,7 @@ void SideEffectAnalysis::AnalyzeRegion(
 // region, and tracking resource accesses in per_resource_access_info_.
 // Returns whether an access to `resource` can skip control edges from
-// prevoius accesses to unknown resources, due to that earlier accesses to
+// previous accesses to unknown resources, due to that earlier accesses to
 // `resource` already indirectly tracked previous accesses to uknown
 // resources. `read_only` specifies the type of access of the current op being
 // considered.
@@ -105,7 +105,7 @@ class SideEffectAnalysis {
 void ConsumeChildAnalyses(
 llvm::SmallVector<SideEffectAnalysis, 4>&& children);
-// Updates control_predecessors_ for `op` that is being visted, on the given
+// Updates control_predecessors_ for `op` that is being visited, on the given
 // `resource_id`.
 void AddPredecessorsForAccess(int64_t resource_id, Operation* op,
 bool read_only);
@@ -124,7 +124,7 @@ class SideEffectAnalysis {
 sorted_control_successors_;
 // Internal per-resource data structure when we build the dependencies.
-struct PerResourceAcessInfo {
+struct PerResourceAccessInfo {
 // Last op that writes the resource before the current op being analyzed.
 Operation* last_write = nullptr;
 // Read ops since last_write before the current op being analyzed.
@@ -134,7 +134,7 @@ class SideEffectAnalysis {
 bool tracked_last_unknown_read = false;
 bool tracked_last_unknown_write = false;
 };
-llvm::SmallDenseMap<int64_t, PerResourceAcessInfo, 8>
+llvm::SmallDenseMap<int64_t, PerResourceAccessInfo, 8>
 per_resource_access_info_;
 };
@@ -1317,7 +1317,7 @@ Operations are applied to the input(s) according to the following rules:
 Considering the batch matrix multiplication equation again
 (`bij,bjk->bik`), the contracted axis label is `j`.
-(e) Expand Diagonal: If the output subcripts contain repeated (explicit) axis
+(e) Expand Diagonal: If the output subscripts contain repeated (explicit) axis
 labels, the opposite operation of (a) is applied. For example, in the
 equation `i->iii`, and input shape `[3]`, the output of shape `[3, 3, 3]`
 are all zeros, except for the (generalized) diagonal which is populated
@@ -1325,7 +1325,7 @@ Operations are applied to the input(s) according to the following rules:
 Note: This operation is not supported by `np.einsum` or `tf.einsum`; it is
 provided to enable computing the symbolic gradient of `tf.einsum`.
-The output subcripts must contain only labels appearing in at least one of the
+The output subscripts must contain only labels appearing in at least one of the
 input subscripts. Furthermore, all dimensions mapping to the same axis label
 must be equal.
@@ -1337,7 +1337,7 @@ according to standard NumPy broadcasting
 The broadcasted dimensions are placed in the corresponding location of the
 ellipsis in the output subscript. If the broadcasted dimensions are non-empty
-and the output subcripts do not contain ellipsis, then an InvalidArgument error
+and the output subscripts do not contain ellipsis, then an InvalidArgument error
 is raised.
 @compatibility(numpy)
@@ -84,7 +84,7 @@ DenseIntElementsAttr GetBiasAddGradReductionIndices(int64_t rank,
 tensorflow::TensorFormat format;
 if (!FormatFromString(data_format.getValue().str(), &format)) return {};
-// Reudce along all dimensions except the feature dimension.
+// Reduce along all dimensions except the feature dimension.
 int64_t feature_dim = GetTensorFeatureDimIndex(rank, format);
 llvm::SmallVector<int64_t, 4> dims_to_reduce(rank - 1);
 std::iota(dims_to_reduce.begin(), dims_to_reduce.begin() + feature_dim, 0);
@@ -45,7 +45,7 @@ struct ExecutorToControlDialectConversion
 // Replace all uses of value `v` with a list of new values. Because number of
 // new values might be greater than 1, users of `v` might be replaced with their
-// clones in case of non-resizble operands list.
+// clones in case of non-resizable operands list.
 void ReplaceAllUsesOfValueWithValues(Value *v,
 Operation::operand_range new_values) {
 int new_values_size = std::distance(new_values.begin(), new_values.end());
@@ -197,7 +197,7 @@ class Exporter {
 // Each NextIteration node in the original graph is converted to a pair of
 // source and sink operations in the MLIR, and we use the following two maps
-// to pair and convet them back to a single NextIteration node. We choose to
+// to pair and convert them back to a single NextIteration node. We choose to
 // the "name" attribute, which is from the unique node name, to find out the
 // pairs: When scanning the operations in the block, the source operations
 // are inserted to the name_to_inst_ first, and the other "sink" operation
@@ -2283,7 +2283,7 @@ class StructuredValueLinearizer {
 // Returns the list of index paths to each leaf of the StructuredValue,
 // in a linearized order matching `tf.nest.flatten`.
 //
-// If an error ocurred during the linearization process, an error message with
+// If an error occurred during the linearization process, an error message with
 // `error_context` prepended will be included in the returned status.
 StatusOr<llvm::ArrayRef<mlir::ArrayAttr>> GetLeafIndexPaths(
 llvm::StringRef error_context) const;
@@ -759,7 +759,7 @@ def HLO_UnaryEinsumOp: HLO_Op<"unary_einsum", [NoSideEffect]> {
 let hasCanonicalizer = 1;
-// UnarayEinsumOp is unconditionally canonicalized to the binary EinsumOp so
+// UnaryEinsumOp is unconditionally canonicalized to the binary EinsumOp so
 // the HLO converter shouldn't be invoked.
 let hasCustomHLOConverter = 1;
 }
@@ -38,7 +38,7 @@ def DynamicSliceToSlice: Pat<(HLO_DynamicSliceOp HLO_Tensor:$input,
 (BuildSliceLimits $starting_indices, $slice_sizes),
 (BuildSliceStrides $input))>;
-def UnaryToBianryEinsumEq : NativeCodeCall<
+def UnaryToBinaryEinsumEq : NativeCodeCall<
 "$_builder.getStringAttr(\",\" + $0.getValue().str())">;
 // Convert UnaryEinsumOp to EinsumOp with two operands with redundant first
@@ -46,4 +46,4 @@ def UnaryToBianryEinsumEq : NativeCodeCall<
 def UnaryEinsumToEinsum : Pat<
 (HLO_UnaryEinsumOp $operand, $equation),
 (HLO_EinsumOp (HLO_ConstOp (GetScalarOfType<1> $operand)),
-$operand, (UnaryToBianryEinsumEq $equation))>;
+$operand, (UnaryToBinaryEinsumEq $equation))>;
@@ -448,7 +448,7 @@ static DenseIntElementsAttr TFSliceSizes2HLOSliceSizes(
 // `element_types`, create two block arguments, one for lhs and one for rhs, and
 // generates xla_hlo.compare op to compare them with the given `direction`.
 //
-// Note that this right now only does comparsion on the first pair of block
+// Note that this right now only does comparision on the first pair of block
 // arguments.
 static void BuildSortComparisonBody(llvm::ArrayRef<Type> element_types,
 StringRef direction, Region *body,
@@ -2149,7 +2149,7 @@ class ConvertTopKV2Op : public OpRewritePattern<TF::TopKV2Op> {
 // Converts tf.Unpack to a series of XLA HLO slice ops.
 //
 // Each slice takes one element along the dimension to unpack and takes the full
-// range for all other dimenions. Each slice is then reshaped to drop the
+// range for all other dimensions. Each slice is then reshaped to drop the
 // dimension to unpack (which is always of size 1).
 // TODO(antiagainst): consider changing this into a TF internal lowering pass.
 class ConvertUnpackOp : public OpRewritePattern<TF::UnpackOp> {
@@ -107,8 +107,8 @@ def : Pat<(HLO_AbsOp HLO_ComplexTensor:$val),
 (NullDenseIntElementsAttr))),
 (HLO_ConstOp (ConstantSplat<"0"> $real)))>;
-// Expononetial can be lowered to an exponential on the real component and a
-// sum of sinusoids of the imageinary component, which equates to a normal
+// Exponential can be lowered to an exponential on the real component and a
+// sum of sinusoids of the imaginary component, which equates to a normal
 // exponential operator multiplied by Euler's formula.
 //
 // Exp(a + ib) = Exp(a) * Exp(ib) = Exp(a) * (Cos(b) + iSin(b))
@@ -157,7 +157,7 @@ inline Operation* MapLhloOpToStdScalarOp<xla_lhlo::CompareOp>(
 if (element_type.isa<IntegerType>()) {
 Optional<CmpIPredicate> predicate =
 getIntCmpPredicate(lhlo_op.comparison_direction());
-assert(predicate.hasValue() && "expected valid comparision direction");
+assert(predicate.hasValue() && "expected valid comparison direction");
 return b.create<ScalarIOp<CompareOp>>(lhlo_op.getLoc(),
 predicate.getValue(), lhs, rhs);
 }
@@ -114,7 +114,7 @@ def square_cases(align=None):
 [6, 7, 8, 9, 1],
 [2, 3, 4, 5, 6]]])
 tests = dict()
-# tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals)
+# tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals)
 tests[-1, -1] = (np.array([[6, 4, 1, 7],
 [5, 2, 8, 5]]),
 np.array([[[0, 0, 0, 0, 0],
@@ -192,7 +192,7 @@ def tall_cases(align=None):
 [7, 8, 9],
 [9, 8, 7]]])
 tests = dict()
-# tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals)
+# tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals)
 tests[0, 0] = (np.array([[1, 5, 9],
 [3, 2, 6]]),
 np.array([[[1, 0, 0],
@@ -276,7 +276,7 @@ def fat_cases(align=None):
 [8, 9, 1, 2],
 [3, 4, 5, 6]]])
 tests = dict()
-# tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals)
+# tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals)
 tests[0, 0] = (np.array([[1, 6, 2],
 [4, 9, 5]]),
 np.array([[[1, 0, 0, 0],
@@ -49,7 +49,7 @@ class QuantizedOpsTest(xla_test.XLATestCase):
 self.assertAllEqual(value, expected)
-class DeuantizedOpsTest(xla_test.XLATestCase):
+class DequantizedOpsTest(xla_test.XLATestCase):
 def pack_uint8_r2_to_uint32(self, test_input):
 num_rows, num_columns = test_input.get_shape().as_list()
@@ -3423,7 +3423,7 @@ int main(int argc, char** argv) {
 tensorflow::Flag(
 "tf_xla_random_seed", &tensorflow::tf_xla_random_seed,
 "Random seed to use for XLA tests. <= 0 means choose a seed "
-"nondetermistically."),
+"nondeterministically."),
 // TODO(phawkins): it might make more sense to run each test up to a
 // configurable time bound.
 tensorflow::Flag("tf_xla_test_repetitions",
@@ -161,7 +161,7 @@ Status GetEngineInfo(const Graph* g,
 const int node_id = node->id();
 const string& node_name = node->name();
-// Create input connections. Sort edges first to make determnistic since
+// Create input connections. Sort edges first to make deterministic since
 // in_edges is a set of pointers.
 std::vector<const Edge*> in_edges(node->in_edges().begin(),
 node->in_edges().end());
@@ -186,7 +186,7 @@ Status GetEngineInfo(const Graph* g,
 // If it doesn't have any edges, TF will prune it out.
 //
 // Note that the segmenter already ensure that the constant data input
-// is valid and suppported by the engine.
+// is valid and supported by the engine.
 if (!added_const_nodes.insert(input_node).second) {
 // Already added before.
 continue;
@@ -209,7 +209,7 @@ Status GetEngineInfo(const Graph* g,
 node_id, edge->dst_input(), /*input_edge=*/true, port);
 }
 }
-// Create output connections. Sort edges first to make determnistic since
+// Create output connections. Sort edges first to make deterministic since
 // out_edges is a set of pointers.
 std::vector<const Edge*> out_edges(node->out_edges().begin(),
 node->out_edges().end());
@@ -2511,7 +2511,7 @@ Status ConvertStridedSliceHelper(OpConverterParams* params,
 return Status::OK();
 } else if (pad_dims.size() == 1) {
 // Only one dim is modified but we have to have 2, mark a second dim which
-// will have padding of 0. The dim we add is chosen to avoid an unecessary
+// will have padding of 0. The dim we add is chosen to avoid an unnecessary
 // transpose.
 if (pad_dims[0] != 2) {
 pad_dims.push_back(2);
@@ -141,9 +141,9 @@ Status ConvertSegmentToGraphDef(
 // Converts given subgraph to a TRT engine saved in 'engine'. Returns ok iff
 // 'builder' successfully build the engine. If the result is not ok, 'engine'
 // will be set to nullptr
-// Once returned, 'builder' is not needed any more and can be safely detroyed.
+// Once returned, 'builder' is not needed any more and can be safely destroyed.
 //
-// - convert_successfully: indicates whether the converson to TensorRT network
+// - convert_successfully: indicates whether the conversion to TensorRT network
 // is successful. This is different than successfully building the engine:
 // building can still fail afterwards.
 Status ConvertGraphDefToEngine(
@@ -521,7 +521,7 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) {
 "Scalar input tensor is not supported since the first dimension "
 "is treated as batch dimension by TRT");
 }
-// Convert non-Const. We test the case where the non-batch dimemsion is
+// Convert non-Const. We test the case where the non-batch dimension is
 // unknown as well, to make sure the validator allows that.
 for (const int32 non_batch_dim : {-1, 2}) {
 const int32 batch_size = 12;
@@ -973,7 +973,7 @@ TEST_F(ConverterTest, GetWeightRange) {
 TEST_F(ConverterTest, ProvideQuantizationRange) {
 FakeITensor fake_tensor;
-// Assymetric range
+// Asymmetric range
 converter_->ProvideQuantizationRange(&fake_tensor, 0.0f, 6.0f);
 EXPECT_EQ(6.0f, quantization_ranges()[&fake_tensor]);
 converter_->ProvideQuantizationRange(&fake_tensor, 1.0f, 6.0f);
@@ -125,7 +125,7 @@ class TRTEngineOp : public AsyncOpKernel {
 // Verify that the input shapes are consistent and can be handled by this op.
 Status VerifyInputShapes(const std::vector<TensorShape>& shapes);
-// Return engine batch in cached_engne_batch_sizes_ which is closest to input
+// Return engine batch in cached_engine_batch_sizes_ which is closest to input
 // batch.
 Status GetEngineInputShapes(
 const CacheType& cache,
@@ -112,7 +112,7 @@ TEST_F(TRTEngineResourceOpsTest, Basic) {
 EXPECT_TRUE(
 errors::IsNotFound(rm->Lookup(container, resource_name, &resource)));
-// Create the resouce using an empty file with InitializeTRTResource.
+// Create the resource using an empty file with InitializeTRTResource.
 Reset();
 Env* env = Env::Default();
 const string filename = io::JoinPath(testing::TmpDir(), "trt_engine_file");
@@ -466,7 +466,7 @@ Status SegmentGraph(const Graph* tf_graph,
 // grow from the output-side of the network towards the inputs.
 //
 // In general this is not guaranteed to produce a globally optimal
-// segmentation. For exaample, consider graph with node {A, B, C, D} and edges
+// segmentation. For example, consider graph with node {A, B, C, D} and edges
 // {A->B, A->C, B->D, C->D), where A, B, D are trt compatible but C is not, so
 // in theory we can choose to contract either A, B or B, D but not both, but
 // here it always choose to contract B, D.
@@ -34,7 +34,7 @@ namespace tensorrt {
 // TRTs pull model for calibration. When TRT implements a means for
 // a push calibration This class should be updated accordingly
-// IInt8EntropyCalibrator2 is prefferred for TRT 5.1+.
+// IInt8EntropyCalibrator2 is preferred for TRT 5.1+.
 #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
 struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 {
 #else
@@ -572,7 +572,7 @@ Status Conditional::ExtractBodies(Graph* graph) {
 if (visited.at(n->id())) continue;
 visited[n->id()] = true;
-// Verify output edges and record control edges exitting scope.
+// Verify output edges and record control edges exiting scope.
 for (const Edge* e : n->out_edges()) {
 Node* dst = e->dst();
 if (IsMerge(dst)) continue;
@@ -602,7 +602,7 @@ Status Conditional::ExtractBodies(Graph* graph) {
 }
 }
-// Copying incomming edges to dst node. Iterate over a copy of the edges
+// Copying incoming edges to dst node. Iterate over a copy of the edges
 // as they could be mutated during iteration.
 std::vector<const Edge*> in_edges(n->in_edges().begin(),
 n->in_edges().end());
@@ -719,7 +719,7 @@ Status Conditional::ExtractBodies(Graph* graph) {
 ++index;
 // Connect the input to the merge_ with the retval, except if it is a
-// Swich node, which is handled separately.
+// Switch node, which is handled separately.
 for (auto e : m->in_edges()) {
 if (e->IsControlEdge()) continue;
 int branch_index = static_cast<int>(find_branch(e));
@@ -1139,7 +1139,7 @@ StateMap::CondId FunctionalizeCond::StateAlongEdge(const Edge* e) {
 // node. If we don't record this into CondState, branches might have
 // incorrect CondState (e.g. if the branch only has a Const data node).
 // We set it to kNeither because there is no way to tell whether it's
-// for true branch or false branch. This node's desendents might have
+// for true branch or false branch. This node's descendents might have
 // other incoming edges with defined BranchType, and we correctly handle
 // merging kNeither with other defined BranchType in StateAlongEdge().
 state[predicate] = BranchType::kNeither;
@@ -213,7 +213,7 @@ class FunctionalizeCond {
 // This populates the state_map_.
 Status DetermineStates(std::vector<Node*> rev_topo_order);
-// Determine the CondState for a given node using the incomming edges
+// Determine the CondState for a given node using the incoming edges
 // to the node. Note: it is expected that this node's CondState is only
 // determined once its input's CondState is.
 Status DetermineCondState(Node* dst) {
@@ -22,7 +22,7 @@ namespace tensorflow {
 namespace {
-// This TensorFlow op supports the Assert primitve.
+// This TensorFlow op supports the Assert primitive.
 class AssertOp : public XlaOpKernel {
 public:
 explicit AssertOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -329,7 +329,7 @@ class MaxPoolGradOp : public XlaOpKernel {
 (padding_ == VALID) ? xla::Padding::kValid : xla::Padding::kSame;
 // Create a MaxPool operation to check the expected resulting shape, and
-// then throw away the operation because we don't actually neeed it here.
+// then throw away the operation because we don't actually need it here.
 TensorShape expected_out_shape;
 auto pooling =
 xla::MaxPool(ctx->Input(0), ksize_, stride_, xla_padding,
@@ -37,7 +37,7 @@ class XlaConvOp : public XlaOpKernel {
 context, context->GetAttr("precision_config", &precision_config_attr));
 OP_REQUIRES(context,
 precision_config_.ParsePartialFromString(precision_config_attr),
-errors::InvalidArgument("Error parsing precison config."));
+errors::InvalidArgument("Error parsing precision config."));
 }
 void Compile(XlaOpKernelContext* context) override {
@@ -32,7 +32,7 @@ class XlaSvdOp : public XlaOpKernel {
 ctx->GetAttr("precision_config", &precision_config_attr));
 OP_REQUIRES(ctx,
 precision_config_.ParsePartialFromString(precision_config_attr),
-errors::InvalidArgument("Error parsing precison config."));
+errors::InvalidArgument("Error parsing precision config."));
 if (precision_config_.operand_precision_size() == 0) {
 precision_config_.mutable_operand_precision()->Add(
 xla::PrecisionConfig::HIGHEST);
@@ -84,7 +84,7 @@ lower: a boolean specifies whether the calculation is done with the lower
 max_iter: maximum number of sweep update, i.e., the whole lower triangular
 part or upper triangular part based on parameter lower. Heuristically, it has
-been argued that approximatly logN sweeps are needed in practice (Ref: Golub &
+been argued that approximately logN sweeps are needed in practice (Ref: Golub &
 van Loan "Matrix Computation").
 epsilon: the tolerance ratio.
@@ -116,7 +116,7 @@ a: the input tensor.
 max_iter: maximum number of sweep update, i.e., the whole lower triangular
 part or upper triangular part based on parameter lower. Heuristically, it has
-been argued that approximatly log(min (M, N)) sweeps are needed in practice
+been argued that approximately log(min (M, N)) sweeps are needed in practice
 (Ref: Golub & van Loan "Matrix Computation").
 epsilon: the tolerance ratio.
@@ -610,7 +610,7 @@ REGISTER_OP("XlaDequantize")
 .SetShapeFn(shape_inference::UnknownShape)
 .Doc(R"doc(
 Takes the packed uint32 input and unpacks the input to uint8 to do
-Dequantization on deivce.
+Dequantization on device.
 input: Input tensors whose types is uint32, shape is [d0, ..., dn].
 output: Output tensors whose types is bloat16. If transpose_output is true,
@@ -644,7 +644,7 @@ REGISTER_OP("XlaEinsum")
 .Doc(R"doc(
 An op which supports basic einsum op with 2 inputs and 1 output.
-This op has better TPU performnce since it doesn't have explicitly reshape and
+This op has better TPU performance since it doesn't have explicitly reshape and
 transpose operations as tf.einsum does.
 )doc");
@@ -51,7 +51,7 @@ xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type,
 // In case the input shape is a tuple, the minor-to-major values will be in the
 // order of the tuple elements within the tuple shape.
 // If a shape (or a subshape of a tuple shape) has missing layout, a rank long
-// sequence of -1 values will be emittted.
+// sequence of -1 values will be emitted.
 xla::StatusOr<std::vector<int>> GetShapeLayoutVector(const xla::Shape& shape);
 // Given the input shape and a linearized sequence of the minor-to-major values
@@ -52,7 +52,7 @@ message Variable {
 TensorShapeProto shape = 3;
 DataType type = 4;
-// Flag for variables that are never assigned. Assigments to a read-only
+// Flag for variables that are never assigned. Assignments to a read-only
 // variable or unassigned variables that are not read-only are invalid.
 bool readonly = 5;
 }
@@ -213,13 +213,13 @@ class XlaOpKernelContext {
 return dynamic_dimension_is_minus_one_;
 }
-// Reads the current value of the resouce variable referred to by input
+// Reads the current value of the resource variable referred to by input
 // `index`. If `shape` is not nullptr, sets `*shape` to the shape of the
 // variable. Returns an error if the variable has not been initialized, or if
 // its type does not match `type`.
 Status ReadVariableInput(int index, DataType type, TensorShape* shape,
 xla::XlaOp* value);
-// Reads the current value of the resouce variable referred to by input
+// Reads the current value of the resource variable referred to by input
 // `name`.
 Status ReadVariableInput(absl::string_view name, DataType type,
 TensorShape* shape, xla::XlaOp* value);
@@ -73,7 +73,7 @@ void BuildComparatorAndComparisons(ComparatorsTest* test,
 }
 }
-// Concantenate the comparison results.
+// Concatenate the comparison results.
 ConcatInDim(test->builder(), all_comparisons, 0);
 // If we use less-than comparisons, we expect the comparison to result in true
@@ -316,7 +316,7 @@ Status ValidateEinsumNumericDimensions(absl::Span<const int64> x_config,
 namespace {
 // Helper method to remove dimensions from a shape and dot dimension numbers
-// used to implment implicit broadcasting.
+// used to implement implicit broadcasting.
 template <typename C>
 void DeleteDimsFromContainer(absl::Span<const int64> to_delete, Shape* shape,
 C* batch_dims, C* contracting_dims) {
@@ -473,7 +473,7 @@ xla::XlaOp Einsum(xla::XlaOp x, absl::Span<const int64> x_config, xla::XlaOp y,
 transpose_dims[output_transpose_dims[i]] = i;
 }
-// Remove ones that where broadcated from the x and the y shape and adjust
+// Remove ones that where broadcasted from the x and the y shape and adjust
 // the dimension numbers that are more minor than those dimensions.
 DeleteDimsFromContainer(lhs_delete_dims, &x_shape,
 dnums.mutable_lhs_batch_dimensions(),
@@ -132,7 +132,7 @@ xla::XlaOp Einsum(
 // the input.
 xla::XlaOp EinsumDiagonal(XlaOp x, absl::Span<const int64> config);
-// Same as above but supporting numeric labels on dimensins. So "ab,cb->ac"
+// Same as above but supporting numeric labels on dimensions. So "ab,cb->ac"
 // becomes:
 // x_config = {0, 1}
 // y_config = {2, 1}
@@ -39,7 +39,7 @@ XlaOp AvgPoolDivideByCountWithGeneralPadding(
 std::vector<int64> window_ksize(num_spatial_dims);
 std::vector<int64> window_stride(num_spatial_dims);
 CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims)
-<< "Invalid number of spatial dimentions in data format specification";
+<< "Invalid number of spatial dimensions in data format specification";
 for (int i = 0; i < num_spatial_dims; ++i) {
 int dim = data_format.spatial_dimension(i);
 input_dim_sizes[i] = input_shape[dim];
@@ -95,7 +95,7 @@ PaddingConfig MakeSpatialPaddingConfig(
 padding_config.add_dimensions();
 }
 CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims)
-<< "Invalid number of spatial dimentions in data format specification";
+<< "Invalid number of spatial dimensions in data format specification";
 for (int i = 0; i < num_spatial_dims; ++i) {
 int dim = data_format.spatial_dimension(i);
 auto padding_dimension = padding_config.mutable_dimensions(dim);
@@ -178,7 +178,7 @@ std::vector<std::pair<int64, int64>> MakeSpatialPadding(
 std::vector<int64> kernel_size_spatial_dimensions;
 std::vector<int64> stride_spatial_dimensions;
 CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims)
-<< "Invalid number of spatial dimentions in data format specification";
+<< "Invalid number of spatial dimensions in data format specification";
 for (int i = 0; i < num_spatial_dims; ++i) {
 int dim = data_format.spatial_dimension(i);
 input_spatial_dimensions.push_back(input_size[dim]);
@@ -154,29 +154,29 @@ XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim, bool sparse) {
 return TorchIndexSelect(input, index, 0);
 }
 if (!sparse) {
-std::vector<int64> index_broacast_dims;
-std::vector<int64> input_broacast_dims;
+std::vector<int64> index_broadcast_dims;
+std::vector<int64> input_broadcast_dims;
 std::vector<int64> sizes;
 for (int64 i = 0; i < index_shape.rank(); ++i) {
 if (i < dim) {
-input_broacast_dims.push_back(i);
-index_broacast_dims.push_back(i);
+input_broadcast_dims.push_back(i);
+index_broadcast_dims.push_back(i);
 } else if (i == dim) {
 sizes.push_back(input_shape.dimensions(i));
-input_broacast_dims.push_back(i);
-index_broacast_dims.push_back(i + 1);
+input_broadcast_dims.push_back(i);
+index_broadcast_dims.push_back(i + 1);
 } else {
-input_broacast_dims.push_back(i + 1);
-index_broacast_dims.push_back(i + 1);
+input_broadcast_dims.push_back(i + 1);
+index_broadcast_dims.push_back(i + 1);
 }
 sizes.push_back(index_shape.dimensions(i));
 }
 auto mask = Eq(
-BroadcastInDim(index, sizes, index_broacast_dims),
+BroadcastInDim(index, sizes, index_broadcast_dims),
 Iota(builder, ShapeUtil::MakeShape(index_shape.element_type(), sizes),
 dim));
 auto masked_input = Select(
-mask, BroadcastInDim(input, sizes, input_broacast_dims),
+mask, BroadcastInDim(input, sizes, input_broadcast_dims),
 Zeros(builder,
 ShapeUtil::MakeShape(input_shape.element_type(), sizes)));
 return Reduce(masked_input, Zero(builder, input_shape.element_type()),
@@ -214,25 +214,25 @@ XlaOp TorchScatterDense(XlaOp input, XlaOp index, XlaOp src, int64 dim,
 return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
 TF_ASSIGN_OR_RETURN(Shape index_shape, builder->GetShape(index));
 TF_ASSIGN_OR_RETURN(Shape input_shape, builder->GetShape(input));
-std::vector<int64> index_broacast_dims;
+std::vector<int64> index_broadcast_dims;
 std::vector<int64> sizes;
 for (int64 i = 0; i < index_shape.rank(); ++i) {
 if (i < dim) {
-index_broacast_dims.push_back(i);
+index_broadcast_dims.push_back(i);
 } else {
 if (i == dim) {
 sizes.push_back(input_shape.dimensions(i));
 }
-index_broacast_dims.push_back(i + 1);
+index_broadcast_dims.push_back(i + 1);
 }
 sizes.push_back(index_shape.dimensions(i));
 }
 auto mask =
-Eq(BroadcastInDim(index, sizes, index_broacast_dims),
+Eq(BroadcastInDim(index, sizes, index_broadcast_dims),
 Iota(builder,
 ShapeUtil::MakeShape(index_shape.element_type(), sizes), dim));
 auto masked_src =
-Select(mask, BroadcastInDim(src, sizes, index_broacast_dims),
+Select(mask, BroadcastInDim(src, sizes, index_broadcast_dims),
 Zeros(builder,
 ShapeUtil::MakeShape(input_shape.element_type(), sizes)));
@@ -98,7 +98,7 @@ std::vector<std::unique_ptr<GlobalData>> MakeFakeArgumentsOrDie(
 const XlaComputation& computation, Client* client,
 DebugOptions* debug_opts /*=nullptr*/) {
 CHECK(computation.proto().has_host_program_shape())
-<< "Computation should have progran shape.";
+<< "Computation should have program shape.";
 auto program_shape = computation.proto().host_program_shape();
 std::vector<std::unique_ptr<GlobalData>> results;
@@ -329,15 +329,15 @@ StatusOr<int> LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) {
 }
 StatusOr<TransferToServerResponse> LocalClient::TransferToLocalServer(
-const ::xla::BorrowingLiteral& literal, int device_oridinal) {
+const ::xla::BorrowingLiteral& literal, int device_ordinal) {
 const ::xla::Shape& shape = literal.shape();
 TF_ASSIGN_OR_RETURN(
 ::xla::ScopedShapedBuffer shaped_buffer,
 backend().transfer_manager()->AllocateScopedShapedBuffer(
-shape, backend().memory_allocator(), device_oridinal));
+shape, backend().memory_allocator(), device_ordinal));
 TF_ASSIGN_OR_RETURN(auto stream,
-mutable_backend()->BorrowStream(device_oridinal));
+mutable_backend()->BorrowStream(device_ordinal));
 TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
 stream.get(), literal, shaped_buffer));
 std::vector<::xla::ScopedShapedBuffer> replicated_buffer;
@@ -122,7 +122,7 @@ class LocalClient : public Client {
 // Transfer the BorrowingLiteral to the device with the given ordinal.
 StatusOr<TransferToServerResponse> TransferToLocalServer(
-const ::xla::BorrowingLiteral& literal, int device_oridinal);
+const ::xla::BorrowingLiteral& literal, int device_ordinal);
 // Copy the data from the device contained in the given ShapedBuffer and
 // return as a Literal.
@@ -232,7 +232,7 @@ class XlaBuilder {
 // added operation.
 //
 // `remove_dynamic_dimensions` tells the builder whether to remove the
-// dyanmic dimensions information in all ops.
+// dynamic dimensions information in all ops.
 //
 // TODO(b/121223198): Delete `remove_dynamic_dimensions` and keeps the
 // dynamic dimensions information when XLA backend can handle dynamic
@@ -1194,7 +1194,7 @@ XlaOp Broadcast(XlaOp operand, absl::Span<const int64> broadcast_sizes);
 //
 // For example, say operand = {1, 2}, i.e., a 1D tensor in shape s32[2]; the
 // output shape is s32[2,2]:
-// - Specifying {1} as brodcast_dimension will generate output
+// - Specifying {1} as broadcast_dimension will generate output
 // {{1, 2},
 // {1, 2}}
 // - On the other hand, specifying {0} as broadcast_dimension
@@ -1469,7 +1469,7 @@ XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
 // two minor dimensions equal.
 // If `lower` is true, the data from the lower triangle is used; if false, the
 // upper triangle is used. The input data in the other triangle of the input
-// does not affect the output. Returns the output in the same lower/uppper
+// does not affect the output. Returns the output in the same lower/upper
 // triangle. The data returned in the other output triangle is arbitrary and
 // implementation-defined.
 //
@@ -292,7 +292,7 @@ TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) {
 TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b));
 // The binary operation has in-dim broadcast and degenerate broadcast, should
-// first do the in-dim broadcast then convert the degnerate broadcast into a
+// first do the in-dim broadcast then convert the degenerate broadcast into a
 // reshape and a broadcast.
 //
 // Expected:
@@ -328,7 +328,7 @@ static void AllocateFlags() {
 "use multi-threaded Eigen mode."),
 tensorflow::Flag("xla_gpu_cuda_data_dir",
 flag_values->mutable_xla_gpu_cuda_data_dir(),
-"If non-empty, speficies a local directory containing "
+"If non-empty, specifies a local directory containing "
 "ptxas and nvvm libdevice files; otherwise we use "
 "those from runfile directories."),
 tensorflow::Flag("xla_gpu_ftz",
@@ -347,7 +347,7 @@ static void AllocateFlags() {
 flag_values->xla_gpu_max_kernel_unroll_factor(),
 "Specify the maximum kernel unroll factor for the GPU backend."),
 tensorflow::Flag("xla_gpu_ptx_file", setter_for_xla_gpu_ptx_file, "",
-"If non-empty, speficies a file containing ptx to use. "
+"If non-empty, specifies a file containing ptx to use. "
 "The filename prefix must have the same pattern as PTX "
 "dumped by XLA. This allows to match one specific "
 "module. General workflow. Get the generated module "
@@ -52,7 +52,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags();
 // By default all passes have infinite fuel. You can restrict how much fuel a
 // pass has by specifying XLA_FLAGS=--xla_fuel=PASS1=NUM1,PASS2=NUM2,...
 //
-// If a user specifes --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called
+// If a user specifies --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called
 // before the program exits, we'll print a warning.
 //
 // We recommend as a convention you use a pass's name for the `pass` argument,
@@ -91,7 +91,7 @@ bool ConsumeFuel(absl::string_view pass,
 // startup.
 //
 // You may call this function twice in the same thread to reset its fuel pool
-// back to the intitial state.
+// back to the initial state.
 void ResetThreadLocalFuel();
 } // namespace xla
@@ -21,7 +21,7 @@ limitations under the License.
 namespace xla {
 // Create a default ExecutionOptions proto; this proto has its debug options
-// popupated to the default values taken from flags.
+// populated to the default values taken from flags.
 ExecutionOptions CreateDefaultExecutionOptions();
 } // namespace xla
@@ -94,7 +94,7 @@ The participating cores can be configured by:
 in the same order of 1, 2, 3. Then, another AllToAll will be applied within
 replicas 4, 5, 0, and the concatenation order is also 4, 5, 0. If
 `replica_groups` is empty, all replicas belong to one group, in the
-concatenation order of their appearence.
+concatenation order of their appearance.
 Prerequisites:
@@ -248,7 +248,7 @@ Status MutableLiteralBase::CopySliceFromInternal(
 TF_RET_CHECK(src_base.size() == copy_size.size());
 // Scan the source from minor, stepping in copy size blocks, then within
-// the index enumaration functor, do a strided copy advancing source index
+// the index enumeration functor, do a strided copy advancing source index
 // by one (walking through the minor dimension), and destination index by
 // proper stride size at the matching dimension.
 DimensionVector src_indexes(src_base.size(), 0);
@@ -810,7 +810,7 @@ class Literal : public MutableLiteralBase {
 Literal(const Shape& shape, bool allocate_arrays);
 Literal& operator=(Literal&& other);
-// Similar to CopyFrom, but with move semantincs. The subshape of this literal
+// Similar to CopyFrom, but with move semantics. The subshape of this literal
 // rooted at 'dest_shape_index' must be *equal* to the shape 'src_literal'
 // (layouts and shapes must match), but need not be arrays. The memory
 // allocated in this literal for the subshape at dest_shape_index is
@@ -883,7 +883,7 @@ class BorrowingLiteral : public LiteralBase {
 BorrowingLiteral() : LiteralBase() {}
 // 'src_buf_ptr' is not owned by this class and must outlive the
-// lifetime of this class. It points to an appropirately sized buffer with
+// lifetime of this class. It points to an appropriately sized buffer with
 // data interpretered as indicated by 'shape'.
 // This constructor is only used for array shapes.
 BorrowingLiteral(const char* src_buf_ptr, const Shape& shape);
@@ -433,7 +433,7 @@ class NearComparator {
 }
 } else if (IsInf(expected) || IsInf(actual)) {
 // If either the expected or actual value is infinity but not both,
-// then both absolute and relative error are regarded as inifity.
+// then both absolute and relative error are regarded as infinity.
 CHECK(!CompareEqual(expected, actual, {linear_index}));
 abs_error = std::numeric_limits<float>::infinity();
 rel_error = std::numeric_limits<float>::infinity();
@@ -1134,7 +1134,7 @@ TEST_F(LiteralUtilTest, CopyFromDifferentShapes) {
 TEST_F(LiteralUtilTest, F16) {
 // Verify that the internal data views are consistent and that they
 // are in little endian format
-// TODO - modify if we make the data format machine endianess dependent
+// TODO - modify if we make the data format machine endianness dependent
 Literal m1 = Literal::CreateFromShape(ShapeUtil::MakeShape(F16, {2, 2}));
 const char* d1 = reinterpret_cast<const char*>(m1.data<half>().data());
 EXPECT_EQ(d1[0], 0);
@@ -30,7 +30,7 @@ limitations under the License.
 // - <single-quote><characters string not containing nul or
 // single-quote><single_quote> in which case the effective value is the
 // string with the single-quotes removed
-// - <double-quote><character string not containing nul or unesecaped
+// - <double-quote><character string not containing nul or unescaped
 // double-quote><double_quote> in which case the effective value if the
 // string with the double-quotes removed, and escaped sequences of
 // <backslash><char> replaced by <char>.
@@ -73,14 +73,14 @@ static const char kTestFlagString[] =
 "--single_quoted='single quoted \\\\ \n \"' "
 "--double_quoted=\"double quoted \\\\ \n '\\\"\" ";
-// Test that the environent variable is parsed correctly.
+// Test that the environment variable is parsed correctly.
 TEST(ParseFlagsFromEnv, Basic) {
 // Prepare environment.
 setenv("TF_XLA_FLAGS", kTestFlagString, true /*overwrite*/);
 TestParseFlagsFromEnv("(flags in environment variable)");
 }
-// Test that a file named by the environent variable is parsed correctly.
+// Test that a file named by the environment variable is parsed correctly.
 TEST(ParseFlagsFromEnv, File) {
 // environment variables where tmp dir may be specified.
 static const char* kTempVars[] = {"TEST_TMPDIR", "TMP"};
@@ -154,7 +154,7 @@ int main(int argc, char* argv[]) {
 xla::int32 int_flag = 1;
 const std::vector<tensorflow::Flag> flag_list = {
 tensorflow::Flag("recursing", &recursing,
-"Whether the binary is being invoked recusively."),
+"Whether the binary is being invoked recursively."),
 tensorflow::Flag("int_flag", &int_flag, "An integer flag to test with"),
 };
 xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
@@ -551,7 +551,7 @@ PyLocalBuffer::DestructureTuple() {
 absl::MutexLock lock(&mu_);
 if (!on_host_shape_.IsTuple()) {
 return InvalidArgument(
-"Attemped to destructure a PyLocalBuffer that did not have a tuple "
+"Attempted to destructure a PyLocalBuffer that did not have a tuple "
 "shape; shape: %s",
 ShapeUtil::HumanString(on_host_shape_));
 }
@@ -345,7 +345,7 @@ PyTpuBuffer::DestructureTuple() {
 tensorflow::profiler::TraceMe traceme("PyTpuBuffer::DestructureTuple");
 if (!on_host_shape_.IsTuple()) {
 return InvalidArgument(
-"Attemped to destructure a PyTpuBuffer that did not have a tuple "
+"Attempted to destructure a PyTpuBuffer that did not have a tuple "
 "shape; shape: %s",
 ShapeUtil::HumanString(on_host_shape_));
 }
@@ -37,7 +37,7 @@
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/platform/logging.h"
-// This API is EXPERIMENTAL and under active developement. It is subject to
+// This API is EXPERIMENTAL and under active development. It is subject to
 // change without notice.
 namespace tpu_driver {
@@ -1526,7 +1526,7 @@ class ComputationBuilder(object):
 ConvWithGeneralPadding.
 feature_group_count: number of feature groups for grouped convolution.
 batch_group_count: number of batch groups for grouped convolution.
-Returns: a XlaOp representing the ConvGenralDilated operation.
+Returns: a XlaOp representing the ConvGeneralDilated operation.
 """
 if dimension_numbers is None:
 dimension_numbers = self._GetConvDimensionNumbers(len(window_strides))
@@ -25,10 +25,10 @@ import numpy as _np # Avoids becoming a part of public Tensorflow API.
 from tensorflow.compiler.xla import xla_data_pb2
 from tensorflow.python.framework import dtypes
-# Records corresponsence between a XLA primitive type and Python/Numpy types.
+# Records correspondence between a XLA primitive type and Python/Numpy types.
 #
 # primitive_type: value of type xla_data_pb2.PrimitiveType
-# numpy_dtype: corresponsing Numpy "dtype" (like np.float32)
+# numpy_dtype: corresponding Numpy "dtype" (like np.float32)
 # literal_field_name: name of the field in the LiteralProto message elements
 # of this type go into.
 # literal_field_type: type of the field named 'literal_field_name'.
@ -673,7 +673,7 @@ Status AlgebraicSimplifierVisitor::HandleBitcast(HloInstruction* bitcast) {
|
||||
bitcast, HloInstruction::CreateBitcast(bitcast->shape(), op));
|
||||
}
|
||||
// All bitcasts can be eliminated (assuming layout constraints are
|
||||
// satisified).
|
||||
// satisfied).
|
||||
ReplaceInstructionIfSameShape(bitcast, bitcast->mutable_operand(0));
|
||||
return Status::OK();
|
||||
}
|
||||
@ -692,7 +692,7 @@ Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) {
|
||||
return ReplaceWithNewInstruction(
|
||||
copy, HloInstruction::CreateUnary(copy->shape(), HloOpcode::kCopy, op));
|
||||
}
|
||||
// All copies can be eliminated (assuming layout constraints are satisified).
|
||||
// All copies can be eliminated (assuming layout constraints are satisfied).
|
||||
if (ReplaceInstructionIfSameShape(copy, copy->mutable_operand(0))) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -2735,7 +2735,7 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) {
|
||||
|
||||
// Don't perform this optimization if either of the exponents is complex; this
|
||||
// identity is true only for real-valued exponents. In addition, we cowardly
|
||||
// refuse to do this transformation if the two expontents have different
|
||||
// refuse to do this transformation if the two exponents have different
|
||||
// element types.
|
||||
if (lhs->opcode() == HloOpcode::kPower &&
|
||||
!ShapeUtil::ElementIsComplex(lhs->operand(1)->shape()) &&
|
||||
|
@ -4756,7 +4756,7 @@ TEST_P(DotStrengthReductionTest, DotStrengthReduction) {
|
||||
const bool computation_should_be_modified =
|
||||
dot_should_be_transformed || (transpose_lhs && transpose_rhs);
|
||||
EXPECT_EQ(changed, computation_should_be_modified);
|
||||
// The second pass of algebriac simplifer will remove dots without
|
||||
// The second pass of algebraic simplifier will remove dots without
|
||||
// non-contracting dimensions or contracting dimensions.
|
||||
TF_ASSERT_OK_AND_ASSIGN(changed, simplifier.Run(module.get()));
|
||||
EXPECT_EQ(changed, computation_should_be_modified);
|
||||
|
@ -38,7 +38,7 @@ namespace {
|
||||
|
||||
class BatchNormExpanderTest : public HloTestBase {
|
||||
protected:
|
||||
// BatchNorm should have a dynamic sized dividor for mean operations.
|
||||
// BatchNorm should have a dynamic sized divider for mean operations.
|
||||
int64 CountGetDimensionSize(const HloModule& module) {
|
||||
int64 count = 0;
|
||||
for (HloComputation* comp : module.computations()) {
|
||||
|
@ -1608,7 +1608,7 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
|
||||
/*is_thread_local=*/false, &buffers_to_assign_sequentially,
|
||||
assignment.get()));
|
||||
// Assign buffers with sequential ordering, if any. If all global
|
||||
// computations are sequential, we can run heap simuation on the whole
|
||||
// computations are sequential, we can run heap simulation on the whole
|
||||
// module, which reduces memory usage.
|
||||
const bool run_whole_module_heap_simulation =
|
||||
buffers_to_assign_sequentially.size() == global_computations.size();
|
||||
|
@ -770,7 +770,7 @@ TEST_F(BufferAssignmentTest, PresetAssignments) {
|
||||
}
|
||||
|
||||
TEST_F(BufferAssignmentTest, PresetAssignmentsWhile) {
|
||||
// Tests preset assignments when there is no 1-to-1 corrspondance between
|
||||
// Tests preset assignments when there is no 1-to-1 correspondence between
|
||||
// HloValue and HloBuffer (i.e., a while loop).
|
||||
auto module = CreateNewVerifiedModule();
|
||||
Shape f32vec10_color1 =
|
||||
|
@ -160,7 +160,7 @@ class BufferValue {
|
||||
BufferValue(HloInstruction* instruction, const ShapeIndex& index, Id id);
|
||||
|
||||
private:
|
||||
// The definining instruction and index are not stored here; they can be found
|
||||
// The defining instruction and index are not stored here; they can be found
|
||||
// in the LogicalBuffer and HloValue subclasses. This class exists only to
|
||||
// support migrations from TuplePointsToAnalysis to HloDataflowAnalysis, by
|
||||
// allowing abstract use of LogicalBuffer or HloValue. After those migrations
|
||||
|
@ -27,7 +27,7 @@ namespace {
|
||||
|
||||
// Traverses the callee computation, inlining cloned nodes into the caller
|
||||
// computation and connecting them to producers/consumers appropriately.
|
||||
// When the traversal has completed, the provided call instruction is entriely
|
||||
// When the traversal has completed, the provided call instruction is entirely
|
||||
// replaced in the caller's graph.
|
||||
class SubcomputationInsertionVisitor : public DfsHloVisitorWithDefault {
|
||||
public:
|
||||
|
@ -93,7 +93,7 @@ std::pair<XlaOp, XlaOp> CholeskyUnblocked(
|
||||
Zeros(body_builder,
|
||||
ShapeUtil::MakeShape(a_shape.element_type(), matrix_dims));
|
||||
// L * L.T, This matrix has of a lot of multiplying with zero
|
||||
// (namely, L[:, j:] = 0) and redudant computation, but it is faster
|
||||
// (namely, L[:, j:] = 0) and redundant computation, but it is faster
|
||||
// than slice.
|
||||
auto l_square = BatchDot(body_l, false, body_l, true, precision);
|
||||
|
||||
|
@ -32,7 +32,7 @@ namespace xla {
|
||||
|
||||
enum class ReductionKind { SUM, PRODUCT, MIN, MAX };
|
||||
|
||||
// Atempts to match computation to one of the possible cases in ReductionKind.
|
||||
// Attempts to match computation to one of the possible cases in ReductionKind.
|
||||
absl::optional<ReductionKind> MatchReductionComputation(
|
||||
const HloComputation* computation);
|
||||
|
||||
|
@ -47,7 +47,7 @@ namespace xla {
|
||||
// The following types are used for ahead of time compilation.
|
||||
|
||||
// Contains the object file data created as a result of ahead-of-time
|
||||
// compuation.
|
||||
// computation.
|
||||
using ObjectFileData = std::vector<char>;
|
||||
|
||||
// Abstract superclass describing the result of an ahead-of-time compilation.
|
||||
|
@ -71,7 +71,7 @@ class ComputationPlacer {
|
||||
|
||||
// Returns the device id assigned to the given replica and computation
|
||||
// instance for [replica_count x computation_count] setup. The returned device
|
||||
// id must match the assignement from PlaceReplicatedComputation().
|
||||
// id must match the assignment from PlaceReplicatedComputation().
|
||||
virtual StatusOr<int> DeviceId(int replica, int computation,
|
||||
int replica_count, int computation_count);
|
||||
|
||||
|
@ -189,7 +189,7 @@ StatusOr<bool> TryRemoveUnusedConditionalOperands(
|
||||
}
|
||||
for (HloInstruction* user : param->users()) {
|
||||
// If the user is not a get tuple element, assume it is unsafe to remove
|
||||
// elemnts from the tuple.
|
||||
// elements from the tuple.
|
||||
if (user->opcode() != HloOpcode::kGetTupleElement) {
|
||||
return false;
|
||||
}
|
||||
|
@ -393,7 +393,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
|
||||
const int64 depthwise_multiplier =
|
||||
filter->shape().dimensions(kernel_output_feature_dim) / group_count;
|
||||
// Split the kernel output feature dimension into group count and
|
||||
// depthwise mutlipler.
|
||||
// depthwise mutilipler.
|
||||
for (int64 i = 0; i < filter->shape().rank(); ++i) {
|
||||
if (i == kernel_output_feature_dim) {
|
||||
new_filter_dimension.push_back(group_count);
|
||||
@ -439,7 +439,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
|
||||
new_dim->set_window_dilation(1);
|
||||
new_dim->set_base_dilation(1);
|
||||
|
||||
// Split the output feature dimension into and output featrue of group
|
||||
// Split the output feature dimension into and output feature of group
|
||||
// count and depthwise multipler as an output spatial dimension.
|
||||
std::vector<int64> new_output_dimension;
|
||||
new_output_dimension.reserve(convolution->shape().rank() + 1);
|
||||
|
@ -1177,7 +1177,7 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) {
|
||||
|
||||
InsertCopies(module_.get());
|
||||
|
||||
// The entry computation requires two copies to resolve the non-disinctness of
|
||||
// The entry computation requires two copies to resolve the non-distinctness of
|
||||
// two init elements and the constant passed in as one of the init
|
||||
// elements. Either element can be copied for the distinctness issue.
|
||||
EXPECT_EQ(CountCopies(*module_->entry_computation()), 2);
|
||||
@ -1996,7 +1996,7 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) {
|
||||
tensorflow::testing::StopTiming();
|
||||
|
||||
// Each body receives of copy of two of the parameters (the corresponding
|
||||
// elements in the body are modifed), and there is one copy in each body.
|
||||
// elements in the body are modified), and there is one copy in each body.
|
||||
ASSERT_EQ(CountCopies(module), 3 * num_whiles);
|
||||
}
|
||||
}
|
||||
|
@ -350,7 +350,7 @@ Status CpuCompiler::RunHloPassesAfterLayoutAssn(
|
||||
// duplicate or NOPs, so remove them with algebraic simplification and CSE.
|
||||
{
|
||||
auto& pass = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
|
||||
"simplification after layout assignement");
|
||||
"simplification after layout assignment");
|
||||
pass.AddInvariantChecker<HloVerifier>(
|
||||
/*layout_sensitive=*/true,
|
||||
/*allow_mixed_precision=*/false,
|
||||
|
@ -327,7 +327,7 @@ StatusOr<ExecutionOutput> CpuExecutable::ExecuteAsyncOnStream(
|
||||
//
|
||||
// Logically we want this lambda to capture `buffers` by move, ultimately our
|
||||
// functor needs to be wrapped in an std::function, and that requires its
|
||||
// functor to be copyable. Thus we perpitrate the hack of capturing buffers
|
||||
// functor to be copyable. Thus we perpetrate the hack of capturing buffers
|
||||
// "by shared pointer".
|
||||
//
|
||||
// We also need to change the types of some of the variables we capture:
|
||||
|
@ -28,7 +28,7 @@ namespace cpu {
|
||||
|
||||
// We want to change the layout of constant arrays to be column major when all
|
||||
// of their users are dot operations that can be made faster with the flipped
|
||||
// layout. To avoid going quadriatic over the # of instructions, we cache this
|
||||
// layout. To avoid going quadratic over the # of instructions, we cache this
|
||||
// property in should_make_rhs_col_major -- it maps a constant to true if all of
|
||||
// the users of said constant are dot operations that can be sped up. This
|
||||
// cache is populated lazily as we encounter dot operations traversing the
|
||||
|
@ -84,7 +84,7 @@ enum class DotImplementationStrategy {
|
||||
// supported.
|
||||
kTiledLlvmIrGemv,
|
||||
|
||||
// The dot operation is lowered into LLVM IR that implemetns a tiled
|
||||
// The dot operation is lowered into LLVM IR that implements a tiled
|
||||
// Matrix*Matrix operation. No fusions are supported. The two inputs
|
||||
// and the output have to be row major.
|
||||
kTiledLlvmIrGemm,
|
||||
|
@ -63,7 +63,7 @@ enum class DotImplementationStrategy {
|
||||
// supported.
|
||||
kTiledLlvmIrGemv,
|
||||
|
||||
// The dot operation is lowered into LLVM IR that implemetns a tiled
|
||||
// The dot operation is lowered into LLVM IR that implements a tiled
|
||||
// Matrix*Matrix operation. No fusions are supported. The two inputs
|
||||
// and the output have to be row major.
|
||||
kTiledLlvmIrGemm,
|
||||
|
@ -1159,7 +1159,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
|
||||
/*instruction=*/*convolution, /*operands=*/{lhs, rhs},
|
||||
/*supported_types=*/{F16, F32, F64, C64, C128}));
|
||||
|
||||
// TODO(tonywy): Add PotentiallyImplementedAsMKLCovolution to support
|
||||
// TODO(tonywy): Add PotentiallyImplementedAsMKLConvolution to support
|
||||
// different data layouts.
|
||||
if (PotentiallyImplementedAsEigenConvolution(*convolution,
|
||||
target_machine_features_)) {
|
||||
|
@ -294,7 +294,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
|
||||
absl::string_view name);
|
||||
|
||||
// Emits a call to a "global" function (e.g. to the computation nested within
|
||||
// a kWhile or a kCall). Buffer assignment unabiguously assignes buffers to
|
||||
// a kWhile or a kCall). Buffer assignment unabiguously assigns buffers to
|
||||
// the parameters and return values for these computations so there is no need
|
||||
// to explicitly pass parameters or return results.
|
||||
void EmitGlobalCall(const HloComputation& callee, absl::string_view name);
|
||||
@ -366,7 +366,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
|
||||
// without generating IR with illegal (e.g. excessively large or
|
||||
// non-power-of-two) vector types. We do this by introducing a layer of
|
||||
// abstraction: we introduce a high level vector-like concept called a
|
||||
// "sharded vector" that models data paralleism, and is mapped to a sequence
|
||||
// "sharded vector" that models data parallelism, and is mapped to a sequence
|
||||
// scalar and vector llvm::Value s.
|
||||
//
|
||||
// For example, we can represent 29 f32 elements by a sharded vector mapped to
|
||||
|
@ -185,7 +185,7 @@ llvm::Value* GenerateVF32Exp(llvm::IRBuilder<>* b, llvm::Value* input,
|
||||
// value of n clamped to [-127, 127]. In the case where n' = 127, `a` can grow
|
||||
// up to as large as 88.8 - 127 * log(2) which is about 0.7703. Even though
|
||||
// this value of `a` is outside our previously specified range, e^a will still
|
||||
// only have a relative error of approximetely 2^-16 at worse. In practice
|
||||
// only have a relative error of approximately 2^-16 at worse. In practice
|
||||
// this seems to work well enough; it passes our exhaustive tests, breaking
|
||||
// only one result, and by one ulp (we return exp(88.7228394) = max-float but
|
||||
// we should return inf).
|
||||
|
Some files were not shown because too many files have changed in this diff.