From e664420b79a93ae1a47c7ccd70ebe0fb31819ff6 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki
Date: Mon, 9 Dec 2019 18:21:12 +0900
Subject: [PATCH 1/3] minor spelling tweaks

---
 .../compiler/jit/compilability_check_util.cc | 4 +--
 tensorflow/compiler/jit/deadness_analysis.cc | 6 ++--
 .../compiler/jit/encapsulate_subgraphs_pass.h | 2 +-
 tensorflow/compiler/jit/encapsulate_util.h | 2 +-
 .../extract_outside_compilation_pass_test.cc | 2 +-
 .../compiler/jit/graphcycles/graphcycles.h | 2 +-
 .../compiler/jit/mark_for_compilation_pass.cc | 2 +-
 tensorflow/compiler/jit/node_matchers.h | 2 +-
 tensorflow/compiler/jit/node_matchers_test.cc | 2 +-
 tensorflow/compiler/jit/ops/xla_ops.cc | 2 +-
 tensorflow/compiler/jit/xla_device_context.cc | 2 +-
 .../compiler/jit/xla_kernel_creator_util.cc | 2 +-
 tensorflow/compiler/jit/xla_launch_util.cc | 2 +-
 .../lite/quantization/quantization_utils.cc | 2 +-
 .../lite/quantization/quantization_utils.h | 4 +--
 .../mlir/lite/transforms/extract_ophint.cc | 8 ++---
 .../compiler/mlir/lite/transforms/optimize.cc | 4 +--
 .../transforms/optimize_functional_ops.cc | 6 ++--
 .../mlir/lite/transforms/prepare_tf.cc | 2 +-
 .../compiler/mlir/lite/utils/lstm_utils.cc | 2 +-
 .../analysis/side_effect_analysis.cc | 4 +--
 .../analysis/side_effect_analysis.h | 6 ++--
 .../mlir/tensorflow/ir/tf_generated_ops.td | 6 ++--
 .../mlir/tensorflow/transforms/lower_tf.cc | 2 +-
 .../translate/executor_to_control_dialect.cc | 2 +-
 .../tensorflow/translate/export_graphdef.cc | 2 +-
 .../mlir/tensorflow/translate/import_model.cc | 2 +-
 tensorflow/compiler/mlir/xla/ir/hlo_ops.td | 2 +-
 .../mlir/xla/transforms/canonicalize.td | 4 +--
 .../mlir/xla/transforms/legalize_tf.cc | 4 +--
 .../xla/transforms/lower_complex_patterns.td | 4 +--
 .../xla/transforms/map_lhlo_to_scalar_op.h | 2 +-
 .../compiler/tests/matrix_diag_ops_test.py | 6 ++--
 .../compiler/tests/quantized_ops_test.py | 2 +-
 tensorflow/compiler/tests/randomized_tests.cc | 2 +-
 .../tf2tensorrt/convert/convert_graph.cc | 6 ++--
 .../tf2tensorrt/convert/convert_nodes.cc | 2 +-
 .../tf2tensorrt/convert/convert_nodes.h | 4 +--
 .../tf2tensorrt/convert/convert_nodes_test.cc | 4 +--
 .../tf2tensorrt/kernels/trt_engine_op.cc | 2 +-
 .../kernels/trt_engine_resource_ops_test.cc | 2 +-
 .../compiler/tf2tensorrt/segment/segment.cc | 2 +-
 .../tf2tensorrt/utils/trt_int8_calibrator.h | 2 +-
 .../compiler/tf2xla/functionalize_cond.cc | 8 ++---
 .../compiler/tf2xla/functionalize_cond.h | 2 +-
 .../compiler/tf2xla/kernels/assert_op.cc | 2 +-
 .../compiler/tf2xla/kernels/pooling_ops.cc | 2 +-
 .../compiler/tf2xla/kernels/xla_conv_op.cc | 2 +-
 .../compiler/tf2xla/kernels/xla_svd_op.cc | 2 +-
 tensorflow/compiler/tf2xla/ops/xla_ops.cc | 8 ++---
 tensorflow/compiler/tf2xla/shape_util.h | 2 +-
 tensorflow/compiler/tf2xla/tf2xla.proto | 2 +-
 tensorflow/compiler/tf2xla/xla_op_kernel.h | 4 +--
 .../xla/client/lib/comparators_test.cc | 2 +-
 tensorflow/compiler/xla/client/lib/matrix.cc | 4 +--
 tensorflow/compiler/xla/client/lib/matrix.h | 2 +-
 tensorflow/compiler/xla/client/lib/pooling.cc | 6 ++--
 tensorflow/compiler/xla/client/lib/slicing.cc | 30 +++++++++----------
 tensorflow/compiler/xla/client/lib/testing.cc | 2 +-
 .../compiler/xla/client/local_client.cc | 6 ++--
 tensorflow/compiler/xla/client/local_client.h | 2 +-
 tensorflow/compiler/xla/client/xla_builder.h | 6 ++--
 .../compiler/xla/client/xla_builder_test.cc | 2 +-
 .../compiler/xla/debug_options_flags.cc | 4 +--
 tensorflow/compiler/xla/debug_options_flags.h | 4 +--
 .../compiler/xla/execution_options_util.h | 2 +-
 .../compiler/xla/g3doc/operation_semantics.md | 2 +-
 tensorflow/compiler/xla/literal.cc | 2 +-
 tensorflow/compiler/xla/literal.h | 4 +--
 tensorflow/compiler/xla/literal_comparison.cc | 2 +-
 tensorflow/compiler/xla/literal_test.cc | 2 +-
 .../compiler/xla/parse_flags_from_env.h | 2 +-
 .../compiler/xla/parse_flags_from_env_test.cc | 6 ++--
 .../compiler/xla/python/local_client.cc | 2 +-
 .../python/tpu_driver/client/tpu_client.cc | 2 +-
 .../xla/python/tpu_driver/tpu_driver.h | 2 +-
 tensorflow/compiler/xla/python/xla_client.py | 2 +-
 tensorflow/compiler/xla/python_api/types.py | 4 +--
 .../xla/service/algebraic_simplifier.cc | 6 ++--
 .../xla/service/algebraic_simplifier_test.cc | 2 +-
 .../xla/service/batchnorm_expander_test.cc | 2 +-
 .../compiler/xla/service/buffer_assignment.cc | 2 +-
 .../xla/service/buffer_assignment_test.cc | 2 +-
 .../compiler/xla/service/buffer_value.h | 2 +-
 .../compiler/xla/service/call_inliner.cc | 2 +-
 .../compiler/xla/service/cholesky_expander.cc | 2 +-
 .../xla/service/collective_ops_utils.h | 2 +-
 tensorflow/compiler/xla/service/compiler.h | 2 +-
 .../compiler/xla/service/computation_placer.h | 2 +-
 .../xla/service/conditional_simplifier.cc | 2 +-
 .../service/convolution_group_converter.cc | 4 +--
 .../xla/service/copy_insertion_test.cc | 4 +--
 .../compiler/xla/service/cpu/cpu_compiler.cc | 2 +-
 .../xla/service/cpu/cpu_executable.cc | 2 +-
 .../xla/service/cpu/cpu_layout_assignment.cc | 2 +-
 .../xla/service/cpu/dot_op_emitter.cc | 2 +-
 .../xla/service/cpu/dot_op_emitter_internal.h | 2 +-
 .../compiler/xla/service/cpu/ir_emitter.cc | 2 +-
 .../compiler/xla/service/cpu/ir_emitter.h | 4 +--
 .../xla/service/cpu/llvm_ir_runtime.cc | 2 +-
 .../service/cpu/parallel_task_assignment.cc | 2 +-
 .../xla/service/cpu/runtime_fork_join.cc | 2 +-
 .../xla/service/cpu/shape_partition.cc | 2 +-
 .../xla/service/cpu/tiled_dot_emitter.cc | 6 ++--
 tensorflow/compiler/xla/service/dump.cc | 2 +-
 .../service/dynamic_dimension_inference.cc | 8 ++---
 .../compiler/xla/service/dynamic_padder.cc | 6 ++--
 .../xla/service/elemental_ir_emitter.cc | 4 +--
 .../xla/service/gpu/backend_configs.proto | 2 +-
 .../service/gpu/cudnn_batchnorm_rewriter.cc | 4 +--
 .../xla/service/gpu/cudnn_batchnorm_thunk.cc | 2 +-
 .../service/gpu/cudnn_pad_for_convolutions.cc | 10 +++---
 .../compiler/xla/service/gpu/fusion_merger.cc | 4 +--
 .../xla/service/gpu/fusion_merger_test.cc | 12 ++++---
 .../service/gpu/gpu_conv_algorithm_picker.cc | 6 ++--
 .../gpu/gpu_conv_padding_legalization.cc | 2 +-
 .../xla/service/gpu/gpu_debug_info_manager.h | 2 +-
 .../xla/service/gpu/gpu_executable.cc | 4 +--
 .../compiler/xla/service/gpu/gpu_executable.h | 2 +-
 .../compiler/xla/service/gpu/gpu_fusible.cc | 2 +-
 .../compiler/xla/service/gpu/gpu_fusible.h | 2 +-
 .../xla/service/gpu/gpu_layout_assignment.cc | 2 +-
 .../xla/service/gpu/gpu_transfer_manager.cc | 2 +-
 .../xla/service/gpu/ir_emitter_unnested.cc | 2 +-
 .../xla/service/gpu/ir_emitter_unnested.h | 2 +-
 .../xla/service/gpu/kernel_mapping_scheme.h | 2 +-
 .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 2 +-
 .../xla/service/gpu/multi_output_fusion.cc | 2 +-
 .../xla/service/gpu/nvptx_compiler.cc | 2 +-
 .../xla/service/gpu/stream_executor_util.h | 2 +-
 .../compiler/xla/service/gpu/thunk_emitter.h | 2 +-
 .../xla/service/hlo_alias_analysis.cc | 4 +--
 tensorflow/compiler/xla/service/hlo_buffer.h | 2 +-
 .../compiler/xla/service/hlo_casting_utils.h | 2 +-
 .../compiler/xla/service/hlo_computation.cc | 2 +-
 .../compiler/xla/service/hlo_computation.h | 2 +-
 .../compiler/xla/service/hlo_cost_analysis.cc | 2 +-
 .../compiler/xla/service/hlo_cost_analysis.h | 2 +-
 .../compiler/xla/service/hlo_cse_test.cc | 6 ++--
 .../xla/service/hlo_dataflow_analysis.cc | 2 +-
 .../xla/service/hlo_dataflow_analysis_test.cc | 2 +-
 .../xla/service/hlo_domain_isolator.h | 2 +-
 .../xla/service/hlo_domain_remover.cc | 2 +-
 .../compiler/xla/service/hlo_domain_test.cc | 2 +-
 .../xla/service/hlo_domain_verifier.cc | 4 +--
 .../compiler/xla/service/hlo_evaluator.cc | 4 +--
 .../compiler/xla/service/hlo_evaluator.h | 4 +--
 .../compiler/xla/service/hlo_graph_dumper.cc | 2 +-
 .../service/hlo_input_output_alias_config.h | 4 +--
 .../compiler/xla/service/hlo_instruction.cc | 4 +--
 .../compiler/xla/service/hlo_instruction.h | 2 +-
 .../xla/service/hlo_instruction_test.cc | 6 ++--
 .../compiler/xla/service/hlo_instructions.cc | 2 +-
 .../compiler/xla/service/hlo_instructions.h | 2 +-
 .../xla/service/hlo_live_range_test.cc | 4 +--
 .../xla/service/hlo_liveness_analysis.cc | 4 +--
 .../xla/service/hlo_liveness_analysis_test.cc | 6 ++--
 .../xla/service/hlo_memory_scheduler.cc | 2 +-
 tensorflow/compiler/xla/service/hlo_module.h | 2 +-
 .../xla/service/hlo_module_dce_test.cc | 2 +-
 .../xla/service/hlo_module_group_util.h | 2 +-
 .../compiler/xla/service/hlo_ordering_test.cc | 4 +--
 tensorflow/compiler/xla/service/hlo_parser.cc | 4 +--
 .../compiler/xla/service/hlo_parser_test.cc | 4 +--
 .../xla/service/hlo_rematerialization_test.cc | 2 +-
 tensorflow/compiler/xla/service/hlo_runner.h | 2 +-
 .../compiler/xla/service/hlo_sharding.h | 2 +-
 .../xla/service/hlo_sharding_metadata.cc | 2 +-
 .../xla/service/indexed_array_analysis.cc | 2 +-
 .../service/indexed_array_analysis_test.cc | 2 +-
 .../compiler/xla/service/layout_assignment.cc | 2 +-
 .../compiler/xla/service/layout_assignment.h | 4 +--
 .../llvm_ir/dynamic_update_slice_util.cc | 2 +-
 .../compiler/xla/service/llvm_ir/llvm_util.cc | 2 +-
 .../xla/service/memory_space_assignment.cc | 2 +-
 .../xla/service/memory_space_assignment.h | 2 +-
 .../service/memory_space_assignment_test.cc | 2 +-
 .../experimental/conv_emitter/conv_emitter.cc | 6 ++--
 .../experimental/conv_emitter/conv_emitter.h | 2 +-
 .../xla/service/mlir_gpu/failover_compiler.cc | 2 +-
 .../xla/service/mlir_gpu/mlir_compiler.cc | 2 +-
 .../xla/service/multi_output_fusion.h | 2 +-
 .../compiler/xla/service/op_expander_pass.h | 2 +-
 .../compiler/xla/service/reshape_mover.cc | 2 +-
 tensorflow/compiler/xla/service/service.cc | 8 ++---
 .../xla/service/shape_inference_test.cc | 4 +--
 .../xla/service/tree_reduction_rewriter.h | 2 +-
 .../xla/service/tuple_points_to_analysis.h | 2 +-
 .../service/while_loop_constant_sinking.cc | 2 +-
 .../while_loop_invariant_code_motion.cc | 2 +-
 .../xla/service/while_loop_simplifier_test.cc | 2 +-
 tensorflow/compiler/xla/shape_test.cc | 6 ++--
 tensorflow/compiler/xla/status_macros_test.cc | 2 +-
 .../xla/tests/array_elementwise_ops_test.cc | 2 +-
 .../compiler/xla/tests/bfloat16_test.cc | 8 ++---
 .../compiler/xla/tests/collective_ops_test.cc | 2 +-
 .../compiler/xla/tests/convolution_test.cc | 6 ++--
 .../compiler/xla/tests/dynamic_ops_test.cc | 2 +-
 .../xla/tests/exhaustive_binary_test.cc | 8 ++---
 .../xla/tests/exhaustive_op_test_utils.h | 4 +--
 .../xla/tests/exhaustive_unary_test.cc | 8 ++---
 .../xla/tests/gather_operation_test.cc | 2 +-
 tensorflow/compiler/xla/tests/map_test.cc | 2 +-
 .../xla/tests/multioutput_fusion_test.cc | 4 +--
 .../compiler/xla/tools/hlo_proto_to_json.cc | 4 +--
 .../xla/tools/interactive_graphviz_test.sh | 2 +-
 .../compiler/xla/tools/run_hlo_module_main.cc | 2 +-
 tensorflow/compiler/xla/util.cc | 2 +-
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 2 +-
 tensorflow/compiler/xrt/xrt_memory_manager.cc | 2 +-
 tensorflow/compiler/xrt/xrt_memory_manager.h | 2 +-
 211 files changed, 336 insertions(+), 336 deletions(-)

diff --git a/tensorflow/compiler/jit/compilability_check_util.cc b/tensorflow/compiler/jit/compilability_check_util.cc
index b8f04f7d791..14ade0ea920 100644
--- a/tensorflow/compiler/jit/compilability_check_util.cc
+++ b/tensorflow/compiler/jit/compilability_check_util.cc
@@ -509,10 +509,10 @@ RecursiveCompilabilityChecker::OperationFilter CreateOperationFilter(
     auto it = uncompilable_nodes->find(function_identifier);
     if (it == uncompilable_nodes->end()) {
       std::vector
-          uncompileable_node_info{std::move(node_info)};
+          uncompilable_node_info{std::move(node_info)};
       uncompilable_nodes->emplace(
           std::move(function_identifier),
-          std::make_pair(function, std::move(uncompileable_node_info)));
+          std::make_pair(function, std::move(uncompilable_node_info)));
     } else {
       it->second.second.emplace_back(std::move(node_info));
     }
diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index 912991e267a..b78bcd36d47 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -96,7 +96,7 @@ limitations under the License.
 // Symbolic > NonSymbolic. The lattice has height = 2 so two iterations are
 // sufficient to converge.
 //
-// We first do an optimisitc analysis and, if it does not converge, we then fall
+// We first do an optimistic analysis and, if it does not converge, we then fall
 // back to a pessimistic analysis. The optimistic analysis assigns the same
 // symbolic predicate to all the merge nodes whose preceding enter nodes have
 // the same frame name on the first iteration. On the second iteration, if all
@@ -1255,7 +1255,7 @@ Status DeadnessAnalysisImpl::GetFrameBasedTopologicalOrder(
     } else if (IsRootExit(node)) {
       ++num_exits_for_frame[cf.frame_name];
     }
-    // Edge NextIteration->Merge is counted before starting the traveral to
+    // Edge NextIteration->Merge is counted before starting the traversal to
    // break the backedges.
    if (IsMerge(node)) {
      for (const Edge* e : node->in_edges()) {
@@ -1458,7 +1458,7 @@ Status DeadnessAnalysisImpl::PopulateFrame(absl::Span topo,

   for (Node* n : topo) {
     // The nodes added to should_revisit in the previous loop need to be
-    // revisited now. Reprocesing these initial nodes may add *their* consumers
+    // revisited now. Reprocessing these initial nodes may add *their* consumers
     // to should_revisit, and these newly added nodes will also be processed by
     // this very same loop. Since we're traversing the graph in topological
     // order (producers before consumers) and HandleNode(n) can only ever add
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
index 50e4149bc08..8b627cd959a 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
@@ -95,7 +95,7 @@ extern const char* const kXlaNumResourceArgsAttr;
 extern const char* const kXlaHasReferenceVarsAttr;

 // Sorts each node's control inputs by their names. This guarantees that for two
-// structually equivalent GraphDefs, we get the same traversal ordering on
+// structurally equivalent GraphDefs, we get the same traversal ordering on
 // node's control input fields.
 // TODO(hpucha): Move the utilities to a more appropriate place.
void SortControlInputs(GraphDef* gdef); diff --git a/tensorflow/compiler/jit/encapsulate_util.h b/tensorflow/compiler/jit/encapsulate_util.h index 406e4a797a4..9ddbe4d5cc9 100644 --- a/tensorflow/compiler/jit/encapsulate_util.h +++ b/tensorflow/compiler/jit/encapsulate_util.h @@ -72,7 +72,7 @@ extern const char kXlaLiftedArgOutsideCompilationAttrName[]; // Attribute indicating that this is an IdentityN node receiving inputs for a // outside compilation Placeholder node (the original outside compilation node -// is moved out of TPU comutation, and we left a Placeholder node there). +// is moved out of TPU computation, and we left a Placeholder node there). // Attribute value will be a string, which is the outside compilation cluster // name for the outside compilation Placeholder node. extern const char kXlaOutsideCompilationInputsAttrName[]; diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc index 26f830c59c3..a6f2bd41275 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc @@ -941,7 +941,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, // "const0" // "identity0" = "const0" (outside compilation cluster "0") // "identity1" = "const0" "^identity0" (outside compilation cluster "1", - // control depdent on cluster "0") + // control dependent on cluster "0") // "identity2" = "identity1" FunctionDefLibrary fdl; { diff --git a/tensorflow/compiler/jit/graphcycles/graphcycles.h b/tensorflow/compiler/jit/graphcycles/graphcycles.h index ce171a2ead0..bbf61016fb3 100644 --- a/tensorflow/compiler/jit/graphcycles/graphcycles.h +++ b/tensorflow/compiler/jit/graphcycles/graphcycles.h @@ -123,7 +123,7 @@ class GraphCycles { absl::Span Successors(int32 node) const; absl::Span Predecessors(int32 node) const; - // Return a copy of the sucessors set. This is needed for code using the + // Return a copy of the successors set. This is needed for code using the // collection while modifying the GraphCycles. std::vector SuccessorsCopy(int32 node) const; // Return a copy of the predecessors set. This is needed for code using the diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 4ca52a26bbd..0ab746ead95 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -1366,7 +1366,7 @@ Status MarkForCompilationPassImpl::Run() { void MarkForCompilationPassImpl::DumpPostClusteringGraphs() { DumpGraphToFile("mark_for_compilation", *graph_, flib_def_); - // We also dump out an annoated version of the TF graph where the nodes + // We also dump out an annotated version of the TF graph where the nodes // names are prefixed with the cluster names. This can help visualizing the // clustering decisions on TensorBoard. Graph new_graph(graph_->op_registry()); diff --git a/tensorflow/compiler/jit/node_matchers.h b/tensorflow/compiler/jit/node_matchers.h index 0d4f02c236b..ea47394bf7d 100644 --- a/tensorflow/compiler/jit/node_matchers.h +++ b/tensorflow/compiler/jit/node_matchers.h @@ -187,7 +187,7 @@ impl::NodeMatcherProperties Op(string op); // Matches a node with assigned device `assigned_device`. 
impl::NodeMatcherProperties AssignedDevice(string assigned_device); -// Matches a node with a boolean typed attrbute named `name` and with value +// Matches a node with a boolean typed attribute named `name` and with value // `value`. template impl::NodeMatcherProperties Attr(const string& name, ValueTy value) { diff --git a/tensorflow/compiler/jit/node_matchers_test.cc b/tensorflow/compiler/jit/node_matchers_test.cc index c3f0dfece85..8edb3e456c4 100644 --- a/tensorflow/compiler/jit/node_matchers_test.cc +++ b/tensorflow/compiler/jit/node_matchers_test.cc @@ -125,7 +125,7 @@ TEST(NodeMatchers, CheckControlDependence) { "is any node"); } -TEST(NodeMatchers, ConstVaulue) { +TEST(NodeMatchers, ConstValue) { Scope root = Scope::NewRootScope().ExitOnError(); Output placeholder = ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT); diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc index 0217ba71929..b1cf2166721 100644 --- a/tensorflow/compiler/jit/ops/xla_ops.cc +++ b/tensorflow/compiler/jit/ops/xla_ops.cc @@ -110,7 +110,7 @@ Merges the outputs from the PartitionedCall node and the _XlaRun node. Unlike the TensorFlow Merge op, which requires inputs of some types to be placed on the host, the _XlaMerge op can merge inputs of all types when placed on the device. This prevents the need for copy operations, in -particluar when an XLA cluster has int32 outputs. The _XlaMerge up does not +particular when an XLA cluster has int32 outputs. The _XlaMerge up does not have a value_index output that identifies the chosen input. )"); diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index c1fb2f6671f..996ad09e2a9 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -262,7 +262,7 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor, << xla_tensor->shaped_buffer().ToString(); // For devices don't allow sync on completion, the device execution is // deferred. We check the execution stream status here to avoid wrong - // results from a failed stream being propogated to following + // results from a failed stream being propagated to following // host-side ops. if (!device_allows_sync_on_completion) { done_status.Update(xla_tensor->RefreshStatusOfStreams()); diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.cc b/tensorflow/compiler/jit/xla_kernel_creator_util.cc index 96bde65003f..6441dd3ed28 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_util.cc @@ -222,7 +222,7 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, // using xla::ComputationDataHandle, which is just a symbolic handle that // xla::ComputationBuilder assigns. How does this handle gets assigned for // constant arguments? Even constant arguments get an _Arg node in the graph - // instatiated for Function compilation. The tf2xla kernel for constant _Arg + // instantiated for Function compilation. The tf2xla kernel for constant _Arg // nodes takes the constant value, converts it to XlaLiteral, and feeds it // to xla::ComputationBuilder.ConstantLiteral, which returns the handle. 
This // constant XlaLiteral is included in the HLO graph, and subsequently, in diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index ddaaefcef7d..8dbeea50ffa 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -84,7 +84,7 @@ VariableInfo::~VariableInfo() { } } -// Returns a vector of VaribleInfo instances for the resource variable inputs to +// Returns a vector of VariableInfo instances for the resource variable inputs to // the kernel with context `ctx`. The input indices for the resource variable // inputs are in `variable_indices`. static Status GetVariableInfosFromCtxInputs( diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc index eec93e9ae6a..ca10809be69 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc @@ -416,7 +416,7 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func, if (res->hasOneUse()) { if (auto next_stats = llvm::dyn_cast( *res->getUsers().begin())) { - // quantization parameters can be propgated to next_stats + // quantization parameters can be propagated to next_stats redundant_stats_ops.insert(next_stats); // add next_stats to the work list so propagation can // continue. diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h index c9f9d6619a3..9689a85ef6f 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h @@ -342,14 +342,14 @@ ElementsAttr Quantize(Attribute real_value, Type tensor_type); // parameters in this type is based on the min and max element of the // attribute. When the elements in the `attr` are not in floating-point, or // the value range isn't straddling zero, an empty type is returned. The min/max -// are ajusted to be symmetric if `symmetric` flag is set to True. And +// are adjusted to be symmetric if `symmetric` flag is set to True. And // `symmetric` can only be set to true when it is signed and narrow_range. Type GetUniformQuantizedTypeForWeight(ElementsAttr attr, bool symmetric, unsigned num_bits, bool is_sign, bool narrow_range); // Returns the per channel quantized type for an element attribute. -// `quant_dim` defines the quantization axis. The channel min/max are ajusted +// `quant_dim` defines the quantization axis. The channel min/max are adjusted // to be symmetric if `symmetric` flag is set to True. And `symmetric` can only // be set to true when it is signed and narrow_range. 
Type GetUniformQuantizedPerAxisTypeForWeight(ElementsAttr attr, int quant_dim, diff --git a/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc b/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc index 63cf4240224..52eb6216e90 100644 --- a/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc +++ b/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc @@ -413,13 +413,13 @@ void PreprocessTopoSortGraph( } operation_to_in_degrees->try_emplace(&op, input_ops.size()); for (auto* input_op : input_ops) { - auto preceeding_op_it = operation_to_outputs->find(input_op); - if (preceeding_op_it == operation_to_outputs->end()) { + auto preceding_op_it = operation_to_outputs->find(input_op); + if (preceding_op_it == operation_to_outputs->end()) { auto result = operation_to_outputs->try_emplace( input_op, llvm::DenseSet()); - preceeding_op_it = result.first; + preceding_op_it = result.first; } - preceeding_op_it->second.insert(&op); + preceding_op_it->second.insert(&op); } } } diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index d8697a8c4e0..1313bae97a1 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -394,14 +394,14 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { // w * (x ' c) + b => (w ' c) x + b // so we have to update the weight. bool is_mul = llvm::isa(binary_op); - auto new_fitler = + auto new_filter = filter_cst.mapValues(filter_type.getElementType(), [&](APFloat it) { return (is_mul ? it * cst_value : it / cst_value).bitcastToAPInt(); }); // We recreate the constant op in case it is shared by the other ops. This // might increase the model size. auto new_filter_op = rewriter.create( - fc_op.getLoc(), filter->getType(), new_fitler); + fc_op.getLoc(), filter->getType(), new_filter); fc_op.setOperand(0, binary_op->getOperand(0)); if (fc_op.filter() != filter) { // This filter goes through quantize and dequantize ops. Then we just diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc index 173785ba5b0..59dc271400e 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc @@ -132,8 +132,8 @@ class FoldIfOp : public OpRewritePattern { // Erases functions from the given candidates that are not referenced by any of // the ops in the module. -static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) { - if (candiate_funcs.empty()) return; +static void EraseDeadFuncs(const FuncSet& candidate_funcs, ModuleOp module) { + if (candidate_funcs.empty()) return; SymbolTable manager(module); @@ -149,7 +149,7 @@ static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) { } }); - for (FuncOp func : candiate_funcs) { + for (FuncOp func : candidate_funcs) { if (!in_use_funcs.count(func)) manager.erase(func); } } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 823efdc3ef5..45248ddc01c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -132,7 +132,7 @@ struct InsertTFLQuantOpsAfterTFFakeQuantOp int quant_dim = -1; if (PerAxis) { - // This is a special case that the quant_dim is the last dimentions. 
+ // This is a special case that the quant_dim is the last dimensions. quant_dim = res->getType().template cast().getRank() - 1; } // Use the min/max from the operands and the num_bits and narrow_range diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc index faf6427cedd..92a8ad49bf4 100644 --- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc @@ -98,7 +98,7 @@ Value* SliceRankedTensor(OpBuilder* builder, Value* input, ArrayRef size_values, mlir::Location location) { // If the size of the tensor to be sliced from the input overflows - // the input tensor's dimenions, return 0-valued tensor of the requested + // the input tensor's dimensions, return 0-valued tensor of the requested // shape. ArrayRef input_shape = GetRankedTensorShape(input); for (int i = 0; i < input_shape.size(); i++) { diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc index 898393479b0..5e0e9aef03c 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc @@ -122,7 +122,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { std::get<1>(operand_and_result)); } } else if (auto replicate = llvm::dyn_cast(op)) { - // The nested block for RepliateOp is handled separately in side-effect + // The nested block for ReplicateOp is handled separately in side-effect // analysis. Inside that block, we can still treat its block arguments as // different resources. for (auto arg : replicate.GetBody().getArguments()) { @@ -305,7 +305,7 @@ void SideEffectAnalysis::AnalyzeRegion( // region, and tracking resource accesses in per_resource_access_info_. // Returns whether an access to `resource` can skip control edges from - // prevoius accesses to unknown resources, due to that earlier accesses to + // previous accesses to unknown resources, due to that earlier accesses to // `resource` already indirectly tracked previous accesses to uknown // resources. `read_only` specifies the type of access of the current op being // considered. diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h index 3d65217db27..8d8815d709d 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h @@ -105,7 +105,7 @@ class SideEffectAnalysis { void ConsumeChildAnalyses( llvm::SmallVector&& children); - // Updates control_predecessors_ for `op` that is being visted, on the given + // Updates control_predecessors_ for `op` that is being visited, on the given // `resource_id`. void AddPredecessorsForAccess(int64_t resource_id, Operation* op, bool read_only); @@ -124,7 +124,7 @@ class SideEffectAnalysis { sorted_control_successors_; // Internal per-resource data structure when we build the dependencies. - struct PerResourceAcessInfo { + struct PerResourceAccessInfo { // Last op that writes the resource before the current op being analyzed. Operation* last_write = nullptr; // Read ops since last_write before the current op being analyzed. 
@@ -134,7 +134,7 @@ class SideEffectAnalysis { bool tracked_last_unknown_read = false; bool tracked_last_unknown_write = false; }; - llvm::SmallDenseMap + llvm::SmallDenseMap per_resource_access_info_; }; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 5b5c028c89d..691ce85dbc8 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -1317,7 +1317,7 @@ Operations are applied to the input(s) according to the following rules: Considering the batch matrix multiplication equation again (`bij,bjk->bik`), the contracted axis label is `j`. - (e) Expand Diagonal: If the output subcripts contain repeated (explicit) axis + (e) Expand Diagonal: If the output subscripts contain repeated (explicit) axis labels, the opposite operation of (a) is applied. For example, in the equation `i->iii`, and input shape `[3]`, the output of shape `[3, 3, 3]` are all zeros, except for the (generalized) diagonal which is populated @@ -1325,7 +1325,7 @@ Operations are applied to the input(s) according to the following rules: Note: This operation is not supported by `np.einsum` or `tf.einsum`; it is provided to enable computing the symbolic gradient of `tf.einsum`. -The output subcripts must contain only labels appearing in at least one of the +The output subscripts must contain only labels appearing in at least one of the input subscripts. Furthermore, all dimensions mapping to the same axis label must be equal. @@ -1337,7 +1337,7 @@ according to standard NumPy broadcasting The broadcasted dimensions are placed in the corresponding location of the ellipsis in the output subscript. If the broadcasted dimensions are non-empty -and the output subcripts do not contain ellipsis, then an InvalidArgument error +and the output subscripts do not contain ellipsis, then an InvalidArgument error is raised. @compatibility(numpy) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index 89941c2fab4..caacc376a0f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -84,7 +84,7 @@ DenseIntElementsAttr GetBiasAddGradReductionIndices(int64_t rank, tensorflow::TensorFormat format; if (!FormatFromString(data_format.getValue().str(), &format)) return {}; - // Reudce along all dimensions except the feature dimension. + // Reduce along all dimensions except the feature dimension. int64_t feature_dim = GetTensorFeatureDimIndex(rank, format); llvm::SmallVector dims_to_reduce(rank - 1); std::iota(dims_to_reduce.begin(), dims_to_reduce.begin() + feature_dim, 0); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc b/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc index 280f8f195de..8a4f8aacc0d 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc @@ -45,7 +45,7 @@ struct ExecutorToControlDialectConversion // Replace all uses of value `v` with a list of new values. Because number of // new values might be greater than 1, users of `v` might be replaced with their -// clones in case of non-resizble operands list. +// clones in case of non-resizable operands list. 
void ReplaceAllUsesOfValueWithValues(Value *v, Operation::operand_range new_values) { int new_values_size = std::distance(new_values.begin(), new_values.end()); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc index 58242e62f1c..2ebb7505b7f 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc @@ -197,7 +197,7 @@ class Exporter { // Each NextIteration node in the original graph is converted to a pair of // source and sink operations in the MLIR, and we use the following two maps - // to pair and convet them back to a single NextIteration node. We choose to + // to pair and convert them back to a single NextIteration node. We choose to // the "name" attribute, which is from the unique node name, to find out the // pairs: When scanning the operations in the block, the source operations // are inserted to the name_to_inst_ first, and the other "sink" operation diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 3bf2c34e2c7..c7528682001 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -2283,7 +2283,7 @@ class StructuredValueLinearizer { // Returns the list of index paths to each leaf of the StructuredValue, // in a linearized order matching `tf.nest.flatten`. // - // If an error ocurred during the linearization process, an error message with + // If an error occurred during the linearization process, an error message with // `error_context` prepended will be included in the returned status. StatusOr> GetLeafIndexPaths( llvm::StringRef error_context) const; diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td index 2b325b42e23..48e4ef1c3be 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td @@ -759,7 +759,7 @@ def HLO_UnaryEinsumOp: HLO_Op<"unary_einsum", [NoSideEffect]> { let hasCanonicalizer = 1; - // UnarayEinsumOp is unconditionally canonicalized to the binary EinsumOp so + // UnaryEinsumOp is unconditionally canonicalized to the binary EinsumOp so // the HLO converter shouldn't be invoked. 
let hasCustomHLOConverter = 1; } diff --git a/tensorflow/compiler/mlir/xla/transforms/canonicalize.td b/tensorflow/compiler/mlir/xla/transforms/canonicalize.td index 37f6d7deaa3..d510a3df994 100644 --- a/tensorflow/compiler/mlir/xla/transforms/canonicalize.td +++ b/tensorflow/compiler/mlir/xla/transforms/canonicalize.td @@ -38,7 +38,7 @@ def DynamicSliceToSlice: Pat<(HLO_DynamicSliceOp HLO_Tensor:$input, (BuildSliceLimits $starting_indices, $slice_sizes), (BuildSliceStrides $input))>; -def UnaryToBianryEinsumEq : NativeCodeCall< +def UnaryToBinaryEinsumEq : NativeCodeCall< "$_builder.getStringAttr(\",\" + $0.getValue().str())">; // Convert UnaryEinsumOp to EinsumOp with two operands with redundant first @@ -46,4 +46,4 @@ def UnaryToBianryEinsumEq : NativeCodeCall< def UnaryEinsumToEinsum : Pat< (HLO_UnaryEinsumOp $operand, $equation), (HLO_EinsumOp (HLO_ConstOp (GetScalarOfType<1> $operand)), - $operand, (UnaryToBianryEinsumEq $equation))>; + $operand, (UnaryToBinaryEinsumEq $equation))>; diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 8cee5e23d64..3187ffa9a64 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -448,7 +448,7 @@ static DenseIntElementsAttr TFSliceSizes2HLOSliceSizes( // `element_types`, create two block arguments, one for lhs and one for rhs, and // generates xla_hlo.compare op to compare them with the given `direction`. // -// Note that this right now only does comparsion on the first pair of block +// Note that this right now only does comparision on the first pair of block // arguments. static void BuildSortComparisonBody(llvm::ArrayRef element_types, StringRef direction, Region *body, @@ -2149,7 +2149,7 @@ class ConvertTopKV2Op : public OpRewritePattern { // Converts tf.Unpack to a series of XLA HLO slice ops. // // Each slice takes one element along the dimension to unpack and takes the full -// range for all other dimenions. Each slice is then reshaped to drop the +// range for all other dimensions. Each slice is then reshaped to drop the // dimension to unpack (which is always of size 1). // TODO(antiagainst): consider changing this into a TF internal lowering pass. class ConvertUnpackOp : public OpRewritePattern { diff --git a/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td b/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td index 252a10fc412..d8a5ae6c6de 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td @@ -107,8 +107,8 @@ def : Pat<(HLO_AbsOp HLO_ComplexTensor:$val), (NullDenseIntElementsAttr))), (HLO_ConstOp (ConstantSplat<"0"> $real)))>; -// Expononetial can be lowered to an exponential on the real component and a -// sum of sinusoids of the imageinary component, which equates to a normal +// Exponential can be lowered to an exponential on the real component and a +// sum of sinusoids of the imaginary component, which equates to a normal // exponential operator multiplied by Euler's formula. 
// // Exp(a + ib) = Exp(a) * Exp(ib) = Exp(a) * (Cos(b) + iSin(b)) diff --git a/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h b/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h index 4107548a26b..11e3af7649b 100644 --- a/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h +++ b/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h @@ -157,7 +157,7 @@ inline Operation* MapLhloOpToStdScalarOp( if (element_type.isa()) { Optional predicate = getIntCmpPredicate(lhlo_op.comparison_direction()); - assert(predicate.hasValue() && "expected valid comparision direction"); + assert(predicate.hasValue() && "expected valid comparison direction"); return b.create>(lhlo_op.getLoc(), predicate.getValue(), lhs, rhs); } diff --git a/tensorflow/compiler/tests/matrix_diag_ops_test.py b/tensorflow/compiler/tests/matrix_diag_ops_test.py index 69ae03a06cf..1ca9b157fa1 100644 --- a/tensorflow/compiler/tests/matrix_diag_ops_test.py +++ b/tensorflow/compiler/tests/matrix_diag_ops_test.py @@ -114,7 +114,7 @@ def square_cases(align=None): [6, 7, 8, 9, 1], [2, 3, 4, 5, 6]]]) tests = dict() - # tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals) + # tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals) tests[-1, -1] = (np.array([[6, 4, 1, 7], [5, 2, 8, 5]]), np.array([[[0, 0, 0, 0, 0], @@ -192,7 +192,7 @@ def tall_cases(align=None): [7, 8, 9], [9, 8, 7]]]) tests = dict() - # tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals) + # tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals) tests[0, 0] = (np.array([[1, 5, 9], [3, 2, 6]]), np.array([[[1, 0, 0], @@ -276,7 +276,7 @@ def fat_cases(align=None): [8, 9, 1, 2], [3, 4, 5, 6]]]) tests = dict() - # tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals) + # tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals) tests[0, 0] = (np.array([[1, 6, 2], [4, 9, 5]]), np.array([[[1, 0, 0, 0], diff --git a/tensorflow/compiler/tests/quantized_ops_test.py b/tensorflow/compiler/tests/quantized_ops_test.py index 9a1d29c0092..100be3b9aa5 100644 --- a/tensorflow/compiler/tests/quantized_ops_test.py +++ b/tensorflow/compiler/tests/quantized_ops_test.py @@ -49,7 +49,7 @@ class QuantizedOpsTest(xla_test.XLATestCase): self.assertAllEqual(value, expected) -class DeuantizedOpsTest(xla_test.XLATestCase): +class DequantizedOpsTest(xla_test.XLATestCase): def pack_uint8_r2_to_uint32(self, test_input): num_rows, num_columns = test_input.get_shape().as_list() diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index 5a2bda93942..dfa5bc106ed 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -3423,7 +3423,7 @@ int main(int argc, char** argv) { tensorflow::Flag( "tf_xla_random_seed", &tensorflow::tf_xla_random_seed, "Random seed to use for XLA tests. <= 0 means choose a seed " - "nondetermistically."), + "nondeterministically."), // TODO(phawkins): it might make more sense to run each test up to a // configurable time bound. 
tensorflow::Flag("tf_xla_test_repetitions", diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index 20804af5229..669d38757fa 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -161,7 +161,7 @@ Status GetEngineInfo(const Graph* g, const int node_id = node->id(); const string& node_name = node->name(); - // Create input connections. Sort edges first to make determnistic since + // Create input connections. Sort edges first to make deterministic since // in_edges is a set of pointers. std::vector in_edges(node->in_edges().begin(), node->in_edges().end()); @@ -186,7 +186,7 @@ Status GetEngineInfo(const Graph* g, // If it doesn't have any edges, TF will prune it out. // // Note that the segmenter already ensure that the constant data input - // is valid and suppported by the engine. + // is valid and supported by the engine. if (!added_const_nodes.insert(input_node).second) { // Already added before. continue; @@ -209,7 +209,7 @@ Status GetEngineInfo(const Graph* g, node_id, edge->dst_input(), /*input_edge=*/true, port); } } - // Create output connections. Sort edges first to make determnistic since + // Create output connections. Sort edges first to make deterministic since // out_edges is a set of pointers. std::vector out_edges(node->out_edges().begin(), node->out_edges().end()); diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 90c28e03d4d..de2b0e4826f 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2511,7 +2511,7 @@ Status ConvertStridedSliceHelper(OpConverterParams* params, return Status::OK(); } else if (pad_dims.size() == 1) { // Only one dim is modified but we have to have 2, mark a second dim which - // will have padding of 0. The dim we add is chosen to avoid an unecessary + // will have padding of 0. The dim we add is chosen to avoid an unnecessary // transpose. if (pad_dims[0] != 2) { pad_dims.push_back(2); diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index eb51ec1b3f6..e0d95dc7528 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -141,9 +141,9 @@ Status ConvertSegmentToGraphDef( // Converts given subgraph to a TRT engine saved in 'engine'. Returns ok iff // 'builder' successfully build the engine. If the result is not ok, 'engine' // will be set to nullptr -// Once returned, 'builder' is not needed any more and can be safely detroyed. +// Once returned, 'builder' is not needed any more and can be safely destroyed. // -// - convert_successfully: indicates whether the converson to TensorRT network +// - convert_successfully: indicates whether the conversion to TensorRT network // is successful. This is different than successfully building the engine: // building can still fail afterwards. 
Status ConvertGraphDefToEngine( diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index ef03ab91714..738b848f959 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -521,7 +521,7 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) { "Scalar input tensor is not supported since the first dimension " "is treated as batch dimension by TRT"); } - // Convert non-Const. We test the case where the non-batch dimemsion is + // Convert non-Const. We test the case where the non-batch dimension is // unknown as well, to make sure the validator allows that. for (const int32 non_batch_dim : {-1, 2}) { const int32 batch_size = 12; @@ -973,7 +973,7 @@ TEST_F(ConverterTest, GetWeightRange) { TEST_F(ConverterTest, ProvideQuantizationRange) { FakeITensor fake_tensor; - // Assymetric range + // Asymmetric range converter_->ProvideQuantizationRange(&fake_tensor, 0.0f, 6.0f); EXPECT_EQ(6.0f, quantization_ranges()[&fake_tensor]); converter_->ProvideQuantizationRange(&fake_tensor, 1.0f, 6.0f); diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index f707cf75417..2d22c0a149f 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -125,7 +125,7 @@ class TRTEngineOp : public AsyncOpKernel { // Verify that the input shapes are consistent and can be handled by this op. Status VerifyInputShapes(const std::vector& shapes); - // Return engine batch in cached_engne_batch_sizes_ which is closest to input + // Return engine batch in cached_engine_batch_sizes_ which is closest to input // batch. Status GetEngineInputShapes( const CacheType& cache, diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc index 7a9b9f65fd8..c868416d048 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc @@ -112,7 +112,7 @@ TEST_F(TRTEngineResourceOpsTest, Basic) { EXPECT_TRUE( errors::IsNotFound(rm->Lookup(container, resource_name, &resource))); - // Create the resouce using an empty file with InitializeTRTResource. + // Create the resource using an empty file with InitializeTRTResource. Reset(); Env* env = Env::Default(); const string filename = io::JoinPath(testing::TmpDir(), "trt_engine_file"); diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment.cc b/tensorflow/compiler/tf2tensorrt/segment/segment.cc index 6d3920874aa..4d9dd42a53a 100644 --- a/tensorflow/compiler/tf2tensorrt/segment/segment.cc +++ b/tensorflow/compiler/tf2tensorrt/segment/segment.cc @@ -466,7 +466,7 @@ Status SegmentGraph(const Graph* tf_graph, // grow from the output-side of the network towards the inputs. // // In general this is not guaranteed to produce a globally optimal - // segmentation. For exaample, consider graph with node {A, B, C, D} and edges + // segmentation. For example, consider graph with node {A, B, C, D} and edges // {A->B, A->C, B->D, C->D), where A, B, D are trt compatible but C is not, so // in theory we can choose to contract either A, B or B, D but not both, but // here it always choose to contract B, D. 
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h index d2ea8ad38cf..06b39716490 100644 --- a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h +++ b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h @@ -34,7 +34,7 @@ namespace tensorrt { // TRTs pull model for calibration. When TRT implements a means for // a push calibration This class should be updated accordingly -// IInt8EntropyCalibrator2 is prefferred for TRT 5.1+. +// IInt8EntropyCalibrator2 is preferred for TRT 5.1+. #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 { #else diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 0e614ca7ace..f9af5581a67 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -572,7 +572,7 @@ Status Conditional::ExtractBodies(Graph* graph) { if (visited.at(n->id())) continue; visited[n->id()] = true; - // Verify output edges and record control edges exitting scope. + // Verify output edges and record control edges exiting scope. for (const Edge* e : n->out_edges()) { Node* dst = e->dst(); if (IsMerge(dst)) continue; @@ -602,7 +602,7 @@ Status Conditional::ExtractBodies(Graph* graph) { } } - // Copying incomming edges to dst node. Iterate over a copy of the edges + // Copying incoming edges to dst node. Iterate over a copy of the edges // as they could be mutated during iteration. std::vector in_edges(n->in_edges().begin(), n->in_edges().end()); @@ -719,7 +719,7 @@ Status Conditional::ExtractBodies(Graph* graph) { ++index; // Connect the input to the merge_ with the retval, except if it is a - // Swich node, which is handled separately. + // Switch node, which is handled separately. for (auto e : m->in_edges()) { if (e->IsControlEdge()) continue; int branch_index = static_cast(find_branch(e)); @@ -1139,7 +1139,7 @@ StateMap::CondId FunctionalizeCond::StateAlongEdge(const Edge* e) { // node. If we don't record this into CondState, branches might have // incorrect CondState (e.g. if the branch only has a Const data node). // We set it to kNeither because there is no way to tell whether it's - // for true branch or false branch. This node's desendents might have + // for true branch or false branch. This node's descendents might have // other incoming edges with defined BranchType, and we correctly handle // merging kNeither with other defined BranchType in StateAlongEdge(). state[predicate] = BranchType::kNeither; diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.h b/tensorflow/compiler/tf2xla/functionalize_cond.h index d85800fb8ee..7940732a11d 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.h +++ b/tensorflow/compiler/tf2xla/functionalize_cond.h @@ -213,7 +213,7 @@ class FunctionalizeCond { // This populates the state_map_. Status DetermineStates(std::vector rev_topo_order); - // Determine the CondState for a given node using the incomming edges + // Determine the CondState for a given node using the incoming edges // to the node. Note: it is expected that this node's CondState is only // determined once its input's CondState is. 
Status DetermineCondState(Node* dst) { diff --git a/tensorflow/compiler/tf2xla/kernels/assert_op.cc b/tensorflow/compiler/tf2xla/kernels/assert_op.cc index 94543686b47..c40caa8fa10 100644 --- a/tensorflow/compiler/tf2xla/kernels/assert_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/assert_op.cc @@ -22,7 +22,7 @@ namespace tensorflow { namespace { -// This TensorFlow op supports the Assert primitve. +// This TensorFlow op supports the Assert primitive. class AssertOp : public XlaOpKernel { public: explicit AssertOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} diff --git a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc index 507bc8d7a3b..67d49eafcde 100644 --- a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc @@ -329,7 +329,7 @@ class MaxPoolGradOp : public XlaOpKernel { (padding_ == VALID) ? xla::Padding::kValid : xla::Padding::kSame; // Create a MaxPool operation to check the expected resulting shape, and - // then throw away the operation because we don't actually neeed it here. + // then throw away the operation because we don't actually need it here. TensorShape expected_out_shape; auto pooling = xla::MaxPool(ctx->Input(0), ksize_, stride_, xla_padding, diff --git a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc index 0b5b66ae52f..7a8aec295a6 100644 --- a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc @@ -37,7 +37,7 @@ class XlaConvOp : public XlaOpKernel { context, context->GetAttr("precision_config", &precision_config_attr)); OP_REQUIRES(context, precision_config_.ParsePartialFromString(precision_config_attr), - errors::InvalidArgument("Error parsing precison config.")); + errors::InvalidArgument("Error parsing precision config.")); } void Compile(XlaOpKernelContext* context) override { diff --git a/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc index a28ecd660ab..8e9ed35783f 100644 --- a/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc @@ -32,7 +32,7 @@ class XlaSvdOp : public XlaOpKernel { ctx->GetAttr("precision_config", &precision_config_attr)); OP_REQUIRES(ctx, precision_config_.ParsePartialFromString(precision_config_attr), - errors::InvalidArgument("Error parsing precison config.")); + errors::InvalidArgument("Error parsing precision config.")); if (precision_config_.operand_precision_size() == 0) { precision_config_.mutable_operand_precision()->Add( xla::PrecisionConfig::HIGHEST); diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index dab051b39a8..33b740a706c 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -84,7 +84,7 @@ lower: a boolean specifies whether the calculation is done with the lower max_iter: maximum number of sweep update, i.e., the whole lower triangular part or upper triangular part based on parameter lower. Heuristically, it has - been argued that approximatly logN sweeps are needed in practice (Ref: Golub & + been argued that approximately logN sweeps are needed in practice (Ref: Golub & van Loan "Matrix Computation"). epsilon: the tolerance ratio. @@ -116,7 +116,7 @@ a: the input tensor. max_iter: maximum number of sweep update, i.e., the whole lower triangular part or upper triangular part based on parameter lower. 
Heuristically, it has - been argued that approximatly log(min (M, N)) sweeps are needed in practice + been argued that approximately log(min (M, N)) sweeps are needed in practice (Ref: Golub & van Loan "Matrix Computation"). epsilon: the tolerance ratio. @@ -610,7 +610,7 @@ REGISTER_OP("XlaDequantize") .SetShapeFn(shape_inference::UnknownShape) .Doc(R"doc( Takes the packed uint32 input and unpacks the input to uint8 to do -Dequantization on deivce. +Dequantization on device. input: Input tensors whose types is uint32, shape is [d0, ..., dn]. output: Output tensors whose types is bloat16. If transpose_output is true, @@ -644,7 +644,7 @@ REGISTER_OP("XlaEinsum") .Doc(R"doc( An op which supports basic einsum op with 2 inputs and 1 output. -This op has better TPU performnce since it doesn't have explicitly reshape and +This op has better TPU performance since it doesn't have explicitly reshape and transpose operations as tf.einsum does. )doc"); diff --git a/tensorflow/compiler/tf2xla/shape_util.h b/tensorflow/compiler/tf2xla/shape_util.h index e775c4462c3..331cfa38c1d 100644 --- a/tensorflow/compiler/tf2xla/shape_util.h +++ b/tensorflow/compiler/tf2xla/shape_util.h @@ -51,7 +51,7 @@ xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type, // In case the input shape is a tuple, the minor-to-major values will be in the // order of the tuple elements within the tuple shape. // If a shape (or a subshape of a tuple shape) has missing layout, a rank long -// sequence of -1 values will be emittted. +// sequence of -1 values will be emitted. xla::StatusOr> GetShapeLayoutVector(const xla::Shape& shape); // Given the input shape and a linearized sequence of the minor-to-major values diff --git a/tensorflow/compiler/tf2xla/tf2xla.proto b/tensorflow/compiler/tf2xla/tf2xla.proto index 3093a0b1d8d..557f5bc3470 100644 --- a/tensorflow/compiler/tf2xla/tf2xla.proto +++ b/tensorflow/compiler/tf2xla/tf2xla.proto @@ -52,7 +52,7 @@ message Variable { TensorShapeProto shape = 3; DataType type = 4; - // Flag for variables that are never assigned. Assigments to a read-only + // Flag for variables that are never assigned. Assignments to a read-only // variable or unassigned variables that are not read-only are invalid. bool readonly = 5; } diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index 3e75cf7fa58..27b198f8bee 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -213,13 +213,13 @@ class XlaOpKernelContext { return dynamic_dimension_is_minus_one_; } - // Reads the current value of the resouce variable referred to by input + // Reads the current value of the resource variable referred to by input // `index`. If `shape` is not nullptr, sets `*shape` to the shape of the // variable. Returns an error if the variable has not been initialized, or if // its type does not match `type`. Status ReadVariableInput(int index, DataType type, TensorShape* shape, xla::XlaOp* value); - // Reads the current value of the resouce variable referred to by input + // Reads the current value of the resource variable referred to by input // `name`. 
Status ReadVariableInput(absl::string_view name, DataType type, TensorShape* shape, xla::XlaOp* value); diff --git a/tensorflow/compiler/xla/client/lib/comparators_test.cc b/tensorflow/compiler/xla/client/lib/comparators_test.cc index 598956803b3..d6e5d80b85f 100644 --- a/tensorflow/compiler/xla/client/lib/comparators_test.cc +++ b/tensorflow/compiler/xla/client/lib/comparators_test.cc @@ -73,7 +73,7 @@ void BuildComparatorAndComparisons(ComparatorsTest* test, } } - // Concantenate the comparison results. + // Concatenate the comparison results. ConcatInDim(test->builder(), all_comparisons, 0); // If we use less-than comparisons, we expect the comparison to result in true diff --git a/tensorflow/compiler/xla/client/lib/matrix.cc b/tensorflow/compiler/xla/client/lib/matrix.cc index d2275af5ca4..3f4a63c31be 100644 --- a/tensorflow/compiler/xla/client/lib/matrix.cc +++ b/tensorflow/compiler/xla/client/lib/matrix.cc @@ -316,7 +316,7 @@ Status ValidateEinsumNumericDimensions(absl::Span x_config, namespace { // Helper method to remove dimensions from a shape and dot dimension numbers -// used to implment implicit broadcasting. +// used to implement implicit broadcasting. template void DeleteDimsFromContainer(absl::Span to_delete, Shape* shape, C* batch_dims, C* contracting_dims) { @@ -473,7 +473,7 @@ xla::XlaOp Einsum(xla::XlaOp x, absl::Span x_config, xla::XlaOp y, transpose_dims[output_transpose_dims[i]] = i; } - // Remove ones that where broadcated from the x and the y shape and adjust + // Remove ones that were broadcasted from the x and the y shape and adjust // the dimension numbers that are more minor than those dimensions. DeleteDimsFromContainer(lhs_delete_dims, &x_shape, dnums.mutable_lhs_batch_dimensions(), diff --git a/tensorflow/compiler/xla/client/lib/matrix.h b/tensorflow/compiler/xla/client/lib/matrix.h index 6377704c58c..46f70ed27b9 100644 --- a/tensorflow/compiler/xla/client/lib/matrix.h +++ b/tensorflow/compiler/xla/client/lib/matrix.h @@ -132,7 +132,7 @@ xla::XlaOp Einsum( // the input. xla::XlaOp EinsumDiagonal(XlaOp x, absl::Span config); -// Same as above but supporting numeric labels on dimensins. So "ab,cb->ac" +// Same as above but supporting numeric labels on dimensions.
So "ab,cb->ac" // becomes: // x_config = {0, 1} // y_config = {2, 1} diff --git a/tensorflow/compiler/xla/client/lib/pooling.cc b/tensorflow/compiler/xla/client/lib/pooling.cc index 1979c867a4c..45033ec07e7 100644 --- a/tensorflow/compiler/xla/client/lib/pooling.cc +++ b/tensorflow/compiler/xla/client/lib/pooling.cc @@ -39,7 +39,7 @@ XlaOp AvgPoolDivideByCountWithGeneralPadding( std::vector window_ksize(num_spatial_dims); std::vector window_stride(num_spatial_dims); CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims) - << "Invalid number of spatial dimentions in data format specification"; + << "Invalid number of spatial dimensions in data format specification"; for (int i = 0; i < num_spatial_dims; ++i) { int dim = data_format.spatial_dimension(i); input_dim_sizes[i] = input_shape[dim]; @@ -95,7 +95,7 @@ PaddingConfig MakeSpatialPaddingConfig( padding_config.add_dimensions(); } CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims) - << "Invalid number of spatial dimentions in data format specification"; + << "Invalid number of spatial dimensions in data format specification"; for (int i = 0; i < num_spatial_dims; ++i) { int dim = data_format.spatial_dimension(i); auto padding_dimension = padding_config.mutable_dimensions(dim); @@ -178,7 +178,7 @@ std::vector> MakeSpatialPadding( std::vector kernel_size_spatial_dimensions; std::vector stride_spatial_dimensions; CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims) - << "Invalid number of spatial dimentions in data format specification"; + << "Invalid number of spatial dimensions in data format specification"; for (int i = 0; i < num_spatial_dims; ++i) { int dim = data_format.spatial_dimension(i); input_spatial_dimensions.push_back(input_size[dim]); diff --git a/tensorflow/compiler/xla/client/lib/slicing.cc b/tensorflow/compiler/xla/client/lib/slicing.cc index b47ddb7919f..7d8f433bac8 100644 --- a/tensorflow/compiler/xla/client/lib/slicing.cc +++ b/tensorflow/compiler/xla/client/lib/slicing.cc @@ -154,29 +154,29 @@ XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim, bool sparse) { return TorchIndexSelect(input, index, 0); } if (!sparse) { - std::vector index_broacast_dims; - std::vector input_broacast_dims; + std::vector index_broadcast_dims; + std::vector input_broadcast_dims; std::vector sizes; for (int64 i = 0; i < index_shape.rank(); ++i) { if (i < dim) { - input_broacast_dims.push_back(i); - index_broacast_dims.push_back(i); + input_broadcast_dims.push_back(i); + index_broadcast_dims.push_back(i); } else if (i == dim) { sizes.push_back(input_shape.dimensions(i)); - input_broacast_dims.push_back(i); - index_broacast_dims.push_back(i + 1); + input_broadcast_dims.push_back(i); + index_broadcast_dims.push_back(i + 1); } else { - input_broacast_dims.push_back(i + 1); - index_broacast_dims.push_back(i + 1); + input_broadcast_dims.push_back(i + 1); + index_broadcast_dims.push_back(i + 1); } sizes.push_back(index_shape.dimensions(i)); } auto mask = Eq( - BroadcastInDim(index, sizes, index_broacast_dims), + BroadcastInDim(index, sizes, index_broadcast_dims), Iota(builder, ShapeUtil::MakeShape(index_shape.element_type(), sizes), dim)); auto masked_input = Select( - mask, BroadcastInDim(input, sizes, input_broacast_dims), + mask, BroadcastInDim(input, sizes, input_broadcast_dims), Zeros(builder, ShapeUtil::MakeShape(input_shape.element_type(), sizes))); return Reduce(masked_input, Zero(builder, input_shape.element_type()), @@ -214,25 +214,25 @@ XlaOp TorchScatterDense(XlaOp input, XlaOp index, XlaOp src, int64 dim, return 
builder->ReportErrorOrReturn([&]() -> StatusOr { TF_ASSIGN_OR_RETURN(Shape index_shape, builder->GetShape(index)); TF_ASSIGN_OR_RETURN(Shape input_shape, builder->GetShape(input)); - std::vector index_broacast_dims; + std::vector index_broadcast_dims; std::vector sizes; for (int64 i = 0; i < index_shape.rank(); ++i) { if (i < dim) { - index_broacast_dims.push_back(i); + index_broadcast_dims.push_back(i); } else { if (i == dim) { sizes.push_back(input_shape.dimensions(i)); } - index_broacast_dims.push_back(i + 1); + index_broadcast_dims.push_back(i + 1); } sizes.push_back(index_shape.dimensions(i)); } auto mask = - Eq(BroadcastInDim(index, sizes, index_broacast_dims), + Eq(BroadcastInDim(index, sizes, index_broadcast_dims), Iota(builder, ShapeUtil::MakeShape(index_shape.element_type(), sizes), dim)); auto masked_src = - Select(mask, BroadcastInDim(src, sizes, index_broacast_dims), + Select(mask, BroadcastInDim(src, sizes, index_broadcast_dims), Zeros(builder, ShapeUtil::MakeShape(input_shape.element_type(), sizes))); diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index 9f520bcdadf..5e177cd391e 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -98,7 +98,7 @@ std::vector> MakeFakeArgumentsOrDie( const XlaComputation& computation, Client* client, DebugOptions* debug_opts /*=nullptr*/) { CHECK(computation.proto().has_host_program_shape()) - << "Computation should have progran shape."; + << "Computation should have program shape."; auto program_shape = computation.proto().host_program_shape(); std::vector> results; diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 153cb9f5212..97679d9b9ac 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -329,15 +329,15 @@ StatusOr LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) { } StatusOr LocalClient::TransferToLocalServer( - const ::xla::BorrowingLiteral& literal, int device_oridinal) { + const ::xla::BorrowingLiteral& literal, int device_ordinal) { const ::xla::Shape& shape = literal.shape(); TF_ASSIGN_OR_RETURN( ::xla::ScopedShapedBuffer shaped_buffer, backend().transfer_manager()->AllocateScopedShapedBuffer( - shape, backend().memory_allocator(), device_oridinal)); + shape, backend().memory_allocator(), device_ordinal)); TF_ASSIGN_OR_RETURN(auto stream, - mutable_backend()->BorrowStream(device_oridinal)); + mutable_backend()->BorrowStream(device_ordinal)); TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice( stream.get(), literal, shaped_buffer)); std::vector<::xla::ScopedShapedBuffer> replicated_buffer; diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index f5e66c6d586..221a911567c 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -122,7 +122,7 @@ class LocalClient : public Client { // Transfer the BorrowingLiteral to the device with the given ordinal. StatusOr TransferToLocalServer( - const ::xla::BorrowingLiteral& literal, int device_oridinal); + const ::xla::BorrowingLiteral& literal, int device_ordinal); // Copy the data from the device contained in the given ShapedBuffer and // return as a Literal. 
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 5e93bb2b3ba..ac86b78fded 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -232,7 +232,7 @@ class XlaBuilder { // added operation. // // `remove_dynamic_dimensions` tells the builder whether to remove the - // dyanmic dimensions information in all ops. + // dynamic dimensions information in all ops. // // TODO(b/121223198): Delete `remove_dynamic_dimensions` and keeps the // dynamic dimensions information when XLA backend can handle dynamic @@ -1194,7 +1194,7 @@ XlaOp Broadcast(XlaOp operand, absl::Span broadcast_sizes); // // For example, say operand = {1, 2}, i.e., a 1D tensor in shape s32[2]; the // output shape is s32[2,2]: -// - Specifying {1} as brodcast_dimension will generate output +// - Specifying {1} as broadcast_dimension will generate output // {{1, 2}, // {1, 2}} // - On the other hand, specifying {0} as broadcast_dimension @@ -1469,7 +1469,7 @@ XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower, // two minor dimensions equal. // If `lower` is true, the data from the lower triangle is used; if false, the // upper triangle is used. The input data in the other triangle of the input -// does not affect the output. Returns the output in the same lower/uppper +// does not affect the output. Returns the output in the same lower/upper // triangle. The data returned in the other output triangle is arbitrary and // implementation-defined. // diff --git a/tensorflow/compiler/xla/client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_builder_test.cc index f76ea38e08e..fd227ea47f2 100644 --- a/tensorflow/compiler/xla/client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_builder_test.cc @@ -292,7 +292,7 @@ TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) { TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); // The binary operation has in-dim broadcast and degenerate broadcast, should - // first do the in-dim broadcast then convert the degnerate broadcast into a + // first do the in-dim broadcast then convert the degenerate broadcast into a // reshape and a broadcast. // // Expected: diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc index 064d8cd8d8b..16c83ab9b2c 100644 --- a/tensorflow/compiler/xla/debug_options_flags.cc +++ b/tensorflow/compiler/xla/debug_options_flags.cc @@ -328,7 +328,7 @@ static void AllocateFlags() { "use multi-threaded Eigen mode."), tensorflow::Flag("xla_gpu_cuda_data_dir", flag_values->mutable_xla_gpu_cuda_data_dir(), - "If non-empty, speficies a local directory containing " + "If non-empty, specifies a local directory containing " "ptxas and nvvm libdevice files; otherwise we use " "those from runfile directories."), tensorflow::Flag("xla_gpu_ftz", @@ -347,7 +347,7 @@ static void AllocateFlags() { flag_values->xla_gpu_max_kernel_unroll_factor(), "Specify the maximum kernel unroll factor for the GPU backend."), tensorflow::Flag("xla_gpu_ptx_file", setter_for_xla_gpu_ptx_file, "", - "If non-empty, speficies a file containing ptx to use. " + "If non-empty, specifies a file containing ptx to use. " "The filename prefix must have the same pattern as PTX " "dumped by XLA. This allows to match one specific " "module. General workflow. 
Get the generated module " diff --git a/tensorflow/compiler/xla/debug_options_flags.h b/tensorflow/compiler/xla/debug_options_flags.h index 1675b377edf..069e36dc52a 100644 --- a/tensorflow/compiler/xla/debug_options_flags.h +++ b/tensorflow/compiler/xla/debug_options_flags.h @@ -52,7 +52,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags(); // By default all passes have infinite fuel. You can restrict how much fuel a // pass has by specifying XLA_FLAGS=--xla_fuel=PASS1=NUM1,PASS2=NUM2,... // -// If a user specifes --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called +// If a user specifies --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called // before the program exits, we'll print a warning. // // We recommend as a convention you use a pass's name for the `pass` argument, @@ -91,7 +91,7 @@ bool ConsumeFuel(absl::string_view pass, // startup. // // You may call this function twice in the same thread to reset its fuel pool -// back to the intitial state. +// back to the initial state. void ResetThreadLocalFuel(); } // namespace xla diff --git a/tensorflow/compiler/xla/execution_options_util.h b/tensorflow/compiler/xla/execution_options_util.h index a8ca27ec8df..7bb817b8f1d 100644 --- a/tensorflow/compiler/xla/execution_options_util.h +++ b/tensorflow/compiler/xla/execution_options_util.h @@ -21,7 +21,7 @@ limitations under the License. namespace xla { // Create a default ExecutionOptions proto; this proto has its debug options -// popupated to the default values taken from flags. +// populated to the default values taken from flags. ExecutionOptions CreateDefaultExecutionOptions(); } // namespace xla diff --git a/tensorflow/compiler/xla/g3doc/operation_semantics.md b/tensorflow/compiler/xla/g3doc/operation_semantics.md index 8cf8022340a..ee7b2b20928 100644 --- a/tensorflow/compiler/xla/g3doc/operation_semantics.md +++ b/tensorflow/compiler/xla/g3doc/operation_semantics.md @@ -94,7 +94,7 @@ The participating cores can be configured by: in the same order of 1, 2, 3. Then, another AllToAll will be applied within replicas 4, 5, 0, and the concatenation order is also 4, 5, 0. If `replica_groups` is empty, all replicas belong to one group, in the - concatenation order of their appearence. + concatenation order of their appearance. Prerequisites: diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index bbd640f6064..3d6310c1e17 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -248,7 +248,7 @@ Status MutableLiteralBase::CopySliceFromInternal( TF_RET_CHECK(src_base.size() == copy_size.size()); // Scan the source from minor, stepping in copy size blocks, then within - // the index enumaration functor, do a strided copy advancing source index + // the index enumeration functor, do a strided copy advancing source index // by one (walking through the minor dimension), and destination index by // proper stride size at the matching dimension. DimensionVector src_indexes(src_base.size(), 0); diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h index 227717188ab..2d27f8eb7f6 100644 --- a/tensorflow/compiler/xla/literal.h +++ b/tensorflow/compiler/xla/literal.h @@ -810,7 +810,7 @@ class Literal : public MutableLiteralBase { Literal(const Shape& shape, bool allocate_arrays); Literal& operator=(Literal&& other); - // Similar to CopyFrom, but with move semantincs. The subshape of this literal + // Similar to CopyFrom, but with move semantics. 
The subshape of this literal // rooted at 'dest_shape_index' must be *equal* to the shape 'src_literal' // (layouts and shapes must match), but need not be arrays. The memory // allocated in this literal for the subshape at dest_shape_index is @@ -883,7 +883,7 @@ class BorrowingLiteral : public LiteralBase { BorrowingLiteral() : LiteralBase() {} // 'src_buf_ptr' is not owned by this class and must outlive the - // lifetime of this class. It points to an appropirately sized buffer with + // lifetime of this class. It points to an appropriately sized buffer with // data interpretered as indicated by 'shape'. // This constructor is only used for array shapes. BorrowingLiteral(const char* src_buf_ptr, const Shape& shape); diff --git a/tensorflow/compiler/xla/literal_comparison.cc b/tensorflow/compiler/xla/literal_comparison.cc index 662aeead14e..e1f52f72e5d 100644 --- a/tensorflow/compiler/xla/literal_comparison.cc +++ b/tensorflow/compiler/xla/literal_comparison.cc @@ -433,7 +433,7 @@ class NearComparator { } } else if (IsInf(expected) || IsInf(actual)) { // If either the expected or actual value is infinity but not both, - // then both absolute and relative error are regarded as inifity. + // then both absolute and relative error are regarded as infinity. CHECK(!CompareEqual(expected, actual, {linear_index})); abs_error = std::numeric_limits::infinity(); rel_error = std::numeric_limits::infinity(); diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc index d1dd6b8fd77..9b17cb762c8 100644 --- a/tensorflow/compiler/xla/literal_test.cc +++ b/tensorflow/compiler/xla/literal_test.cc @@ -1134,7 +1134,7 @@ TEST_F(LiteralUtilTest, CopyFromDifferentShapes) { TEST_F(LiteralUtilTest, F16) { // Verify that the internal data views are consistent and that they // are in little endian format - // TODO - modify if we make the data format machine endianess dependent + // TODO - modify if we make the data format machine endianness dependent Literal m1 = Literal::CreateFromShape(ShapeUtil::MakeShape(F16, {2, 2})); const char* d1 = reinterpret_cast(m1.data().data()); EXPECT_EQ(d1[0], 0); diff --git a/tensorflow/compiler/xla/parse_flags_from_env.h b/tensorflow/compiler/xla/parse_flags_from_env.h index 76940a4299a..18d9788cde4 100644 --- a/tensorflow/compiler/xla/parse_flags_from_env.h +++ b/tensorflow/compiler/xla/parse_flags_from_env.h @@ -30,7 +30,7 @@ limitations under the License. // - in which case the effective value is the // string with the single-quotes removed -// - in which case the effective value if the // string with the double-quotes removed, and escaped sequences of // replaced by . diff --git a/tensorflow/compiler/xla/parse_flags_from_env_test.cc b/tensorflow/compiler/xla/parse_flags_from_env_test.cc index 3465552ebbf..32f27449b22 100644 --- a/tensorflow/compiler/xla/parse_flags_from_env_test.cc +++ b/tensorflow/compiler/xla/parse_flags_from_env_test.cc @@ -73,14 +73,14 @@ static const char kTestFlagString[] = "--single_quoted='single quoted \\\\ \n \"' " "--double_quoted=\"double quoted \\\\ \n '\\\"\" "; -// Test that the environent variable is parsed correctly. +// Test that the environment variable is parsed correctly. TEST(ParseFlagsFromEnv, Basic) { // Prepare environment. setenv("TF_XLA_FLAGS", kTestFlagString, true /*overwrite*/); TestParseFlagsFromEnv("(flags in environment variable)"); } -// Test that a file named by the environent variable is parsed correctly. +// Test that a file named by the environment variable is parsed correctly. 
TEST(ParseFlagsFromEnv, File) { // environment variables where tmp dir may be specified. static const char* kTempVars[] = {"TEST_TMPDIR", "TMP"}; @@ -154,7 +154,7 @@ int main(int argc, char* argv[]) { xla::int32 int_flag = 1; const std::vector flag_list = { tensorflow::Flag("recursing", &recursing, - "Whether the binary is being invoked recusively."), + "Whether the binary is being invoked recursively."), tensorflow::Flag("int_flag", &int_flag, "An integer flag to test with"), }; xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); diff --git a/tensorflow/compiler/xla/python/local_client.cc b/tensorflow/compiler/xla/python/local_client.cc index ef8ff4275a6..d0bb1eb8015 100644 --- a/tensorflow/compiler/xla/python/local_client.cc +++ b/tensorflow/compiler/xla/python/local_client.cc @@ -551,7 +551,7 @@ PyLocalBuffer::DestructureTuple() { absl::MutexLock lock(&mu_); if (!on_host_shape_.IsTuple()) { return InvalidArgument( - "Attemped to destructure a PyLocalBuffer that did not have a tuple " + "Attempted to destructure a PyLocalBuffer that did not have a tuple " "shape; shape: %s", ShapeUtil::HumanString(on_host_shape_)); } diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc index b9ca2a7e1a7..2b69239bb7a 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc @@ -345,7 +345,7 @@ PyTpuBuffer::DestructureTuple() { tensorflow::profiler::TraceMe traceme("PyTpuBuffer::DestructureTuple"); if (!on_host_shape_.IsTuple()) { return InvalidArgument( - "Attemped to destructure a PyTpuBuffer that did not have a tuple " + "Attempted to destructure a PyTpuBuffer that did not have a tuple " "shape; shape: %s", ShapeUtil::HumanString(on_host_shape_)); } diff --git a/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h b/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h index 36b7fa0d801..dc28ad1f0b4 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h +++ b/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h @@ -37,7 +37,7 @@ #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/logging.h" -// This API is EXPERIMENTAL and under active developement. It is subject to +// This API is EXPERIMENTAL and under active development. It is subject to // change without notice. namespace tpu_driver { diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index a7e35a8a81f..a0ae04a9580 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -1526,7 +1526,7 @@ class ComputationBuilder(object): ConvWithGeneralPadding. feature_group_count: number of feature groups for grouped convolution. batch_group_count: number of batch groups for grouped convolution. - Returns: a XlaOp representing the ConvGenralDilated operation. + Returns: a XlaOp representing the ConvGeneralDilated operation. """ if dimension_numbers is None: dimension_numbers = self._GetConvDimensionNumbers(len(window_strides)) diff --git a/tensorflow/compiler/xla/python_api/types.py b/tensorflow/compiler/xla/python_api/types.py index 57dfce3971b..fffe5adab1d 100644 --- a/tensorflow/compiler/xla/python_api/types.py +++ b/tensorflow/compiler/xla/python_api/types.py @@ -25,10 +25,10 @@ import numpy as _np # Avoids becoming a part of public Tensorflow API. 
from tensorflow.compiler.xla import xla_data_pb2 from tensorflow.python.framework import dtypes -# Records corresponsence between a XLA primitive type and Python/Numpy types. +# Records correspondence between a XLA primitive type and Python/Numpy types. # # primitive_type: value of type xla_data_pb2.PrimitiveType -# numpy_dtype: corresponsing Numpy "dtype" (like np.float32) +# numpy_dtype: corresponding Numpy "dtype" (like np.float32) # literal_field_name: name of the field in the LiteralProto message elements # of this type go into. # literal_field_type: type of the field named 'literal_field_name'. diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 2fe8c309cb0..f145b447bef 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -673,7 +673,7 @@ Status AlgebraicSimplifierVisitor::HandleBitcast(HloInstruction* bitcast) { bitcast, HloInstruction::CreateBitcast(bitcast->shape(), op)); } // All bitcasts can be eliminated (assuming layout constraints are - // satisified). + // satisfied). ReplaceInstructionIfSameShape(bitcast, bitcast->mutable_operand(0)); return Status::OK(); } @@ -692,7 +692,7 @@ Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) { return ReplaceWithNewInstruction( copy, HloInstruction::CreateUnary(copy->shape(), HloOpcode::kCopy, op)); } - // All copies can be eliminated (assuming layout constraints are satisified). + // All copies can be eliminated (assuming layout constraints are satisfied). if (ReplaceInstructionIfSameShape(copy, copy->mutable_operand(0))) { return Status::OK(); } @@ -2735,7 +2735,7 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) { // Don't perform this optimization if either of the exponents is complex; this // identity is true only for real-valued exponents. In addition, we cowardly - // refuse to do this transformation if the two expontents have different + // refuse to do this transformation if the two exponents have different // element types. if (lhs->opcode() == HloOpcode::kPower && !ShapeUtil::ElementIsComplex(lhs->operand(1)->shape()) && diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 88282986560..f37ff5387ee 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -4756,7 +4756,7 @@ TEST_P(DotStrengthReductionTest, DotStrengthReduction) { const bool computation_should_be_modified = dot_should_be_transformed || (transpose_lhs && transpose_rhs); EXPECT_EQ(changed, computation_should_be_modified); - // The second pass of algebriac simplifer will remove dots without + // The second pass of algebraic simplifier will remove dots without // non-contracting dimensions or contracting dimensions. TF_ASSERT_OK_AND_ASSIGN(changed, simplifier.Run(module.get())); EXPECT_EQ(changed, computation_should_be_modified); diff --git a/tensorflow/compiler/xla/service/batchnorm_expander_test.cc b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc index 34b516184fa..d7b0dc8b29d 100644 --- a/tensorflow/compiler/xla/service/batchnorm_expander_test.cc +++ b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc @@ -38,7 +38,7 @@ namespace { class BatchNormExpanderTest : public HloTestBase { protected: - // BatchNorm should have a dynamic sized dividor for mean operations. 
+ // BatchNorm should have a dynamic sized divider for mean operations. int64 CountGetDimensionSize(const HloModule& module) { int64 count = 0; for (HloComputation* comp : module.computations()) { diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 0d96ffd4568..e7cb128b62b 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -1608,7 +1608,7 @@ StatusOr> BufferAssigner::CreateAssignment( /*is_thread_local=*/false, &buffers_to_assign_sequentially, assignment.get())); // Assign buffers with sequential ordering, if any. If all global - // computations are sequential, we can run heap simuation on the whole + // computations are sequential, we can run heap simulation on the whole // module, which reduces memory usage. const bool run_whole_module_heap_simulation = buffers_to_assign_sequentially.size() == global_computations.size(); diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index e54ad852d44..912c98b5001 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -770,7 +770,7 @@ TEST_F(BufferAssignmentTest, PresetAssignments) { } TEST_F(BufferAssignmentTest, PresetAssignmentsWhile) { - // Tests preset assignments when there is no 1-to-1 corrspondance between + // Tests preset assignments when there is no 1-to-1 correspondence between // HloValue and HloBuffer (i.e., a while loop). auto module = CreateNewVerifiedModule(); Shape f32vec10_color1 = diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h index 11d8abc5bad..44cd7b5ebbd 100644 --- a/tensorflow/compiler/xla/service/buffer_value.h +++ b/tensorflow/compiler/xla/service/buffer_value.h @@ -160,7 +160,7 @@ class BufferValue { BufferValue(HloInstruction* instruction, const ShapeIndex& index, Id id); private: - // The definining instruction and index are not stored here; they can be found + // The defining instruction and index are not stored here; they can be found // in the LogicalBuffer and HloValue subclasses. This class exists only to // support migrations from TuplePointsToAnalysis to HloDataflowAnalysis, by // allowing abstract use of LogicalBuffer or HloValue. After those migrations diff --git a/tensorflow/compiler/xla/service/call_inliner.cc b/tensorflow/compiler/xla/service/call_inliner.cc index 1718b151e48..4f2436de4fa 100644 --- a/tensorflow/compiler/xla/service/call_inliner.cc +++ b/tensorflow/compiler/xla/service/call_inliner.cc @@ -27,7 +27,7 @@ namespace { // Traverses the callee computation, inlining cloned nodes into the caller // computation and connecting them to producers/consumers appropriately. -// When the traversal has completed, the provided call instruction is entriely +// When the traversal has completed, the provided call instruction is entirely // replaced in the caller's graph. 
class SubcomputationInsertionVisitor : public DfsHloVisitorWithDefault { public: diff --git a/tensorflow/compiler/xla/service/cholesky_expander.cc b/tensorflow/compiler/xla/service/cholesky_expander.cc index 74fc15a3eed..20576cdc52d 100644 --- a/tensorflow/compiler/xla/service/cholesky_expander.cc +++ b/tensorflow/compiler/xla/service/cholesky_expander.cc @@ -93,7 +93,7 @@ std::pair CholeskyUnblocked( Zeros(body_builder, ShapeUtil::MakeShape(a_shape.element_type(), matrix_dims)); // L * L.T, This matrix has of a lot of multiplying with zero - // (namely, L[:, j:] = 0) and redudant computation, but it is faster + // (namely, L[:, j:] = 0) and redundant computation, but it is faster // than slice. auto l_square = BatchDot(body_l, false, body_l, true, precision); diff --git a/tensorflow/compiler/xla/service/collective_ops_utils.h b/tensorflow/compiler/xla/service/collective_ops_utils.h index 2c5f2d64d1f..8b3c60f76de 100644 --- a/tensorflow/compiler/xla/service/collective_ops_utils.h +++ b/tensorflow/compiler/xla/service/collective_ops_utils.h @@ -32,7 +32,7 @@ namespace xla { enum class ReductionKind { SUM, PRODUCT, MIN, MAX }; -// Atempts to match computation to one of the possible cases in ReductionKind. +// Attempts to match computation to one of the possible cases in ReductionKind. absl::optional MatchReductionComputation( const HloComputation* computation); diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index a0248839fdd..b2e1231e315 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -47,7 +47,7 @@ namespace xla { // The following types are used for ahead of time compilation. // Contains the object file data created as a result of ahead-of-time -// compuation. +// computation. using ObjectFileData = std::vector; // Abstract superclass describing the result of an ahead-of-time compilation. diff --git a/tensorflow/compiler/xla/service/computation_placer.h b/tensorflow/compiler/xla/service/computation_placer.h index 8df2a50cf8f..03b65fd76a5 100644 --- a/tensorflow/compiler/xla/service/computation_placer.h +++ b/tensorflow/compiler/xla/service/computation_placer.h @@ -71,7 +71,7 @@ class ComputationPlacer { // Returns the device id assigned to the given replica and computation // instance for [replica_count x computation_count] setup. The returned device - // id must match the assignement from PlaceReplicatedComputation(). + // id must match the assignment from PlaceReplicatedComputation(). virtual StatusOr DeviceId(int replica, int computation, int replica_count, int computation_count); diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc index 86f6a9295e6..f60742a8c23 100644 --- a/tensorflow/compiler/xla/service/conditional_simplifier.cc +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -189,7 +189,7 @@ StatusOr TryRemoveUnusedConditionalOperands( } for (HloInstruction* user : param->users()) { // If the user is not a get tuple element, assume it is unsafe to remove - // elemnts from the tuple. + // elements from the tuple. 
if (user->opcode() != HloOpcode::kGetTupleElement) { return false; } diff --git a/tensorflow/compiler/xla/service/convolution_group_converter.cc b/tensorflow/compiler/xla/service/convolution_group_converter.cc index cfcf059ba5f..f942d6768df 100644 --- a/tensorflow/compiler/xla/service/convolution_group_converter.cc +++ b/tensorflow/compiler/xla/service/convolution_group_converter.cc @@ -393,7 +393,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { const int64 depthwise_multiplier = filter->shape().dimensions(kernel_output_feature_dim) / group_count; // Split the kernel output feature dimension into group count and - // depthwise mutlipler. + // depthwise multiplier. for (int64 i = 0; i < filter->shape().rank(); ++i) { if (i == kernel_output_feature_dim) { new_filter_dimension.push_back(group_count); @@ -439,7 +439,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { new_dim->set_window_dilation(1); new_dim->set_base_dilation(1); - // Split the output feature dimension into and output featrue of group + // Split the output feature dimension into an output feature of group // count and depthwise multipler as an output spatial dimension. std::vector new_output_dimension; new_output_dimension.reserve(convolution->shape().rank() + 1); diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index cde75d0c16c..88497e35214 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1177,7 +1177,7 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) { InsertCopies(module_.get()); - // The entry computation requires two copies to resolve the non-disinctness of + // The entry computation requires two copies to resolve the non-distinctness of // two init elements and the constant passed in as one of the init // elements. Either element can be copied for the distinctness issue. EXPECT_EQ(CountCopies(*module_->entry_computation()), 2); @@ -1996,7 +1996,7 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) { tensorflow::testing::StopTiming(); // Each body receives of copy of two of the parameters (the corresponding - // elements in the body are modifed), and there is one copy in each body. + // elements in the body are modified), and there is one copy in each body. ASSERT_EQ(CountCopies(module), 3 * num_whiles); } } diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 1270cd7a1bc..6a331ba4f19 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -350,7 +350,7 @@ Status CpuCompiler::RunHloPassesAfterLayoutAssn( // duplicate or NOPs, so remove them with algebraic simplification and CSE.
{ auto& pass = pipeline.AddPass>( - "simplification after layout assignement"); + "simplification after layout assignment"); pass.AddInvariantChecker( /*layout_sensitive=*/true, /*allow_mixed_precision=*/false, diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index d19cf4fb015..206fbd36344 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -327,7 +327,7 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( // // Logically we want this lambda to capture `buffers` by move, ultimately our // functor needs to be wrapped in an std::function, and that requires its - // functor to be copyable. Thus we perpitrate the hack of capturing buffers + // functor to be copyable. Thus we perpetrate the hack of capturing buffers // "by shared pointer". // // We also need to change the types of some of the variables we capture: diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc index 95b8025f873..4e0715ea7af 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc @@ -28,7 +28,7 @@ namespace cpu { // We want to change the layout of constant arrays to be column major when all // of their users are dot operations that can be made faster with the flipped -// layout. To avoid going quadriatic over the # of instructions, we cache this +// layout. To avoid going quadratic over the # of instructions, we cache this // property in should_make_rhs_col_major -- it maps a constant to true if all of // the users of said constant are dot operations that can be sped up. This // cache is populated lazily as we encounter dot operations traversing the diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index a50c0dafba6..c03a4a0c9ad 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -84,7 +84,7 @@ enum class DotImplementationStrategy { // supported. kTiledLlvmIrGemv, - // The dot operation is lowered into LLVM IR that implemetns a tiled + // The dot operation is lowered into LLVM IR that implements a tiled // Matrix*Matrix operation. No fusions are supported. The two inputs // and the output have to be row major. kTiledLlvmIrGemm, diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h index cc28918ed60..0c75eaec858 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h @@ -63,7 +63,7 @@ enum class DotImplementationStrategy { // supported. kTiledLlvmIrGemv, - // The dot operation is lowered into LLVM IR that implemetns a tiled + // The dot operation is lowered into LLVM IR that implements a tiled // Matrix*Matrix operation. No fusions are supported. The two inputs // and the output have to be row major. 
kTiledLlvmIrGemm, diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index cf167a57087..394d1fc979d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1159,7 +1159,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { /*instruction=*/*convolution, /*operands=*/{lhs, rhs}, /*supported_types=*/{F16, F32, F64, C64, C128})); - // TODO(tonywy): Add PotentiallyImplementedAsMKLCovolution to support + // TODO(tonywy): Add PotentiallyImplementedAsMKLConvolution to support // different data layouts. if (PotentiallyImplementedAsEigenConvolution(*convolution, target_machine_features_)) { diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 453676bd7c7..95458ba05a4 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -294,7 +294,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, absl::string_view name); // Emits a call to a "global" function (e.g. to the computation nested within - // a kWhile or a kCall). Buffer assignment unabiguously assignes buffers to + // a kWhile or a kCall). Buffer assignment unambiguously assigns buffers to // the parameters and return values for these computations so there is no need // to explicitly pass parameters or return results. void EmitGlobalCall(const HloComputation& callee, absl::string_view name); @@ -366,7 +366,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, // without generating IR with illegal (e.g. excessively large or // non-power-of-two) vector types. We do this by introducing a layer of // abstraction: we introduce a high level vector-like concept called a - // "sharded vector" that models data paralleism, and is mapped to a sequence + // "sharded vector" that models data parallelism, and is mapped to a sequence // scalar and vector llvm::Value s. // // For example, we can represent 29 f32 elements by a sharded vector mapped to diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc index 0b4e3ecd99b..a42dcccf381 100644 --- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc @@ -185,7 +185,7 @@ llvm::Value* GenerateVF32Exp(llvm::IRBuilder<>* b, llvm::Value* input, // value of n clamped to [-127, 127]. In the case where n' = 127, `a` can grow // up to as large as 88.8 - 127 * log(2) which is about 0.7703. Even though // this value of `a` is outside our previously specified range, e^a will still - // only have a relative error of approximetely 2^-16 at worse. In practice + // only have a relative error of approximately 2^-16 at worst. In practice // this seems to work well enough; it passes our exhaustive tests, breaking // only one result, and by one ulp (we return exp(88.7228394) = max-float but // we should return inf).
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc index 4a8d963bedf..14afe770ede 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc @@ -193,7 +193,7 @@ bool ParallelTaskAssigner::AssignParallelTasksHelper( computation->instructions().end()); for (auto* instruction : instructions) { // Assign parallel tasks to sub-computations for While and Call HLOs. - // TODO(b/27458679) Evaluate alternative intra-op parallelsim placement, + // TODO(b/27458679) Evaluate alternative intra-op parallelism placement, // and support other callable computations like reduce. if (instruction->opcode() == HloOpcode::kWhile) { changed |= AssignParallelTasksHelper(module, instruction->while_body(), diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc b/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc index 6f72ddadf94..bf1a1e016af 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc @@ -33,7 +33,7 @@ using ComputeFunctionType = void (*)(void*, const void*, const void**, void**, // Dispatches 'num_partitions - 1' calls to 'function_ptr' in parallel. // Calls 'function_ptr' for first partition inline. -// Uses blocking counter to synchonize threads after parallel calls complete. +// Uses blocking counter to synchronize threads after parallel calls complete. // // The 'partitions' array has a total number of elements equal to // 'num_partitions * num_partitioned_dims * 2' (the '2' is necessary to specify diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition.cc b/tensorflow/compiler/xla/service/cpu/shape_partition.cc index d12c5396148..e95afbbb131 100644 --- a/tensorflow/compiler/xla/service/cpu/shape_partition.cc +++ b/tensorflow/compiler/xla/service/cpu/shape_partition.cc @@ -146,7 +146,7 @@ std::vector> ShapePartitionIterator::GetPartition( partition[i].second = dimension_partition_sizes_[i]; } CHECK_GT(partition[i].second, 0); - // Update index to remove conribution from current dimension. + // Update index to remove contribution from current dimension. index -= partition_index * dimension_partition_strides_[i]; } return partition; diff --git a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc index 7668f364bad..c4626462b66 100644 --- a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc @@ -173,7 +173,7 @@ class GemvConfig { // | C | D | // +----------------------+---+ // -// where A is the largest submatrix of the LHS that can be evenly dividied into +// where A is the largest submatrix of the LHS that can be evenly divided into // tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: // // +---+---+---+---+ +--+--+--+--+ @@ -212,7 +212,7 @@ class GemvConfig { // Where R is the starting row for the tile. // // We have an inner epilogue loop to deal with the "C" submatrix and an outer -// epilogue loop to deal with the B,D submarix. +// epilogue loop to deal with the B,D submatrix. 
// // TODO(sanjoy): We should investigate if using gather loads and scatter stores // can be used here have the same inner loop for both column-major and row-major @@ -410,7 +410,7 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( // | C | D | // +----------------------+---+ // -// where A is the largest submatrix of the LHS that can be evenly dividied into +// where A is the largest submatrix of the LHS that can be evenly divided into // tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: // // +---+---+---+---+ diff --git a/tensorflow/compiler/xla/service/dump.cc b/tensorflow/compiler/xla/service/dump.cc index beea561dad6..85884d4af68 100644 --- a/tensorflow/compiler/xla/service/dump.cc +++ b/tensorflow/compiler/xla/service/dump.cc @@ -49,7 +49,7 @@ struct CanonicalDebugOptions { // function we treat this struct's members as write-only, and read only from // `opts`. - // Did the user specifiy an explicit format for dumping? + // Did the user specify an explicit format for dumping? bool output_format_other_than_url_specified = opts.xla_dump_hlo_as_text() || opts.xla_dump_hlo_as_proto() || opts.xla_dump_hlo_as_dot() || opts.xla_dump_hlo_as_html() || diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index 14ea6f988cb..2079130714a 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -298,7 +298,7 @@ Status DynamicDimensionInferenceVisitor::HandleDot(HloInstruction* hlo) { // A. batch dims // B. contracting dims // C. non-batch non-contracting dims. - // The output dimemsions of a dot has three parts with the following + // The output dimensions of a dot has three parts with the following // order: // [(type A), (lhs type C), (rhs type C)] // @@ -317,7 +317,7 @@ Status DynamicDimensionInferenceVisitor::HandleDot(HloInstruction* hlo) { bool lhs = operand_index == 0; // The first loop keep tracks of batch dimension. RHS and LHS could have - // diffrent batch dimension numbers. + // different batch dimension numbers. if (lhs) { for (int64 i : dimension_numbers.lhs_batch_dimensions()) { result_dim_mapping[i] = current_result_dims++; @@ -1039,13 +1039,13 @@ Status DynamicDimensionInferenceVisitor::HandleGather(HloInstruction* hlo) { if (operand_index != 1) { if (hlo->gather_slice_sizes()[input_dynamic_dimension] == 1) { // Gathering a size 1 dimension out of a dynamic dimension removes - // the dynamisity. + // the dynamicity. return Status::OK(); } if (hlo->gather_slice_sizes()[input_dynamic_dimension] == operand->shape().dimensions(input_dynamic_dimension)) { // Gathering a full-sized dimension out of a dynamic dimension - // propagates the dynamisity to output. + // propagates the dynamicity to output. 
int64 output_dimension = input_dynamic_dimension; for (int64 collapsed_dim : gather_dims.collapsed_slice_dims()) { if (collapsed_dim < input_dynamic_dimension) { diff --git a/tensorflow/compiler/xla/service/dynamic_padder.cc b/tensorflow/compiler/xla/service/dynamic_padder.cc index c94a2594f3b..21b0a98af4b 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder.cc @@ -169,7 +169,7 @@ HloInstruction* PadWithScalar(HloInstruction* inst, int64 dim, return padded; } -// In a reshape if a dynamci dimension is splitted into multiple output +// In a reshape if a dynamic dimension is split into multiple output // dimensions, we need to rewrite the input of the reshape. // // The reason for this is that a continuous input may not be evenly reshaped @@ -290,7 +290,7 @@ Status RewriteDynamicReshapeSplitInput( // Step 4. Sort iota. // Use binary mark to sort iota mask, then use iota mask to reshape input. - HloComputation::Builder comp_builder("compare_bianry_iota"); + HloComputation::Builder comp_builder("compare_binary_iota"); { HloInstruction* lhs_key = comp_builder.AddInstruction(HloInstruction::CreateParameter( @@ -322,7 +322,7 @@ Status RewriteDynamicReshapeSplitInput( mask_input_shape, sorted_binary_iota, 1)); // Step 5. Sort original input using iota mask as key. - HloComputation::Builder comp_builder_iota("compare_bianry_iota"); + HloComputation::Builder comp_builder_iota("compare_binary_iota"); { HloInstruction* lhs_key = comp_builder_iota.AddInstruction(HloInstruction::CreateParameter( diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 199e14a2164..66801d28f16 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -691,7 +691,7 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( llvm::Value* imag_numerator = FMul(four, FMul(cos_b, sin_b)); // Expm1(x) is about x for small values of x, but exp_sum_m2 is about x^2 - // for small value of x. As a result, due to floating point precission + // for small values of x. As a result, due to floating point precision // issues, x^2 is a better approximation than Expm1(x) + Expm1(x) for // small values of x. llvm::Value* a_sqr = FMul(a, a); @@ -1376,7 +1376,7 @@ StatusOr ElementalIrEmitter::EmitExpm1(PrimitiveType prim_type, auto for_small_x = FAdd(x, x_squared_over_two); // At this point, the relative errors due to floating point precision loss of // calculating exp(x) - 1 and the polynomial exp(x)-1 = x + x^2/2 are about - // equal, with a value of approximetely 2^-16. + // equal, with a value of approximately 2^-16. const auto kExponentIsSmallThreshold = 0.009; auto abs_x = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::fabs, {value}, {type}, b_); diff --git a/tensorflow/compiler/xla/service/gpu/backend_configs.proto b/tensorflow/compiler/xla/service/gpu/backend_configs.proto index 602e61ac0e8..0724a83180e 100644 --- a/tensorflow/compiler/xla/service/gpu/backend_configs.proto +++ b/tensorflow/compiler/xla/service/gpu/backend_configs.proto @@ -6,7 +6,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; // Backend configs for XLA:GPU. // -// These are metadata that the GPU backend attaches to HloInstrucitons and later +// These are metadata that the GPU backend attaches to HloInstructions and later // uses during e.g. codegen.
// // Remember that proto3 doesn't give clients a way to tell the difference diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc index a8528de96f5..b00dba3e9da 100755 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc @@ -193,7 +193,7 @@ Status Visitor::HandleBatchNormTraining(HloInstruction* batch_norm) { if (is_batchnorm_with_fp16_inputs) { new_gte = AddConvert(new_gte, F32); } - // Repackage the results. Athough this tuple is redundant when convert is not + // Repackage the results. Although this tuple is redundant when convert is not // inserted, TupleSimplifier eliminates the Tuple eventually std::unique_ptr replacing_tuple = HloInstruction::CreateTuple( {new_gte, @@ -282,7 +282,7 @@ Status Visitor::HandleBatchNormGrad(HloInstruction* batch_norm) { if (is_batchnorm_with_fp16_inputs) { new_gte = AddConvert(new_gte, F32); } - // Repackage the results. Athough this tuple is redundant when convert is not + // Repackage the results. Although this tuple is redundant when convert is not // inserted, TupleSimplifier eliminates the Tuple eventually std::unique_ptr replacing_tuple = HloInstruction::CreateTuple( {new_gte, diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc index 4a4198f2fc9..f3fdc6b04e6 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc @@ -72,7 +72,7 @@ void CheckInputOutputPrimitivetypeAreValid(const HloInstruction* hlo) { // The last operand is the feature index which must be int64. CHECK_EQ(hlo->operand(num_operands - 1)->shape().element_type(), S64) - << "Not yet impelemented"; + << "Not yet implemented"; // Check Outputs. if (hlo->shape().IsTuple()) { diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc b/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc index 17c02b64db5..6a5eb226be0 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc @@ -143,7 +143,7 @@ static std::vector GetRelevantConvs( // instruction to cuDNN convolution that may need padding to figure out the // desired padded input and output tensor shapes and store the desired // shapes in new_input_shapes and new_input_shapes. Notice that -// new_input_shapes is a vector for multiple input tesnsors. This function +// new_input_shapes is a vector for multiple input tensors. This function // shall return true, if padding is necessary or false otherwise in addition to // status. static StatusOr ResolveAndPad( @@ -175,7 +175,7 @@ static StatusOr ResolveAndPad( // Don't run this pass on GPUs without tensor cores -- it will make them slower! // // TODO(jlebar): Also pad dots. 
-static StatusOr TryResolvePadedShapesForTensorCore( +static StatusOr TryResolvePaddedShapesForTensorCore( HloCustomCallInstruction* conv, std::vector* new_input_shapes_ptr, Shape* new_result_shape_ptr) { TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); @@ -278,7 +278,7 @@ static StatusOr TryResolvePadedShapesForTensorCore( // Adds padding to cudnn integer convolutions to make input and output feature // maps multiple of 4 -static StatusOr TryResolvePadedShapesForIntegerConvolution( +static StatusOr TryResolvePaddedShapesForIntegerConvolution( HloCustomCallInstruction* conv, std::vector* new_input_shapes_ptr, Shape* new_result_shape_ptr) { TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); @@ -390,14 +390,14 @@ StatusOr CudnnPadForConvolutions::Run(HloModule* module) { for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { TF_ASSIGN_OR_RETURN( bool local_changed, - ResolveAndPad(conv, TryResolvePadedShapesForIntegerConvolution)); + ResolveAndPad(conv, TryResolvePaddedShapesForIntegerConvolution)); changed |= local_changed; } for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { if (is_volta_or_later_) { TF_ASSIGN_OR_RETURN( bool local_changed, - ResolveAndPad(conv, TryResolvePadedShapesForTensorCore)); + ResolveAndPad(conv, TryResolvePaddedShapesForTensorCore)); changed |= local_changed; } } diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index f707a87d79e..b4ccf758e94 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -215,7 +215,7 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // would occur if 'fusion' were merged into multiple users. // // If 'fusion' has just one user, then an earlier fusion pass chose not to - // fuse this producer/comsumer pair (likely because of expensive instruction + // fuse this producer/consumer pair (likely because of expensive instruction // re-use by the consumer), and so we honor that choice here as well. if (absl::c_any_of(fusion->fused_instructions(), [](const HloInstruction* instruction) { @@ -230,7 +230,7 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Skip 'fusion' instruction if merging it into all users would result in a // net increase in bytes transferred (currently allowing the net bytes - // transferred to be exceeded up to ~10% in exhange for eliminating the + // transferred to be exceeded up to ~10% in exchange for eliminating the // overhead from a GPU kernel launch). const double current_bytes_transferred = GetCurrentBytesTransferred(fusion); const double merged_bytes_transferred = GetMergedBytesTransferred(fusion); diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index 50ed7448790..47fd9bbfb09 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -109,9 +109,9 @@ ENTRY MergeSharedFusionInstruction.Computation0 { // This is because the bytes read by Fusion2 (when replicated if the instruction // is merged into Fusion0 and Fusion1) would exceed the bytes transferred // threshold. 
-TEST_F(FusionMergerTest, BytesTransferredThresholdExeceeded) { +TEST_F(FusionMergerTest, BytesTransferredThresholdExceeded) { auto module = ParseAndReturnVerifiedModule(R"( -HloModule BytesTransferredThresholdExeceeded +HloModule BytesTransferredThresholdExceeded comp.2 { state.param_1.1 = (f32[4]{0}, f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) @@ -138,7 +138,7 @@ comp { ROOT add.5 = f32[4]{0} add(multiply.2, constant.param_1.1) } -ENTRY BytesTransferredThresholdExeceeded.Computation2 { +ENTRY BytesTransferredThresholdExceeded.Computation2 { constant = f32[4]{0} constant({1, 1, 1, 1}) state = (f32[4]{0}, f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) fusion.2 = f32[4]{0} fusion(state), kind=kLoop, calls=comp.2 @@ -157,9 +157,9 @@ ENTRY BytesTransferredThresholdExeceeded.Computation2 { // Fusion2 is merged into Fusion0 and Fusion1, because bytes read from Param by // Fusion2 is reduced for this test which makes the merge operation into its // operand below the bytes transferred threshold. -TEST_F(FusionMergerTest, BytesTransferredThresholdNotExeceeded) { +TEST_F(FusionMergerTest, BytesTransferredThresholdNotExceeded) { auto module = ParseAndReturnVerifiedModule(R"( -HloModule BytesTransferredThresholdNotExeceeded +HloModule BytesTransferredThresholdNotExceeded comp.2 { state.param_1.1 = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) @@ -184,7 +184,7 @@ comp { ROOT add.4 = f32[4]{0} add(multiply.2, constant.param_1.1) } -ENTRY BytesTransferredThresholdNotExeceeded.Computation2 { +ENTRY BytesTransferredThresholdNotExceeded.Computation2 { constant = f32[4]{0} constant({1, 1, 1, 1}) state = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) fusion.2 = f32[4]{0} fusion(state), kind=kLoop, calls=comp.2 diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc index bf175999c55..71a86207987 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc @@ -294,7 +294,7 @@ StatusOr GpuConvAlgorithmPicker::PickBestAlgorithm( allocator->GetStream(stream_exec_->device_ordinal())); StatusOr result_or(InternalError("Unknown platform.")); // Check StreamExecutor on which platform it is. ROCm and Cuda implementation - // have diverged. Secifically, we need to make sure redzone allocator related + // have diverged. Specifically, we need to make sure redzone allocator related // utilities are not used in ROCm routine if (stream_exec_->platform_kind() == se::PlatformKind::kROCm) { result_or = PickBestAlgorithmNoCacheRocm(instr, allocator, stream); @@ -349,7 +349,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( optional comparator; // Use the first algorithm that's supported as reference. There isn't a - // particular reason to use it, as any algorithm sufficies. It doesn't make + // particular reason to use it, as any algorithm suffices. It doesn't make // this algorithm considered correct, though. se::DeviceMemoryBase reference_result_buffer; AlgorithmDesc first_algorithm; @@ -462,7 +462,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( << instr->ToString() << ": " << compare_result.status(); if (compare_result.status().code() == tensorflow::error::RESOURCE_EXHAUSTED) { - // Possibly OOM. Propatate the error. + // Possibly OOM. Propagate the error. 
return compare_result.status(); } CHECK(!crash_on_checking_failure); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc index da090f2e5e9..5fa102ac785 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc @@ -213,7 +213,7 @@ bool GpuConvPaddingLegalization::CanonicalizeBackwardFilterConvolution( // BackwardFilterConv(ABCD, xyz, padding_low=1, padding_high=2) // is equivalent to // ABCD0 = Pad(ABCD, padding_high=1) - // BackwardFilterConv(ABCD0, xyz, padding_low=pading_high=1) + // BackwardFilterConv(ABCD0, xyz, padding_low=padding_high=1) // We choose the lesser of padding_low and padding_high as the new padding. HloInstruction* input = backward_conv->mutable_operand(0); Window new_backward_conv_window = backward_conv->window(); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h index f12d47980f3..41825a33174 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h @@ -53,7 +53,7 @@ class GpuDebugInfoManager { // Register an active module to GpuDebugInfoManager. We will keep track all // existing HloModules within the process. - // Modules with same module id can be registered and tracked seperately. + // Modules with same module id can be registered and tracked separately. void RegisterModule( const ModuleIdentifier& module_id, std::shared_ptr hlo_module, std::shared_ptr buffer_assignment); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 93af1cd995e..a879e6faf32 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -121,8 +121,8 @@ Status GpuExecutable::CheckCompatibilityWithServiceExecutableRunOptions( main_stream->parent()->GetDeviceDescription().cuda_compute_capability( &stream_compute_compatibility.first, &stream_compute_compatibility.second); - GpuVersion nvdia_compute_compatibility = stream_compute_compatibility; - TF_RET_CHECK(nvdia_compute_compatibility == gpu_version_) + GpuVersion nvidia_compute_compatibility = stream_compute_compatibility; + TF_RET_CHECK(nvidia_compute_compatibility == gpu_version_) << "Compute capability mismatch; expected {" << absl::get>(gpu_version_).first << ", " << absl::get>(gpu_version_).second << "}, but was {" diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index 51e86a9f8ee..ca1d11b7b7d 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -118,7 +118,7 @@ class GpuExecutable : public Executable { // Computes annotations for each thunk and store them in thunk_annotations_. void ComputeThunkAnnotations(); - // GpuExecutable check with either AMD's ISA version, or Nvdia's major minor + // GpuExecutable check with either AMD's ISA version, or Nvidia's major minor // version for compute capability, depending on the hardware. 
Status CheckCompatibilityWithServiceExecutableRunOptions( const ServiceExecutableRunOptions* run_options); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc b/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc index 24738683a19..86faa807cb7 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc @@ -138,7 +138,7 @@ bool ShapesCompatibleForMultiOutputFusion(const HloInstruction& instr1, }; // Multi-output fusion kernels share a common parallel loop. The loop - // dimenstions are determined by instruction shapes. + // dimensions are determined by instruction shapes. auto get_loop_shape = [&](const HloInstruction* element_instr) { // Special-case reduction-to-vector ops: The loop dimensions are determined // by the shape of the first operand. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h index 145975e6f49..9d5246c3600 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h @@ -37,7 +37,7 @@ bool IsLoopFusible(const HloInstruction& instr); // The code emitted for reduce-rooted input fusions (EmitReductionToVector) // suffers from poor data locality if the layouts of input parameters differ. In -// such situtations it is better not to fuse. Only input params with +// such situations it is better not to fuse. Only input params with // maximum rank are considered. Params with smaller ranks will be broadcasted // and have not been observed to cause data locality issues. // TODO(b/111977086): Improve reduce emitters to remove this limitation. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 75c9d93c63b..f4644c1765b 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -87,7 +87,7 @@ HeuristicLayoutAssignment(const HloInstruction* instr, // We could have used a mixed layout combination, e.g. (NHWC, NCHW, NCHW), // which on paper gives good performance. However, there are two observations: // * a mixed layout combination is more cuDNN-bug prone, based on empirical - // envidence. + // evidence. // * we've also observed that for mixed layouts, cuDNN transposes data back // and forth from a different layout combination. If we end up with // transposes anyway, we prefer to have them in XLA, as they can be fused. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc index 11a829a12b4..05fa798dc39 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc @@ -165,7 +165,7 @@ Status GpuTransferManager::TransferLiteralFromOutfeed( absl::make_unique(literal, index)); }); - // Give the tree of buffers to the outfeed mananger. The device will fill it + // Give the tree of buffers to the outfeed manager. The device will fill it // while we're waiting for it below. 
gpu::OutfeedManager* outfeed_manager = gpu::GetOrCreateOutfeedManager(); outfeed_manager->EnqueueDestination(&outfeed_buffers); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 2f8fd5e01cf..e5d5e540309 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -250,7 +250,7 @@ int ComputeMaxUnrollFactor(const HloInstruction* hlo) { // Otherwise, the return type is i64. llvm::Type* GetIndexTypeForKernel(const HloInstruction* hlo, int64 launch_size, llvm::IRBuilder<>* b) { - // Find the unnested hlo instructon for which the kernel is generated for. + // Find the unnested hlo instruction for which the kernel is generated for. const HloInstruction* unnested_hlo = hlo; const HloComputation* computation = hlo->parent(); if (computation->IsFusionComputation()) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 8df348bc5c0..fb64da6b43e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -45,7 +45,7 @@ namespace gpu { // Examples of things that are not unnested computations: // // - The reducer of a kReduce HLO. This is emitted using IrEmitterNested. -// - The body of a fusion node. IrEmitterUnenested emits the relevant code +// - The body of a fusion node. IrEmitterUnnested emits the relevant code // within a kernel function using FusedIrEmitter. (FusedIrEmitter is not // really an IrEmitter, but is more an "IR generator generator".) // diff --git a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h index 2eede7036cf..218f45631f5 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h +++ b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h @@ -37,7 +37,7 @@ namespace gpu { // Currently, there are two main use cases for a tiling scheme. First, we // implement kernels with 0-2-1 memory transpose using shared memory to improve // memory access pattern. Second, we implement reduction to contiguous -// dimensions in layout, with or without memory tranpsose, to achieve better +// dimensions in layout, with or without memory transpose, to achieve better // memory access pattern as well as to reduce the need numbers of executed // expensive instructions, such as thread synchronization related instructions // and atomic operations. For both use cases, we can apply a normalization to diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index d7ca14ad273..8881f29fe63 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -522,7 +522,7 @@ StatusOr CompileToPtx(llvm::Module* module, GpuVersion gpu_version, std::unique_ptr target_machine = NVPTXGetTargetMachine( default_target_triple, *compute_capability, hlo_module_config); - // Link with libdeivce, and optimize the LLVM module. + // Link with libdevice, and optimize the LLVM module. 
TF_RETURN_IF_ERROR(LinkAndOptimizeModule( module, gpu_version, hlo_module_config, libdevice_dir_path, NVPTXTargetModuleLinker, default_target_triple, target_machine.get(), diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc index 594a423bda9..ccb1b7311b7 100644 --- a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc @@ -136,7 +136,7 @@ std::vector GetProducerConsumerMultiOutputFusionCandidates( // Do not fuse a producer if the other operands of the fusion are // reachable from the producer, this would create a cycle. auto operand_reachable_from_producer = [&](const HloInstruction* operand) { - // If a get-tuple-elment instruction is not in the reachability + // If a get-tuple-element instruction is not in the reachability // map, it has been created by fusion in this pass. Simply move // on to its operand, which is in the reachability map. if (!reachability.IsPresent(operand) && diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 6635b68899d..fa01d75d35a 100755 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -99,7 +99,7 @@ string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) { "uses routines from libdevice.", hlo_module_config); - // GetCudaRootCandidates always inclues ".", but but if everything fails, we + // GetCudaRootCandidates always includes ".", but if everything fails, we // return it anyway. Better than returning the empty string. return "."; } diff --git a/tensorflow/compiler/xla/service/gpu/stream_executor_util.h b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h index 3e2ae241a03..684143b2d04 100644 --- a/tensorflow/compiler/xla/service/gpu/stream_executor_util.h +++ b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h @@ -82,7 +82,7 @@ se::GpuAsmOpts PtxOptsFromConfig(const HloModuleConfig& hlo_module_config); // `buffer_type` determines what buffer would be filled out with. // // Precondition: `buffer_type` is a floating point type, `rng_state` needs to be -// initalized to zero on the first use. +// initialized to zero on the first use. void InitializeBuffer(se::Stream* stream, PrimitiveType buffer_type, int64* rng_state, se::DeviceMemoryBase buffer); diff --git a/tensorflow/compiler/xla/service/gpu/thunk_emitter.h b/tensorflow/compiler/xla/service/gpu/thunk_emitter.h index 55d92c74794..49d71192e77 100644 --- a/tensorflow/compiler/xla/service/gpu/thunk_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/thunk_emitter.h @@ -25,7 +25,7 @@ namespace xla { namespace gpu { // Implements handling of GPU execution for HLO operations that are handed off -// to specialzied thunks that do not require code generation. Intended to be +// to specialized thunks that do not require code generation. Intended to be // mixed into GPU emitters. class ThunkEmitter { public: diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index 1810accebfc..384ae272dc1 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -185,7 +185,7 @@ class BufferValueMap { }; // If the value shows up in a root instruction, alias it with parameter - // intruction.
+ // instruction. for (const HloPosition& pos : value.positions()) { if (pos.instruction == module_->entry_computation()->root_instruction()) { ShapeIndex output_index = pos.index; @@ -404,7 +404,7 @@ bool HloAliasAnalysis::InstructionBuffersAreDistinct( } } else { // It's possible for multiple values at this index to have the same - // HloBuffer. This does not result in non-distictness. To account for + // HloBuffer. This does not result in non-distinctness. To account for // this case, add all of the buffers at this index after checking // whether each buffer exists at an earlier index. This is a corner // case, however, as the number of values at an index is almost always diff --git a/tensorflow/compiler/xla/service/hlo_buffer.h b/tensorflow/compiler/xla/service/hlo_buffer.h index 91597d6f705..870a1a78994 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.h +++ b/tensorflow/compiler/xla/service/hlo_buffer.h @@ -54,7 +54,7 @@ namespace xla { // HloValue{%cond_param}. // // HloBuffers may appear at different HloPositions in the module mirroring the -// same propery of HloValues. For example: +// same property of HloValues. For example: // // %sub = Sub(...) // %add = Add(...) diff --git a/tensorflow/compiler/xla/service/hlo_casting_utils.h b/tensorflow/compiler/xla/service/hlo_casting_utils.h index 7f73bba0365..4cae37add73 100644 --- a/tensorflow/compiler/xla/service/hlo_casting_utils.h +++ b/tensorflow/compiler/xla/service/hlo_casting_utils.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Casting utilitiy functions for HLO instructions. +// Casting utility functions for HLO instructions. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CASTING_UTILS_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CASTING_UTILS_H_ diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index c2bbe3ccd71..fa116ae9da1 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -838,7 +838,7 @@ bool HloComputation::Equal(const HloComputation& other, continue; } visited.emplace(pair); - // TODO(b/123082518): Avoid recursively invoking == becasue it may + // TODO(b/123082518): Avoid recursively invoking == because it may // cause a stack overflow with deeply nested subcomputations. bool identical_ignoring_operands = pair.first->Identical( *pair.second, diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 81c6bfc3ecf..579e4360092 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -419,7 +419,7 @@ class HloComputation { // the HLO computation with the exception of fusion computation. A parameter // instruction is removable for a fusion computation. // - // Note that IsSafelyRemovable() is a necassarily condition to remove an + // Note that IsSafelyRemovable() is a necessary condition to remove an // instruction rather than a sufficient condition.
For example, instructions // with side-effect (e.g., Send, Infeed) may be removed from a computation, // but the transformation must guarantee the invariants relevant to the diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 10ec9ea3757..38231df1f1d 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -324,7 +324,7 @@ Status HloCostAnalysis::HandleDot(const HloInstruction* dot) { for (auto dim : dnums.lhs_contracting_dimensions()) { reduction_width *= lhs_shape.dimensions(dim); } - // Each output elment requires reduction_width FMA operations. + // Each output element requires reduction_width FMA operations. current_properties_[kFlopsKey] = kFmaFlops * ShapeUtil::ElementsIn(dot_shape) * reduction_width; return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 8df700802b6..6da93d28079 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -149,7 +149,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { // if the HLO was not found to have a cost in the analysis. // // Note that the cost for sub HLO instructions are also returned if asked. For - // example, body and condidition of a while, fused instructions within a + // example, body and condition of a while, fused instructions within a // fusion, or the add instruction of a reduce. int64 flop_count(const HloInstruction& hlo) const; int64 transcendental_count(const HloInstruction& hlo) const; diff --git a/tensorflow/compiler/xla/service/hlo_cse_test.cc b/tensorflow/compiler/xla/service/hlo_cse_test.cc index 1eb0260468c..ba27611c6b0 100644 --- a/tensorflow/compiler/xla/service/hlo_cse_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cse_test.cc @@ -382,9 +382,9 @@ condition=%condition.1, body=%body // Test two while loops with identical bodies and same inputs, but different // conditions -TEST_F(HloCseTest, WhileLoopsIdenticalBodiesAndInputDifferntConditions) { +TEST_F(HloCseTest, WhileLoopsIdenticalBodiesAndInputDifferentConditions) { const char* const hlo_string = R"( - HloModule WhileLoopsIdenticalBodiesAndInputDifferntConditions + HloModule WhileLoopsIdenticalBodiesAndInputDifferentConditions %body (param: (f32[], f32[])) -> (f32[], f32[]) { %param = (f32[], f32[]) parameter(0) @@ -404,7 +404,7 @@ index=1 %add = f32[] add(f32[] %get-tuple-element, f32[] %get-tuple-element.1) ROOT %constant.1 = pred[] constant(true) } - ENTRY %WhileLoopsIdenticalBodiesAndInputDifferntConditions () -> (f32[], + ENTRY %WhileLoopsIdenticalBodiesAndInputDifferentConditions () -> (f32[], f32[]) { %constant.2 = f32[] constant(1) %constant.3 = f32[] constant(2) %tuple.1 = (f32[], f32[]) tuple(f32[] %constant.2, f32[] %constant.3) %while = (f32[], f32[]) while((f32[], f32[]) %tuple.1), diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index ecfa6703f00..11d3c5fdbd0 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -1092,7 +1092,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( // TODO(b/80315712): This code is in a bit of a weird intermediate state // at the moment. The in-place DUS check really needs to be common to all // backends, so it runs first. 
Then we run the backend-specific check if - // provided, or go through the target-indepdendent check if not. + // provided, or go through the target-independent check if not. // Unfortunately, the notionally "target-independent" path actually contains // some target-specific code, so we can't run all of it *in addition* to the // target-specific function, like the interface documentation says. diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index d6617dea1c4..330779b5ebd 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -1857,7 +1857,7 @@ TEST_P(HloDataflowAnalysisTest, NestedConditionals) { // inner_conditional((PRED, F32[], F32[]) %param_cond): // %pred_cond = GetTupleElement(%param_cond, 0) // %true_operand_cond = GetTupleElement(%param_cond, 1) - // %false_opearnd_cond = GetTupleElement(%param_cond, 2) + // %false_operand_cond = GetTupleElement(%param_cond, 2) // return Conditional(%pred_cond, %true_operand_cond, computation1, // %false_operand_cond, computation2) // diff --git a/tensorflow/compiler/xla/service/hlo_domain_isolator.h b/tensorflow/compiler/xla/service/hlo_domain_isolator.h index 2274c3a96c2..1fa996dd683 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_isolator.h +++ b/tensorflow/compiler/xla/service/hlo_domain_isolator.h @@ -35,7 +35,7 @@ class HloDomainIsolator : public HloModulePass { // Creates a new kDomain instruction for the edge between the use instruction // (the first HloInstruction argument), and the operand instruction (the // third HloInstruction argument) if the interesting attribute of the - // instruction differes from the attribute of the root (the second + // instruction differs from the attribute of the root (the second // HloInstruction argument). // Returns nullptr in case no domain separation is necessary. using DomainCreator = std::function HloDomainRemover::RunContext::Run() { VLOG(4) << "Processing metadata domain: '" << remover_->kind_ << "'"; int64 removed_domains = 0; for (HloComputation* computation : module_->computations()) { - // First create the domain instruciton sets. A domain instruction set is + // First create the domain instruction sets. A domain instruction set is // the set of instructions whose edges never cross a kDomain instruction.
TF_ASSIGN_OR_RETURN(std::unique_ptr domain_map, HloDomainMap::Create(computation, remover_->kind_)); diff --git a/tensorflow/compiler/xla/service/hlo_domain_test.cc b/tensorflow/compiler/xla/service/hlo_domain_test.cc index fd4fb0246d8..c2e0c907a24 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_test.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_test.cc @@ -617,7 +617,7 @@ ENTRY entry { auto tuple0 = FindInstruction(module.get(), "tuple.0"); tuple0->clear_sharding(); - // Insert the following instructons above and below tuple.0, to emulate other + // Insert the following instructions above and below tuple.0, to emulate other // passes effects: // COPY.0 // \ / diff --git a/tensorflow/compiler/xla/service/hlo_domain_verifier.cc b/tensorflow/compiler/xla/service/hlo_domain_verifier.cc index dc514ae3e5c..f8e1973d5b9 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_verifier.cc @@ -35,7 +35,7 @@ class HloDomainVerifier::RunContext { private: // If the verifier caller passed an empty vector for kinds, we collect all the - // avalable domain types. + // available domain types. Status PopulateDomainKinds(); HloModule* module_; @@ -67,7 +67,7 @@ Status HloDomainVerifier::RunContext::Run() { TF_RETURN_IF_ERROR(PopulateDomainKinds()); for (HloComputation* computation : module_->computations()) { for (auto& kind : verifier_->kinds_) { - // First create the domain instruciton sets. A domain instruction set is + // First create the domain instruction sets. A domain instruction set is // the set of instructions whose edges never cross a kDomain instruction. TF_ASSIGN_OR_RETURN(std::unique_ptr domain_map, HloDomainMap::Create(computation, kind)); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 2145be59aca..b2435d3fdf3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1133,7 +1133,7 @@ bool CopyDataFromInput(const Literal& input_literal, int64 input_start, auto base_case = [&](int64 axis, int64 dst_index, int64 src_index, bool within_src_bounds) { if (axis == 0) { - // For IRFFT, the negavie frequencies are only needed for the sweep along + // For IRFFT, the negative frequencies are only needed for the sweep along // the X axis, which is performed last. Leave this part of the working set // uninitialized until then. const int64 length = fft_lengths[axis]; @@ -1684,7 +1684,7 @@ class OutputOffsetIndexToInputIndex { std::vector input_index_; }; -// Rehapes the gather indices input to have a trailing degenerate `1` dimension +// Reshapes the gather indices input to have a trailing degenerate `1` dimension // if necessary. Hands over the ownership of the newly created literal (if // there is one) to `reshaped_start_indices`. 
static StatusOr> ReshapedGatherIndices( diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index de5a9aa4c2c..fc9d42c1b17 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -253,7 +253,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCustomCall(HloInstruction* custom_call) override; // Unsupported HLOs, note some of them (such as BatchNorm*) are typically - // expanded in a semantic-preserving way into other HLOs by adding exanpsion + // expanded in a semantic-preserving way into other HLOs by adding expansion // HLO pass to the HLO optimization pass during compilation, which can then be // handled by the evaluator. Status HandleBatchNormGrad(HloInstruction* batch_norm_grad) override { @@ -304,7 +304,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // // TODO(b/35950897): have better memory management here to free instructions // that are no longer a parent for any other subsequent instruction in - // post-orderring. + // post-ordering. // // Must be cleared for each evaluation. // diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 507867c013d..516a4283448 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -387,7 +387,7 @@ class HloDotDumper { const HloExecutionProfile* profile_; // may be null const NodeFilter filter_; - // Each HloInstruction dumped gets a monotically-increasing node ID. This + // Each HloInstruction dumped gets a monotonically-increasing node ID. This // must start at 1, because that's where graphviz's accounting starts. int64 next_node_id_ = 1; absl::flat_hash_map node_ids_; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h index 6bd34f8a127..689007ff9ab 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -81,8 +81,8 @@ class HloInputOutputAliasConfig { // Checks whether the provided output index has already been aliased. bool OutputHasAlias(const ShapeIndex& output_index) const; - // (De)Serializes an HloInputOutoutAliasConfig to/from an - // HloInputOutoutAliasProto. + // (De)Serializes an HloInputOutputAliasConfig to/from an + // HloInputOutputAliasProto. HloInputOutputAliasProto ToProto() const; static StatusOr CreateFromProto( diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index bc099371d08..7b20b3d6b66 100755 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1637,7 +1637,7 @@ HloInstruction::~HloInstruction() { operands_[operand_num] = nullptr; } - // Update users. Set `nullptr` to the correpsonding operand slot for users. + // Update users. Set `nullptr` to the corresponding operand slot for users. for (auto& user : this->users()) { for (int i = 0; i < user->operand_count(); ++i) { if (user->operands_[i] == this) { @@ -2693,7 +2693,7 @@ bool HloInstruction::IsFusible() const { case HloOpcode::kReduce: case HloOpcode::kReduceWindow: return true; - // Side effecting instrutions cannot be fused. + // Side effecting instructions cannot be fused. 
default: return !HasSideEffect(); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 238a96e52a0..ba9fdbbe4c2 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -643,7 +643,7 @@ class HloInstruction { const std::vector& replica_groups, const absl::optional& channel_id); - // Creates a communitation instructions that permutes data cross replicas. + // Creates a communication instruction that permutes data across replicas. // Data is sent/received according to the (source_replica_id, // target_replica_id) pairs in `source_target_pairs`. If a replica id is not a // target_replica_id in any pair, the output on that replica is a tensor diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index a9d9eb9cfa4..e5735bea843 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1545,7 +1545,7 @@ TEST_F(HloInstructionTest, StringifyScatter) { "to_apply=%Scatter.update"); } -TEST_F(HloInstructionTest, CanonnicalStringificationFusion) { +TEST_F(HloInstructionTest, CanonicalStringificationFusion) { // Tests stringification of a simple op, fusion, while, and conditional. const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10}); const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10}); @@ -1587,7 +1587,7 @@ TEST_F(HloInstructionTest, CanonnicalStringificationFusion) { EXPECT_EQ(fusion->ToString(options), expected_fusion); } -TEST_F(HloInstructionTest, CanonnicalStringificationWhile) { +TEST_F(HloInstructionTest, CanonicalStringificationWhile) { // Tests stringification of a simple op, fusion, while, and conditional. const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10}); const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10}); @@ -1643,7 +1643,7 @@ TEST_F(HloInstructionTest, CanonnicalStringificationWhile) { EXPECT_EQ(loop->ToString(options), expected_loop); } -TEST_F(HloInstructionTest, CanonnicalStringificationConditional) { +TEST_F(HloInstructionTest, CanonicalStringificationConditional) { // Tests stringification of a simple op, fusion, while, and conditional. const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10}); const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10}); diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index a150efd8c83..94b5926d876 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -1356,7 +1356,7 @@ void HloFusionInstruction::MergeFusionInstructionIntoMultiOutput( HloFusionInstruction* instruction_to_merge) { // Add all non-parameter fused instructions to 'unfused_instructions' to be // merged into 'this'. `old_to_new' maps the instructions in the fused node - // to the disaseembled fusion instructions. + // to the disassembled fusion instructions. // Note that we add the unfused instructions to this->parent_ computation. // This is necessary because the unique_id needs for an instruction and // it's only added when inserting to the computation.
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 1863c78e7e1..75c7dd9f1ff 100755 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -768,7 +768,7 @@ class HloFusionInstruction : public HloInstruction { // Merges the fused instructions from 'instruction_to_merge' into the // fused instruction set of 'this', updating operands as necessary. // - // Predondition: 'instruction_to_merge' must be an operand of 'this'. + // Precondition: 'instruction_to_merge' must be an operand of 'this'. void MergeFusionInstruction(HloFusionInstruction* instruction_to_merge); // Merges the fused instructions from instruction_to_merge into the fused diff --git a/tensorflow/compiler/xla/service/hlo_live_range_test.cc b/tensorflow/compiler/xla/service/hlo_live_range_test.cc index 232c6b95e88..e2d320beffd 100644 --- a/tensorflow/compiler/xla/service/hlo_live_range_test.cc +++ b/tensorflow/compiler/xla/service/hlo_live_range_test.cc @@ -144,7 +144,7 @@ TEST_F(HloLiveRangeTest, MultiplyAdd) { } TEST_F(HloLiveRangeTest, LiveOutBuffers) { - // If a buffer is live out, its life range is extened to the end of + // If a buffer is live out, its life range is extended to the end of // computation. auto builder = HloComputation::Builder(TestName()); auto paramA = builder.AddInstruction( @@ -181,7 +181,7 @@ TEST_F(HloLiveRangeTest, LiveOutBuffers) { } TEST_F(HloLiveRangeTest, InstructionScheduledAfterRoot) { - // If a buffer is live out, its life range is extened to the end of + // If a buffer is live out, its life range is extended to the end of // computation. auto builder = HloComputation::Builder(TestName()); auto paramA = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc index e14bcfa7f67..994c6628f43 100644 --- a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc @@ -113,7 +113,7 @@ void MarkLiveAtAllIndices(const HloInstruction* instruction, // Propagates liveness through Tuple instructions. // *) For each tuple operand: // *) For tuple output shape index associated with operand: -// *) Propgate live shape indices to tuple operand at the associated +// *) Propagate live shape indices to tuple operand at the associated // shape index in the operands output, and add to worklist. void PropagateLivenessThroughTuple( const HloInstruction* instruction, @@ -260,7 +260,7 @@ HloLivenessAnalysis::HloLivenessAnalysis(const HloModule& module) void HloLivenessAnalysis::RunAnalysis() { Worklist worklist; Workset workset; - // Add entry compuation root instruction. + // Add entry computation root instruction. 
MarkLiveAtAllIndices(module_.entry_computation()->root_instruction(), &live_index_map_, &worklist, &workset); for (auto* computation : module_.computations()) { diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc index 35db6aa0635..03d353aa1e0 100644 --- a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc @@ -136,7 +136,7 @@ TEST_F(HloLivenessAnalysisTest, NestedTupleAtEntryRoot) { EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "constant.3"), {})); } -// Tests that GTE at entry root of Tuple instruction only propgates liveness +// Tests that GTE at entry root of Tuple instruction only propagates liveness // to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfTuple) { auto module = ParseAndReturnVerifiedModule(R"( @@ -158,7 +158,7 @@ TEST_F(HloLivenessAnalysisTest, GteOfTuple) { EXPECT_FALSE(liveness.IsLive(GetInstruction(module.get(), "constant.2"), {})); } -// Tests that GTE at entry root of nested Tuple instruction only propgates +// Tests that GTE at entry root of nested Tuple instruction only propagates // liveness to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfNestedTuple) { auto module = ParseAndReturnVerifiedModule(R"( @@ -196,7 +196,7 @@ TEST_F(HloLivenessAnalysisTest, GteOfNestedTuple) { } // Tests that GTE of GTE (at entry root) of nested Tuple instruction only -// propgates liveness to the live elements in tuple. +// propagates liveness to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfGteOfNestedTuple) { auto module = ParseAndReturnVerifiedModule(R"( HloModule SimpleModule diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc index 50eaee95455..bda297540ff 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc @@ -68,7 +68,7 @@ using ::tensorflow::strings::HumanReadableNumBytes; // A D E F B C G // , which has a maximum memory usage of 6 (B is alive while F is executing). // -// An optimal way to shedule the previous graph is: +// An optimal way to schedule the previous graph is: // A B C D E F G // , which has a maximum memory usage of 5 (when F is executing). // diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 3e9630a13c4..5e662e0bebc 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -286,7 +286,7 @@ class HloModule { // Returns true if the module has a schedule set. bool has_schedule() const { return schedule_.has_value(); } - // Returns the schedue of the module. CHECK fails if no schedule is set. + // Returns the schedule of the module. CHECK fails if no schedule is set. 
const HloSchedule& schedule() const { return *schedule_; } HloSchedule& schedule() { return *schedule_; } diff --git a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc index dba699dd8c5..301faa75f0a 100644 --- a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc +++ b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc @@ -187,7 +187,7 @@ TEST_F(HloModuleDceTest, OneWhileWithDeadTupleElement) { } // Tests that a tuple element {1} used by condition computation (which appears -// dead in while.body{1} and at while.result{1}) propgates liveness of this +// dead in while.body{1} and at while.result{1}) propagates liveness of this // tuple element to while.body{1} and at while.result{1}. TEST_F(HloModuleDceTest, OneWhileWithTupleElementUsedByCond) { auto module = ParseAndReturnVerifiedModule(R"( diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.h b/tensorflow/compiler/xla/service/hlo_module_group_util.h index d388fe51d0d..12a4614412a 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_util.h +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.h @@ -103,7 +103,7 @@ class HloModuleGroupUtil { absl::Span computations); // Updates the reachability of the given instruction, taking the global - // predeccessorss and successors into account. + // predecessors and successors into account. void UpdateReachabilityThroughInstruction( HloInstruction* instruction, HloReachabilityMap* reachability_map); diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index 2b77619f89b..f8295d579fb 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -506,7 +506,7 @@ TEST_F(HloOrderingTest, InterferenceWithOuterRoot) { absl::string_view hlo_string = R"( HloModule InterferenceWithOuterRoot, is_scheduled=true -Emmbedded (embedded_param: f32[4096,4096]) -> f32[4096,4096] { +Embedded (embedded_param: f32[4096,4096]) -> f32[4096,4096] { embedded_param = f32[4096,4096]{1,0} parameter(0) multiply = f32[4096,4096]{1,0} multiply(embedded_param, embedded_param) ROOT log = f32[4096,4096]{1,0} log(multiply) @@ -515,7 +515,7 @@ Emmbedded (embedded_param: f32[4096,4096]) -> f32[4096,4096] { ENTRY InterferenceWithOuterRoot { param = f32[4096,4096]{1,0} parameter(0) ROOT add = f32[4096,4096]{1,0} add(param, param) - call = f32[4096,4096]{1,0} call(param), to_apply=Emmbedded + call = f32[4096,4096]{1,0} call(param), to_apply=Embedded } )"; diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 3ecd0af3480..b05f76a1d29 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -294,7 +294,7 @@ class HloParserImpl : public HloParser { // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. bool ParseDxD(const std::string& name, std::vector* result); - // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. + // Parses window's pad sub-attribute, e.g., pad=0_0x3x3. 
bool ParseWindowPad(std::vector>* pad); bool ParseSliceRanges(SliceRanges* result); @@ -2297,7 +2297,7 @@ bool HloParserImpl::ParseTupleLiteral(Literal* literal, const Shape& shape) { // literal, (',' literal)* for (int i = 0; i < elements.size(); i++) { if (i > 0) { - ParseToken(TokKind::kComma, "exepcts ',' to separate tuple elements"); + ParseToken(TokKind::kComma, "expects ',' to separate tuple elements"); } if (!ParseLiteral(&elements[i], ShapeUtil::GetTupleElementShape(shape, i))) { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 29a6a5e4297..d65613fc4b8 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1203,7 +1203,7 @@ ENTRY Rng { }, // Reduce precision { -"ReducePrevison", +"ReducePrecision", R"(HloModule reduce_precision ENTRY ReducePrecision { @@ -2095,7 +2095,7 @@ ENTRY %ShortConstant.v4 () -> f32[67,89] { EXPECT_EQ(result.ValueOrDie()->ToString(HloPrintOptions()), original); } -TEST_F(HloParserTest, AttibutesAnyOrder) { +TEST_F(HloParserTest, AttributesAnyOrder) { const string original = R"(HloModule any_order_module ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,4,1] { diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc index e18521811c0..166ba1b0d99 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc @@ -457,7 +457,7 @@ TEST_P(IndirectUseTest, IndirectUseNotRematerialized) { // F32[1024] %call = call(Subcomputation, {%add_1}) // F32[1024] %add_2 = add(%bcast, call) // {F32[1024], F32[1024]} %tuple = tuple(%bcast, %add_2) - // F32[1024] %gte = GetTupleElememt(%tuple, 0) + // F32[1024] %gte = GetTupleElement(%tuple, 0) // F32[1024] %negate = negate(%gte) // // Subcomputation: diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index c077ccd95fe..3b5a80ce33b 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -80,7 +80,7 @@ class HloRunner { bool run_hlo_passes = false; // If true, executes on multiple threads using se::Stream::ExecuteOnStream. - // Othewise, executes using xla::Executable::ExecuteOnStreams. + // Otherwise, executes using xla::Executable::ExecuteOnStreams. bool use_threads = false; }; diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index 90a80a4421b..56479add95f 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -120,7 +120,7 @@ class HloSharding { // Retrieves a histogram of the devices used by the sharding. The returned // map has the device number as key, and the occurrence count as value. - // If a sharding does not have a device, it will not be incuded in the + // If a sharding does not have a device, it will not be included in the // histogram. The count argument, if not nullptr, will receive the total // number of elements this sharding is made of (one for array, N leaves for // tuples). 
diff --git a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc index 094d98bc6e5..837483268f3 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc @@ -310,7 +310,7 @@ StatusOr ApplyShardingFromUsers(HloInstruction* instruction, } // Tries to propagate the sharding information into the instructions that are -// part of the domain, in a reverse post order manner (users propoagate to +// part of the domain, in a reverse post order manner (users propagate to // instruction). StatusOr ApplyDomainShardingPass(const DomainMetadata::Domain& domain, const HloSharding& domain_sharding) { diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 4a325e5cb5b..015246c8cae 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -886,7 +886,7 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp(HloOpcode opcode, // To figure out the broadcast dimensions for the (constant) source for the // scalar-indexed node, we "simulate" the index transformation done by the - // existing broadcsat: + // existing broadcast: enum class IndexComponent { Broadcasted, NotBroadcasted }; std::vector simulated_index( broadcast_instr->shape().dimensions_size(), IndexComponent::Broadcasted); diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc index 5478c4a9291..d64d64eb5ee 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc @@ -35,7 +35,7 @@ class IndexedArrayAnalysisTest : public HloTestBase { } private: - // Replaces seqences of whitespace with a single space. This makes the + // Replaces sequences of whitespace with a single space. This makes the // strings being matched against "whitespace insensitive" which lets us indent // them for readability. string CanonicalizeWhitespace(const string& text) { diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index defaf4cd7ab..c1bbf791c73 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -607,7 +607,7 @@ Status LayoutAssignment::AddMandatoryConstraints( body_layout.result_shape(), instruction)); } else if (instruction->opcode() == HloOpcode::kConditional) { // Find the conditional branch with the most instructions and force all - // other computations to match that layout. A potentially better decison + // other computations to match that layout. A potentially better decision // could count the number FLOPs or how constrained the layouts are. int64 largest_branch = 0; int64 largest_instruction_count = diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index a0f61fc416d..ef30ec3088b 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -456,7 +456,7 @@ class LayoutAssignment : public HloModulePass { // when the instruction is a tuple, and in such case the index represents // the location from where the copy instruction was created from. 
// If the index is empty, the whole sharding will be propagated, even in case - // the intruction has a tuple sharding. + // the instruction has a tuple sharding. static void SetupCopiedInstruction(const HloInstruction& instruction, HloInstruction* copy, const ShapeIndex& index); @@ -508,7 +508,7 @@ class LayoutAssignment : public HloModulePass { // instructions can be set to match the computation. std::map computation_layouts_; - // Map from branch computations to the result layout they shuould apply. + // Map from branch computations to the result layout they should apply. std::map conditional_mismatch_; // Every copy added to the module by the layout assignment pass is registered diff --git a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc index ba199f35712..77ce26c7e84 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc @@ -24,7 +24,7 @@ namespace xla { namespace llvm_ir { bool MayBeImplementedAsInPlaceDynamicUpdateSlice(const HloInstruction* instr) { - // Today we can't emit a dynamic-update-slice if the DUS node is parallized; + // Today we can't emit a dynamic-update-slice if the DUS node is parallelized; // the emitter will not emit correct code. It's possible to change this, but // then ParallelTaskAssigner would have to somehow know whether a node *will* // be emitted as an in-place DUS, and it can't, because it doesn't have a diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index c4d527b6cbf..aa37b9e7be9 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -612,7 +612,7 @@ llvm::Function* CreateCpuFunction(llvm::FunctionType* function_type, // set. function->addFnAttr("denormal-fp-math", "preserve-sign"); - // Add the optize attribute to the function if optimizing for size. This + // Add the optimize attribute to the function if optimizing for size. This // controls internal behavior of some optimization passes (e.g. loop // unrolling). if (cpu::options::OptimizeForSizeRequested(module_config)) { diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index c1dc635eb81..669403fb8ad 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -1195,7 +1195,7 @@ Status MemorySpaceAssignment::SimplifyGraph() { instruction->user_count() == 0 && !instruction->HasSideEffect() && instruction != computation->root_instruction()) { VLOG(4) << "Instruction removed: " << instruction->ToString(); - // Ensure the exported preset assignments don't contain a refence to + // Ensure the exported preset assignments don't contain a reference to // the removed instruction. 
preset_assignments_->RemoveAssignmentForInstruction(instruction); // Instead of deleting the instruction from the schedule, replace it diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index 20551feb715..08d03cc8655 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -493,7 +493,7 @@ class MemorySpaceAssignment { // This struct contains mandatory memory assignments at a given time. E.g., an // input's required memory assignment time would correspond to the definition -// time of the parameter instruction, and an output's time would correspnd to +// time of the parameter instruction, and an output's time would correspond to // the time of last use. struct RequiredMemoryAssignment { MemorySpaceAssignment::MemorySpace memory_space; diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index 068834e5701..99c9df890d8 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -1171,7 +1171,7 @@ TEST_P(MemorySpaceAssignmentTest, NonEntryComputationSchedule5) { // // If a copy to alternate memory is inserted before foo, and if the size of // the while body is less than max prefetch interval so that the copy-done is - // kept in the alternate memory, then we end up refering to the copy-done in + // kept in the alternate memory, then we end up referring to the copy-done in // the root instruction of the while loop body. I.e., // // cs = copy-start(a) diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc index d7300f58364..84e239ae196 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc @@ -285,7 +285,7 @@ mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size, // operations of their parent loop, and `where` must be an ancestor of that // parent loop. // -// It always preseves the semantics of the program, therefore it may modify the +// It always preserves the semantics of the program, therefore it may modify the // hoisted operations or add extra loops at the hoisted place. mlir::Operation* HoistAndFix(llvm::iplist::iterator begin_op, llvm::iplist::iterator end_op, @@ -618,7 +618,7 @@ StatusOr TransformMlirConv( output_acc = llvm::cast( HoistAndFix(output_acc, tiled_cartesian_loops.front())); - // Hoist everyting before reduction loops (aka zero initializations of + // Hoist everything before reduction loops (aka zero initializations of // output_acc): // for (cartesian loops...) { // %output_acc = alloc() : memref(..., f32) @@ -752,7 +752,7 @@ StatusOr EmitConvolutionForwardAsMlir( // TODO(timshen): Implement a transformation that collects loads to a given // buffer, create a local alloc() for the accessed part, redirects all loads - // and stores to that local alloc(), and create code to ininitialize / + // and stores to that local alloc(), and create code to initialize / // writeback the local alloc() if needed. // TODO(timshen): Implement CUDA-specific lowering. 
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h index c8bc0a15acd..f0b95876775 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h +++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h @@ -24,7 +24,7 @@ namespace mlir_gpu { // Builds MLIR using custom_call that represents a foward convolution. // -// The generated function has the following signautre: +// The generated function has the following signature: // func @(%output: memref, // %input: memref, // %filter: memref) { ... } diff --git a/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc b/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc index 4107d92da7e..7855f1da1cf 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc @@ -104,7 +104,7 @@ FailoverCompiler::CompileAheadOfTime( const AotCompilationOptions& options) { // This is not supported by GPU compiler anyway. return Unimplemented( - "CompileAheadOfTime not implemeneted in failover compiler!"); + "CompileAheadOfTime not implemented in failover compiler!"); } HloCostAnalysis::ShapeSizeFunction FailoverCompiler::ShapeSizeBytesFunction() diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc index 92f7e5a08ac..ab880b3e110 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc @@ -230,7 +230,7 @@ static StatusOr> ComputeOperandToValueMap( has_failed = true; continue; } - // host_index is the argument positon to the surrounding function that + // host_index is the argument position to the surrounding function that // contains the launch. This index corresponds to HLO operand indices // by construction. auto host_index = launchop_operand->getArgNumber(); diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h index 403b5dfaff9..9be69f808c4 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.h +++ b/tensorflow/compiler/xla/service/multi_output_fusion.h @@ -79,7 +79,7 @@ class MultiOutputFusion : public HloModulePass { // Test if it's legal to fuse instr1 and instr2 into one fusion instruction. virtual bool LegalToFuse(HloInstruction* instr1, HloInstruction* instr2); - // Fuse HloInstrctuion instr1 and instr2 and return the fused instruction. + // Fuse HloInstruction instr1 and instr2 and return the fused instruction. // The other instruction is removed from its parent computation. virtual HloInstruction* Fuse(HloInstruction* instr1, HloInstruction* instr2); diff --git a/tensorflow/compiler/xla/service/op_expander_pass.h b/tensorflow/compiler/xla/service/op_expander_pass.h index 276e3d70b8e..49b3ba07031 100644 --- a/tensorflow/compiler/xla/service/op_expander_pass.h +++ b/tensorflow/compiler/xla/service/op_expander_pass.h @@ -34,7 +34,7 @@ class OpExpanderPass : public HloModulePass { virtual bool InstructionMatchesPattern(HloInstruction* instruction) = 0; // Returns a replacement for `instruction`, or nullptr if no replacement is - // neeeded (e.g. only the to_apply subcomputation of the instruction was + // needed (e.g. only the to_apply subcomputation of the instruction was // modified). 
virtual StatusOr ExpandInstruction( HloInstruction* instruction) = 0; diff --git a/tensorflow/compiler/xla/service/reshape_mover.cc b/tensorflow/compiler/xla/service/reshape_mover.cc index 9e2d7406940..cd11b211747 100644 --- a/tensorflow/compiler/xla/service/reshape_mover.cc +++ b/tensorflow/compiler/xla/service/reshape_mover.cc @@ -80,7 +80,7 @@ bool CanTriviallyChangeShape(const HloInstruction* instruction) { return true; } - // A broadcase of scalar can trivially change its shape. + // A broadcast of scalar can trivially change its shape. if (instruction->opcode() == HloOpcode::kBroadcast && ShapeUtil::IsScalar(instruction->operand(0)->shape())) { return true; diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 345a077e321..e12e1577211 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -660,7 +660,7 @@ Status Service::ExecuteGraphParallel(const ExecuteGraphParallelRequest* arg, const ExecuteGraphRequest& request = arg->requests(i); TF_RET_CHECK(request.has_computation()) << "computations may not be empty"; TF_RET_CHECK(request.computation().has_host_program_shape()) - << "programe shape may not be empty"; + << "program shape may not be empty"; // Get the executors. TF_ASSIGN_OR_RETURN(auto executors, GetExecutors(execution_options, @@ -837,7 +837,7 @@ Status Service::Compile(const CompileRequest* arg, CompileResponse* result) { return InvalidArgument("computations may not be empty"); } if (!arg->computation().has_host_program_shape()) { - return InvalidArgument("programe shape may not be empty"); + return InvalidArgument("program shape may not be empty"); } if (arg->execution_options().device_handles_size() > 1) { @@ -887,7 +887,7 @@ Status Service::Execute(const ExecuteRequest* arg, ExecuteResponse* result) { ResolveAndValidateArguments(arg->arguments(), replicas)); // Check that the replicated_arguments has the same shape and layout as the - // module config used when creating the exectuable. + // module config used when creating the executable. const int64 num_module_args = executable->module_config().entry_computation_layout().parameter_count(); if (num_module_args != arg->arguments_size()) { @@ -902,7 +902,7 @@ Status Service::Execute(const ExecuteRequest* arg, ExecuteResponse* result) { const Shape& shape_arg = replicated_arguments.front()[i]->on_host_shape(); if (!ShapeUtil::Equal(shape_module, shape_arg)) { return InvalidArgumentStrCat( - "The executable exepcts the ", i, "th argument in shape ", + "The executable expects the ", i, "th argument in shape ", ShapeUtil::HumanStringWithLayout(shape_module), " but sees ", ShapeUtil::HumanStringWithLayout(shape_arg)); } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 3e345448a47..b189e047254 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1354,7 +1354,7 @@ TEST_F(ShapeInferenceTest, DotWithTwoContractingDimsPasses) { } // BatchMatMul with different batch dimension sizes fails. 
-TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) { +TEST_F(ShapeInferenceTest, DotWithMismatchedBatchDimSizesFails) { Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 3, 14}); @@ -1373,7 +1373,7 @@ TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) { } // BatchMatMul with different batch dimension numbers passes -TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersPasses) { +TEST_F(ShapeInferenceTest, DotWithMismatchedBatchDimNumbersPasses) { Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 2, 14}); diff --git a/tensorflow/compiler/xla/service/tree_reduction_rewriter.h b/tensorflow/compiler/xla/service/tree_reduction_rewriter.h index a9852d88a6e..d6e1d4200e9 100644 --- a/tensorflow/compiler/xla/service/tree_reduction_rewriter.h +++ b/tensorflow/compiler/xla/service/tree_reduction_rewriter.h @@ -35,7 +35,7 @@ namespace xla { // // Applying this pass until a fixed point performs a variant of pairwise // summation (https://en.wikipedia.org/wiki/Pairwise_summation), which is -// guaranteed to have an assymptotically smaller error bound provided that +// guaranteed to have an asymptotically smaller error bound provided that // intermediate roundoff errors are random and have random sign. // // If this pass lowers the performance too much, the window size can always be diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index cb589326ba7..c223378b332 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -302,7 +302,7 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { // Information kept per instruction struct PerInstruction { std::unique_ptr points_to_set; - // Empircally, ~92% of instructions have 1 + // Empirically, ~92% of instructions have 1 // instruction_defined_buffer, and 99% have 0 or 1 BufferDefinitionVector instruction_defined_buffers; }; diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc index 8b381dec073..1f2dcda288a 100644 --- a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc +++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc @@ -112,7 +112,7 @@ StatusOr WhileLoopConstantSinking::Run(HloModule* module) { bool changed = false; std::vector while_instrs; for (auto* comp : module->MakeNonfusionComputations()) { - // Right now we don't particulary care about optimizing while-of-while + // Right now we don't particularly care about optimizing while-of-while // patterns. If/When we do, we'll want to visit the outer while (while_0) // before we visit the inner while (while_1): // diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index 351feec6bb7..2d33184b7d0 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -317,7 +317,7 @@ StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { // TryHoistingInvariantInstructionsFromWhileBody can be generalized to // optimize the condition computation too, if needed. 
// - // The transform we do here is a pessmization for while loops that execute + // The transform we do here is a pessimization for while loops that execute // zero times*, but at this time we expect those to be rare. If this // becomes a problem we can consider using the conditional HLO to avoid // doing extra work for while loops with zero trip count. diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index 8ec6e40044c..cff0fd458e5 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ -126,7 +126,7 @@ WhileLoopSimplifierTest::MakeModuleWithSimpleLoopTupleElementLoopBound( return ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); } -TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { +TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimplified) { auto m = MakeModuleWithSimpleLoop(/*num_iters=*/0); ASSERT_TRUE(WhileLoopSimplifier().Run(m.get()).ValueOrDie()); EXPECT_THAT(m->entry_computation()->root_instruction(), diff --git a/tensorflow/compiler/xla/shape_test.cc b/tensorflow/compiler/xla/shape_test.cc index aa6c7d10989..47680a6ba32 100644 --- a/tensorflow/compiler/xla/shape_test.cc +++ b/tensorflow/compiler/xla/shape_test.cc @@ -45,13 +45,13 @@ class ShapeTest : public ::testing::Test { ShapeUtil::MakeTupleShape({opaque_, scalar_, matrix_, matrix2_}); const Shape nested_tuple_ = ShapeUtil::MakeTupleShape({tuple_, matrix_, token_}); - const Shape dyanmic_matrix_ = + const Shape dynamic_matrix_ = ShapeUtil::MakeShape(S32, {5, 2}, {true, false}); }; TEST_F(ShapeTest, ShapeToFromProto) { for (const Shape& shape : {opaque_, token_, scalar_, matrix_, matrix2_, - tuple_, nested_tuple_, dyanmic_matrix_}) { + tuple_, nested_tuple_, dynamic_matrix_}) { Shape shape_copy(shape.ToProto()); EXPECT_TRUE(ShapeUtil::Equal(shape, shape_copy)) << shape << " != " << shape_copy; @@ -215,7 +215,7 @@ TEST_F(ShapeTest, ProgramShapeToString) { TEST_F(ShapeTest, SupportsAbslHash) { EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly( {opaque_, token_, scalar_, scalar_with_tile_, matrix_, matrix2_, tuple_, - nested_tuple_, dyanmic_matrix_})); + nested_tuple_, dynamic_matrix_})); } } // namespace diff --git a/tensorflow/compiler/xla/status_macros_test.cc b/tensorflow/compiler/xla/status_macros_test.cc index 4b0740dad72..d1ed11c227e 100644 --- a/tensorflow/compiler/xla/status_macros_test.cc +++ b/tensorflow/compiler/xla/status_macros_test.cc @@ -90,7 +90,7 @@ TEST(StatusMacros, ReturnIfErrorOnError) { EXPECT_EQ(rc.status().code(), tensorflow::error::INTERNAL); } -TEST(StatusMacros, AssignOrReturnSuccessufully) { +TEST(StatusMacros, AssignOrReturnSuccessfully) { Status status = []() { TF_ASSIGN_OR_RETURN(int value, CreateIntSuccessfully()); EXPECT_EQ(value, 42); diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 881d9c5879e..3bb2f619499 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -3009,7 +3009,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, NonIdentityBroadcastOfSameRankIsDisallowed) { // Regression test for b/31927799. "slice - y" is fused and requires implicit // broadcast. 
-XLA_TEST_F(ArrayElementwiseOpTest, ImplictBroadcastInFusedExpressions) { +XLA_TEST_F(ArrayElementwiseOpTest, ImplicitBroadcastInFusedExpressions) { XlaBuilder builder(TestName()); auto x_literal = LiteralUtil::CreateR1({1, 2, 3}); auto y_literal = LiteralUtil::CreateR1({4, 5}); diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc index 63e48117056..a1b11fc87b2 100644 --- a/tensorflow/compiler/xla/tests/bfloat16_test.cc +++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc @@ -76,8 +76,8 @@ XLA_TEST_F(Bfloat16Test, NegateScalarF16) { error_spec_); } -// Disabled on interpreter since BatchNormExanper is not run by default on the -// intepreter backend. +// Disabled on interpreter since BatchNormExpander is not run by default on the +// interpreter backend. XLA_TEST_F(Bfloat16Test, DISABLED_ON_INTERPRETER(BatchNormTraining)) { const int kFeatureIndex = 2; XlaBuilder builder(TestName()); @@ -112,8 +112,8 @@ XLA_TEST_F(Bfloat16Test, DISABLED_ON_INTERPRETER(BatchNormTraining)) { ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01, 0.02)); } -// Disabled on interpreter since BatchNormExanper is not run by default on the -// intepreter backend. +// Disabled on interpreter since BatchNormExpander is not run by default on the +// interpreter backend. XLA_TEST_F(Bfloat16Test, DISABLED_ON_INTERPRETER(BatchNormGrad)) { const int kFeatureIndex = 2; XlaBuilder builder(TestName()); diff --git a/tensorflow/compiler/xla/tests/collective_ops_test.cc b/tensorflow/compiler/xla/tests/collective_ops_test.cc index 8de508e876e..56c5f688312 100644 --- a/tensorflow/compiler/xla/tests/collective_ops_test.cc +++ b/tensorflow/compiler/xla/tests/collective_ops_test.cc @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/threadpool.h" -// Tests cross-GPU operatons. +// Tests cross-GPU operations. // // This test requires at least four GPUs. For instructions on running this // within Google, see go/multi-gpu-unit-test. diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index f91847e0010..097265f3bb1 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -1148,7 +1148,7 @@ TYPED_TEST(Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Input_Batch_In_Lanes, } template -class Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes +class Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Both_Batch_In_Lanes : public ConvolutionTest { public: void RunTest() { @@ -1210,9 +1210,9 @@ class Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes } }; -TYPED_TEST_CASE(Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes, +TYPED_TEST_CASE(Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Both_Batch_In_Lanes, TestTypes); -TYPED_TEST(Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes, +TYPED_TEST(Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Both_Batch_In_Lanes, Types) { this->RunTest(); } diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc index 1ea72af5f5f..9ea27585e61 100644 --- a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc +++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc @@ -775,7 +775,7 @@ void BM_DynamicSlice(int num_iters) { stream.get(), start_index_literal, shaped_buffers[i])); } - // Add DynamicSlice op to the computatation. + // Add DynamicSlice op to the computation. 
DynamicSlice(input, start_indices, {1, 1, 1, 1}); auto computation = builder.Build().ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc b/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc index 64372788be4..3c14f78429a 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc +++ b/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc @@ -131,7 +131,7 @@ template ::value || std::is_same::value>::type* = nullptr> T ReferenceMax(T x, T y) { - // We need to propagate NAN here becasue std::max may not propagate NAN. + // We need to propagate NAN here because std::max may not propagate NAN. if (std::fpclassify(x) == FP_NAN) { return x; } @@ -146,7 +146,7 @@ template ::value || std::is_same::value>::type* = nullptr> T ReferenceMin(T x, T y) { - // We need to propagate NAN here becasue std::max may not propagate NAN. + // We need to propagate NAN here because std::max may not propagate NAN. if (std::fpclassify(x) == FP_NAN) { return x; } @@ -319,7 +319,7 @@ INSTANTIATE_TEST_SUITE_P( // for each sub-test to avoid timeout because the implementation of ExpectNear // more than 2x slower for binary test. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveF32BinaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveF32BinaryTest, ::testing::Combine( ::testing::ValuesIn(GetFpValuesForMagnitudeExtremeNormals(40000, 2000)), @@ -402,7 +402,7 @@ INSTANTIATE_TEST_SUITE_P( // Similar to ExhaustiveF64BinaryTest, we use a smaller set of inputs for each // for each sub-test comparing with the unary test to avoid timeout. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveF64BinaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveF64BinaryTest, ::testing::Combine( ::testing::ValuesIn( GetFpValuesForMagnitudeExtremeNormals(40000, 2000)), diff --git a/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h b/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h index 3d77b44b53a..1aa06a0aa63 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h +++ b/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h @@ -84,7 +84,7 @@ class ExhaustiveOpTestBase : public ClientLibraryTestBase { : (T == F16 || T == BF16) ? U16 : PRIMITIVE_TYPE_INVALID; }; - // Native types that correspond to the primtive types above. + // Native types that correspond to the primitive types above. using NativeT = typename primitive_util::PrimitiveTypeToNative::type; using NativeRefT = typename primitive_util::PrimitiveTypeToNative::type; @@ -746,7 +746,7 @@ class ExhaustiveOpTestBase : public ClientLibraryTestBase { // The platform under test. const string platform_; - // Testing will ignore inputs for which known_incorect_fn_ returns true. The + // Testing will ignore inputs for which known_incorrect_fn_ returns true. The // argument to the function is the raw bits for the data being test, zero // extended to 64 bits if the data type is less than 64 bits. std::function known_incorrect_fn_; diff --git a/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc b/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc index a19f7eea3bd..0ab27554a0c 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc +++ b/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc @@ -165,7 +165,7 @@ using ExhaustiveUnaryTest = ExhaustiveOpTestBase; // Test parameter is a tuple containing // - primitive type under test, // - (begin, end) range under test, as zero-extended int64s bitcast to the -// primtive type under test. 
+// primitive type under test. template class Exhaustive32BitOrLessUnaryTest : public ExhaustiveUnaryTest, @@ -727,7 +727,7 @@ INSTANTIATE_TEST_SUITE_P(NormalValues, ExhaustiveF64UnaryTest, // Tests a total of 4000000000 inputs, with 16000000 inputs in each sub-test, to // keep the peak memory usage low. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveF64UnaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveF64UnaryTest, ::testing::ValuesIn(GetFpValuesForMagnitudeExtremeNormals( 4000000000ull, 16000000))); @@ -873,7 +873,7 @@ INSTANTIATE_TEST_SUITE_P( // Tests a total of 40000 ^ 2 inputs, with 4000 ^ 2 inputs in each sub-test, to // keep the peak memory usage low. INSTANTIATE_TEST_SUITE_P( - F32LargeAndSmallMagnituedNormalValues, ExhaustiveC64UnaryTest, + F32LargeAndSmallMagnitudeNormalValues, ExhaustiveC64UnaryTest, ::testing::Combine( ::testing::ValuesIn(GetFpValuesForMagnitudeExtremeNormals(40000, 4000)), @@ -960,7 +960,7 @@ INSTANTIATE_TEST_SUITE_P( // Tests a total of 40000 ^ 2 inputs, with 2000 ^ 2 inputs in each sub-test, to // keep the peak memory usage low. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveC128UnaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveC128UnaryTest, ::testing::Combine( ::testing::ValuesIn( GetFpValuesForMagnitudeExtremeNormals(40000, 2000)), diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc b/tensorflow/compiler/xla/tests/gather_operation_test.cc index 47d3546fc41..71090077ae8 100644 --- a/tensorflow/compiler/xla/tests/gather_operation_test.cc +++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc @@ -619,7 +619,7 @@ ENTRY main { class GatherClientLibraryTest : public ClientLibraryTestBase {}; -// Disabled on interpreter since ExectuteAsyncOnStream is not supported. +// Disabled on interpreter since ExecuteAsyncOnStream is not supported. XLA_TEST_F(GatherClientLibraryTest, DISABLED_ON_INTERPRETER(DISABLED_ON_GPU(Basic))) { // We create this HLO, but using the XlaBuilder API. diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 4d327a6fe9c..58ff070671d 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -463,7 +463,7 @@ TEST_F(MapTest, NestedBinaryMap) { ComputeAndCompareR1(&b, {0.1f, 0.5f, 0.25f, 1.0f, 4.0f}, {}); } -TEST_F(MapTest, MapOperantionWithBuildError) { +TEST_F(MapTest, MapOperationWithBuildError) { // Maps (lambda (x y) (+ x y)) onto two R1F32 vectors but uses an unsupported // type combination (F32 + U16) to test that the error is reported to the // outermost XlaBuilder. 
diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc index 0dcc0c278ae..81c0a8e1e46 100644 --- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc +++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc @@ -187,8 +187,8 @@ class MultiOutputFusionTest : public HloTestBase { XLA_TEST_F(MultiOutputFusionTest, 2DNofusion) { RunTest2D(false, 5); } XLA_TEST_F(MultiOutputFusionTest, 2DFusion) { RunTest2D(true, 5); } XLA_TEST_F(MultiOutputFusionTest, 2DFusionSize129) { RunTest2D(true, 129); } -XLA_TEST_F(MultiOutputFusionTest, DiffentTypesNoFusion) { RunTest1D(false, 8); } -XLA_TEST_F(MultiOutputFusionTest, DiffentTypesFusion) { RunTest1D(true, 8); } +XLA_TEST_F(MultiOutputFusionTest, DifferentTypesNoFusion) { RunTest1D(false, 8); } +XLA_TEST_F(MultiOutputFusionTest, DifferentTypesFusion) { RunTest1D(true, 8); } XLA_TEST_F(MultiOutputFusionTest, FusionNodeIsRoot) { const char* testcase = R"( diff --git a/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc b/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc index 88f3a8bdde2..068ef744c33 100644 --- a/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc +++ b/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc @@ -17,8 +17,8 @@ limitations under the License. // hlo_proto_to_json --input_file=some_binary_proto // --output_file=path_to_dump_output // -// Reads one serilized Hlo module, convert it into JSON format and dump into -// some output directory. some_binaray_proto is obtained by serializing Hlo +// Reads one serialized Hlo module, convert it into JSON format and dump into +// some output directory. some_binary_proto is obtained by serializing Hlo // module to disk using the debug options // // --xla_dump_to=DIR --xla_dump_hlo_as_proto diff --git a/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh b/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh index b3e43aa7da0..a1614c443fe 100755 --- a/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh +++ b/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh @@ -14,6 +14,6 @@ # limitations under the License. # ==============================================================================*/ -# This is a placeholder for a compile-only test for intractive_graphviz tool. +# This is a placeholder for a compile-only test for interactive_graphviz tool. exit 0 diff --git a/tensorflow/compiler/xla/tools/run_hlo_module_main.cc b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc index 7079f413eeb..39d7826e162 100644 --- a/tensorflow/compiler/xla/tools/run_hlo_module_main.cc +++ b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc @@ -104,7 +104,7 @@ int main(int argc, char** argv) { tensorflow::Flag( "use_large_float_range", &opts.use_large_float_range, "Generate floating point values using a large uniform-log " - "distribtion as opposed to a small uniform distribution."), + "distribution as opposed to a small uniform distribution."), tensorflow::Flag( "abs_error_bound", &opts.abs_error_bound, "The absolute error bound used when comparing the test and " diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc index 7b17db12595..6711779cd2b 100644 --- a/tensorflow/compiler/xla/util.cc +++ b/tensorflow/compiler/xla/util.cc @@ -341,7 +341,7 @@ std::pair SplitF64ToF32(double x) { CHECK(std::isfinite(x_f32)) << x; // The high float is simply the double rounded to the nearest float. 
Because - // we are roundinng to nearest with ties to even, the error introduced in + // we are rounding to nearest with ties to even, the error introduced in // rounding is less than half an ULP in the high ULP. const float hi = x_f32; // We can compute the low term using Sterbenz' lemma: If a and b are two diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index 427a631f82d..68f56a52d0e 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -1527,7 +1527,7 @@ TEST(RawApiTest, CompileAndExecuteWithS64Argument) { xla::Shape(program_shape.result()), xla::S64)); } -// Tests the XRT device memory compation API (XRTCompactAllocations). +// Tests the XRT device memory compaction API (XRTCompactAllocations). TEST(RawApiTest, TestDeviceMemoryCompaction) { static const int kNumAllocs = 32; Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); diff --git a/tensorflow/compiler/xrt/xrt_memory_manager.cc b/tensorflow/compiler/xrt/xrt_memory_manager.cc index 3a304764800..14986be3d1e 100644 --- a/tensorflow/compiler/xrt/xrt_memory_manager.cc +++ b/tensorflow/compiler/xrt/xrt_memory_manager.cc @@ -319,7 +319,7 @@ Status XRTMemoryManager::TryFreeMemoryStep(MemoryReclaimContext* mrctx, } if (!mrctx->done_freeing) { // If the caller passed us a zero requested_free_size, we try to free chunks - // of kMaxFreeSize memory, until either the run function suceeds, or we run + // of kMaxFreeSize memory, until either the run function succeeds, or we run out of freeable memory. const size_t kMaxFreeSize = 1000000000; size_t free_size = diff --git a/tensorflow/compiler/xrt/xrt_memory_manager.h b/tensorflow/compiler/xrt/xrt_memory_manager.h index 445be45cf57..0dcd07f9faa 100644 --- a/tensorflow/compiler/xrt/xrt_memory_manager.h +++ b/tensorflow/compiler/xrt/xrt_memory_manager.h @@ -87,7 +87,7 @@ class XRTMemoryManager : public ResourceBase { return Status::OK(); } - // Releases an handle by dropping the refences count held on the + // Releases a handle by dropping the reference count held on the // XRTTupleAllocation by the XRTMemoryManager. Existing XRTTupleAllocation // references will continue to be valid. Status Release(int64 handle); From 3938a7002c75773cb2dfd98d02e3b00a56a73a26 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 9 Dec 2019 19:12:03 +0900 Subject: [PATCH 2/3] minor spelling tweaks --- RELEASE.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index c415315f882..f83bab83a2f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -315,7 +315,7 @@ If you are experiencing any issues because of this change, please inform us (fil * Changed API to optimize TensorRT enginges during graph optimization. This is now done by calling `converter.build()` where previously `is_dynamic_op=False` would be set. - * `converter.convert()` no longer returns a `tf.function`. Now the funtion must be + * `converter.convert()` no longer returns a `tf.function`. Now the function must be accessed from the saved model. * The `converter.calibrate()` method has been removed. To trigger calibration, a `calibration_input_fn` should be provided to `converter.convert()`. @@ -715,7 +715,7 @@ Weweler, Zantares, zjjott, 卜居, 王振华 (Wang Zhenhua), 黄鑫 * Updates `png_archive` dependency to 1.6.37 to not be affected by CVE-2019-7317, CVE-2018-13785, and CVE-2018-14048.
-* Updates `sqlite` depenency to 3.28.0 to not be affected by CVE-2018-20506, +* Updates `sqlite` dependency to 3.28.0 to not be affected by CVE-2018-20506, CVE-2018-20346, and CVE-2018-20505. # Release 1.12.2 @@ -901,9 +901,9 @@ Weweler, Zantares, zjjott, 卜居, 王振华 (Wang Zhenhua), 黄鑫 compilation as a second return argument. * XLA HLO graphs can now be rendered as SVG/HTML. * Estimator - * Replace all occurences of `tf.contrib.estimator.BaselineEstimator` with + * Replace all occurrences of `tf.contrib.estimator.BaselineEstimator` with `tf.estimator.BaselineEstimator` - * Replace all occurences of + * Replace all occurrences of `tf.contrib.estimator.DNNLinearCombinedEstimator` with `tf.estimator.DNNLinearCombinedEstimator` * Replace all occurrences of `tf.contrib.estimator.DNNEstimator` with @@ -915,7 +915,7 @@ Weweler, Zantares, zjjott, 卜居, 王振华 (Wang Zhenhua), 黄鑫 `tf.estimator.Estimator.experimental_export_all_saved_models`. * Update `regression_head` to the new Head API for Canned Estimator V2. * Switch `multi_class_head` to Head API for Canned Estimator V2. - * Replace all occurences of `tf.contrib.estimator.InMemoryEvaluatorHook` + * Replace all occurrences of `tf.contrib.estimator.InMemoryEvaluatorHook` and `tf.contrib.estimator.make_stop_at_checkpoint_step_hook` with `tf.estimator.experimental.InMemoryEvaluatorHook` and `tf.estimator.experimental.make_stop_at_checkpoint_step_hook` From 9dfd369d07303ee7c93c59fa918aeddca4fed733 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 10 Dec 2019 02:01:00 +0900 Subject: [PATCH 3/3] address review comment --- tensorflow/compiler/xla/service/hlo_computation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 579e4360092..9ca60403929 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -419,7 +419,7 @@ class HloComputation { // the HLO computation with the exception of fusion computation. A parameter // instruction is removable for a fusion computation. // - // Note that IsSafelyRemovable() is a necessarily condition to remove an + // Note that IsSafelyRemovable() is a necessary condition to remove an // instruction rather than a sufficient condition. For example, instructions // with side-effect (e.g., Send, Infeed) may be removed from a computation, // but the transformation must guarantee the invariants relevant to the