From e664420b79a93ae1a47c7ccd70ebe0fb31819ff6 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki
Date: Mon, 9 Dec 2019 18:21:12 +0900
Subject: [PATCH 1/3] minor spelling tweaks

---
 .../compiler/jit/compilability_check_util.cc | 4 +--
 tensorflow/compiler/jit/deadness_analysis.cc | 6 ++--
 .../compiler/jit/encapsulate_subgraphs_pass.h | 2 +-
 tensorflow/compiler/jit/encapsulate_util.h | 2 +-
 .../extract_outside_compilation_pass_test.cc | 2 +-
 .../compiler/jit/graphcycles/graphcycles.h | 2 +-
 .../compiler/jit/mark_for_compilation_pass.cc | 2 +-
 tensorflow/compiler/jit/node_matchers.h | 2 +-
 tensorflow/compiler/jit/node_matchers_test.cc | 2 +-
 tensorflow/compiler/jit/ops/xla_ops.cc | 2 +-
 tensorflow/compiler/jit/xla_device_context.cc | 2 +-
 .../compiler/jit/xla_kernel_creator_util.cc | 2 +-
 tensorflow/compiler/jit/xla_launch_util.cc | 2 +-
 .../lite/quantization/quantization_utils.cc | 2 +-
 .../lite/quantization/quantization_utils.h | 4 +--
 .../mlir/lite/transforms/extract_ophint.cc | 8 ++---
 .../compiler/mlir/lite/transforms/optimize.cc | 4 +--
 .../transforms/optimize_functional_ops.cc | 6 ++--
 .../mlir/lite/transforms/prepare_tf.cc | 2 +-
 .../compiler/mlir/lite/utils/lstm_utils.cc | 2 +-
 .../analysis/side_effect_analysis.cc | 4 +--
 .../analysis/side_effect_analysis.h | 6 ++--
 .../mlir/tensorflow/ir/tf_generated_ops.td | 6 ++--
 .../mlir/tensorflow/transforms/lower_tf.cc | 2 +-
 .../translate/executor_to_control_dialect.cc | 2 +-
 .../tensorflow/translate/export_graphdef.cc | 2 +-
 .../mlir/tensorflow/translate/import_model.cc | 2 +-
 tensorflow/compiler/mlir/xla/ir/hlo_ops.td | 2 +-
 .../mlir/xla/transforms/canonicalize.td | 4 +--
 .../mlir/xla/transforms/legalize_tf.cc | 4 +--
 .../xla/transforms/lower_complex_patterns.td | 4 +--
 .../xla/transforms/map_lhlo_to_scalar_op.h | 2 +-
 .../compiler/tests/matrix_diag_ops_test.py | 6 ++--
 .../compiler/tests/quantized_ops_test.py | 2 +-
 tensorflow/compiler/tests/randomized_tests.cc | 2 +-
 .../tf2tensorrt/convert/convert_graph.cc | 6 ++--
 .../tf2tensorrt/convert/convert_nodes.cc | 2 +-
 .../tf2tensorrt/convert/convert_nodes.h | 4 +--
 .../tf2tensorrt/convert/convert_nodes_test.cc | 4 +--
 .../tf2tensorrt/kernels/trt_engine_op.cc | 2 +-
 .../kernels/trt_engine_resource_ops_test.cc | 2 +-
 .../compiler/tf2tensorrt/segment/segment.cc | 2 +-
 .../tf2tensorrt/utils/trt_int8_calibrator.h | 2 +-
 .../compiler/tf2xla/functionalize_cond.cc | 8 ++---
 .../compiler/tf2xla/functionalize_cond.h | 2 +-
 .../compiler/tf2xla/kernels/assert_op.cc | 2 +-
 .../compiler/tf2xla/kernels/pooling_ops.cc | 2 +-
 .../compiler/tf2xla/kernels/xla_conv_op.cc | 2 +-
 .../compiler/tf2xla/kernels/xla_svd_op.cc | 2 +-
 tensorflow/compiler/tf2xla/ops/xla_ops.cc | 8 ++---
 tensorflow/compiler/tf2xla/shape_util.h | 2 +-
 tensorflow/compiler/tf2xla/tf2xla.proto | 2 +-
 tensorflow/compiler/tf2xla/xla_op_kernel.h | 4 +--
 .../xla/client/lib/comparators_test.cc | 2 +-
 tensorflow/compiler/xla/client/lib/matrix.cc | 4 +--
 tensorflow/compiler/xla/client/lib/matrix.h | 2 +-
 tensorflow/compiler/xla/client/lib/pooling.cc | 6 ++--
 tensorflow/compiler/xla/client/lib/slicing.cc | 30 +++++++++----------
 tensorflow/compiler/xla/client/lib/testing.cc | 2 +-
 .../compiler/xla/client/local_client.cc | 6 ++--
 tensorflow/compiler/xla/client/local_client.h | 2 +-
 tensorflow/compiler/xla/client/xla_builder.h | 6 ++--
 .../compiler/xla/client/xla_builder_test.cc | 2 +-
 .../compiler/xla/debug_options_flags.cc | 4 +--
 tensorflow/compiler/xla/debug_options_flags.h | 4 +--
 .../compiler/xla/execution_options_util.h | 2 +-
 .../compiler/xla/g3doc/operation_semantics.md | 2 +-
 tensorflow/compiler/xla/literal.cc | 2 +-
 tensorflow/compiler/xla/literal.h | 4 +--
 tensorflow/compiler/xla/literal_comparison.cc | 2 +-
 tensorflow/compiler/xla/literal_test.cc | 2 +-
 .../compiler/xla/parse_flags_from_env.h | 2 +-
 .../compiler/xla/parse_flags_from_env_test.cc | 6 ++--
 .../compiler/xla/python/local_client.cc | 2 +-
 .../python/tpu_driver/client/tpu_client.cc | 2 +-
 .../xla/python/tpu_driver/tpu_driver.h | 2 +-
 tensorflow/compiler/xla/python/xla_client.py | 2 +-
 tensorflow/compiler/xla/python_api/types.py | 4 +--
 .../xla/service/algebraic_simplifier.cc | 6 ++--
 .../xla/service/algebraic_simplifier_test.cc | 2 +-
 .../xla/service/batchnorm_expander_test.cc | 2 +-
 .../compiler/xla/service/buffer_assignment.cc | 2 +-
 .../xla/service/buffer_assignment_test.cc | 2 +-
 .../compiler/xla/service/buffer_value.h | 2 +-
 .../compiler/xla/service/call_inliner.cc | 2 +-
 .../compiler/xla/service/cholesky_expander.cc | 2 +-
 .../xla/service/collective_ops_utils.h | 2 +-
 tensorflow/compiler/xla/service/compiler.h | 2 +-
 .../compiler/xla/service/computation_placer.h | 2 +-
 .../xla/service/conditional_simplifier.cc | 2 +-
 .../service/convolution_group_converter.cc | 4 +--
 .../xla/service/copy_insertion_test.cc | 4 +--
 .../compiler/xla/service/cpu/cpu_compiler.cc | 2 +-
 .../xla/service/cpu/cpu_executable.cc | 2 +-
 .../xla/service/cpu/cpu_layout_assignment.cc | 2 +-
 .../xla/service/cpu/dot_op_emitter.cc | 2 +-
 .../xla/service/cpu/dot_op_emitter_internal.h | 2 +-
 .../compiler/xla/service/cpu/ir_emitter.cc | 2 +-
 .../compiler/xla/service/cpu/ir_emitter.h | 4 +--
 .../xla/service/cpu/llvm_ir_runtime.cc | 2 +-
 .../service/cpu/parallel_task_assignment.cc | 2 +-
 .../xla/service/cpu/runtime_fork_join.cc | 2 +-
 .../xla/service/cpu/shape_partition.cc | 2 +-
 .../xla/service/cpu/tiled_dot_emitter.cc | 6 ++--
 tensorflow/compiler/xla/service/dump.cc | 2 +-
 .../service/dynamic_dimension_inference.cc | 8 ++---
 .../compiler/xla/service/dynamic_padder.cc | 6 ++--
 .../xla/service/elemental_ir_emitter.cc | 4 +--
 .../xla/service/gpu/backend_configs.proto | 2 +-
 .../service/gpu/cudnn_batchnorm_rewriter.cc | 4 +--
 .../xla/service/gpu/cudnn_batchnorm_thunk.cc | 2 +-
 .../service/gpu/cudnn_pad_for_convolutions.cc | 10 +++---
 .../compiler/xla/service/gpu/fusion_merger.cc | 4 +--
 .../xla/service/gpu/fusion_merger_test.cc | 12 ++++---
 .../service/gpu/gpu_conv_algorithm_picker.cc | 6 ++--
 .../gpu/gpu_conv_padding_legalization.cc | 2 +-
 .../xla/service/gpu/gpu_debug_info_manager.h | 2 +-
 .../xla/service/gpu/gpu_executable.cc | 4 +--
 .../compiler/xla/service/gpu/gpu_executable.h | 2 +-
 .../compiler/xla/service/gpu/gpu_fusible.cc | 2 +-
 .../compiler/xla/service/gpu/gpu_fusible.h | 2 +-
 .../xla/service/gpu/gpu_layout_assignment.cc | 2 +-
 .../xla/service/gpu/gpu_transfer_manager.cc | 2 +-
 .../xla/service/gpu/ir_emitter_unnested.cc | 2 +-
 .../xla/service/gpu/ir_emitter_unnested.h | 2 +-
 .../xla/service/gpu/kernel_mapping_scheme.h | 2 +-
 .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 2 +-
 .../xla/service/gpu/multi_output_fusion.cc | 2 +-
 .../xla/service/gpu/nvptx_compiler.cc | 2 +-
 .../xla/service/gpu/stream_executor_util.h | 2 +-
 .../compiler/xla/service/gpu/thunk_emitter.h | 2 +-
 .../xla/service/hlo_alias_analysis.cc | 4 +--
 tensorflow/compiler/xla/service/hlo_buffer.h | 2 +-
 .../compiler/xla/service/hlo_casting_utils.h | 2 +-
 .../compiler/xla/service/hlo_computation.cc | 2 +-
 .../compiler/xla/service/hlo_computation.h | 2 +-
 .../compiler/xla/service/hlo_cost_analysis.cc | 2 +-
 .../compiler/xla/service/hlo_cost_analysis.h | 2 +-
 .../compiler/xla/service/hlo_cse_test.cc | 6 ++--
 .../xla/service/hlo_dataflow_analysis.cc | 2 +-
 .../xla/service/hlo_dataflow_analysis_test.cc | 2 +-
 .../xla/service/hlo_domain_isolator.h | 2 +-
 .../xla/service/hlo_domain_remover.cc | 2 +-
 .../compiler/xla/service/hlo_domain_test.cc | 2 +-
 .../xla/service/hlo_domain_verifier.cc | 4 +--
 .../compiler/xla/service/hlo_evaluator.cc | 4 +--
 .../compiler/xla/service/hlo_evaluator.h | 4 +--
 .../compiler/xla/service/hlo_graph_dumper.cc | 2 +-
 .../service/hlo_input_output_alias_config.h | 4 +--
 .../compiler/xla/service/hlo_instruction.cc | 4 +--
 .../compiler/xla/service/hlo_instruction.h | 2 +-
 .../xla/service/hlo_instruction_test.cc | 6 ++--
 .../compiler/xla/service/hlo_instructions.cc | 2 +-
 .../compiler/xla/service/hlo_instructions.h | 2 +-
 .../xla/service/hlo_live_range_test.cc | 4 +--
 .../xla/service/hlo_liveness_analysis.cc | 4 +--
 .../xla/service/hlo_liveness_analysis_test.cc | 6 ++--
 .../xla/service/hlo_memory_scheduler.cc | 2 +-
 tensorflow/compiler/xla/service/hlo_module.h | 2 +-
 .../xla/service/hlo_module_dce_test.cc | 2 +-
 .../xla/service/hlo_module_group_util.h | 2 +-
 .../compiler/xla/service/hlo_ordering_test.cc | 4 +--
 tensorflow/compiler/xla/service/hlo_parser.cc | 4 +--
 .../compiler/xla/service/hlo_parser_test.cc | 4 +--
 .../xla/service/hlo_rematerialization_test.cc | 2 +-
 tensorflow/compiler/xla/service/hlo_runner.h | 2 +-
 .../compiler/xla/service/hlo_sharding.h | 2 +-
 .../xla/service/hlo_sharding_metadata.cc | 2 +-
 .../xla/service/indexed_array_analysis.cc | 2 +-
 .../service/indexed_array_analysis_test.cc | 2 +-
 .../compiler/xla/service/layout_assignment.cc | 2 +-
 .../compiler/xla/service/layout_assignment.h | 4 +--
 .../llvm_ir/dynamic_update_slice_util.cc | 2 +-
 .../compiler/xla/service/llvm_ir/llvm_util.cc | 2 +-
 .../xla/service/memory_space_assignment.cc | 2 +-
 .../xla/service/memory_space_assignment.h | 2 +-
 .../service/memory_space_assignment_test.cc | 2 +-
 .../experimental/conv_emitter/conv_emitter.cc | 6 ++--
 .../experimental/conv_emitter/conv_emitter.h | 2 +-
 .../xla/service/mlir_gpu/failover_compiler.cc | 2 +-
 .../xla/service/mlir_gpu/mlir_compiler.cc | 2 +-
 .../xla/service/multi_output_fusion.h | 2 +-
 .../compiler/xla/service/op_expander_pass.h | 2 +-
 .../compiler/xla/service/reshape_mover.cc | 2 +-
 tensorflow/compiler/xla/service/service.cc | 8 ++---
 .../xla/service/shape_inference_test.cc | 4 +--
 .../xla/service/tree_reduction_rewriter.h | 2 +-
 .../xla/service/tuple_points_to_analysis.h | 2 +-
 .../service/while_loop_constant_sinking.cc | 2 +-
 .../while_loop_invariant_code_motion.cc | 2 +-
 .../xla/service/while_loop_simplifier_test.cc | 2 +-
 tensorflow/compiler/xla/shape_test.cc | 6 ++--
 tensorflow/compiler/xla/status_macros_test.cc | 2 +-
 .../xla/tests/array_elementwise_ops_test.cc | 2 +-
 .../compiler/xla/tests/bfloat16_test.cc | 8 ++---
 .../compiler/xla/tests/collective_ops_test.cc | 2 +-
 .../compiler/xla/tests/convolution_test.cc | 6 ++--
 .../compiler/xla/tests/dynamic_ops_test.cc | 2 +-
 .../xla/tests/exhaustive_binary_test.cc | 8 ++---
 .../xla/tests/exhaustive_op_test_utils.h | 4 +--
 .../xla/tests/exhaustive_unary_test.cc | 8 ++---
 .../xla/tests/gather_operation_test.cc | 2 +-
 tensorflow/compiler/xla/tests/map_test.cc | 2 +-
 .../xla/tests/multioutput_fusion_test.cc | 4 +--
 .../compiler/xla/tools/hlo_proto_to_json.cc | 4 +--
 .../xla/tools/interactive_graphviz_test.sh | 2 +-
 .../compiler/xla/tools/run_hlo_module_main.cc | 2 +-
 tensorflow/compiler/xla/util.cc | 2 +-
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 2 +-
 tensorflow/compiler/xrt/xrt_memory_manager.cc | 2 +-
 tensorflow/compiler/xrt/xrt_memory_manager.h | 2 +-
 211 files changed, 336 insertions(+), 336 deletions(-)

diff --git a/tensorflow/compiler/jit/compilability_check_util.cc b/tensorflow/compiler/jit/compilability_check_util.cc
index b8f04f7d791..14ade0ea920 100644
--- a/tensorflow/compiler/jit/compilability_check_util.cc
+++ b/tensorflow/compiler/jit/compilability_check_util.cc
@@ -509,10 +509,10 @@ RecursiveCompilabilityChecker::OperationFilter CreateOperationFilter(
     auto it = uncompilable_nodes->find(function_identifier);
     if (it == uncompilable_nodes->end()) {
       std::vector
-          uncompileable_node_info{std::move(node_info)};
+          uncompilable_node_info{std::move(node_info)};
       uncompilable_nodes->emplace(
           std::move(function_identifier),
-          std::make_pair(function, std::move(uncompileable_node_info)));
+          std::make_pair(function, std::move(uncompilable_node_info)));
     } else {
       it->second.second.emplace_back(std::move(node_info));
     }
diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index 912991e267a..b78bcd36d47 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -96,7 +96,7 @@ limitations under the License.
 // Symbolic > NonSymbolic. The lattice has height = 2 so two iterations are
 // sufficient to converge.
 //
-// We first do an optimisitc analysis and, if it does not converge, we then fall
+// We first do an optimistic analysis and, if it does not converge, we then fall
 // back to a pessimistic analysis. The optimistic analysis assigns the same
 // symbolic predicate to all the merge nodes whose preceding enter nodes have
 // the same frame name on the first iteration. On the second iteration, if all
@@ -1255,7 +1255,7 @@ Status DeadnessAnalysisImpl::GetFrameBasedTopologicalOrder(
     } else if (IsRootExit(node)) {
       ++num_exits_for_frame[cf.frame_name];
     }
-    // Edge NextIteration->Merge is counted before starting the traveral to
+    // Edge NextIteration->Merge is counted before starting the traversal to
    // break the backedges.
    if (IsMerge(node)) {
      for (const Edge* e : node->in_edges()) {
@@ -1458,7 +1458,7 @@ Status DeadnessAnalysisImpl::PopulateFrame(absl::Span topo,

   for (Node* n : topo) {
     // The nodes added to should_revisit in the previous loop need to be
-    // revisited now. Reprocesing these initial nodes may add *their* consumers
+    // revisited now. Reprocessing these initial nodes may add *their* consumers
     // to should_revisit, and these newly added nodes will also be processed by
     // this very same loop. Since we're traversing the graph in topological
     // order (producers before consumers) and HandleNode(n) can only ever add
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
index 50e4149bc08..8b627cd959a 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
@@ -95,7 +95,7 @@ extern const char* const kXlaNumResourceArgsAttr;
 extern const char* const kXlaHasReferenceVarsAttr;

 // Sorts each node's control inputs by their names. This guarantees that for two
-// structually equivalent GraphDefs, we get the same traversal ordering on
+// structurally equivalent GraphDefs, we get the same traversal ordering on
 // node's control input fields.
 // TODO(hpucha): Move the utilities to a more appropriate place.
void SortControlInputs(GraphDef* gdef); diff --git a/tensorflow/compiler/jit/encapsulate_util.h b/tensorflow/compiler/jit/encapsulate_util.h index 406e4a797a4..9ddbe4d5cc9 100644 --- a/tensorflow/compiler/jit/encapsulate_util.h +++ b/tensorflow/compiler/jit/encapsulate_util.h @@ -72,7 +72,7 @@ extern const char kXlaLiftedArgOutsideCompilationAttrName[]; // Attribute indicating that this is an IdentityN node receiving inputs for a // outside compilation Placeholder node (the original outside compilation node -// is moved out of TPU comutation, and we left a Placeholder node there). +// is moved out of TPU computation, and we left a Placeholder node there). // Attribute value will be a string, which is the outside compilation cluster // name for the outside compilation Placeholder node. extern const char kXlaOutsideCompilationInputsAttrName[]; diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc index 26f830c59c3..a6f2bd41275 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc @@ -941,7 +941,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, // "const0" // "identity0" = "const0" (outside compilation cluster "0") // "identity1" = "const0" "^identity0" (outside compilation cluster "1", - // control depdent on cluster "0") + // control dependent on cluster "0") // "identity2" = "identity1" FunctionDefLibrary fdl; { diff --git a/tensorflow/compiler/jit/graphcycles/graphcycles.h b/tensorflow/compiler/jit/graphcycles/graphcycles.h index ce171a2ead0..bbf61016fb3 100644 --- a/tensorflow/compiler/jit/graphcycles/graphcycles.h +++ b/tensorflow/compiler/jit/graphcycles/graphcycles.h @@ -123,7 +123,7 @@ class GraphCycles { absl::Span Successors(int32 node) const; absl::Span Predecessors(int32 node) const; - // Return a copy of the sucessors set. This is needed for code using the + // Return a copy of the successors set. This is needed for code using the // collection while modifying the GraphCycles. std::vector SuccessorsCopy(int32 node) const; // Return a copy of the predecessors set. This is needed for code using the diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 4ca52a26bbd..0ab746ead95 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -1366,7 +1366,7 @@ Status MarkForCompilationPassImpl::Run() { void MarkForCompilationPassImpl::DumpPostClusteringGraphs() { DumpGraphToFile("mark_for_compilation", *graph_, flib_def_); - // We also dump out an annoated version of the TF graph where the nodes + // We also dump out an annotated version of the TF graph where the nodes // names are prefixed with the cluster names. This can help visualizing the // clustering decisions on TensorBoard. Graph new_graph(graph_->op_registry()); diff --git a/tensorflow/compiler/jit/node_matchers.h b/tensorflow/compiler/jit/node_matchers.h index 0d4f02c236b..ea47394bf7d 100644 --- a/tensorflow/compiler/jit/node_matchers.h +++ b/tensorflow/compiler/jit/node_matchers.h @@ -187,7 +187,7 @@ impl::NodeMatcherProperties Op(string op); // Matches a node with assigned device `assigned_device`. 
impl::NodeMatcherProperties AssignedDevice(string assigned_device); -// Matches a node with a boolean typed attrbute named `name` and with value +// Matches a node with a boolean typed attribute named `name` and with value // `value`. template impl::NodeMatcherProperties Attr(const string& name, ValueTy value) { diff --git a/tensorflow/compiler/jit/node_matchers_test.cc b/tensorflow/compiler/jit/node_matchers_test.cc index c3f0dfece85..8edb3e456c4 100644 --- a/tensorflow/compiler/jit/node_matchers_test.cc +++ b/tensorflow/compiler/jit/node_matchers_test.cc @@ -125,7 +125,7 @@ TEST(NodeMatchers, CheckControlDependence) { "is any node"); } -TEST(NodeMatchers, ConstVaulue) { +TEST(NodeMatchers, ConstValue) { Scope root = Scope::NewRootScope().ExitOnError(); Output placeholder = ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT); diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc index 0217ba71929..b1cf2166721 100644 --- a/tensorflow/compiler/jit/ops/xla_ops.cc +++ b/tensorflow/compiler/jit/ops/xla_ops.cc @@ -110,7 +110,7 @@ Merges the outputs from the PartitionedCall node and the _XlaRun node. Unlike the TensorFlow Merge op, which requires inputs of some types to be placed on the host, the _XlaMerge op can merge inputs of all types when placed on the device. This prevents the need for copy operations, in -particluar when an XLA cluster has int32 outputs. The _XlaMerge up does not +particular when an XLA cluster has int32 outputs. The _XlaMerge up does not have a value_index output that identifies the chosen input. )"); diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index c1fb2f6671f..996ad09e2a9 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -262,7 +262,7 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor, << xla_tensor->shaped_buffer().ToString(); // For devices don't allow sync on completion, the device execution is // deferred. We check the execution stream status here to avoid wrong - // results from a failed stream being propogated to following + // results from a failed stream being propagated to following // host-side ops. if (!device_allows_sync_on_completion) { done_status.Update(xla_tensor->RefreshStatusOfStreams()); diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.cc b/tensorflow/compiler/jit/xla_kernel_creator_util.cc index 96bde65003f..6441dd3ed28 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_util.cc @@ -222,7 +222,7 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, // using xla::ComputationDataHandle, which is just a symbolic handle that // xla::ComputationBuilder assigns. How does this handle gets assigned for // constant arguments? Even constant arguments get an _Arg node in the graph - // instatiated for Function compilation. The tf2xla kernel for constant _Arg + // instantiated for Function compilation. The tf2xla kernel for constant _Arg // nodes takes the constant value, converts it to XlaLiteral, and feeds it // to xla::ComputationBuilder.ConstantLiteral, which returns the handle. 
This // constant XlaLiteral is included in the HLO graph, and subsequently, in diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index ddaaefcef7d..8dbeea50ffa 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -84,7 +84,7 @@ VariableInfo::~VariableInfo() { } } -// Returns a vector of VaribleInfo instances for the resource variable inputs to +// Returns a vector of VariableInfo instances for the resource variable inputs to // the kernel with context `ctx`. The input indices for the resource variable // inputs are in `variable_indices`. static Status GetVariableInfosFromCtxInputs( diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc index eec93e9ae6a..ca10809be69 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc @@ -416,7 +416,7 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func, if (res->hasOneUse()) { if (auto next_stats = llvm::dyn_cast( *res->getUsers().begin())) { - // quantization parameters can be propgated to next_stats + // quantization parameters can be propagated to next_stats redundant_stats_ops.insert(next_stats); // add next_stats to the work list so propagation can // continue. diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h index c9f9d6619a3..9689a85ef6f 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h @@ -342,14 +342,14 @@ ElementsAttr Quantize(Attribute real_value, Type tensor_type); // parameters in this type is based on the min and max element of the // attribute. When the elements in the `attr` are not in floating-point, or // the value range isn't straddling zero, an empty type is returned. The min/max -// are ajusted to be symmetric if `symmetric` flag is set to True. And +// are adjusted to be symmetric if `symmetric` flag is set to True. And // `symmetric` can only be set to true when it is signed and narrow_range. Type GetUniformQuantizedTypeForWeight(ElementsAttr attr, bool symmetric, unsigned num_bits, bool is_sign, bool narrow_range); // Returns the per channel quantized type for an element attribute. -// `quant_dim` defines the quantization axis. The channel min/max are ajusted +// `quant_dim` defines the quantization axis. The channel min/max are adjusted // to be symmetric if `symmetric` flag is set to True. And `symmetric` can only // be set to true when it is signed and narrow_range. 
Type GetUniformQuantizedPerAxisTypeForWeight(ElementsAttr attr, int quant_dim, diff --git a/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc b/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc index 63cf4240224..52eb6216e90 100644 --- a/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc +++ b/tensorflow/compiler/mlir/lite/transforms/extract_ophint.cc @@ -413,13 +413,13 @@ void PreprocessTopoSortGraph( } operation_to_in_degrees->try_emplace(&op, input_ops.size()); for (auto* input_op : input_ops) { - auto preceeding_op_it = operation_to_outputs->find(input_op); - if (preceeding_op_it == operation_to_outputs->end()) { + auto preceding_op_it = operation_to_outputs->find(input_op); + if (preceding_op_it == operation_to_outputs->end()) { auto result = operation_to_outputs->try_emplace( input_op, llvm::DenseSet()); - preceeding_op_it = result.first; + preceding_op_it = result.first; } - preceeding_op_it->second.insert(&op); + preceding_op_it->second.insert(&op); } } } diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index d8697a8c4e0..1313bae97a1 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -394,14 +394,14 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { // w * (x ' c) + b => (w ' c) x + b // so we have to update the weight. bool is_mul = llvm::isa(binary_op); - auto new_fitler = + auto new_filter = filter_cst.mapValues(filter_type.getElementType(), [&](APFloat it) { return (is_mul ? it * cst_value : it / cst_value).bitcastToAPInt(); }); // We recreate the constant op in case it is shared by the other ops. This // might increase the model size. auto new_filter_op = rewriter.create( - fc_op.getLoc(), filter->getType(), new_fitler); + fc_op.getLoc(), filter->getType(), new_filter); fc_op.setOperand(0, binary_op->getOperand(0)); if (fc_op.filter() != filter) { // This filter goes through quantize and dequantize ops. Then we just diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc index 173785ba5b0..59dc271400e 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc @@ -132,8 +132,8 @@ class FoldIfOp : public OpRewritePattern { // Erases functions from the given candidates that are not referenced by any of // the ops in the module. -static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) { - if (candiate_funcs.empty()) return; +static void EraseDeadFuncs(const FuncSet& candidate_funcs, ModuleOp module) { + if (candidate_funcs.empty()) return; SymbolTable manager(module); @@ -149,7 +149,7 @@ static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) { } }); - for (FuncOp func : candiate_funcs) { + for (FuncOp func : candidate_funcs) { if (!in_use_funcs.count(func)) manager.erase(func); } } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 823efdc3ef5..45248ddc01c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -132,7 +132,7 @@ struct InsertTFLQuantOpsAfterTFFakeQuantOp int quant_dim = -1; if (PerAxis) { - // This is a special case that the quant_dim is the last dimentions. 
+ // This is a special case that the quant_dim is the last dimensions. quant_dim = res->getType().template cast().getRank() - 1; } // Use the min/max from the operands and the num_bits and narrow_range diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc index faf6427cedd..92a8ad49bf4 100644 --- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc @@ -98,7 +98,7 @@ Value* SliceRankedTensor(OpBuilder* builder, Value* input, ArrayRef size_values, mlir::Location location) { // If the size of the tensor to be sliced from the input overflows - // the input tensor's dimenions, return 0-valued tensor of the requested + // the input tensor's dimensions, return 0-valued tensor of the requested // shape. ArrayRef input_shape = GetRankedTensorShape(input); for (int i = 0; i < input_shape.size(); i++) { diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc index 898393479b0..5e0e9aef03c 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc @@ -122,7 +122,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { std::get<1>(operand_and_result)); } } else if (auto replicate = llvm::dyn_cast(op)) { - // The nested block for RepliateOp is handled separately in side-effect + // The nested block for ReplicateOp is handled separately in side-effect // analysis. Inside that block, we can still treat its block arguments as // different resources. for (auto arg : replicate.GetBody().getArguments()) { @@ -305,7 +305,7 @@ void SideEffectAnalysis::AnalyzeRegion( // region, and tracking resource accesses in per_resource_access_info_. // Returns whether an access to `resource` can skip control edges from - // prevoius accesses to unknown resources, due to that earlier accesses to + // previous accesses to unknown resources, due to that earlier accesses to // `resource` already indirectly tracked previous accesses to uknown // resources. `read_only` specifies the type of access of the current op being // considered. diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h index 3d65217db27..8d8815d709d 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h @@ -105,7 +105,7 @@ class SideEffectAnalysis { void ConsumeChildAnalyses( llvm::SmallVector&& children); - // Updates control_predecessors_ for `op` that is being visted, on the given + // Updates control_predecessors_ for `op` that is being visited, on the given // `resource_id`. void AddPredecessorsForAccess(int64_t resource_id, Operation* op, bool read_only); @@ -124,7 +124,7 @@ class SideEffectAnalysis { sorted_control_successors_; // Internal per-resource data structure when we build the dependencies. - struct PerResourceAcessInfo { + struct PerResourceAccessInfo { // Last op that writes the resource before the current op being analyzed. Operation* last_write = nullptr; // Read ops since last_write before the current op being analyzed. 
@@ -134,7 +134,7 @@ class SideEffectAnalysis { bool tracked_last_unknown_read = false; bool tracked_last_unknown_write = false; }; - llvm::SmallDenseMap + llvm::SmallDenseMap per_resource_access_info_; }; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 5b5c028c89d..691ce85dbc8 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -1317,7 +1317,7 @@ Operations are applied to the input(s) according to the following rules: Considering the batch matrix multiplication equation again (`bij,bjk->bik`), the contracted axis label is `j`. - (e) Expand Diagonal: If the output subcripts contain repeated (explicit) axis + (e) Expand Diagonal: If the output subscripts contain repeated (explicit) axis labels, the opposite operation of (a) is applied. For example, in the equation `i->iii`, and input shape `[3]`, the output of shape `[3, 3, 3]` are all zeros, except for the (generalized) diagonal which is populated @@ -1325,7 +1325,7 @@ Operations are applied to the input(s) according to the following rules: Note: This operation is not supported by `np.einsum` or `tf.einsum`; it is provided to enable computing the symbolic gradient of `tf.einsum`. -The output subcripts must contain only labels appearing in at least one of the +The output subscripts must contain only labels appearing in at least one of the input subscripts. Furthermore, all dimensions mapping to the same axis label must be equal. @@ -1337,7 +1337,7 @@ according to standard NumPy broadcasting The broadcasted dimensions are placed in the corresponding location of the ellipsis in the output subscript. If the broadcasted dimensions are non-empty -and the output subcripts do not contain ellipsis, then an InvalidArgument error +and the output subscripts do not contain ellipsis, then an InvalidArgument error is raised. @compatibility(numpy) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index 89941c2fab4..caacc376a0f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -84,7 +84,7 @@ DenseIntElementsAttr GetBiasAddGradReductionIndices(int64_t rank, tensorflow::TensorFormat format; if (!FormatFromString(data_format.getValue().str(), &format)) return {}; - // Reudce along all dimensions except the feature dimension. + // Reduce along all dimensions except the feature dimension. int64_t feature_dim = GetTensorFeatureDimIndex(rank, format); llvm::SmallVector dims_to_reduce(rank - 1); std::iota(dims_to_reduce.begin(), dims_to_reduce.begin() + feature_dim, 0); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc b/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc index 280f8f195de..8a4f8aacc0d 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc @@ -45,7 +45,7 @@ struct ExecutorToControlDialectConversion // Replace all uses of value `v` with a list of new values. Because number of // new values might be greater than 1, users of `v` might be replaced with their -// clones in case of non-resizble operands list. +// clones in case of non-resizable operands list. 
void ReplaceAllUsesOfValueWithValues(Value *v, Operation::operand_range new_values) { int new_values_size = std::distance(new_values.begin(), new_values.end()); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc index 58242e62f1c..2ebb7505b7f 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc @@ -197,7 +197,7 @@ class Exporter { // Each NextIteration node in the original graph is converted to a pair of // source and sink operations in the MLIR, and we use the following two maps - // to pair and convet them back to a single NextIteration node. We choose to + // to pair and convert them back to a single NextIteration node. We choose to // the "name" attribute, which is from the unique node name, to find out the // pairs: When scanning the operations in the block, the source operations // are inserted to the name_to_inst_ first, and the other "sink" operation diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 3bf2c34e2c7..c7528682001 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -2283,7 +2283,7 @@ class StructuredValueLinearizer { // Returns the list of index paths to each leaf of the StructuredValue, // in a linearized order matching `tf.nest.flatten`. // - // If an error ocurred during the linearization process, an error message with + // If an error occurred during the linearization process, an error message with // `error_context` prepended will be included in the returned status. StatusOr> GetLeafIndexPaths( llvm::StringRef error_context) const; diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td index 2b325b42e23..48e4ef1c3be 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td @@ -759,7 +759,7 @@ def HLO_UnaryEinsumOp: HLO_Op<"unary_einsum", [NoSideEffect]> { let hasCanonicalizer = 1; - // UnarayEinsumOp is unconditionally canonicalized to the binary EinsumOp so + // UnaryEinsumOp is unconditionally canonicalized to the binary EinsumOp so // the HLO converter shouldn't be invoked. 
let hasCustomHLOConverter = 1; } diff --git a/tensorflow/compiler/mlir/xla/transforms/canonicalize.td b/tensorflow/compiler/mlir/xla/transforms/canonicalize.td index 37f6d7deaa3..d510a3df994 100644 --- a/tensorflow/compiler/mlir/xla/transforms/canonicalize.td +++ b/tensorflow/compiler/mlir/xla/transforms/canonicalize.td @@ -38,7 +38,7 @@ def DynamicSliceToSlice: Pat<(HLO_DynamicSliceOp HLO_Tensor:$input, (BuildSliceLimits $starting_indices, $slice_sizes), (BuildSliceStrides $input))>; -def UnaryToBianryEinsumEq : NativeCodeCall< +def UnaryToBinaryEinsumEq : NativeCodeCall< "$_builder.getStringAttr(\",\" + $0.getValue().str())">; // Convert UnaryEinsumOp to EinsumOp with two operands with redundant first @@ -46,4 +46,4 @@ def UnaryToBianryEinsumEq : NativeCodeCall< def UnaryEinsumToEinsum : Pat< (HLO_UnaryEinsumOp $operand, $equation), (HLO_EinsumOp (HLO_ConstOp (GetScalarOfType<1> $operand)), - $operand, (UnaryToBianryEinsumEq $equation))>; + $operand, (UnaryToBinaryEinsumEq $equation))>; diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 8cee5e23d64..3187ffa9a64 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -448,7 +448,7 @@ static DenseIntElementsAttr TFSliceSizes2HLOSliceSizes( // `element_types`, create two block arguments, one for lhs and one for rhs, and // generates xla_hlo.compare op to compare them with the given `direction`. // -// Note that this right now only does comparsion on the first pair of block +// Note that this right now only does comparision on the first pair of block // arguments. static void BuildSortComparisonBody(llvm::ArrayRef element_types, StringRef direction, Region *body, @@ -2149,7 +2149,7 @@ class ConvertTopKV2Op : public OpRewritePattern { // Converts tf.Unpack to a series of XLA HLO slice ops. // // Each slice takes one element along the dimension to unpack and takes the full -// range for all other dimenions. Each slice is then reshaped to drop the +// range for all other dimensions. Each slice is then reshaped to drop the // dimension to unpack (which is always of size 1). // TODO(antiagainst): consider changing this into a TF internal lowering pass. class ConvertUnpackOp : public OpRewritePattern { diff --git a/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td b/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td index 252a10fc412..d8a5ae6c6de 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/lower_complex_patterns.td @@ -107,8 +107,8 @@ def : Pat<(HLO_AbsOp HLO_ComplexTensor:$val), (NullDenseIntElementsAttr))), (HLO_ConstOp (ConstantSplat<"0"> $real)))>; -// Expononetial can be lowered to an exponential on the real component and a -// sum of sinusoids of the imageinary component, which equates to a normal +// Exponential can be lowered to an exponential on the real component and a +// sum of sinusoids of the imaginary component, which equates to a normal // exponential operator multiplied by Euler's formula. 
// // Exp(a + ib) = Exp(a) * Exp(ib) = Exp(a) * (Cos(b) + iSin(b)) diff --git a/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h b/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h index 4107548a26b..11e3af7649b 100644 --- a/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h +++ b/tensorflow/compiler/mlir/xla/transforms/map_lhlo_to_scalar_op.h @@ -157,7 +157,7 @@ inline Operation* MapLhloOpToStdScalarOp( if (element_type.isa()) { Optional predicate = getIntCmpPredicate(lhlo_op.comparison_direction()); - assert(predicate.hasValue() && "expected valid comparision direction"); + assert(predicate.hasValue() && "expected valid comparison direction"); return b.create>(lhlo_op.getLoc(), predicate.getValue(), lhs, rhs); } diff --git a/tensorflow/compiler/tests/matrix_diag_ops_test.py b/tensorflow/compiler/tests/matrix_diag_ops_test.py index 69ae03a06cf..1ca9b157fa1 100644 --- a/tensorflow/compiler/tests/matrix_diag_ops_test.py +++ b/tensorflow/compiler/tests/matrix_diag_ops_test.py @@ -114,7 +114,7 @@ def square_cases(align=None): [6, 7, 8, 9, 1], [2, 3, 4, 5, 6]]]) tests = dict() - # tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals) + # tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals) tests[-1, -1] = (np.array([[6, 4, 1, 7], [5, 2, 8, 5]]), np.array([[[0, 0, 0, 0, 0], @@ -192,7 +192,7 @@ def tall_cases(align=None): [7, 8, 9], [9, 8, 7]]]) tests = dict() - # tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals) + # tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals) tests[0, 0] = (np.array([[1, 5, 9], [3, 2, 6]]), np.array([[[1, 0, 0], @@ -276,7 +276,7 @@ def fat_cases(align=None): [8, 9, 1, 2], [3, 4, 5, 6]]]) tests = dict() - # tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals) + # tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals) tests[0, 0] = (np.array([[1, 6, 2], [4, 9, 5]]), np.array([[[1, 0, 0, 0], diff --git a/tensorflow/compiler/tests/quantized_ops_test.py b/tensorflow/compiler/tests/quantized_ops_test.py index 9a1d29c0092..100be3b9aa5 100644 --- a/tensorflow/compiler/tests/quantized_ops_test.py +++ b/tensorflow/compiler/tests/quantized_ops_test.py @@ -49,7 +49,7 @@ class QuantizedOpsTest(xla_test.XLATestCase): self.assertAllEqual(value, expected) -class DeuantizedOpsTest(xla_test.XLATestCase): +class DequantizedOpsTest(xla_test.XLATestCase): def pack_uint8_r2_to_uint32(self, test_input): num_rows, num_columns = test_input.get_shape().as_list() diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index 5a2bda93942..dfa5bc106ed 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -3423,7 +3423,7 @@ int main(int argc, char** argv) { tensorflow::Flag( "tf_xla_random_seed", &tensorflow::tf_xla_random_seed, "Random seed to use for XLA tests. <= 0 means choose a seed " - "nondetermistically."), + "nondeterministically."), // TODO(phawkins): it might make more sense to run each test up to a // configurable time bound. 
tensorflow::Flag("tf_xla_test_repetitions", diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index 20804af5229..669d38757fa 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -161,7 +161,7 @@ Status GetEngineInfo(const Graph* g, const int node_id = node->id(); const string& node_name = node->name(); - // Create input connections. Sort edges first to make determnistic since + // Create input connections. Sort edges first to make deterministic since // in_edges is a set of pointers. std::vector in_edges(node->in_edges().begin(), node->in_edges().end()); @@ -186,7 +186,7 @@ Status GetEngineInfo(const Graph* g, // If it doesn't have any edges, TF will prune it out. // // Note that the segmenter already ensure that the constant data input - // is valid and suppported by the engine. + // is valid and supported by the engine. if (!added_const_nodes.insert(input_node).second) { // Already added before. continue; @@ -209,7 +209,7 @@ Status GetEngineInfo(const Graph* g, node_id, edge->dst_input(), /*input_edge=*/true, port); } } - // Create output connections. Sort edges first to make determnistic since + // Create output connections. Sort edges first to make deterministic since // out_edges is a set of pointers. std::vector out_edges(node->out_edges().begin(), node->out_edges().end()); diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 90c28e03d4d..de2b0e4826f 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2511,7 +2511,7 @@ Status ConvertStridedSliceHelper(OpConverterParams* params, return Status::OK(); } else if (pad_dims.size() == 1) { // Only one dim is modified but we have to have 2, mark a second dim which - // will have padding of 0. The dim we add is chosen to avoid an unecessary + // will have padding of 0. The dim we add is chosen to avoid an unnecessary // transpose. if (pad_dims[0] != 2) { pad_dims.push_back(2); diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index eb51ec1b3f6..e0d95dc7528 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -141,9 +141,9 @@ Status ConvertSegmentToGraphDef( // Converts given subgraph to a TRT engine saved in 'engine'. Returns ok iff // 'builder' successfully build the engine. If the result is not ok, 'engine' // will be set to nullptr -// Once returned, 'builder' is not needed any more and can be safely detroyed. +// Once returned, 'builder' is not needed any more and can be safely destroyed. // -// - convert_successfully: indicates whether the converson to TensorRT network +// - convert_successfully: indicates whether the conversion to TensorRT network // is successful. This is different than successfully building the engine: // building can still fail afterwards. 
Status ConvertGraphDefToEngine( diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index ef03ab91714..738b848f959 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -521,7 +521,7 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) { "Scalar input tensor is not supported since the first dimension " "is treated as batch dimension by TRT"); } - // Convert non-Const. We test the case where the non-batch dimemsion is + // Convert non-Const. We test the case where the non-batch dimension is // unknown as well, to make sure the validator allows that. for (const int32 non_batch_dim : {-1, 2}) { const int32 batch_size = 12; @@ -973,7 +973,7 @@ TEST_F(ConverterTest, GetWeightRange) { TEST_F(ConverterTest, ProvideQuantizationRange) { FakeITensor fake_tensor; - // Assymetric range + // Asymmetric range converter_->ProvideQuantizationRange(&fake_tensor, 0.0f, 6.0f); EXPECT_EQ(6.0f, quantization_ranges()[&fake_tensor]); converter_->ProvideQuantizationRange(&fake_tensor, 1.0f, 6.0f); diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index f707cf75417..2d22c0a149f 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -125,7 +125,7 @@ class TRTEngineOp : public AsyncOpKernel { // Verify that the input shapes are consistent and can be handled by this op. Status VerifyInputShapes(const std::vector& shapes); - // Return engine batch in cached_engne_batch_sizes_ which is closest to input + // Return engine batch in cached_engine_batch_sizes_ which is closest to input // batch. Status GetEngineInputShapes( const CacheType& cache, diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc index 7a9b9f65fd8..c868416d048 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc @@ -112,7 +112,7 @@ TEST_F(TRTEngineResourceOpsTest, Basic) { EXPECT_TRUE( errors::IsNotFound(rm->Lookup(container, resource_name, &resource))); - // Create the resouce using an empty file with InitializeTRTResource. + // Create the resource using an empty file with InitializeTRTResource. Reset(); Env* env = Env::Default(); const string filename = io::JoinPath(testing::TmpDir(), "trt_engine_file"); diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment.cc b/tensorflow/compiler/tf2tensorrt/segment/segment.cc index 6d3920874aa..4d9dd42a53a 100644 --- a/tensorflow/compiler/tf2tensorrt/segment/segment.cc +++ b/tensorflow/compiler/tf2tensorrt/segment/segment.cc @@ -466,7 +466,7 @@ Status SegmentGraph(const Graph* tf_graph, // grow from the output-side of the network towards the inputs. // // In general this is not guaranteed to produce a globally optimal - // segmentation. For exaample, consider graph with node {A, B, C, D} and edges + // segmentation. For example, consider graph with node {A, B, C, D} and edges // {A->B, A->C, B->D, C->D), where A, B, D are trt compatible but C is not, so // in theory we can choose to contract either A, B or B, D but not both, but // here it always choose to contract B, D. 
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h index d2ea8ad38cf..06b39716490 100644 --- a/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h +++ b/tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h @@ -34,7 +34,7 @@ namespace tensorrt { // TRTs pull model for calibration. When TRT implements a means for // a push calibration This class should be updated accordingly -// IInt8EntropyCalibrator2 is prefferred for TRT 5.1+. +// IInt8EntropyCalibrator2 is preferred for TRT 5.1+. #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 { #else diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 0e614ca7ace..f9af5581a67 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -572,7 +572,7 @@ Status Conditional::ExtractBodies(Graph* graph) { if (visited.at(n->id())) continue; visited[n->id()] = true; - // Verify output edges and record control edges exitting scope. + // Verify output edges and record control edges exiting scope. for (const Edge* e : n->out_edges()) { Node* dst = e->dst(); if (IsMerge(dst)) continue; @@ -602,7 +602,7 @@ Status Conditional::ExtractBodies(Graph* graph) { } } - // Copying incomming edges to dst node. Iterate over a copy of the edges + // Copying incoming edges to dst node. Iterate over a copy of the edges // as they could be mutated during iteration. std::vector in_edges(n->in_edges().begin(), n->in_edges().end()); @@ -719,7 +719,7 @@ Status Conditional::ExtractBodies(Graph* graph) { ++index; // Connect the input to the merge_ with the retval, except if it is a - // Swich node, which is handled separately. + // Switch node, which is handled separately. for (auto e : m->in_edges()) { if (e->IsControlEdge()) continue; int branch_index = static_cast(find_branch(e)); @@ -1139,7 +1139,7 @@ StateMap::CondId FunctionalizeCond::StateAlongEdge(const Edge* e) { // node. If we don't record this into CondState, branches might have // incorrect CondState (e.g. if the branch only has a Const data node). // We set it to kNeither because there is no way to tell whether it's - // for true branch or false branch. This node's desendents might have + // for true branch or false branch. This node's descendents might have // other incoming edges with defined BranchType, and we correctly handle // merging kNeither with other defined BranchType in StateAlongEdge(). state[predicate] = BranchType::kNeither; diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.h b/tensorflow/compiler/tf2xla/functionalize_cond.h index d85800fb8ee..7940732a11d 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.h +++ b/tensorflow/compiler/tf2xla/functionalize_cond.h @@ -213,7 +213,7 @@ class FunctionalizeCond { // This populates the state_map_. Status DetermineStates(std::vector rev_topo_order); - // Determine the CondState for a given node using the incomming edges + // Determine the CondState for a given node using the incoming edges // to the node. Note: it is expected that this node's CondState is only // determined once its input's CondState is. 
Status DetermineCondState(Node* dst) { diff --git a/tensorflow/compiler/tf2xla/kernels/assert_op.cc b/tensorflow/compiler/tf2xla/kernels/assert_op.cc index 94543686b47..c40caa8fa10 100644 --- a/tensorflow/compiler/tf2xla/kernels/assert_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/assert_op.cc @@ -22,7 +22,7 @@ namespace tensorflow { namespace { -// This TensorFlow op supports the Assert primitve. +// This TensorFlow op supports the Assert primitive. class AssertOp : public XlaOpKernel { public: explicit AssertOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} diff --git a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc index 507bc8d7a3b..67d49eafcde 100644 --- a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc @@ -329,7 +329,7 @@ class MaxPoolGradOp : public XlaOpKernel { (padding_ == VALID) ? xla::Padding::kValid : xla::Padding::kSame; // Create a MaxPool operation to check the expected resulting shape, and - // then throw away the operation because we don't actually neeed it here. + // then throw away the operation because we don't actually need it here. TensorShape expected_out_shape; auto pooling = xla::MaxPool(ctx->Input(0), ksize_, stride_, xla_padding, diff --git a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc index 0b5b66ae52f..7a8aec295a6 100644 --- a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc @@ -37,7 +37,7 @@ class XlaConvOp : public XlaOpKernel { context, context->GetAttr("precision_config", &precision_config_attr)); OP_REQUIRES(context, precision_config_.ParsePartialFromString(precision_config_attr), - errors::InvalidArgument("Error parsing precison config.")); + errors::InvalidArgument("Error parsing precision config.")); } void Compile(XlaOpKernelContext* context) override { diff --git a/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc index a28ecd660ab..8e9ed35783f 100644 --- a/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc @@ -32,7 +32,7 @@ class XlaSvdOp : public XlaOpKernel { ctx->GetAttr("precision_config", &precision_config_attr)); OP_REQUIRES(ctx, precision_config_.ParsePartialFromString(precision_config_attr), - errors::InvalidArgument("Error parsing precison config.")); + errors::InvalidArgument("Error parsing precision config.")); if (precision_config_.operand_precision_size() == 0) { precision_config_.mutable_operand_precision()->Add( xla::PrecisionConfig::HIGHEST); diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index dab051b39a8..33b740a706c 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -84,7 +84,7 @@ lower: a boolean specifies whether the calculation is done with the lower max_iter: maximum number of sweep update, i.e., the whole lower triangular part or upper triangular part based on parameter lower. Heuristically, it has - been argued that approximatly logN sweeps are needed in practice (Ref: Golub & + been argued that approximately logN sweeps are needed in practice (Ref: Golub & van Loan "Matrix Computation"). epsilon: the tolerance ratio. @@ -116,7 +116,7 @@ a: the input tensor. max_iter: maximum number of sweep update, i.e., the whole lower triangular part or upper triangular part based on parameter lower. 
Heuristically, it has - been argued that approximatly log(min (M, N)) sweeps are needed in practice + been argued that approximately log(min (M, N)) sweeps are needed in practice (Ref: Golub & van Loan "Matrix Computation"). epsilon: the tolerance ratio. @@ -610,7 +610,7 @@ REGISTER_OP("XlaDequantize") .SetShapeFn(shape_inference::UnknownShape) .Doc(R"doc( Takes the packed uint32 input and unpacks the input to uint8 to do -Dequantization on deivce. +Dequantization on device. input: Input tensors whose types is uint32, shape is [d0, ..., dn]. output: Output tensors whose types is bloat16. If transpose_output is true, @@ -644,7 +644,7 @@ REGISTER_OP("XlaEinsum") .Doc(R"doc( An op which supports basic einsum op with 2 inputs and 1 output. -This op has better TPU performnce since it doesn't have explicitly reshape and +This op has better TPU performance since it doesn't have explicitly reshape and transpose operations as tf.einsum does. )doc"); diff --git a/tensorflow/compiler/tf2xla/shape_util.h b/tensorflow/compiler/tf2xla/shape_util.h index e775c4462c3..331cfa38c1d 100644 --- a/tensorflow/compiler/tf2xla/shape_util.h +++ b/tensorflow/compiler/tf2xla/shape_util.h @@ -51,7 +51,7 @@ xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type, // In case the input shape is a tuple, the minor-to-major values will be in the // order of the tuple elements within the tuple shape. // If a shape (or a subshape of a tuple shape) has missing layout, a rank long -// sequence of -1 values will be emittted. +// sequence of -1 values will be emitted. xla::StatusOr> GetShapeLayoutVector(const xla::Shape& shape); // Given the input shape and a linearized sequence of the minor-to-major values diff --git a/tensorflow/compiler/tf2xla/tf2xla.proto b/tensorflow/compiler/tf2xla/tf2xla.proto index 3093a0b1d8d..557f5bc3470 100644 --- a/tensorflow/compiler/tf2xla/tf2xla.proto +++ b/tensorflow/compiler/tf2xla/tf2xla.proto @@ -52,7 +52,7 @@ message Variable { TensorShapeProto shape = 3; DataType type = 4; - // Flag for variables that are never assigned. Assigments to a read-only + // Flag for variables that are never assigned. Assignments to a read-only // variable or unassigned variables that are not read-only are invalid. bool readonly = 5; } diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index 3e75cf7fa58..27b198f8bee 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -213,13 +213,13 @@ class XlaOpKernelContext { return dynamic_dimension_is_minus_one_; } - // Reads the current value of the resouce variable referred to by input + // Reads the current value of the resource variable referred to by input // `index`. If `shape` is not nullptr, sets `*shape` to the shape of the // variable. Returns an error if the variable has not been initialized, or if // its type does not match `type`. Status ReadVariableInput(int index, DataType type, TensorShape* shape, xla::XlaOp* value); - // Reads the current value of the resouce variable referred to by input + // Reads the current value of the resource variable referred to by input // `name`. 
Status ReadVariableInput(absl::string_view name, DataType type, TensorShape* shape, xla::XlaOp* value); diff --git a/tensorflow/compiler/xla/client/lib/comparators_test.cc b/tensorflow/compiler/xla/client/lib/comparators_test.cc index 598956803b3..d6e5d80b85f 100644 --- a/tensorflow/compiler/xla/client/lib/comparators_test.cc +++ b/tensorflow/compiler/xla/client/lib/comparators_test.cc @@ -73,7 +73,7 @@ void BuildComparatorAndComparisons(ComparatorsTest* test, } } - // Concantenate the comparison results. + // Concatenate the comparison results. ConcatInDim(test->builder(), all_comparisons, 0); // If we use less-than comparisons, we expect the comparison to result in true diff --git a/tensorflow/compiler/xla/client/lib/matrix.cc b/tensorflow/compiler/xla/client/lib/matrix.cc index d2275af5ca4..3f4a63c31be 100644 --- a/tensorflow/compiler/xla/client/lib/matrix.cc +++ b/tensorflow/compiler/xla/client/lib/matrix.cc @@ -316,7 +316,7 @@ Status ValidateEinsumNumericDimensions(absl::Span x_config, namespace { // Helper method to remove dimensions from a shape and dot dimension numbers -// used to implment implicit broadcasting. +// used to implement implicit broadcasting. template void DeleteDimsFromContainer(absl::Span to_delete, Shape* shape, C* batch_dims, C* contracting_dims) { @@ -473,7 +473,7 @@ xla::XlaOp Einsum(xla::XlaOp x, absl::Span x_config, xla::XlaOp y, transpose_dims[output_transpose_dims[i]] = i; } - // Remove ones that where broadcated from the x and the y shape and adjust + // Remove ones that were broadcasted from the x and the y shape and adjust // the dimension numbers that are more minor than those dimensions. DeleteDimsFromContainer(lhs_delete_dims, &x_shape, dnums.mutable_lhs_batch_dimensions(), diff --git a/tensorflow/compiler/xla/client/lib/matrix.h b/tensorflow/compiler/xla/client/lib/matrix.h index 6377704c58c..46f70ed27b9 100644 --- a/tensorflow/compiler/xla/client/lib/matrix.h +++ b/tensorflow/compiler/xla/client/lib/matrix.h @@ -132,7 +132,7 @@ xla::XlaOp Einsum( // the input. xla::XlaOp EinsumDiagonal(XlaOp x, absl::Span config); -// Same as above but supporting numeric labels on dimensins. So "ab,cb->ac" +// Same as above but supporting numeric labels on dimensions.
So "ab,cb->ac" // becomes: // x_config = {0, 1} // y_config = {2, 1} diff --git a/tensorflow/compiler/xla/client/lib/pooling.cc b/tensorflow/compiler/xla/client/lib/pooling.cc index 1979c867a4c..45033ec07e7 100644 --- a/tensorflow/compiler/xla/client/lib/pooling.cc +++ b/tensorflow/compiler/xla/client/lib/pooling.cc @@ -39,7 +39,7 @@ XlaOp AvgPoolDivideByCountWithGeneralPadding( std::vector window_ksize(num_spatial_dims); std::vector window_stride(num_spatial_dims); CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims) - << "Invalid number of spatial dimentions in data format specification"; + << "Invalid number of spatial dimensions in data format specification"; for (int i = 0; i < num_spatial_dims; ++i) { int dim = data_format.spatial_dimension(i); input_dim_sizes[i] = input_shape[dim]; @@ -95,7 +95,7 @@ PaddingConfig MakeSpatialPaddingConfig( padding_config.add_dimensions(); } CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims) - << "Invalid number of spatial dimentions in data format specification"; + << "Invalid number of spatial dimensions in data format specification"; for (int i = 0; i < num_spatial_dims; ++i) { int dim = data_format.spatial_dimension(i); auto padding_dimension = padding_config.mutable_dimensions(dim); @@ -178,7 +178,7 @@ std::vector> MakeSpatialPadding( std::vector kernel_size_spatial_dimensions; std::vector stride_spatial_dimensions; CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims) - << "Invalid number of spatial dimentions in data format specification"; + << "Invalid number of spatial dimensions in data format specification"; for (int i = 0; i < num_spatial_dims; ++i) { int dim = data_format.spatial_dimension(i); input_spatial_dimensions.push_back(input_size[dim]); diff --git a/tensorflow/compiler/xla/client/lib/slicing.cc b/tensorflow/compiler/xla/client/lib/slicing.cc index b47ddb7919f..7d8f433bac8 100644 --- a/tensorflow/compiler/xla/client/lib/slicing.cc +++ b/tensorflow/compiler/xla/client/lib/slicing.cc @@ -154,29 +154,29 @@ XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim, bool sparse) { return TorchIndexSelect(input, index, 0); } if (!sparse) { - std::vector index_broacast_dims; - std::vector input_broacast_dims; + std::vector index_broadcast_dims; + std::vector input_broadcast_dims; std::vector sizes; for (int64 i = 0; i < index_shape.rank(); ++i) { if (i < dim) { - input_broacast_dims.push_back(i); - index_broacast_dims.push_back(i); + input_broadcast_dims.push_back(i); + index_broadcast_dims.push_back(i); } else if (i == dim) { sizes.push_back(input_shape.dimensions(i)); - input_broacast_dims.push_back(i); - index_broacast_dims.push_back(i + 1); + input_broadcast_dims.push_back(i); + index_broadcast_dims.push_back(i + 1); } else { - input_broacast_dims.push_back(i + 1); - index_broacast_dims.push_back(i + 1); + input_broadcast_dims.push_back(i + 1); + index_broadcast_dims.push_back(i + 1); } sizes.push_back(index_shape.dimensions(i)); } auto mask = Eq( - BroadcastInDim(index, sizes, index_broacast_dims), + BroadcastInDim(index, sizes, index_broadcast_dims), Iota(builder, ShapeUtil::MakeShape(index_shape.element_type(), sizes), dim)); auto masked_input = Select( - mask, BroadcastInDim(input, sizes, input_broacast_dims), + mask, BroadcastInDim(input, sizes, input_broadcast_dims), Zeros(builder, ShapeUtil::MakeShape(input_shape.element_type(), sizes))); return Reduce(masked_input, Zero(builder, input_shape.element_type()), @@ -214,25 +214,25 @@ XlaOp TorchScatterDense(XlaOp input, XlaOp index, XlaOp src, int64 dim, return 
builder->ReportErrorOrReturn([&]() -> StatusOr { TF_ASSIGN_OR_RETURN(Shape index_shape, builder->GetShape(index)); TF_ASSIGN_OR_RETURN(Shape input_shape, builder->GetShape(input)); - std::vector index_broacast_dims; + std::vector index_broadcast_dims; std::vector sizes; for (int64 i = 0; i < index_shape.rank(); ++i) { if (i < dim) { - index_broacast_dims.push_back(i); + index_broadcast_dims.push_back(i); } else { if (i == dim) { sizes.push_back(input_shape.dimensions(i)); } - index_broacast_dims.push_back(i + 1); + index_broadcast_dims.push_back(i + 1); } sizes.push_back(index_shape.dimensions(i)); } auto mask = - Eq(BroadcastInDim(index, sizes, index_broacast_dims), + Eq(BroadcastInDim(index, sizes, index_broadcast_dims), Iota(builder, ShapeUtil::MakeShape(index_shape.element_type(), sizes), dim)); auto masked_src = - Select(mask, BroadcastInDim(src, sizes, index_broacast_dims), + Select(mask, BroadcastInDim(src, sizes, index_broadcast_dims), Zeros(builder, ShapeUtil::MakeShape(input_shape.element_type(), sizes))); diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index 9f520bcdadf..5e177cd391e 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -98,7 +98,7 @@ std::vector> MakeFakeArgumentsOrDie( const XlaComputation& computation, Client* client, DebugOptions* debug_opts /*=nullptr*/) { CHECK(computation.proto().has_host_program_shape()) - << "Computation should have progran shape."; + << "Computation should have program shape."; auto program_shape = computation.proto().host_program_shape(); std::vector> results; diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 153cb9f5212..97679d9b9ac 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -329,15 +329,15 @@ StatusOr LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) { } StatusOr LocalClient::TransferToLocalServer( - const ::xla::BorrowingLiteral& literal, int device_oridinal) { + const ::xla::BorrowingLiteral& literal, int device_ordinal) { const ::xla::Shape& shape = literal.shape(); TF_ASSIGN_OR_RETURN( ::xla::ScopedShapedBuffer shaped_buffer, backend().transfer_manager()->AllocateScopedShapedBuffer( - shape, backend().memory_allocator(), device_oridinal)); + shape, backend().memory_allocator(), device_ordinal)); TF_ASSIGN_OR_RETURN(auto stream, - mutable_backend()->BorrowStream(device_oridinal)); + mutable_backend()->BorrowStream(device_ordinal)); TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice( stream.get(), literal, shaped_buffer)); std::vector<::xla::ScopedShapedBuffer> replicated_buffer; diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index f5e66c6d586..221a911567c 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -122,7 +122,7 @@ class LocalClient : public Client { // Transfer the BorrowingLiteral to the device with the given ordinal. StatusOr TransferToLocalServer( - const ::xla::BorrowingLiteral& literal, int device_oridinal); + const ::xla::BorrowingLiteral& literal, int device_ordinal); // Copy the data from the device contained in the given ShapedBuffer and // return as a Literal. 
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 5e93bb2b3ba..ac86b78fded 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -232,7 +232,7 @@ class XlaBuilder { // added operation. // // `remove_dynamic_dimensions` tells the builder whether to remove the - // dyanmic dimensions information in all ops. + // dynamic dimensions information in all ops. // // TODO(b/121223198): Delete `remove_dynamic_dimensions` and keeps the // dynamic dimensions information when XLA backend can handle dynamic @@ -1194,7 +1194,7 @@ XlaOp Broadcast(XlaOp operand, absl::Span broadcast_sizes); // // For example, say operand = {1, 2}, i.e., a 1D tensor in shape s32[2]; the // output shape is s32[2,2]: -// - Specifying {1} as brodcast_dimension will generate output +// - Specifying {1} as broadcast_dimension will generate output // {{1, 2}, // {1, 2}} // - On the other hand, specifying {0} as broadcast_dimension @@ -1469,7 +1469,7 @@ XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower, // two minor dimensions equal. // If `lower` is true, the data from the lower triangle is used; if false, the // upper triangle is used. The input data in the other triangle of the input -// does not affect the output. Returns the output in the same lower/uppper +// does not affect the output. Returns the output in the same lower/upper // triangle. The data returned in the other output triangle is arbitrary and // implementation-defined. // diff --git a/tensorflow/compiler/xla/client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_builder_test.cc index f76ea38e08e..fd227ea47f2 100644 --- a/tensorflow/compiler/xla/client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_builder_test.cc @@ -292,7 +292,7 @@ TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) { TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); // The binary operation has in-dim broadcast and degenerate broadcast, should - // first do the in-dim broadcast then convert the degnerate broadcast into a + // first do the in-dim broadcast then convert the degenerate broadcast into a // reshape and a broadcast. // // Expected: diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc index 064d8cd8d8b..16c83ab9b2c 100644 --- a/tensorflow/compiler/xla/debug_options_flags.cc +++ b/tensorflow/compiler/xla/debug_options_flags.cc @@ -328,7 +328,7 @@ static void AllocateFlags() { "use multi-threaded Eigen mode."), tensorflow::Flag("xla_gpu_cuda_data_dir", flag_values->mutable_xla_gpu_cuda_data_dir(), - "If non-empty, speficies a local directory containing " + "If non-empty, specifies a local directory containing " "ptxas and nvvm libdevice files; otherwise we use " "those from runfile directories."), tensorflow::Flag("xla_gpu_ftz", @@ -347,7 +347,7 @@ static void AllocateFlags() { flag_values->xla_gpu_max_kernel_unroll_factor(), "Specify the maximum kernel unroll factor for the GPU backend."), tensorflow::Flag("xla_gpu_ptx_file", setter_for_xla_gpu_ptx_file, "", - "If non-empty, speficies a file containing ptx to use. " + "If non-empty, specifies a file containing ptx to use. " "The filename prefix must have the same pattern as PTX " "dumped by XLA. This allows to match one specific " "module. General workflow. 
Get the generated module " diff --git a/tensorflow/compiler/xla/debug_options_flags.h b/tensorflow/compiler/xla/debug_options_flags.h index 1675b377edf..069e36dc52a 100644 --- a/tensorflow/compiler/xla/debug_options_flags.h +++ b/tensorflow/compiler/xla/debug_options_flags.h @@ -52,7 +52,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags(); // By default all passes have infinite fuel. You can restrict how much fuel a // pass has by specifying XLA_FLAGS=--xla_fuel=PASS1=NUM1,PASS2=NUM2,... // -// If a user specifes --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called +// If a user specifies --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called // before the program exits, we'll print a warning. // // We recommend as a convention you use a pass's name for the `pass` argument, @@ -91,7 +91,7 @@ bool ConsumeFuel(absl::string_view pass, // startup. // // You may call this function twice in the same thread to reset its fuel pool -// back to the intitial state. +// back to the initial state. void ResetThreadLocalFuel(); } // namespace xla diff --git a/tensorflow/compiler/xla/execution_options_util.h b/tensorflow/compiler/xla/execution_options_util.h index a8ca27ec8df..7bb817b8f1d 100644 --- a/tensorflow/compiler/xla/execution_options_util.h +++ b/tensorflow/compiler/xla/execution_options_util.h @@ -21,7 +21,7 @@ limitations under the License. namespace xla { // Create a default ExecutionOptions proto; this proto has its debug options -// popupated to the default values taken from flags. +// populated to the default values taken from flags. ExecutionOptions CreateDefaultExecutionOptions(); } // namespace xla diff --git a/tensorflow/compiler/xla/g3doc/operation_semantics.md b/tensorflow/compiler/xla/g3doc/operation_semantics.md index 8cf8022340a..ee7b2b20928 100644 --- a/tensorflow/compiler/xla/g3doc/operation_semantics.md +++ b/tensorflow/compiler/xla/g3doc/operation_semantics.md @@ -94,7 +94,7 @@ The participating cores can be configured by: in the same order of 1, 2, 3. Then, another AllToAll will be applied within replicas 4, 5, 0, and the concatenation order is also 4, 5, 0. If `replica_groups` is empty, all replicas belong to one group, in the - concatenation order of their appearence. + concatenation order of their appearance. Prerequisites: diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index bbd640f6064..3d6310c1e17 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -248,7 +248,7 @@ Status MutableLiteralBase::CopySliceFromInternal( TF_RET_CHECK(src_base.size() == copy_size.size()); // Scan the source from minor, stepping in copy size blocks, then within - // the index enumaration functor, do a strided copy advancing source index + // the index enumeration functor, do a strided copy advancing source index // by one (walking through the minor dimension), and destination index by // proper stride size at the matching dimension. DimensionVector src_indexes(src_base.size(), 0); diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h index 227717188ab..2d27f8eb7f6 100644 --- a/tensorflow/compiler/xla/literal.h +++ b/tensorflow/compiler/xla/literal.h @@ -810,7 +810,7 @@ class Literal : public MutableLiteralBase { Literal(const Shape& shape, bool allocate_arrays); Literal& operator=(Literal&& other); - // Similar to CopyFrom, but with move semantincs. The subshape of this literal + // Similar to CopyFrom, but with move semantics. 
The subshape of this literal // rooted at 'dest_shape_index' must be *equal* to the shape 'src_literal' // (layouts and shapes must match), but need not be arrays. The memory // allocated in this literal for the subshape at dest_shape_index is @@ -883,7 +883,7 @@ class BorrowingLiteral : public LiteralBase { BorrowingLiteral() : LiteralBase() {} // 'src_buf_ptr' is not owned by this class and must outlive the - // lifetime of this class. It points to an appropirately sized buffer with + // lifetime of this class. It points to an appropriately sized buffer with // data interpretered as indicated by 'shape'. // This constructor is only used for array shapes. BorrowingLiteral(const char* src_buf_ptr, const Shape& shape); diff --git a/tensorflow/compiler/xla/literal_comparison.cc b/tensorflow/compiler/xla/literal_comparison.cc index 662aeead14e..e1f52f72e5d 100644 --- a/tensorflow/compiler/xla/literal_comparison.cc +++ b/tensorflow/compiler/xla/literal_comparison.cc @@ -433,7 +433,7 @@ class NearComparator { } } else if (IsInf(expected) || IsInf(actual)) { // If either the expected or actual value is infinity but not both, - // then both absolute and relative error are regarded as inifity. + // then both absolute and relative error are regarded as infinity. CHECK(!CompareEqual(expected, actual, {linear_index})); abs_error = std::numeric_limits::infinity(); rel_error = std::numeric_limits::infinity(); diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc index d1dd6b8fd77..9b17cb762c8 100644 --- a/tensorflow/compiler/xla/literal_test.cc +++ b/tensorflow/compiler/xla/literal_test.cc @@ -1134,7 +1134,7 @@ TEST_F(LiteralUtilTest, CopyFromDifferentShapes) { TEST_F(LiteralUtilTest, F16) { // Verify that the internal data views are consistent and that they // are in little endian format - // TODO - modify if we make the data format machine endianess dependent + // TODO - modify if we make the data format machine endianness dependent Literal m1 = Literal::CreateFromShape(ShapeUtil::MakeShape(F16, {2, 2})); const char* d1 = reinterpret_cast(m1.data().data()); EXPECT_EQ(d1[0], 0); diff --git a/tensorflow/compiler/xla/parse_flags_from_env.h b/tensorflow/compiler/xla/parse_flags_from_env.h index 76940a4299a..18d9788cde4 100644 --- a/tensorflow/compiler/xla/parse_flags_from_env.h +++ b/tensorflow/compiler/xla/parse_flags_from_env.h @@ -30,7 +30,7 @@ limitations under the License. // - in which case the effective value is the // string with the single-quotes removed -// - in which case the effective value if the // string with the double-quotes removed, and escaped sequences of // replaced by . diff --git a/tensorflow/compiler/xla/parse_flags_from_env_test.cc b/tensorflow/compiler/xla/parse_flags_from_env_test.cc index 3465552ebbf..32f27449b22 100644 --- a/tensorflow/compiler/xla/parse_flags_from_env_test.cc +++ b/tensorflow/compiler/xla/parse_flags_from_env_test.cc @@ -73,14 +73,14 @@ static const char kTestFlagString[] = "--single_quoted='single quoted \\\\ \n \"' " "--double_quoted=\"double quoted \\\\ \n '\\\"\" "; -// Test that the environent variable is parsed correctly. +// Test that the environment variable is parsed correctly. TEST(ParseFlagsFromEnv, Basic) { // Prepare environment. setenv("TF_XLA_FLAGS", kTestFlagString, true /*overwrite*/); TestParseFlagsFromEnv("(flags in environment variable)"); } -// Test that a file named by the environent variable is parsed correctly. +// Test that a file named by the environment variable is parsed correctly. 
TEST(ParseFlagsFromEnv, File) { // environment variables where tmp dir may be specified. static const char* kTempVars[] = {"TEST_TMPDIR", "TMP"}; @@ -154,7 +154,7 @@ int main(int argc, char* argv[]) { xla::int32 int_flag = 1; const std::vector flag_list = { tensorflow::Flag("recursing", &recursing, - "Whether the binary is being invoked recusively."), + "Whether the binary is being invoked recursively."), tensorflow::Flag("int_flag", &int_flag, "An integer flag to test with"), }; xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); diff --git a/tensorflow/compiler/xla/python/local_client.cc b/tensorflow/compiler/xla/python/local_client.cc index ef8ff4275a6..d0bb1eb8015 100644 --- a/tensorflow/compiler/xla/python/local_client.cc +++ b/tensorflow/compiler/xla/python/local_client.cc @@ -551,7 +551,7 @@ PyLocalBuffer::DestructureTuple() { absl::MutexLock lock(&mu_); if (!on_host_shape_.IsTuple()) { return InvalidArgument( - "Attemped to destructure a PyLocalBuffer that did not have a tuple " + "Attempted to destructure a PyLocalBuffer that did not have a tuple " "shape; shape: %s", ShapeUtil::HumanString(on_host_shape_)); } diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc index b9ca2a7e1a7..2b69239bb7a 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc @@ -345,7 +345,7 @@ PyTpuBuffer::DestructureTuple() { tensorflow::profiler::TraceMe traceme("PyTpuBuffer::DestructureTuple"); if (!on_host_shape_.IsTuple()) { return InvalidArgument( - "Attemped to destructure a PyTpuBuffer that did not have a tuple " + "Attempted to destructure a PyTpuBuffer that did not have a tuple " "shape; shape: %s", ShapeUtil::HumanString(on_host_shape_)); } diff --git a/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h b/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h index 36b7fa0d801..dc28ad1f0b4 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h +++ b/tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h @@ -37,7 +37,7 @@ #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/logging.h" -// This API is EXPERIMENTAL and under active developement. It is subject to +// This API is EXPERIMENTAL and under active development. It is subject to // change without notice. namespace tpu_driver { diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index a7e35a8a81f..a0ae04a9580 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -1526,7 +1526,7 @@ class ComputationBuilder(object): ConvWithGeneralPadding. feature_group_count: number of feature groups for grouped convolution. batch_group_count: number of batch groups for grouped convolution. - Returns: a XlaOp representing the ConvGenralDilated operation. + Returns: a XlaOp representing the ConvGeneralDilated operation. """ if dimension_numbers is None: dimension_numbers = self._GetConvDimensionNumbers(len(window_strides)) diff --git a/tensorflow/compiler/xla/python_api/types.py b/tensorflow/compiler/xla/python_api/types.py index 57dfce3971b..fffe5adab1d 100644 --- a/tensorflow/compiler/xla/python_api/types.py +++ b/tensorflow/compiler/xla/python_api/types.py @@ -25,10 +25,10 @@ import numpy as _np # Avoids becoming a part of public Tensorflow API. 
from tensorflow.compiler.xla import xla_data_pb2 from tensorflow.python.framework import dtypes -# Records corresponsence between a XLA primitive type and Python/Numpy types. +# Records correspondence between a XLA primitive type and Python/Numpy types. # # primitive_type: value of type xla_data_pb2.PrimitiveType -# numpy_dtype: corresponsing Numpy "dtype" (like np.float32) +# numpy_dtype: corresponding Numpy "dtype" (like np.float32) # literal_field_name: name of the field in the LiteralProto message elements # of this type go into. # literal_field_type: type of the field named 'literal_field_name'. diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 2fe8c309cb0..f145b447bef 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -673,7 +673,7 @@ Status AlgebraicSimplifierVisitor::HandleBitcast(HloInstruction* bitcast) { bitcast, HloInstruction::CreateBitcast(bitcast->shape(), op)); } // All bitcasts can be eliminated (assuming layout constraints are - // satisified). + // satisfied). ReplaceInstructionIfSameShape(bitcast, bitcast->mutable_operand(0)); return Status::OK(); } @@ -692,7 +692,7 @@ Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) { return ReplaceWithNewInstruction( copy, HloInstruction::CreateUnary(copy->shape(), HloOpcode::kCopy, op)); } - // All copies can be eliminated (assuming layout constraints are satisified). + // All copies can be eliminated (assuming layout constraints are satisfied). if (ReplaceInstructionIfSameShape(copy, copy->mutable_operand(0))) { return Status::OK(); } @@ -2735,7 +2735,7 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) { // Don't perform this optimization if either of the exponents is complex; this // identity is true only for real-valued exponents. In addition, we cowardly - // refuse to do this transformation if the two expontents have different + // refuse to do this transformation if the two exponents have different // element types. if (lhs->opcode() == HloOpcode::kPower && !ShapeUtil::ElementIsComplex(lhs->operand(1)->shape()) && diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 88282986560..f37ff5387ee 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -4756,7 +4756,7 @@ TEST_P(DotStrengthReductionTest, DotStrengthReduction) { const bool computation_should_be_modified = dot_should_be_transformed || (transpose_lhs && transpose_rhs); EXPECT_EQ(changed, computation_should_be_modified); - // The second pass of algebriac simplifer will remove dots without + // The second pass of algebraic simplifier will remove dots without // non-contracting dimensions or contracting dimensions. TF_ASSERT_OK_AND_ASSIGN(changed, simplifier.Run(module.get())); EXPECT_EQ(changed, computation_should_be_modified); diff --git a/tensorflow/compiler/xla/service/batchnorm_expander_test.cc b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc index 34b516184fa..d7b0dc8b29d 100644 --- a/tensorflow/compiler/xla/service/batchnorm_expander_test.cc +++ b/tensorflow/compiler/xla/service/batchnorm_expander_test.cc @@ -38,7 +38,7 @@ namespace { class BatchNormExpanderTest : public HloTestBase { protected: - // BatchNorm should have a dynamic sized dividor for mean operations. 
+ // BatchNorm should have a dynamic sized divider for mean operations. int64 CountGetDimensionSize(const HloModule& module) { int64 count = 0; for (HloComputation* comp : module.computations()) { diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 0d96ffd4568..e7cb128b62b 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -1608,7 +1608,7 @@ StatusOr> BufferAssigner::CreateAssignment( /*is_thread_local=*/false, &buffers_to_assign_sequentially, assignment.get())); // Assign buffers with sequential ordering, if any. If all global - // computations are sequential, we can run heap simuation on the whole + // computations are sequential, we can run heap simulation on the whole // module, which reduces memory usage. const bool run_whole_module_heap_simulation = buffers_to_assign_sequentially.size() == global_computations.size(); diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index e54ad852d44..912c98b5001 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -770,7 +770,7 @@ TEST_F(BufferAssignmentTest, PresetAssignments) { } TEST_F(BufferAssignmentTest, PresetAssignmentsWhile) { - // Tests preset assignments when there is no 1-to-1 corrspondance between + // Tests preset assignments when there is no 1-to-1 correspondence between // HloValue and HloBuffer (i.e., a while loop). auto module = CreateNewVerifiedModule(); Shape f32vec10_color1 = diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h index 11d8abc5bad..44cd7b5ebbd 100644 --- a/tensorflow/compiler/xla/service/buffer_value.h +++ b/tensorflow/compiler/xla/service/buffer_value.h @@ -160,7 +160,7 @@ class BufferValue { BufferValue(HloInstruction* instruction, const ShapeIndex& index, Id id); private: - // The definining instruction and index are not stored here; they can be found + // The defining instruction and index are not stored here; they can be found // in the LogicalBuffer and HloValue subclasses. This class exists only to // support migrations from TuplePointsToAnalysis to HloDataflowAnalysis, by // allowing abstract use of LogicalBuffer or HloValue. After those migrations diff --git a/tensorflow/compiler/xla/service/call_inliner.cc b/tensorflow/compiler/xla/service/call_inliner.cc index 1718b151e48..4f2436de4fa 100644 --- a/tensorflow/compiler/xla/service/call_inliner.cc +++ b/tensorflow/compiler/xla/service/call_inliner.cc @@ -27,7 +27,7 @@ namespace { // Traverses the callee computation, inlining cloned nodes into the caller // computation and connecting them to producers/consumers appropriately. -// When the traversal has completed, the provided call instruction is entriely +// When the traversal has completed, the provided call instruction is entirely // replaced in the caller's graph. 
class SubcomputationInsertionVisitor : public DfsHloVisitorWithDefault { public: diff --git a/tensorflow/compiler/xla/service/cholesky_expander.cc b/tensorflow/compiler/xla/service/cholesky_expander.cc index 74fc15a3eed..20576cdc52d 100644 --- a/tensorflow/compiler/xla/service/cholesky_expander.cc +++ b/tensorflow/compiler/xla/service/cholesky_expander.cc @@ -93,7 +93,7 @@ std::pair CholeskyUnblocked( Zeros(body_builder, ShapeUtil::MakeShape(a_shape.element_type(), matrix_dims)); // L * L.T, This matrix has of a lot of multiplying with zero - // (namely, L[:, j:] = 0) and redudant computation, but it is faster + // (namely, L[:, j:] = 0) and redundant computation, but it is faster // than slice. auto l_square = BatchDot(body_l, false, body_l, true, precision); diff --git a/tensorflow/compiler/xla/service/collective_ops_utils.h b/tensorflow/compiler/xla/service/collective_ops_utils.h index 2c5f2d64d1f..8b3c60f76de 100644 --- a/tensorflow/compiler/xla/service/collective_ops_utils.h +++ b/tensorflow/compiler/xla/service/collective_ops_utils.h @@ -32,7 +32,7 @@ namespace xla { enum class ReductionKind { SUM, PRODUCT, MIN, MAX }; -// Atempts to match computation to one of the possible cases in ReductionKind. +// Attempts to match computation to one of the possible cases in ReductionKind. absl::optional MatchReductionComputation( const HloComputation* computation); diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index a0248839fdd..b2e1231e315 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -47,7 +47,7 @@ namespace xla { // The following types are used for ahead of time compilation. // Contains the object file data created as a result of ahead-of-time -// compuation. +// computation. using ObjectFileData = std::vector; // Abstract superclass describing the result of an ahead-of-time compilation. diff --git a/tensorflow/compiler/xla/service/computation_placer.h b/tensorflow/compiler/xla/service/computation_placer.h index 8df2a50cf8f..03b65fd76a5 100644 --- a/tensorflow/compiler/xla/service/computation_placer.h +++ b/tensorflow/compiler/xla/service/computation_placer.h @@ -71,7 +71,7 @@ class ComputationPlacer { // Returns the device id assigned to the given replica and computation // instance for [replica_count x computation_count] setup. The returned device - // id must match the assignement from PlaceReplicatedComputation(). + // id must match the assignment from PlaceReplicatedComputation(). virtual StatusOr DeviceId(int replica, int computation, int replica_count, int computation_count); diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc index 86f6a9295e6..f60742a8c23 100644 --- a/tensorflow/compiler/xla/service/conditional_simplifier.cc +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -189,7 +189,7 @@ StatusOr TryRemoveUnusedConditionalOperands( } for (HloInstruction* user : param->users()) { // If the user is not a get tuple element, assume it is unsafe to remove - // elemnts from the tuple. + // elements from the tuple. 
if (user->opcode() != HloOpcode::kGetTupleElement) { return false; } diff --git a/tensorflow/compiler/xla/service/convolution_group_converter.cc b/tensorflow/compiler/xla/service/convolution_group_converter.cc index cfcf059ba5f..f942d6768df 100644 --- a/tensorflow/compiler/xla/service/convolution_group_converter.cc +++ b/tensorflow/compiler/xla/service/convolution_group_converter.cc @@ -393,7 +393,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { const int64 depthwise_multiplier = filter->shape().dimensions(kernel_output_feature_dim) / group_count; // Split the kernel output feature dimension into group count and - // depthwise mutlipler. + // depthwise multiplier. for (int64 i = 0; i < filter->shape().rank(); ++i) { if (i == kernel_output_feature_dim) { new_filter_dimension.push_back(group_count); @@ -439,7 +439,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { new_dim->set_window_dilation(1); new_dim->set_base_dilation(1); - // Split the output feature dimension into and output featrue of group + // Split the output feature dimension into an output feature of group // count and depthwise multipler as an output spatial dimension. std::vector new_output_dimension; new_output_dimension.reserve(convolution->shape().rank() + 1); diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index cde75d0c16c..88497e35214 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1177,7 +1177,7 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) { InsertCopies(module_.get()); - // The entry computation requires two copies to resolve the non-disinctness of + // The entry computation requires two copies to resolve the non-distinctness of // two init elements and the constant passed in as one of the init // elements. Either element can be copied for the distinctness issue. EXPECT_EQ(CountCopies(*module_->entry_computation()), 2); @@ -1996,7 +1996,7 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) { tensorflow::testing::StopTiming(); // Each body receives of copy of two of the parameters (the corresponding - // elements in the body are modifed), and there is one copy in each body. + // elements in the body are modified), and there is one copy in each body. ASSERT_EQ(CountCopies(module), 3 * num_whiles); } } diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 1270cd7a1bc..6a331ba4f19 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -350,7 +350,7 @@ Status CpuCompiler::RunHloPassesAfterLayoutAssn( // duplicate or NOPs, so remove them with algebraic simplification and CSE.
{ auto& pass = pipeline.AddPass>( - "simplification after layout assignement"); + "simplification after layout assignment"); pass.AddInvariantChecker( /*layout_sensitive=*/true, /*allow_mixed_precision=*/false, diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index d19cf4fb015..206fbd36344 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -327,7 +327,7 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( // // Logically we want this lambda to capture `buffers` by move, ultimately our // functor needs to be wrapped in an std::function, and that requires its - // functor to be copyable. Thus we perpitrate the hack of capturing buffers + // functor to be copyable. Thus we perpetrate the hack of capturing buffers // "by shared pointer". // // We also need to change the types of some of the variables we capture: diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc index 95b8025f873..4e0715ea7af 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc @@ -28,7 +28,7 @@ namespace cpu { // We want to change the layout of constant arrays to be column major when all // of their users are dot operations that can be made faster with the flipped -// layout. To avoid going quadriatic over the # of instructions, we cache this +// layout. To avoid going quadratic over the # of instructions, we cache this // property in should_make_rhs_col_major -- it maps a constant to true if all of // the users of said constant are dot operations that can be sped up. This // cache is populated lazily as we encounter dot operations traversing the diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index a50c0dafba6..c03a4a0c9ad 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -84,7 +84,7 @@ enum class DotImplementationStrategy { // supported. kTiledLlvmIrGemv, - // The dot operation is lowered into LLVM IR that implemetns a tiled + // The dot operation is lowered into LLVM IR that implements a tiled // Matrix*Matrix operation. No fusions are supported. The two inputs // and the output have to be row major. kTiledLlvmIrGemm, diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h index cc28918ed60..0c75eaec858 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter_internal.h @@ -63,7 +63,7 @@ enum class DotImplementationStrategy { // supported. kTiledLlvmIrGemv, - // The dot operation is lowered into LLVM IR that implemetns a tiled + // The dot operation is lowered into LLVM IR that implements a tiled // Matrix*Matrix operation. No fusions are supported. The two inputs // and the output have to be row major. 
kTiledLlvmIrGemm, diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index cf167a57087..394d1fc979d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1159,7 +1159,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { /*instruction=*/*convolution, /*operands=*/{lhs, rhs}, /*supported_types=*/{F16, F32, F64, C64, C128})); - // TODO(tonywy): Add PotentiallyImplementedAsMKLCovolution to support + // TODO(tonywy): Add PotentiallyImplementedAsMKLConvolution to support // different data layouts. if (PotentiallyImplementedAsEigenConvolution(*convolution, target_machine_features_)) { diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 453676bd7c7..95458ba05a4 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -294,7 +294,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, absl::string_view name); // Emits a call to a "global" function (e.g. to the computation nested within - // a kWhile or a kCall). Buffer assignment unabiguously assignes buffers to + // a kWhile or a kCall). Buffer assignment unambiguously assigns buffers to // the parameters and return values for these computations so there is no need // to explicitly pass parameters or return results. void EmitGlobalCall(const HloComputation& callee, absl::string_view name); @@ -366,7 +366,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, // without generating IR with illegal (e.g. excessively large or // non-power-of-two) vector types. We do this by introducing a layer of // abstraction: we introduce a high level vector-like concept called a - // "sharded vector" that models data paralleism, and is mapped to a sequence + // "sharded vector" that models data parallelism, and is mapped to a sequence // scalar and vector llvm::Value s. // // For example, we can represent 29 f32 elements by a sharded vector mapped to diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc index 0b4e3ecd99b..a42dcccf381 100644 --- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc @@ -185,7 +185,7 @@ llvm::Value* GenerateVF32Exp(llvm::IRBuilder<>* b, llvm::Value* input, // value of n clamped to [-127, 127]. In the case where n' = 127, `a` can grow // up to as large as 88.8 - 127 * log(2) which is about 0.7703. Even though // this value of `a` is outside our previously specified range, e^a will still - // only have a relative error of approximetely 2^-16 at worse. In practice + // only have a relative error of approximately 2^-16 at worst. In practice // this seems to work well enough; it passes our exhaustive tests, breaking // only one result, and by one ulp (we return exp(88.7228394) = max-float but // we should return inf).
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc index 4a8d963bedf..14afe770ede 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc @@ -193,7 +193,7 @@ bool ParallelTaskAssigner::AssignParallelTasksHelper( computation->instructions().end()); for (auto* instruction : instructions) { // Assign parallel tasks to sub-computations for While and Call HLOs. - // TODO(b/27458679) Evaluate alternative intra-op parallelsim placement, + // TODO(b/27458679) Evaluate alternative intra-op parallelism placement, // and support other callable computations like reduce. if (instruction->opcode() == HloOpcode::kWhile) { changed |= AssignParallelTasksHelper(module, instruction->while_body(), diff --git a/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc b/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc index 6f72ddadf94..bf1a1e016af 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_fork_join.cc @@ -33,7 +33,7 @@ using ComputeFunctionType = void (*)(void*, const void*, const void**, void**, // Dispatches 'num_partitions - 1' calls to 'function_ptr' in parallel. // Calls 'function_ptr' for first partition inline. -// Uses blocking counter to synchonize threads after parallel calls complete. +// Uses blocking counter to synchronize threads after parallel calls complete. // // The 'partitions' array has a total number of elements equal to // 'num_partitions * num_partitioned_dims * 2' (the '2' is necessary to specify diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition.cc b/tensorflow/compiler/xla/service/cpu/shape_partition.cc index d12c5396148..e95afbbb131 100644 --- a/tensorflow/compiler/xla/service/cpu/shape_partition.cc +++ b/tensorflow/compiler/xla/service/cpu/shape_partition.cc @@ -146,7 +146,7 @@ std::vector> ShapePartitionIterator::GetPartition( partition[i].second = dimension_partition_sizes_[i]; } CHECK_GT(partition[i].second, 0); - // Update index to remove conribution from current dimension. + // Update index to remove contribution from current dimension. index -= partition_index * dimension_partition_strides_[i]; } return partition; diff --git a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc index 7668f364bad..c4626462b66 100644 --- a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc @@ -173,7 +173,7 @@ class GemvConfig { // | C | D | // +----------------------+---+ // -// where A is the largest submatrix of the LHS that can be evenly dividied into +// where A is the largest submatrix of the LHS that can be evenly divided into // tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: // // +---+---+---+---+ +--+--+--+--+ @@ -212,7 +212,7 @@ class GemvConfig { // Where R is the starting row for the tile. // // We have an inner epilogue loop to deal with the "C" submatrix and an outer -// epilogue loop to deal with the B,D submarix. +// epilogue loop to deal with the B,D submatrix. 
// // TODO(sanjoy): We should investigate if using gather loads and scatter stores // can be used here have the same inner loop for both column-major and row-major @@ -410,7 +410,7 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( // | C | D | // +----------------------+---+ // -// where A is the largest submatrix of the LHS that can be evenly dividied into +// where A is the largest submatrix of the LHS that can be evenly divided into // tiles. For each tile in A, assuming tile_rows_ == tile_cols_ == 4, we have: // // +---+---+---+---+ diff --git a/tensorflow/compiler/xla/service/dump.cc b/tensorflow/compiler/xla/service/dump.cc index beea561dad6..85884d4af68 100644 --- a/tensorflow/compiler/xla/service/dump.cc +++ b/tensorflow/compiler/xla/service/dump.cc @@ -49,7 +49,7 @@ struct CanonicalDebugOptions { // function we treat this struct's members as write-only, and read only from // `opts`. - // Did the user specifiy an explicit format for dumping? + // Did the user specify an explicit format for dumping? bool output_format_other_than_url_specified = opts.xla_dump_hlo_as_text() || opts.xla_dump_hlo_as_proto() || opts.xla_dump_hlo_as_dot() || opts.xla_dump_hlo_as_html() || diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index 14ea6f988cb..2079130714a 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -298,7 +298,7 @@ Status DynamicDimensionInferenceVisitor::HandleDot(HloInstruction* hlo) { // A. batch dims // B. contracting dims // C. non-batch non-contracting dims. - // The output dimemsions of a dot has three parts with the following + // The output dimensions of a dot has three parts with the following // order: // [(type A), (lhs type C), (rhs type C)] // @@ -317,7 +317,7 @@ Status DynamicDimensionInferenceVisitor::HandleDot(HloInstruction* hlo) { bool lhs = operand_index == 0; // The first loop keep tracks of batch dimension. RHS and LHS could have - // diffrent batch dimension numbers. + // different batch dimension numbers. if (lhs) { for (int64 i : dimension_numbers.lhs_batch_dimensions()) { result_dim_mapping[i] = current_result_dims++; @@ -1039,13 +1039,13 @@ Status DynamicDimensionInferenceVisitor::HandleGather(HloInstruction* hlo) { if (operand_index != 1) { if (hlo->gather_slice_sizes()[input_dynamic_dimension] == 1) { // Gathering a size 1 dimension out of a dynamic dimension removes - // the dynamisity. + // the dynamicity. return Status::OK(); } if (hlo->gather_slice_sizes()[input_dynamic_dimension] == operand->shape().dimensions(input_dynamic_dimension)) { // Gathering a full-sized dimension out of a dynamic dimension - // propagates the dynamisity to output. + // propagates the dynamicity to output. 
int64 output_dimension = input_dynamic_dimension; for (int64 collapsed_dim : gather_dims.collapsed_slice_dims()) { if (collapsed_dim < input_dynamic_dimension) { diff --git a/tensorflow/compiler/xla/service/dynamic_padder.cc b/tensorflow/compiler/xla/service/dynamic_padder.cc index c94a2594f3b..21b0a98af4b 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder.cc @@ -169,7 +169,7 @@ HloInstruction* PadWithScalar(HloInstruction* inst, int64 dim, return padded; } -// In a reshape if a dynamci dimension is splitted into multiple output +// In a reshape if a dynamic dimension is split into multiple output // dimensions, we need to rewrite the input of the reshape. // // The reason for this is that a continuous input may not be evenly reshaped @@ -290,7 +290,7 @@ Status RewriteDynamicReshapeSplitInput( // Step 4. Sort iota. // Use binary mark to sort iota mask, then use iota mask to reshape input. - HloComputation::Builder comp_builder("compare_bianry_iota"); + HloComputation::Builder comp_builder("compare_binary_iota"); { HloInstruction* lhs_key = comp_builder.AddInstruction(HloInstruction::CreateParameter( @@ -322,7 +322,7 @@ Status RewriteDynamicReshapeSplitInput( mask_input_shape, sorted_binary_iota, 1)); // Step 5. Sort original input using iota mask as key. - HloComputation::Builder comp_builder_iota("compare_bianry_iota"); + HloComputation::Builder comp_builder_iota("compare_binary_iota"); { HloInstruction* lhs_key = comp_builder_iota.AddInstruction(HloInstruction::CreateParameter( diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 199e14a2164..66801d28f16 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -691,7 +691,7 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( llvm::Value* imag_numerator = FMul(four, FMul(cos_b, sin_b)); // Expm1(x) is about x for small values of x, but exp_sum_m2 is about x^2 - // for small value of x. As a result, due to floating point precission + // for small values of x. As a result, due to floating point precision // issues, x^2 is a better approximation than Expm1(x) + Expm1(x) for // small values of x. llvm::Value* a_sqr = FMul(a, a); @@ -1376,7 +1376,7 @@ StatusOr ElementalIrEmitter::EmitExpm1(PrimitiveType prim_type, auto for_small_x = FAdd(x, x_squared_over_two); // At this point, the relative errors due to floating point precision loss of // calculating exp(x) - 1 and the polynomial exp(x)-1 = x + x^2/2 are about - // equal, with a value of approximetely 2^-16. + // equal, with a value of approximately 2^-16. const auto kExponentIsSmallThreshold = 0.009; auto abs_x = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::fabs, {value}, {type}, b_); diff --git a/tensorflow/compiler/xla/service/gpu/backend_configs.proto b/tensorflow/compiler/xla/service/gpu/backend_configs.proto index 602e61ac0e8..0724a83180e 100644 --- a/tensorflow/compiler/xla/service/gpu/backend_configs.proto +++ b/tensorflow/compiler/xla/service/gpu/backend_configs.proto @@ -6,7 +6,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; // Backend configs for XLA:GPU. // -// These are metadata that the GPU backend attaches to HloInstrucitons and later +// These are metadata that the GPU backend attaches to HloInstructions and later // uses during e.g. codegen.
// // Remember that proto3 doesn't give clients a way to tell the difference diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc index a8528de96f5..b00dba3e9da 100755 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.cc @@ -193,7 +193,7 @@ Status Visitor::HandleBatchNormTraining(HloInstruction* batch_norm) { if (is_batchnorm_with_fp16_inputs) { new_gte = AddConvert(new_gte, F32); } - // Repackage the results. Athough this tuple is redundant when convert is not + // Repackage the results. Although this tuple is redundant when convert is not // inserted, TupleSimplifier eliminates the Tuple eventually std::unique_ptr replacing_tuple = HloInstruction::CreateTuple( {new_gte, @@ -282,7 +282,7 @@ Status Visitor::HandleBatchNormGrad(HloInstruction* batch_norm) { if (is_batchnorm_with_fp16_inputs) { new_gte = AddConvert(new_gte, F32); } - // Repackage the results. Athough this tuple is redundant when convert is not + // Repackage the results. Although this tuple is redundant when convert is not // inserted, TupleSimplifier eliminates the Tuple eventually std::unique_ptr replacing_tuple = HloInstruction::CreateTuple( {new_gte, diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc index 4a4198f2fc9..f3fdc6b04e6 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc @@ -72,7 +72,7 @@ void CheckInputOutputPrimitivetypeAreValid(const HloInstruction* hlo) { // The last operand is the feature index which must be int64. CHECK_EQ(hlo->operand(num_operands - 1)->shape().element_type(), S64) - << "Not yet impelemented"; + << "Not yet implemented"; // Check Outputs. if (hlo->shape().IsTuple()) { diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc b/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc index 17c02b64db5..6a5eb226be0 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_pad_for_convolutions.cc @@ -143,7 +143,7 @@ static std::vector GetRelevantConvs( // instruction to cuDNN convolution that may need padding to figure out the // desired padded input and output tensor shapes and store the desired // shapes in new_input_shapes and new_input_shapes. Notice that -// new_input_shapes is a vector for multiple input tesnsors. This function +// new_input_shapes is a vector for multiple input tensors. This function // shall return true, if padding is necessary or false otherwise in addition to // status. static StatusOr ResolveAndPad( @@ -175,7 +175,7 @@ static StatusOr ResolveAndPad( // Don't run this pass on GPUs without tensor cores -- it will make them slower! // // TODO(jlebar): Also pad dots. 
-static StatusOr TryResolvePadedShapesForTensorCore( +static StatusOr TryResolvePaddedShapesForTensorCore( HloCustomCallInstruction* conv, std::vector* new_input_shapes_ptr, Shape* new_result_shape_ptr) { TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); @@ -278,7 +278,7 @@ static StatusOr TryResolvePadedShapesForTensorCore( // Adds padding to cudnn integer convolutions to make input and output feature // maps multiple of 4 -static StatusOr TryResolvePadedShapesForIntegerConvolution( +static StatusOr TryResolvePaddedShapesForIntegerConvolution( HloCustomCallInstruction* conv, std::vector* new_input_shapes_ptr, Shape* new_result_shape_ptr) { TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); @@ -390,14 +390,14 @@ StatusOr CudnnPadForConvolutions::Run(HloModule* module) { for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { TF_ASSIGN_OR_RETURN( bool local_changed, - ResolveAndPad(conv, TryResolvePadedShapesForIntegerConvolution)); + ResolveAndPad(conv, TryResolvePaddedShapesForIntegerConvolution)); changed |= local_changed; } for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { if (is_volta_or_later_) { TF_ASSIGN_OR_RETURN( bool local_changed, - ResolveAndPad(conv, TryResolvePadedShapesForTensorCore)); + ResolveAndPad(conv, TryResolvePaddedShapesForTensorCore)); changed |= local_changed; } } diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index f707a87d79e..b4ccf758e94 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -215,7 +215,7 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // would occur if 'fusion' were merged into multiple users. // // If 'fusion' has just one user, then an earlier fusion pass chose not to - // fuse this producer/comsumer pair (likely because of expensive instruction + // fuse this producer/consumer pair (likely because of expensive instruction // re-use by the consumer), and so we honor that choice here as well. if (absl::c_any_of(fusion->fused_instructions(), [](const HloInstruction* instruction) { @@ -230,7 +230,7 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Skip 'fusion' instruction if merging it into all users would result in a // net increase in bytes transferred (currently allowing the net bytes - // transferred to be exceeded up to ~10% in exhange for eliminating the + // transferred to be exceeded up to ~10% in exchange for eliminating the // overhead from a GPU kernel launch). const double current_bytes_transferred = GetCurrentBytesTransferred(fusion); const double merged_bytes_transferred = GetMergedBytesTransferred(fusion); diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index 50ed7448790..47fd9bbfb09 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -109,9 +109,9 @@ ENTRY MergeSharedFusionInstruction.Computation0 { // This is because the bytes read by Fusion2 (when replicated if the instruction // is merged into Fusion0 and Fusion1) would exceed the bytes transferred // threshold. 
-TEST_F(FusionMergerTest, BytesTransferredThresholdExeceeded) { +TEST_F(FusionMergerTest, BytesTransferredThresholdExceeded) { auto module = ParseAndReturnVerifiedModule(R"( -HloModule BytesTransferredThresholdExeceeded +HloModule BytesTransferredThresholdExceeded comp.2 { state.param_1.1 = (f32[4]{0}, f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) @@ -138,7 +138,7 @@ comp { ROOT add.5 = f32[4]{0} add(multiply.2, constant.param_1.1) } -ENTRY BytesTransferredThresholdExeceeded.Computation2 { +ENTRY BytesTransferredThresholdExceeded.Computation2 { constant = f32[4]{0} constant({1, 1, 1, 1}) state = (f32[4]{0}, f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) fusion.2 = f32[4]{0} fusion(state), kind=kLoop, calls=comp.2 @@ -157,9 +157,9 @@ ENTRY BytesTransferredThresholdExeceeded.Computation2 { // Fusion2 is merged into Fusion0 and Fusion1, because bytes read from Param by // Fusion2 is reduced for this test which makes the merge operation into its // operand below the bytes transferred threshold. -TEST_F(FusionMergerTest, BytesTransferredThresholdNotExeceeded) { +TEST_F(FusionMergerTest, BytesTransferredThresholdNotExceeded) { auto module = ParseAndReturnVerifiedModule(R"( -HloModule BytesTransferredThresholdNotExeceeded +HloModule BytesTransferredThresholdNotExceeded comp.2 { state.param_1.1 = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) @@ -184,7 +184,7 @@ comp { ROOT add.4 = f32[4]{0} add(multiply.2, constant.param_1.1) } -ENTRY BytesTransferredThresholdNotExeceeded.Computation2 { +ENTRY BytesTransferredThresholdNotExceeded.Computation2 { constant = f32[4]{0} constant({1, 1, 1, 1}) state = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) fusion.2 = f32[4]{0} fusion(state), kind=kLoop, calls=comp.2 diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc index bf175999c55..71a86207987 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc @@ -294,7 +294,7 @@ StatusOr GpuConvAlgorithmPicker::PickBestAlgorithm( allocator->GetStream(stream_exec_->device_ordinal())); StatusOr result_or(InternalError("Unknown platform.")); // Check StreamExecutor on which platform it is. ROCm and Cuda implementation - // have diverged. Secifically, we need to make sure redzone allocator related + // have diverged. Specifically, we need to make sure redzone allocator related // utilities are not used in ROCm routine if (stream_exec_->platform_kind() == se::PlatformKind::kROCm) { result_or = PickBestAlgorithmNoCacheRocm(instr, allocator, stream); @@ -349,7 +349,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( optional comparator; // Use the first algorithm that's supported as reference. There isn't a - // particular reason to use it, as any algorithm sufficies. It doesn't make + // particular reason to use it, as any algorithm suffices. It doesn't make // this algorithm considered correct, though. se::DeviceMemoryBase reference_result_buffer; AlgorithmDesc first_algorithm; @@ -462,7 +462,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( << instr->ToString() << ": " << compare_result.status(); if (compare_result.status().code() == tensorflow::error::RESOURCE_EXHAUSTED) { - // Possibly OOM. Propatate the error. + // Possibly OOM. Propagate the error. 
return compare_result.status(); } CHECK(!crash_on_checking_failure); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc index da090f2e5e9..5fa102ac785 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_padding_legalization.cc @@ -213,7 +213,7 @@ bool GpuConvPaddingLegalization::CanonicalizeBackwardFilterConvolution( // BackwardFilterConv(ABCD, xyz, padding_low=1, padding_high=2) // is equivalent to // ABCD0 = Pad(ABCD, padding_high=1) - // BackwardFilterConv(ABCD0, xyz, padding_low=pading_high=1) + // BackwardFilterConv(ABCD0, xyz, padding_low=padding_high=1) // We choose the lesser of padding_low and padding_high as the new padding. HloInstruction* input = backward_conv->mutable_operand(0); Window new_backward_conv_window = backward_conv->window(); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h index f12d47980f3..41825a33174 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h @@ -53,7 +53,7 @@ class GpuDebugInfoManager { // Register an active module to GpuDebugInfoManager. We will keep track all // existing HloModules within the process. - // Modules with same module id can be registered and tracked seperately. + // Modules with same module id can be registered and tracked separately. void RegisterModule( const ModuleIdentifier& module_id, std::shared_ptr hlo_module, std::shared_ptr buffer_assignment); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 93af1cd995e..a879e6faf32 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -121,8 +121,8 @@ Status GpuExecutable::CheckCompatibilityWithServiceExecutableRunOptions( main_stream->parent()->GetDeviceDescription().cuda_compute_capability( &stream_compute_compatibility.first, &stream_compute_compatibility.second); - GpuVersion nvdia_compute_compatibility = stream_compute_compatibility; - TF_RET_CHECK(nvdia_compute_compatibility == gpu_version_) + GpuVersion nvidia_compute_compatibility = stream_compute_compatibility; + TF_RET_CHECK(nvidia_compute_compatibility == gpu_version_) << "Compute capability mismatch; expected {" << absl::get>(gpu_version_).first << ", " << absl::get>(gpu_version_).second << "}, but was {" diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index 51e86a9f8ee..ca1d11b7b7d 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -118,7 +118,7 @@ class GpuExecutable : public Executable { // Computes annotations for each thunk and store them in thunk_annotations_. void ComputeThunkAnnotations(); - // GpuExecutable check with either AMD's ISA version, or Nvdia's major minor + // GpuExecutable check with either AMD's ISA version, or Nvidia's major minor // version for compute capability, depending on the hardware. 
Status CheckCompatibilityWithServiceExecutableRunOptions( const ServiceExecutableRunOptions* run_options); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc b/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc index 24738683a19..86faa807cb7 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_fusible.cc @@ -138,7 +138,7 @@ bool ShapesCompatibleForMultiOutputFusion(const HloInstruction& instr1, }; // Multi-output fusion kernels share a common parallel loop. The loop - // dimenstions are determined by instruction shapes. + // dimensions are determined by instruction shapes. auto get_loop_shape = [&](const HloInstruction* element_instr) { // Special-case reduction-to-vector ops: The loop dimensions are determined // by the shape of the first operand. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h index 145975e6f49..9d5246c3600 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h @@ -37,7 +37,7 @@ bool IsLoopFusible(const HloInstruction& instr); // The code emitted for reduce-rooted input fusions (EmitReductionToVector) // suffers from poor data locality if the layouts of input parameters differ. In -// such situtations it is better not to fuse. Only input params with +// such situations it is better not to fuse. Only input params with // maximum rank are considered. Params with smaller ranks will be broadcasted // and have not been observed to cause data locality issues. // TODO(b/111977086): Improve reduce emitters to remove this limitation. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 75c9d93c63b..f4644c1765b 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -87,7 +87,7 @@ HeuristicLayoutAssignment(const HloInstruction* instr, // We could have used a mixed layout combination, e.g. (NHWC, NCHW, NCHW), // which on paper gives good performance. However, there are two observations: // * a mixed layout combination is more cuDNN-bug prone, based on empirical - // envidence. + // evidence. // * we've also observed that for mixed layouts, cuDNN transposes data back // and forth from a different layout combination. If we end up with // transposes anyway, we prefer to have them in XLA, as they can be fused. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc index 11a829a12b4..05fa798dc39 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc @@ -165,7 +165,7 @@ Status GpuTransferManager::TransferLiteralFromOutfeed( absl::make_unique(literal, index)); }); - // Give the tree of buffers to the outfeed mananger. The device will fill it + // Give the tree of buffers to the outfeed manager. The device will fill it // while we're waiting for it below. 
gpu::OutfeedManager* outfeed_manager = gpu::GetOrCreateOutfeedManager(); outfeed_manager->EnqueueDestination(&outfeed_buffers); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 2f8fd5e01cf..e5d5e540309 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -250,7 +250,7 @@ int ComputeMaxUnrollFactor(const HloInstruction* hlo) { // Otherwise, the return type is i64. llvm::Type* GetIndexTypeForKernel(const HloInstruction* hlo, int64 launch_size, llvm::IRBuilder<>* b) { - // Find the unnested hlo instructon for which the kernel is generated for. + // Find the unnested hlo instruction for which the kernel is generated for. const HloInstruction* unnested_hlo = hlo; const HloComputation* computation = hlo->parent(); if (computation->IsFusionComputation()) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 8df348bc5c0..fb64da6b43e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -45,7 +45,7 @@ namespace gpu { // Examples of things that are not unnested computations: // // - The reducer of a kReduce HLO. This is emitted using IrEmitterNested. -// - The body of a fusion node. IrEmitterUnenested emits the relevant code +// - The body of a fusion node. IrEmitterUnnested emits the relevant code // within a kernel function using FusedIrEmitter. (FusedIrEmitter is not // really an IrEmitter, but is more an "IR generator generator".) // diff --git a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h index 2eede7036cf..218f45631f5 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h +++ b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h @@ -37,7 +37,7 @@ namespace gpu { // Currently, there are two main use cases for a tiling scheme. First, we // implement kernels with 0-2-1 memory transpose using shared memory to improve // memory access pattern. Second, we implement reduction to contiguous -// dimensions in layout, with or without memory tranpsose, to achieve better +// dimensions in layout, with or without memory transpose, to achieve better // memory access pattern as well as to reduce the need numbers of executed // expensive instructions, such as thread synchronization related instructions // and atomic operations. For both use cases, we can apply a normalization to diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index d7ca14ad273..8881f29fe63 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -522,7 +522,7 @@ StatusOr CompileToPtx(llvm::Module* module, GpuVersion gpu_version, std::unique_ptr target_machine = NVPTXGetTargetMachine( default_target_triple, *compute_capability, hlo_module_config); - // Link with libdeivce, and optimize the LLVM module. + // Link with libdevice, and optimize the LLVM module. 
TF_RETURN_IF_ERROR(LinkAndOptimizeModule( module, gpu_version, hlo_module_config, libdevice_dir_path, NVPTXTargetModuleLinker, default_target_triple, target_machine.get(), diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc index 594a423bda9..ccb1b7311b7 100644 --- a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc @@ -136,7 +136,7 @@ std::vector GetProducerConsumerMultiOutputFusionCandidates( // Do not fuse a producer if the other operands of the fusion are // reachable from the producer, this would create a cycle. auto operand_reachable_from_producer = [&](const HloInstruction* operand) { - // If a get-tuple-elment instruction is not in the reachability + // If a get-tuple-element instruction is not in the reachability // map, it has been created by fusion in this pass. Simply move // on to its operand, which is in the reachability map. if (!reachability.IsPresent(operand) && diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 6635b68899d..fa01d75d35a 100755 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -99,7 +99,7 @@ string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) { "uses routines from libdevice.", hlo_module_config); - // GetCudaRootCandidates always inclues ".", but but if everything fails, we + // GetCudaRootCandidates always includes ".", but if everything fails, we // return it anyway. Better than returning the empty string. return "."; } diff --git a/tensorflow/compiler/xla/service/gpu/stream_executor_util.h b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h index 3e2ae241a03..684143b2d04 100644 --- a/tensorflow/compiler/xla/service/gpu/stream_executor_util.h +++ b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h @@ -82,7 +82,7 @@ se::GpuAsmOpts PtxOptsFromConfig(const HloModuleConfig& hlo_module_config); // `buffer_type` determines what buffer would be filled out with. // // Precondition: `buffer_type` is a floating point type, `rng_state` needs to be -// initalized to zero on the first use. +// initialized to zero on the first use. void InitializeBuffer(se::Stream* stream, PrimitiveType buffer_type, int64* rng_state, se::DeviceMemoryBase buffer); diff --git a/tensorflow/compiler/xla/service/gpu/thunk_emitter.h b/tensorflow/compiler/xla/service/gpu/thunk_emitter.h index 55d92c74794..49d71192e77 100644 --- a/tensorflow/compiler/xla/service/gpu/thunk_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/thunk_emitter.h @@ -25,7 +25,7 @@ namespace xla { namespace gpu { // Implements handling of GPU execution for HLO operations that are handed off -// to specialzied thunks that do not require code generation. Intended to be +// to specialized thunks that do not require code generation. Intended to be // mixed into GPU emitters. class ThunkEmitter { public: diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index 1810accebfc..384ae272dc1 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -185,7 +185,7 @@ class BufferValueMap { }; // If the value shows up in a root instruction, alias it with parameter - // intruction.
+ // instruction. for (const HloPosition& pos : value.positions()) { if (pos.instruction == module_->entry_computation()->root_instruction()) { ShapeIndex output_index = pos.index; @@ -404,7 +404,7 @@ bool HloAliasAnalysis::InstructionBuffersAreDistinct( } } else { // It's possible for multiple values at this index to have the same - // HloBuffer. This does not result in non-distictness. To account for + // HloBuffer. This does not result in non-distinctness. To account for // this case, add all of the buffers at this index after checking // whether each buffer exists at an earlier index. This is a corner // case, however, as the number of values at an index is almost always diff --git a/tensorflow/compiler/xla/service/hlo_buffer.h b/tensorflow/compiler/xla/service/hlo_buffer.h index 91597d6f705..870a1a78994 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.h +++ b/tensorflow/compiler/xla/service/hlo_buffer.h @@ -54,7 +54,7 @@ namespace xla { // HloValue{%cond_param}. // // HloBuffers may appear at different HloPositions in the module mirroring the -// same propery of HloValues. For example: +// same property of HloValues. For example: // // %sub = Sub(...) // %add = Add(...) diff --git a/tensorflow/compiler/xla/service/hlo_casting_utils.h b/tensorflow/compiler/xla/service/hlo_casting_utils.h index 7f73bba0365..4cae37add73 100644 --- a/tensorflow/compiler/xla/service/hlo_casting_utils.h +++ b/tensorflow/compiler/xla/service/hlo_casting_utils.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Casting utilitiy functions for HLO instructions. +// Casting utility functions for HLO instructions. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CASTING_UTILS_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CASTING_UTILS_H_ diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index c2bbe3ccd71..fa116ae9da1 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -838,7 +838,7 @@ bool HloComputation::Equal(const HloComputation& other, continue; } visited.emplace(pair); - // TODO(b/123082518): Avoid recursively invoking == becasue it may + // TODO(b/123082518): Avoid recursively invoking == because it may // cause a stack overflow with deeply nested subcomputations. bool identical_ignoring_operands = pair.first->Identical( *pair.second, diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 81c6bfc3ecf..579e4360092 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -419,7 +419,7 @@ class HloComputation { // the HLO computation with the exception of fusion computation. A parameter // instruction is removable for a fusion computation. // - // Note that IsSafelyRemovable() is a necassarily condition to remove an + // Note that IsSafelyRemovable() is a necessary condition to remove an // instruction rather than a sufficient condition.
For example, instructions // with side-effect (e.g., Send, Infeed) may be removed from a computation, // but the transformation must guarantee the invariants relevant to the diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 10ec9ea3757..38231df1f1d 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -324,7 +324,7 @@ Status HloCostAnalysis::HandleDot(const HloInstruction* dot) { for (auto dim : dnums.lhs_contracting_dimensions()) { reduction_width *= lhs_shape.dimensions(dim); } - // Each output elment requires reduction_width FMA operations. + // Each output element requires reduction_width FMA operations. current_properties_[kFlopsKey] = kFmaFlops * ShapeUtil::ElementsIn(dot_shape) * reduction_width; return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 8df700802b6..6da93d28079 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -149,7 +149,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { // if the HLO was not found to have a cost in the analysis. // // Note that the cost for sub HLO instructions are also returned if asked. For - // example, body and condidition of a while, fused instructions within a + // example, body and condition of a while, fused instructions within a // fusion, or the add instruction of a reduce. int64 flop_count(const HloInstruction& hlo) const; int64 transcendental_count(const HloInstruction& hlo) const; diff --git a/tensorflow/compiler/xla/service/hlo_cse_test.cc b/tensorflow/compiler/xla/service/hlo_cse_test.cc index 1eb0260468c..ba27611c6b0 100644 --- a/tensorflow/compiler/xla/service/hlo_cse_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cse_test.cc @@ -382,9 +382,9 @@ condition=%condition.1, body=%body // Test two while loops with identical bodies and same inputs, but different // conditions -TEST_F(HloCseTest, WhileLoopsIdenticalBodiesAndInputDifferntConditions) { +TEST_F(HloCseTest, WhileLoopsIdenticalBodiesAndInputDifferentConditions) { const char* const hlo_string = R"( - HloModule WhileLoopsIdenticalBodiesAndInputDifferntConditions + HloModule WhileLoopsIdenticalBodiesAndInputDifferentConditions %body (param: (f32[], f32[])) -> (f32[], f32[]) { %param = (f32[], f32[]) parameter(0) @@ -404,7 +404,7 @@ index=1 %add = f32[] add(f32[] %get-tuple-element, f32[] %get-tuple-element.1) ROOT %constant.1 = pred[] constant(true) } - ENTRY %WhileLoopsIdenticalBodiesAndInputDifferntConditions () -> (f32[], + ENTRY %WhileLoopsIdenticalBodiesAndInputDifferentConditions () -> (f32[], f32[]) { %constant.2 = f32[] constant(1) %constant.3 = f32[] constant(2) %tuple.1 = (f32[], f32[]) tuple(f32[] %constant.2, f32[] %constant.3) %while = (f32[], f32[]) while((f32[], f32[]) %tuple.1), diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index ecfa6703f00..11d3c5fdbd0 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -1092,7 +1092,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( // TODO(b/80315712): This code is in a bit of a weird intermediate state // at the moment. The in-place DUS check really needs to be common to all // backends, so it runs first. 
Then we run the backend-specific check if - // provided, or go through the target-indepdendent check if not. + // provided, or go through the target-independent check if not. // Unfortunately, the notionally "target-independent" path actually contains // some target-specific code, so we can't run all of it *in addition* to the // target-specific function, like the interface documentation says. diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index d6617dea1c4..330779b5ebd 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -1857,7 +1857,7 @@ TEST_P(HloDataflowAnalysisTest, NestedConditionals) { // inner_conditional((PRED, F32[], F32[]) %param_cond): // %pred_cond = GetTupleElement(%param_cond, 0) // %true_operand_cond = GetTupleElement(%param_cond, 1) - // %false_opearnd_cond = GetTupleElement(%param_cond, 2) + // %false_operand_cond = GetTupleElement(%param_cond, 2) // return Conditional(%pred_cond, %true_operand_cond, computation1, // %false_operand_cond, computation2) // diff --git a/tensorflow/compiler/xla/service/hlo_domain_isolator.h b/tensorflow/compiler/xla/service/hlo_domain_isolator.h index 2274c3a96c2..1fa996dd683 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_isolator.h +++ b/tensorflow/compiler/xla/service/hlo_domain_isolator.h @@ -35,7 +35,7 @@ class HloDomainIsolator : public HloModulePass { // Creates a new kDomain instruction for the edge between the use instruction // (the first HloInstruction argument), and the operand instruction (the // third HloInstruction argument) if the interesting attribute of the - // instruction differes from the attribute of the root (the second + // instruction differs from the attribute of the root (the second // HloInstruction argument). // Returns nullptr in case no domain separation is necessary. using DomainCreator = std::function HloDomainRemover::RunContext::Run() { VLOG(4) << "Processing metadata domain: '" << remover_->kind_ << "'"; int64 removed_domains = 0; for (HloComputation* computation : module_->computations()) { - // First create the domain instruciton sets. A domain instruction set is + // First create the domain instruction sets. A domain instruction set is // the set of instructions whose edges never cross a kDomain instruction.
TF_ASSIGN_OR_RETURN(std::unique_ptr domain_map, HloDomainMap::Create(computation, remover_->kind_)); diff --git a/tensorflow/compiler/xla/service/hlo_domain_test.cc b/tensorflow/compiler/xla/service/hlo_domain_test.cc index fd4fb0246d8..c2e0c907a24 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_test.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_test.cc @@ -617,7 +617,7 @@ ENTRY entry { auto tuple0 = FindInstruction(module.get(), "tuple.0"); tuple0->clear_sharding(); - // Insert the following instructons above and below tuple.0, to emulate other + // Insert the following instructions above and below tuple.0, to emulate other // passes effects: // COPY.0 // \ / diff --git a/tensorflow/compiler/xla/service/hlo_domain_verifier.cc b/tensorflow/compiler/xla/service/hlo_domain_verifier.cc index dc514ae3e5c..f8e1973d5b9 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_verifier.cc @@ -35,7 +35,7 @@ class HloDomainVerifier::RunContext { private: // If the verifier caller passed an empty vector for kinds, we collect all the - // avalable domain types. + // available domain types. Status PopulateDomainKinds(); HloModule* module_; @@ -67,7 +67,7 @@ Status HloDomainVerifier::RunContext::Run() { TF_RETURN_IF_ERROR(PopulateDomainKinds()); for (HloComputation* computation : module_->computations()) { for (auto& kind : verifier_->kinds_) { - // First create the domain instruciton sets. A domain instruction set is + // First create the domain instruction sets. A domain instruction set is // the set of instructions whose edges never cross a kDomain instruction. TF_ASSIGN_OR_RETURN(std::unique_ptr domain_map, HloDomainMap::Create(computation, kind)); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 2145be59aca..b2435d3fdf3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1133,7 +1133,7 @@ bool CopyDataFromInput(const Literal& input_literal, int64 input_start, auto base_case = [&](int64 axis, int64 dst_index, int64 src_index, bool within_src_bounds) { if (axis == 0) { - // For IRFFT, the negavie frequencies are only needed for the sweep along + // For IRFFT, the negative frequencies are only needed for the sweep along // the X axis, which is performed last. Leave this part of the working set // uninitialized until then. const int64 length = fft_lengths[axis]; @@ -1684,7 +1684,7 @@ class OutputOffsetIndexToInputIndex { std::vector input_index_; }; -// Rehapes the gather indices input to have a trailing degenerate `1` dimension +// Reshapes the gather indices input to have a trailing degenerate `1` dimension // if necessary. Hands over the ownership of the newly created literal (if // there is one) to `reshaped_start_indices`. 
static StatusOr> ReshapedGatherIndices( diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index de5a9aa4c2c..fc9d42c1b17 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -253,7 +253,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCustomCall(HloInstruction* custom_call) override; // Unsupported HLOs, note some of them (such as BatchNorm*) are typically - // expanded in a semantic-preserving way into other HLOs by adding exanpsion + // expanded in a semantic-preserving way into other HLOs by adding expansion // HLO pass to the HLO optimization pass during compilation, which can then be // handled by the evaluator. Status HandleBatchNormGrad(HloInstruction* batch_norm_grad) override { @@ -304,7 +304,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // // TODO(b/35950897): have better memory management here to free instructions // that are no longer a parent for any other subsequent instruction in - // post-orderring. + // post-ordering. // // Must be cleared for each evaluation. // diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 507867c013d..516a4283448 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -387,7 +387,7 @@ class HloDotDumper { const HloExecutionProfile* profile_; // may be null const NodeFilter filter_; - // Each HloInstruction dumped gets a monotically-increasing node ID. This + // Each HloInstruction dumped gets a monotonically-increasing node ID. This // must start at 1, because that's where graphviz's accounting starts. int64 next_node_id_ = 1; absl::flat_hash_map node_ids_; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h index 6bd34f8a127..689007ff9ab 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -81,8 +81,8 @@ class HloInputOutputAliasConfig { // Checks whether the provided output index has already been aliased. bool OutputHasAlias(const ShapeIndex& output_index) const; - // (De)Serializes an HloInputOutoutAliasConfig to/from an - // HloInputOutoutAliasProto. + // (De)Serializes an HloInputOutputAliasConfig to/from an + // HloInputOutputAliasProto. HloInputOutputAliasProto ToProto() const; static StatusOr CreateFromProto( diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index bc099371d08..7b20b3d6b66 100755 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1637,7 +1637,7 @@ HloInstruction::~HloInstruction() { operands_[operand_num] = nullptr; } - // Update users. Set `nullptr` to the correpsonding operand slot for users. + // Update users. Set `nullptr` to the corresponding operand slot for users. for (auto& user : this->users()) { for (int i = 0; i < user->operand_count(); ++i) { if (user->operands_[i] == this) { @@ -2693,7 +2693,7 @@ bool HloInstruction::IsFusible() const { case HloOpcode::kReduce: case HloOpcode::kReduceWindow: return true; - // Side effecting instrutions cannot be fused. + // Side effecting instructions cannot be fused. 
default: return !HasSideEffect(); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 238a96e52a0..ba9fdbbe4c2 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -643,7 +643,7 @@ class HloInstruction { const std::vector& replica_groups, const absl::optional& channel_id); - // Creates a communitation instructions that permutes data cross replicas. + // Creates a communication instruction that permutes data across replicas. // Data is sent/received according to the (source_replica_id, // target_replica_id) pairs in `source_target_pairs`. If a replica id is not a // target_replica_id in any pair, the output on that replica is a tensor diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index a9d9eb9cfa4..e5735bea843 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1545,7 +1545,7 @@ TEST_F(HloInstructionTest, StringifyScatter) { "to_apply=%Scatter.update"); } -TEST_F(HloInstructionTest, CanonnicalStringificationFusion) { +TEST_F(HloInstructionTest, CanonicalStringificationFusion) { // Tests stringification of a simple op, fusion, while, and conditional. const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10}); const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10}); @@ -1587,7 +1587,7 @@ TEST_F(HloInstructionTest, CanonnicalStringificationFusion) { EXPECT_EQ(fusion->ToString(options), expected_fusion); } -TEST_F(HloInstructionTest, CanonnicalStringificationWhile) { +TEST_F(HloInstructionTest, CanonicalStringificationWhile) { // Tests stringification of a simple op, fusion, while, and conditional. const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10}); const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10}); @@ -1643,7 +1643,7 @@ TEST_F(HloInstructionTest, CanonnicalStringificationWhile) { EXPECT_EQ(loop->ToString(options), expected_loop); } -TEST_F(HloInstructionTest, CanonnicalStringificationConditional) { +TEST_F(HloInstructionTest, CanonicalStringificationConditional) { // Tests stringification of a simple op, fusion, while, and conditional. const Shape s1 = ShapeUtil::MakeShape(F32, {5, 10}); const Shape s2 = ShapeUtil::MakeShape(F32, {20, 10}); diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index a150efd8c83..94b5926d876 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -1356,7 +1356,7 @@ void HloFusionInstruction::MergeFusionInstructionIntoMultiOutput( HloFusionInstruction* instruction_to_merge) { // Add all non-parameter fused instructions to 'unfused_instructions' to be // merged into 'this'. `old_to_new' maps the instructions in the fused node - // to the disaseembled fusion instructions. + // to the disassembled fusion instructions. // Note that we add the unfused instructions to this->parent_ computation. // This is necessary because the unique_id needs for an instruction and // it's only added when inserting to the computation.
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 1863c78e7e1..75c7dd9f1ff 100755 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -768,7 +768,7 @@ class HloFusionInstruction : public HloInstruction { // Merges the fused instructions from 'instruction_to_merge' into the // fused instruction set of 'this', updating operands as necessary. // - // Predondition: 'instruction_to_merge' must be an operand of 'this'. + // Precondition: 'instruction_to_merge' must be an operand of 'this'. void MergeFusionInstruction(HloFusionInstruction* instruction_to_merge); // Merges the fused instructions from instruction_to_merge into the fused diff --git a/tensorflow/compiler/xla/service/hlo_live_range_test.cc b/tensorflow/compiler/xla/service/hlo_live_range_test.cc index 232c6b95e88..e2d320beffd 100644 --- a/tensorflow/compiler/xla/service/hlo_live_range_test.cc +++ b/tensorflow/compiler/xla/service/hlo_live_range_test.cc @@ -144,7 +144,7 @@ TEST_F(HloLiveRangeTest, MultiplyAdd) { } TEST_F(HloLiveRangeTest, LiveOutBuffers) { - // If a buffer is live out, its life range is extened to the end of + // If a buffer is live out, its life range is extended to the end of // computation. auto builder = HloComputation::Builder(TestName()); auto paramA = builder.AddInstruction( @@ -181,7 +181,7 @@ TEST_F(HloLiveRangeTest, LiveOutBuffers) { } TEST_F(HloLiveRangeTest, InstructionScheduledAfterRoot) { - // If a buffer is live out, its life range is extened to the end of + // If a buffer is live out, its life range is extended to the end of // computation. auto builder = HloComputation::Builder(TestName()); auto paramA = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc index e14bcfa7f67..994c6628f43 100644 --- a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc @@ -113,7 +113,7 @@ void MarkLiveAtAllIndices(const HloInstruction* instruction, // Propagates liveness through Tuple instructions. // *) For each tuple operand: // *) For tuple output shape index associated with operand: -// *) Propgate live shape indices to tuple operand at the associated +// *) Propagate live shape indices to tuple operand at the associated // shape index in the operands output, and add to worklist. void PropagateLivenessThroughTuple( const HloInstruction* instruction, @@ -260,7 +260,7 @@ HloLivenessAnalysis::HloLivenessAnalysis(const HloModule& module) void HloLivenessAnalysis::RunAnalysis() { Worklist worklist; Workset workset; - // Add entry compuation root instruction. + // Add entry computation root instruction. 
MarkLiveAtAllIndices(module_.entry_computation()->root_instruction(), &live_index_map_, &worklist, &workset); for (auto* computation : module_.computations()) { diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc index 35db6aa0635..03d353aa1e0 100644 --- a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc @@ -136,7 +136,7 @@ TEST_F(HloLivenessAnalysisTest, NestedTupleAtEntryRoot) { EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "constant.3"), {})); } -// Tests that GTE at entry root of Tuple instruction only propgates liveness +// Tests that GTE at entry root of Tuple instruction only propagates liveness // to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfTuple) { auto module = ParseAndReturnVerifiedModule(R"( @@ -158,7 +158,7 @@ TEST_F(HloLivenessAnalysisTest, GteOfTuple) { EXPECT_FALSE(liveness.IsLive(GetInstruction(module.get(), "constant.2"), {})); } -// Tests that GTE at entry root of nested Tuple instruction only propgates +// Tests that GTE at entry root of nested Tuple instruction only propagates // liveness to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfNestedTuple) { auto module = ParseAndReturnVerifiedModule(R"( @@ -196,7 +196,7 @@ TEST_F(HloLivenessAnalysisTest, GteOfNestedTuple) { } // Tests that GTE of GTE (at entry root) of nested Tuple instruction only -// propgates liveness to the live elements in tuple. +// propagates liveness to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfGteOfNestedTuple) { auto module = ParseAndReturnVerifiedModule(R"( HloModule SimpleModule diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc index 50eaee95455..bda297540ff 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc @@ -68,7 +68,7 @@ using ::tensorflow::strings::HumanReadableNumBytes; // A D E F B C G // , which has a maximum memory usage of 6 (B is alive while F is executing). // -// An optimal way to shedule the previous graph is: +// An optimal way to schedule the previous graph is: // A B C D E F G // , which has a maximum memory usage of 5 (when F is executing). // diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 3e9630a13c4..5e662e0bebc 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -286,7 +286,7 @@ class HloModule { // Returns true if the module has a schedule set. bool has_schedule() const { return schedule_.has_value(); } - // Returns the schedue of the module. CHECK fails if no schedule is set. + // Returns the schedule of the module. CHECK fails if no schedule is set. 
const HloSchedule& schedule() const { return *schedule_; } HloSchedule& schedule() { return *schedule_; } diff --git a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc index dba699dd8c5..301faa75f0a 100644 --- a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc +++ b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc @@ -187,7 +187,7 @@ TEST_F(HloModuleDceTest, OneWhileWithDeadTupleElement) { } // Tests that a tuple element {1} used by condition computation (which appears -// dead in while.body{1} and at while.result{1}) propgates liveness of this +// dead in while.body{1} and at while.result{1}) propagates liveness of this // tuple element to while.body{1} and at while.result{1}. TEST_F(HloModuleDceTest, OneWhileWithTupleElementUsedByCond) { auto module = ParseAndReturnVerifiedModule(R"( diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.h b/tensorflow/compiler/xla/service/hlo_module_group_util.h index d388fe51d0d..12a4614412a 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_util.h +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.h @@ -103,7 +103,7 @@ class HloModuleGroupUtil { absl::Span computations); // Updates the reachability of the given instruction, taking the global - // predeccessorss and successors into account. + // predecessors and successors into account. void UpdateReachabilityThroughInstruction( HloInstruction* instruction, HloReachabilityMap* reachability_map); diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index 2b77619f89b..f8295d579fb 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -506,7 +506,7 @@ TEST_F(HloOrderingTest, InterferenceWithOuterRoot) { absl::string_view hlo_string = R"( HloModule InterferenceWithOuterRoot, is_scheduled=true -Emmbedded (embedded_param: f32[4096,4096]) -> f32[4096,4096] { +Embedded (embedded_param: f32[4096,4096]) -> f32[4096,4096] { embedded_param = f32[4096,4096]{1,0} parameter(0) multiply = f32[4096,4096]{1,0} multiply(embedded_param, embedded_param) ROOT log = f32[4096,4096]{1,0} log(multiply) @@ -515,7 +515,7 @@ Emmbedded (embedded_param: f32[4096,4096]) -> f32[4096,4096] { ENTRY InterferenceWithOuterRoot { param = f32[4096,4096]{1,0} parameter(0) ROOT add = f32[4096,4096]{1,0} add(param, param) - call = f32[4096,4096]{1,0} call(param), to_apply=Emmbedded + call = f32[4096,4096]{1,0} call(param), to_apply=Embedded } )"; diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 3ecd0af3480..b05f76a1d29 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -294,7 +294,7 @@ class HloParserImpl : public HloParser { // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. bool ParseDxD(const std::string& name, std::vector* result); - // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. + // Parses window's pad sub-attribute, e.g., pad=0_0x3x3. 
bool ParseWindowPad(std::vector>* pad); bool ParseSliceRanges(SliceRanges* result); @@ -2297,7 +2297,7 @@ bool HloParserImpl::ParseTupleLiteral(Literal* literal, const Shape& shape) { // literal, (',' literal)* for (int i = 0; i < elements.size(); i++) { if (i > 0) { - ParseToken(TokKind::kComma, "exepcts ',' to separate tuple elements"); + ParseToken(TokKind::kComma, "expects ',' to separate tuple elements"); } if (!ParseLiteral(&elements[i], ShapeUtil::GetTupleElementShape(shape, i))) { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 29a6a5e4297..d65613fc4b8 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1203,7 +1203,7 @@ ENTRY Rng { }, // Reduce precision { -"ReducePrevison", +"ReducePrecision", R"(HloModule reduce_precision ENTRY ReducePrecision { @@ -2095,7 +2095,7 @@ ENTRY %ShortConstant.v4 () -> f32[67,89] { EXPECT_EQ(result.ValueOrDie()->ToString(HloPrintOptions()), original); } -TEST_F(HloParserTest, AttibutesAnyOrder) { +TEST_F(HloParserTest, AttributesAnyOrder) { const string original = R"(HloModule any_order_module ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,4,1] { diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc index e18521811c0..166ba1b0d99 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc @@ -457,7 +457,7 @@ TEST_P(IndirectUseTest, IndirectUseNotRematerialized) { // F32[1024] %call = call(Subcomputation, {%add_1}) // F32[1024] %add_2 = add(%bcast, call) // {F32[1024], F32[1024]} %tuple = tuple(%bcast, %add_2) - // F32[1024] %gte = GetTupleElememt(%tuple, 0) + // F32[1024] %gte = GetTupleElement(%tuple, 0) // F32[1024] %negate = negate(%gte) // // Subcomputation: diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index c077ccd95fe..3b5a80ce33b 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -80,7 +80,7 @@ class HloRunner { bool run_hlo_passes = false; // If true, executes on multiple threads using se::Stream::ExecuteOnStream. - // Othewise, executes using xla::Executable::ExecuteOnStreams. + // Otherwise, executes using xla::Executable::ExecuteOnStreams. bool use_threads = false; }; diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index 90a80a4421b..56479add95f 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -120,7 +120,7 @@ class HloSharding { // Retrieves a histogram of the devices used by the sharding. The returned // map has the device number as key, and the occurrence count as value. - // If a sharding does not have a device, it will not be incuded in the + // If a sharding does not have a device, it will not be included in the // histogram. The count argument, if not nullptr, will receive the total // number of elements this sharding is made of (one for array, N leaves for // tuples). 
diff --git a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc index 094d98bc6e5..837483268f3 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc @@ -310,7 +310,7 @@ StatusOr ApplyShardingFromUsers(HloInstruction* instruction, } // Tries to propagate the sharding information into the instructions that are -// part of the domain, in a reverse post order manner (users propoagate to +// part of the domain, in a reverse post order manner (users propagate to // instruction). StatusOr ApplyDomainShardingPass(const DomainMetadata::Domain& domain, const HloSharding& domain_sharding) { diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 4a325e5cb5b..015246c8cae 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -886,7 +886,7 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp(HloOpcode opcode, // To figure out the broadcast dimensions for the (constant) source for the // scalar-indexed node, we "simulate" the index transformation done by the - // existing broadcsat: + // existing broadcast: enum class IndexComponent { Broadcasted, NotBroadcasted }; std::vector simulated_index( broadcast_instr->shape().dimensions_size(), IndexComponent::Broadcasted); diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc index 5478c4a9291..d64d64eb5ee 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc @@ -35,7 +35,7 @@ class IndexedArrayAnalysisTest : public HloTestBase { } private: - // Replaces seqences of whitespace with a single space. This makes the + // Replaces sequences of whitespace with a single space. This makes the // strings being matched against "whitespace insensitive" which lets us indent // them for readability. string CanonicalizeWhitespace(const string& text) { diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index defaf4cd7ab..c1bbf791c73 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -607,7 +607,7 @@ Status LayoutAssignment::AddMandatoryConstraints( body_layout.result_shape(), instruction)); } else if (instruction->opcode() == HloOpcode::kConditional) { // Find the conditional branch with the most instructions and force all - // other computations to match that layout. A potentially better decison + // other computations to match that layout. A potentially better decision // could count the number FLOPs or how constrained the layouts are. int64 largest_branch = 0; int64 largest_instruction_count = diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index a0f61fc416d..ef30ec3088b 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -456,7 +456,7 @@ class LayoutAssignment : public HloModulePass { // when the instruction is a tuple, and in such case the index represents // the location from where the copy instruction was created from. 
// If the index is empty, the whole sharding will be propagated, even in case - // the intruction has a tuple sharding. + // the instruction has a tuple sharding. static void SetupCopiedInstruction(const HloInstruction& instruction, HloInstruction* copy, const ShapeIndex& index); @@ -508,7 +508,7 @@ class LayoutAssignment : public HloModulePass { // instructions can be set to match the computation. std::map computation_layouts_; - // Map from branch computations to the result layout they shuould apply. + // Map from branch computations to the result layout they should apply. std::map conditional_mismatch_; // Every copy added to the module by the layout assignment pass is registered diff --git a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc index ba199f35712..77ce26c7e84 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc @@ -24,7 +24,7 @@ namespace xla { namespace llvm_ir { bool MayBeImplementedAsInPlaceDynamicUpdateSlice(const HloInstruction* instr) { - // Today we can't emit a dynamic-update-slice if the DUS node is parallized; + // Today we can't emit a dynamic-update-slice if the DUS node is parallelized; // the emitter will not emit correct code. It's possible to change this, but // then ParallelTaskAssigner would have to somehow know whether a node *will* // be emitted as an in-place DUS, and it can't, because it doesn't have a diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index c4d527b6cbf..aa37b9e7be9 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -612,7 +612,7 @@ llvm::Function* CreateCpuFunction(llvm::FunctionType* function_type, // set. function->addFnAttr("denormal-fp-math", "preserve-sign"); - // Add the optize attribute to the function if optimizing for size. This + // Add the optimize attribute to the function if optimizing for size. This // controls internal behavior of some optimization passes (e.g. loop // unrolling). if (cpu::options::OptimizeForSizeRequested(module_config)) { diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index c1dc635eb81..669403fb8ad 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -1195,7 +1195,7 @@ Status MemorySpaceAssignment::SimplifyGraph() { instruction->user_count() == 0 && !instruction->HasSideEffect() && instruction != computation->root_instruction()) { VLOG(4) << "Instruction removed: " << instruction->ToString(); - // Ensure the exported preset assignments don't contain a refence to + // Ensure the exported preset assignments don't contain a reference to // the removed instruction. 
preset_assignments_->RemoveAssignmentForInstruction(instruction); // Instead of deleting the instruction from the schedule, replace it diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index 20551feb715..08d03cc8655 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -493,7 +493,7 @@ class MemorySpaceAssignment { // This struct contains mandatory memory assignments at a given time. E.g., an // input's required memory assignment time would correspond to the definition -// time of the parameter instruction, and an output's time would correspnd to +// time of the parameter instruction, and an output's time would correspond to // the time of last use. struct RequiredMemoryAssignment { MemorySpaceAssignment::MemorySpace memory_space; diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index 068834e5701..99c9df890d8 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -1171,7 +1171,7 @@ TEST_P(MemorySpaceAssignmentTest, NonEntryComputationSchedule5) { // // If a copy to alternate memory is inserted before foo, and if the size of // the while body is less than max prefetch interval so that the copy-done is - // kept in the alternate memory, then we end up refering to the copy-done in + // kept in the alternate memory, then we end up referring to the copy-done in // the root instruction of the while loop body. I.e., // // cs = copy-start(a) diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc index d7300f58364..84e239ae196 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.cc @@ -285,7 +285,7 @@ mlir::AffineForOp TileLoop(mlir::AffineForOp loop, int64_t size, // operations of their parent loop, and `where` must be an ancestor of that // parent loop. // -// It always preseves the semantics of the program, therefore it may modify the +// It always preserves the semantics of the program, therefore it may modify the // hoisted operations or add extra loops at the hoisted place. mlir::Operation* HoistAndFix(llvm::iplist::iterator begin_op, llvm::iplist::iterator end_op, @@ -618,7 +618,7 @@ StatusOr TransformMlirConv( output_acc = llvm::cast( HoistAndFix(output_acc, tiled_cartesian_loops.front())); - // Hoist everyting before reduction loops (aka zero initializations of + // Hoist everything before reduction loops (aka zero initializations of // output_acc): // for (cartesian loops...) { // %output_acc = alloc() : memref(..., f32) @@ -752,7 +752,7 @@ StatusOr EmitConvolutionForwardAsMlir( // TODO(timshen): Implement a transformation that collects loads to a given // buffer, create a local alloc() for the accessed part, redirects all loads - // and stores to that local alloc(), and create code to ininitialize / + // and stores to that local alloc(), and create code to initialize / // writeback the local alloc() if needed. // TODO(timshen): Implement CUDA-specific lowering. 
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h index c8bc0a15acd..f0b95876775 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h +++ b/tensorflow/compiler/xla/service/mlir_gpu/experimental/conv_emitter/conv_emitter.h @@ -24,7 +24,7 @@ namespace mlir_gpu { // Builds MLIR using custom_call that represents a foward convolution. // -// The generated function has the following signautre: +// The generated function has the following signature: // func @(%output: memref, // %input: memref, // %filter: memref) { ... } diff --git a/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc b/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc index 4107d92da7e..7855f1da1cf 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc @@ -104,7 +104,7 @@ FailoverCompiler::CompileAheadOfTime( const AotCompilationOptions& options) { // This is not supported by GPU compiler anyway. return Unimplemented( - "CompileAheadOfTime not implemeneted in failover compiler!"); + "CompileAheadOfTime not implemented in failover compiler!"); } HloCostAnalysis::ShapeSizeFunction FailoverCompiler::ShapeSizeBytesFunction() diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc index 92f7e5a08ac..ab880b3e110 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc @@ -230,7 +230,7 @@ static StatusOr> ComputeOperandToValueMap( has_failed = true; continue; } - // host_index is the argument positon to the surrounding function that + // host_index is the argument position to the surrounding function that // contains the launch. This index corresponds to HLO operand indices // by construction. auto host_index = launchop_operand->getArgNumber(); diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h index 403b5dfaff9..9be69f808c4 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.h +++ b/tensorflow/compiler/xla/service/multi_output_fusion.h @@ -79,7 +79,7 @@ class MultiOutputFusion : public HloModulePass { // Test if it's legal to fuse instr1 and instr2 into one fusion instruction. virtual bool LegalToFuse(HloInstruction* instr1, HloInstruction* instr2); - // Fuse HloInstrctuion instr1 and instr2 and return the fused instruction. + // Fuse HloInstruction instr1 and instr2 and return the fused instruction. // The other instruction is removed from its parent computation. virtual HloInstruction* Fuse(HloInstruction* instr1, HloInstruction* instr2); diff --git a/tensorflow/compiler/xla/service/op_expander_pass.h b/tensorflow/compiler/xla/service/op_expander_pass.h index 276e3d70b8e..49b3ba07031 100644 --- a/tensorflow/compiler/xla/service/op_expander_pass.h +++ b/tensorflow/compiler/xla/service/op_expander_pass.h @@ -34,7 +34,7 @@ class OpExpanderPass : public HloModulePass { virtual bool InstructionMatchesPattern(HloInstruction* instruction) = 0; // Returns a replacement for `instruction`, or nullptr if no replacement is - // neeeded (e.g. only the to_apply subcomputation of the instruction was + // needed (e.g. only the to_apply subcomputation of the instruction was // modified). 
virtual StatusOr ExpandInstruction( HloInstruction* instruction) = 0; diff --git a/tensorflow/compiler/xla/service/reshape_mover.cc b/tensorflow/compiler/xla/service/reshape_mover.cc index 9e2d7406940..cd11b211747 100644 --- a/tensorflow/compiler/xla/service/reshape_mover.cc +++ b/tensorflow/compiler/xla/service/reshape_mover.cc @@ -80,7 +80,7 @@ bool CanTriviallyChangeShape(const HloInstruction* instruction) { return true; } - // A broadcase of scalar can trivially change its shape. + // A broadcast of scalar can trivially change its shape. if (instruction->opcode() == HloOpcode::kBroadcast && ShapeUtil::IsScalar(instruction->operand(0)->shape())) { return true; diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 345a077e321..e12e1577211 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -660,7 +660,7 @@ Status Service::ExecuteGraphParallel(const ExecuteGraphParallelRequest* arg, const ExecuteGraphRequest& request = arg->requests(i); TF_RET_CHECK(request.has_computation()) << "computations may not be empty"; TF_RET_CHECK(request.computation().has_host_program_shape()) - << "programe shape may not be empty"; + << "program shape may not be empty"; // Get the executors. TF_ASSIGN_OR_RETURN(auto executors, GetExecutors(execution_options, @@ -837,7 +837,7 @@ Status Service::Compile(const CompileRequest* arg, CompileResponse* result) { return InvalidArgument("computations may not be empty"); } if (!arg->computation().has_host_program_shape()) { - return InvalidArgument("programe shape may not be empty"); + return InvalidArgument("program shape may not be empty"); } if (arg->execution_options().device_handles_size() > 1) { @@ -887,7 +887,7 @@ Status Service::Execute(const ExecuteRequest* arg, ExecuteResponse* result) { ResolveAndValidateArguments(arg->arguments(), replicas)); // Check that the replicated_arguments has the same shape and layout as the - // module config used when creating the exectuable. + // module config used when creating the executable. const int64 num_module_args = executable->module_config().entry_computation_layout().parameter_count(); if (num_module_args != arg->arguments_size()) { @@ -902,7 +902,7 @@ Status Service::Execute(const ExecuteRequest* arg, ExecuteResponse* result) { const Shape& shape_arg = replicated_arguments.front()[i]->on_host_shape(); if (!ShapeUtil::Equal(shape_module, shape_arg)) { return InvalidArgumentStrCat( - "The executable exepcts the ", i, "th argument in shape ", + "The executable expects the ", i, "th argument in shape ", ShapeUtil::HumanStringWithLayout(shape_module), " but sees ", ShapeUtil::HumanStringWithLayout(shape_arg)); } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 3e345448a47..b189e047254 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1354,7 +1354,7 @@ TEST_F(ShapeInferenceTest, DotWithTwoContractingDimsPasses) { } // BatchMatMul with different batch dimension sizes fails. 
-TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) { +TEST_F(ShapeInferenceTest, DotWithMismatchedBatchDimSizesFails) { Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 3, 14}); @@ -1373,7 +1373,7 @@ TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) { } // BatchMatMul with different batch dimension numbers passes -TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersPasses) { +TEST_F(ShapeInferenceTest, DotWithMismatchedBatchDimNumbersPasses) { Shape lhs_shape = ShapeUtil::MakeShape(F32, {2, 11, 3}); Shape rhs_shape = ShapeUtil::MakeShape(F32, {3, 2, 14}); diff --git a/tensorflow/compiler/xla/service/tree_reduction_rewriter.h b/tensorflow/compiler/xla/service/tree_reduction_rewriter.h index a9852d88a6e..d6e1d4200e9 100644 --- a/tensorflow/compiler/xla/service/tree_reduction_rewriter.h +++ b/tensorflow/compiler/xla/service/tree_reduction_rewriter.h @@ -35,7 +35,7 @@ namespace xla { // // Applying this pass until a fixed point performs a variant of pairwise // summation (https://en.wikipedia.org/wiki/Pairwise_summation), which is -// guaranteed to have an assymptotically smaller error bound provided that +// guaranteed to have an asymptotically smaller error bound provided that // intermediate roundoff errors are random and have random sign. // // If this pass lowers the performance too much, the window size can always be diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index cb589326ba7..c223378b332 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -302,7 +302,7 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { // Information kept per instruction struct PerInstruction { std::unique_ptr points_to_set; - // Empircally, ~92% of instructions have 1 + // Empirically, ~92% of instructions have 1 // instruction_defined_buffer, and 99% have 0 or 1 BufferDefinitionVector instruction_defined_buffers; }; diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc index 8b381dec073..1f2dcda288a 100644 --- a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc +++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc @@ -112,7 +112,7 @@ StatusOr WhileLoopConstantSinking::Run(HloModule* module) { bool changed = false; std::vector while_instrs; for (auto* comp : module->MakeNonfusionComputations()) { - // Right now we don't particulary care about optimizing while-of-while + // Right now we don't particularly care about optimizing while-of-while // patterns. If/When we do, we'll want to visit the outer while (while_0) // before we visit the inner while (while_1): // diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index 351feec6bb7..2d33184b7d0 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -317,7 +317,7 @@ StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { // TryHoistingInvariantInstructionsFromWhileBody can be generalized to // optimize the condition computation too, if needed. 
// - // The transform we do here is a pessmization for while loops that execute + // The transform we do here is a pessimization for while loops that execute // zero times*, but at this time we expect those to be rare. If this // becomes a problem we can consider using the conditional HLO to avoid // doing extra work for while loops with zero trip count. diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index 8ec6e40044c..cff0fd458e5 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ -126,7 +126,7 @@ WhileLoopSimplifierTest::MakeModuleWithSimpleLoopTupleElementLoopBound( return ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); } -TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { +TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimplified) { auto m = MakeModuleWithSimpleLoop(/*num_iters=*/0); ASSERT_TRUE(WhileLoopSimplifier().Run(m.get()).ValueOrDie()); EXPECT_THAT(m->entry_computation()->root_instruction(), diff --git a/tensorflow/compiler/xla/shape_test.cc b/tensorflow/compiler/xla/shape_test.cc index aa6c7d10989..47680a6ba32 100644 --- a/tensorflow/compiler/xla/shape_test.cc +++ b/tensorflow/compiler/xla/shape_test.cc @@ -45,13 +45,13 @@ class ShapeTest : public ::testing::Test { ShapeUtil::MakeTupleShape({opaque_, scalar_, matrix_, matrix2_}); const Shape nested_tuple_ = ShapeUtil::MakeTupleShape({tuple_, matrix_, token_}); - const Shape dyanmic_matrix_ = + const Shape dynamic_matrix_ = ShapeUtil::MakeShape(S32, {5, 2}, {true, false}); }; TEST_F(ShapeTest, ShapeToFromProto) { for (const Shape& shape : {opaque_, token_, scalar_, matrix_, matrix2_, - tuple_, nested_tuple_, dyanmic_matrix_}) { + tuple_, nested_tuple_, dynamic_matrix_}) { Shape shape_copy(shape.ToProto()); EXPECT_TRUE(ShapeUtil::Equal(shape, shape_copy)) << shape << " != " << shape_copy; @@ -215,7 +215,7 @@ TEST_F(ShapeTest, ProgramShapeToString) { TEST_F(ShapeTest, SupportsAbslHash) { EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly( {opaque_, token_, scalar_, scalar_with_tile_, matrix_, matrix2_, tuple_, - nested_tuple_, dyanmic_matrix_})); + nested_tuple_, dynamic_matrix_})); } } // namespace diff --git a/tensorflow/compiler/xla/status_macros_test.cc b/tensorflow/compiler/xla/status_macros_test.cc index 4b0740dad72..d1ed11c227e 100644 --- a/tensorflow/compiler/xla/status_macros_test.cc +++ b/tensorflow/compiler/xla/status_macros_test.cc @@ -90,7 +90,7 @@ TEST(StatusMacros, ReturnIfErrorOnError) { EXPECT_EQ(rc.status().code(), tensorflow::error::INTERNAL); } -TEST(StatusMacros, AssignOrReturnSuccessufully) { +TEST(StatusMacros, AssignOrReturnSuccessfully) { Status status = []() { TF_ASSIGN_OR_RETURN(int value, CreateIntSuccessfully()); EXPECT_EQ(value, 42); diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 881d9c5879e..3bb2f619499 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -3009,7 +3009,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, NonIdentityBroadcastOfSameRankIsDisallowed) { // Regression test for b/31927799. "slice - y" is fused and requires implicit // broadcast. 
-XLA_TEST_F(ArrayElementwiseOpTest, ImplictBroadcastInFusedExpressions) { +XLA_TEST_F(ArrayElementwiseOpTest, ImplicitBroadcastInFusedExpressions) { XlaBuilder builder(TestName()); auto x_literal = LiteralUtil::CreateR1({1, 2, 3}); auto y_literal = LiteralUtil::CreateR1({4, 5}); diff --git a/tensorflow/compiler/xla/tests/bfloat16_test.cc b/tensorflow/compiler/xla/tests/bfloat16_test.cc index 63e48117056..a1b11fc87b2 100644 --- a/tensorflow/compiler/xla/tests/bfloat16_test.cc +++ b/tensorflow/compiler/xla/tests/bfloat16_test.cc @@ -76,8 +76,8 @@ XLA_TEST_F(Bfloat16Test, NegateScalarF16) { error_spec_); } -// Disabled on interpreter since BatchNormExanper is not run by default on the -// intepreter backend. +// Disabled on interpreter since BatchNormExpander is not run by default on the +// interpreter backend. XLA_TEST_F(Bfloat16Test, DISABLED_ON_INTERPRETER(BatchNormTraining)) { const int kFeatureIndex = 2; XlaBuilder builder(TestName()); @@ -112,8 +112,8 @@ XLA_TEST_F(Bfloat16Test, DISABLED_ON_INTERPRETER(BatchNormTraining)) { ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.01, 0.02)); } -// Disabled on interpreter since BatchNormExanper is not run by default on the -// intepreter backend. +// Disabled on interpreter since BatchNormExpander is not run by default on the +// interpreter backend. XLA_TEST_F(Bfloat16Test, DISABLED_ON_INTERPRETER(BatchNormGrad)) { const int kFeatureIndex = 2; XlaBuilder builder(TestName()); diff --git a/tensorflow/compiler/xla/tests/collective_ops_test.cc b/tensorflow/compiler/xla/tests/collective_ops_test.cc index 8de508e876e..56c5f688312 100644 --- a/tensorflow/compiler/xla/tests/collective_ops_test.cc +++ b/tensorflow/compiler/xla/tests/collective_ops_test.cc @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/threadpool.h" -// Tests cross-GPU operatons. +// Tests cross-GPU operations. // // This test requires at least four GPUs. For instructions on running this // within Google, see go/multi-gpu-unit-test. diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index f91847e0010..097265f3bb1 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -1148,7 +1148,7 @@ TYPED_TEST(Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Input_Batch_In_Lanes, } template -class Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes +class Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Both_Batch_In_Lanes : public ConvolutionTest { public: void RunTest() { @@ -1210,9 +1210,9 @@ class Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes } }; -TYPED_TEST_CASE(Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes, +TYPED_TEST_CASE(Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Both_Batch_In_Lanes, TestTypes); -TYPED_TEST(Convolve2D_1x4x4x160_3x3x1x160_Dephtwise_Both_Batch_In_Lanes, +TYPED_TEST(Convolve2D_1x4x4x160_3x3x1x160_Depthwise_Both_Batch_In_Lanes, Types) { this->RunTest(); } diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc index 1ea72af5f5f..9ea27585e61 100644 --- a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc +++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc @@ -775,7 +775,7 @@ void BM_DynamicSlice(int num_iters) { stream.get(), start_index_literal, shaped_buffers[i])); } - // Add DynamicSlice op to the computatation. + // Add DynamicSlice op to the computation. 
DynamicSlice(input, start_indices, {1, 1, 1, 1}); auto computation = builder.Build().ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc b/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc index 64372788be4..3c14f78429a 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc +++ b/tensorflow/compiler/xla/tests/exhaustive_binary_test.cc @@ -131,7 +131,7 @@ template ::value || std::is_same::value>::type* = nullptr> T ReferenceMax(T x, T y) { - // We need to propagate NAN here becasue std::max may not propagate NAN. + // We need to propagate NAN here because std::max may not propagate NAN. if (std::fpclassify(x) == FP_NAN) { return x; } @@ -146,7 +146,7 @@ template ::value || std::is_same::value>::type* = nullptr> T ReferenceMin(T x, T y) { - // We need to propagate NAN here becasue std::max may not propagate NAN. + // We need to propagate NAN here because std::max may not propagate NAN. if (std::fpclassify(x) == FP_NAN) { return x; } @@ -319,7 +319,7 @@ INSTANTIATE_TEST_SUITE_P( // for each sub-test to avoid timeout because the implementation of ExpectNear // more than 2x slower for binary test. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveF32BinaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveF32BinaryTest, ::testing::Combine( ::testing::ValuesIn(GetFpValuesForMagnitudeExtremeNormals(40000, 2000)), @@ -402,7 +402,7 @@ INSTANTIATE_TEST_SUITE_P( // Similar to ExhaustiveF64BinaryTest, we use a smaller set of inputs for each // for each sub-test comparing with the unary test to avoid timeout. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveF64BinaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveF64BinaryTest, ::testing::Combine( ::testing::ValuesIn( GetFpValuesForMagnitudeExtremeNormals(40000, 2000)), diff --git a/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h b/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h index 3d77b44b53a..1aa06a0aa63 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h +++ b/tensorflow/compiler/xla/tests/exhaustive_op_test_utils.h @@ -84,7 +84,7 @@ class ExhaustiveOpTestBase : public ClientLibraryTestBase { : (T == F16 || T == BF16) ? U16 : PRIMITIVE_TYPE_INVALID; }; - // Native types that correspond to the primtive types above. + // Native types that correspond to the primitive types above. using NativeT = typename primitive_util::PrimitiveTypeToNative::type; using NativeRefT = typename primitive_util::PrimitiveTypeToNative::type; @@ -746,7 +746,7 @@ class ExhaustiveOpTestBase : public ClientLibraryTestBase { // The platform under test. const string platform_; - // Testing will ignore inputs for which known_incorect_fn_ returns true. The + // Testing will ignore inputs for which known_incorrect_fn_ returns true. The // argument to the function is the raw bits for the data being test, zero // extended to 64 bits if the data type is less than 64 bits. std::function known_incorrect_fn_; diff --git a/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc b/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc index a19f7eea3bd..0ab27554a0c 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc +++ b/tensorflow/compiler/xla/tests/exhaustive_unary_test.cc @@ -165,7 +165,7 @@ using ExhaustiveUnaryTest = ExhaustiveOpTestBase; // Test parameter is a tuple containing // - primitive type under test, // - (begin, end) range under test, as zero-extended int64s bitcast to the -// primtive type under test. 
+// primitive type under test. template class Exhaustive32BitOrLessUnaryTest : public ExhaustiveUnaryTest, @@ -727,7 +727,7 @@ INSTANTIATE_TEST_SUITE_P(NormalValues, ExhaustiveF64UnaryTest, // Tests a total of 4000000000 inputs, with 16000000 inputs in each sub-test, to // keep the peak memory usage low. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveF64UnaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveF64UnaryTest, ::testing::ValuesIn(GetFpValuesForMagnitudeExtremeNormals( 4000000000ull, 16000000))); @@ -873,7 +873,7 @@ INSTANTIATE_TEST_SUITE_P( // Tests a total of 40000 ^ 2 inputs, with 4000 ^ 2 inputs in each sub-test, to // keep the peak memory usage low. INSTANTIATE_TEST_SUITE_P( - F32LargeAndSmallMagnituedNormalValues, ExhaustiveC64UnaryTest, + F32LargeAndSmallMagnitudeNormalValues, ExhaustiveC64UnaryTest, ::testing::Combine( ::testing::ValuesIn(GetFpValuesForMagnitudeExtremeNormals(40000, 4000)), @@ -960,7 +960,7 @@ INSTANTIATE_TEST_SUITE_P( // Tests a total of 40000 ^ 2 inputs, with 2000 ^ 2 inputs in each sub-test, to // keep the peak memory usage low. INSTANTIATE_TEST_SUITE_P( - LargeAndSmallMagnituedNormalValues, ExhaustiveC128UnaryTest, + LargeAndSmallMagnitudeNormalValues, ExhaustiveC128UnaryTest, ::testing::Combine( ::testing::ValuesIn( GetFpValuesForMagnitudeExtremeNormals(40000, 2000)), diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc b/tensorflow/compiler/xla/tests/gather_operation_test.cc index 47d3546fc41..71090077ae8 100644 --- a/tensorflow/compiler/xla/tests/gather_operation_test.cc +++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc @@ -619,7 +619,7 @@ ENTRY main { class GatherClientLibraryTest : public ClientLibraryTestBase {}; -// Disabled on interpreter since ExectuteAsyncOnStream is not supported. +// Disabled on interpreter since ExecuteAsyncOnStream is not supported. XLA_TEST_F(GatherClientLibraryTest, DISABLED_ON_INTERPRETER(DISABLED_ON_GPU(Basic))) { // We create this HLO, but using the XlaBuilder API. diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 4d327a6fe9c..58ff070671d 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -463,7 +463,7 @@ TEST_F(MapTest, NestedBinaryMap) { ComputeAndCompareR1(&b, {0.1f, 0.5f, 0.25f, 1.0f, 4.0f}, {}); } -TEST_F(MapTest, MapOperantionWithBuildError) { +TEST_F(MapTest, MapOperationWithBuildError) { // Maps (lambda (x y) (+ x y)) onto two R1F32 vectors but uses an unsupported // type combination (F32 + U16) to test that the error is reported to the // outermost XlaBuilder. 
diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc index 0dcc0c278ae..81c0a8e1e46 100644 --- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc +++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc @@ -187,8 +187,8 @@ class MultiOutputFusionTest : public HloTestBase { XLA_TEST_F(MultiOutputFusionTest, 2DNofusion) { RunTest2D(false, 5); } XLA_TEST_F(MultiOutputFusionTest, 2DFusion) { RunTest2D(true, 5); } XLA_TEST_F(MultiOutputFusionTest, 2DFusionSize129) { RunTest2D(true, 129); } -XLA_TEST_F(MultiOutputFusionTest, DiffentTypesNoFusion) { RunTest1D(false, 8); } -XLA_TEST_F(MultiOutputFusionTest, DiffentTypesFusion) { RunTest1D(true, 8); } +XLA_TEST_F(MultiOutputFusionTest, DifferentTypesNoFusion) { RunTest1D(false, 8); } +XLA_TEST_F(MultiOutputFusionTest, DifferentTypesFusion) { RunTest1D(true, 8); } XLA_TEST_F(MultiOutputFusionTest, FusionNodeIsRoot) { const char* testcase = R"( diff --git a/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc b/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc index 88f3a8bdde2..068ef744c33 100644 --- a/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc +++ b/tensorflow/compiler/xla/tools/hlo_proto_to_json.cc @@ -17,8 +17,8 @@ limitations under the License. // hlo_proto_to_json --input_file=some_binary_proto // --output_file=path_to_dump_output // -// Reads one serilized Hlo module, convert it into JSON format and dump into -// some output directory. some_binaray_proto is obtained by serializing Hlo +// Reads one serialized Hlo module, convert it into JSON format and dump into +// some output directory. some_binary_proto is obtained by serializing Hlo // module to disk using the debug options // // --xla_dump_to=DIR --xla_dump_hlo_as_proto diff --git a/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh b/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh index b3e43aa7da0..a1614c443fe 100755 --- a/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh +++ b/tensorflow/compiler/xla/tools/interactive_graphviz_test.sh @@ -14,6 +14,6 @@ # limitations under the License. # ==============================================================================*/ -# This is a placeholder for a compile-only test for intractive_graphviz tool. +# This is a placeholder for a compile-only test for interactive_graphviz tool. exit 0 diff --git a/tensorflow/compiler/xla/tools/run_hlo_module_main.cc b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc index 7079f413eeb..39d7826e162 100644 --- a/tensorflow/compiler/xla/tools/run_hlo_module_main.cc +++ b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc @@ -104,7 +104,7 @@ int main(int argc, char** argv) { tensorflow::Flag( "use_large_float_range", &opts.use_large_float_range, "Generate floating point values using a large uniform-log " - "distribtion as opposed to a small uniform distribution."), + "distribution as opposed to a small uniform distribution."), tensorflow::Flag( "abs_error_bound", &opts.abs_error_bound, "The absolute error bound used when comparing the test and " diff --git a/tensorflow/compiler/xla/util.cc b/tensorflow/compiler/xla/util.cc index 7b17db12595..6711779cd2b 100644 --- a/tensorflow/compiler/xla/util.cc +++ b/tensorflow/compiler/xla/util.cc @@ -341,7 +341,7 @@ std::pair SplitF64ToF32(double x) { CHECK(std::isfinite(x_f32)) << x; // The high float is simply the double rounded to the nearest float. 
Because - // we are roundinng to nearest with ties to even, the error introduced in + // we are rounding to nearest with ties to even, the error introduced in // rounding is less than half an ULP in the high ULP. const float hi = x_f32; // We can compute the low term using Sterbenz' lemma: If a and b are two diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index 427a631f82d..68f56a52d0e 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -1527,7 +1527,7 @@ TEST(RawApiTest, CompileAndExecuteWithS64Argument) { xla::Shape(program_shape.result()), xla::S64)); } -// Tests the XRT device memory compation API (XRTCompactAllocations). +// Tests the XRT device memory compaction API (XRTCompactAllocations). TEST(RawApiTest, TestDeviceMemoryCompaction) { static const int kNumAllocs = 32; Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); diff --git a/tensorflow/compiler/xrt/xrt_memory_manager.cc b/tensorflow/compiler/xrt/xrt_memory_manager.cc index 3a304764800..14986be3d1e 100644 --- a/tensorflow/compiler/xrt/xrt_memory_manager.cc +++ b/tensorflow/compiler/xrt/xrt_memory_manager.cc @@ -319,7 +319,7 @@ Status XRTMemoryManager::TryFreeMemoryStep(MemoryReclaimContext* mrctx, } if (!mrctx->done_freeing) { // If the caller passed us a zero requested_free_size, we try to free chunks - // of kMaxFreeSize memory, until either the run function suceeds, or we run + // of kMaxFreeSize memory, until either the run function succeeds, or we run out of freeable memory. const size_t kMaxFreeSize = 1000000000; size_t free_size = diff --git a/tensorflow/compiler/xrt/xrt_memory_manager.h b/tensorflow/compiler/xrt/xrt_memory_manager.h index 445be45cf57..0dcd07f9faa 100644 --- a/tensorflow/compiler/xrt/xrt_memory_manager.h +++ b/tensorflow/compiler/xrt/xrt_memory_manager.h @@ -87,7 +87,7 @@ class XRTMemoryManager : public ResourceBase { return Status::OK(); } - // Releases an handle by dropping the refences count held on the + // Releases a handle by dropping the reference count held on the // XRTTupleAllocation by the XRTMemoryManager. Existing XRTTupleAllocation // references will continue to be valid. Status Release(int64 handle); From 3938a7002c75773cb2dfd98d02e3b00a56a73a26 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 9 Dec 2019 19:12:03 +0900 Subject: [PATCH 2/3] minor spelling tweaks --- RELEASE.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index c415315f882..f83bab83a2f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -315,7 +315,7 @@ If you are experiencing any issues because of this change, please inform us (fil * Changed API to optimize TensorRT enginges during graph optimization. This is now done by calling `converter.build()` where previously `is_dynamic_op=False` would be set. - * `converter.convert()` no longer returns a `tf.function`. Now the funtion must be + * `converter.convert()` no longer returns a `tf.function`. Now the function must be accessed from the saved model. * The `converter.calibrate()` method has been removed. To trigger calibration, a `calibration_input_fn` should be provided to `converter.convert()`. @@ -715,7 +715,7 @@ Weweler, Zantares, zjjott, 卜居, 王振华 (Wang Zhenhua), 黄鑫 * Updates `png_archive` dependency to 1.6.37 to not be affected by CVE-2019-7317, CVE-2018-13785, and CVE-2018-14048.
-* Updates `sqlite` depenency to 3.28.0 to not be affected by CVE-2018-20506, +* Updates `sqlite` dependency to 3.28.0 to not be affected by CVE-2018-20506, CVE-2018-20346, and CVE-2018-20505. # Release 1.12.2 @@ -901,9 +901,9 @@ Weweler, Zantares, zjjott, 卜居, 王振华 (Wang Zhenhua), 黄鑫 compilation as a second return argument. * XLA HLO graphs can now be rendered as SVG/HTML. * Estimator - * Replace all occurences of `tf.contrib.estimator.BaselineEstimator` with + * Replace all occurrences of `tf.contrib.estimator.BaselineEstimator` with `tf.estimator.BaselineEstimator` - * Replace all occurences of + * Replace all occurrences of `tf.contrib.estimator.DNNLinearCombinedEstimator` with `tf.estimator.DNNLinearCombinedEstimator` * Replace all occurrences of `tf.contrib.estimator.DNNEstimator` with @@ -915,7 +915,7 @@ Weweler, Zantares, zjjott, 卜居, 王振华 (Wang Zhenhua), 黄鑫 `tf.estimator.Estimator.experimental_export_all_saved_models`. * Update `regression_head` to the new Head API for Canned Estimator V2. * Switch `multi_class_head` to Head API for Canned Estimator V2. - * Replace all occurences of `tf.contrib.estimator.InMemoryEvaluatorHook` + * Replace all occurrences of `tf.contrib.estimator.InMemoryEvaluatorHook` and `tf.contrib.estimator.make_stop_at_checkpoint_step_hook` with `tf.estimator.experimental.InMemoryEvaluatorHook` and `tf.estimator.experimental.make_stop_at_checkpoint_step_hook` From 9dfd369d07303ee7c93c59fa918aeddca4fed733 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 10 Dec 2019 02:01:00 +0900 Subject: [PATCH 3/3] address review comment --- tensorflow/compiler/xla/service/hlo_computation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 579e4360092..9ca60403929 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -419,7 +419,7 @@ class HloComputation { // the HLO computation with the exception of fusion computation. A parameter // instruction is removable for a fusion computation. // - // Note that IsSafelyRemovable() is a necessarily condition to remove an + // Note that IsSafelyRemovable() is a necessary condition to remove an // instruction rather than a sufficient condition. For example, instructions // with side-effect (e.g., Send, Infeed) may be removed from a computation, // but the transformation must guarantee the invariants relevant to the