minor spelling tweaks
parent 56944a8148
commit caa68bf2d0
@@ -110,7 +110,7 @@ TEST_F(ReadAccelerationConfigTest, IgnoresCommentedLines) {
 EXPECT_TRUE(blacklist_.empty());
 }

-TEST_F(ReadAccelerationConfigTest, CommentCanHaveTralingBlanks) {
+TEST_F(ReadAccelerationConfigTest, CommentCanHaveTrailingBlanks) {
 ReadAccelerationConfig(" #key,value", consumer_);

 EXPECT_TRUE(whitelist_.empty());
@@ -809,7 +809,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
 params.input_range_radius = data->input_range_radius;
 params.input_multiplier = data->input_multiplier;
 params.input_left_shift = data->input_left_shift;
-optimized_ops::Tanh16bitPercision(
+optimized_ops::Tanh16bitPrecision(
 params, GetTensorShape(input), GetTensorData<uint8_t>(input),
 GetTensorShape(output), GetTensorData<uint8_t>(output));
 } else {
@@ -824,7 +824,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
 params.input_range_radius = data->input_range_radius;
 params.input_multiplier = data->input_multiplier;
 params.input_left_shift = data->input_left_shift;
-optimized_ops::Tanh16bitPercision(
+optimized_ops::Tanh16bitPrecision(
 params, GetTensorShape(input), GetTensorData<int8_t>(input),
 GetTensorShape(output), GetTensorData<int8_t>(output));
 } else {
@@ -881,7 +881,7 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
 params.input_range_radius = data->input_range_radius;
 params.input_multiplier = data->input_multiplier;
 params.input_left_shift = data->input_left_shift;
-optimized_ops::Logistic16bitPercision(
+optimized_ops::Logistic16bitPrecision(
 params, GetTensorShape(input), GetTensorData<uint8_t>(input),
 GetTensorShape(output), GetTensorData<uint8_t>(output));
 } else {
@@ -896,7 +896,7 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
 params.input_range_radius = data->input_range_radius;
 params.input_multiplier = data->input_multiplier;
 params.input_left_shift = data->input_left_shift;
-optimized_ops::Logistic16bitPercision(
+optimized_ops::Logistic16bitPrecision(
 params, GetTensorShape(input), GetTensorData<int8_t>(input),
 GetTensorShape(output), GetTensorData<int8_t>(output));
 } else {
@@ -2766,11 +2766,11 @@ TEST_P(LSTMOpTest, BlackBoxTestWithAuxInputZeroAuxWeight) {
 // Aux input and input are the same, so we should observe the same outputs
 // as there's no aux input.
 lstm.SetAuxInput(0, batch0_start, batch0_end);
-std::vector<float> dummpy_weights(n_cell * n_input, 0.0f);
-lstm.SetAuxInputToInputWeights(dummpy_weights);
-lstm.SetAuxInputToForgetWeights(dummpy_weights);
-lstm.SetAuxInputToCellWeights(dummpy_weights);
-lstm.SetAuxInputToOutputWeights(dummpy_weights);
+std::vector<float> dummy_weights(n_cell * n_input, 0.0f);
+lstm.SetAuxInputToInputWeights(dummy_weights);
+lstm.SetAuxInputToForgetWeights(dummy_weights);
+lstm.SetAuxInputToCellWeights(dummy_weights);
+lstm.SetAuxInputToOutputWeights(dummy_weights);

 lstm.Invoke();

@@ -1346,7 +1346,7 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestCrossLinkingAuxInputOnlyTimeMajor) {
 }

 // Same as BlackBox test, but the input tensor and weights tensor are split
-// along the last dimension and passed to both regular and auxiliry inputs and
+// along the last dimension and passed to both regular and auxiliary inputs and
 // weights. The output in this case is the same. To understand this, let's
 // define W and V as regular input weights matrix and auxiliary input weights
 // matrix correspondingly. It's easy to see that this is equivalent to a regular
@@ -55,7 +55,7 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
 const std::unique_ptr<ruy::Context> ruy_context_;
 const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;

-// The maxinum of threads used for parallelizing TfLite ops. However,
+// The maximum of threads used for parallelizing TfLite ops. However,
 // cpu_backend_threadpool::Execute creates as many threads as it's
 // asked to, regardless of this. Typically a call site would query
 // cpu_backend_context->max_num_threads() and used that to determine
@@ -593,10 +593,10 @@ struct CustomGemvImpl<LhsScalar, RhsScalar, std::int32_t, DstScalar,

 // We want to use fused multiply-add when it's available (that is, on A64
 // unconditionally and on A32 with VFPv4) because it's often faster, and
-// because non-fused seems not to be available in A64 so a conscentious compiler
-// might emit slow code (separate mul and add instructions) in order to
+// because non-fused seems not to be available in A64 so a conscientious
+// compiler might emit slow code (separate mul and add instructions) in order to
 // implement the vmlaq_f32 intrinsic with strict bit-for-bit exactness on A64.
-// (Compilers seems to be generating a fused fmla instruction at the moment,
+// (Compilers seem to be generating a fused fmla instruction at the moment,
 // but that could change).
 //
 // We still want to support building for A32 without VFPv4.
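The comment in the hunk above contrasts fused and non-fused multiply-add. As a point of reference only (not part of this commit), here is a minimal sketch of the two forms, assuming an ARM toolchain where arm_neon.h and the vmlaq_f32 / vmulq_f32 / vaddq_f32 intrinsics are available:

#include <arm_neon.h>

// Fused form: vmlaq_f32(acc, a, b) computes acc + a * b; on A64 compilers
// typically emit a single fmla instruction, which rounds once.
inline float32x4_t MulAddFused(float32x4_t acc, float32x4_t a, float32x4_t b) {
  return vmlaq_f32(acc, a, b);
}

// Non-fused form: separate multiply and add, two instructions and two
// roundings - the slower code the comment warns a strict compiler might emit.
inline float32x4_t MulAddSeparate(float32x4_t acc, float32x4_t a, float32x4_t b) {
  return vaddq_f32(acc, vmulq_f32(a, b));
}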
@@ -19,7 +19,7 @@ limitations under the License.

 // See b/131835803: in TFLite code, because eigen_spatial_convolutions.h does
 // #define Eigen EigenForTFLite, it is difficult to have any #include of Eigen
-// headers in a header file, as that results in name clases (compilation
+// headers in a header file, as that results in name classes (compilation
 // errors) depending on the order in which these headers are #included.
 // So we have moved the #include of Eigen here, in a .cc file, where we have
 // control over the header #include sequence.
@@ -737,7 +737,7 @@ TEST(DetectionPostprocessOpTest,
 ElementsAreArray(ArrayFloatNear({3.0}, 1e-1)));
 }

-TEST(DetectionPostprocessOpTest, FloatTestwithNoBackgroudClassAndKeypoints) {
+TEST(DetectionPostprocessOpTest, FloatTestwithNoBackgroundClassAndKeypoints) {
 DetectionPostprocessOpModelwithRegularNMS m(
 {TensorType_FLOAT32, {1, 6, 5}}, {TensorType_FLOAT32, {1, 6, 2}},
 {TensorType_FLOAT32, {6, 4}}, {TensorType_FLOAT32, {}},
@@ -251,7 +251,7 @@ TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node) {
 TfLiteIntArray* output_size_array = nullptr;
 if (params->keep_num_dims) {
 // When number of dimensions are kept the filter operates along the last
-// dimenions. In other words, for an input tensor with shape
+// dimentions. In other words, for an input tensor with shape
 // [batch_size, ..., n_inputs] and a filter of shape [n_inputs, n_units]
 // this Op produces an output of shape [batch_size, ..., n_units].
 TF_LITE_ENSURE_EQ(context, input->dims->data[input->dims->size - 1],
@@ -790,7 +790,7 @@ TEST_P(QuantizedFullyConnectedOpTest,
 SimpleTestQuantizedInt16OutputShuffled4x16Int8Weights) {
 // The shuffled weights block shape is 4x16. The shape of the weights matrix
 // is: rows = output_depth, cols = input_depth. It must be a multiple of 4x16.
-// This means that output_depth must be a multiple of 4, and input_deth must
+// This means that output_depth must be a multiple of 4, and input_depth must
 // be a multiple of 16.
 for (int input_depth_numblocks : {1, 3}) {
 for (int output_depth_numblocks : {1, 3}) {
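The 4x16 block constraint described in that comment reduces to a simple divisibility check. A minimal sketch under the assumption that the weights matrix is laid out as rows = output_depth, cols = input_depth (the helper name is illustrative, not from the test):

// The shuffled-weights path tiles the weights matrix into 4x16 blocks, so the
// row count (output_depth) must divide by 4 and the column count (input_depth)
// must divide by 16. The test above loops over {1, 3} block counts, presumably
// scaling the two depths by 4 and 16 respectively so the check always holds.
inline bool IsShufflable4x16(int output_depth, int input_depth) {
  return (output_depth % 4 == 0) && (input_depth % 16 == 0);
}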
@@ -290,7 +290,7 @@ void TryTestOneDepthwiseConv3x3Filter() {
 // It's hard to come up with a right multiplier, random guess basically makes
 // all the results saturated and becomes meaningfulless, so we first use
 // reference impl to poke the min/max value of the accumulation, then use that
-// value as a guided suggestion for us to populate meaningful mulitplier &
+// value as a guided suggestion for us to populate meaningful multiplier &
 // shift.
 PickReasonableMultiplier(
 params, output_activation_min, output_activation_max, output_depth,
@@ -305,7 +305,7 @@ void TryTestOneDepthwiseConv3x3Filter() {
 dilation_width_factor, dilation_height_factor, pad_width, pad_height,
 depth_multiplier, output_shape_inference, 0, output_shift.data()));

-// The following tests compare referene impl and Neon general impl agrees,
+// The following tests compare reference impl and Neon general impl agrees,
 // and reference impl loosely agrees with fast kernel since they use different
 // rounding strategy.
 reference_integer_ops::DepthwiseConvPerChannel(
@@ -787,37 +787,37 @@ void FloatDepthwiseConvAccumRow(int stride, int dilation_factor,
 for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 // For the current (filter_x, filter_y) point in the filter,
 // compute the boundaries of the corresponding output row segment.
-int out_x_loop_start_unclampled = 0;
-int out_x_loop_end_unclampled = 0;
+int out_x_loop_start_unclamped = 0;
+int out_x_loop_end_unclamped = 0;
 if (kAllowStrided) {
 if (stride == 2) {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + 1) / 2;
-out_x_loop_end_unclampled =
+out_x_loop_end_unclamped =
 (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
 } else if (stride == 4) {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + 3) / 4;
-out_x_loop_end_unclampled =
+out_x_loop_end_unclamped =
 (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
 } else {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + stride - 1) / stride;
-out_x_loop_end_unclampled = (pad_width + input_width -
-dilation_factor * filter_x + stride - 1) /
-stride;
+out_x_loop_end_unclamped = (pad_width + input_width -
+dilation_factor * filter_x + stride - 1) /
+stride;
 }
 } else {
-out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
-out_x_loop_end_unclampled =
+out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+out_x_loop_end_unclamped =
 pad_width + input_width - dilation_factor * filter_x;
 }
 // The kernel will have to iterate on the segment of the
 // output row that starts at out_x_loop_start and out_x_loop_end.
 const int out_x_loop_start =
-std::max(out_x_buffer_start, out_x_loop_start_unclampled);
+std::max(out_x_buffer_start, out_x_loop_start_unclamped);
 const int out_x_loop_end =
-std::min(out_x_buffer_end, out_x_loop_end_unclampled);
+std::min(out_x_buffer_end, out_x_loop_end_unclamped);

 float* acc_buffer_ptr =
 acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
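The start/end expressions in that hunk follow the integer ceiling-division pattern ceil(x / stride) == (x + stride - 1) / stride for non-negative x, specialized to stride 2 and 4 in the first two branches; the result is then clamped against the output buffer bounds with std::max/std::min, which is why plain integer division suffices there. A small standalone check of the identity (illustrative only, not part of the kernel):

#include <cassert>
#include <cmath>

// (x + stride - 1) / stride equals ceil(x / stride) for x >= 0, stride > 0.
int CeilDiv(int x, int stride) { return (x + stride - 1) / stride; }

int main() {
  for (int x = 0; x <= 64; ++x) {
    for (int stride : {1, 2, 3, 4}) {
      assert(CeilDiv(x, stride) ==
             static_cast<int>(std::ceil(static_cast<double>(x) / stride)));
    }
  }
  return 0;
}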
@@ -1496,37 +1496,37 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor,
 for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 // For the current (filter_x, filter_y) point in the filter,
 // compute the boundaries of the corresponding output row segment.
-int out_x_loop_start_unclampled = 0;
-int out_x_loop_end_unclampled = 0;
+int out_x_loop_start_unclamped = 0;
+int out_x_loop_end_unclamped = 0;
 if (kAllowStrided) {
 if (stride == 2) {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + 1) / 2;
-out_x_loop_end_unclampled =
+out_x_loop_end_unclamped =
 (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
 } else if (stride == 4) {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + 3) / 4;
-out_x_loop_end_unclampled =
+out_x_loop_end_unclamped =
 (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
 } else {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + stride - 1) / stride;
-out_x_loop_end_unclampled = (pad_width + input_width -
-dilation_factor * filter_x + stride - 1) /
-stride;
+out_x_loop_end_unclamped = (pad_width + input_width -
+dilation_factor * filter_x + stride - 1) /
+stride;
 }
 } else {
-out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
-out_x_loop_end_unclampled =
+out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+out_x_loop_end_unclamped =
 pad_width + input_width - dilation_factor * filter_x;
 }
 // The kernel will have to iterate on the segment of the
 // output row that starts at out_x_loop_start and out_x_loop_end.
 const int out_x_loop_start =
-std::max(out_x_buffer_start, out_x_loop_start_unclampled);
+std::max(out_x_buffer_start, out_x_loop_start_unclamped);
 const int out_x_loop_end =
-std::min(out_x_buffer_end, out_x_loop_end_unclampled);
+std::min(out_x_buffer_end, out_x_loop_end_unclamped);

 int32* acc_buffer_ptr =
 acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
@@ -13128,7 +13128,7 @@ inline void DepthwiseConvDotProduct3x3Impl(
 // "next" data, of at least 16 bytes, even when at the end of the workspace.
 // It is relatively expensive to detect the end micro block. It is also very
 // difficult to test for (to trigger) erroneous reads (past end of array) in
-// the depth multplication case.
+// the depth multiplication case.
 int workspace_width_micro_repeats =
 (has_depth_multiplication
 ? kDepthwiseConvScratchWorkspaceSize - kWorkspaceExtension
@@ -1441,37 +1441,37 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor,
 for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
 // For the current (filter_x, filter_y) point in the filter,
 // compute the boundaries of the corresponding output row segment.
-int out_x_loop_start_unclampled = 0;
-int out_x_loop_end_unclampled = 0;
+int out_x_loop_start_unclamped = 0;
+int out_x_loop_end_unclamped = 0;
 if (kAllowStrided) {
 if (stride == 2) {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + 1) / 2;
-out_x_loop_end_unclampled =
+out_x_loop_end_unclamped =
 (pad_width + input_width - dilation_factor * filter_x + 1) / 2;
 } else if (stride == 4) {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + 3) / 4;
-out_x_loop_end_unclampled =
+out_x_loop_end_unclamped =
 (pad_width + input_width - dilation_factor * filter_x + 3) / 4;
 } else {
-out_x_loop_start_unclampled =
+out_x_loop_start_unclamped =
 (pad_width - dilation_factor * filter_x + stride - 1) / stride;
-out_x_loop_end_unclampled = (pad_width + input_width -
-dilation_factor * filter_x + stride - 1) /
-stride;
+out_x_loop_end_unclamped = (pad_width + input_width -
+dilation_factor * filter_x + stride - 1) /
+stride;
 }
 } else {
-out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
-out_x_loop_end_unclampled =
+out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
+out_x_loop_end_unclamped =
 pad_width + input_width - dilation_factor * filter_x;
 }
 // The kernel will have to iterate on the segment of the
 // output row that starts at out_x_loop_start and out_x_loop_end.
 const int out_x_loop_start =
-std::max(out_x_buffer_start, out_x_loop_start_unclampled);
+std::max(out_x_buffer_start, out_x_loop_start_unclamped);
 const int out_x_loop_end =
-std::min(out_x_buffer_end, out_x_loop_end_unclampled);
+std::min(out_x_buffer_end, out_x_loop_end_unclamped);

 int32* acc_buffer_ptr =
 acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
@@ -179,10 +179,10 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 1,
 // the first 4 values of the output_multiplier_ptr (we have 8 in total);
 // v30 (which held duplicated output right shift previously) will hold
 // the first 4 values of the output_shift_ptr (we have 8 in total);
-// lastly, v28 will hold the last 4 values of output_mulitplier and v31
+// lastly, v28 will hold the last 4 values of output_multiplier and v31
 // (previously occupied by activations) will hold the last 4 values of
 // output_shift. Then v25 will be used for output activation min while
-// output activation max will just reuse oother registers, like v24.
+// output activation max will just reuse other registers, like v24.
 //
 // Set "constant" registers. These registers may be replaced with temp
 // values from time to time when there are not enough NEON registers.
@@ -1024,7 +1024,7 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 2,
 // part.
 // The register planning here is really tricky:
 // v0-v29 are all used at least once for either filter/input/output,
-// some of them are used for output shift and output mulitplier, or
+// some of them are used for output shift and output multiplier, or
 // input/output offset.
 // Only v30 & v31 are only used for output activation min/max.
 // For per-channel case, we need 4 registers to hold output shift &
@@ -222,7 +222,7 @@ inline void Mean(const tflite::MeanParams& op_params,
 MeanImpl(op_params, input_shape, input_data, multiplier, shift, bias,
 output_shape, output_data, 0, output_depth);
 } else {
-// Instead parrallel for batch, we loop for the output_depth since batch
+// Instead parallel for batch, we loop for the output_depth since batch
 // is typical 1.
 std::vector<MeanWorkerTask> tasks;
 // TODO(b/131746020) don't create new heap allocations every time.
@@ -2339,7 +2339,7 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size,
 const int32x4_t f2i0_i32x4 = RoundToNearest(mul0_f32x4);
 const int32x4_t f2i1_i32x4 = RoundToNearest(mul1_f32x4);

-// Implements the vectorized version of the folowing block:
+// Implements the vectorized version of the following block:
 // quantized_values[i] = std::min(kScale, std::max(-kScale,
 // quantized_value));
 int32x4_t max0_i32x4 = vmaxq_s32(f2i0_i32x4, neg_scale_i32x4);
@@ -1123,7 +1123,7 @@ inline void Mean(const tflite::MeanParams& op_params,
 MeanImpl(op_params, input_shape, input_data, multiplier, shift, bias,
 output_shape, output_data, 0, output_depth);
 } else {
-// Instead parrallel for batch, we loop for the output_depth since batch
+// Instead parallel for batch, we loop for the output_depth since batch
 // is typical 1.
 std::vector<MeanWorkerTask> tasks;
 // TODO(b/131746020) don't create new heap allocations every time.
@@ -5714,7 +5714,7 @@ inline void Quantize(const int32_t* multiplier, const int32_t* shift,
 // ....
 //
 // In order to minimize the reload of the multipliers & shifts, once we load
-// the multipliers & shifts, we load & quantize the raw accumualtrs for every
+// the multipliers & shifts, we load & quantize the raw accumulators for every
 // row.
 #ifdef USE_NEON
 const int32x4_t output_offset_vec = vdupq_n_s32(output_zp);
@@ -6369,7 +6369,7 @@ inline void HardSwish(const HardSwishParams& params,
 // Unfortunately, the Intel arm_neon_sse.h implementation of vqshl* is
 // buggy in the case of zero shift amounts, see b/137199585. That is why
 // this NEON code path is restricted to true ARM NEON, excluding
-// arm_neon_sse.h. Anyway, the arm_neon_sse.h implemenation of saturating
+// arm_neon_sse.h. Anyway, the arm_neon_sse.h implementation of saturating
 // left shifts is slow scalar code, so there may not be much benefit in
 // running that over just plain reference code.
 //
@@ -7039,7 +7039,7 @@ inline void ClampWithRangeAndStore(int8_t* output_dst, int8x16_t input_val,

 #endif  // GEMMLOWP_NEON

-inline void Tanh16bitPercision(const TanhParams& params,
+inline void Tanh16bitPrecision(const TanhParams& params,
 const RuntimeShape& input_shape,
 const uint8* input_data,
 const RuntimeShape& output_shape,
@@ -7146,7 +7146,7 @@ inline void Tanh16bitPercision(const TanhParams& params,
 }
 }

-inline void Tanh16bitPercision(const TanhParams& params,
+inline void Tanh16bitPrecision(const TanhParams& params,
 const RuntimeShape& input_shape,
 const int8* input_data,
 const RuntimeShape& output_shape,
@@ -7239,7 +7239,7 @@ inline void Tanh16bitPercision(const TanhParams& params,
 }
 }

-inline void Logistic16bitPercision(const LogisticParams& params,
+inline void Logistic16bitPrecision(const LogisticParams& params,
 const RuntimeShape& input_shape,
 const uint8* input_data,
 const RuntimeShape& output_shape,
@@ -7331,7 +7331,7 @@ inline void Logistic16bitPercision(const LogisticParams& params,
 }
 }

-inline void Logistic16bitPercision(const LogisticParams& params,
+inline void Logistic16bitPrecision(const LogisticParams& params,
 const RuntimeShape& input_shape,
 const int8* input_data,
 const RuntimeShape& output_shape,
@@ -372,7 +372,7 @@ void FakeQuantizeArray(const float nudged_scale, const float nudged_min,

 bool CheckedLog2(const float x, int* log2_result) {
 // Using TfLiteRound instead of std::round and std::log instead of
-// std::log2 to work around these fuctions being missing in a toolchain
+// std::log2 to work around these functions being missing in a toolchain
 // used in some TensorFlow tests as of May 2018.
 const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
 const float x_log2_rounded = TfLiteRound(x_log2);
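CheckedLog2 in the hunk above relies on the identity log2(x) = ln(x) / ln(2) because std::log2 was unavailable in one toolchain. A standalone sketch of the same pattern using only <cmath> (std::round stands in for TfLiteRound; the tolerance value is an assumption, not taken from this file):

#include <cmath>

// Returns true when x is (numerically) an exact power of two and stores the
// exponent: compute log2 via natural logs, round to the nearest integer, and
// accept if the fractional remainder is negligible.
bool CheckedLog2Sketch(float x, int* log2_result) {
  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
  const float x_log2_rounded = std::round(x_log2);
  *log2_result = static_cast<int>(x_log2_rounded);
  return std::abs(x_log2 - x_log2_rounded) < 1e-3f;
}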
@@ -26,7 +26,7 @@ namespace reference_ops {
 // TODO(ycling): Refactoring. Remove BroadcastLogical and use the more
 // generalized and efficient BroadcastBinaryFunction.
 //
-// Also appears to duplicte MinimumMaximum.
+// Also appears to duplicate MinimumMaximum.
 //
 // R: Result type. T1: Input 1 type. T2: Input 2 type.
 template <typename R, typename T1, typename T2>
@@ -175,7 +175,7 @@ bool Spectrogram::ComputeSquaredMagnitudeSpectrogram(
 for (int i = 0; i < output_frequency_channels_; ++i) {
 // Similar to the Complex case, except storing the norm.
 // But the norm function is known to be a performance killer,
-// so do it this way with explicit real and imagninary temps.
+// so do it this way with explicit real and imaginary temps.
 const double re = fft_input_output_[2 * i];
 const double im = fft_input_output_[2 * i + 1];
 // Which finally converts double to float if it needs to.
@@ -161,8 +161,8 @@ void SparseMatrixBatchVectorMultiplyAccumulate(
 // - multiplier and shift combined gives the scale.
 // - assumes input zero point is 0.
 // - scratch is created for optimization purpose only.
-// TODO(jianlijianli): this can be removed if some furture optimization
-// work makes it unnecesssary.
+// TODO(jianlijianli): this can be removed if some future optimization
+// work makes it unnecessary.
 void MatrixBatchVectorMultiplyAccumulate(
 const int8_t* input, const int32_t* bias,
 const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
@@ -192,8 +192,8 @@ void MatrixBatchVectorMultiplyAccumulate(
 // - multiplier and shift combined gives the scale.
 // - assumes input zero point is 0.
 // - scratch is created for optimization purpose only.
-// TODO(jianlijianli): this can be removed if some furture optimization
-// work makes it unnecesssary.
+// TODO(jianlijianli): this can be removed if some future optimization
+// work makes it unnecessary.
 void MatrixBatchVectorMultiplyAccumulate(
 const int8_t* input, const int32_t* bias,
 const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
@@ -231,7 +231,7 @@ void MatrixBatchVectorMultiply(const int16_t* hidden,
 // - output: the 32bit output
 // Note: We do not need saturation because the int8 * int8 is safe from overflow
 // in (2^31-1) / (2^14) = 131072, which is bigger than the n_row. Non-zero
-// initial output value is not exceiptionally large.
+// initial output value is not exceptionally large.
 void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
 int32_t n_row, int32_t n_col,
 int32_t* output);
@@ -372,7 +372,7 @@ inline void VectorVectorCwiseProduct(const T* __restrict__ vector1,
 }
 }

-// Cwise product and accumulate of two vectors. Since it's a MAC opertation, the
+// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
 // assumption here is that result array is initialized to valid values.
 template <typename T>
 inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
@@ -371,14 +371,14 @@ TEST(uKernels, QuantMatrixBatchVectorMultiplyAccumulate8x8_16Test) {
 const int32_t multiplier = 2080364544;
 const int32_t shift = -2;

-std::vector<int32_t> scrach(2 * 9, 0);
+std::vector<int32_t> scratch(2 * 9, 0);
 std::vector<int16_t> output = {10, 2, 33, 4, 5, 6, 65, 4, 3,
 52, 1, 2, 8, -1, -2, 11, 17, -18};
 MatrixBatchVectorMultiplyAccumulate(
 input.data(), input_zeropoint_times_weights.data(),
 input_to_gate_weights.data(), multiplier, shift,
 /*n_batch=*/2, /*n_input=*/30, /*n_output=*/9, /*output_zp=*/0,
-scrach.data(), output.data(), &context);
+scratch.data(), output.data(), &context);
 const std::vector<int16_t> expected_output = {
 -210, 331, 153, 139, -570, -657, 258, 515, -495,
 91, -243, -73, 603, -744, -269, 169, -748, -174,
@@ -497,11 +497,11 @@ TEST(uKernels, QuantMatrixBatchVectorMultiplyAccumulate8x8_8Test) {

 std::vector<int8_t> output = {1, 2, 3, 4, 5, 6, 5, 4, 3,
 2, 1, 2, 8, -1, -2, 11, 17, 18};
-std::vector<int32_t> scrach(2 * 9, 0);
+std::vector<int32_t> scratch(2 * 9, 0);
 MatrixBatchVectorMultiplyAccumulate(
 input.data(), input_zeropoint_times_weights.data(),
 input_to_gate_weights.data(), multiplier, shift,
-/*n_batch=*/2, /*n_input=*/30, /*n_output=*/9, output_zp, scrach.data(),
+/*n_batch=*/2, /*n_input=*/30, /*n_output=*/9, output_zp, scratch.data(),
 output.data(), &context);
 const std::vector<int8_t> expected_output = {
 5, -9, -2, -30, -5, -11, -22, -18, 18,
@@ -100,7 +100,7 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
 context, input, filter, bias, output, &real_multiplier));
 int exponent;

-// Populate quantization parameteters with multiplier and shift.
+// Populate quantization parameters with multiplier and shift.
 QuantizeMultiplier(real_multiplier, multiplier, &exponent);
 *shift = -exponent;
 }
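QuantizeMultiplier in the hunk above splits the real-valued scale into a 32-bit fixed-point multiplier and a power-of-two exponent, so that real_multiplier is approximately (quantized_multiplier / 2^31) * 2^exponent; the caller then stores shift = -exponent. A minimal sketch of that decomposition built on std::frexp (an illustration of the idea, not the library's implementation; it assumes real_multiplier > 0):

#include <cmath>
#include <cstdint>

// Decomposes real_multiplier into q31 * 2^(exponent - 31), with q31 in
// [2^30, 2^31). std::frexp yields the significand in [0.5, 1), which is then
// scaled to Q31 and renormalized if rounding carries it up to 2^31.
void QuantizeMultiplierSketch(double real_multiplier, int32_t* q31, int* exponent) {
  const double significand = std::frexp(real_multiplier, exponent);
  int64_t q = static_cast<int64_t>(std::round(significand * (1ll << 31)));
  if (q == (1ll << 31)) {
    q /= 2;
    ++*exponent;
  }
  *q31 = static_cast<int32_t>(q);
}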
@@ -1248,7 +1248,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }

 // Create a scratch buffer tensor for float case and hybrid case.
-// TODO(jianlijianli): Create a is_float boolean and reorginze the temporary
+// TODO(jianlijianli): Create a is_float boolean and reorganize the temporary
 // buffer allocation logic.
 if (!is_integer) {
 node->temporaries->data[0] = op_data->scratch_tensor_index;
@@ -861,7 +861,7 @@ inline void LstmStepHybrid(
 //
 // Layer norm coefficients of size 'n_cell', representing diagonal matrices.
 // layer_norm_input_weight_ptr - optional
-// layer_norm_forput_weight_ptr - optional
+// layer_norm_forget_weight_ptr - optional
 // layer_norm_cell_weight_ptr - optional
 // layer_norm_output_weight_ptr - optional
 //
@@ -1187,7 +1187,7 @@ inline void LstmStepInteger(
 //
 // Layer norm coefficients of size 'n_cell', representing diagonal matrices.
 // layer_norm_input_weight_ptr - optional
-// layer_norm_forput_weight_ptr - optional
+// layer_norm_forget_weight_ptr - optional
 // layer_norm_cell_weight_ptr - optional
 // layer_norm_output_weight_ptr - optional
 //
@@ -91,7 +91,7 @@ TEST(MatrixDiagTest, Int32TestTwoDimDiag) {
 EXPECT_THAT(model.GetOutputType(), TfLiteType::kTfLiteInt32);
 }

-TEST(MatrixDiagTest, DegenenerateCase) {
+TEST(MatrixDiagTest, DegenerateCase) {
 MatrixDiagOpModel<uint8_t> model({TensorType_UINT8, {1}});
 model.PopulateTensor<uint8_t>(model.input(), {1});
 model.Invoke();
@@ -25,11 +25,11 @@ namespace {
 using ::testing::ElementsAreArray;
 using ::testing::Matcher;

-template <typename RegularInputOuput>
+template <typename RegularInputOutput>
 class PadOpModel : public SingleOpModel {
 public:
-void SetInput(std::initializer_list<RegularInputOuput> data) {
-PopulateTensor<RegularInputOuput>(input_, data);
+void SetInput(std::initializer_list<RegularInputOutput> data) {
+PopulateTensor<RegularInputOutput>(input_, data);
 }

 template <typename QuantizedInputOutput>
@@ -46,8 +46,8 @@ class PadOpModel : public SingleOpModel {
 PopulateTensor<int>(paddings_, paddings);
 }

-std::vector<RegularInputOuput> GetOutput() {
-return ExtractVector<RegularInputOuput>(output_);
+std::vector<RegularInputOutput> GetOutput() {
+return ExtractVector<RegularInputOutput>(output_);
 }
 std::vector<int> GetOutputShape() { return GetTensorShape(output_); }

@@ -128,17 +128,17 @@ class PadOpConstModel : public PadOpModel<float> {
 };

 // Test case where paddings is a non-const tensor.
-template <typename RegularInputOuput>
-class PadV2OpDynamicModel : public PadOpModel<RegularInputOuput> {
+template <typename RegularInputOutput>
+class PadV2OpDynamicModel : public PadOpModel<RegularInputOutput> {
 public:
 PadV2OpDynamicModel(const TensorData& input,
 std::initializer_list<int> paddings_shape,
-RegularInputOuput constant_values,
+RegularInputOutput constant_values,
 const TensorData& output) {
 this->input_ = this->AddInput(input);
 this->paddings_ = this->AddInput(TensorType_INT32);
 this->constant_values_ = this->AddConstInput(
-GetTensorType<RegularInputOuput>(), {constant_values}, {1});
+GetTensorType<RegularInputOutput>(), {constant_values}, {1});
 this->output_ = this->AddOutput(output);

 this->SetBuiltinOp(BuiltinOperator_PADV2, BuiltinOptions_PadV2Options,
@@ -360,7 +360,7 @@ TfLiteStatus Rfft2dHelper(TfLiteContext* context, TfLiteNode* node) {
 double* fft_double_working_area_data = reinterpret_cast<double*>(
 GetTensorData<int64_t>(fft_double_working_area));

-// Process evert slice in the input buffer
+// Process every slice in the input buffer
 for (int i = 0; i < num_slices; ++i) {
 PrepareInputBuffer(input_data, input_height, input_width, fft_height,
 fft_width, fft_input_output);
@@ -87,7 +87,7 @@ TYPED_TEST(StridedSliceOpTest, UnsupportedInputSize) {
 "StridedSlice op only supports 1D-5D input arrays.");
 }

-TYPED_TEST(StridedSliceOpTest, UnssupportedArgs) {
+TYPED_TEST(StridedSliceOpTest, UnsupportedArgs) {
 EXPECT_DEATH(
 StridedSliceOpModel<TypeParam>({3, 2}, {2}, {2}, {2}, 0, 0, 1, 0, 0),
 "ellipsis_mask is not implemented yet.");
@@ -63,7 +63,7 @@ class SubgraphBuilder {
 void BuildLessEqualCondSubgraph(Subgraph* subgraph, int rhs);

 // An accumulate loop body subgraph. Used to produce triangle number
-// seqeuence. 2 inputs and 2 outpus
+// sequence. 2 inputs and 2 outputs
 // Equivalent to (counter, value) -> (counter + 1, counter + 1 + value)
 void BuildAccumulateLoopBodySubgraph(Subgraph* subgraph);

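The mapping (counter, value) -> (counter + 1, counter + 1 + value) described in that comment generates the triangle numbers 1, 3, 6, 10, ... when started from (1, 1); the arguments of the TestAccumulateLoopBody calls in the next file follow exactly this recurrence. A small standalone illustration (the names are illustrative, not part of the TFLite API):

#include <cassert>
#include <utility>

// One iteration of the accumulate loop body:
// (counter, value) -> (counter + 1, counter + 1 + value).
std::pair<int, int> AccumulateLoopBodyStep(int counter, int value) {
  return {counter + 1, counter + 1 + value};
}

int main() {
  // Starting from (1, 1), the value component walks the triangle numbers
  // 1 -> 3 -> 6 -> 10, matching TestAccumulateLoopBody(1, 1, 2, 3),
  // (2, 3, 3, 6) and (3, 6, 4, 10) in the test below.
  auto state = std::make_pair(1, 1);
  state = AccumulateLoopBodyStep(state.first, state.second);
  assert(state.first == 2 && state.second == 3);
  state = AccumulateLoopBodyStep(state.first, state.second);
  assert(state.first == 3 && state.second == 6);
  state = AccumulateLoopBodyStep(state.first, state.second);
  assert(state.first == 4 && state.second == 10);
  return 0;
}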
@@ -36,7 +36,7 @@ class SubgraphBuilderTest : public ::testing::Test {
 }

 protected:
-void TestAccumelateLoopBody(int input1, int input2, int output1,
+void TestAccumulateLoopBody(int input1, int input2, int output1,
 int output2) {
 interpreter_.reset(new Interpreter);
 builder_->BuildAccumulateLoopBodySubgraph(
@@ -140,9 +140,9 @@ TEST_F(SubgraphBuilderTest, TestBuildLessEqualCondSubgraph) {
 }

 TEST_F(SubgraphBuilderTest, TestBuildAccumulateLoopBodySubgraph) {
-TestAccumelateLoopBody(1, 1, 2, 3);
-TestAccumelateLoopBody(2, 3, 3, 6);
-TestAccumelateLoopBody(3, 6, 4, 10);
+TestAccumulateLoopBody(1, 1, 2, 3);
+TestAccumulateLoopBody(2, 3, 3, 6);
+TestAccumulateLoopBody(3, 6, 4, 10);
 }

 TEST_F(SubgraphBuilderTest, TestBuildPadLoopBodySubgraph) {
@@ -547,7 +547,7 @@ TEST_F(SVDFOpTest, BlackBoxTestInteger) {

 svdf.SetBias({-0.0976817, 0.15294972, 0.39635518, -0.02702999});

-const std::vector<std::vector<float>> input_sequnces = {
+const std::vector<std::vector<float>> input_sequences = {
 {0.49837467, 0.19278903, 0.26584083, 0.17660543, 0.52949083, -0.77931279},
 {0.12609188, -0.46347019, -0.89598465, 0.35867718, 0.36897406,
 0.73463392},
@@ -585,7 +585,7 @@ TEST_F(SVDFOpTest, BlackBoxTestInteger) {
 };

 for (int sequence_index = 0; sequence_index < 12; ++sequence_index) {
-svdf.SetInput(input_sequnces[sequence_index]);
+svdf.SetInput(input_sequences[sequence_index]);
 svdf.Invoke();
 const std::vector<int8_t> res = svdf.GetOutput();
 EXPECT_THAT(res, ElementsAreArray(expected_output[sequence_index]));
@@ -22,7 +22,7 @@ limitations under the License.

 namespace tflite {

-// Forward declaraction for op kernels.
+// Forward declaration for op kernels.
 namespace ops {
 namespace custom {

@@ -104,7 +104,7 @@ TEST_F(VariableOpsTest, TestReadVariableBeforeAssign) {
 ASSERT_EQ(interpreter_.Invoke(), kTfLiteError);
 }

-TEST_F(VariableOpsTest, TestReeasignToDifferentSize) {
+TEST_F(VariableOpsTest, TestReassignToDifferentSize) {
 // 1st invocation. The variable is assigned as a scalar.
 {
 ASSERT_EQ(interpreter_.AllocateTensors(), kTfLiteOk);
@@ -79,7 +79,7 @@ TEST_F(WhileTest, TestPadLoop) {
 TfLiteTensor* output2 = interpreter_->tensor(interpreter_->outputs()[1]);
 CheckIntTensor(output2, {11}, {0, 0, 0, 5, 7, 0, 0, 0, 0, 0, 0});

-// The extra invocation serves as a regiression test: There was a bug that
+// The extra invocation serves as a regression test: There was a bug that
 // invoking a while loop with dynamic shaped body makes the interpreter
 // state uninvokable.
 ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);