From e71f56e928cfab05b17d22604fccc7b8d94fbe32 Mon Sep 17 00:00:00 2001
From: Peng Sun
Date: Tue, 26 Nov 2019 19:03:24 +0000
Subject: [PATCH 1/3] INT16 reference_op for TRANSPOSE_CONVOLUTION

add INT16 reference ops transpose_conv
add TEST code for INT16 transpose_conv
---
 .../lite/kernels/internal/compatibility.h      |  2 +
 .../reference/integer_ops/transpose_conv.h     | 92 +++++++++++++++++++
 tensorflow/lite/kernels/kernel_util.cc         |  5 +-
 tensorflow/lite/kernels/transpose_conv.cc      | 80 ++++++++++++++--
 .../lite/kernels/transpose_conv_test.cc        | 92 ++++++++++++++++++-
 5 files changed, 254 insertions(+), 17 deletions(-)

diff --git a/tensorflow/lite/kernels/internal/compatibility.h b/tensorflow/lite/kernels/internal/compatibility.h
index bfd021ac48d..73075e1acc7 100644
--- a/tensorflow/lite/kernels/internal/compatibility.h
+++ b/tensorflow/lite/kernels/internal/compatibility.h
@@ -83,6 +83,8 @@ using int16 = std::int16_t;
 using uint16 = std::uint16_t;
 using int32 = std::int32_t;
 using uint32 = std::uint32_t;
+using int64 = std::int64_t;
+using uint64 = std::uint64_t;
 
 // TFLITE_DEPRECATED()
 //
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h b/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
index 1ad6e20f2dc..1dd036460e1 100644
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
@@ -112,6 +112,98 @@ inline void TransposeConv(
   }
 }
 
+// int16 input, int8 filter, int64 accumulator
+inline void TransposeConv(
+    const ConvParams& params, const int32* output_multiplier,
+    const int32* output_shift, const RuntimeShape& input_shape,
+    const int16* input_data, const RuntimeShape& filter_shape,
+    const int8* filter_data, const RuntimeShape& output_shape,
+    int16* output_data, const RuntimeShape& im2col_shape, int8* im2col_data,
+    int64* scratch_buffer) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  (void)im2col_data;   // only used in optimized code.
+  (void)im2col_shape;  // only used in optimized code.
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  const int32 input_offset = params.input_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_activation_min = std::numeric_limits<int16>::min();
+  const int32 output_activation_max = std::numeric_limits<int16>::max();
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  const int num_elements = output_shape.FlatSize();
+  // We need to initialize scratch_buffer to all 0s, as we apply the same
+  // 'scatter' based trick as in float version.
+  memset(scratch_buffer, 0, num_elements * sizeof(int64));
+
+  // Loop through input elements one at a time.
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int in_y = 0; in_y < input_height; ++in_y) {
+      for (int in_x = 0; in_x < input_width; ++in_x) {
+        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+          // Loop through the output elements it will influence.
+          const int out_x_origin = (in_x * stride_width) - pad_width;
+          const int out_y_origin = (in_y * stride_height) - pad_height;
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              for (int out_channel = 0; out_channel < output_depth;
+                   ++out_channel) {
+                // Compute output element location.
+                const int out_x = out_x_origin + filter_x;
+                const int out_y = out_y_origin + filter_y;
+                // We cannot accumulate out of bounds.
+                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
+                    (out_y < output_height)) {
+                  const int32 input_value = input_data[Offset(
+                      input_shape, batch, in_y, in_x, in_channel)];
+                  const int32 filter_value =
+                      filter_data[Offset(filter_shape, out_channel, filter_y,
+                                         filter_x, in_channel)];
+                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
+                                        out_channel)] +=
+                      (input_value + input_offset) * filter_value;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          int64 acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
+                                            out_channel)];
+          int32 scaled_acc = MultiplyByQuantizedMultiplier(
+              acc, output_multiplier[out_channel], output_shift[out_channel]);
+          scaled_acc += output_offset;
+          scaled_acc = std::max(scaled_acc, output_activation_min);
+          scaled_acc = std::min(scaled_acc, output_activation_max);
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              static_cast<int16>(scaled_acc);
+        }
+      }
+    }
+  }
+}
+
 }  // namespace reference_integer_ops
 }  // namespace tflite
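The int64 scratch buffer in the kernel above is load-bearing: a single int16 x int8 product can reach 2^15 * 2^7 = 2^22 in magnitude, so a 32-bit accumulator is only safe for roughly 512 such products per output element, and real filters accumulate far more. A quick standalone check (the 5x5-filter, 64-channel shape below is an invented example, not taken from this patch):

    #include <cstdint>
    #include <limits>

    // Worst-case magnitude of one int16 x int8 product: 2^15 * 2^7 = 2^22.
    constexpr std::int64_t kMaxProduct = std::int64_t{1} << 22;
    // Number of such products an int32 accumulator can absorb: ~511.
    constexpr std::int64_t kSafeMacsForInt32 =
        std::numeric_limits<std::int32_t>::max() / kMaxProduct;
    // Even a modest 5x5 filter over 64 input channels exceeds that budget,
    // hence the 64-bit scratch accumulation.
    static_assert(kSafeMacsForInt32 < 5 * 5 * 64,
                  "an int32 accumulator could overflow");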
diff --git a/tensorflow/lite/kernels/kernel_util.cc b/tensorflow/lite/kernels/kernel_util.cc
index 32574d82c00..55b42780dcf 100644
--- a/tensorflow/lite/kernels/kernel_util.cc
+++ b/tensorflow/lite/kernels/kernel_util.cc
@@ -46,8 +46,9 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
   TF_LITE_ENSURE(context, affine_quantization->scale);
   const bool is_per_channel = affine_quantization->scale->size > 1;
   if (is_per_channel) {
-    // Currently only Int8 is supported for per channel quantization.
-    TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
+    // Currently only Int8/Int16 is supported for per channel quantization.
+    TF_LITE_ENSURE(context,
+                   input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
     TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
     TF_LITE_ENSURE_EQ(
         context, affine_quantization->scale->size,
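With the check above relaxed, PopulateConvolutionQuantizationParams fills one (output_multiplier, output_shift) pair per output channel from the effective scale input_scale * filter_scale[channel] / output_scale, which the int16 kernel then consumes through MultiplyByQuantizedMultiplier. The decomposition can be sketched as follows; this is a simplified stand-in for TFLite's QuantizeMultiplier (positive shift meaning a left shift), not the exact implementation:

    #include <cmath>
    #include <cstdint>

    // Split effective_scale into a Q31 fixed-point multiplier and a power of
    // two, so that effective_scale ~= multiplier * 2^(shift - 31).
    void QuantizeMultiplierSketch(double effective_scale,
                                  std::int32_t* multiplier, int* shift) {
      if (effective_scale == 0.0) {
        *multiplier = 0;
        *shift = 0;
        return;
      }
      const double q = std::frexp(effective_scale, shift);  // q in [0.5, 1)
      std::int64_t q31 =
          static_cast<std::int64_t>(std::round(q * (1LL << 31)));
      if (q31 == (1LL << 31)) {  // rounding pushed q up to 1.0; renormalize
        q31 /= 2;
        ++*shift;
      }
      *multiplier = static_cast<std::int32_t>(q31);
    }

For channel 0 of the 16x8 test added later in this patch, effective_scale = (4.0 / 127) * (7.0 / 127) / 1.0 is roughly 0.00174, which decomposes to shift = -9 and a multiplier of about 0.889 * 2^31.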
diff --git a/tensorflow/lite/kernels/transpose_conv.cc b/tensorflow/lite/kernels/transpose_conv.cc
index 114b9ae48f4..5d02e1e3c32 100644
--- a/tensorflow/lite/kernels/transpose_conv.cc
+++ b/tensorflow/lite/kernels/transpose_conv.cc
@@ -154,8 +154,9 @@ static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context,
     ++temporaries_count;
   }
 
-  // Allocate scratch buffer tensor for UInt8 inputs.
-  if (input_type == kTfLiteUInt8 || input_type == kTfLiteInt8) {
+  // Allocate scratch buffer tensor.
+  if (input_type == kTfLiteUInt8 || input_type == kTfLiteInt8 ||
+      input_type == kTfLiteInt16) {
     if (data->scratch_tensor_id == kTensorNotAllocated) {
       context->AddTensors(context, 1, &data->scratch_tensor_id);
     }
@@ -226,13 +227,15 @@ TfLiteStatus ResizeAndTransposeWeights(TfLiteContext* context,
                              GetTensorShape(transposed_weights),
                              GetTensorData<float>(transposed_weights));
   } else if (weights->type == kTfLiteInt8) {
+    // int16 transpose_conv also uses int8 weights.
     optimized_ops::Transpose(transpose_params, input_shape,
                              GetTensorData<int8>(weights),
                              GetTensorShape(transposed_weights),
                              GetTensorData<int8>(transposed_weights));
   } else {
     context->ReportError(
-        context, "Transpose conv only support float & uint8 right now.");
+        context,
+        "Transpose conv only supports float, uint8, int8, int16 right now.");
     return kTfLiteError;
   }
 
@@ -258,10 +261,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumDimensions(output_shape), 1);
   TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
   TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 4);
-  TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
-                              input->type == kTfLiteUInt8 ||
-                              input->type == kTfLiteInt8);
-  TF_LITE_ENSURE_EQ(context, weights->type, input->type);
+  TF_LITE_ENSURE(context,
+                 input->type == kTfLiteFloat32 || input->type == kTfLiteUInt8 ||
+                     input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+  if (input->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, weights->type, kTfLiteInt8);
+  } else {
+    TF_LITE_ENSURE_EQ(context, weights->type, input->type);
+  }
   TF_LITE_ENSURE_EQ(context, output->type, input->type);
   // Ensure that weights and inputs have the same channel dimension.
   // Note: TOCO will reorder weights in the following format: OHWI.
@@ -305,12 +312,18 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     }
   }
 
-  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
+      input->type == kTfLiteInt16) {
     node->temporaries->data[data->scratch_tensor_index] =
         data->scratch_tensor_id;
     TfLiteTensor* scratch_buffer =
         GetTemporary(context, node, data->scratch_tensor_index);
-    scratch_buffer->type = kTfLiteInt32;
+    if (input->type == kTfLiteInt16) {
+      scratch_buffer->type = kTfLiteInt64;
+    } else {
+      scratch_buffer->type = kTfLiteInt32;
+    }
+
     scratch_buffer->allocation_type = kTfLiteDynamic;
     if (!IsConstantTensor(output_shape)) {
       SetTensorToDynamic(scratch_buffer);
@@ -473,6 +486,38 @@ void EvalQuantizedPerChannel(TfLiteContext* context,
   }
 }
 
+void EvalQuantizedPerChannel16x8(TfLiteContext* context,
+                                 const TfLiteTransposeConvParams* params,
+                                 OpData* data, const TfLiteTensor* input,
+                                 const TfLiteTensor* weights,
+                                 const TfLiteTensor* transposed_weights,
+                                 TfLiteTensor* col2im, TfLiteTensor* output,
+                                 TfLiteTensor* scratch_buffer) {
+  tflite::ConvParams op_params;
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width_offset = data->padding.width_offset;
+  op_params.padding_values.height_offset = data->padding.height_offset;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  // Need to flip the sign of input offset to add it directly to the quantized
+  // buffer.
+  op_params.input_offset = -input->params.zero_point;
+  op_params.output_offset = output->params.zero_point;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+
+  // Need to add optimized kernel
+  reference_integer_ops::TransposeConv(
+      op_params, data->per_channel_output_multiplier.data(),
+      data->per_channel_output_shift.data(), GetTensorShape(input),
+      GetTensorData<int16>(input), GetTensorShape(weights),
+      GetTensorData<int8>(weights), GetTensorShape(output),
+      GetTensorData<int16>(output), GetTensorShape(col2im),
+      GetTensorData<int8>(col2im), GetTensorData<int64_t>(scratch_buffer));
+}
+
 template <KernelType kernel_type>
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // Retrieve tensors (All should be allocated by now)
@@ -513,7 +558,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       filter_height, filter_width, params->padding, &unused_output_height,
       &unused_output_width);
 
-  // Currently support float32 and uint8.
+  // Currently supports float32, uint8, int8, int16.
   switch (input->type) {
     case kTfLiteFloat32: {
       // Only for GenericOptimized path, we use transposed weights.
@@ -558,6 +603,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                                 output, scratch_buffer);
       break;
     }
+    case kTfLiteInt16: {
+      TfLiteTensor* scratch_buffer =
+          GetTemporary(context, node, data->scratch_tensor_index);
+      if (IsDynamicTensor(scratch_buffer)) {
+        TF_LITE_ENSURE_OK(context,
+                          ResizeTensor(context, output_shape, scratch_buffer));
+      }
+      if (data->weights_are_transposed && !IsConstantTensor(weights)) {
+        ResizeAndTransposeWeights(context, weights, transposed_weights);
+      }
+      EvalQuantizedPerChannel16x8(context, params, data, input, weights,
+                                  transposed_weights, col2im, output,
+                                  scratch_buffer);
+      break;
+    }
     default:
       context->ReportError(context, "Type '%s' is not currently supported.",
                            TfLiteTypeGetName(input->type));
diff --git a/tensorflow/lite/kernels/transpose_conv_test.cc b/tensorflow/lite/kernels/transpose_conv_test.cc
index 9a1a950fe0f..c99883b4bff 100644
--- a/tensorflow/lite/kernels/transpose_conv_test.cc
+++ b/tensorflow/lite/kernels/transpose_conv_test.cc
@@ -76,7 +76,10 @@ class BaseTransposeConvOpModel : public SingleOpModel {
 
     if (test_type == TestType::DYNAMIC) {
       PopulateTensor(output_shape_, output_shape_data);
-      PopulateTensor(filter_, filter_data);
+      if (!std::is_same<InputType, int8_t>::value &&
+          !std::is_same<InputType, int16_t>::value) {
+        PopulateTensor(filter_, filter_data);
+      }
     }
   }
 
@@ -85,6 +88,8 @@ class BaseTransposeConvOpModel : public SingleOpModel {
     if (std::is_same<InputType, uint8_t>::value) {
       QuantizeAndPopulate<uint8_t>(input_, data);
     } else if (std::is_same<InputType, int8_t>::value) {
       QuantizeAndPopulate<int8_t>(input_, data);
+    } else if (std::is_same<InputType, int16_t>::value) {
+      QuantizeAndPopulate<int16_t>(input_, data);
     } else {
       PopulateTensor(input_, data);
     }
@@ -325,10 +330,6 @@ class PerChannelQuantizedTransposeConvOpModel
                               GetZeroPoint(output_));
   }
 
-  void SetInput(const std::initializer_list<float>& data) {
-    QuantizeAndPopulate<int8_t>(input_, data);
-  }
-
   void SetFilter(const std::initializer_list<float>& data) {
     PerChannelSymmetricQuantizeAndPopulate(filter_, data);
   }
@@ -451,6 +452,87 @@ TEST_P(TransposeConvOpTest, PaddingValidTestQuantized) {
   EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 6, 6, 1}));
 }
 
+class PerChannelQuantizedTransposeConvOpModel16x8
+    : public BaseTransposeConvOpModel<int16_t> {
+ public:
+  using BaseTransposeConvOpModel::BaseTransposeConvOpModel;
+
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<int16_t>(ExtractVector<int16_t>(output_),
+                               GetScale(output_), GetZeroPoint(output_));
+  }
+
+  void SetFilter(const std::initializer_list<float>& data) {
+    PerChannelSymmetricQuantizeAndPopulate(filter_, data);
+  }
+};
+
+TEST_P(TransposeConvOpTest, SimpleTestQuantizedPerChannel16x8) {
+  // TensorData(TensorType type = TensorType_FLOAT32,
+  //            std::vector<int> shape = {},
+  //            float min = 0.0f, float max = 0.0f, float scale = 0.0f,
+  //            int32_t zero_point = 0, bool per_channel_quantization = false,
+  //            std::vector<float> per_channel_quantization_scales = {},
+  //            std::vector<int64_t> per_channel_quantization_offsets = {},
+  //            int32_t channel_index = 0)
+  const std::initializer_list<float> filter_data = {
+      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+      1, 2,  // out channel = 0, y = 0, x = 0
+      3, 4,  // out channel = 0, y = 0, x = 1
+      3, 4,  // out channel = 0, y = 1, x = 0
+      5, 6,  // out channel = 0, y = 1, x = 1
+      7, 8,  // out channel = 1, y = 0, x = 0
+      5, 6,  // out channel = 1, y = 0, x = 1
+      3, 4,  // out channel = 1, y = 1, x = 0
+      1, 2,  // out channel = 1, y = 1, x = 1
+  };
+  PerChannelQuantizedTransposeConvOpModel16x8 model(
+      GetRegistration(),
+      /*output_shape_data=*/{1, 2, 3, 2},
+      /*filter=*/
+      {TensorType_INT8,
+       /*shape=*/{2, 2, 2, 2},
+       /*min=*/-64, /*max=*/64,
+       /*scale=*/0, /*zero_point=*/0,
+       /*per_channel=*/true,
+       /*per_channel_scales=*/{7.0 / 127, 8.0 / 127},
+       /*per_channel_offsets=*/{0, 0},
+       /*channel_index=*/0},
+      /*filter_data=*/{},
+      /*input=*/
+      {TensorType_INT16,
+       /*shape=*/{1, 2, 3, 2},
+       /*min=*/0, /*max=*/0,
+       /*scale=*/4.0 / 127,
+       /*zero_point=*/0},
+      /*output=*/
+      {TensorType_INT16,
+       /*shape=*/{},
+       /*min=*/0, /*max=*/0,
+       /*scale=*/1.0,
+       /*zero_point=*/0},
+      /*padding=*/Padding_SAME,
+      /*stride_w=*/1, /*stride_h=*/1, GetTestType());
+  model.SetInput({
+      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
+      3, 2,    // batch = 0, y = 0, x = 0
+      1, -1,   // batch = 0, y = 0, x = 1
+      -2, -3,  // batch = 0, y = 0, x = 2
+      4, 3,    // batch = 0, y = 1, x = 0
+      2, -2,   // batch = 0, y = 1, x = 1
+      -3, -4,  // batch = 0, y = 1, x = 2
+  });
+  model.SetFilter(filter_data);
+  model.Invoke();
+
+  EXPECT_THAT(model.GetDequantizedOutput(),
+              ElementsAreArray(ArrayFloatNear(
+                  {7, 37, 16, 26, -9, -39, 27, 69, 48, 42, -32, -74}, 1e-5)));
+
+  // GetOutputShape() should always be same as model.SetOutputShape(...)
+  EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 2, 3, 2}));
+}
+
 INSTANTIATE_TEST_SUITE_P(
     TransposeConvOpTest, TransposeConvOpTest,
     ::testing::Combine(
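The follow-up below relies on 16-bit activations being quantized symmetrically in TFLite: the zero point is pinned to 0 and only a scale is chosen, so the offset arithmetic in the int16 kernel is dead weight. A minimal sketch of that scheme, assuming the usual max-abs calibration (illustrative helpers, not TFLite API, and assuming a nonzero scale):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Map the largest observed magnitude onto +/-32767; zero point stays 0.
    float SymmetricInt16Scale(const std::vector<float>& calibration_values) {
      float max_abs = 0.0f;
      for (float v : calibration_values) {
        max_abs = std::max(max_abs, std::fabs(v));
      }
      return max_abs / 32767.0f;
    }

    // q = round(x / scale), clamped; dequantization is simply q * scale.
    std::int16_t QuantizeInt16(float x, float scale) {
      const float q = std::round(x / scale);
      return static_cast<std::int16_t>(
          std::min(32767.0f, std::max(-32767.0f, q)));
    }

Because both zero points are then provably 0, the kernel's (input_value + input_offset) and scaled_acc += output_offset steps become no-ops; the patch deletes them and makes Prepare() enforce the zero zero-points instead.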
From 0be4b1808b5128c873e9fbe2d60d41127afcd6f7 Mon Sep 17 00:00:00 2001
From: Peng Sun
Date: Tue, 4 Feb 2020 18:34:13 +0000
Subject: [PATCH 2/3] input, output offset is zero in case of int16 activations

---
 .../kernels/internal/reference/integer_ops/transpose_conv.h | 6 ++----
 tensorflow/lite/kernels/transpose_conv.cc                   | 2 ++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h b/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
index 1dd036460e1..78166c14688 100644
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
@@ -113,6 +113,7 @@ inline void TransposeConv(
 }
 
 // int16 input, int8 filter, int64 accumulator
+// int16 activation with zero_point=0
 inline void TransposeConv(
     const ConvParams& params, const int32* output_multiplier,
     const int32* output_shift, const RuntimeShape& input_shape,
@@ -139,8 +140,6 @@ inline void TransposeConv(
   const int filter_width = filter_shape.Dims(2);
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
-  const int32 input_offset = params.input_offset;
-  const int32 output_offset = params.output_offset;
   const int32 output_activation_min = std::numeric_limits<int16>::min();
   const int32 output_activation_max = std::numeric_limits<int16>::max();
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
@@ -175,7 +174,7 @@ inline void TransposeConv(
                                          filter_x, in_channel)];
                   scratch_buffer[Offset(output_shape, batch, out_y, out_x,
                                         out_channel)] +=
-                      (input_value + input_offset) * filter_value;
+                      input_value * filter_value;
                 }
               }
             }
@@ -193,7 +192,6 @@ inline void TransposeConv(
                                             out_channel)];
           int32 scaled_acc = MultiplyByQuantizedMultiplier(
               acc, output_multiplier[out_channel], output_shift[out_channel]);
-          scaled_acc += output_offset;
           scaled_acc = std::max(scaled_acc, output_activation_min);
           scaled_acc = std::min(scaled_acc, output_activation_max);
           output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
diff --git a/tensorflow/lite/kernels/transpose_conv.cc b/tensorflow/lite/kernels/transpose_conv.cc
index 5d02e1e3c32..73409085377 100644
--- a/tensorflow/lite/kernels/transpose_conv.cc
+++ b/tensorflow/lite/kernels/transpose_conv.cc
@@ -266,6 +266,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                  input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
   if (input->type == kTfLiteInt16) {
     TF_LITE_ENSURE_EQ(context, weights->type, kTfLiteInt8);
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
   } else {
     TF_LITE_ENSURE_EQ(context, weights->type, input->type);
   }
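The last patch reverts the int64/uint64 aliases introduced in [PATCH 1/3]: an unqualified int64 already has competing definitions in TensorFlow translation units (for instance tensorflow::int64 is a long long, while std::int64_t is a plain long on LP64 Linux), so adding another alias makes unqualified uses ambiguous. A contrived repro of the failure mode, with a hypothetical 'other' namespace standing in for whichever header provides the competing alias:

    #include <cstdint>

    namespace tflite { using int64 = std::int64_t; }  // what PATCH 1/3 added
    namespace other { typedef long long int64; }      // a pre-existing alias

    using namespace tflite;  // a translation unit that pulls in both
    using namespace other;

    // int64 n = 0;      // error: reference to 'int64' is ambiguous
    std::int64_t n = 0;   // what PATCH 3/3 spells out instead

Writing std::int64_t at each use site sidesteps the lookup problem entirely.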
From 3b917d65494a80a569aa3bd3041b41ea7e821984 Mon Sep 17 00:00:00 2001
From: Peng Sun
Date: Wed, 5 Feb 2020 20:13:22 +0000
Subject: [PATCH 3/3] remove ambiguous reference to int64.

---
 tensorflow/lite/kernels/internal/compatibility.h             | 2 --
 .../kernels/internal/reference/integer_ops/transpose_conv.h  | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/tensorflow/lite/kernels/internal/compatibility.h b/tensorflow/lite/kernels/internal/compatibility.h
index 73075e1acc7..bfd021ac48d 100644
--- a/tensorflow/lite/kernels/internal/compatibility.h
+++ b/tensorflow/lite/kernels/internal/compatibility.h
@@ -83,8 +83,6 @@ using int16 = std::int16_t;
 using uint16 = std::uint16_t;
 using int32 = std::int32_t;
 using uint32 = std::uint32_t;
-using int64 = std::int64_t;
-using uint64 = std::uint64_t;
 
 // TFLITE_DEPRECATED()
 //
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h b/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
index 78166c14688..e27e238b8bc 100644
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
@@ -120,7 +120,7 @@ inline void TransposeConv(
     const int16* input_data, const RuntimeShape& filter_shape,
     const int8* filter_data, const RuntimeShape& output_shape,
     int16* output_data, const RuntimeShape& im2col_shape, int8* im2col_data,
-    int64* scratch_buffer) {
+    std::int64_t* scratch_buffer) {
   const int stride_width = params.stride_width;
   const int stride_height = params.stride_height;
   const int pad_width = params.padding_values.width;
@@ -147,7 +147,7 @@ inline void TransposeConv(
   const int num_elements = output_shape.FlatSize();
   // We need to initialize scratch_buffer to all 0s, as we apply the same
   // 'scatter' based trick as in float version.
-  memset(scratch_buffer, 0, num_elements * sizeof(int64));
+  memset(scratch_buffer, 0, num_elements * sizeof(std::int64_t));
 
   // Loop through input elements one at a time.
   for (int batch = 0; batch < batches; ++batch) {
@@ -188,7 +188,7 @@ inline void TransposeConv(
     for (int out_y = 0; out_y < output_height; ++out_y) {
       for (int out_x = 0; out_x < output_width; ++out_x) {
         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          int64 acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
+          std::int64_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
                                             out_channel)];
           int32 scaled_acc = MultiplyByQuantizedMultiplier(
               acc, output_multiplier[out_channel], output_shift[out_channel]);
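As an end-to-end sanity check of the 16x8 path, the first expected value in SimpleTestQuantizedPerChannel16x8 can be reproduced by hand. The arithmetic below follows the scatter loop and the requantization step of the final kernel; all numbers come from the test's tensors:

    // Output element (0, 0, 0, channel 0) of SimpleTestQuantizedPerChannel16x8.
    // With stride 1 and SAME padding, only input pixel (0, 0) = (3, 2) scatters
    // into output (0, 0), through filter tap (out_channel 0, y 0, x 0) = (1, 2):
    //   real arithmetic:  3 * 1 + 2 * 2 = 7
    // Quantized, with input scale 4/127, channel-0 filter scale 7/127, output
    // scale 1.0, and all zero points 0:
    //   q_input  = round(3 * 127 / 4) = 95,  round(2 * 127 / 4) = 64
    //   q_filter = round(1 * 127 / 7) = 18,  round(2 * 127 / 7) = 36
    //   acc      = 95 * 18 + 64 * 36 = 4014        // held in the int64 scratch
    //   output   = round(4014 * (4/127) * (7/127)) = round(6.97) = 7
    // which dequantizes at output scale 1.0 to the expected 7.0.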