diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py
index 7e5f8ce704f..900398d7a6f 100644
--- a/tensorflow/lite/python/lite.py
+++ b/tensorflow/lite/python/lite.py
@@ -180,11 +180,13 @@ class QuantizationMode(object):
   def post_training_int8_no_float(self):
     """Post training int8 quantize, disallow float fallback."""
     return (self._is_int8_target_required() and
+            not self._is_int16x8_target_required() and
             self._representative_dataset is not None)

   def post_training_int8_allow_float(self):
     """Post training int8 quantize, allow float fallback."""
     return (self._any_optimization_enabled() and
+            not self._is_int16x8_target_required() and
             self._representative_dataset is not None and
             self._smallest_supported_type() == constants.INT8)

@@ -193,6 +195,18 @@ class QuantizationMode(object):
     return (self._any_optimization_enabled() and
             self._contains_training_quant_op())

+  def post_training_int16x8_no_float(self):
+    """Post training int16x8 quantize, disallow float fallback."""
+    return (not self._is_int8_target_required() and
+            self._is_int16x8_target_required() and
+            not self._is_allow_float() and
+            self._representative_dataset is not None)
+
+  def post_training_int16x8_allow_float(self):
+    """Post training int16x8 quantize, allow float fallback."""
+    return (self._is_int16x8_target_required() and
+            self._is_allow_float())
+
   def post_training_dynamic_range_int8(self):
     """Post training int8 const, on-the-fly int8 quantize of dynamic tensors."""
     # Post-training dynamic range quantization is only enabled if post-training
@@ -212,9 +226,14 @@ class QuantizationMode(object):
     return not (self.post_training_int8_no_float() or
                 self.post_training_int8_allow_float() or
                 self.training_time_int8_allow_float() or
+                self.post_training_int16x8_no_float() or
+                self.post_training_int16x8_allow_float() or
                 self.post_training_dynamic_range_int8() or
                 self.post_training_fp16())

+  def activations_type(self):
+    return constants.INT16 if self._is_int16x8_target_required() else constants.INT8
+
   # Below are helpers for the above functions.

   def _validate_int8_required(self):
@@ -244,6 +263,18 @@ class QuantizationMode(object):
         self._target_spec.supported_ops) or
             set(self._target_spec.supported_types) == set([constants.INT8]))

+  def _is_int16x8_target_required(self):
+    return bool(
+        set(self._target_spec.supported_ops).intersection([
+            OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+        ]))
+
+  def _is_allow_float(self):
+    return bool(
+        set(self._target_spec.supported_ops).intersection([
+            OpsSet.TFLITE_BUILTINS
+        ]))
+
   def _any_optimization_enabled(self):
     return bool(
         set(self._optimizations).intersection([
@@ -309,13 +340,13 @@ class TFLiteConverterBase(object):
     return _get_grappler_config(optimizers)

   def _calibrate_quantize_model(self, result, inference_input_type,
-                                inference_output_type, allow_float):
+                                inference_output_type, activations_type, allow_float):
     if not isinstance(self.representative_dataset, RepresentativeDataset):
       self.representative_dataset = RepresentativeDataset(
           self.representative_dataset)
     calibrate_quantize = _calibrator.Calibrator(result)
-    activations_type = constants.INT16 if self._is_int16x8_target_required() else constants.INT8
+
     if self.experimental_calibrate_only:
       return calibrate_quantize.calibrate(self.representative_dataset.input_gen)
     else:
@@ -608,12 +639,20 @@ class TFLiteConverterV2(TFLiteConverterBase):
         output_tensors=output_tensors,
         **converter_kwargs)

+    activations_type = quant_mode.activations_type()
+
     if quant_mode.post_training_int8_no_float():
       result = self._calibrate_quantize_model(result, constants.FLOAT,
-                                              constants.FLOAT, False)
+                                              constants.FLOAT, activations_type, False)
     elif quant_mode.post_training_int8_allow_float():
       result = self._calibrate_quantize_model(result, constants.FLOAT,
-                                              constants.FLOAT, True)
+                                              constants.FLOAT, activations_type, True)
+    elif quant_mode.post_training_int16x8_no_float():
+      result = self._calibrate_quantize_model(result, constants.FLOAT,
+                                              constants.FLOAT, activations_type, False)
+    elif quant_mode.post_training_int16x8_allow_float():
+      result = self._calibrate_quantize_model(result, constants.FLOAT,
+                                              constants.FLOAT, activations_type, True)

     return result

@@ -1114,6 +1153,8 @@ class TFLiteConverter(TFLiteConverterBase):
         quant_mode.post_training_int8_no_float() or
         quant_mode.post_training_int8_allow_float() or
         quant_mode.post_training_dynamic_range_int8() or
+        quant_mode.post_training_int16x8_no_float() or
+        quant_mode.post_training_int16x8_allow_float() or
         quant_mode.post_training_fp16())
     if post_training_optimize:
       # Post training optimizations require that TOCO outputs a float model.
@@ -1223,12 +1264,20 @@ class TFLiteConverter(TFLiteConverterBase):
         output_arrays=self._output_arrays,
         **converter_kwargs)

+    activations_type = quant_mode.activations_type()
+
     if quant_mode.post_training_int8_no_float():
       result = self._calibrate_quantize_model(result, inference_input_type,
-                                              inference_output_type, False)
+                                              inference_output_type, activations_type, False)
     elif quant_mode.post_training_int8_allow_float():
       result = self._calibrate_quantize_model(result, inference_input_type,
-                                              inference_output_type, True)
+                                              inference_output_type, activations_type, True)
+    elif quant_mode.post_training_int16x8_no_float():
+      result = self._calibrate_quantize_model(result, inference_input_type,
+                                              inference_output_type, activations_type, False)
+    elif quant_mode.post_training_int16x8_allow_float():
+      result = self._calibrate_quantize_model(result, inference_input_type,
+                                              inference_output_type, activations_type, True)

     return result

@@ -1334,7 +1383,6 @@ class TocoConverter(object):
   @classmethod
   @_deprecation.deprecated(
-      None,
-      "Use `lite.TFLiteConverter.from_keras_model_file` instead.")
+      None, "Use `lite.TFLiteConverter.from_keras_model_file` instead.")
   def from_keras_model_file(cls,
                             model_file,
                             input_arrays=None,
diff --git a/tensorflow/lite/python/optimize/calibration_wrapper_pybind11.cc b/tensorflow/lite/python/optimize/calibration_wrapper_pybind11.cc
index f56b23090b9..9a8fea5d1f6 100644
--- a/tensorflow/lite/python/optimize/calibration_wrapper_pybind11.cc
+++ b/tensorflow/lite/python/optimize/calibration_wrapper_pybind11.cc
@@ -40,17 +40,17 @@ PYBIND11_MODULE(_pywrap_tensorflow_lite_calibration_wrapper, m) {
       })
       .def("QuantizeModel",
           [](CalibrationWrapper& self, int input_py_type, int output_py_type,
-             bool allow_float, bool enable_mlir_quantizer) {
+             bool allow_float, int activations_py_type, bool enable_mlir_quantizer) {
            return tensorflow::pyo_or_throw(
                self.QuantizeModel(input_py_type, output_py_type, allow_float,
-                                  enable_mlir_quantizer));
+                                  activations_py_type, enable_mlir_quantizer));
          })
      .def("QuantizeModel",
          [](CalibrationWrapper& self, int input_py_type, int output_py_type,
-            bool allow_float) {
+            bool allow_float, int activations_py_type) {
           return tensorflow::pyo_or_throw(
               self.QuantizeModel(input_py_type, output_py_type, allow_float,
-                                 /*enable_mlir_quantizer=*/false));
+                                 activations_py_type, /*enable_mlir_quantizer=*/false));
         })
     .def("QuantizeModel",
         [](CalibrationWrapper& self, int input_py_type, int output_py_type,
           bool allow_float,
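
Not part of the patch itself: the sketch below shows how a user would opt in to the new 16x8 scheme from the public converter API once this change lands. It assumes the OpsSet value keeps the name used in this diff (released TensorFlow builds may expose it with an EXPERIMENTAL_ prefix), and the SavedModel path, input shape, and calibration generator are placeholders.

import tensorflow as tf

def representative_dataset_gen():
  # Placeholder calibration data; yield real samples shaped like the model input.
  for _ in range(100):
    yield [tf.random.uniform((1, 224, 224, 3), dtype=tf.float32)]

converter = tf.lite.TFLiteConverter.from_saved_model("/path/to/saved_model")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
# Request 16-bit activations with 8-bit weights and no float fallback.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
tflite_model = converter.convert()

Listing only the 16x8 op set drives QuantizationMode.post_training_int16x8_no_float(); adding tf.lite.OpsSet.TFLITE_BUILTINS alongside it triggers _is_allow_float() and selects the post_training_int16x8_allow_float() path instead.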