diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py
index 8e5c07782f3..1744defea94 100644
--- a/tensorflow/lite/python/convert.py
+++ b/tensorflow/lite/python/convert.py
@@ -108,20 +108,6 @@ class ConverterError(Exception):
   pass
 
 
-def mlir_quantize(input_data_str):
-  """Quantize `input_data_str` with calibration results.
-
-  Args:
-    input_data_str: Input data in serialized form (e.g. a TFLITE model with
-      calibration results).
-
-  Returns:
-    Quantized model in serialized form (e.g. a TFLITE model) with floating-point
-    inputs and outputs.
-  """
-  return wrap_toco.wrapped_experimental_mlir_quantize(input_data_str)
-
-
 def toco_convert_protos(model_flags_str,
                         toco_flags_str,
                         input_data_str,
diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py
index 78aeb7a3d9a..ba9e6e0bd39 100644
--- a/tensorflow/lite/python/lite.py
+++ b/tensorflow/lite/python/lite.py
@@ -37,7 +37,6 @@ from tensorflow.lite.experimental.tensorboard.ops_util import get_potentially_su
 from tensorflow.lite.python import lite_constants as constants
 from tensorflow.lite.python.convert import build_toco_convert_protos  # pylint: disable=unused-import
 from tensorflow.lite.python.convert import ConverterError  # pylint: disable=unused-import
-from tensorflow.lite.python.convert import mlir_quantize as _mlir_quantize
 from tensorflow.lite.python.convert import OpsSet
 from tensorflow.lite.python.convert import toco_convert  # pylint: disable=unused-import
 from tensorflow.lite.python.convert import toco_convert_graph_def as _toco_convert_graph_def
@@ -311,19 +310,13 @@ class TFLiteConverterBase(object):
 
   def _calibrate_quantize_model(self, result, inference_input_type,
                                 inference_output_type, allow_float):
-    """Calibrate and quantize the model."""
     if not isinstance(self.representative_dataset, RepresentativeDataset):
       self.representative_dataset = RepresentativeDataset(
           self.representative_dataset)
     calibrate_quantize = _calibrator.Calibrator(result)
-    if self._experimental_calibrate_only or self._experimental_new_quantizer:
-      calibrated = calibrate_quantize.calibrate(
-          self.representative_dataset.input_gen)
-      if self._experimental_calibrate_only:
-        return calibrated
-      elif self._experimental_new_quantizer:
-        return _mlir_quantize(calibrated)
+    if self._experimental_calibrate_only:
+      return calibrate_quantize.calibrate(self.representative_dataset.input_gen)
     else:
       return calibrate_quantize.calibrate_and_quantize(
           self.representative_dataset.input_gen, inference_input_type,
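With the `_experimental_new_quantizer` escape hatch removed, every post-training quantization request in `TFLiteConverterBase` flows through `Calibrator.calibrate_and_quantize()`, or plain `calibrate()` when only calibration statistics are wanted. A minimal sketch of the converter-level flow this leaves in place, modeled on the tests further down; `func` stands for any concrete function and the input shape is illustrative, not part of this change:

import numpy as np
import tensorflow as tf

def calibration_gen():
  # Yield one sample per call; shapes must match the model's input signature.
  for _ in range(10):
    yield [np.random.uniform(-1, 1, size=(1, 5, 5, 3)).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_concrete_functions([func])
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = calibration_gen
# After this revert, convert() always quantizes through
# Calibrator.calibrate_and_quantize() internally.
quantized_tflite = converter.convert()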
diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py
index 9512bdca70d..508c4fc2053 100644
--- a/tensorflow/lite/python/lite_v2_test.py
+++ b/tensorflow/lite/python/lite_v2_test.py
@@ -137,10 +137,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     to_save = root.f.get_concrete_function()
     return (to_save, calibration_gen)
 
-  @parameterized.named_parameters(
-      ('EnableMlirQuantizer', True),  # enable mlir quantizer
-      ('DisableMlirQuantizer', False))  # disable mlir quantizer
-  def testPostTrainingCalibrateAndQuantize(self, mlir_quantizer):
+  def testPostTrainingCalibrateAndQuantize(self):
     func, calibration_gen = self._getCalibrationQuantizeModel()
 
     # Convert float model.
@@ -152,7 +149,6 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
     quantized_converter.optimizations = [lite.Optimize.DEFAULT]
     quantized_converter.representative_dataset = calibration_gen
-    quantized_converter._experimental_new_quantizer = mlir_quantizer
     quantized_tflite = quantized_converter.convert()
     self.assertTrue(quantized_tflite)
 
@@ -169,10 +165,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     # Ensure that the quantized weights tflite model is smaller.
     self.assertLess(len(quantized_tflite), len(float_tflite))
 
-  @parameterized.named_parameters(
-      ('EnableMlirQuantizer', True),  # enable mlir quantizer
-      ('DisableMlirQuantizer', False))  # disable mlir quantizer
-  def testCalibrateAndQuantizeBuiltinInt8(self, mlir_quantizer):
+  def testCalibrateAndQuantizeBuiltinInt8(self):
     func, calibration_gen = self._getCalibrationQuantizeModel()
 
     # Convert float model.
@@ -187,7 +180,6 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
       lite.OpsSet.TFLITE_BUILTINS_INT8
     ]
     quantized_converter.representative_dataset = calibration_gen
-    quantized_converter._experimental_new_quantizer = mlir_quantizer
     quantized_tflite = quantized_converter.convert()
     self.assertTrue(quantized_tflite)
 
diff --git a/tensorflow/lite/python/wrap_toco.py b/tensorflow/lite/python/wrap_toco.py
index 2d3357819a4..54060844dab 100644
--- a/tensorflow/lite/python/wrap_toco.py
+++ b/tensorflow/lite/python/wrap_toco.py
@@ -41,8 +41,3 @@ def wrapped_toco_convert(model_flags_str, toco_flags_str, input_data_str,
 def wrapped_get_potentially_supported_ops():
   """Wraps TocoGetPotentiallySupportedOps with lazy loader."""
   return _pywrap_toco_api.TocoGetPotentiallySupportedOps()
-
-
-def wrapped_experimental_mlir_quantize(input_data_str):
-  """Wraps experimental mlir quantize model."""
-  return _pywrap_toco_api.ExperimentalMlirQuantizeModel(input_data_str)
diff --git a/tensorflow/lite/toco/python/BUILD b/tensorflow/lite/toco/python/BUILD
index f0aaadc9684..b8a00b90a06 100644
--- a/tensorflow/lite/toco/python/BUILD
+++ b/tensorflow/lite/toco/python/BUILD
@@ -35,12 +35,9 @@ cc_library(
     ],
     deps = [
         "@com_google_protobuf//:protobuf_headers",
+        "//third_party/python_runtime:headers",
         "//tensorflow/core:lib",
-        "//tensorflow/lite/c:common",
-        "//tensorflow/lite/core/api",
         "//tensorflow/lite/python/interpreter_wrapper:python_utils",
-        "//tensorflow/lite/python/interpreter_wrapper:python_error_reporter",
-        "//tensorflow/lite/schema:schema_fbs",
         "//tensorflow/lite/toco/logging:conversion_log_util",
         "//tensorflow/lite/toco:model_flags_proto_cc",
         "//tensorflow/lite/toco:toco_convert",
@@ -50,8 +47,8 @@ cc_library(
         "//tensorflow/lite/toco:toco_port",
        "//tensorflow/lite/toco:toco_tooling",
         "//tensorflow/lite/toco:tooling_util",
+        "//tensorflow/core:protos_all_cc",
         "//tensorflow/compiler/mlir/lite/python:graphdef_to_tfl_flatbuffer",
-        "//tensorflow/compiler/mlir/lite/quantization/lite:quantize_model",
     ] + select({
         # This is required when running `tflite_convert` from `bazel`.
         # It requires to link with TensorFlow Ops to get the op definitions.
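The `Calibrator` class referenced in `_calibrate_quantize_model` survives this change unchanged; only the MLIR-quantizer branch layered on top of it is gone. A minimal sketch of driving it directly, assuming `float_model` holds serialized TFLite flatbuffer bytes and `input_gen` is a generator yielding lists of input arrays (both placeholders, not defined in this patch):

from tensorflow.lite.python import lite_constants as constants
from tensorflow.lite.python.optimize import calibrator as _calibrator

quantizer = _calibrator.Calibrator(float_model)
# Feed calibration data and emit a quantized model in one step, mirroring
# the calibrate_and_quantize() call that remains in lite.py.
quantized_model = quantizer.calibrate_and_quantize(
    input_gen, constants.FLOAT, constants.FLOAT, True)
# Alternatively, _experimental_calibrate_only maps to calibrate(), which
# embeds min/max statistics but leaves the weights in float:
#   calibrated_model = _calibrator.Calibrator(float_model).calibrate(input_gen)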
#include "google/protobuf/text_format.h" #include "tensorflow/compiler/mlir/lite/python/graphdef_to_tfl_flatbuffer.h" -#include "tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/python/interpreter_wrapper/python_error_reporter.h" #include "tensorflow/lite/python/interpreter_wrapper/python_utils.h" -#include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/toco/import_tensorflow.h" #include "tensorflow/lite/toco/logging/conversion_log_util.h" #include "tensorflow/lite/toco/logging/toco_conversion_log.pb.h" @@ -214,39 +209,4 @@ PyObject* TocoGetPotentiallySupportedOps() { return list; } -PyObject* MlirQuantizeModel(PyObject* data, bool fully_quantize) { - using tflite::interpreter_wrapper::PythonErrorReporter; - char* buf = nullptr; - Py_ssize_t length; - std::unique_ptr error_reporter(new PythonErrorReporter); - - if (tflite::python_utils::ConvertFromPyString(data, &buf, &length) == -1) { - PyErr_Format(PyExc_ValueError, "Failed to convert input PyObject"); - return nullptr; - } - std::unique_ptr model = - tflite::FlatBufferModel::BuildFromBuffer(buf, length, - error_reporter.get()); - if (!model) { - PyErr_Format(PyExc_ValueError, "Invalid model"); - return nullptr; - } - auto tflite_model = absl::make_unique(); - model->GetModel()->UnPackTo(tflite_model.get(), nullptr); - - flatbuffers::FlatBufferBuilder builder; - auto status = mlir::lite::QuantizeModel( - *tflite_model, tflite::TensorType::TensorType_FLOAT32, - tflite::TensorType::TensorType_FLOAT32, {}, fully_quantize, &builder, - error_reporter.get()); - - if (status != kTfLiteOk) { - error_reporter->exception(); - return nullptr; - } - return tflite::python_utils::ConvertToPyString( - reinterpret_cast(builder.GetCurrentBufferPointer()), - builder.GetSize()); -} - } // namespace toco diff --git a/tensorflow/lite/toco/python/toco_python_api.h b/tensorflow/lite/toco/python/toco_python_api.h index ca67e3f0aac..dbb72e30803 100644 --- a/tensorflow/lite/toco/python/toco_python_api.h +++ b/tensorflow/lite/toco/python/toco_python_api.h @@ -16,7 +16,6 @@ limitations under the License. #define TENSORFLOW_LITE_TOCO_PYTHON_TOCO_PYTHON_API_H_ #include - #include namespace toco { @@ -40,10 +39,6 @@ PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, // Returns a list of names of all ops potentially supported by tflite. PyObject* TocoGetPotentiallySupportedOps(); -// Quantize the model with calibration data. Throw errors if `fully_quantize` -// is specified by the calibration data are not sufficient to quantize the -// model. -PyObject* MlirQuantizeModel(PyObject* data, bool fully_quantize); } // namespace toco #endif // TENSORFLOW_LITE_TOCO_PYTHON_TOCO_PYTHON_API_H_ diff --git a/tensorflow/python/lite/toco_python_api_wrapper.cc b/tensorflow/python/lite/toco_python_api_wrapper.cc index 1976aa9d3da..9199c79a48f 100644 --- a/tensorflow/python/lite/toco_python_api_wrapper.cc +++ b/tensorflow/python/lite/toco_python_api_wrapper.cc @@ -54,14 +54,4 @@ PYBIND11_MODULE(_pywrap_toco_api, m) { R"pbdoc( Returns a list of names of all ops potentially supported by tflite. 
)pbdoc"); - m.def( - "ExperimentalMlirQuantizeModel", - [](py::object input_contents_txt_raw, bool fully_quantize) { - return tensorflow::pyo_or_throw(toco::MlirQuantizeModel( - input_contents_txt_raw.ptr(), fully_quantize)); - }, - py::arg("input_contents_txt_raw"), py::arg("fully_quantize") = true, - R"pbdoc( - Returns a quantized model. - )pbdoc"); }