Fix quant notebooks for tensorflow.org
PiperOrigin-RevId: 261408660
parent 8628f75ee1
commit 68ea31c83c
@@ -76,13 +76,12 @@ upper_tabs:
         path: /lite/performance/model_optimization
       - title: "Post-training quantization"
         path: /lite/performance/post_training_quantization
-      - title: "Post-training quantization example"
-        path: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quant.ipynb
-      - title: "Post-training integer quantization example"
-        path: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_integer_quant.ipynb
-      - title: "Post-training float16 quantization example"
-        path: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_float16_quant.ipynb
-        status: external
+      - title: "Post-training weight quantization"
+        path: /lite/performance/post_training_quant
+      - title: "Post-training integer quantization"
+        path: /lite/performance/post_training_integer_quant
+      - title: "Post-training float16 quantization"
+        path: /lite/performance/post_training_float16_quant
       - title: "Delegates"
         path: /lite/performance/delegates
       - title: "GPU delegate"
@@ -3,7 +3,7 @@
   "nbformat_minor": 0,
   "metadata": {
     "colab": {
-      "name": "post-training-fp16-quant.ipynb",
+      "name": "post_training-float16-quant.ipynb",
       "version": "0.3.2",
       "provenance": [],
       "private_outputs": true,
@@ -11,11 +11,45 @@
       "toc_visible": true
     },
     "kernelspec": {
-      "display_name": "Python 2",
-      "name": "python2"
+      "name": "python3",
+      "display_name": "Python 3"
     }
   },
   "cells": [
     {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "c8Cx-rUMVX25",
+        "colab_type": "text"
+      },
+      "source": [
+        "##### Copyright 2019 The TensorFlow Authors."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "I9sUhVL_VZNO",
+        "colab_type": "code",
+        "colab": {},
+        "cellView": "form"
+      },
+      "source": [
+        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
       "cell_type": "markdown",
       "metadata": {
@@ -23,22 +57,25 @@
         "id": "6Y8E0lw5eYWm"
       },
       "source": [
-        "# Post Training FP16 Quantization"
+        "# Post-training float16 quantization"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {
-        "colab_type": "text",
-        "id": "CIGrZZPTZVeO"
+        "id": "CGuqeuPSVNo-",
+        "colab_type": "text"
       },
       "source": [
         "<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
         "  <td>\n",
-        "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_float16_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
+        "    <a target=\"_blank\" href=\"https://www.tensorflow.org/lite/performance/post_training_float16_quant\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n",
         "  </td>\n",
         "  <td>\n",
-        "    <a target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_float16_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
+        "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_float16_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_float16_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
         "  </td>\n",
         "</table>"
       ]
@@ -55,11 +92,10 @@
         "[TensorFlow Lite](https://www.tensorflow.org/lite/) now supports\n",
         "converting weights to 16-bit floating point values during model conversion from TensorFlow to TensorFlow Lite's flat buffer format. This results in a 2x reduction in model size. Some hardware, like GPUs, can compute natively in this reduced precision arithmetic, realizing a speedup over traditional floating point execution. The TensorFlow Lite GPU delegate can be configured to run in this way. However, a model converted to float16 weights can still run on the CPU without additional modification: the float16 weights are upsampled to float32 prior to the first inference. This permits a significant reduction in model size in exchange for a minimal impact on latency and accuracy.\n",
         "\n",
-        "In this tutorial, we train an MNIST model from scratch, check its accuracy in TensorFlow, and then convert the saved model into a Tensorflow Lite flatbuffer\n",
-        "with float16 quantization. We finally check the\n",
-        "accuracy of the converted model and compare it to the original saved model. We\n",
-        "run the training script [mnist.py](https://github.com/tensorflow/models/blob/master/official/mnist/mnist.py) from\n",
-        "[Tensorflow official MNIST tutorial](https://github.com/tensorflow/models/tree/master/official/mnist).\n"
+        "In this tutorial, you train an MNIST model from scratch, check its accuracy in TensorFlow, and then convert the saved model into a TensorFlow Lite flatbuffer\n",
+        "with float16 quantization. Finally, check the\n",
+        "accuracy of the converted model and compare it to the original saved model. The training script, `mnist.py`, is available from the\n",
+        "[TensorFlow official MNIST tutorial](https://github.com/tensorflow/models/tree/master/official/mnist).\n"
       ]
     },
     {
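For reference, the float16 conversion step that this notebook walks through boils down to a few converter flags. A minimal sketch, assuming the TF 1.x nightly converter API used throughout these notebooks (`tf.lite.constants.FLOAT16` is that era's name for the float16 type):

    import tensorflow as tf

    # Load the SavedModel exported by the training step.
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    # Ask the converter to quantize weights, allowing float16 as a target type.
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_types = [tf.lite.constants.FLOAT16]
    tflite_fp16_model = converter.convert()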
@@ -69,7 +105,7 @@
         "id": "2XsEP17Zelz9"
       },
       "source": [
-        "## Building an MNIST model"
+        "## Build an MNIST model"
       ]
     },
     {
@@ -105,7 +141,11 @@
       },
       "source": [
         "import tensorflow as tf\n",
-        "tf.enable_eager_execution()"
+        "tf.enable_eager_execution()\n",
+        "\n",
+        "import numpy as np\n",
+        "\n",
+        "tf.logging.set_verbosity(tf.logging.DEBUG)"
       ],
       "execution_count": 0,
       "outputs": []
@@ -203,8 +243,7 @@
         "id": "5NMaNZQCkW9X"
       },
       "source": [
-        "For the example, we only trained the model for a single epoch, so it only trains to ~96% accuracy.\n",
-        "\n"
+        "For the example, you trained the model for just a single epoch, so it only trains to ~96% accuracy."
       ]
     },
     {
@@ -253,10 +292,6 @@
         "colab": {}
       },
       "source": [
-        "import tensorflow as tf\n",
-        "tf.enable_eager_execution()\n",
-        "tf.logging.set_verbosity(tf.logging.DEBUG)\n",
-        "\n",
         "converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)\n",
         "tflite_model = converter.convert()"
       ],
@@ -391,8 +426,7 @@
         "id": "-5l6-ciItvX6"
       },
       "source": [
-        "We can run the TensorFlow Lite model using the Python TensorFlow Lite\n",
-        "Interpreter. \n",
+        "Run the TensorFlow Lite model using the Python TensorFlow Lite Interpreter. \n",
         "\n",
         "### Load the test data\n",
         "\n",
@@ -407,7 +441,6 @@
         "colab": {}
       },
       "source": [
-        "import numpy as np\n",
         "_, mnist_test = tf.keras.datasets.mnist.load_data()\n",
         "images, labels = tf.cast(mnist_test[0], tf.float32)/255.0, mnist_test[1]\n",
         "\n",
@@ -585,7 +618,10 @@
         "colab": {}
       },
       "source": [
-        "print(eval_model(interpreter, mnist_ds))"
+        "# Create smaller dataset for demonstration purposes\n",
+        "mnist_ds_demo = mnist_ds.take(2000)\n",
+        "\n",
+        "print(eval_model(interpreter, mnist_ds_demo))"
       ],
       "execution_count": 0,
       "outputs": []
@@ -597,7 +633,7 @@
         "id": "Km3cY9ry8ZlG"
       },
       "source": [
-        "We can repeat the evaluation on the float16 quantized model to obtain:\n"
+        "Repeat the evaluation on the float16 quantized model to obtain:"
       ]
     },
     {
@@ -612,7 +648,7 @@
         "# doesn't have super optimized server CPU kernels. For this reason this may be\n",
         "# slower than the above float interpreter. But for mobile CPUs, considerable\n",
         "# speedup can be observed.\n",
-        "print(eval_model(interpreter_fp16, mnist_ds))\n"
+        "print(eval_model(interpreter_fp16, mnist_ds_demo))"
       ],
       "execution_count": 0,
       "outputs": []
@@ -624,7 +660,7 @@
         "id": "L7lfxkor8pgv"
       },
       "source": [
-        "In this example, we have quantized a model to float16 with no difference in the accuracy.\n",
+        "In this example, you have quantized a model to float16 with no difference in the accuracy.\n",
         "\n",
         "It's also possible to evaluate the fp16 quantized model on the GPU. To perform all arithmetic with the reduced precision values, be sure to create the `TfLiteGPUDelegateOptions` struct in your app and set `precision_loss_allowed` to `1`, like this:\n",
         "\n",
@@ -3,7 +3,7 @@
   "nbformat_minor": 0,
   "metadata": {
     "colab": {
-      "name": "post-training--integer-quant.ipynb",
+      "name": "post_training_integer_quant.ipynb",
       "version": "0.3.2",
       "provenance": [],
       "private_outputs": true,
@@ -11,11 +11,45 @@
       "toc_visible": true
     },
     "kernelspec": {
-      "display_name": "Python 2",
-      "name": "python2"
+      "name": "python3",
+      "display_name": "Python 3"
     }
   },
   "cells": [
     {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_DDaAex5Q7u-",
+        "colab_type": "text"
+      },
+      "source": [
+        "##### Copyright 2019 The TensorFlow Authors."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "W1dWWdNHQ9L0",
+        "colab_type": "code",
+        "colab": {},
+        "cellView": "form"
+      },
+      "source": [
+        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
       "cell_type": "markdown",
       "metadata": {
@@ -23,7 +57,7 @@
         "id": "6Y8E0lw5eYWm"
       },
       "source": [
-        "# Post Training Integer Quantization"
+        "# Post-training integer quantization"
      ]
     },
     {
@@ -35,10 +69,13 @@
       "source": [
         "<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
         "  <td>\n",
-        "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_integer_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
+        "    <a target=\"_blank\" href=\"https://www.tensorflow.org/lite/performance/post_training_integer_quant\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n",
         "  </td>\n",
         "  <td>\n",
-        "    <a target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_integer_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
+        "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_integer_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_integer_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
         "  </td>\n",
         "</table>"
       ]
@@ -58,11 +95,10 @@
         "In contrast to [post-training \"on-the-fly\" quantization](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_quant.ipynb)\n",
         ", which only stores weights as 8-bit ints, in this technique all weights *and* activations are quantized statically during model conversion.\n",
         "\n",
-        "In this tutorial, we train an MNIST model from scratch, check its accuracy in TensorFlow, and then convert the saved model into a Tensorflow Lite flatbuffer\n",
-        "with full quantization. We finally check the\n",
-        "accuracy of the converted model and compare it to the original saved model. We\n",
-        "run the training script [mnist.py](https://github.com/tensorflow/models/blob/master/official/mnist/mnist.py) from\n",
-        "[Tensorflow official MNIST tutorial](https://github.com/tensorflow/models/tree/master/official/mnist).\n"
+        "In this tutorial, you train an MNIST model from scratch, check its accuracy in TensorFlow, and then convert the saved model into a TensorFlow Lite flatbuffer\n",
+        "with full quantization. Finally, check the\n",
+        "accuracy of the converted model and compare it to the original saved model. The training script, `mnist.py`, is available from the\n",
+        "[TensorFlow official MNIST tutorial](https://github.com/tensorflow/models/tree/master/official/mnist).\n"
       ]
     },
     {
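For reference, "full" integer quantization of weights and activations is driven by a representative dataset that the converter uses to calibrate activation ranges. A minimal sketch, assuming the TF 1.x converter API used in these notebooks; the generator below and the reuse of the notebook's mnist_ds dataset are illustrative:

    # A generator yielding a few hundred typical inputs, one batch per yield.
    def representative_data_gen():
      for img, _ in mnist_ds.take(100):
        yield [img]

    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # Calibration data lets the converter fix quantization parameters
    # for activations, not just weights.
    converter.representative_dataset = representative_data_gen
    tflite_quant_model = converter.convert()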
@@ -72,7 +108,7 @@
         "id": "2XsEP17Zelz9"
       },
       "source": [
-        "## Building an MNIST model"
+        "## Build an MNIST model"
       ]
     },
     {
@@ -194,8 +230,7 @@
         "id": "5NMaNZQCkW9X"
       },
       "source": [
-        "For the example, we only trained the model for a single epoch, so it only trains to ~96% accuracy.\n",
-        "\n"
+        "For the example, you train the model for just a single epoch, so it only trains to ~96% accuracy."
       ]
     },
     {
@@ -353,7 +388,7 @@
         "colab_type": "text"
       },
       "source": [
-        "Finally, convert the model like usual. Note, by default the converted model will still use float input and outputs for invocation convenience."
+        "Finally, convert the model like usual. By default, the converted model will still use float input and outputs for invocation convenience."
       ]
     },
     {
@@ -411,7 +446,7 @@
         "id": "-5l6-ciItvX6"
       },
       "source": [
-        "We can run the TensorFlow Lite model using the Python TensorFlow Lite\n",
+        "Run the TensorFlow Lite model using the Python TensorFlow Lite\n",
         "Interpreter. \n",
         "\n",
         "### Load the test data\n",
@@ -580,6 +615,7 @@
         "\n",
         "  input_index = interpreter.get_input_details()[0][\"index\"]\n",
         "  output_index = interpreter.get_output_details()[0][\"index\"]\n",
+        "\n",
         "  for img, label in mnist_ds:\n",
         "    total_seen += 1\n",
         "    interpreter.set_tensor(input_index, img)\n",
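For context, the evaluation helper these fragments belong to looks roughly like this. A sketch reconstructed from the lines visible in this diff; the accuracy comparison via argmax is an assumption:

    def eval_model(interpreter, mnist_ds):
      total_seen = 0
      num_correct = 0

      input_index = interpreter.get_input_details()[0]["index"]
      output_index = interpreter.get_output_details()[0]["index"]

      for img, label in mnist_ds:
        total_seen += 1
        interpreter.set_tensor(input_index, img)
        interpreter.invoke()
        predictions = interpreter.get_tensor(output_index)
        if predictions.argmax() == label.numpy():
          num_correct += 1

      return float(num_correct) / float(total_seen)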
@@ -605,7 +641,10 @@
         "colab": {}
       },
       "source": [
-        "print(eval_model(interpreter, mnist_ds))"
+        "# Create smaller dataset for demonstration purposes\n",
+        "mnist_ds_demo = mnist_ds.take(2000)\n",
+        "\n",
+        "print(eval_model(interpreter, mnist_ds_demo))"
       ],
       "execution_count": 0,
       "outputs": []
@@ -617,7 +656,7 @@
         "id": "Km3cY9ry8ZlG"
       },
       "source": [
-        "We can repeat the evaluation on the fully quantized model to obtain:\n"
+        "Repeat the evaluation on the fully quantized model to obtain:"
       ]
     },
     {
@@ -632,7 +671,8 @@
         "# doesn't have super optimized server CPU kernels. For this reason this may be\n",
         "# slower than the above float interpreter. But for mobile CPUs, considerable\n",
         "# speedup can be observed.\n",
-        "print(eval_model(interpreter_quant, mnist_ds))\n"
+        "# Only use 2000 for demonstration purposes\n",
+        "print(eval_model(interpreter_quant, mnist_ds_demo))"
       ],
       "execution_count": 0,
       "outputs": []
@@ -644,8 +684,8 @@
         "id": "L7lfxkor8pgv"
       },
       "source": [
-        "In this example, we have fully quantized a model with no difference in the accuracy."
+        "In this example, you have fully quantized a model with no difference in the accuracy."
       ]
     }
   ]
 }
@@ -1,5 +1,55 @@
 {
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "post_training_quant.ipynb",
+      "version": "0.3.2",
+      "provenance": [],
+      "private_outputs": true,
+      "collapsed_sections": [],
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
   "cells": [
     {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_-GR0EDHM1SO",
+        "colab_type": "text"
+      },
+      "source": [
+        "##### Copyright 2019 The TensorFlow Authors."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "R3yYtBPkM2qZ",
+        "colab_type": "code",
+        "colab": {},
+        "cellView": "form"
+      },
+      "source": [
+        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
       "cell_type": "markdown",
       "metadata": {
@@ -7,7 +57,7 @@
         "id": "6Y8E0lw5eYWm"
       },
       "source": [
-        "# Post Training Quantization"
+        "# Post-training weight quantization"
       ]
     },
     {
@@ -17,14 +67,17 @@
         "id": "CIGrZZPTZVeO"
       },
       "source": [
-        "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
-        "  \u003ctd\u003e\n",
-        "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_quant.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
-        "  \u003c/td\u003e\n",
-        "  \u003ctd\u003e\n",
-        "    \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_quant.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
-        "  \u003c/td\u003e\n",
-        "\u003c/table\u003e"
+        "<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://www.tensorflow.org/lite/performance/post_training_quant\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quant.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
+        "  </td>\n",
+        "</table>"
       ]
     },
     {
@@ -38,34 +91,32 @@
         "\n",
         "[TensorFlow Lite](https://www.tensorflow.org/lite/) now supports\n",
         "converting weights to 8 bit precision as part of model conversion from\n",
-        "tensorflow graphdefs to TFLite's flat buffer format. Weight quantization\n",
+        "tensorflow graphdefs to TensorFlow Lite's flat buffer format. Weight quantization\n",
         "achieves a 4x reduction in the model size. In addition, TFLite supports on the\n",
         "fly quantization and dequantization of activations to allow for:\n",
         "\n",
         "1. Using quantized kernels for faster implementation when available.\n",
         "\n",
         "2. Mixing of floating-point kernels with quantized kernels for different parts\n",
         "   of the graph.\n",
         "\n",
-        "Note that the activations are always stored in floating point. For ops that\n",
+        "The activations are always stored in floating point. For ops that\n",
         "support quantized kernels, the activations are quantized to 8 bits of precision\n",
         "dynamically prior to processing and are de-quantized to float precision after\n",
         "processing. Depending on the model being converted, this can give a speedup over\n",
         "pure floating point computation.\n",
         "\n",
         "In contrast to\n",
-        "[quantization aware training](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/quantize)\n",
+        "[quantization aware training](https://github.com/tensorflow/tensorflow/tree/r1.14/tensorflow/contrib/quantize)\n",
         ", the weights are quantized post training and the activations are quantized dynamically \n",
         "at inference in this method.\n",
         "Therefore, the model weights are not retrained to compensate for quantization\n",
         "induced errors. It is important to check the accuracy of the quantized model to\n",
         "ensure that the degradation is acceptable.\n",
         "\n",
-        "In this tutorial, we train an MNIST model from scratch, check its accuracy in\n",
-        "tensorflow and then convert the saved model into a Tensorflow Lite flatbuffer\n",
-        "with weight quantization. We finally check the\n",
-        "accuracy of the converted model and compare it to the original saved model. We\n",
-        "run the training script mnist.py from\n",
+        "This tutorial trains an MNIST model from scratch, checks its accuracy in\n",
+        "TensorFlow, and then converts the saved model into a TensorFlow Lite flatbuffer\n",
+        "with weight quantization. Finally, it checks the\n",
+        "accuracy of the converted model and compares it to the original saved model. The training script, `mnist.py`, is from\n",
         "[TensorFlow official MNIST tutorial](https://github.com/tensorflow/models/tree/master/official/mnist).\n"
       ]
     },
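For reference, the weight-only quantization described above needs no calibration data; it is a single converter flag. A minimal sketch using the same OPTIMIZE_FOR_SIZE flag this notebook applies to the resnet model further down:

    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    # Quantize weights to 8 bits; activations stay float and are
    # (de)quantized on the fly at inference time.
    converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    tflite_quant_model = converter.convert()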
@@ -76,7 +127,7 @@
         "id": "2XsEP17Zelz9"
       },
       "source": [
-        "## Building an MNIST model"
+        "## Build an MNIST model"
       ]
     },
     {
@@ -91,59 +142,57 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "gyqAw1M9lyab"
+        "id": "gyqAw1M9lyab",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "! pip uninstall -y tensorflow\n",
         "! pip install -U tf-nightly"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "WsN6s5L1ieNl"
+        "id": "WsN6s5L1ieNl",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "import tensorflow as tf\n",
         "tf.enable_eager_execution()"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "00U0taBoe-w7"
+        "id": "00U0taBoe-w7",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "! git clone --depth 1 https://github.com/tensorflow/models"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "4XZPtSh-fUOc"
+        "id": "4XZPtSh-fUOc",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "import sys\n",
         "import os\n",
         "\n",
-        "if sys.version_info.major \u003e= 3:\n",
+        "if sys.version_info.major >= 3:\n",
         "  import pathlib\n",
         "else:\n",
         "  import pathlib2 as pathlib\n",
@@ -151,7 +200,9 @@
         "# Add `models` to the python path.\n",
         "models_path = os.path.join(os.getcwd(), \"models\")\n",
         "sys.path.append(models_path)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -165,31 +216,31 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "eMsw_6HujaqM"
+        "id": "eMsw_6HujaqM",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "saved_models_root = \"/tmp/mnist_saved_model\""
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "hWSAjQWagIHl"
+        "id": "hWSAjQWagIHl",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "# The above path addition is not visible to subprocesses, so add the path for the subprocess as well.\n",
         "# Note: channels_last is required here or the conversion may fail. \n",
         "!PYTHONPATH={models_path} python models/official/mnist/mnist.py --train_epochs=1 --export_dir {saved_models_root} --data_format=channels_last"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -198,7 +249,7 @@
         "id": "5NMaNZQCkW9X"
       },
       "source": [
-        "For the example, we only trained the model for a single epoch, so it only trains to ~96% accuracy.\n",
+        "For the example, you trained the model for just a single epoch, so it only trains to ~96% accuracy.\n",
         "\n"
       ]
     },
@@ -216,17 +267,17 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "Xp5oClaZkbtn"
+        "id": "Xp5oClaZkbtn",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "saved_model_dir = str(sorted(pathlib.Path(saved_models_root).glob(\"*\"))[-1])\n",
         "saved_model_dir"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -242,19 +293,19 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "_i8B2nDZmAgQ"
+        "id": "_i8B2nDZmAgQ",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "import tensorflow as tf\n",
         "tf.enable_eager_execution()\n",
         "converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)\n",
         "tflite_model = converter.convert()"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -268,31 +319,31 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "vptWZq2xnclo"
+        "id": "vptWZq2xnclo",
+        "colab": {}
      },
-      "outputs": [],
       "source": [
         "tflite_models_dir = pathlib.Path(\"/tmp/mnist_tflite_models/\")\n",
         "tflite_models_dir.mkdir(exist_ok=True, parents=True)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "Ie9pQaQrn5ue"
+        "id": "Ie9pQaQrn5ue",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "tflite_model_file = tflite_models_dir/\"mnist_model.tflite\"\n",
         "tflite_model_file.write_bytes(tflite_model)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -306,14 +357,12 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "g8PUvLWDlmmz"
+        "id": "g8PUvLWDlmmz",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "# Note: If you don't have a recent tf-nightly installed, the\n",
         "# \"optimizations\" line will have no effect.\n",
         "tf.logging.set_verbosity(tf.logging.INFO)\n",
@@ -321,7 +370,9 @@
         "tflite_quant_model = converter.convert()\n",
         "tflite_model_quant_file = tflite_models_dir/\"mnist_model_quant.tflite\"\n",
         "tflite_model_quant_file.write_bytes(tflite_quant_model)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -329,22 +380,22 @@
         "colab_type": "text",
         "id": "PhMmUTl4sbkz"
       },
       "source": [
         "Note how the resulting file is approximately `1/4` the size."
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "JExfcfLDscu4"
+        "id": "JExfcfLDscu4",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "!ls -lh {tflite_models_dir}"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -363,7 +414,7 @@
         "id": "-5l6-ciItvX6"
       },
       "source": [
-        "We can run the TensorFlow Lite model using the python TensorFlow Lite\n",
+        "Run the TensorFlow Lite model using the Python TensorFlow Lite\n",
         "Interpreter. \n",
         "\n",
         "### Load the test data\n",
@@ -373,13 +424,11 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "eTIuU07NuKFL"
+        "id": "eTIuU07NuKFL",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "import numpy as np\n",
         "mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()\n",
@@ -389,7 +438,9 @@
         "# `tf.lite.Interpreter.resize_tensor_input` to also change it for\n",
         "# the interpreter.\n",
         "mnist_ds = tf.data.Dataset.from_tensor_slices((images, labels)).batch(1)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
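For context, the resize_tensor_input hint in the comment above looks like this in practice. A sketch; the batch size and input shape are illustrative for this MNIST model:

    # Rebuild the dataset with a larger batch, then tell the interpreter
    # about the new input shape before (re)allocating tensors.
    mnist_ds = tf.data.Dataset.from_tensor_slices((images, labels)).batch(32)
    interpreter.resize_tensor_input(input_index, [32, 28, 28])
    interpreter.allocate_tensors()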
@@ -403,48 +454,48 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "Jn16Rc23zTss"
+        "id": "Jn16Rc23zTss",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "interpreter = tf.lite.Interpreter(model_path=str(tflite_model_file))\n",
         "interpreter.allocate_tensors()\n",
         "input_index = interpreter.get_input_details()[0][\"index\"]\n",
         "output_index = interpreter.get_output_details()[0][\"index\"]"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "J8Pztk1mvNVL"
+        "id": "J8Pztk1mvNVL",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "tf.logging.set_verbosity(tf.logging.DEBUG)\n",
         "interpreter_quant = tf.lite.Interpreter(model_path=str(tflite_model_quant_file))"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "Afl6yGvWyqAr"
+        "id": "Afl6yGvWyqAr",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "interpreter_quant.allocate_tensors()\n",
         "input_index = interpreter_quant.get_input_details()[0][\"index\"]\n",
-        "output_index = interpreter_quant.get_output_details()[0][\"index\"]\n"
-      ]
+        "output_index = interpreter_quant.get_output_details()[0][\"index\"]"
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -458,13 +509,11 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "AKslvo2kwWac"
+        "id": "AKslvo2kwWac",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "for img, label in mnist_ds.take(1):\n",
         "  break\n",
@@ -472,17 +521,17 @@
         "interpreter.set_tensor(input_index, img)\n",
         "interpreter.invoke()\n",
         "predictions = interpreter.get_tensor(output_index)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "XZClM2vo3_bm"
+        "id": "XZClM2vo3_bm",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "import matplotlib.pylab as plt\n",
         "\n",
@@ -491,7 +540,9 @@
         "_ = plt.title(template.format(true= str(label[0].numpy()),\n",
         "                              predict=str(predictions[0])))\n",
         "plt.grid(False)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -505,13 +556,11 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "05aeAuWjvjPx"
+        "id": "05aeAuWjvjPx",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "def eval_model(interpreter, mnist_ds):\n",
         "  total_seen = 0\n",
@@ -530,20 +579,22 @@
         "            (total_seen, float(num_correct) / float(total_seen)))\n",
         "\n",
         "  return float(num_correct) / float(total_seen)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "DqXBnDfJ7qxL"
+        "id": "DqXBnDfJ7qxL",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "print(eval_model(interpreter, mnist_ds))"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -552,21 +603,21 @@
         "id": "Km3cY9ry8ZlG"
       },
       "source": [
-        "We can repeat the evaluation on the weight quantized model to obtain:\n"
+        "Repeat the evaluation on the weight quantized model to obtain:\n"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "-9cnwiPp6EGm"
+        "id": "-9cnwiPp6EGm",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
-        "print(eval_model(interpreter_quant, mnist_ds))\n"
-      ]
+        "print(eval_model(interpreter_quant, mnist_ds))"
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -576,7 +627,7 @@
       },
       "source": [
         "\n",
-        "In this example, we have compressed model with no difference in the accuracy."
+        "In this example, the compressed model has no difference in the accuracy."
       ]
     },
     {
@@ -586,31 +637,29 @@
         "id": "M0o1FtmWeKZm"
       },
       "source": [
-        "\n",
-        "\n",
         "## Optimizing an existing model\n",
         "\n",
-        "We now consider another example. Resnets with pre-activation layers (Resnet-v2) are widely used for vision applications.\n",
+        "Resnets with pre-activation layers (Resnet-v2) are widely used for vision applications.\n",
         "  A pre-trained frozen graph for resnet-v2-101 is available at the\n",
         "  [TensorFlow Lite model repository](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/models.md).\n",
         "\n",
-        "We can convert the frozen graph to a TFLite flatbuffer with quantization by:\n"
+        "You can convert the frozen graph to a TensorFlow Lite flatbuffer with quantization by:\n"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "v5p5VcNPjILQ"
+        "id": "v5p5VcNPjILQ",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "archive_path = tf.keras.utils.get_file(\"resnet_v2_101.tgz\", \"https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz\", extract=True)\n",
         "archive_path = pathlib.Path(archive_path)\n",
         "archive_dir = str(archive_path.parent)"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -624,26 +673,24 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "g_Q_OMEJ4LIc"
+        "id": "g_Q_OMEJ4LIc",
+        "colab": {}
      },
-      "outputs": [],
       "source": [
         "! cat {archive_dir}/resnet_v2_101_299_info.txt"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "ujCAFhqm-C6H"
+        "id": "ujCAFhqm-C6H",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "graph_def_file = pathlib.Path(archive_path).parent/\"resnet_v2_101_299_frozen.pb\"\n",
         "input_arrays = [\"input\"] \n",
@@ -652,22 +699,23 @@
         "    str(graph_def_file), input_arrays, output_arrays, input_shapes={\"input\":[1,299,299,3]})\n",
         "converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]\n",
         "resnet_tflite_file = graph_def_file.parent/\"resnet_v2_101_quantized.tflite\"\n",
-        "resnet_tflite_file.write_bytes(converter.convert())\n"
-      ]
+        "resnet_tflite_file.write_bytes(converter.convert())"
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "code",
-      "execution_count": 0,
       "metadata": {
-        "colab": {},
         "colab_type": "code",
-        "id": "vhOjeg1x9Knp"
+        "id": "vhOjeg1x9Knp",
+        "colab": {}
       },
-      "outputs": [],
       "source": [
         "\n",
         "!ls -lh {archive_dir}/*.tflite"
-      ]
+      ],
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -683,21 +731,5 @@
         "The optimized model top-1 accuracy is 76.8, the same as the floating point model."
       ]
     }
-  ],
-  "metadata": {
-    "colab": {
-      "collapsed_sections": [],
-      "name": "post-training-quant.ipynb",
-      "private_outputs": true,
-      "provenance": [],
-      "toc_visible": true,
-      "version": "0.3.2"
-    },
-    "kernelspec": {
-      "display_name": "Python 2",
-      "name": "python2"
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
+  ]
+}