TF Lite models page v2
PiperOrigin-RevId: 235061579
@ -30,8 +30,6 @@ upper_tabs:
|
|||||||
path: /lite/ops_versioning
|
path: /lite/ops_versioning
|
||||||
- title: TensorFlow Lite compatibility guide
|
- title: TensorFlow Lite compatibility guide
|
||||||
path: /lite/tf_ops_compatibility
|
path: /lite/tf_ops_compatibility
|
||||||
- title: List of hosted models
|
|
||||||
path: /lite/models
|
|
||||||
- title: TensorFlow Lite for iOS
|
- title: TensorFlow Lite for iOS
|
||||||
path: /lite/ios
|
path: /lite/ios
|
||||||
- title: TensorFlow Lite for Raspberry Pi
|
- title: TensorFlow Lite for Raspberry Pi
|
||||||
@ -85,27 +83,36 @@ upper_tabs:
|
|||||||
# contents:
|
# contents:
|
||||||
# - title: Overview
|
# - title: Overview
|
||||||
# path: /lite/models/
|
# path: /lite/models/
|
||||||
# - heading: Beginner
|
# - title: Hosted models
|
||||||
# style: divider
|
# path: /lite/models/hosted
|
||||||
# - title: Image labeling
|
# - title: Image classification
|
||||||
# section:
|
# section:
|
||||||
# - title: Overview
|
# - title: Overview
|
||||||
# path: /lite/models/image/label/overview
|
# path: /lite/models/image_classification/overview
|
||||||
# - title: Android
|
# - title: Android
|
||||||
# path: /lite/models/image/label/android
|
# path: /lite/models/image_classification/android
|
||||||
# - title: iOS
|
# - title: iOS
|
||||||
# path: /lite/models/image/label/ios
|
# path: /lite/models/image_classification/ios
|
||||||
# - heading: Advanced
|
# - title: Object detection
|
||||||
# style: divider
|
# section:
|
||||||
# - heading: Image
|
# - title: Overview
|
||||||
# - title: Image classification
|
# path: /lite/models/object_detection/overview
|
||||||
# path: /lite/models/image/classification/
|
# - title: Speech recognition
|
||||||
# - heading: Audio
|
# section:
|
||||||
# - title: Hot word detection
|
# - title: Overview
|
||||||
# path: /lite/models/audio/hot_word/
|
# path: /lite/models/speech_recognition/overview
|
||||||
# - heading: Text
|
# - title: Pose estimation
|
||||||
# - title: Text classification
|
# section:
|
||||||
# path: /lite/models/text/classification/
|
# - title: Overview
|
||||||
|
# path: /lite/models/pose_estimation/overview
|
||||||
|
# - title: Segmentation
|
||||||
|
# section:
|
||||||
|
# - title: Overview
|
||||||
|
# path: /lite/models/segmentation/overview
|
||||||
|
# - title: Smart reply
|
||||||
|
# section:
|
||||||
|
# - title: Overview
|
||||||
|
# path: /lite/models/smart_reply/overview
|
||||||
|
|
||||||
- name: API
|
- name: API
|
||||||
skip_translation: true
|
skip_translation: true
|
||||||
|
@ -1,125 +1,93 @@
|
|||||||
project_path: /lite/_project.yaml
|
|
||||||
book_path: /lite/_book.yaml
|
book_path: /lite/_book.yaml
|
||||||
|
project_path: /lite/_project.yaml
|
||||||
description: <!--no description-->
|
description: <!--no description-->
|
||||||
landing_page:
|
landing_page:
|
||||||
body_class: tfo-hide-page-nav
|
|
||||||
custom_css_path: /site-assets/css/style.css
|
custom_css_path: /site-assets/css/style.css
|
||||||
nav: both
|
nav: left
|
||||||
rows:
|
rows:
|
||||||
|
- classname: devsite-landing-row-100
|
||||||
# Hero
|
heading: Optimized models for common mobile and edge use cases
|
||||||
- classname: >
|
|
||||||
devsite-landing-row-50
|
|
||||||
devsite-landing-row-large-headings
|
|
||||||
devsite-landing-row-no-image-background
|
|
||||||
foreground: theme
|
|
||||||
items:
|
items:
|
||||||
- heading: Models marketplace
|
- description: >
|
||||||
description: >
|
Take state-of-the-art optimized research models and easily deploy them to mobile and edge devices.
|
||||||
The TensorFlow Lite models marketplace, your neighborhood model shoppe.
|
buttons:
|
||||||
image_path: /resources/images/tflite-card-16x9.png
|
- label: Skip to full models repo
|
||||||
|
path: /lite/models/hosted
|
||||||
|
|
||||||
# Features
|
|
||||||
- background: grey
|
- background: grey
|
||||||
items:
|
items:
|
||||||
- heading: Optimized for mobile
|
- heading: Optimized for edge
|
||||||
description: >
|
description: >
|
||||||
Machine learning can make your apps more engaging, personalized, and
|
Edge devices have many constraints, such as limited memory, compute, and power. These models are specifically designed to run on-device.
|
||||||
helpful, and provides solutions that are optimized to run on-device.
|
- heading: Built by TensorFlow
|
||||||
- heading: Built with Google expertise
|
|
||||||
description: >
|
description: >
|
||||||
Models offer the technologies that have long powered Google's own
|
These models were optimized by the talented TensorFlow Lite team. Breathe easy.
|
||||||
experiences on mobile.
|
- heading: Customizable
|
||||||
- heading: Approachable and comprehensive
|
|
||||||
description: >
|
description: >
|
||||||
Use out-of-the-box solutions (base APIs) or custom models, running
|
The models were trained with certain labels in mind. With transfer learning, you can customize them to fit your needs.
|
||||||
on-device or in the Cloud, depending on your specific needs.
|
|
||||||
|
|
||||||
# Beginner models
|
- classname: devsite-landing-row-cards
|
||||||
- classname: devsite-landing-row-100
|
heading: Solutions to common problems
|
||||||
heading: "Build machine learning into your apps"
|
|
||||||
items:
|
items:
|
||||||
- heading: >
|
- heading: "Image classification"
|
||||||
Image labeling
|
|
||||||
description: >
|
description: >
|
||||||
Identify objects, locations, activities, animal species, products, and
|
Identify hundreds of objects, including people, activities, animals, plants, and places.
|
||||||
more
|
image_path: /resources/images/tf-logo-card-16x9.png
|
||||||
icon:
|
path: /lite/models/image_classification/overview
|
||||||
path: ../images/landing-page/assistant_logo.png
|
buttons:
|
||||||
path: /lite/image/labeling/
|
- label: Use the model
|
||||||
- heading: >
|
path: /lite/models/image_classification/overview
|
||||||
Text recognition (OCR)
|
- heading: "Object detection"
|
||||||
description: >
|
description: >
|
||||||
Recognize and extract text from images
|
Detect multiple objects with bounding boxes. Yes, dogs and cats, too.
|
||||||
icon:
|
image_path: /resources/images/tf-logo-card-16x9.png
|
||||||
path: ../images/landing-page/assistant_logo.png
|
path: /lite/models/object_detection/overview
|
||||||
path: /lite/image/labeling/
|
buttons:
|
||||||
- heading: >
|
- label: Use the model
|
||||||
Face detection
|
path: /lite/models/object_detection/overview
|
||||||
|
- heading: "Speech recognition"
|
||||||
description: >
|
description: >
|
||||||
Detect faces and facial landmarks
|
“Okay, Google.” How about, “Okay, custom keyword spotting?”
|
||||||
icon:
|
image_path: /resources/images/tf-logo-card-16x9.png
|
||||||
path: ../images/landing-page/assistant_logo.png
|
path: /lite/models/speech_recognition/overview
|
||||||
path: /lite/image/labeling/
|
buttons:
|
||||||
|
- label: Use the model
|
||||||
|
path: /lite/models/speech_recognition/overview
|
||||||
|
|
||||||
- items:
|
- classname: devsite-landing-row-cards
|
||||||
- heading: >
|
|
||||||
Barcode scanning
|
|
||||||
description: >
|
|
||||||
Scan and process barcodes
|
|
||||||
icon:
|
|
||||||
path: ../images/landing-page/assistant_logo.png
|
|
||||||
path: /lite/image/labeling/
|
|
||||||
- heading: >
|
|
||||||
Landmark detection
|
|
||||||
description: >
|
|
||||||
Identify popular landmarks in an image
|
|
||||||
icon:
|
|
||||||
path: ../images/landing-page/assistant_logo.png
|
|
||||||
path: /lite/image/labeling/
|
|
||||||
- heading: >
|
|
||||||
Smart reply
|
|
||||||
description: >
|
|
||||||
Provide suggested text snippet that fits context
|
|
||||||
icon:
|
|
||||||
path: ../images/landing-page/assistant_logo.png
|
|
||||||
path: /lite/image/labeling/
|
|
||||||
|
|
||||||
# Custom models
|
|
||||||
- classname: >
|
|
||||||
devsite-landing-row-no-image-background
|
|
||||||
devsite-landing-row-50
|
|
||||||
devsite-landing-row-large-headings
|
|
||||||
foreground: theme
|
|
||||||
background: grey
|
|
||||||
items:
|
items:
|
||||||
- heading: Custom models
|
- heading: "Pose estimation"
|
||||||
description: >
|
description: >
|
||||||
<p>If models don’t cover your use cases, you can always
|
Estimate poses for single or multiple people. Imagine the possibilities, including stick figure dance parties.
|
||||||
bring your own existing TensorFlow Lite models. Just upload your model,
|
image_path: /resources/images/tf-logo-card-16x9.png
|
||||||
and we’ll take care of hosting and serving it to your app.</p>
|
path: /lite/models/pose_estimation/overview
|
||||||
|
buttons:
|
||||||
|
- label: Use the model
|
||||||
|
path: /lite/models/pose_estimation/overview
|
||||||
|
- heading: "Segmentation"
|
||||||
|
description: >
|
||||||
|
Pinpoint the shape of objects with strict localization accuracy and semantic labels. Trained with people, places, animals, and more.
|
||||||
|
image_path: /resources/images/tf-logo-card-16x9.png
|
||||||
|
path: /lite/models/segmentation/overview
|
||||||
|
buttons:
|
||||||
|
- label: Use the model
|
||||||
|
path: /lite/models/segmentation/overview
|
||||||
|
- heading: "Smart reply"
|
||||||
|
description: >
|
||||||
|
Generate reply suggestions for incoming conversational chat messages.
|
||||||
|
image_path: /resources/images/tf-logo-card-16x9.png
|
||||||
|
path: /lite/models/smart_reply/overview
|
||||||
|
buttons:
|
||||||
|
- label: Use the model
|
||||||
|
path: /lite/models/smart_reply/overview
|
||||||
|
|
||||||
<p>Models acts as an API layer to your custom model, making it easy to
|
|
||||||
run and use. In addition to deploying your models, we are releasing an
|
|
||||||
experimental model compression flow that aims to reduce model size (up
|
|
||||||
to orders of magnitudes) while maintaining similar accuracy. Sign up at
|
|
||||||
<a href="https://g.co/firebase/signup">g.co/firebase/signup</a></p>
|
|
||||||
|
|
||||||
<p>And if you’re new to machine learning and want more information on
|
|
||||||
custom models for mobile, you can <a
|
|
||||||
href="//www.tensorflow.org/lite/">learn more about TensorFlow
|
|
||||||
Lite.</a></p>
|
|
||||||
image_path: /resources/images/tflite-card-16x9.png
|
|
||||||
image_left: true
|
|
||||||
- classname: devsite-landing-row-large-headings
|
- classname: devsite-landing-row-large-headings
|
||||||
foreground: theme
|
foreground: theme
|
||||||
items:
|
items:
|
||||||
- heading: Just the beginning
|
- heading: Benefits of pre-trained models
|
||||||
description: >
|
description: >
|
||||||
Our ultimate goal is to reduce idea–to–implementation cycles and make AI
|
The best way to get started is to use pre-trained models because they
|
||||||
an essential and intuitive part of a developer's toolkit. We will do so
|
allow you to save time and money, leverage learnings from research scientists,
|
||||||
by continuing to add new Base APIs that leverage Google’s machine
|
and avoid expensive datasets. Instead of having to start from scratch, you
|
||||||
learning expertise. Base APIs will ultimately cover significantly more
|
can use an existing model and retrain just the last few layers of the
|
||||||
use cases in the vision, speech, and text fields. We will also continue
|
neural network to suit your needs. This process is called transfer learning.
|
||||||
to simplify use of custom models, adding tools to deploy, compress, and
|
|
||||||
create them.
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
|
# Hosted models
|
||||||
# List of Hosted Models
|
|
||||||
|
|
||||||
# AutoML mobile image classification models (Float Models)
|
# AutoML mobile image classification models (Float Models)
|
||||||
|
|
@ -1,3 +0,0 @@
|
|||||||
# iOS
|
|
||||||
|
|
||||||
lorem
|
|
@ -1,8 +0,0 @@
|
|||||||
# Overview
|
|
||||||
|
|
||||||
Image labeling gives you insight into the content of images. When you use the
|
|
||||||
API, you get a list of the entities that were recognized: people, things,
|
|
||||||
places, activities, and so on. Each label found comes with a score that
|
|
||||||
indicates the confidence the ML model has in its relevance. With this
|
|
||||||
information, you can perform tasks such as automatic metadata generation
|
|
||||||
and content moderation.
|
|
@ -3,21 +3,20 @@
|
|||||||
This tutorial provides a simple Android mobile application to classify images
|
This tutorial provides a simple Android mobile application to classify images
|
||||||
using the Android device camera. In this tutorial, you will download the demo
|
using the Android device camera. In this tutorial, you will download the demo
|
||||||
application from the Tensorflow repository, build it on your computer, and
|
application from the Tensorflow repository, build it on your computer, and
|
||||||
install it on your Android Device. You will also learn how to customize the
|
install it on your Android device. You will also learn how to customize the
|
||||||
application to suit your requirements.
|
application to suit your requirements.
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
* Android Studio 3.2 (installed on a Linux, Mac or Windows machine)
|
* Android Studio 3.2 (installed on a Linux, Mac or Windows machine)
|
||||||
|
|
||||||
* Android Device
|
* Android device
|
||||||
|
|
||||||
* USB cable (to connect Android device to your computer)
|
* USB cable (to connect Android device to your computer)
|
||||||
|
|
||||||
### Step 1. Clone the TensorFlow source code
|
### Step 1. Clone the TensorFlow source code
|
||||||
|
|
||||||
First, we clone the GitHub repository on the computer in a folder to get the
|
Clone the GitHub repository to your computer to get the demo application.
|
||||||
demo application.
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -29,21 +28,21 @@ Open the TensorFlow source code in Android Studio. To do this, open Android
|
|||||||
Studio and select `Open an existing project` setting the folder to
|
Studio and select `Open an existing project`, setting the folder to
|
||||||
`tensorflow/lite/examples/android`
|
`tensorflow/lite/examples/android`
|
||||||
|
|
||||||

|
<img src="images/classifydemo_img1.png" />
|
||||||
|
|
||||||
This folder contains the demo application for image classification, object
|
This folder contains the demo application for image classification, object
|
||||||
detection, and speech hotword detection.
|
detection, and speech hotword detection.
|
||||||
|
|
||||||
### Step 2. Build the Android Studio project
|
### Step 2. Build the Android Studio project
|
||||||
|
|
||||||
In this step, Select `Build -> Make Project` and check that the project builds
|
Select `Build -> Make Project` and check that the project builds
|
||||||
successfully. You will need Android SDK configured in the settings. You'll need
|
successfully. You will need the Android SDK configured in the settings. You'll need
|
||||||
at least SDK version 23. The gradle file will prompt you to download any missing
|
at least SDK version 23. The Gradle file will prompt you to download any missing
|
||||||
libraries.
|
libraries.
|
||||||
|
|
||||||

|
<img src="images/classifydemo_img4.png" style="width: 40%" />
|
||||||
|
|
||||||

|
<img src="images/classifydemo_img2.png" style="width: 60%" />
|
||||||
|
|
||||||
#### TensorFlow Lite AAR from JCenter:
|
#### TensorFlow Lite AAR from JCenter:
|
||||||
|
|
||||||
@ -53,25 +52,25 @@ build.
|
|||||||
If you see a build error related to compatibility with Tensorflow Lite's Java
|
If you see a build error related to compatibility with Tensorflow Lite's Java
|
||||||
API (example: method X is undefined for type Interpreter), there has likely been
|
API (example: method X is undefined for type Interpreter), there has likely been
|
||||||
a backwards compatible change to the API. You will need to pull new app code
|
a backwards compatible change to the API. You will need to pull new app code
|
||||||
that's compatible with the nightly build by running git pull.
|
that's compatible with the nightly build by running `git pull`.
|
||||||
|
|
||||||
### Step 3. Install and Run the app
|
### Step 3. Install and run the app
|
||||||
|
|
||||||
Connect the Android device to the computer, and be sure to approve any ADB
|
Connect the Android device to the computer and be sure to approve any ADB
|
||||||
permission prompts that appear on your phone. Select `Run -> Run app.` Select
|
permission prompts that appear on your phone. Select `Run -> Run app.` Select
|
||||||
the deployment target in the connected devices to the device on which app will
|
the deployment target from the list of connected devices, choosing the device on which the app will
|
||||||
be installed. This will install the app on the device.
|
be installed. This will install the app on the device.
|
||||||
|
|
||||||

|
<img src="images/classifydemo_img5.png" style="width: 60%" />
|
||||||
|
|
||||||

|
<img src="images/classifydemo_img6.png" style="width: 70%" />
|
||||||
|
|
||||||

|
<img src="images/classifydemo_img7.png" style="width: 40%" />
|
||||||
|
|
||||||

|
<img src="images/classifydemo_img8.png" style="width: 80%" />
|
||||||
|
|
||||||
To test the app, open the app named `TFL Classify` on the device. When you run
|
To test the app, open the app called `TFL Classify` on your device. When you run
|
||||||
the app first time, the app will request permission to access the camera.
|
the app the first time, the app will request permission to access the camera.
|
||||||
Re-installing the app may require you to uninstall the previous installations.
|
Re-installing the app may require you to uninstall the previous installation.
|
||||||
|
|
||||||
## Understanding Android App Code
|
## Understanding Android App Code
|
||||||
@ -85,7 +84,7 @@ This file depends on `AndroidManifest.xml` in the folder
|
|||||||
`tensorflow/tensorflow/lite/examples/android/app/src/main` to set the camera
|
`tensorflow/tensorflow/lite/examples/android/app/src/main` to set the camera
|
||||||
orientation.
|
orientation.
|
||||||
|
|
||||||
### Pre-process of bitmap image
|
### Pre-process bitmap image
|
||||||
|
|
||||||
The mobile application code that pre-processes the images and runs inference is
|
The mobile application code that pre-processes the images and runs inference is
|
||||||
in
|
in
|
||||||
@ -102,7 +101,7 @@ DIM_PIXEL_SIZE);
|
|||||||
c.imgData.order(ByteOrder.nativeOrder());
|
c.imgData.order(ByteOrder.nativeOrder());
|
||||||
```
|
```
|
||||||
|
|
||||||
While running the application, we preprocess the incoming bitmap images from the
|
While running the application, we pre-process the incoming bitmap images from the
|
||||||
camera to a Bytebuffer. Since this model is quantized 8-bit, we will put a
|
camera into a `ByteBuffer`. Since this model is quantized to 8 bits, we will put a
|
||||||
single byte for each channel. `imgData` will contain an encoded `Color` for each
|
single byte for each channel. `imgData` will contain an encoded `Color` for each
|
||||||
pixel in ARGB format, so we need to mask the least significant 8 bits to get
|
pixel in ARGB format, so we need to mask the least significant 8 bits to get
|
After Width: | Height: | Size: 79 KiB |
After Width: | Height: | Size: 63 KiB |
Before Width: | Height: | Size: 137 KiB After Width: | Height: | Size: 137 KiB |
Before Width: | Height: | Size: 86 KiB After Width: | Height: | Size: 86 KiB |
Before Width: | Height: | Size: 91 KiB After Width: | Height: | Size: 91 KiB |
Before Width: | Height: | Size: 178 KiB After Width: | Height: | Size: 178 KiB |
Before Width: | Height: | Size: 135 KiB After Width: | Height: | Size: 135 KiB |
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
After Width: | Height: | Size: 117 KiB |
BIN
tensorflow/lite/g3doc/models/image_classification/images/dog.png
Normal file
After Width: | Height: | Size: 463 KiB |
229
tensorflow/lite/g3doc/models/image_classification/ios.md
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
# TensorFlow Lite iOS Image Classifier App Example
|
||||||
|
|
||||||
|
This tutorial provides a simple iOS mobile application to classify images using
|
||||||
|
the iOS device camera. In this tutorial, you will download the demo application
|
||||||
|
from the Tensorflow repository, build it on your computer, and install it on
|
||||||
|
your iOS Device. You will also learn how to customize the application to suit
|
||||||
|
your needs.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
* You must have [Xcode](https://developer.apple.com/xcode/) installed, have
|
||||||
|
a valid Apple Developer ID, and have an iOS device set up and linked to your
|
||||||
|
developer account with all of the appropriate certificates. For these
|
||||||
|
instructions, we assume that you have already been able to build and deploy
|
||||||
|
an app to an iOS device with your current developer environment.
|
||||||
|
|
||||||
|
* The demo app requires a camera and must be executed on a real iOS device.
|
||||||
|
You can build and run it with the iPhone Simulator, but it won't have any
|
||||||
|
camera information to classify.
|
||||||
|
|
||||||
|
* You don't need to build the entire TensorFlow library to run the demo, but
|
||||||
|
you will need to clone the TensorFlow repository if you haven't already:
|
||||||
|
|
||||||
|
git clone https://github.com/tensorflow/tensorflow
|
||||||
|
cd tensorflow
|
||||||
|
|
||||||
|
* You'll also need the Xcode command-line tools:
|
||||||
|
|
||||||
|
xcode-select --install
|
||||||
|
|
||||||
|
If this is a new install, you will need to run the Xcode application once to
|
||||||
|
agree to the license before continuing.
|
||||||
|
|
||||||
|
* Install CocoaPods if you don't have it:
|
||||||
|
|
||||||
|
sudo gem install cocoapods
|
||||||
|
|
||||||
|
### Step 1. Clone the TensorFlow source code
|
||||||
|
|
||||||
|
Clone the GitHub repository onto your computer to get the
|
||||||
|
demo application.
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://github.com/tensorflow/tensorflow
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2. Download required dependencies
|
||||||
|
|
||||||
|
Execute the shell script to download the model files used by the demo app (this
|
||||||
|
is done from inside the cloned directory):
|
||||||
|
|
||||||
|
```
|
||||||
|
tensorflow/lite/examples/ios/download_models.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the following command to install the TensorFlow Lite pod:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd tensorflow/lite/examples/ios/camera
|
||||||
|
pod install
|
||||||
|
```
|
||||||
|
|
||||||
|
If you have installed this pod before and that command doesn't work, try
|
||||||
|
|
||||||
|
```
|
||||||
|
pod repo update
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3. Build the Xcode project
|
||||||
|
|
||||||
|
Open the `tflite_camera_example.xcworkspace` project file generated in the last
|
||||||
|
step:
|
||||||
|
|
||||||
|
```
|
||||||
|
open tflite_camera_example.xcworkspace
|
||||||
|
```
|
||||||
|
|
||||||
|
Under `Project navigator -> tflite_camera_example -> Targets ->
|
||||||
|
tflite_camera_example -> General` change the bundle identifier by pre-pending
|
||||||
|
your name:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Plug in your iOS device. Note that the app must be run on a real device with
|
||||||
|
a camera. Select the iOS device from the drop-down menu.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Click the "Run" button to build and run the app
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Note that, as mentioned earlier, you must already have a device set up and linked
|
||||||
|
to your Apple Developer account in order to deploy the app onto a device.
|
||||||
|
|
||||||
|
You'll have to grant permissions for the app to use the device's camera. Point
|
||||||
|
the camera at various objects and enjoy seeing how the model classifies things!
|
||||||
|
|
||||||
|
## Understanding iOS App Code
|
||||||
|
|
||||||
|
### Get camera input
|
||||||
|
|
||||||
|
The main logic of this app is in the Objective-C++ source file
|
||||||
|
`tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm`.
|
||||||
|
|
||||||
|
The `setupAVCapture` method constructs an `AVCaptureSession` and sets itself as a
|
||||||
|
delegate. The `captureOutput:didOutputSampleBuffer:fromConnection:` method is
|
||||||
|
called for every captured frame. It calls `runModelOnFrame` to run the model for
|
||||||
|
every frame.
|
||||||
|
|
||||||
|
### Create an interpreter
|
||||||
|
|
||||||
|
To create the interpreter, we need to load the model file. The following code
|
||||||
|
loads the model file:
|
||||||
|
|
||||||
|
```
|
||||||
|
model = tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]);
|
||||||
|
```
|
||||||
|
|
||||||
|
Behind the scenes, the model is loaded as a memory-mapped file. It offers faster
|
||||||
|
load times and reduces the number of dirty pages in memory.
|
||||||
|
|
||||||
|
Construct a `BuiltinOpResolver` to use the TensorFlow Lite built-in ops. Then,
|
||||||
|
create the interpreter object using an `InterpreterBuilder`, which takes the model
|
||||||
|
and the resolver as arguments, as shown below.
|
||||||
|
|
||||||
|
```
|
||||||
|
tflite::ops::builtin::BuiltinOpResolver resolver;
|
||||||
|
tflite::InterpreterBuilder(*model, resolver)(&interpreter);
|
||||||
|
```
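Before any data can be fed in, the interpreter's tensors also need to be allocated. The demo app does this during setup; here is a minimal sketch of that step, reusing the `interpreter` built above:

```
// Allocate memory for all input/output tensors. This must be done once
// after the interpreter is built and before the first Invoke().
if (interpreter->AllocateTensors() != kTfLiteOk) {
  NSLog(@"Failed to allocate tensors!");
}
```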
|
||||||
|
|
||||||
|
### Obtain the input buffer
|
||||||
|
|
||||||
|
By default, the app uses a quantized model since it's smaller and faster. The
|
||||||
|
buffer is a raw pointer to an array of 8-bit unsigned integers (`uint8_t`). The
|
||||||
|
following code obtains the input buffer from the interpreter:
|
||||||
|
|
||||||
|
```
|
||||||
|
// Get the index of first input tensor.
|
||||||
|
int input_tensor_index = interpreter->inputs()[0];
|
||||||
|
// Get the pointer to the input buffer.
|
||||||
|
uint8_t* buffer = interpreter->typed_tensor<uint8_t>(input_tensor_index);
|
||||||
|
```
|
||||||
|
|
||||||
|
Throughout this document, it's assumed that a quantized model is used.
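If you swap in a float model instead (a hypothetical variation, not this app's default), the input tensor is float-typed, so the buffer is obtained with the float specialization:

```
// Hypothetical float-model variant: the same call with a float element type.
// `input_tensor_index` is the index obtained above.
float* float_buffer = interpreter->typed_tensor<float>(input_tensor_index);
// Pixel values are then usually normalized (for example to [-1, 1]) rather
// than passed as raw 0-255 bytes.
```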
|
||||||
|
|
||||||
|
### Pre-process bitmap image
|
||||||
|
|
||||||
|
The MobileNet model that we're using takes 224x224x3 inputs, where the dimensions are
|
||||||
|
width, height, and color channels (RGB). The images returned from `AVCaptureSession` are
|
||||||
|
bigger and have 4 color channels (RGBA).
|
||||||
|
|
||||||
|
Many image classification models (like MobileNet) take fixed-size inputs. It's
|
||||||
|
required to scale or crop the image before feeding it into the model, and to convert
|
||||||
|
the channels from RGBA to RGB.
|
||||||
|
|
||||||
|
The code to pre-process the images is in the `ProcessInputWithQuantizedModel`
|
||||||
|
function in
|
||||||
|
`tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm`. It's a
|
||||||
|
simple nearest-neighbor color sampling implementation that copies only
|
||||||
|
the first 3 bytes of each pixel.
|
||||||
|
|
||||||
|
```
|
||||||
|
void ProcessInputWithQuantizedModel(
|
||||||
|
uint8_t* input, uint8_t* output, int image_width, int image_height, int image_channels) {
|
||||||
|
for (int y = 0; y < wanted_input_height; ++y) {
|
||||||
|
uint8_t* out_row = output + (y * wanted_input_width * wanted_input_channels);
|
||||||
|
for (int x = 0; x < wanted_input_width; ++x) {
|
||||||
|
const int in_x = (y * image_width) / wanted_input_width;
|
||||||
|
const int in_y = (x * image_height) / wanted_input_height;
|
||||||
|
uint8_t* in_pixel = input + (in_y * image_width * image_channels) + (in_x * image_channels);
|
||||||
|
uint8_t* out_pixel = out_row + (x * wanted_input_channels);
|
||||||
|
for (int c = 0; c < wanted_input_channels; ++c) {
|
||||||
|
out_pixel[c] = in_pixel[c];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the code pre-processes and prepares the model input from the camera
|
||||||
|
data. Therefore, the first parameter `input` should be the camera buffer. The
|
||||||
|
second parameter `output` should be the buffer of model input.
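As a rough usage sketch (the frame size and buffer here are illustrative placeholders, not the app's actual variables), the call site looks like this:

```
// Illustrative only: a stand-in for one RGBA camera frame.
static uint8_t camera_frame[640 * 480 * 4];

// The destination is the interpreter's own quantized input buffer.
uint8_t* model_input =
    interpreter->typed_tensor<uint8_t>(interpreter->inputs()[0]);

ProcessInputWithQuantizedModel(camera_frame, model_input,
                               /*image_width=*/640, /*image_height=*/480,
                               /*image_channels=*/4);
```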
|
||||||
|
|
||||||
|
### Run inference and obtain output buffer
|
||||||
|
|
||||||
|
After pre-processing and filling the data into the input buffer of the
|
||||||
|
interpreter, invoking it is straightforward:
|
||||||
|
|
||||||
|
```
|
||||||
|
if (interpreter->Invoke() != kTfLiteOk) {
|
||||||
|
NSLog("Failed to invoke!");
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The result is stored in the output tensor buffer of the interpreter. The
|
||||||
|
following code obtains the pointer to the buffer:
|
||||||
|
|
||||||
|
```
|
||||||
|
// Get the index of first output tensor.
|
||||||
|
const int output_tensor_index = interpreter->outputs()[0];
|
||||||
|
// Get the pointer to the output buffer.
|
||||||
|
uint8_t* buffer = interpreter->typed_tensor<uint8_t>(output_tensor_index);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Post-process values
|
||||||
|
|
||||||
|
The output buffer contains an array of `uint8_t`, and the value range is from 0-255.
|
||||||
|
We need to convert the values to float to get probabilities in the range from
|
||||||
|
0.0-1.0. The formula for the quantization value mapping is:
|
||||||
|
|
||||||
|
float_value = (quantized_value - zero_point) * scale
|
||||||
|
|
||||||
|
The following code converts quantized values back to float values, using the
|
||||||
|
quantization parameters stored in the output tensor:
|
||||||
|
|
||||||
|
```
|
||||||
|
uint8_t* quantized_output = interpreter->typed_output_tensor<uint8_t>(0);
|
||||||
|
TfLiteTensor* output_tensor = interpreter->tensor(interpreter->outputs()[0]);
int32_t zero_point = output_tensor->params.zero_point;
|
||||||
|
float scale = output_tensor->params.scale;
|
||||||
|
float output[output_size];
|
||||||
|
for (int i = 0; i < output_size; ++i) {
|
||||||
|
output[i] = (quantized_output[i] - zero_point) * scale;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, we find the best set of classifications by storing them in a priority
|
||||||
|
queue based on their confidence scores. See the `GetTopN` function in
|
||||||
|
`tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm`.
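A simplified sketch of that idea (not the app's exact `GetTopN` implementation): keep a min-heap of (score, index) pairs so the lowest-scoring candidate is evicted whenever a better one arrives.

```
#include <algorithm>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

// Returns the indices of the `top_n` highest values in `scores`, best first.
std::vector<int> TopN(const std::vector<float>& scores, int top_n) {
  std::priority_queue<std::pair<float, int>,
                      std::vector<std::pair<float, int>>,
                      std::greater<std::pair<float, int>>> heap;  // min-heap
  for (int i = 0; i < static_cast<int>(scores.size()); ++i) {
    heap.push({scores[i], i});
    if (static_cast<int>(heap.size()) > top_n) heap.pop();  // drop the worst
  }
  std::vector<int> result;
  while (!heap.empty()) {
    result.push_back(heap.top().second);
    heap.pop();
  }
  std::reverse(result.begin(), result.end());  // highest score first
  return result;
}
```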
|
211
tensorflow/lite/g3doc/models/image_classification/overview.md
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
# Image classification
|
||||||
|
<img src="../images/image.png" class="attempt-right">
|
||||||
|
|
||||||
|
Use a pre-trained and optimized model to identify hundreds of classes of objects, including people, activities, animals, plants, and places.
|
||||||
|
|
||||||
|
## Get started
|
||||||
|
|
||||||
|
If you are unfamiliar with the concept of image classification, you should start by reading <a href="#what_is_image_classification">What is image classification?</a>
|
||||||
|
|
||||||
|
If you understand image classification but you’re new to TensorFlow Lite, and you’re working with Android or iOS, we recommend following the corresponding tutorial, which will walk you through our sample code.
|
||||||
|
|
||||||
|
<a class="button button-primary" href="android">Android</a>
|
||||||
|
<a class="button button-primary" href="ios">iOS</a>
|
||||||
|
|
||||||
|
|
If you are using a platform other than Android or iOS, or you are already familiar with the <a href="../apis">TensorFlow Lite APIs</a>, you can download our starter image classification model and the accompanying labels.
|
||||||
|
|
||||||
|
<a class="button button-primary" href="">Download starter model and labels</a>
|
||||||
|
|
||||||
|
## What is image classification?
|
||||||
|
A common use of machine learning is to identify what an image represents. For example, we might want to know what type of animal appears in the following photograph.
|
||||||
|
|
||||||
|
<img src="images/dog.png" alt="dog" width="50%">
|
||||||
|
|
||||||
|
The task of predicting what an image represents is called image classification. An image classification model is trained to recognize various classes of images. For example, a model might be trained to recognize photos representing three different types of animals: rabbits, hamsters, and dogs.
|
||||||
|
|
||||||
|
When we subsequently provide a new image as input to the model, it will output the probabilities of the image representing each of the types of animal it was trained on. An example output might be as follows:
|
||||||
|
|
||||||
|
<table style="width: 40%;">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Animal type</th>
|
||||||
|
<th>Probability</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>Rabbit</td>
|
||||||
|
<td>0.07</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Hamster</td>
|
||||||
|
<td>0.02</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="background-color: #fcb66d;">Dog</td>
|
||||||
|
<td style="background-color: #fcb66d;">0.91</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
Based on the output, we can see that the classification model has predicted that the image has a high probability of representing a dog.
|
||||||
|
|
||||||
|
Note: Image classification can only tell you the probability that an image represents one or more of the classes that the model was trained on. It cannot tell you the position or identity of objects within the image. If you need to identify objects and their positions within images, you should use an <a href="object_detection">object detection</a> model.
|
||||||
|
|
||||||
|
### Training, labels, and inference
|
||||||
|
|
||||||
|
During training, an image classification model is fed images and their associated labels. Each label is the name of a distinct concept, or class, that the model will learn to recognize. Here are some examples of labels and training data for our hypothetical model that classifies animal photos:
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Label</th>
|
||||||
|
<th>Training data</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>rabbit</td>
|
||||||
|
<td>[three different images of rabbits]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>hamster</td>
|
||||||
|
<td>[three different images of hamsters]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>dog</td>
|
||||||
|
<td>[three different images of dogs]</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
Given sufficient training data (often hundreds or thousands of images per label), an image classification model can learn to predict whether new images belong to any of the classes it has been trained on. This process of prediction is called inference.
|
||||||
|
|
||||||
|
To perform inference, an image is passed as input to a model. The model will then output an array of probabilities between 0 and 1. With our example model, this process might look like the following:
|
||||||
|
|
||||||
|
<table style="width: 60%">
|
||||||
|
<tr style="border-top: 0px;">
|
||||||
|
<td style="width: 40%"><img src="images/dog.png" alt="dog"></td>
|
||||||
|
<td style="width: 20%; font-size: 2em; vertical-align: middle;">→</td>
|
||||||
|
<td style="width: 40%; vertical-align: middle;">[0.07, 0.02, 0.91]</td>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
Each number in the output corresponds to a label in our training data. Associating our output with the three labels the model was trained on, we can see the model has predicted a high probability that the image represents a dog.
|
||||||
|
|
||||||
|
<table style="width: 40%;">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Label</th>
|
||||||
|
<th>Probability</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>rabbit</td>
|
||||||
|
<td>0.07</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>hamster</td>
|
||||||
|
<td>0.02</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="background-color: #fcb66d;">dog</td>
|
||||||
|
<td style="background-color: #fcb66d;">0.91</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
You might notice that the sum of all the probabilities (for rabbit, hamster, and dog) is equal to 1. This is a common type of output for models with multiple classes (see <a href="https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/softmax">Softmax</a> for more information).
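As a small illustration (the variable names are made up for this example and not tied to any particular API), mapping such an output array back to its labels and picking the most likely class looks like this:

```
#include <cstdio>
#include <string>
#include <vector>

int main() {
  // Labels in the same order they were used during training.
  const std::vector<std::string> labels = {"rabbit", "hamster", "dog"};
  // Example model output: one probability per label.
  const std::vector<float> probabilities = {0.07f, 0.02f, 0.91f};

  // Pick the index with the highest probability.
  int best = 0;
  for (int i = 1; i < static_cast<int>(probabilities.size()); ++i) {
    if (probabilities[i] > probabilities[best]) best = i;
  }
  std::printf("%s (%.2f)\n", labels[best].c_str(), probabilities[best]);  // dog (0.91)
  return 0;
}
```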
|
||||||
|
|
||||||
|
### Ambiguous results
|
||||||
|
|
||||||
|
Since the probabilities will always sum to 1, if the image is not confidently recognized as belonging to any of the classes the model was trained on, you may see the probability distributed throughout the labels without any one value being significantly larger.
|
||||||
|
|
||||||
|
For example, the following might indicate an ambiguous result:
|
||||||
|
|
||||||
|
<table style="width: 40%;">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Label</th>
|
||||||
|
<th>Probability</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>rabbit</td>
|
||||||
|
<td>0.31</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>hamster</td>
|
||||||
|
<td>0.35</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>dog</td>
|
||||||
|
<td>0.34</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
### Uses and limitations
|
||||||
|
|
||||||
|
The image classification models that we provide are useful for single-label classification, which means predicting which single label the image is most likely to represent. They are trained to recognize 1000 classes of image. For a full list of classes, see the labels file.
|
||||||
|
|
||||||
|
If you want to train a model to recognize new classes, see <a href="#customize_model">Customize model</a>.
|
||||||
|
|
||||||
|
For the following use cases, you should use a different type of model:
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Predicting the type and position of one or more objects within an image (see <a href="object_detection">object detection</a>)</li>
|
||||||
|
<li>Predicting the composition of an image, for example subject versus background (see <a href="segmentation">segmentation</a>)</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
Once you have the starter model running on your target device, you can experiment with different models to find the optimal balance between performance, accuracy, and model size. For guidance, see <a href="#choose_a_different_model">Choose a different model</a>.
|
||||||
|
|
||||||
|
## Choose a different model
|
||||||
|
|
||||||
|
There are a large number of image classification models available in our List of hosted models. You should aim to choose the optimal model for your application based on performance, accuracy, and model size. There are trade-offs among these factors.
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
|
||||||
|
We measure performance in terms of the amount of time it takes for a model to run inference on a given piece of hardware. The less time, the faster the model.
|
||||||
|
|
||||||
|
The performance you require depends on your application. Performance can be important for applications like real-time video, where it may be important to analyze each frame in the time before the next frame is drawn (e.g. inference must be faster than 33ms to perform real-time inference on a 30fps video stream).
|
||||||
|
|
||||||
|
Our quantized Mobilenet models’ performance ranges from 3.7 ms to 80.3 ms.
|
||||||
|
|
||||||
|
### Accuracy
|
||||||
|
We measure accuracy in terms of how often the model correctly classifies an image. For example, a model with a stated accuracy of 60% can be expected to classify an image correctly an average of 60% of the time.
|
||||||
|
|
||||||
|
Our List of hosted models provides Top-1 and Top-5 accuracy statistics. Top-1 refers to how often the correct label appears as the label with the highest probability in the model’s output. Top-5 refers to how often the correct label appears in the top 5 highest probabilities in the model’s output.
|
||||||
|
|
||||||
|
Our quantized Mobilenet models’ Top-5 accuracy ranges from 64.4 to 89.9%.
|
||||||
|
|
||||||
|
### Size
|
||||||
|
The size of a model on-disk varies with its performance and accuracy. Size may be important for mobile development (where it might impact app download sizes) or when working with hardware (where available storage might be limited).
|
||||||
|
|
||||||
|
Our quantized Mobilenet models’ size ranges from 0.5 to 3.4 MB.
|
||||||
|
|
||||||
|
### Architecture
|
||||||
|
There are several different model architectures available in our List of hosted models, indicated by the model’s name. For example, you can choose between Mobilenet, Inception, and others.
|
||||||
|
|
||||||
|
The architecture of a model impacts its performance, accuracy, and size. All of our hosted models are trained on the same data, meaning you can use the provided statistics to compare them and choose which is optimal for your application.
|
||||||
|
|
||||||
|
Note: The image classification models we provide accept varying sizes of input. For some models, this is indicated in the filename. For example, the Mobilenet_V1_1.0_224 model accepts an input of 224x224 pixels. <br /><br />All of the models require three color channels per pixel (red, green, and blue). Quantized models require 1 byte per channel, and float models require 4 bytes per channel.<br /><br />Our Android and iOS code samples demonstrate how to process full-sized camera images into the required format for each model.
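For example, the input buffer size required by Mobilenet_V1_1.0_224 works out as follows:

```
// 224 x 224 pixels, 3 color channels per pixel.
const int values = 224 * 224 * 3;        // 150,528 values
const int quantized_bytes = values * 1;  // 150,528 bytes (1 byte per channel)
const int float_bytes = values * 4;      // 602,112 bytes (4 bytes per channel)
```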
|
||||||
|
|
||||||
|
## Customize model
|
||||||
|
The pre-trained models we provide are trained to recognize 1000 classes of image. For a full list of classes, see the labels file.
|
||||||
|
|
||||||
|
You can use a technique known as transfer learning to re-train a model to recognize classes not in the original set. For example, you could re-train the model to distinguish between different species of tree, despite there being no trees in the original training data. To do this, you will need a set of training images for each of the new labels you wish to train.
|
||||||
|
|
||||||
|
Learn how to perform transfer learning in the TensorFlow for Poets codelab.
|
||||||
|
|
||||||
|
## Read more about this
|
||||||
|
<ul>
|
||||||
|
<li>Blog post:</li>
|
||||||
|
<li>Image classification GitHub:</li>
|
||||||
|
</ul>
|
BIN
tensorflow/lite/g3doc/models/images/audio.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
tensorflow/lite/g3doc/models/images/blank.png
Normal file
After Width: | Height: | Size: 14 KiB |
BIN
tensorflow/lite/g3doc/models/images/camera.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
tensorflow/lite/g3doc/models/images/detection.png
Normal file
After Width: | Height: | Size: 17 KiB |
BIN
tensorflow/lite/g3doc/models/images/image.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
tensorflow/lite/g3doc/models/images/object.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
tensorflow/lite/g3doc/models/images/output_stride.png
Normal file
After Width: | Height: | Size: 275 KiB |
BIN
tensorflow/lite/g3doc/models/images/pose.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
tensorflow/lite/g3doc/models/images/segmentation.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
tensorflow/lite/g3doc/models/images/sentiment.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
tensorflow/lite/g3doc/models/images/smart_reply.png
Normal file
After Width: | Height: | Size: 15 KiB |
BIN
tensorflow/lite/g3doc/models/images/tabular.png
Normal file
After Width: | Height: | Size: 17 KiB |
BIN
tensorflow/lite/g3doc/models/images/text.png
Normal file
After Width: | Height: | Size: 17 KiB |
BIN
tensorflow/lite/g3doc/models/images/tflite_models.png
Normal file
After Width: | Height: | Size: 155 KiB |
BIN
tensorflow/lite/g3doc/models/images/video.png
Normal file
After Width: | Height: | Size: 16 KiB |
220
tensorflow/lite/g3doc/models/object_detection/overview.md
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
# Object detection
|
||||||
|
<img src="../images/detection.png" class="attempt-right">
|
||||||
|
|
||||||
|
Detect multiple objects with bounding boxes. Yes, dogs and cats too.
|
||||||
|
|
||||||
|
<a class="button button-primary" href="http://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip">Download starter model and labels</a>
|
||||||
|
|
||||||
|
## Tutorials (coming soon)
|
||||||
|
<a class="button button-primary" href="">iOS</a>
|
||||||
|
<a class="button button-primary" href="">Android</a>
|
||||||
|
|
||||||
|
## What is object detection?
|
||||||
|
Given an image or a video stream, an object detection model can identify which of a known set of objects might be present and provide information about their positions within the image.
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
For example, this screenshot of our <a href="">object detection sample app</a> shows how several objects have been recognized and their positions annotated:
|
||||||
|
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
TODO: Insert image
|
||||||
|
|
||||||
|
An object detection model is trained to detect the presence and location of multiple classes of objects. For example, a model might be trained with images that contain various pieces of computer hardware, along with a label that specifies the class of hardware they represent (e.g. a laptop, a keyboard, or a monitor), and data specifying where each object appears in the image.
|
||||||
|
|
||||||
|
When we subsequently provide an image to the model, it will output a list of the objects it detects, the location of a bounding box that contains each object, and a score that indicates the confidence that the detection was correct.
|
||||||
|
|
||||||
|
### Model output
|
||||||
|
|
||||||
|
<table style="width: 60%;">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Class</th>
|
||||||
|
<th>Score</th>
|
||||||
|
<th>Location</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>Laptop</td>
|
||||||
|
<td>0.92</td>
|
||||||
|
<td>[18, 21, 57, 63]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Keyboard</td>
|
||||||
|
<td>0.88</td>
|
||||||
|
<td>[100, 30, 180, 150]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Monitor</td>
|
||||||
|
<td>0.87</td>
|
||||||
|
<td>[7, 82, 89, 163] </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Keyboard</td>
|
||||||
|
<td>0.23</td>
|
||||||
|
<td>[42, 66, 57, 83]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Monitor</td>
|
||||||
|
<td>0.11</td>
|
||||||
|
<td>[6, 42, 31, 58]</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
### Confidence score
|
||||||
|
|
||||||
|
To interpret these results, we can look at the score and the location for each detected object. The score is a number between 0 and 1 that indicates confidence that the object was genuinely detected. The closer the number is to 1, the more confident the model is.
|
||||||
|
|
||||||
|
Depending on your application, you can choose a cut-off threshold below which you will discard detection results. For our example, we might decide a sensible cut-off is a score of 0.5 (meaning a 50% probability that the detection is valid). In that case, we would ignore the last two objects in the array, because those confidence scores are below 0.5:
|
||||||
|
|
||||||
|
<table style="width: 60%;">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Class</th>
|
||||||
|
<th>Score</th>
|
||||||
|
<th>Location</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>Laptop</td>
|
||||||
|
<td>0.92</td>
|
||||||
|
<td>[18, 21, 57, 63]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Keyboard</td>
|
||||||
|
<td>0.88</td>
|
||||||
|
<td>[100, 30, 180, 150]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Monitor</td>
|
||||||
|
<td>0.87</td>
|
||||||
|
<td>[7, 82, 89, 163] </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="background-color: #e9cecc; text-decoration-line: line-through;">Keyboard</td>
|
||||||
|
<td style="background-color: #e9cecc; text-decoration-line: line-through;">0.23</td>
|
||||||
|
<td style="background-color: #e9cecc; text-decoration-line: line-through;">[42, 66, 57, 83]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="background-color: #e9cecc; text-decoration-line: line-through;">Monitor</td>
|
||||||
|
<td style="background-color: #e9cecc; text-decoration-line: line-through;">0.11</td>
|
||||||
|
<td style="background-color: #e9cecc; text-decoration-line: line-through;">[6, 42, 31, 58]</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
The cut-off you use should be based on whether you are more comfortable with false positives (objects that are wrongly identified, or areas of the image that are erroneously identified as objects when they are not), or false negatives (genuine objects that are missed because their confidence was low).
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
TODO: Insert screenshot showing both
|
||||||
|
|
||||||
|
### Location
|
||||||
|
|
||||||
|
For each detected object, the model will return an array of four numbers representing a bounding rectangle that surrounds its position. The numbers are ordered as follows:
|
||||||
|
|
||||||
|
<table style="width: 50%; margin: 0 auto;">
|
||||||
|
<tbody>
|
||||||
|
<tr style="border-top: none;">
|
||||||
|
<td>[</td>
|
||||||
|
<td>top,</td>
|
||||||
|
<td>left,</td>
|
||||||
|
<td>bottom,</td>
|
||||||
|
<td>right</td>
|
||||||
|
<td>]</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
The top value represents the distance of the rectangle’s top edge from the top of the image, in pixels. The left value represents the left edge’s distance from the left of the input image. The other values represent the bottom and right edges in a similar manner.
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
Note: Object detection models accept input images of a specific size. This is likely to be different from the size of the raw image captured by your device’s camera, and you will have to write code to crop and scale your raw image to fit the model’s input size (there are examples of this in our <a href="">sample code</a>).<br /><br />The pixel values output by the model refer to the position in the cropped and scaled image, so you must scale them to fit the raw image in order to interpret them correctly.
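A hedged sketch of that conversion, assuming the simple case where the raw frame was only resized (not cropped) to the model's input size:

```
// Scales one bounding box from model-input coordinates back to raw-image pixels.
// scale_x = raw_width / model_input_width, scale_y = raw_height / model_input_height.
// If the model emits coordinates normalized to [0, 1], use the raw width and
// height directly as the scale factors.
struct Box { float top, left, bottom, right; };

Box ScaleToRawImage(const Box& b, float scale_x, float scale_y) {
  return {b.top * scale_y, b.left * scale_x, b.bottom * scale_y, b.right * scale_x};
}
```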
|
||||||
|
|
||||||
|
|
||||||
|
### Uses and limitations
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
The object detection model we provide can identify and locate up to 10 objects in an image. It is trained to recognize 80 classes of object. For a full list of classes, see the labels file in the <a href="">model zip</a>.
|
||||||
|
|
||||||
|
If you want to train a model to recognize new classes, see <a href="#customize_model">Customize model</a>.
|
||||||
|
|
||||||
|
For the following use cases, you should use a different type of model:
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Predicting which single label the image most likely represents (see <a href="image_classification">image classification</a>)</li>
|
||||||
|
<li>Predicting the composition of an image, for example subject versus background (see <a href="segmentation">segmentation</a>)</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
## Get started
|
||||||
|
If you are new to TensorFlow Lite and are working with Android or iOS, we recommend following the corresponding tutorial that will walk you through our sample code.
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
<a class="button button-primary" href="">iOS</a>
|
||||||
|
<a class="button button-primary" href="">Android</a>
|
||||||
|
|
||||||
|
If you are using a platform other than Android or iOS, or you are already familiar with the <a href="../apis">TensorFlow Lite APIs</a>, you can download our starter object detection model and the accompanying labels.
|
||||||
|
|
||||||
|
<a href="http://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip">Download starter model and labels</a>
|
||||||
|
|
||||||
|
The model will return 10 detection results...
|
||||||
|
|
||||||
|
## Starter model
|
||||||
|
We recommend starting to implement object detection using the quantized COCO SSD MobileNet v1 model, available with labels from this download link:
|
||||||
|
|
||||||
|
<a href="http://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip">Download starter model and labels</a>
|
||||||
|
|
||||||
|
### Input
|
||||||
|
The model takes an image as input. The expected image is 300x300 pixels, with three channels (red, green, and blue) per pixel. This should be fed to the model as a flattened buffer of 270,000 byte values (300x300x3). Since the model is <a href="">quantized</a>, each value should be a single byte representing a value between 0 and 255.
|
||||||
|
|
||||||
|
### Output
|
||||||
|
The model outputs four arrays, mapped to the indices 0-3. Arrays 0, 1, and 2 describe 10 detected objects, with one element in each array corresponding to each object. There will always be 10 objects detected.
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Index</th>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>0</td>
|
||||||
|
<td>Locations</td>
|
||||||
|
<td>Multidimensional array of [10][4] floating point values between 0 and 1, the inner arrays representing bounding boxes in the form [top, left, bottom, right]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>1</td>
|
||||||
|
<td>Classes</td>
|
||||||
|
<td>Array of 10 integers (output as floating point values) each indicating the index of a class label from the labels file</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>2</td>
|
||||||
|
<td>Scores</td>
|
||||||
|
<td>Array of 10 floating point values between 0 and 1 representing probability that a class was detected</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>3</td>
|
||||||
|
<td>Number of detections</td>
|
||||||
|
<td>Array of length 1 containing a floating point value expressing the total number of detection results</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
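Putting the table above together, here is a hedged sketch of reading those outputs with the TensorFlow Lite C++ API and applying a 0.5 score threshold. The output ordering follows the table; the `interpreter` is assumed to have already run `Invoke()` on this model.

```
// Read the four output arrays described above.
const float* locations = interpreter->typed_output_tensor<float>(0);  // [10][4]
const float* classes   = interpreter->typed_output_tensor<float>(1);  // [10]
const float* scores    = interpreter->typed_output_tensor<float>(2);  // [10]
const float* count     = interpreter->typed_output_tensor<float>(3);  // [1]

const float kThreshold = 0.5f;
const int num_results = static_cast<int>(count[0]);
for (int i = 0; i < num_results; ++i) {
  if (scores[i] < kThreshold) continue;  // discard low-confidence results
  const int label_index = static_cast<int>(classes[i]);
  const float* box = locations + 4 * i;  // [top, left, bottom, right]
  // ... look up label_index in the labels file and draw `box` ...
}
```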
|
||||||
|
|
||||||
|
## Customize model
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
The pre-trained models we provide are trained to detect 80 classes of object. For a full list of classes, see the labels file in the <a href="">model zip</a>.
|
||||||
|
|
||||||
|
You can use a technique known as transfer learning to re-train a model to recognize classes not in the original set. For example, you could re-train the model to detect multiple types of vegetable, despite there only being one vegetable in the original training data. To do this, you will need a set of training images for each of the new labels you wish to train.
|
||||||
|
|
||||||
|
Learn how to perform transfer learning in the <a href="https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193">Training and serving a real-time mobile object detector in 30 minutes</a> blog post.
|
||||||
|
|
||||||
|
<!-- TODO -->
|
||||||
|
## Read more about this
|
||||||
|
<ul>
|
||||||
|
<li>Blog post:</li>
|
||||||
|
<li>Object detection GitHub:</li>
|
||||||
|
</ul>
|
128
tensorflow/lite/g3doc/models/pose_estimation/overview.md
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
# Pose estimation
|
||||||
|
<img src="../images/pose.png" class="attempt-right" />
|
||||||
|
|
||||||
|
<i>PoseNet</i> is a vision model that can be used to estimate the pose of a person in an image or video by estimating where key body joints are.
|
||||||
|
|
||||||
|
<a class="button button-primary" href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/gpu/multi_person_mobilenet_v1_075_float.tflite">Download starter model</a>
|
||||||
|
|
||||||
|
## Tutorials (coming soon)
|
||||||
|
<a class="button button-primary" href="">iOS</a>
|
||||||
|
<a class="button button-primary" href="">Android</a>
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
Pose estimation refers to computer vision techniques that detect human figures in images and videos, so that one could determine, for example, where someone’s elbow shows up in an image.
|
||||||
|
|
||||||
|
To be clear, this technology is not recognizing who is in an image — there is no personal identifiable information associated to pose detection. The algorithm is simply estimating where key body joints are.
|
||||||
|
|
||||||
|
The key points detected are indexed by part id with a confidence score between 0.0 and 1.0; 1.0 being the highest.
|
||||||
|
|
||||||
|
<table style="width: 30%;">
  <thead>
    <tr>
      <th>Id</th>
      <th>Part</th>
    </tr>
  </thead>
  <tbody>
    <tr><td>0</td><td>nose</td></tr>
    <tr><td>1</td><td>leftEye</td></tr>
    <tr><td>2</td><td>rightEye</td></tr>
    <tr><td>3</td><td>leftEar</td></tr>
    <tr><td>4</td><td>rightEar</td></tr>
    <tr><td>5</td><td>leftShoulder</td></tr>
    <tr><td>6</td><td>rightShoulder</td></tr>
    <tr><td>7</td><td>leftElbow</td></tr>
    <tr><td>8</td><td>rightElbow</td></tr>
    <tr><td>9</td><td>leftWrist</td></tr>
    <tr><td>10</td><td>rightWrist</td></tr>
    <tr><td>11</td><td>leftHip</td></tr>
    <tr><td>12</td><td>rightHip</td></tr>
    <tr><td>13</td><td>leftKnee</td></tr>
    <tr><td>14</td><td>rightKnee</td></tr>
    <tr><td>15</td><td>leftAnkle</td></tr>
    <tr><td>16</td><td>rightAnkle</td></tr>
  </tbody>
</table>
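
A common way to turn the model outputs into the key points above is to take, for each part ID, the highest-scoring cell in the heatmap output and refine it with the corresponding offset vector. The Python sketch below illustrates that single-pose decoding idea only; the tensor order, the [1, rows, cols, 17] heatmap and [1, rows, cols, 34] offset shapes, and the output stride value are assumptions to verify against your downloaded model, and this is not the full multi-person decoding algorithm.

```python
import numpy as np
import tensorflow as tf

OUTPUT_STRIDE = 16  # assumption; depends on the model variant

interpreter = tf.lite.Interpreter(
    model_path="multi_person_mobilenet_v1_075_float.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

_, height, width, _ = input_details[0]["shape"]
image = np.random.rand(1, height, width, 3).astype(np.float32)  # stand-in input
interpreter.set_tensor(input_details[0]["index"], image)
interpreter.invoke()

# Assumed shapes: heatmaps [rows, cols, 17], offsets [rows, cols, 34]
# (y offsets in the first 17 channels, x offsets in the last 17).
heatmaps = interpreter.get_tensor(output_details[0]["index"])[0]
offsets = interpreter.get_tensor(output_details[1]["index"])[0]

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

num_parts = heatmaps.shape[-1]
for part_id in range(num_parts):
    heatmap = heatmaps[:, :, part_id]
    row, col = np.unravel_index(np.argmax(heatmap), heatmap.shape)
    # Keypoint position = heatmap cell * output stride + offset vector.
    y = row * OUTPUT_STRIDE + offsets[row, col, part_id]
    x = col * OUTPUT_STRIDE + offsets[row, col, part_id + num_parts]
    score = sigmoid(heatmap[row, col])  # skip if the heatmap is already a probability
    print(f"part {part_id}: ({x:.1f}, {y:.1f}) score {score:.2f}")
```
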

## Example output

<img src="https://www.tensorflow.org/images/models/pose_estimation.gif" />

## Get started

Android and iOS end-to-end tutorials are coming soon. In the meantime, if you want to experiment with this in a web browser, check out the TensorFlow.js <a href="https://github.com/tensorflow/tfjs-models/tree/master/posenet">GitHub repository</a>.

## How it performs

Performance varies based on your device and the output stride, which determines the resolution of the heatmaps and offset vectors. The PoseNet model is image size invariant, which means it can predict pose positions in the same scale as the original image regardless of whether the image is downscaled. This means PoseNet can be configured to have higher accuracy at the expense of performance.

The output stride determines how much we’re scaling down the output relative to the input image size. It affects the size of the layers and the model outputs. The higher the output stride, the smaller the resolution of the layers in the network and of the outputs, and correspondingly the lower their accuracy. In this implementation, the output stride can have values of 8, 16, or 32. In other words, an output stride of 32 will result in the fastest performance but lowest accuracy, while 8 will result in the highest accuracy but slowest performance. We recommend starting with 16.
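
As a rough worked example, the TensorFlow.js PoseNet write-up linked below gives the output resolution as ((input size - 1) / output stride) + 1; treat this as an approximation for this implementation.

```python
# Rough illustration: output resolution = ((input size - 1) / output stride) + 1
input_size = 225
for output_stride in (8, 16, 32):
    resolution = (input_size - 1) // output_stride + 1
    print(output_stride, resolution)  # 8 -> 29, 16 -> 15, 32 -> 8
```
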

<img src="../images/models/output_stride.png" >
<span style="font-size: 0.8em">The output stride determines how much we’re scaling down the output relative to the input image size. A higher output stride is faster but results in lower accuracy.</span>

## Read more about this

<ul>
<li><a href="">Blog post: Real-time Human Pose Estimation in the Browser with TensorFlow.js</a></li>
<li><a href="">TF.js GitHub: Pose Detection in the Browser: PoseNet Model</a></li>
</ul>

## Users

<ul>
<li><a href="">‘PomPom Mirror’</a></li>
<li><a href="">Amazing Art Installation Turns You Into A Bird | Chris Milk "The Treachery of Sanctuary"</a></li>
<li><a href="">Puppet Parade - Interactive Kinect Puppets</a></li>
<li><a href="">Messa di Voce (Performance), Excerpts</a></li>
<li><a href="">Augmented reality</a></li>
<li><a href="">Interactive animation</a></li>
<li><a href="">Gait analysis</a></li>
</ul>
tensorflow/lite/g3doc/models/segmentation/overview.md
@ -0,0 +1,32 @@
# Segmentation (GPU)

<img src="../images/segmentation.png" class="attempt-right" />

<i>DeepLab</i> is a state-of-the-art deep learning model for semantic image segmentation, where the goal is to assign semantic labels (e.g., person, dog, cat and so on) to every pixel in the input image.

<a class="button button-primary" href="https://storage.googleapis.com/download.tensorflow.org/models/tflite/gpu/deeplabv3_257_mv_gpu.tflite">Download starter model</a>

## Tutorials (coming soon)

<a class="button button-primary" href="">iOS</a>
<a class="button button-primary" href="">Android</a>

## How it works

It all started with image classification, where the model predicts a single label for an entire input image. With advances in data, hardware, and software, object detection followed, inferring objects together with their spatial location. Semantic segmentation offers the highest level of granularity, with labels assigned at the pixel level.

The current implementation includes the following features:
<ol>
  <li>DeepLabv1: We use atrous convolution to explicitly control the resolution at which feature responses are computed within Deep Convolutional Neural Networks.</li>
  <li>DeepLabv2: We use atrous spatial pyramid pooling (ASPP) to robustly segment objects at multiple scales with filters at multiple sampling rates and effective fields-of-view.</li>
  <li>DeepLabv3: We augment the ASPP module with image-level features [5, 6] to capture longer range information. We also include batch normalization [7] parameters to facilitate the training. In particular, we apply atrous convolution to extract output features at different output strides during training and evaluation, which efficiently enables training BN at output stride = 16 and attains a high performance at output stride = 8 during evaluation.</li>
  <li>DeepLabv3+: We extend DeepLabv3 to include a simple yet effective decoder module to refine the segmentation results, especially along object boundaries. Furthermore, in this encoder-decoder structure one can arbitrarily control the resolution of extracted encoder features by atrous convolution to trade off precision and runtime.</li>
</ol>

## Example output

The model will create a mask over the target objects with high accuracy.

<img src="images/segmentation.gif" />

## Read more about this

<ul>
<li>Blog post: <a href="https://ai.googleblog.com/2018/03/semantic-image-segmentation-with.html">Semantic Image Segmentation with DeepLab in TensorFlow</a></li>
<li>Blog post: <a href="https://medium.com/tensorflow/tensorflow-lite-now-faster-with-mobile-gpus-developer-preview-e15797e6dee7">TensorFlow Lite Now Faster with Mobile GPUs (Developer Preview)</a></li>
<li>DeepLab GitHub: <a href="https://github.com/tensorflow/models/tree/master/research/deeplab">DeepLab: Deep Labelling for Semantic Image Segmentation</a></li>
</ul>
BIN tensorflow/lite/g3doc/models/smart_reply/images/smart_reply.gif
tensorflow/lite/g3doc/models/smart_reply/overview.md
@ -0,0 +1,40 @@
# Smart reply

<img src="../images/smart_reply.png" class="attempt-right" />

Smart replies are contextually relevant, one-touch responses that help the user to reply to an incoming text message (or email) efficiently and effortlessly.

<a class="button button-primary" href="http://download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip">Download starter model and labels</a>

## Tutorials (coming soon)

<a class="button button-primary" href="">iOS</a>
<a class="button button-primary" href="">Android</a>

## How it works

The model generates reply suggestions for conversational chat messages, using an efficient inference that can easily be plugged in to your chat application to power on-device conversational intelligence.

The on-device model comes with several benefits. It is:
<ul>
<li>Faster: The model resides on the device and does not require internet connectivity. Thus, inference is very fast, with an average latency of only a few milliseconds.</li>
<li>Resource efficient: The model has a small memory footprint on the device.</li>
<li>Privacy-friendly: User data never leaves the device, eliminating any privacy concerns.</li>
</ul>

## Example output

<img src="images/smart_reply.gif" />

## How to use this model?

We have provided a pre-built demo APK that you can download, install, and test on your phone. Go to the <a href="https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/models/smartreply/g3doc">GitHub page</a> for instructions and a list of supported ops and functionalities.

## Read more about this

<ul>
<li><a href="https://arxiv.org/pdf/1708.00630.pdf">Research paper</a></li>
<li><a href="https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/models/smartreply/">Source code</a></li>
</ul>

## Users

<ul>
<li><a href="https://www.blog.google/products/gmail/save-time-with-smart-reply-in-gmail/">Gmail</a></li>
<li><a href="https://www.blog.google/products/gmail/computer-respond-to-this-email/">Inbox</a></li>
<li><a href="https://blog.google/products/allo/google-allo-smarter-messaging-app/">Allo</a></li>
<li><a href="https://research.googleblog.com/2017/02/on-device-machine-intelligence.html">Smart Replies on Android Wear</a></li>
</ul>
tensorflow/lite/g3doc/models/speech_recognition/overview.md
@ -0,0 +1,14 @@
# Speech recognition

<img src="../images/audio.png" class="attempt-right">

Recognize audio keywords!

<a class="button button-primary" href="">Download starter model</a>

## Tutorials (coming soon)

<a class="button button-primary" href="">iOS</a>
<a class="button button-primary" href="">Android</a>

## What is speech recognition?

Coming soon.