Add quantization option to Metal delegate Swift API

Also updated outdated GPU delegate documentation.

PiperOrigin-RevId: 315469909
Change-Id: I7b524373a397763c886905e83a2e8b75226d9471
This commit is contained in:
Taehee Jeong 2020-06-09 06:17:36 -07:00 committed by TensorFlower Gardener
parent c254833717
commit 7c1b0d0a37
3 changed files with 119 additions and 46 deletions

View File

@ -35,6 +35,7 @@ public final class MetalDelegate: Delegate {
var delegateOptions = TFLGpuDelegateOptions()
delegateOptions.allow_precision_loss = options.allowsPrecisionLoss
delegateOptions.wait_type = options.waitType.cWaitType
delegateOptions.enable_quantization = options.isQuantizationEnabled
cDelegate = TFLGpuDelegateCreate(&delegateOptions)
}
@ -54,6 +55,10 @@ extension MetalDelegate {
/// default is `passive`.
public var waitType: ThreadWaitType = .passive
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
/// is `false`.
public var isQuantizationEnabled = false
/// Creates a new instance with the default values.
public init() {}
}

View File

@ -179,6 +179,28 @@ delegate.close();
### iOS
#### Swift
Initialize TensorFlow Lite interpreter with the GPU delegate.
```swift
import TensorFlowLite
// Load model ...
let delegate = MetalDelegate()
if let interpreter = try? Interpreter(modelPath: modelPath,
delegates: [delegate]) {
// Run inference ...
}
```
#### Objective-C
Note: For Objective-C, GPU delegate is provided via C API.
In your application code, include the GPU delegate header and call the
`Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to
the interpreter:

View File

@ -126,10 +126,28 @@ bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:gl_deleg
bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library
```
### iOS (ObjC++)
### iOS (Swift)
To use TensorFlow Lite on GPU, get the GPU delegate via `NewGpuDelegate()` and
then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
Initialize TensorFlow Lite interpreter with the GPU delegate.
```swift
import TensorFlowLite
let delegate = MetalDelegate()
if let interpreter = try? Interpreter(modelPath: modelPath,
delegates: [delegate]) {
// Run inference ...
}
```
### iOS (Objective-C)
Note: For Objective-C, GPU delegate is provided via C API.
To use TensorFlow Lite on GPU, get the GPU delegate via `TFLGpuDelegateCreate()`
and then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
`Interpreter::AllocateTensors()`).
```c++
@ -142,12 +160,7 @@ InterpreterBuilder(*model, op_resolver)(&interpreter);
// NEW: Prepare GPU delegate.
const GpuDelegateOptions options = {
.allow_precision_loss = false,
.wait_type = kGpuDelegateOptions::WaitType::Passive,
};
auto* delegate = NewGpuDelegate(options);
auto* delegate = TFLGpuDelegateCreate(/*default options=*/nullptr);
if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
// Run inference.
@ -156,7 +169,7 @@ if (interpreter->Invoke() != kTfLiteOk) return false;
ReadFromOutputTensor(interpreter->typed_output_tensor<float>(0));
// Clean up.
DeleteGpuDelegate(delegate);
TFLGpuDelegateDelete(delegate);
```
Note: When calling `Interpreter::ModifyGraphWithDelegate()` or
@ -169,7 +182,54 @@ called.
## Advanced usage
### Running quantized models (Experimental, Android only)
### Delegate Options for iOS
`TFLGpuDelegateCreate()` accepts a pointer to a `struct` of options.
([C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/metal_delegate.h),
[Swift API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift))
Passing `nullptr` (C API) or nothing (Swift API) to the initializer sets the
default options (which are explained in the Basic Usage example above).
**Swift API**
```swift
// THIS:
var options = MetalDelegate.Options()
options.allowsPrecisionLoss = false
options.waitType = .passive
options.isQuantizationEnabled = false
let delegate = MetalDelegate(options: options)
// IS THE SAME AS THIS:
let delegate = MetalDelegate()
```
**C API (also used for Objective-C)**
```c++
// THIS:
const TFLGpuDelegateOptions options = {
.allow_precision_loss = false,
.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive,
.enable_quantization = false,
};
auto* delegate = TFLGpuDelegateCreate(&options);
// IS THE SAME AS THIS:
auto* delegate = TFLGpuDelegateCreate(nullptr);
```
While it is convenient to use `nullptr`, we recommend that you explicitly set
the options, to avoid any unexpected behavior if default values are changed in
the future.
### Running quantized models (Experimental)
The GPU delegate already supports
[float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant)
@ -186,6 +246,8 @@ tensors.
This feature can be enabled using delegate options as follows:
#### Android
**C++ API**
```c++
@ -206,51 +268,30 @@ GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedMod
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
```
### Delegate Options for iOS
#### iOS
`NewGpuDelegate()` accepts a `struct` of options.
**Swift API**
```c++
struct GpuDelegateOptions {
// Allows to quantify tensors, downcast values, process in float16 etc.
bool allow_precision_loss;
enum class WaitType {
// waitUntilCompleted
kPassive,
// Minimize latency. It uses active spinning instead of mutex and consumes
// additional CPU resources.
kActive,
// Useful when the output is used with GPU pipeline then or if external
// command encoder is set
kDoNotWait,
};
WaitType wait_type;
};
```swift
// NEW: Prepare custom options with feature enabled.
var options = MetalDelegate.Options()
options.isQuantizationEnabled = true
let delegate = MetalDelegate(options: options)
```
Passing `nullptr` into `NewGpuDelegate()` sets the default options (which are
explicated in the Basic Usage example above).
**C API (also used for Objective-C)**
```c++
```c
// THIS:
const GpuDelegateOptions options = {
.allow_precision_loss = false,
.wait_type = kGpuDelegateOptions::WaitType::Passive,
// NEW: Prepare custom options with feature enabled.
const TFLGpuDelegateOptions options = {
.enable_quantization = true,
};
auto* delegate = NewGpuDelegate(options);
// IS THE SAME AS THIS:
auto* delegate = NewGpuDelegate(nullptr);
auto* delegate = TFLGpuDelegateCreate(&options);
```
While it is convenient to use `nullptr`, we recommend that you explicitly set
the options, to avoid any unexpected behavior if default values are changed in
the future.
### Input/Output Buffers (iOS only)
To do computation on the GPU, data must be made available to the GPU. This often
@ -280,8 +321,13 @@ off by calling `Interpreter::SetAllowBufferHandleOutput(true)` during
initialization.
```c++
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h"
// ...
// Prepare GPU delegate.
auto* delegate = NewGpuDelegate(nullptr);
auto* delegate = TFLGpuDelegateCreate(nullptr);
interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false;
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false;