Add quantization option to Metal delegate Swift API

Also updated outdated GPU delegate documentation.

PiperOrigin-RevId: 315469909
Change-Id: I7b524373a397763c886905e83a2e8b75226d9471
This commit is contained in:
Taehee Jeong 2020-06-09 06:17:36 -07:00 committed by TensorFlower Gardener
parent c254833717
commit 7c1b0d0a37
3 changed files with 119 additions and 46 deletions

View File

@ -35,6 +35,7 @@ public final class MetalDelegate: Delegate {
var delegateOptions = TFLGpuDelegateOptions() var delegateOptions = TFLGpuDelegateOptions()
delegateOptions.allow_precision_loss = options.allowsPrecisionLoss delegateOptions.allow_precision_loss = options.allowsPrecisionLoss
delegateOptions.wait_type = options.waitType.cWaitType delegateOptions.wait_type = options.waitType.cWaitType
delegateOptions.enable_quantization = options.isQuantizationEnabled
cDelegate = TFLGpuDelegateCreate(&delegateOptions) cDelegate = TFLGpuDelegateCreate(&delegateOptions)
} }
@ -54,6 +55,10 @@ extension MetalDelegate {
/// default is `passive`. /// default is `passive`.
public var waitType: ThreadWaitType = .passive public var waitType: ThreadWaitType = .passive
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
/// is `false`.
public var isQuantizationEnabled = false
/// Creates a new instance with the default values. /// Creates a new instance with the default values.
public init() {} public init() {}
} }

View File

@ -179,6 +179,28 @@ delegate.close();
### iOS ### iOS
#### Swift
Initialize TensorFlow Lite interpreter with the GPU delegate.
```swift
import TensorFlowLite
// Load model ...
let delegate = MetalDelegate()
if let interpreter = try? Interpreter(modelPath: modelPath,
delegates: [delegate]) {
// Run inference ...
}
```
#### Objective-C
Note: For Objective-C, the GPU delegate is provided via the C API.
In your application code, include the GPU delegate header and call the In your application code, include the GPU delegate header and call the
`Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to `Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to
the interpreter: the interpreter:

View File

@ -126,10 +126,28 @@ bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:gl_deleg
bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library
``` ```
### iOS (ObjC++) ### iOS (Swift)
To use TensorFlow Lite on GPU, get the GPU delegate via `NewGpuDelegate()` and Initialize TensorFlow Lite interpreter with the GPU delegate.
then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
```swift
import TensorFlowLite
let delegate = MetalDelegate()
if let interpreter = try? Interpreter(modelPath: modelPath,
delegates: [delegate]) {
// Run inference ...
}
```
### iOS (Objective-C)
Note: For Objective-C, the GPU delegate is provided via the C API.
To use TensorFlow Lite on GPU, get the GPU delegate via `TFLGpuDelegateCreate()`
and then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
`Interpreter::AllocateTensors()`). `Interpreter::AllocateTensors()`).
```c++ ```c++
@ -142,12 +160,7 @@ InterpreterBuilder(*model, op_resolver)(&interpreter);
// NEW: Prepare GPU delegate. // NEW: Prepare GPU delegate.
const GpuDelegateOptions options = { auto* delegate = TFLGpuDelegateCreate(/*default options=*/nullptr);
.allow_precision_loss = false,
.wait_type = kGpuDelegateOptions::WaitType::Passive,
};
auto* delegate = NewGpuDelegate(options);
if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false; if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
// Run inference. // Run inference.
@ -156,7 +169,7 @@ if (interpreter->Invoke() != kTfLiteOk) return false;
ReadFromOutputTensor(interpreter->typed_output_tensor<float>(0)); ReadFromOutputTensor(interpreter->typed_output_tensor<float>(0));
// Clean up. // Clean up.
DeleteGpuDelegate(delegate); TFLGpuDelegateDelete(delegate);
``` ```
Note: When calling `Interpreter::ModifyGraphWithDelegate()` or Note: When calling `Interpreter::ModifyGraphWithDelegate()` or
@ -169,7 +182,54 @@ called.
## Advanced usage ## Advanced usage
### Running quantized models (Experimental, Android only) ### Delegate Options for iOS
`TFLGpuDelegateCreate()` accepts a `struct` of options.
([C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/metal_delegate.h),
[Swift API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift))
Passing `nullptr` (C API) or nothing (Swift API) to the initializer sets the
default options (which are explained in the Basic Usage example above).
**Swift API**
```swift
// THIS:
var options = MetalDelegate.Options()
options.allowsPrecisionLoss = false
options.waitType = .passive
options.isQuantizationEnabled = false
let delegate = MetalDelegate(options: options)
// IS THE SAME AS THIS:
let delegate = MetalDelegate()
```
**C API (also used for Objective-C)**
```c++
// THIS:
const TFLGpuDelegateOptions options = {
.allow_precision_loss = false,
.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive,
.enable_quantization = false,
};
auto* delegate = TFLGpuDelegateCreate(&options);
// IS THE SAME AS THIS:
auto* delegate = TFLGpuDelegateCreate(nullptr);
```
While it is convenient to use `nullptr`, we recommend that you explicitly set
the options, to avoid any unexpected behavior if default values are changed in
the future.
### Running quantized models (Experimental)
The GPU delegate already supports The GPU delegate already supports
[float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant) [float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant)
@ -186,6 +246,8 @@ tensors.
This feature can be enabled using delegate options as follows: This feature can be enabled using delegate options as follows:
#### Android
**C++ API** **C++ API**
```c++ ```c++
@ -206,51 +268,30 @@ GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedMod
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate); Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
``` ```
### Delegate Options for iOS #### iOS
`NewGpuDelegate()` accepts a `struct` of options. **Swift API**
```c++ ```swift
struct GpuDelegateOptions { // NEW: Prepare custom options with feature enabled.
// Allows to quantify tensors, downcast values, process in float16 etc. var options = MetalDelegate.Options()
bool allow_precision_loss; options.isQuantizationEnabled = true
let delegate = MetalDelegate(options: options)
enum class WaitType {
// waitUntilCompleted
kPassive,
// Minimize latency. It uses active spinning instead of mutex and consumes
// additional CPU resources.
kActive,
// Useful when the output is used with GPU pipeline then or if external
// command encoder is set
kDoNotWait,
};
WaitType wait_type;
};
``` ```
Passing `nullptr` into `NewGpuDelegate()` sets the default options (which are **C API (also used for Objective-C)**
explicated in the Basic Usage example above).
```c++ ```c
// THIS: // THIS:
const GpuDelegateOptions options = { // NEW: Prepare custom options with feature enabled.
.allow_precision_loss = false, const TFLGpuDelegateOptions options = {
.wait_type = kGpuDelegateOptions::WaitType::Passive, .enable_quantization = true,
}; };
auto* delegate = NewGpuDelegate(options); auto* delegate = TFLGpuDelegateCreate(options);
// IS THE SAME AS THIS:
auto* delegate = NewGpuDelegate(nullptr);
``` ```
While it is convenient to use `nullptr`, we recommend that you explicitly set
the options, to avoid any unexpected behavior if default values are changed in
the future.
### Input/Output Buffers (iOS only) ### Input/Output Buffers (iOS only)
To do computation on the GPU, data must be made available to the GPU. This often To do computation on the GPU, data must be made available to the GPU. This often
@ -280,8 +321,13 @@ off by calling `Interpreter::SetAllowBufferHandleOutput(true)` during
initialization. initialization.
```c++ ```c++
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h"
// ...
// Prepare GPU delegate. // Prepare GPU delegate.
auto* delegate = NewGpuDelegate(nullptr); auto* delegate = TFLGpuDelegateCreate(nullptr);
interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false; if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false;
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false; if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false;