Add quantization option to Metal delegate Swift API
Also updated outdated GPU delegate documentation. PiperOrigin-RevId: 315469909 Change-Id: I7b524373a397763c886905e83a2e8b75226d9471
This commit is contained in:
parent
c254833717
commit
7c1b0d0a37
@ -35,6 +35,7 @@ public final class MetalDelegate: Delegate {
|
|||||||
var delegateOptions = TFLGpuDelegateOptions()
|
var delegateOptions = TFLGpuDelegateOptions()
|
||||||
delegateOptions.allow_precision_loss = options.allowsPrecisionLoss
|
delegateOptions.allow_precision_loss = options.allowsPrecisionLoss
|
||||||
delegateOptions.wait_type = options.waitType.cWaitType
|
delegateOptions.wait_type = options.waitType.cWaitType
|
||||||
|
delegateOptions.enable_quantization = options.isQuantizationEnabled
|
||||||
cDelegate = TFLGpuDelegateCreate(&delegateOptions)
|
cDelegate = TFLGpuDelegateCreate(&delegateOptions)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -54,6 +55,10 @@ extension MetalDelegate {
|
|||||||
/// default is `passive`.
|
/// default is `passive`.
|
||||||
public var waitType: ThreadWaitType = .passive
|
public var waitType: ThreadWaitType = .passive
|
||||||
|
|
||||||
|
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
|
||||||
|
/// is `false`.
|
||||||
|
public var isQuantizationEnabled = false
|
||||||
|
|
||||||
/// Creates a new instance with the default values.
|
/// Creates a new instance with the default values.
|
||||||
public init() {}
|
public init() {}
|
||||||
}
|
}
|
||||||
|
@ -179,6 +179,28 @@ delegate.close();
|
|||||||
|
|
||||||
### iOS
|
### iOS
|
||||||
|
|
||||||
|
#### Swift
|
||||||
|
|
||||||
|
Initialize TensorFlow Lite interpreter with the GPU delegate.
|
||||||
|
|
||||||
|
```swift
|
||||||
|
import TensorFlowLite
|
||||||
|
|
||||||
|
// Load model ...
|
||||||
|
|
||||||
|
let delegate = MetalDelegate()
|
||||||
|
|
||||||
|
if let interpreter = try Interpreter(modelPath: modelPath,
|
||||||
|
delegates: [delegate]) {
|
||||||
|
// Run inference ...
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Objective-C
|
||||||
|
|
||||||
|
Note: For Objective-C, GPU delegate is provided via C API.
|
||||||
|
|
||||||
In your application code, include the GPU delegate header and call the
|
In your application code, include the GPU delegate header and call the
|
||||||
`Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to
|
`Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to
|
||||||
the interpreter:
|
the interpreter:
|
||||||
|
@ -126,10 +126,28 @@ bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:gl_deleg
|
|||||||
bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library
|
bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library
|
||||||
```
|
```
|
||||||
|
|
||||||
### iOS (ObjC++)
|
### iOS (Swift)
|
||||||
|
|
||||||
To use TensorFlow Lite on GPU, get the GPU delegate via `NewGpuDelegate()` and
|
Initialize TensorFlow Lite interpreter with the GPU delegate.
|
||||||
then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
|
|
||||||
|
```swift
|
||||||
|
import TensorFlowLite
|
||||||
|
|
||||||
|
let delegate = MetalDelegate()
|
||||||
|
if let interpreter = try Interpreter(modelPath: modelPath,
|
||||||
|
delegates: [delegate]) {
|
||||||
|
|
||||||
|
// Run inference ...
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### iOS (Objective-C)
|
||||||
|
|
||||||
|
Note: For Objective-C, GPU delegate is provided via C API.
|
||||||
|
|
||||||
|
To use TensorFlow Lite on GPU, get the GPU delegate via `TFLGpuDelegateCreate()`
|
||||||
|
and then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
|
||||||
`Interpreter::AllocateTensors()`).
|
`Interpreter::AllocateTensors()`).
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
@ -142,12 +160,7 @@ InterpreterBuilder(*model, op_resolver)(&interpreter);
|
|||||||
|
|
||||||
// NEW: Prepare GPU delegate.
|
// NEW: Prepare GPU delegate.
|
||||||
|
|
||||||
const GpuDelegateOptions options = {
|
auto* delegate = TFLGpuDelegateCreate(/*default options=*/nullptr);
|
||||||
.allow_precision_loss = false,
|
|
||||||
.wait_type = kGpuDelegateOptions::WaitType::Passive,
|
|
||||||
};
|
|
||||||
|
|
||||||
auto* delegate = NewGpuDelegate(options);
|
|
||||||
if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
|
if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
|
||||||
|
|
||||||
// Run inference.
|
// Run inference.
|
||||||
@ -156,7 +169,7 @@ if (interpreter->Invoke() != kTfLiteOk) return false;
|
|||||||
ReadFromOutputTensor(interpreter->typed_output_tensor<float>(0));
|
ReadFromOutputTensor(interpreter->typed_output_tensor<float>(0));
|
||||||
|
|
||||||
// Clean up.
|
// Clean up.
|
||||||
DeleteGpuDelegate(delegate);
|
TFLGpuDelegateDelete(delegate);
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: When calling `Interpreter::ModifyGraphWithDelegate()` or
|
Note: When calling `Interpreter::ModifyGraphWithDelegate()` or
|
||||||
@ -169,7 +182,54 @@ called.
|
|||||||
|
|
||||||
## Advanced usage
|
## Advanced usage
|
||||||
|
|
||||||
### Running quantized models (Experimental, Android only)
|
### Delegate Options for iOS
|
||||||
|
|
||||||
|
`TFLGpuDelegateCreate()` accepts a `struct` of options.
|
||||||
|
([C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/metal_delegate.h),
|
||||||
|
[Swift API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift))
|
||||||
|
|
||||||
|
Passing `nullptr` (C API) or nothing (Swift API) to the initializer sets the
|
||||||
|
default options (which are explicated in the Basic Usage example above).
|
||||||
|
|
||||||
|
**Swift API**
|
||||||
|
|
||||||
|
```swift
|
||||||
|
|
||||||
|
// THIS:
|
||||||
|
var options = MetalDelegate.Options()
|
||||||
|
options.allowsPrecisionLoss = false
|
||||||
|
options.waitType = .passive
|
||||||
|
options.isQuantizationEnabled = false
|
||||||
|
let delegate = MetalDelegate(options: options)
|
||||||
|
|
||||||
|
// IS THE SAME AS THIS:
|
||||||
|
let delegate = MetalDelegate()
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
**C API (also used for Objective-C)**
|
||||||
|
|
||||||
|
```c++
|
||||||
|
|
||||||
|
// THIS:
|
||||||
|
const TFLGpuDelegateOptions options = {
|
||||||
|
.allow_precision_loss = false,
|
||||||
|
.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive,
|
||||||
|
.enable_quantization = false,
|
||||||
|
};
|
||||||
|
|
||||||
|
auto* delegate = TFLGpuDelegateCreate(&options);
|
||||||
|
|
||||||
|
// IS THE SAME AS THIS:
|
||||||
|
auto* delegate = TFLGpuDelegateCreate(nullptr);
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
While it is convenient to use `nullptr`, we recommend that you explicitly set
|
||||||
|
the options, to avoid any unexpected behavior if default values are changed in
|
||||||
|
the future.
|
||||||
|
|
||||||
|
### Running quantized models (Experimental)
|
||||||
|
|
||||||
The GPU delegate already supports
|
The GPU delegate already supports
|
||||||
[float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant)
|
[float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant)
|
||||||
@ -186,6 +246,8 @@ tensors.
|
|||||||
|
|
||||||
This feature can be enabled using delegate options as follows:
|
This feature can be enabled using delegate options as follows:
|
||||||
|
|
||||||
|
#### Android
|
||||||
|
|
||||||
**C++ API**
|
**C++ API**
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
@ -206,51 +268,30 @@ GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedMod
|
|||||||
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
|
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Delegate Options for iOS
|
#### iOS
|
||||||
|
|
||||||
`NewGpuDelegate()` accepts a `struct` of options.
|
**Swift API**
|
||||||
|
|
||||||
```c++
|
```swift
|
||||||
struct GpuDelegateOptions {
|
// NEW: Prepare custom options with feature enabled.
|
||||||
// Allows to quantify tensors, downcast values, process in float16 etc.
|
var options = MetalDelegate.Options()
|
||||||
bool allow_precision_loss;
|
options.isQuantizationEnabled = true
|
||||||
|
let delegate = MetalDelegate(options: options)
|
||||||
enum class WaitType {
|
|
||||||
// waitUntilCompleted
|
|
||||||
kPassive,
|
|
||||||
// Minimize latency. It uses active spinning instead of mutex and consumes
|
|
||||||
// additional CPU resources.
|
|
||||||
kActive,
|
|
||||||
// Useful when the output is used with GPU pipeline then or if external
|
|
||||||
// command encoder is set
|
|
||||||
kDoNotWait,
|
|
||||||
};
|
|
||||||
WaitType wait_type;
|
|
||||||
};
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Passing `nullptr` into `NewGpuDelegate()` sets the default options (which are
|
**C API (also used for Objective-C)**
|
||||||
explicated in the Basic Usage example above).
|
|
||||||
|
|
||||||
```c++
|
```c
|
||||||
|
|
||||||
// THIS:
|
// THIS:
|
||||||
const GpuDelegateOptions options = {
|
// NEW: Prepare custom options with feature enabled.
|
||||||
.allow_precision_loss = false,
|
const TFLGpuDelegateOptions options = {
|
||||||
.wait_type = kGpuDelegateOptions::WaitType::Passive,
|
.enable_quantization = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
auto* delegate = NewGpuDelegate(options);
|
auto* delegate = TFLGpuDelegateCreate(&options);
|
||||||
|
|
||||||
// IS THE SAME AS THIS:
|
|
||||||
auto* delegate = NewGpuDelegate(nullptr);
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
While it is convenient to use `nullptr`, we recommend that you explicitly set
|
|
||||||
the options, to avoid any unexpected behavior if default values are changed in
|
|
||||||
the future.
|
|
||||||
|
|
||||||
### Input/Output Buffers (iOS only)
|
### Input/Output Buffers (iOS only)
|
||||||
|
|
||||||
To do computation on the GPU, data must be made available to the GPU. This often
|
To do computation on the GPU, data must be made available to the GPU. This often
|
||||||
@ -280,8 +321,13 @@ off by calling `Interpreter::SetAllowBufferHandleOutput(true)` during
|
|||||||
initialization.
|
initialization.
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
|
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h"
|
||||||
|
|
||||||
|
// ...
|
||||||
|
|
||||||
// Prepare GPU delegate.
|
// Prepare GPU delegate.
|
||||||
auto* delegate = NewGpuDelegate(nullptr);
|
auto* delegate = TFLGpuDelegateCreate(nullptr);
|
||||||
interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy
|
interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy
|
||||||
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false;
|
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false;
|
||||||
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false;
|
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false;
|
||||||
|
Loading…
Reference in New Issue
Block a user