Enable quantized models by default on iOS APIs. Also makes related changes to docs.
PiperOrigin-RevId: 337876402 Change-Id: I7abb19894297cfe2781997b3e3e3ba4074fbf7e4
This commit is contained in:
parent
f7f73a1ff8
commit
1dd24b74c2
tensorflow/lite
delegates/gpu
experimental
objc
swift
g3doc/performance
@ -47,10 +47,15 @@ typedef struct {
|
||||
bool allow_precision_loss;
|
||||
TFLGpuDelegateWaitType wait_type;
|
||||
// Allows execution of integer quantized models.
|
||||
// TODO(b/169350710): Enable by default.
|
||||
bool enable_quantization;
|
||||
} TFLGpuDelegateOptions;
|
||||
|
||||
// Populates TFLGpuDelegateOptions as follows:
|
||||
// allow_precision_loss = false;
|
||||
// wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
|
||||
// enable_quantization = true;
|
||||
TFL_CAPI_EXPORT extern TFLGpuDelegateOptions TFLGpuDelegateOptionsDefault(void);
|
||||
|
||||
// Creates a new delegate instance that needs to be destroyed with
|
||||
// `TFLDeleteTfLiteGpuDelegate` when delegate is no longer used by TFLite.
|
||||
// When `options` is set to `nullptr`, the following default values are used:
|
||||
|
@ -177,10 +177,7 @@ class Delegate {
|
||||
if (options) {
|
||||
options_ = *options;
|
||||
} else {
|
||||
// Default options.
|
||||
options_.allow_precision_loss = false;
|
||||
options_.enable_quantization = false;
|
||||
options_.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
|
||||
options_ = TFLGpuDelegateOptionsDefault();
|
||||
}
|
||||
metal_device_ = MTLCreateSystemDefaultDevice();
|
||||
command_queue_ = [metal_device_ newCommandQueue];
|
||||
@ -732,3 +729,12 @@ bool TFLGpuDelegateSetCommandEncoder(
|
||||
metal_delegate->SetCommandEncoder(encoder, control_encoder);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the default delegate options: full-precision (FP32) inference,
// passive thread waiting, and quantized-model execution enabled.
TFLGpuDelegateOptions TFLGpuDelegateOptionsDefault() {
  // Value-initialize first so any fields added later start zeroed, matching
  // the behavior of a designated-initializer list.
  TFLGpuDelegateOptions options = {};
  options.allow_precision_loss = false;
  options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
  options.enable_quantization = true;
  return options;
}
|
||||
|
@ -57,7 +57,7 @@ typedef NS_ENUM(NSUInteger, TFLMetalDelegateThreadWaitType) {
|
||||
|
||||
/**
|
||||
* Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default is
|
||||
* `false`.
|
||||
* `true`.
|
||||
*/
|
||||
@property(nonatomic, getter=isQuantizationEnabled) BOOL quantizationEnabled;
|
||||
|
||||
|
@ -29,6 +29,7 @@ NS_ASSUME_NONNULL_BEGIN
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self != nil) {
|
||||
_quantizationEnabled = true;
|
||||
_waitType = TFLMetalDelegateThreadWaitTypePassive;
|
||||
}
|
||||
return self;
|
||||
|
@ -62,8 +62,8 @@ extension MetalDelegate {
|
||||
public var waitType: ThreadWaitType = .passive
|
||||
|
||||
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
|
||||
/// is `false`.
|
||||
public var isQuantizationEnabled = false
|
||||
/// is `true`.
|
||||
public var isQuantizationEnabled = true
|
||||
|
||||
/// Creates a new instance with the default values.
|
||||
public init() {}
|
||||
|
@ -52,8 +52,7 @@ Pod::Spec.new do |s|
|
||||
metal.test_spec 'Tests' do |ts|
|
||||
ts.source_files = swift_dir + 'Tests/{Interpreter,MetalDelegate}Tests.swift'
|
||||
ts.resources = [
|
||||
tfl_dir + 'testdata/add.bin',
|
||||
tfl_dir + 'testdata/add_quantized.bin',
|
||||
tfl_dir + 'testdata/multi_add.bin',
|
||||
]
|
||||
end
|
||||
end
|
||||
|
@ -34,18 +34,26 @@ class MetalDelegateTests: XCTestCase {
|
||||
}
|
||||
|
||||
// Verifies that an interpreter can be created with a default MetalDelegate
// attached and that the delegate is registered exactly once.
//
// NOTE(review): this diff fragment contained both the pre-change
// (`AddQuantizedModel.path`) and post-change (`MultiAddModel.path`) model
// lines; resolved to the post-commit `MultiAddModel` version.
func testInitInterpreterWithDelegate() throws {
  // If metal device is not available (e.g. simulator/CI), skip.
  if MTLCreateSystemDefaultDevice() == nil {
    return
  }
  let metalDelegate = MetalDelegate()
  let interpreter = try Interpreter(modelPath: MultiAddModel.path, delegates: [metalDelegate])
  XCTAssertEqual(interpreter.delegates?.count, 1)
  XCTAssertNil(interpreter.options)
}
|
||||
|
||||
func testInitInterpreterWithOptionsAndDelegate() throws {
|
||||
// If metal device is not available, skip.
|
||||
if MTLCreateSystemDefaultDevice() == nil {
|
||||
return
|
||||
}
|
||||
var options = Interpreter.Options()
|
||||
options.threadCount = 1
|
||||
let metalDelegate = MetalDelegate()
|
||||
let interpreter = try Interpreter(
|
||||
modelPath: AddQuantizedModel.path,
|
||||
modelPath: MultiAddModel.path,
|
||||
options: options,
|
||||
delegates: [metalDelegate]
|
||||
)
|
||||
@ -91,3 +99,16 @@ class MetalDelegateOptionsTests: XCTestCase {
|
||||
XCTAssertNotEqual(options1, options2)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Values for the `multi_add.bin` model.
enum MultiAddModel {
  static let info = (name: "multi_add", extension: "bin")

  /// Absolute bundle path of the model resource, or the empty string when the
  /// resource cannot be located.
  static var path: String = {
    let bundle = Bundle(for: MetalDelegateTests.self)
    let resourcePath = bundle.path(forResource: info.name, ofType: info.extension)
    return resourcePath ?? ""
  }()
}
|
||||
|
||||
|
@ -12,11 +12,9 @@ resulting in lower latency. In the best scenario, inference on the GPU may now
|
||||
run fast enough for previously not available real-time applications.
|
||||
|
||||
Unlike CPUs, GPUs compute with 16-bit or 32-bit floating point numbers and do
|
||||
not require quantization for optimal performance.
|
||||
|
||||
**NOTE:** The delegate does accept 8-bit quantized models on Android. Support on
|
||||
iOS is experimental. Refer to the [advanced documentation](gpu_advanced.md) for
|
||||
details.
|
||||
not require quantization for optimal performance. The delegate does accept 8-bit
|
||||
quantized models, but the calculation will be performed in floating point
|
||||
numbers. Refer to the [advanced documentation](gpu_advanced.md) for details.
|
||||
|
||||
Another benefit with GPU inference is its power efficiency. GPUs carry out the
|
||||
computations in a very efficient and optimized manner, so that they consume less
|
||||
|
@ -325,7 +325,6 @@ Android APIs support quantized models by default. To disable, do the following:
|
||||
**C++ API**
|
||||
|
||||
```c++
|
||||
// NEW: Prepare custom options with the feature disabled.
|
||||
TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
|
||||
options.experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE;
|
||||
|
||||
@ -336,7 +335,6 @@ if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
|
||||
**Java API**
|
||||
|
||||
```java
|
||||
// NEW: Prepare GPU delegate with the feature turned off.
|
||||
GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedModelsAllowed(false));
|
||||
|
||||
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
|
||||
@ -344,27 +342,21 @@ Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
|
||||
|
||||
#### iOS
|
||||
|
||||
Support for quantized models on iOS APIs is experimental. To enable, do the
|
||||
following:
|
||||
iOS APIs support quantized models by default. To disable, do the following:
|
||||
|
||||
**Swift API**
|
||||
|
||||
```swift
|
||||
// NEW: Prepare custom options with the feature disabled.
|
||||
var options = MetalDelegate.Options()
|
||||
options.isQuantizationEnabled = true
|
||||
options.isQuantizationEnabled = false
|
||||
let delegate = MetalDelegate(options: options)
|
||||
```
|
||||
|
||||
**C API (also used for Objective-C)**
|
||||
|
||||
```c
|
||||
|
||||
// THIS:
|
||||
// NEW: Prepare custom options with the feature disabled.
|
||||
const TFLGpuDelegateOptions options = {
|
||||
.enable_quantization = true,
|
||||
};
|
||||
TFLGpuDelegateOptions options = TFLGpuDelegateOptionsDefault();
|
||||
options.enable_quantization = false;
|
||||
|
||||
auto* delegate = TFLGpuDelegateCreate(options);
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user