Enable quantized models by default on iOS APIs. Also makes related changes to docs.
PiperOrigin-RevId: 337876402 Change-Id: I7abb19894297cfe2781997b3e3e3ba4074fbf7e4
This commit is contained in:
parent
f7f73a1ff8
commit
1dd24b74c2
@ -47,10 +47,15 @@ typedef struct {
|
|||||||
bool allow_precision_loss;
|
bool allow_precision_loss;
|
||||||
TFLGpuDelegateWaitType wait_type;
|
TFLGpuDelegateWaitType wait_type;
|
||||||
// Allows execution of integer quantized models
|
// Allows execution of integer quantized models
|
||||||
// TODO(b/169350710): Enable by default.
|
|
||||||
bool enable_quantization;
|
bool enable_quantization;
|
||||||
} TFLGpuDelegateOptions;
|
} TFLGpuDelegateOptions;
|
||||||
|
|
||||||
|
// Populates TFLGpuDelegateOptions as follows:
|
||||||
|
// allow_precision_loss = false;
|
||||||
|
// wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
|
||||||
|
// enable_quantization = true;
|
||||||
|
TFL_CAPI_EXPORT extern TFLGpuDelegateOptions TFLGpuDelegateOptionsDefault(void);
|
||||||
|
|
||||||
// Creates a new delegate instance that needs to be destroyed with
|
// Creates a new delegate instance that needs to be destroyed with
|
||||||
// `TFLDeleteTfLiteGpuDelegate` when delegate is no longer used by TFLite.
|
// `TFLDeleteTfLiteGpuDelegate` when delegate is no longer used by TFLite.
|
||||||
// When `options` is set to `nullptr`, the following default values are used:
|
// When `options` is set to `nullptr`, the following default values are used:
|
||||||
|
@ -177,10 +177,7 @@ class Delegate {
|
|||||||
if (options) {
|
if (options) {
|
||||||
options_ = *options;
|
options_ = *options;
|
||||||
} else {
|
} else {
|
||||||
// Default options.
|
options_ = TFLGpuDelegateOptionsDefault();
|
||||||
options_.allow_precision_loss = false;
|
|
||||||
options_.enable_quantization = false;
|
|
||||||
options_.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
|
|
||||||
}
|
}
|
||||||
metal_device_ = MTLCreateSystemDefaultDevice();
|
metal_device_ = MTLCreateSystemDefaultDevice();
|
||||||
command_queue_ = [metal_device_ newCommandQueue];
|
command_queue_ = [metal_device_ newCommandQueue];
|
||||||
@ -732,3 +729,12 @@ bool TFLGpuDelegateSetCommandEncoder(
|
|||||||
metal_delegate->SetCommandEncoder(encoder, control_encoder);
|
metal_delegate->SetCommandEncoder(encoder, control_encoder);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TFLGpuDelegateOptions TFLGpuDelegateOptionsDefault() {
|
||||||
|
TFLGpuDelegateOptions options = {
|
||||||
|
.allow_precision_loss = false,
|
||||||
|
.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive,
|
||||||
|
.enable_quantization = true,
|
||||||
|
};
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
@ -57,7 +57,7 @@ typedef NS_ENUM(NSUInteger, TFLMetalDelegateThreadWaitType) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default is
|
* Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default is
|
||||||
* `false`.
|
* `true`.
|
||||||
*/
|
*/
|
||||||
@property(nonatomic, getter=isQuantizationEnabled) BOOL quantizationEnabled;
|
@property(nonatomic, getter=isQuantizationEnabled) BOOL quantizationEnabled;
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@ NS_ASSUME_NONNULL_BEGIN
|
|||||||
- (instancetype)init {
|
- (instancetype)init {
|
||||||
self = [super init];
|
self = [super init];
|
||||||
if (self != nil) {
|
if (self != nil) {
|
||||||
|
_quantizationEnabled = true;
|
||||||
_waitType = TFLMetalDelegateThreadWaitTypePassive;
|
_waitType = TFLMetalDelegateThreadWaitTypePassive;
|
||||||
}
|
}
|
||||||
return self;
|
return self;
|
||||||
|
@ -62,8 +62,8 @@ extension MetalDelegate {
|
|||||||
public var waitType: ThreadWaitType = .passive
|
public var waitType: ThreadWaitType = .passive
|
||||||
|
|
||||||
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
|
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
|
||||||
/// is `false`.
|
/// is `true`.
|
||||||
public var isQuantizationEnabled = false
|
public var isQuantizationEnabled = true
|
||||||
|
|
||||||
/// Creates a new instance with the default values.
|
/// Creates a new instance with the default values.
|
||||||
public init() {}
|
public init() {}
|
||||||
|
@ -52,8 +52,7 @@ Pod::Spec.new do |s|
|
|||||||
metal.test_spec 'Tests' do |ts|
|
metal.test_spec 'Tests' do |ts|
|
||||||
ts.source_files = swift_dir + 'Tests/{Interpreter,MetalDelegate}Tests.swift'
|
ts.source_files = swift_dir + 'Tests/{Interpreter,MetalDelegate}Tests.swift'
|
||||||
ts.resources = [
|
ts.resources = [
|
||||||
tfl_dir + 'testdata/add.bin',
|
tfl_dir + 'testdata/multi_add.bin',
|
||||||
tfl_dir + 'testdata/add_quantized.bin',
|
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -34,18 +34,26 @@ class MetalDelegateTests: XCTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func testInitInterpreterWithDelegate() throws {
|
func testInitInterpreterWithDelegate() throws {
|
||||||
|
// If metal device is not available, skip.
|
||||||
|
if MTLCreateSystemDefaultDevice() == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
let metalDelegate = MetalDelegate()
|
let metalDelegate = MetalDelegate()
|
||||||
let interpreter = try Interpreter(modelPath: AddQuantizedModel.path, delegates: [metalDelegate])
|
let interpreter = try Interpreter(modelPath: MultiAddModel.path, delegates: [metalDelegate])
|
||||||
XCTAssertEqual(interpreter.delegates?.count, 1)
|
XCTAssertEqual(interpreter.delegates?.count, 1)
|
||||||
XCTAssertNil(interpreter.options)
|
XCTAssertNil(interpreter.options)
|
||||||
}
|
}
|
||||||
|
|
||||||
func testInitInterpreterWithOptionsAndDelegate() throws {
|
func testInitInterpreterWithOptionsAndDelegate() throws {
|
||||||
|
// If metal device is not available, skip.
|
||||||
|
if MTLCreateSystemDefaultDevice() == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
var options = Interpreter.Options()
|
var options = Interpreter.Options()
|
||||||
options.threadCount = 1
|
options.threadCount = 1
|
||||||
let metalDelegate = MetalDelegate()
|
let metalDelegate = MetalDelegate()
|
||||||
let interpreter = try Interpreter(
|
let interpreter = try Interpreter(
|
||||||
modelPath: AddQuantizedModel.path,
|
modelPath: MultiAddModel.path,
|
||||||
options: options,
|
options: options,
|
||||||
delegates: [metalDelegate]
|
delegates: [metalDelegate]
|
||||||
)
|
)
|
||||||
@ -91,3 +99,16 @@ class MetalDelegateOptionsTests: XCTestCase {
|
|||||||
XCTAssertNotEqual(options1, options2)
|
XCTAssertNotEqual(options1, options2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Values for the `multi_add.bin` model.
|
||||||
|
enum MultiAddModel {
|
||||||
|
static let info = (name: "multi_add", extension: "bin")
|
||||||
|
|
||||||
|
static var path: String = {
|
||||||
|
let bundle = Bundle(for: MetalDelegateTests.self)
|
||||||
|
guard let path = bundle.path(forResource: info.name, ofType: info.extension) else { return "" }
|
||||||
|
return path
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -12,11 +12,9 @@ resulting in lower latency. In the best scenario, inference on the GPU may now
|
|||||||
run fast enough for previously not available real-time applications.
|
run fast enough for previously not available real-time applications.
|
||||||
|
|
||||||
Unlike CPUs, GPUs compute with 16-bit or 32-bit floating point numbers and do
|
Unlike CPUs, GPUs compute with 16-bit or 32-bit floating point numbers and do
|
||||||
not require quantization for optimal performance.
|
not require quantization for optimal performance. The delegate does accept 8-bit
|
||||||
|
quantized models, but the calculation will be performed in floating point
|
||||||
**NOTE:** The delegate does accept 8-bit quantized models on Android. Support on
|
numbers. Refer to the [advanced documentation](gpu_advanced.md) for details.
|
||||||
iOS is experimental. Refer to the [advanced documentation](gpu_advanced.md) for
|
|
||||||
details.
|
|
||||||
|
|
||||||
Another benefit with GPU inference is its power efficiency. GPUs carry out the
|
Another benefit with GPU inference is its power efficiency. GPUs carry out the
|
||||||
computations in a very efficient and optimized manner, so that they consume less
|
computations in a very efficient and optimized manner, so that they consume less
|
||||||
|
@ -325,7 +325,6 @@ Android APIs support quantized models by default. To disable, do the following:
|
|||||||
**C++ API**
|
**C++ API**
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
// NEW: Prepare custom options with feature enabled.
|
|
||||||
TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
|
TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
|
||||||
options.experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE;
|
options.experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE;
|
||||||
|
|
||||||
@ -336,7 +335,6 @@ if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
|
|||||||
**Java API**
|
**Java API**
|
||||||
|
|
||||||
```java
|
```java
|
||||||
// NEW: Prepare GPU delegate with feature turned on.
|
|
||||||
GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedModelsAllowed(false));
|
GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedModelsAllowed(false));
|
||||||
|
|
||||||
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
|
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
|
||||||
@ -344,27 +342,21 @@ Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
|
|||||||
|
|
||||||
#### iOS
|
#### iOS
|
||||||
|
|
||||||
Support for quantized models on iOS APIs is experimental. To enable, do the
|
iOS APIs support quantized models by default. To disable, do the following:
|
||||||
following:
|
|
||||||
|
|
||||||
**Swift API**
|
**Swift API**
|
||||||
|
|
||||||
```swift
|
```swift
|
||||||
// NEW: Prepare custom options with feature enabled.
|
|
||||||
var options = MetalDelegate.Options()
|
var options = MetalDelegate.Options()
|
||||||
options.isQuantizationEnabled = true
|
options.isQuantizationEnabled = false
|
||||||
let delegate = MetalDelegate(options: options)
|
let delegate = MetalDelegate(options: options)
|
||||||
```
|
```
|
||||||
|
|
||||||
**C API (also used for Objective-C)**
|
**C API (also used for Objective-C)**
|
||||||
|
|
||||||
```c
|
```c
|
||||||
|
TFLGpuDelegateOptions options = TFLGpuDelegateOptionsDefault();
|
||||||
// THIS:
|
options.enable_quantization = false;
|
||||||
// NEW: Prepare custom options with feature enabled.
|
|
||||||
const TFLGpuDelegateOptions options = {
|
|
||||||
.enable_quantization = true,
|
|
||||||
};
|
|
||||||
|
|
||||||
auto* delegate = TFLGpuDelegateCreate(&options);
|
auto* delegate = TFLGpuDelegateCreate(&options);
|
||||||
```
|
```
|
||||||
|
Loading…
x
Reference in New Issue
Block a user