Enable quantized models by default on iOS APIs. Also makes related changes to docs.

PiperOrigin-RevId: 337876402
Change-Id: I7abb19894297cfe2781997b3e3e3ba4074fbf7e4
This commit is contained in:
Taehee Jeong 2020-10-19 09:50:16 -07:00 committed by TensorFlower Gardener
parent f7f73a1ff8
commit 1dd24b74c2
9 changed files with 51 additions and 29 deletions

View File

@ -47,10 +47,15 @@ typedef struct {
bool allow_precision_loss;
TFLGpuDelegateWaitType wait_type;
// Allows execution of integer quantized models
// TODO(b/169350710): Enable by default.
bool enable_quantization;
} TFLGpuDelegateOptions;
// Populates TFLGpuDelegateOptions as follows:
// allow_precision_loss = false;
// wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
// enable_quantization = true;
TFL_CAPI_EXPORT extern TFLGpuDelegateOptions TFLGpuDelegateOptionsDefault(void);
// Creates a new delegate instance that need to be destroyed with
// `TFLDeleteTfLiteGpuDelegate` when delegate is no longer used by TFLite.
// When `options` is set to `nullptr`, the following default values are used:

View File

@ -177,10 +177,7 @@ class Delegate {
if (options) {
options_ = *options;
} else {
// Default options.
options_.allow_precision_loss = false;
options_.enable_quantization = false;
options_.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
options_ = TFLGpuDelegateOptionsDefault();
}
metal_device_ = MTLCreateSystemDefaultDevice();
command_queue_ = [metal_device_ newCommandQueue];
@ -732,3 +729,12 @@ bool TFLGpuDelegateSetCommandEncoder(
metal_delegate->SetCommandEncoder(encoder, control_encoder);
return true;
}
// Returns the default delegate configuration: full-precision (FP32)
// inference, passive thread waiting, and quantized-model execution enabled.
TFLGpuDelegateOptions TFLGpuDelegateOptionsDefault() {
  TFLGpuDelegateOptions options = {};
  options.allow_precision_loss = false;
  options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
  options.enable_quantization = true;
  return options;
}

View File

@ -57,7 +57,7 @@ typedef NS_ENUM(NSUInteger, TFLMetalDelegateThreadWaitType) {
/**
* Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default is
* `false`.
* `true`.
*/
@property(nonatomic, getter=isQuantizationEnabled) BOOL quantizationEnabled;

View File

@ -29,6 +29,7 @@ NS_ASSUME_NONNULL_BEGIN
- (instancetype)init {
self = [super init];
if (self != nil) {
_quantizationEnabled = true;
_waitType = TFLMetalDelegateThreadWaitTypePassive;
}
return self;

View File

@ -62,8 +62,8 @@ extension MetalDelegate {
public var waitType: ThreadWaitType = .passive
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
/// is `false`.
public var isQuantizationEnabled = false
/// is `true`.
public var isQuantizationEnabled = true
/// Creates a new instance with the default values.
public init() {}

View File

@ -52,8 +52,7 @@ Pod::Spec.new do |s|
metal.test_spec 'Tests' do |ts|
ts.source_files = swift_dir + 'Tests/{Interpreter,MetalDelegate}Tests.swift'
ts.resources = [
tfl_dir + 'testdata/add.bin',
tfl_dir + 'testdata/add_quantized.bin',
tfl_dir + 'testdata/multi_add.bin',
]
end
end

View File

@ -34,18 +34,26 @@ class MetalDelegateTests: XCTestCase {
}
func testInitInterpreterWithDelegate() throws {
// If metal device is not available, skip.
if MTLCreateSystemDefaultDevice() == nil {
return
}
let metalDelegate = MetalDelegate()
let interpreter = try Interpreter(modelPath: AddQuantizedModel.path, delegates: [metalDelegate])
let interpreter = try Interpreter(modelPath: MultiAddModel.path, delegates: [metalDelegate])
XCTAssertEqual(interpreter.delegates?.count, 1)
XCTAssertNil(interpreter.options)
}
func testInitInterpreterWithOptionsAndDelegate() throws {
// If metal device is not available, skip.
if MTLCreateSystemDefaultDevice() == nil {
return
}
var options = Interpreter.Options()
options.threadCount = 1
let metalDelegate = MetalDelegate()
let interpreter = try Interpreter(
modelPath: AddQuantizedModel.path,
modelPath: MultiAddModel.path,
options: options,
delegates: [metalDelegate]
)
@ -91,3 +99,16 @@ class MetalDelegateOptionsTests: XCTestCase {
XCTAssertNotEqual(options1, options2)
}
}
/// Values for the `multi_add.bin` model.
enum MultiAddModel {
  static let info = (name: "multi_add", extension: "bin")

  /// Absolute path to the bundled model file, or an empty string when the
  /// resource cannot be found in the test bundle.
  static var path: String = {
    let bundle = Bundle(for: MetalDelegateTests.self)
    return bundle.path(forResource: info.name, ofType: info.extension) ?? ""
  }()
}

View File

@ -12,11 +12,9 @@ resulting in lower latency. In the best scenario, inference on the GPU may now
run fast enough for previously not available real-time applications.
Unlike CPUs, GPUs compute with 16-bit or 32-bit floating point numbers and do
not require quantization for optimal performance.
**NOTE:** The delegate does accept 8-bit quantized models on Android. Support on
iOS is experimental. Refer to the [advanced documentation](gpu_advanced.md) for
details.
not require quantization for optimal performance. The delegate does accept 8-bit
quantized models, but the calculation will be performed in floating point
numbers. Refer to the [advanced documentation](gpu_advanced.md) for details.
Another benefit with GPU inference is its power efficiency. GPUs carry out the
computations in a very efficient and optimized manner, so that they consume less

View File

@ -325,7 +325,6 @@ Android APIs support quantized models by default. To disable, do the following:
**C++ API**
```c++
// NEW: Prepare custom options with feature enabled.
TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
options.experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE;
@ -336,7 +335,6 @@ if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
**Java API**
```java
// NEW: Prepare GPU delegate with feature turned on.
GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedModelsAllowed(false));
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
@ -344,27 +342,21 @@ Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
#### iOS
Support for quantized models on iOS APIs is experimental. To enable, do the
following:
iOS APIs support quantized models by default. To disable, do the following:
**Swift API**
```swift
// NEW: Prepare custom options with feature enabled.
var options = MetalDelegate.Options()
options.isQuantizationEnabled = true
options.isQuantizationEnabled = false
let delegate = MetalDelegate(options: options)
```
**C API (also used for Objective-C)**
```c
// THIS:
// NEW: Prepare custom options with the feature disabled.
const TFLGpuDelegateOptions options = {
.enable_quantization = true,
};
TFLGpuDelegateOptions options = TFLGpuDelegateOptionsDefault();
options.enable_quantization = false;
auto* delegate = TFLGpuDelegateCreate(&options);
```