Add quantization option to Metal delegate Swift API

Also updated outdated GPU delegate documentation.

PiperOrigin-RevId: 315469909
Change-Id: I7b524373a397763c886905e83a2e8b75226d9471
This commit is contained in:
Taehee Jeong 2020-06-09 06:17:36 -07:00 committed by TensorFlower Gardener
parent c254833717
commit 7c1b0d0a37
3 changed files with 119 additions and 46 deletions

View File

@ -35,6 +35,7 @@ public final class MetalDelegate: Delegate {
var delegateOptions = TFLGpuDelegateOptions() var delegateOptions = TFLGpuDelegateOptions()
delegateOptions.allow_precision_loss = options.allowsPrecisionLoss delegateOptions.allow_precision_loss = options.allowsPrecisionLoss
delegateOptions.wait_type = options.waitType.cWaitType delegateOptions.wait_type = options.waitType.cWaitType
delegateOptions.enable_quantization = options.isQuantizationEnabled
cDelegate = TFLGpuDelegateCreate(&delegateOptions) cDelegate = TFLGpuDelegateCreate(&delegateOptions)
} }
@ -54,6 +55,10 @@ extension MetalDelegate {
/// default is `passive`. /// default is `passive`.
public var waitType: ThreadWaitType = .passive public var waitType: ThreadWaitType = .passive
/// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default
/// is `false`.
public var isQuantizationEnabled = false
/// Creates a new instance with the default values. /// Creates a new instance with the default values.
public init() {} public init() {}
} }

View File

@ -179,6 +179,28 @@ delegate.close();
### iOS ### iOS
#### Swift
Initialize TensorFlow Lite interpreter with the GPU delegate.
```swift
import TensorFlowLite
// Load model ...
let delegate = MetalDelegate()
if let interpreter = try? Interpreter(modelPath: modelPath,
delegates: [delegate]) {
// Run inference ...
}
```
#### Objective-C
Note: For Objective-C, the GPU delegate is provided via the C API.
In your application code, include the GPU delegate header and call the In your application code, include the GPU delegate header and call the
`Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to `Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to
the interpreter: the interpreter:

View File

@ -126,10 +126,28 @@ bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:gl_deleg
bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library
``` ```
### iOS (ObjC++) ### iOS (Swift)
To use TensorFlow Lite on GPU, get the GPU delegate via `NewGpuDelegate()` and Initialize TensorFlow Lite interpreter with the GPU delegate.
then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
```swift
import TensorFlowLite
let delegate = MetalDelegate()
if let interpreter = try? Interpreter(modelPath: modelPath,
delegates: [delegate]) {
// Run inference ...
}
```
### iOS (Objective-C)
Note: For Objective-C, the GPU delegate is provided via the C API.
To use TensorFlow Lite on GPU, get the GPU delegate via `TFLGpuDelegateCreate()`
and then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling
`Interpreter::AllocateTensors()`). `Interpreter::AllocateTensors()`).
```c++ ```c++
@ -142,12 +160,7 @@ InterpreterBuilder(*model, op_resolver)(&interpreter);
// NEW: Prepare GPU delegate. // NEW: Prepare GPU delegate.
const GpuDelegateOptions options = { auto* delegate = TFLGpuDelegateCreate(/*default options=*/nullptr);
.allow_precision_loss = false,
.wait_type = kGpuDelegateOptions::WaitType::Passive,
};
auto* delegate = NewGpuDelegate(options);
if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false; if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false;
// Run inference. // Run inference.
@ -156,7 +169,7 @@ if (interpreter->Invoke() != kTfLiteOk) return false;
ReadFromOutputTensor(interpreter->typed_output_tensor<float>(0)); ReadFromOutputTensor(interpreter->typed_output_tensor<float>(0));
// Clean up. // Clean up.
DeleteGpuDelegate(delegate); TFLGpuDelegateDelete(delegate);
``` ```
Note: When calling `Interpreter::ModifyGraphWithDelegate()` or Note: When calling `Interpreter::ModifyGraphWithDelegate()` or
@ -169,7 +182,54 @@ called.
## Advanced usage ## Advanced usage
### Running quantized models (Experimental, Android only) ### Delegate Options for iOS
`TFLGpuDelegateCreate()` accepts a `struct` of options.
([C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/metal_delegate.h),
[Swift API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift))
Passing `nullptr` (C API) or nothing (Swift API) to the initializer sets the
default options (which are explained in the Basic Usage example above).
**Swift API**
```swift
// THIS:
var options = MetalDelegate.Options()
options.allowsPrecisionLoss = false
options.waitType = .passive
options.isQuantizationEnabled = false
let delegate = MetalDelegate(options: options)
// IS THE SAME AS THIS:
let delegate = MetalDelegate()
```
**C API (also used for Objective-C)**
```c++
// THIS:
const TFLGpuDelegateOptions options = {
.allow_precision_loss = false,
.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive,
.enable_quantization = false,
};
auto* delegate = TFLGpuDelegateCreate(&options);
// IS THE SAME AS THIS:
auto* delegate = TFLGpuDelegateCreate(nullptr);
```
While it is convenient to use `nullptr`, we recommend that you explicitly set
the options, to avoid any unexpected behavior if default values are changed in
the future.
### Running quantized models (Experimental)
The GPU delegate already supports The GPU delegate already supports
[float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant) [float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant)
@ -186,6 +246,8 @@ tensors.
This feature can be enabled using delegate options as follows: This feature can be enabled using delegate options as follows:
#### Android
**C++ API** **C++ API**
```c++ ```c++
@ -206,51 +268,30 @@ GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedMod
Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate); Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate);
``` ```
### Delegate Options for iOS #### iOS
`NewGpuDelegate()` accepts a `struct` of options. **Swift API**
```c++ ```swift
struct GpuDelegateOptions { // NEW: Prepare custom options with feature enabled.
// Allows to quantify tensors, downcast values, process in float16 etc. var options = MetalDelegate.Options()
bool allow_precision_loss; options.isQuantizationEnabled = true
let delegate = MetalDelegate(options: options)
enum class WaitType {
// waitUntilCompleted
kPassive,
// Minimize latency. It uses active spinning instead of mutex and consumes
// additional CPU resources.
kActive,
// Useful when the output is used with GPU pipeline then or if external
// command encoder is set
kDoNotWait,
};
WaitType wait_type;
};
``` ```
Passing `nullptr` into `NewGpuDelegate()` sets the default options (which are **C API (also used for Objective-C)**
explicated in the Basic Usage example above).
```c++ ```c
// THIS: // THIS:
const GpuDelegateOptions options = { // NEW: Prepare custom options with feature enabled.
.allow_precision_loss = false, const TFLGpuDelegateOptions options = {
.wait_type = kGpuDelegateOptions::WaitType::Passive, .enable_quantization = true,
}; };
auto* delegate = NewGpuDelegate(options); auto* delegate = TFLGpuDelegateCreate(options);
// IS THE SAME AS THIS:
auto* delegate = NewGpuDelegate(nullptr);
``` ```
While it is convenient to use `nullptr`, we recommend that you explicitly set
the options, to avoid any unexpected behavior if default values are changed in
the future.
### Input/Output Buffers (iOS only) ### Input/Output Buffers (iOS only)
To do computation on the GPU, data must be made available to the GPU. This often To do computation on the GPU, data must be made available to the GPU. This often
@ -280,8 +321,13 @@ off by calling `Interpreter::SetAllowBufferHandleOutput(true)` during
initialization. initialization.
```c++ ```c++
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h"
// ...
// Prepare GPU delegate. // Prepare GPU delegate.
auto* delegate = NewGpuDelegate(nullptr); auto* delegate = TFLGpuDelegateCreate(nullptr);
interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false; if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false;
if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false; if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false;