Support binding of Metal buffer to quantized models

When an input or output of a quantized model is read from a Metal buffer directly, correctly link the internal FP32 tensor to the buffer and disable dequantization/quantization for that input/output.

PiperOrigin-RevId: 318978466
Change-Id: I052bf6c1daab6425e6ff66166221f36b075326e8
This commit is contained in:
Taehee Jeong 2020-06-30 01:36:33 -07:00 committed by TensorFlower Gardener
parent 35b3d90361
commit 3c7a1d8827
2 changed files with 12 additions and 2 deletions
tensorflow/lite/delegates/gpu

View File

@ -205,6 +205,14 @@ class Delegate {
}
absl::Status BindBufferToTensor(id<MTLBuffer> buffer, int tensor_index) {
// The tensor index is expected to be an input or output tensor of the interpreter.
// For quantized models, the buffer should be linked with its dequantized counterpart.
if (quant_conversion_map_.find(tensor_index) != quant_conversion_map_.end()) {
tensor_index = quant_conversion_map_[tensor_index];
// Remove the [dequantized tensor ID] -> [quantized tensor ID] mapping to prevent extra
// dequant/quant on inputs/outputs.
quant_conversion_map_.erase(tensor_index);
}
for (auto& input : graph_inputs_) {
if (input.tensor_id == tensor_index) {
input_output_buffers_[input.id] = buffer;

View File

@ -24,9 +24,11 @@ struct TfLiteDelegate;
// Binds Metal buffer to an input or an output tensor in the initialized
// delegate. Bound buffer should have sufficient storage to accommodate all
// elements of a tensor. Returns non-zero on success, or zero otherwise.
// elements of a tensor. For quantized models, the buffer is bound to the internal
// dequantized float32 tensor.
// Returns non-zero on success, or zero otherwise.
//
// *** Must be called *before* `Interpreter::ModifyGraphWithDelegate`. ***
// *** Must be called *after* `Interpreter::ModifyGraphWithDelegate`. ***
bool TFLGpuDelegateBindMetalBufferToTensor(TfLiteDelegate* delegate,
int tensor_index,
id<MTLBuffer> metal_buffer);