From 251a4740969ed724f07c0fb5cde86fcf624d95ec Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Mon, 8 Jun 2020 04:11:43 -0700 Subject: [PATCH] Support benchmark of quantized models in model benchmark Reformat benchmark documentation to make GPU delegate options for Android and iOS be seen clearly. PiperOrigin-RevId: 315248409 Change-Id: Iac19e8afdcf9414496f961b836fc510883a77757 --- tensorflow/lite/tools/delegates/README.md | 23 +++++++++++-------- .../tools/delegates/gpu_delegate_provider.cc | 16 +++++++------ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/tensorflow/lite/tools/delegates/README.md b/tensorflow/lite/tools/delegates/README.md index bc1bffd49b6..ed583cce070 100644 --- a/tensorflow/lite/tools/delegates/README.md +++ b/tensorflow/lite/tools/delegates/README.md @@ -31,26 +31,31 @@ TFLite delegate. This option is currently supported by the Hexagon and CoreML delegate. ### GPU delegate provider + +Only Android and iOS devices support the GPU delegate. + +#### Common options * `use_gpu`: `bool` (default=false) \ Whether to use the [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu). - This option is currently only available on Android and iOS devices. * `gpu_precision_loss_allowed`: `bool` (default=true) \ - Whethre to allow the GPU delegate to carry out computation with some + Whether to allow the GPU delegate to carry out computation with some precision loss (i.e. processing in FP16) or not. If allowed, the performance will increase. * `gpu_experimental_enable_quant`: `bool` (default=true) \ - Whether to allow the GPU delegate to run a quantized model or not. \ - This option is currently only available on Android. + Whether to allow the GPU delegate to run an 8-bit quantized model or not. + +#### Android options * `gpu_backend`: `string` (default="") \ Force the GPU delegate to use a particular backend for execution, and fail if unsuccessful. Should be one of: cl, gl.
By default, the GPU delegate will - try OpenCL first and then OpenGL if the former fails.\ - Note this option is only available on Android. + try OpenCL first and then OpenGL if the former fails. + +#### iOS options * `gpu_wait_type`: `string` (default="") \ - Which GPU wait_type option to use, when using GPU delegate on iOS. Should be - one of the following: passive, active, do_not_wait, aggressive. When left - blank, passive mode is used by default. + Which GPU wait_type option to use. Should be one of the following: passive, + active, do_not_wait, aggressive. When left blank, passive mode is used by + default. ### NNAPI delegate provider * `use_nnapi`: `bool` (default=false) \ diff --git a/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc b/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc index 62805b2644b..8dc46dcaaac 100644 --- a/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc +++ b/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc @@ -37,10 +37,10 @@ class GpuDelegateProvider : public DelegateProvider { #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE) default_params_.AddParam("gpu_precision_loss_allowed", ToolParam::Create(true)); -#endif -#if defined(__ANDROID__) default_params_.AddParam("gpu_experimental_enable_quant", ToolParam::Create(true)); +#endif +#if defined(__ANDROID__) default_params_.AddParam("gpu_backend", ToolParam::Create("")); #endif #if defined(REAL_IPHONE_DEVICE) @@ -66,11 +66,11 @@ std::vector GpuDelegateProvider::CreateFlags(ToolParams* params) const { CreateFlag("gpu_precision_loss_allowed", params, "Allow to process computation in lower precision than " "FP32 in GPU. By default, it's enabled."), -#endif -#if defined(__ANDROID__) CreateFlag("gpu_experimental_enable_quant", params, "Whether to enable the GPU delegate to run quantized " - "models or not. By default, it's disabled."), + "models or not.
By default, it's enabled."), +#endif +#if defined(__ANDROID__) CreateFlag( "gpu_backend", params, "Force the GPU delegate to use a particular backend for execution, and " @@ -91,10 +91,10 @@ void GpuDelegateProvider::LogParams(const ToolParams& params) const { #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE) TFLITE_LOG(INFO) << "Allow lower precision in gpu : [" << params.Get("gpu_precision_loss_allowed") << "]"; -#endif -#if defined(__ANDROID__) TFLITE_LOG(INFO) << "Enable running quant models in gpu : [" << params.Get("gpu_experimental_enable_quant") << "]"; +#endif +#if defined(__ANDROID__) TFLITE_LOG(INFO) << "GPU backend : [" << params.Get("gpu_backend") << "]"; #endif @@ -136,6 +136,8 @@ TfLiteDelegatePtr GpuDelegateProvider::CreateTfLiteDelegate( TFLGpuDelegateOptions gpu_opts = {0}; gpu_opts.allow_precision_loss = params.Get("gpu_precision_loss_allowed"); + gpu_opts.enable_quantization = + params.Get("gpu_experimental_enable_quant"); std::string string_gpu_wait_type = params.Get("gpu_wait_type"); if (!string_gpu_wait_type.empty()) {