Support benchmarking of quantized models in the model benchmark tool

Reformat the benchmark documentation so that the GPU delegate options for Android and iOS are clearly distinguished.

PiperOrigin-RevId: 315248409
Change-Id: Iac19e8afdcf9414496f961b836fc510883a77757
Taehee Jeong 2020-06-08 04:11:43 -07:00 committed by TensorFlower Gardener
parent dab7b46024
commit 251a474096
2 changed files with 23 additions and 16 deletions


@@ -31,26 +31,31 @@ TFLite delegate.
     This option is currently supported by the Hexagon and CoreML delegate.
 
 ### GPU delegate provider
+
+Only Android and iOS devices support GPU delegate.
+
+#### Common options
 *   `use_gpu`: `bool` (default=false) \
     Whether to use the
     [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
-    This option is currently only available on Android and iOS devices.
 *   `gpu_precision_loss_allowed`: `bool` (default=true) \
-    Whethre to allow the GPU delegate to carry out computation with some
+    Whether to allow the GPU delegate to carry out computation with some
     precision loss (i.e. processing in FP16) or not. If allowed, the performance
     will increase.
 *   `gpu_experimental_enable_quant`: `bool` (default=true) \
-    Whether to allow the GPU delegate to run a quantized model or not. \
-    This option is currently only available on Android.
+    Whether to allow the GPU delegate to run a 8-bit quantized model or not.
+
+#### Android options
 *   `gpu_backend`: `string` (default="") \
     Force the GPU delegate to use a particular backend for execution, and fail
     if unsuccessful. Should be one of: cl, gl. By default, the GPU delegate will
-    try OpenCL first and then OpenGL if the former fails.\
-    Note this option is only available on Android.
+    try OpenCL first and then OpenGL if the former fails.
+
+#### iOS options
 *   `gpu_wait_type`: `string` (default="") \
-    Which GPU wait_type option to use, when using GPU delegate on iOS. Should be
-    one of the following: passive, active, do_not_wait, aggressive. When left
-    blank, passive mode is used by default.
+    Which GPU wait_type option to use. Should be one of the following: passive,
+    active, do_not_wait, aggressive. When left blank, passive mode is used by
+    default.
 
 ### NNAPI delegate provider
 *   `use_nnapi`: `bool` (default=false) \
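With the code changes below, a quantized model can be benchmarked on the GPU of both Android and iOS devices. As a usage sketch (not part of this diff; the binary and model paths are placeholders for wherever the `benchmark_model` binary and the model were pushed):

    adb shell /data/local/tmp/benchmark_model \
        --graph=/data/local/tmp/model_quant.tflite \
        --use_gpu=true \
        --gpu_experimental_enable_quant=true \
        --gpu_precision_loss_allowed=true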


@@ -37,10 +37,10 @@ class GpuDelegateProvider : public DelegateProvider {
 #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE)
     default_params_.AddParam("gpu_precision_loss_allowed",
                              ToolParam::Create<bool>(true));
-#endif
-#if defined(__ANDROID__)
     default_params_.AddParam("gpu_experimental_enable_quant",
                              ToolParam::Create<bool>(true));
+#endif
+#if defined(__ANDROID__)
     default_params_.AddParam("gpu_backend", ToolParam::Create<std::string>(""));
 #endif
 #if defined(REAL_IPHONE_DEVICE)
@@ -66,11 +66,11 @@ std::vector<Flag> GpuDelegateProvider::CreateFlags(ToolParams* params) const {
     CreateFlag<bool>("gpu_precision_loss_allowed", params,
                      "Allow to process computation in lower precision than "
                      "FP32 in GPU. By default, it's enabled."),
-#endif
-#if defined(__ANDROID__)
     CreateFlag<bool>("gpu_experimental_enable_quant", params,
                      "Whether to enable the GPU delegate to run quantized "
-                     "models or not. By default, it's disabled."),
+                     "models or not. By default, it's enabled."),
+#endif
+#if defined(__ANDROID__)
     CreateFlag<std::string>(
         "gpu_backend", params,
         "Force the GPU delegate to use a particular backend for execution, and "
@@ -91,10 +91,10 @@ void GpuDelegateProvider::LogParams(const ToolParams& params) const {
 #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE)
   TFLITE_LOG(INFO) << "Allow lower precision in gpu : ["
                    << params.Get<bool>("gpu_precision_loss_allowed") << "]";
-#endif
-#if defined(__ANDROID__)
   TFLITE_LOG(INFO) << "Enable running quant models in gpu : ["
                    << params.Get<bool>("gpu_experimental_enable_quant") << "]";
+#endif
+#if defined(__ANDROID__)
   TFLITE_LOG(INFO) << "GPU backend : ["
                    << params.Get<std::string>("gpu_backend") << "]";
 #endif
@@ -136,6 +136,8 @@ TfLiteDelegatePtr GpuDelegateProvider::CreateTfLiteDelegate(
     TFLGpuDelegateOptions gpu_opts = {0};
     gpu_opts.allow_precision_loss =
         params.Get<bool>("gpu_precision_loss_allowed");
+    gpu_opts.enable_quantization =
+        params.Get<bool>("gpu_experimental_enable_quant");
     std::string string_gpu_wait_type = params.Get<std::string>("gpu_wait_type");
     if (!string_gpu_wait_type.empty()) {
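The options populated above feed the iOS Metal delegate. For context, a minimal sketch (not part of this diff) of the consuming API; TFLGpuDelegateCreate and TFLGpuDelegateDelete are declared in tensorflow/lite/delegates/gpu/metal_delegate.h:

    #include "tensorflow/lite/delegates/gpu/metal_delegate.h"

    // Mirror the benchmark flags into the Metal delegate options.
    TFLGpuDelegateOptions gpu_opts = {0};
    gpu_opts.allow_precision_loss = true;  // --gpu_precision_loss_allowed
    gpu_opts.enable_quantization = true;   // --gpu_experimental_enable_quant (this commit)
    gpu_opts.wait_type = TFLGpuDelegateWaitTypePassive;  // --gpu_wait_type=passive

    // Create the delegate, attach it to an interpreter, release it when done.
    TfLiteDelegate* delegate = TFLGpuDelegateCreate(&gpu_opts);
    // ... interpreter->ModifyGraphWithDelegate(delegate) ...
    TFLGpuDelegateDelete(delegate);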