Support benchmarking of quantized models in the model benchmark tool

Reformat the benchmark documentation so that the GPU delegate options for Android and iOS are clearly distinguished.

PiperOrigin-RevId: 315248409
Change-Id: Iac19e8afdcf9414496f961b836fc510883a77757
Taehee Jeong 2020-06-08 04:11:43 -07:00 committed by TensorFlower Gardener
parent dab7b46024
commit 251a474096
2 changed files with 23 additions and 16 deletions


@@ -31,26 +31,31 @@ TFLite delegate.
     This option is currently supported by the Hexagon and CoreML delegate.
 
 ### GPU delegate provider
+
+Only Android and iOS devices support GPU delegate.
+
+#### Common options
 *   `use_gpu`: `bool` (default=false) \
     Whether to use the
     [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
-    This option is currently only available on Android and iOS devices.
 *   `gpu_precision_loss_allowed`: `bool` (default=true) \
-    Whethre to allow the GPU delegate to carry out computation with some
+    Whether to allow the GPU delegate to carry out computation with some
     precision loss (i.e. processing in FP16) or not. If allowed, the performance
     will increase.
 *   `gpu_experimental_enable_quant`: `bool` (default=true) \
-    Whether to allow the GPU delegate to run a quantized model or not. \
-    This option is currently only available on Android.
+    Whether to allow the GPU delegate to run a 8-bit quantized model or not.
+
+#### Android options
 *   `gpu_backend`: `string` (default="") \
     Force the GPU delegate to use a particular backend for execution, and fail
     if unsuccessful. Should be one of: cl, gl. By default, the GPU delegate will
-    try OpenCL first and then OpenGL if the former fails.\
-    Note this option is only available on Android.
+    try OpenCL first and then OpenGL if the former fails.
+
+#### iOS options
 *   `gpu_wait_type`: `string` (default="") \
-    Which GPU wait_type option to use, when using GPU delegate on iOS. Should be
-    one of the following: passive, active, do_not_wait, aggressive. When left
-    blank, passive mode is used by default.
+    Which GPU wait_type option to use. Should be one of the following: passive,
+    active, do_not_wait, aggressive. When left blank, passive mode is used by
+    default.
 
 ### NNAPI delegate provider
 *   `use_nnapi`: `bool` (default=false) \
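With the code changes below, a quantized model can be benchmarked on the GPU of both Android and iOS devices. As a usage sketch (not part of this diff; the binary and model paths are placeholders for wherever the `benchmark_model` binary and the model were pushed):

    adb shell /data/local/tmp/benchmark_model \
        --graph=/data/local/tmp/model_quant.tflite \
        --use_gpu=true \
        --gpu_experimental_enable_quant=true \
        --gpu_precision_loss_allowed=true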


@@ -37,10 +37,10 @@ class GpuDelegateProvider : public DelegateProvider {
 #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE)
     default_params_.AddParam("gpu_precision_loss_allowed",
                              ToolParam::Create<bool>(true));
-#endif
-#if defined(__ANDROID__)
     default_params_.AddParam("gpu_experimental_enable_quant",
                              ToolParam::Create<bool>(true));
+#endif
+#if defined(__ANDROID__)
     default_params_.AddParam("gpu_backend", ToolParam::Create<std::string>(""));
 #endif
 #if defined(REAL_IPHONE_DEVICE)
@@ -66,11 +66,11 @@ std::vector<Flag> GpuDelegateProvider::CreateFlags(ToolParams* params) const {
     CreateFlag<bool>("gpu_precision_loss_allowed", params,
                      "Allow to process computation in lower precision than "
                      "FP32 in GPU. By default, it's enabled."),
-#endif
-#if defined(__ANDROID__)
     CreateFlag<bool>("gpu_experimental_enable_quant", params,
                      "Whether to enable the GPU delegate to run quantized "
-                     "models or not. By default, it's disabled."),
+                     "models or not. By default, it's enabled."),
+#endif
+#if defined(__ANDROID__)
     CreateFlag<std::string>(
         "gpu_backend", params,
         "Force the GPU delegate to use a particular backend for execution, and "
@@ -91,10 +91,10 @@ void GpuDelegateProvider::LogParams(const ToolParams& params) const {
 #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE)
   TFLITE_LOG(INFO) << "Allow lower precision in gpu : ["
                    << params.Get<bool>("gpu_precision_loss_allowed") << "]";
-#endif
-#if defined(__ANDROID__)
   TFLITE_LOG(INFO) << "Enable running quant models in gpu : ["
                    << params.Get<bool>("gpu_experimental_enable_quant") << "]";
+#endif
+#if defined(__ANDROID__)
   TFLITE_LOG(INFO) << "GPU backend : ["
                    << params.Get<std::string>("gpu_backend") << "]";
 #endif
@@ -136,6 +136,8 @@ TfLiteDelegatePtr GpuDelegateProvider::CreateTfLiteDelegate(
     TFLGpuDelegateOptions gpu_opts = {0};
     gpu_opts.allow_precision_loss =
         params.Get<bool>("gpu_precision_loss_allowed");
+    gpu_opts.enable_quantization =
+        params.Get<bool>("gpu_experimental_enable_quant");
     std::string string_gpu_wait_type = params.Get<std::string>("gpu_wait_type");
     if (!string_gpu_wait_type.empty()) {
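The options populated above feed the iOS Metal delegate. For context, a minimal sketch (not part of this diff) of the consuming API; TFLGpuDelegateCreate and TFLGpuDelegateDelete are declared in tensorflow/lite/delegates/gpu/metal_delegate.h:

    #include "tensorflow/lite/delegates/gpu/metal_delegate.h"

    // Mirror the benchmark flags into the Metal delegate options.
    TFLGpuDelegateOptions gpu_opts = {0};
    gpu_opts.allow_precision_loss = true;  // --gpu_precision_loss_allowed
    gpu_opts.enable_quantization = true;   // --gpu_experimental_enable_quant (this commit)
    gpu_opts.wait_type = TFLGpuDelegateWaitTypePassive;  // --gpu_wait_type=passive

    // Create the delegate, attach it to an interpreter, release it when done.
    TfLiteDelegate* delegate = TFLGpuDelegateCreate(&gpu_opts);
    // ... interpreter->ModifyGraphWithDelegate(delegate) ...
    TFLGpuDelegateDelete(delegate);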