Support benchmark of quantized models in model benchmark
Reformat the benchmark documentation so that the GPU delegate options for Android and iOS are clearly visible.

PiperOrigin-RevId: 315248409
Change-Id: Iac19e8afdcf9414496f961b836fc510883a77757
parent dab7b46024
commit 251a474096
@@ -31,26 +31,31 @@ TFLite delegate.
     This option is currently supported by the Hexagon and CoreML delegate.
 
 ### GPU delegate provider
 
+Only Android and iOS devices support GPU delegate.
+
+#### Common options
+
 *   `use_gpu`: `bool` (default=false) \
     Whether to use the
     [GPU accelerator delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/gpu).
     This option is currently only available on Android and iOS devices.
 *   `gpu_precision_loss_allowed`: `bool` (default=true) \
-    Whethre to allow the GPU delegate to carry out computation with some
+    Whether to allow the GPU delegate to carry out computation with some
     precision loss (i.e. processing in FP16) or not. If allowed, the performance
     will increase.
 *   `gpu_experimental_enable_quant`: `bool` (default=true) \
-    Whether to allow the GPU delegate to run a quantized model or not. \
-    This option is currently only available on Android.
+    Whether to allow the GPU delegate to run a 8-bit quantized model or not.
+
+#### Android options
+
 *   `gpu_backend`: `string` (default="") \
     Force the GPU delegate to use a particular backend for execution, and fail
     if unsuccessful. Should be one of: cl, gl. By default, the GPU delegate will
-    try OpenCL first and then OpenGL if the former fails.\
-    Note this option is only available on Android.
+    try OpenCL first and then OpenGL if the former fails.
+
+#### iOS options
+
 *   `gpu_wait_type`: `string` (default="") \
-    Which GPU wait_type option to use, when using GPU delegate on iOS. Should be
-    one of the following: passive, active, do_not_wait, aggressive. When left
-    blank, passive mode is used by default.
+    Which GPU wait_type option to use. Should be one of the following: passive,
+    active, do_not_wait, aggressive. When left blank, passive mode is used by
+    default.
 
 ### NNAPI delegate provider
 
 *   `use_nnapi`: `bool` (default=false) \
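The options above map directly onto command-line flags of the TFLite `benchmark_model` tool. As a minimal sketch of the feature this commit enables (the binary and model paths below are placeholders; the flags are the ones documented above), benchmarking an 8-bit quantized model through the GPU delegate on an Android device might look like:

```sh
# Placeholder paths; the flags are taken from the documentation above.
adb shell /data/local/tmp/benchmark_model \
  --graph=/data/local/tmp/model_quant.tflite \
  --use_gpu=true \
  --gpu_experimental_enable_quant=true \
  --gpu_backend=cl
```

On iOS, the same `--use_gpu` and `--gpu_experimental_enable_quant` flags apply, with `--gpu_wait_type` (one of passive, active, do_not_wait, aggressive) taking the place of the Android-only `--gpu_backend`.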
@@ -37,10 +37,10 @@ class GpuDelegateProvider : public DelegateProvider {
 #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE)
     default_params_.AddParam("gpu_precision_loss_allowed",
                              ToolParam::Create<bool>(true));
-#endif
-#if defined(__ANDROID__)
     default_params_.AddParam("gpu_experimental_enable_quant",
                              ToolParam::Create<bool>(true));
 #endif
 #if defined(__ANDROID__)
     default_params_.AddParam("gpu_backend", ToolParam::Create<std::string>(""));
 #endif
 #if defined(REAL_IPHONE_DEVICE)
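The two deleted lines are the functional core of this hunk: `gpu_experimental_enable_quant` moves out of the Android-only block, so the parameter is now registered whenever either `__ANDROID__` or `REAL_IPHONE_DEVICE` is defined, which is what lets the iOS code path in the last hunk read it.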
@@ -66,11 +66,11 @@ std::vector<Flag> GpuDelegateProvider::CreateFlags(ToolParams* params) const {
       CreateFlag<bool>("gpu_precision_loss_allowed", params,
                        "Allow to process computation in lower precision than "
                        "FP32 in GPU. By default, it's enabled."),
-#endif
-#if defined(__ANDROID__)
       CreateFlag<bool>("gpu_experimental_enable_quant", params,
                        "Whether to enable the GPU delegate to run quantized "
-                       "models or not. By default, it's disabled."),
+                       "models or not. By default, it's enabled."),
 #endif
 #if defined(__ANDROID__)
       CreateFlag<std::string>(
           "gpu_backend", params,
           "Force the GPU delegate to use a particular backend for execution, and "
@@ -91,10 +91,10 @@ void GpuDelegateProvider::LogParams(const ToolParams& params) const {
 #if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE)
   TFLITE_LOG(INFO) << "Allow lower precision in gpu : ["
                    << params.Get<bool>("gpu_precision_loss_allowed") << "]";
-#endif
-#if defined(__ANDROID__)
   TFLITE_LOG(INFO) << "Enable running quant models in gpu : ["
                    << params.Get<bool>("gpu_experimental_enable_quant") << "]";
 #endif
 #if defined(__ANDROID__)
   TFLITE_LOG(INFO) << "GPU backend : ["
                    << params.Get<std::string>("gpu_backend") << "]";
 #endif
@@ -136,6 +136,8 @@ TfLiteDelegatePtr GpuDelegateProvider::CreateTfLiteDelegate(
     TFLGpuDelegateOptions gpu_opts = {0};
     gpu_opts.allow_precision_loss =
         params.Get<bool>("gpu_precision_loss_allowed");
+    gpu_opts.enable_quantization =
+        params.Get<bool>("gpu_experimental_enable_quant");
 
     std::string string_gpu_wait_type = params.Get<std::string>("gpu_wait_type");
     if (!string_gpu_wait_type.empty()) {
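The last hunk is where the new option takes effect on iOS: the benchmark's `gpu_experimental_enable_quant` parameter is copied into `TFLGpuDelegateOptions`, the option struct of the Metal GPU delegate. For readers using the delegate outside the benchmark tool, here is a minimal sketch of the same wiring, assuming the Metal delegate's C API from `metal_delegate.h` (the helper function name is hypothetical):

```cpp
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"

// Sketch: builds a Metal GPU delegate configured the same way the benchmark
// tool does above when --gpu_experimental_enable_quant=true is passed.
TfLiteDelegate* MakeQuantEnabledMetalDelegate() {
  TFLGpuDelegateOptions gpu_opts = {0};
  gpu_opts.allow_precision_loss = true;  // matches gpu_precision_loss_allowed=true
  gpu_opts.enable_quantization = true;   // the new 8-bit quantized-model path
  gpu_opts.wait_type = TFLGpuDelegateWaitTypePassive;  // doc default: "passive"
  return TFLGpuDelegateCreate(&gpu_opts);
}
```

The returned delegate would then be applied with `Interpreter::ModifyGraphWithDelegate()` and released with `TFLGpuDelegateDelete()` once the interpreter is destroyed.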