From 9f93835bbdf8c53c975f5b466921834708e39b6c Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 9 Mar 2020 08:55:06 -0700 Subject: [PATCH] Changed accumulator precision for devices that do not support round to nearest. PiperOrigin-RevId: 299852182 Change-Id: I386b9518201675e1357027de483f82419ba2cd7c --- tensorflow/lite/delegates/gpu/BUILD | 1 + tensorflow/lite/delegates/gpu/metal_delegate.mm | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD index ba2a05b09ec..72af2534988 100644 --- a/tensorflow/lite/delegates/gpu/BUILD +++ b/tensorflow/lite/delegates/gpu/BUILD @@ -99,6 +99,7 @@ objc_library( "//tensorflow/lite/delegates/gpu/metal:api", "//tensorflow/lite/delegates/gpu/metal:buffer_convert", "//tensorflow/lite/delegates/gpu/metal:compiled_model", + "//tensorflow/lite/delegates/gpu/metal:environment", "//tensorflow/lite/delegates/gpu/metal:inference_context", "@com_google_absl//absl/types:span", ], diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.mm b/tensorflow/lite/delegates/gpu/metal_delegate.mm index 6635a3a1388..f7f08b273ae 100644 --- a/tensorflow/lite/delegates/gpu/metal_delegate.mm +++ b/tensorflow/lite/delegates/gpu/metal_delegate.mm @@ -40,6 +40,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/metal/api.h" #include "tensorflow/lite/delegates/gpu/metal/buffer_convert.h" #include "tensorflow/lite/delegates/gpu/metal/common.h" +#include "tensorflow/lite/delegates/gpu/metal/environment.h" #include "tensorflow/lite/delegates/gpu/metal/compiled_model.h" #include "tensorflow/lite/delegates/gpu/metal/inference_context.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" @@ -295,7 +296,17 @@ class Delegate { if (options_.allow_precision_loss) { storage_type_size = sizeof(HalfBits); runtime_options.storage_precision = RuntimeOptions::Precision::FP16; - runtime_options.accumulator_precision = RuntimeOptions::Precision::FP16; + const auto gpu_type = GetGpuType(); + const bool powervr = gpu_type == GpuType::kA7 || gpu_type == GpuType::kA8 || + gpu_type == GpuType::kA9 || gpu_type == GpuType::kA10; + if (powervr) { + // PowerVR gpus support only round to zero for floating-point operations, + // to increase precision we will use F32 accumulator in this case + runtime_options.accumulator_precision = RuntimeOptions::Precision::FP32; + } else { + // Apple own gpus support round to nearest and have better precision + runtime_options.accumulator_precision = RuntimeOptions::Precision::FP16; + } } else { storage_type_size = sizeof(float); runtime_options.storage_precision = RuntimeOptions::Precision::FP32;