From 9f93835bbdf8c53c975f5b466921834708e39b6c Mon Sep 17 00:00:00 2001
From: Raman Sarokin <sorokin@google.com>
Date: Mon, 9 Mar 2020 08:55:06 -0700
Subject: [PATCH] Changed accumulator precision for devices that do not
 support round to nearest.

PiperOrigin-RevId: 299852182
Change-Id: I386b9518201675e1357027de483f82419ba2cd7c
---
 tensorflow/lite/delegates/gpu/BUILD             |  1 +
 tensorflow/lite/delegates/gpu/metal_delegate.mm | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD
index ba2a05b09ec..72af2534988 100644
--- a/tensorflow/lite/delegates/gpu/BUILD
+++ b/tensorflow/lite/delegates/gpu/BUILD
@@ -99,6 +99,7 @@ objc_library(
         "//tensorflow/lite/delegates/gpu/metal:api",
         "//tensorflow/lite/delegates/gpu/metal:buffer_convert",
         "//tensorflow/lite/delegates/gpu/metal:compiled_model",
+        "//tensorflow/lite/delegates/gpu/metal:environment",
         "//tensorflow/lite/delegates/gpu/metal:inference_context",
         "@com_google_absl//absl/types:span",
     ],
diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.mm b/tensorflow/lite/delegates/gpu/metal_delegate.mm
index 6635a3a1388..f7f08b273ae 100644
--- a/tensorflow/lite/delegates/gpu/metal_delegate.mm
+++ b/tensorflow/lite/delegates/gpu/metal_delegate.mm
@@ -40,6 +40,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/metal/api.h"
 #include "tensorflow/lite/delegates/gpu/metal/buffer_convert.h"
 #include "tensorflow/lite/delegates/gpu/metal/common.h"
+#include "tensorflow/lite/delegates/gpu/metal/environment.h"
 #include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
 #include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
 #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
@@ -295,7 +296,17 @@ class Delegate {
     if (options_.allow_precision_loss) {
       storage_type_size = sizeof(HalfBits);
       runtime_options.storage_precision = RuntimeOptions::Precision::FP16;
-      runtime_options.accumulator_precision = RuntimeOptions::Precision::FP16;
+      const auto gpu_type = GetGpuType();
+      const bool powervr = gpu_type == GpuType::kA7 || gpu_type == GpuType::kA8 ||
+                           gpu_type == GpuType::kA9 || gpu_type == GpuType::kA10;
+      if (powervr) {
+        // PowerVR GPUs support only round-to-zero for floating-point operations,
+        // so we use an FP32 accumulator in this case to increase precision.
+        runtime_options.accumulator_precision = RuntimeOptions::Precision::FP32;
+      } else {
+        // Apple's own GPUs support round-to-nearest and have better precision.
+        runtime_options.accumulator_precision = RuntimeOptions::Precision::FP16;
+      }
     } else {
       storage_type_size = sizeof(float);
       runtime_options.storage_precision = RuntimeOptions::Precision::FP32;