Get the TFLite Vulkan backend to work on iOS and macOS with MoltenVK.

PiperOrigin-RevId: 303994534 Change-Id: Id4f41a428368ac31d05ffb004da69d0deaaf6cec
2020-03-31 10:28:10 -07:00 · 2020-03-31 10:28:10 -07:00 · b38cb7c7a1
commit b38cb7c7a1
parent 8f195b3c5b
2 changed files with 18 additions and 1 deletion
--- a/tensorflow/lite/delegates/gpu/common/gpu_info.h
+++ b/tensorflow/lite/delegates/gpu/common/gpu_info.h
@@ -22,7 +22,17 @@ limitations under the License.
 namespace tflite {
 namespace gpu {

-enum class GpuType { UNKNOWN, MALI, ADRENO, POWERVR, INTEL, NVIDIA };
+// The GPU vendor, as identified by the VendorID returned by the GPU driver.
+enum class GpuType {
+  UNKNOWN,
+  APPLE,
+  MALI,
+  ADRENO,
+  POWERVR,
+  INTEL,
+  AMD,
+  NVIDIA,
+};
 enum class GpuModel {
  UNKNOWN,
  // Adreno 6xx series
--- a/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc
+++ b/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc
@@ -92,8 +92,15 @@ class FullyConnectedBuffers : public NodeShader {
    source += "  $output_data_0[0, 0, gid.x] = value_0$;";

    std::vector<Variable> shared_variables = {
+#ifdef __APPLE__
+        // MoltenVK has problems with shared memory that is sized using the
+        // workgroup size. Fortunately, with Metal, a fixed workgroup size of
+        // 32 seems to give optimal results, so sh_mem gets a fixed size of 32.
+        {"sh_mem", std::vector<float4>(32)},
+#else
        // The actual size of sh_mem depends on the WorkgroupSize
        {"sh_mem", std::vector<float4>(0)},
+#endif
    };

    *generated_code = {