From 5874c1424db542293276cdaeb21f8de9febabd60 Mon Sep 17 00:00:00 2001
From: Fabio Riccardi <fricc@google.com>
Date: Thu, 10 Sep 2020 15:24:40 -0700
Subject: [PATCH] Add experimental Vulkan support to TFLite delegate.

Note that this is still WIP, and is not yet available at runtime
with the default TFLite GPU prebuilts.

PiperOrigin-RevId: 331029289
Change-Id: I31d1b2cfbe478ca0bf2026b451d20eb3ac86b387
---
 tensorflow/lite/delegates/gpu/BUILD           |  87 ++-------
 tensorflow/lite/delegates/gpu/delegate.cc     | 178 +++++++++++++-----
 tensorflow/lite/delegates/gpu/delegate.h      |   2 +-
 tensorflow/lite/delegates/gpu/gpu_backend.cc  |  46 -----
 tensorflow/lite/delegates/gpu/gpu_backend.h   |  55 ------
 .../lite/delegates/gpu/gpu_backend_default.cc | 157 ---------------
 6 files changed, 151 insertions(+), 374 deletions(-)
 delete mode 100644 tensorflow/lite/delegates/gpu/gpu_backend.cc
 delete mode 100644 tensorflow/lite/delegates/gpu/gpu_backend.h
 delete mode 100644 tensorflow/lite/delegates/gpu/gpu_backend_default.cc

diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD
index c319b215879..8778653b586 100644
--- a/tensorflow/lite/delegates/gpu/BUILD
+++ b/tensorflow/lite/delegates/gpu/BUILD
@@ -26,8 +26,6 @@ config_setting(
     },
 )
 
-cc_library(name = "egl_glsl_headers")
-
 cc_library(
     name = "gl_delegate",
     srcs = ["gl_delegate.cc"],
@@ -44,7 +42,6 @@ cc_library(
         "//conditions:default": [],
     }),
     deps = [
-        ":egl_glsl_headers",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/types:span",
         "//tensorflow/lite:kernel_api",
@@ -233,65 +230,9 @@ selects.config_setting_group(
 )
 
 cc_library(
-    name = "delegate_header",
-    hdrs = ["delegate.h"],
-    deps = [
-        "//tensorflow/lite/c:common",
-        "//tensorflow/lite/delegates/gpu:api",
-    ],
-)
-
-cc_library(
-    name = "delegate_base",
+    name = "delegate",
     srcs = ["delegate.cc"],
     hdrs = ["delegate.h"],
-    deps = [
-        ":gpu_backend",
-        "//tensorflow/lite:kernel_api",
-        "//tensorflow/lite:minimal_logging",
-        "//tensorflow/lite/c:common",
-        "//tensorflow/lite/delegates/gpu:api",
-        "//tensorflow/lite/delegates/gpu/common:model",
-        "//tensorflow/lite/delegates/gpu/common:model_builder",
-        "//tensorflow/lite/delegates/gpu/common:model_transformer",
-        "//tensorflow/lite/delegates/gpu/common:quantization_util",
-        "//tensorflow/lite/delegates/gpu/common:status",
-        "//tensorflow/lite/delegates/gpu/gl:api2",
-        "//tensorflow/lite/kernels/internal:optimized_base",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@com_google_absl//absl/memory",
-        "@com_google_absl//absl/types:span",
-    ],
-)
-
-cc_library(
-    name = "delegate",
-    hdrs = ["delegate.h"],
-    deps = [
-        ":delegate_base",
-        ":gpu_backend_default",
-        "//tensorflow/lite/c:common",
-    ],
-)
-
-cc_library(
-    name = "gpu_backend",
-    srcs = ["gpu_backend.cc"],
-    hdrs = ["gpu_backend.h"],
-    deps = [
-        ":api",
-        ":delegate_header",
-        "//tensorflow/lite/delegates/gpu/common:model",
-        "//tensorflow/lite/delegates/gpu/common:shape",
-        "//tensorflow/lite/delegates/gpu/common:tensor",
-        "@com_google_absl//absl/types:any",
-        "@opencl_headers",
-    ],
-)
-
-cc_library(
-    name = "gpu_backend_default",
-    srcs = ["gpu_backend_default.cc"],
     linkopts = select({
         "//tensorflow:android": [
             "-lEGL",
@@ -303,15 +244,27 @@ cc_library(
         ],
         "//conditions:default": [],
     }),
-    deps = [
-        ":gpu_backend",
-        "//tensorflow/lite:minimal_logging",
-        "//tensorflow/lite/delegates/gpu/cl:api",
-    ] + select({
+    deps = select({
         "//tensorflow/lite/delegates/gpu/cl:opencl_delegate_no_gl": [],
         "//conditions:default": [
-            ":egl_glsl_headers",
             "//tensorflow/lite/delegates/gpu/gl:api2",
         ],
-    }),
+    }) + [
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/types:span",
+        "//tensorflow/lite:kernel_api",
+        "//tensorflow/lite:minimal_logging",
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/delegates/gpu:api",
+        "//tensorflow/lite/delegates/gpu/cl:api",
+        "//tensorflow/lite/delegates/gpu/cl:opencl_wrapper",
+        "//tensorflow/lite/delegates/gpu/cl:tensor_type_util",
+        "//tensorflow/lite/delegates/gpu/common:model",
+        "//tensorflow/lite/delegates/gpu/common:model_builder",
+        "//tensorflow/lite/delegates/gpu/common:model_transformer",
+        "//tensorflow/lite/delegates/gpu/common:quantization_util",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "//tensorflow/lite/kernels/internal:optimized_base",
+    ],
 )
diff --git a/tensorflow/lite/delegates/gpu/delegate.cc b/tensorflow/lite/delegates/gpu/delegate.cc
index 897395fe08b..bfc2b7f08c4 100644
--- a/tensorflow/lite/delegates/gpu/delegate.cc
+++ b/tensorflow/lite/delegates/gpu/delegate.cc
@@ -25,12 +25,15 @@ limitations under the License.
 #include "absl/types/span.h"
 #include "tensorflow/lite/builtin_ops.h"
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/cl/api.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/model_builder.h"
 #include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
 #include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
-#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/minimal_logging.h"
 
@@ -40,18 +43,39 @@ limitations under the License.
 
 namespace tflite {
 namespace gpu {
-
 namespace {
 
+InferencePriority ToPriority(int32_t priority) {
+  // Maps a TFLITE_GPU_INFERENCE_PRIORITY_* value onto InferencePriority;
+  // any value without a counterpart is reported as UNKNOWN.
+  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_AUTO)
+    return InferencePriority::AUTO;
+  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION)
+    return InferencePriority::MAX_PRECISION;
+  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY)
+    return InferencePriority::MIN_LATENCY;
+  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE)
+    return InferencePriority::MIN_MEMORY_USAGE;
+  return InferencePriority::UNKNOWN;
+}
+
+InferenceUsage ToUsage(int32_t usage) {
+  // Maps a TFLITE_GPU_INFERENCE_PREFERENCE_* value onto InferenceUsage;
+  // any value without a counterpart is reported as UNKNOWN.
+  if (usage == TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER)
+    return InferenceUsage::FAST_SINGLE_ANSWER;
+  if (usage == TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED)
+    return InferenceUsage::SUSTAINED_SPEED;
+  return InferenceUsage::UNKNOWN;
+}
+
 // Forward declarations.
 TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate);
 
 class Delegate {
  public:
-  explicit Delegate(GpuBackend* gpu_backend,
-                    const TfLiteGpuDelegateOptionsV2* options)
-      : gpu_backend_(std::unique_ptr<GpuBackend>(gpu_backend)),
-        num_delegate_kernels_(0) {
+  explicit Delegate(const TfLiteGpuDelegateOptionsV2* options)
+      : num_delegate_kernels_(0) {
     options_ = options ? *options : TfLiteGpuDelegateOptionsV2Default();
     if (options_.max_delegated_partitions <= 0) {
       options_.max_delegated_partitions = 1;
@@ -70,17 +94,6 @@ class Delegate {
   }
   int num_delegate_kernels() const { return num_delegate_kernels_; }
 
-  absl::Status Prepare(
-      tflite::gpu::GraphFloat32* graph,
-      std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
-      std::unique_ptr<tflite::gpu::InferenceBuilder>* builder) {
-    return gpu_backend_->Prepare(options_, graph, initialize_graph, builder);
-  }
-
-  bool enforce_same_thread() const {
-    return gpu_backend_->enforce_same_thread();
-  }
-
  private:
   TfLiteDelegate delegate_ = {
       .data_ = reinterpret_cast<void*>(this),
@@ -91,8 +104,6 @@ class Delegate {
       .flags = kTfLiteDelegateFlagsNone,
   };
 
-  std::unique_ptr<GpuBackend> gpu_backend_;
-
   TfLiteGpuDelegateOptionsV2 options_;
   int num_delegate_kernels_ = 0;
 
@@ -111,7 +122,7 @@ class DelegateKernel {
                        const TfLiteDelegateParams* delegate_params) {
     thread_id_prepare_ = std::this_thread::get_id();
 
-    // Extract TfLite delegate execution plan from the context and convert it
+    // Extract TFLite delegate execution plan from the context and convert it
     // into GraphFloat32.
     GraphFloat32 graph;
     std::vector<uint32_t> input_refs;
@@ -120,16 +131,31 @@ class DelegateKernel {
                                     &input_refs, &output_refs));
 
     std::unique_ptr<InferenceBuilder> builder;
-    RETURN_IF_ERROR(delegate_->Prepare(
-        &graph,
-        [&](GraphFloat32* graph) -> absl::Status {
-          return InitializeGraph(context, delegate_params, graph, &input_refs,
-                                 &output_refs);
-        },
-        &builder));
+    bool graph_is_destroyed = false;
+    const int experimental_flags = delegate_->options().experimental_flags;
+    if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) {
+      RETURN_IF_ERROR(
+          InitializeOpenClApi(&graph, &builder, &graph_is_destroyed));
+    } else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) {
+      RETURN_IF_ERROR(InitializeOpenGlApi(&graph, &builder));
+    } else {
+      // By default, we try CL first & fall back to GL if that fails.
+      auto status = InitializeOpenClApi(&graph, &builder, &graph_is_destroyed);
+      if (!status.ok()) {
+        TF_LITE_KERNEL_LOG(context, "%s",  // message is data, not a format
+                           std::string(status.message()).c_str());
+        TF_LITE_KERNEL_LOG(context, "Falling back to OpenGL");
 
-    // See if the GPU backend want us to be always on the same thread.
-    enforce_same_thread_ = delegate_->enforce_same_thread();
+        // Graph needs to be re-created because it is moved above.
+        GraphFloat32 graph2;
+        if (graph_is_destroyed) {
+          RETURN_IF_ERROR(InitializeGraph(context, delegate_params, &graph2,
+                                          &input_refs, &output_refs));
+        }
+        RETURN_IF_ERROR(InitializeOpenGlApi(
+            graph_is_destroyed ? &graph2 : &graph, &builder));
+      }
+    }
 
     // At this point tflite didn't allocate tensors yet, therefore, collect
     // indices and set all input and output tensors from tflite later.
@@ -253,12 +279,71 @@ class DelegateKernel {
     for (const auto& output : outputs) {
       output_refs->push_back(output->tensor.ref);
     }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status InitializeOpenClApi(GraphFloat32* graph,
+                                   std::unique_ptr<InferenceBuilder>* builder,
+                                   bool* graph_is_destroyed) {
+    *graph_is_destroyed = false;
+    cl::InferenceEnvironmentOptions env_options;
+    cl::InferenceEnvironmentProperties properties;
+    RETURN_IF_ERROR(cl::NewInferenceEnvironment(env_options, &cl_environment_,
+                                                &properties));
+    const auto& user_options = delegate_->options();
+    const auto precision_loss = user_options.is_precision_loss_allowed;
+    cl::InferenceOptions options;
+    options.usage = ToUsage(user_options.inference_preference);
+    if (precision_loss != -1) {
+      // An explicitly set is_precision_loss_allowed takes precedence over the
+      // priority list; only priority1 is assigned on this path.
+      options.priority1 = precision_loss == 0 ? InferencePriority::MAX_PRECISION
+                                              : InferencePriority::MIN_LATENCY;
+    } else {
+      // -1 is the "not adjusted" marker, so the three priorities are used.
+      options.priority1 = ToPriority(user_options.inference_priority1);
+      options.priority2 = ToPriority(user_options.inference_priority2);
+      options.priority3 = ToPriority(user_options.inference_priority3);
+    }
+    // Flag the graph as consumed before it is moved into the builder, so the
+    // flag is already set if NewInferenceBuilder() returns an error.
+    *graph_is_destroyed = true;
+    RETURN_IF_ERROR(cl_environment_->NewInferenceBuilder(
+        options, std::move(*graph), builder));
+    TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
+                         "Initialized OpenCL-based API.");
+    return absl::OkStatus();
+  }
+
+  absl::Status InitializeOpenGlApi(GraphFloat32* graph,
+                                   std::unique_ptr<InferenceBuilder>* builder) {
+#ifdef CL_DELEGATE_NO_GL  // GL is compiled out: fail, *builder stays unset.
+    return absl::UnavailableError("OpenGL-based API disabled");
+#else
+    gl::InferenceEnvironmentOptions env;
+    gl::InferenceEnvironmentProperties props;
+    RETURN_IF_ERROR(NewInferenceEnvironment(env, &gl_environment_, &props));
+    gl::InferenceOptions options;
+    options.usage = ToUsage(delegate_->options().inference_preference);
+    options.priority1 = ToPriority(delegate_->options().inference_priority1);
+    options.priority2 = ToPriority(delegate_->options().inference_priority2);
+    options.priority3 = ToPriority(delegate_->options().inference_priority3);
+    RETURN_IF_ERROR(gl_environment_->NewInferenceBuilder(std::move(*graph),
+                                                         options, builder));
+    enforce_same_thread_ = true;
+    TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
+                         "Initialized OpenGL-based API.");
+#endif
     return absl::OkStatus();
   }
 
   // The Delegate instance that's shared across all DelegateKernel instances.
-  Delegate* const delegate_;
-
+  Delegate* const delegate_;  // doesn't own the memory.
+  std::unique_ptr<cl::InferenceEnvironment> cl_environment_;
+#ifndef CL_DELEGATE_NO_GL
+  std::unique_ptr<gl::InferenceEnvironment> gl_environment_;
+#endif
   std::unique_ptr<InferenceRunner> runner_;
   std::vector<int64_t> input_indices_;
   std::vector<int64_t> output_indices_;
@@ -285,7 +370,7 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
         const auto* params =
             reinterpret_cast<const TfLiteDelegateParams*>(buffer);
         auto* gpu_delegate = GetDelegate(params->delegate);
-        // Everything below should happen in prepare function call, but TfLite
+        // Everything below should happen in prepare function call, but TFLite
         // for whatever reason forbids that.
         auto gpu_delegate_kernel =
             absl::make_unique<DelegateKernel>(gpu_delegate);
@@ -351,25 +436,10 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
 }
 
 }  // namespace
-
-TfLiteDelegate* TfLiteGpuDelegateCreateInternal(
-    GpuBackend* gpu_backend, const TfLiteGpuDelegateOptionsV2* options) {
-  auto* gpu_delegate = new tflite::gpu::Delegate(gpu_backend, options);
-  if (gpu_delegate) {
-    TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
-                         "Created TensorFlow Lite delegate for GPU.");
-  }
-  return gpu_delegate ? gpu_delegate->tflite_delegate() : nullptr;
-}
-
-void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate) {
-  delete tflite::gpu::GetDelegate(delegate);
-}
-
 }  // namespace gpu
 }  // namespace tflite
 
-extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
+TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
   TfLiteGpuDelegateOptionsV2 options = {
       // set it to -1 to detect whether it was later adjusted.
       .is_precision_loss_allowed = -1,
@@ -383,3 +453,15 @@ extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
   };
   return options;
 }
+
+TfLiteDelegate* TfLiteGpuDelegateV2Create(
+    const TfLiteGpuDelegateOptionsV2* options) {
+  tflite::gpu::Delegate* const created = new tflite::gpu::Delegate(options);
+  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
+                       "Created TensorFlow Lite delegate for GPU.");
+  return created == nullptr ? nullptr : created->tflite_delegate();
+}
+
+void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) {
+  delete tflite::gpu::GetDelegate(delegate);  // counterpart of V2Create
+}
diff --git a/tensorflow/lite/delegates/gpu/delegate.h b/tensorflow/lite/delegates/gpu/delegate.h
index 633d5a66296..9af586bfd75 100644
--- a/tensorflow/lite/delegates/gpu/delegate.h
+++ b/tensorflow/lite/delegates/gpu/delegate.h
@@ -111,7 +111,7 @@ typedef struct {
 TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();
 
 // Creates a new delegate instance that need to be destroyed with
-// TfLiteGpuDelegateV2Delete when delegate is no longer used by TfLite.
+// TfLiteGpuDelegateV2Delete when delegate is no longer used by TFLite.
 //
 // This delegate encapsulates multiple GPU-acceleration APIs under the hood to
 // make use of the fastest available on a device.
diff --git a/tensorflow/lite/delegates/gpu/gpu_backend.cc b/tensorflow/lite/delegates/gpu/gpu_backend.cc
deleted file mode 100644
index d1b5ad744d2..00000000000
--- a/tensorflow/lite/delegates/gpu/gpu_backend.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
-
-namespace tflite {
-namespace gpu {
-
-InferencePriority GpuBackend::ToPriority(int32_t priority) {
-  switch (priority) {
-    case TFLITE_GPU_INFERENCE_PRIORITY_AUTO:
-      return InferencePriority::AUTO;
-    case TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
-      return InferencePriority::MAX_PRECISION;
-    case TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
-      return InferencePriority::MIN_LATENCY;
-    case TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE:
-      return InferencePriority::MIN_MEMORY_USAGE;
-  }
-  return InferencePriority::UNKNOWN;
-}
-
-InferenceUsage GpuBackend::ToUsage(int32_t usage) {
-  switch (usage) {
-    case TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER:
-      return InferenceUsage::FAST_SINGLE_ANSWER;
-    case TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED:
-      return InferenceUsage::SUSTAINED_SPEED;
-  }
-  return InferenceUsage::UNKNOWN;
-}
-
-}  // namespace gpu
-}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/gpu_backend.h b/tensorflow/lite/delegates/gpu/gpu_backend.h
deleted file mode 100644
index 7c6615ef447..00000000000
--- a/tensorflow/lite/delegates/gpu/gpu_backend.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
-#define TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
-
-#include <functional>
-
-#include "tensorflow/lite/delegates/gpu/api.h"
-#include "tensorflow/lite/delegates/gpu/common/model.h"
-#include "tensorflow/lite/delegates/gpu/delegate.h"
-
-namespace tflite {
-namespace gpu {
-
-class GpuBackend {
- public:
-  virtual absl::Status Prepare(
-      const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph,
-      std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
-      std::unique_ptr<InferenceBuilder>* builder) = 0;
-
-  bool enforce_same_thread() const { return enforce_same_thread_; }
-
-  virtual ~GpuBackend() = default;
-
-  static InferencePriority ToPriority(int32_t priority);
-  static InferenceUsage ToUsage(int32_t usage);
-
- protected:
-  bool enforce_same_thread_ = false;
-};
-
-TfLiteDelegate* TfLiteGpuDelegateCreateInternal(
-    GpuBackend* backend, const TfLiteGpuDelegateOptionsV2* options);
-
-// Destroys a delegate created with `TfLiteGpuDelegateCreateInternal` call.
-void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate);
-
-}  // namespace gpu
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
diff --git a/tensorflow/lite/delegates/gpu/gpu_backend_default.cc b/tensorflow/lite/delegates/gpu/gpu_backend_default.cc
deleted file mode 100644
index 2925c3945ac..00000000000
--- a/tensorflow/lite/delegates/gpu/gpu_backend_default.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/delegates/gpu/cl/api.h"
-#ifndef CL_DELEGATE_NO_GL
-#include "tensorflow/lite/delegates/gpu/gl/api2.h"
-#endif
-#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
-#include "tensorflow/lite/minimal_logging.h"
-
-namespace tflite {
-namespace gpu {
-
-absl::Status InitializeOpenClApi(
-    GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options,
-    std::unique_ptr<InferenceBuilder>* builder,
-    std::unique_ptr<cl::InferenceEnvironment>* inference_environment,
-    bool* graph_is_destroyed) {
-  if (graph_is_destroyed) {
-    *graph_is_destroyed = false;
-  }
-  cl::InferenceEnvironmentOptions env_options;
-  cl::InferenceEnvironmentProperties properties;
-  RETURN_IF_ERROR(cl::NewInferenceEnvironment(
-      env_options, inference_environment, &properties));
-  cl::InferenceOptions options;
-  // If is_precision_loss_allowed == -1, then just use priorities instead
-  // of paying attention to is_precision_loss_allowed value.
-  if (delegate_options.is_precision_loss_allowed == -1) {
-    options.priority1 =
-        GpuBackend::ToPriority(delegate_options.inference_priority1);
-    options.priority2 =
-        GpuBackend::ToPriority(delegate_options.inference_priority2);
-    options.priority3 =
-        GpuBackend::ToPriority(delegate_options.inference_priority3);
-  } else {
-    // Users set is_precision_loss_allowed explicitly, thus use it explicitly.
-    if (delegate_options.is_precision_loss_allowed == 0) {
-      options.priority1 = InferencePriority::MAX_PRECISION;
-    } else {
-      options.priority1 = InferencePriority::MIN_LATENCY;
-    }
-  }
-  options.usage = GpuBackend::ToUsage(delegate_options.inference_preference);
-  if (graph_is_destroyed) {
-    *graph_is_destroyed = true;
-  }
-  RETURN_IF_ERROR(
-      (*inference_environment)
-          ->NewInferenceBuilder(options, std::move(*graph), builder));
-  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
-                       "Initialized OpenCL-based API.");
-  return absl::OkStatus();
-}
-
-#ifndef CL_DELEGATE_NO_GL
-absl::Status InitializeOpenGlApi(
-    GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options,
-    std::unique_ptr<InferenceBuilder>* builder,
-    std::unique_ptr<gl::InferenceEnvironment>* inference_environment) {
-  gl::InferenceEnvironmentOptions env_options;
-  gl::InferenceEnvironmentProperties properties;
-  RETURN_IF_ERROR(
-      NewInferenceEnvironment(env_options, inference_environment, &properties));
-  gl::InferenceOptions options;
-  options.usage = GpuBackend::ToUsage(delegate_options.inference_preference);
-  options.priority1 =
-      GpuBackend::ToPriority(delegate_options.inference_priority1);
-  options.priority2 =
-      GpuBackend::ToPriority(delegate_options.inference_priority2);
-  options.priority3 =
-      GpuBackend::ToPriority(delegate_options.inference_priority3);
-  RETURN_IF_ERROR(
-      (*inference_environment)
-          ->NewInferenceBuilder(std::move(*graph), options, builder));
-  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
-                       "Initialized OpenGL-based API.");
-  return absl::OkStatus();
-}
-#endif
-
-class GpuBackendDefault : public GpuBackend {
- public:
-  GpuBackendDefault() {}
-
-  absl::Status Prepare(
-      const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph,
-      std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
-      std::unique_ptr<InferenceBuilder>* builder) override {
-#ifdef CL_DELEGATE_NO_GL
-    return InitializeOpenClApi(graph, delegate_options, builder,
-                               &cl_inference_environment_, nullptr);
-#else
-    bool graph_is_destroyed;
-    const int experimental_flags = delegate_options.experimental_flags;
-    if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) {
-      RETURN_IF_ERROR(InitializeOpenClApi(graph, delegate_options, builder,
-                                          &cl_inference_environment_,
-                                          &graph_is_destroyed));
-    } else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) {
-      RETURN_IF_ERROR(InitializeOpenGlApi(graph, delegate_options, builder,
-                                          &gl_inference_environment_));
-    } else {
-      // By default, we try CL first & fall back to GL if that fails.
-      absl::Status status =
-          InitializeOpenClApi(graph, delegate_options, builder,
-                              &cl_inference_environment_, &graph_is_destroyed);
-      if (!status.ok()) {
-        TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
-                             std::string(status.message()).c_str());
-        TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "Falling back to OpenGL");
-
-        // Graph needs to be re-created because it is moved above.
-        GraphFloat32 graph2;
-        if (graph_is_destroyed) {
-          RETURN_IF_ERROR(initialize_graph(&graph2));
-        }
-        RETURN_IF_ERROR(InitializeOpenGlApi(
-            graph_is_destroyed ? &graph2 : graph, delegate_options, builder,
-            &gl_inference_environment_));
-      }
-    }
-    return absl::OkStatus();
-#endif
-  }
-
- private:
-  std::unique_ptr<cl::InferenceEnvironment> cl_inference_environment_;
-#ifndef CL_DELEGATE_NO_GL
-  std::unique_ptr<gl::InferenceEnvironment> gl_inference_environment_;
-#endif
-};
-
-}  // namespace gpu
-}  // namespace tflite
-
-extern "C" TfLiteDelegate* TfLiteGpuDelegateV2Create(
-    const TfLiteGpuDelegateOptionsV2* options) {
-  return tflite::gpu::TfLiteGpuDelegateCreateInternal(
-      new tflite::gpu::GpuBackendDefault(), options);
-}
-
-extern "C" void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) {
-  return tflite::gpu::TfLiteGpuDelegateDeleteInternal(delegate);
-}