From 5874c1424db542293276cdaeb21f8de9febabd60 Mon Sep 17 00:00:00 2001 From: Fabio Riccardi Date: Thu, 10 Sep 2020 15:24:40 -0700 Subject: [PATCH] Add experimental Vulkan support to TFLite delegate. Note that this is still WIP, and is not yet available at runtime with the default TFLite GPU prebuilts. PiperOrigin-RevId: 331029289 Change-Id: I31d1b2cfbe478ca0bf2026b451d20eb3ac86b387 --- tensorflow/lite/delegates/gpu/BUILD | 87 ++------- tensorflow/lite/delegates/gpu/delegate.cc | 178 +++++++++++++----- tensorflow/lite/delegates/gpu/delegate.h | 2 +- tensorflow/lite/delegates/gpu/gpu_backend.cc | 46 ----- tensorflow/lite/delegates/gpu/gpu_backend.h | 55 ------ .../lite/delegates/gpu/gpu_backend_default.cc | 157 --------------- 6 files changed, 151 insertions(+), 374 deletions(-) delete mode 100644 tensorflow/lite/delegates/gpu/gpu_backend.cc delete mode 100644 tensorflow/lite/delegates/gpu/gpu_backend.h delete mode 100644 tensorflow/lite/delegates/gpu/gpu_backend_default.cc diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD index c319b215879..8778653b586 100644 --- a/tensorflow/lite/delegates/gpu/BUILD +++ b/tensorflow/lite/delegates/gpu/BUILD @@ -26,8 +26,6 @@ config_setting( }, ) -cc_library(name = "egl_glsl_headers") - cc_library( name = "gl_delegate", srcs = ["gl_delegate.cc"], @@ -44,7 +42,6 @@ cc_library( "//conditions:default": [], }), deps = [ - ":egl_glsl_headers", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/types:span", "//tensorflow/lite:kernel_api", @@ -233,65 +230,9 @@ selects.config_setting_group( ) cc_library( - name = "delegate_header", - hdrs = ["delegate.h"], - deps = [ - "//tensorflow/lite/c:common", - "//tensorflow/lite/delegates/gpu:api", - ], -) - -cc_library( - name = "delegate_base", + name = "delegate", srcs = ["delegate.cc"], hdrs = ["delegate.h"], - deps = [ - ":gpu_backend", - "//tensorflow/lite:kernel_api", - "//tensorflow/lite:minimal_logging", - "//tensorflow/lite/c:common", - "//tensorflow/lite/delegates/gpu:api", - "//tensorflow/lite/delegates/gpu/common:model", - "//tensorflow/lite/delegates/gpu/common:model_builder", - "//tensorflow/lite/delegates/gpu/common:model_transformer", - "//tensorflow/lite/delegates/gpu/common:quantization_util", - "//tensorflow/lite/delegates/gpu/common:status", - "//tensorflow/lite/delegates/gpu/gl:api2", - "//tensorflow/lite/kernels/internal:optimized_base", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:span", - ], -) - -cc_library( - name = "delegate", - hdrs = ["delegate.h"], - deps = [ - ":delegate_base", - ":gpu_backend_default", - "//tensorflow/lite/c:common", - ], -) - -cc_library( - name = "gpu_backend", - srcs = ["gpu_backend.cc"], - hdrs = ["gpu_backend.h"], - deps = [ - ":api", - ":delegate_header", - "//tensorflow/lite/delegates/gpu/common:model", - "//tensorflow/lite/delegates/gpu/common:shape", - "//tensorflow/lite/delegates/gpu/common:tensor", - "@com_google_absl//absl/types:any", - "@opencl_headers", - ], -) - -cc_library( - name = "gpu_backend_default", - srcs = ["gpu_backend_default.cc"], linkopts = select({ "//tensorflow:android": [ "-lEGL", @@ -303,15 +244,27 @@ cc_library( ], "//conditions:default": [], }), - deps = [ - ":gpu_backend", - "//tensorflow/lite:minimal_logging", - "//tensorflow/lite/delegates/gpu/cl:api", - ] + select({ + deps = select({ "//tensorflow/lite/delegates/gpu/cl:opencl_delegate_no_gl": [], "//conditions:default": [ - ":egl_glsl_headers", "//tensorflow/lite/delegates/gpu/gl:api2", ], - }), + }) + [ + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:span", + "//tensorflow/lite:kernel_api", + "//tensorflow/lite:minimal_logging", + "//tensorflow/lite/c:common", + "//tensorflow/lite/delegates/gpu:api", + "//tensorflow/lite/delegates/gpu/cl:api", + "//tensorflow/lite/delegates/gpu/cl:opencl_wrapper", + "//tensorflow/lite/delegates/gpu/cl:tensor_type_util", + "//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:model_builder", + "//tensorflow/lite/delegates/gpu/common:model_transformer", + "//tensorflow/lite/delegates/gpu/common:quantization_util", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/kernels/internal:optimized_base", + ], ) diff --git a/tensorflow/lite/delegates/gpu/delegate.cc b/tensorflow/lite/delegates/gpu/delegate.cc index 897395fe08b..bfc2b7f08c4 100644 --- a/tensorflow/lite/delegates/gpu/delegate.cc +++ b/tensorflow/lite/delegates/gpu/delegate.cc @@ -25,12 +25,15 @@ limitations under the License. #include "absl/types/span.h" #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/delegates/gpu/api.h" +#include "tensorflow/lite/delegates/gpu/cl/api.h" +#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/model_builder.h" #include "tensorflow/lite/delegates/gpu/common/model_transformer.h" #include "tensorflow/lite/delegates/gpu/common/quantization_util.h" #include "tensorflow/lite/delegates/gpu/common/status.h" -#include "tensorflow/lite/delegates/gpu/gpu_backend.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/minimal_logging.h" @@ -40,18 +43,39 @@ limitations under the License. namespace tflite { namespace gpu { - namespace { +InferencePriority ToPriority(int32_t priority) { + switch (priority) { + case TFLITE_GPU_INFERENCE_PRIORITY_AUTO: + return InferencePriority::AUTO; + case TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION: + return InferencePriority::MAX_PRECISION; + case TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY: + return InferencePriority::MIN_LATENCY; + case TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE: + return InferencePriority::MIN_MEMORY_USAGE; + } + return InferencePriority::UNKNOWN; +} + +InferenceUsage ToUsage(int32_t usage) { + switch (usage) { + case TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER: + return InferenceUsage::FAST_SINGLE_ANSWER; + case TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED: + return InferenceUsage::SUSTAINED_SPEED; + } + return InferenceUsage::UNKNOWN; +} + // Forward declarations. TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate); class Delegate { public: - explicit Delegate(GpuBackend* gpu_backend, - const TfLiteGpuDelegateOptionsV2* options) - : gpu_backend_(std::unique_ptr(gpu_backend)), - num_delegate_kernels_(0) { + explicit Delegate(const TfLiteGpuDelegateOptionsV2* options) + : num_delegate_kernels_(0) { options_ = options ? *options : TfLiteGpuDelegateOptionsV2Default(); if (options_.max_delegated_partitions <= 0) { options_.max_delegated_partitions = 1; @@ -70,17 +94,6 @@ class Delegate { } int num_delegate_kernels() const { return num_delegate_kernels_; } - absl::Status Prepare( - tflite::gpu::GraphFloat32* graph, - std::function initialize_graph, - std::unique_ptr* builder) { - return gpu_backend_->Prepare(options_, graph, initialize_graph, builder); - } - - bool enforce_same_thread() const { - return gpu_backend_->enforce_same_thread(); - } - private: TfLiteDelegate delegate_ = { .data_ = reinterpret_cast(this), @@ -91,8 +104,6 @@ class Delegate { .flags = kTfLiteDelegateFlagsNone, }; - std::unique_ptr gpu_backend_; - TfLiteGpuDelegateOptionsV2 options_; int num_delegate_kernels_ = 0; @@ -111,7 +122,7 @@ class DelegateKernel { const TfLiteDelegateParams* delegate_params) { thread_id_prepare_ = std::this_thread::get_id(); - // Extract TfLite delegate execution plan from the context and convert it + // Extract TFLite delegate execution plan from the context and convert it // into GraphFloat32. GraphFloat32 graph; std::vector input_refs; @@ -120,16 +131,31 @@ class DelegateKernel { &input_refs, &output_refs)); std::unique_ptr builder; - RETURN_IF_ERROR(delegate_->Prepare( - &graph, - [&](GraphFloat32* graph) -> absl::Status { - return InitializeGraph(context, delegate_params, graph, &input_refs, - &output_refs); - }, - &builder)); + bool graph_is_destroyed; + const int experimental_flags = delegate_->options().experimental_flags; + if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) { + RETURN_IF_ERROR( + InitializeOpenClApi(&graph, &builder, &graph_is_destroyed)); + } else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) { + RETURN_IF_ERROR(InitializeOpenGlApi(&graph, &builder)); + } else { + // By default, we try CL first & fall back to GL if that fails. + absl::Status status = + InitializeOpenClApi(&graph, &builder, &graph_is_destroyed); + if (!status.ok()) { + TF_LITE_KERNEL_LOG(context, std::string(status.message()).c_str()); + TF_LITE_KERNEL_LOG(context, "Falling back to OpenGL"); - // See if the GPU backend want us to be always on the same thread. - enforce_same_thread_ = delegate_->enforce_same_thread(); + // Graph needs to be re-created because it is moved above. + GraphFloat32 graph2; + if (graph_is_destroyed) { + RETURN_IF_ERROR(InitializeGraph(context, delegate_params, &graph2, + &input_refs, &output_refs)); + } + RETURN_IF_ERROR(InitializeOpenGlApi( + graph_is_destroyed ? &graph2 : &graph, &builder)); + } + } // At this point tflite didn't allocate tensors yet, therefore, collect // indices and set all input and output tensors from tflite later. @@ -253,12 +279,71 @@ class DelegateKernel { for (const auto& output : outputs) { output_refs->push_back(output->tensor.ref); } + + return absl::OkStatus(); + } + + absl::Status InitializeOpenClApi(GraphFloat32* graph, + std::unique_ptr* builder, + bool* graph_is_destroyed) { + *graph_is_destroyed = false; + cl::InferenceEnvironmentOptions env_options; + cl::InferenceEnvironmentProperties properties; + RETURN_IF_ERROR(cl::NewInferenceEnvironment(env_options, &cl_environment_, + &properties)); + auto delegate_options = delegate_->options(); + cl::InferenceOptions options; + // If is_precision_loss_allowed == -1, then just use priorities instead + // of paying attention to is_precision_loss_allowed value. + if (delegate_options.is_precision_loss_allowed == -1) { + options.priority1 = ToPriority(delegate_options.inference_priority1); + options.priority2 = ToPriority(delegate_options.inference_priority2); + options.priority3 = ToPriority(delegate_options.inference_priority3); + } else { + // Users set is_precision_loss_allowed explicitly, thus use it explicitly. + if (delegate_options.is_precision_loss_allowed == 0) { + options.priority1 = InferencePriority::MAX_PRECISION; + } else { + options.priority1 = InferencePriority::MIN_LATENCY; + } + } + options.usage = ToUsage(delegate_options.inference_preference); + *graph_is_destroyed = true; + RETURN_IF_ERROR(cl_environment_->NewInferenceBuilder( + options, std::move(*graph), builder)); + TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, + "Initialized OpenCL-based API."); + return absl::OkStatus(); + } + + absl::Status InitializeOpenGlApi(GraphFloat32* graph, + std::unique_ptr* builder) { +#ifndef CL_DELEGATE_NO_GL + gl::InferenceEnvironmentOptions env_options; + gl::InferenceEnvironmentProperties properties; + RETURN_IF_ERROR( + NewInferenceEnvironment(env_options, &gl_environment_, &properties)); + auto delegate_options = delegate_->options(); + gl::InferenceOptions options; + options.usage = ToUsage(delegate_options.inference_preference); + options.priority1 = ToPriority(delegate_options.inference_priority1); + options.priority2 = ToPriority(delegate_options.inference_priority2); + options.priority3 = ToPriority(delegate_options.inference_priority3); + RETURN_IF_ERROR(gl_environment_->NewInferenceBuilder(std::move(*graph), + options, builder)); + enforce_same_thread_ = true; + TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, + "Initialized OpenGL-based API."); +#endif return absl::OkStatus(); } // The Delegate instance that's shared across all DelegateKernel instances. - Delegate* const delegate_; - + Delegate* const delegate_; // doesn't own the memory. + std::unique_ptr cl_environment_; +#ifndef CL_DELEGATE_NO_GL + std::unique_ptr gl_environment_; +#endif std::unique_ptr runner_; std::vector input_indices_; std::vector output_indices_; @@ -285,7 +370,7 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { const auto* params = reinterpret_cast(buffer); auto* gpu_delegate = GetDelegate(params->delegate); - // Everything below should happen in prepare function call, but TfLite + // Everything below should happen in prepare function call, but TFLite // for whatever reason forbids that. auto gpu_delegate_kernel = absl::make_unique(gpu_delegate); @@ -351,25 +436,10 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { } } // namespace - -TfLiteDelegate* TfLiteGpuDelegateCreateInternal( - GpuBackend* gpu_backend, const TfLiteGpuDelegateOptionsV2* options) { - auto* gpu_delegate = new tflite::gpu::Delegate(gpu_backend, options); - if (gpu_delegate) { - TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, - "Created TensorFlow Lite delegate for GPU."); - } - return gpu_delegate ? gpu_delegate->tflite_delegate() : nullptr; -} - -void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate) { - delete tflite::gpu::GetDelegate(delegate); -} - } // namespace gpu } // namespace tflite -extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() { +TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() { TfLiteGpuDelegateOptionsV2 options = { // set it to -1 to detect whether it was later adjusted. .is_precision_loss_allowed = -1, @@ -383,3 +453,15 @@ extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() { }; return options; } + +TfLiteDelegate* TfLiteGpuDelegateV2Create( + const TfLiteGpuDelegateOptionsV2* options) { + auto* gpu_delegate = new tflite::gpu::Delegate(options); + TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, + "Created TensorFlow Lite delegate for GPU."); + return gpu_delegate ? gpu_delegate->tflite_delegate() : nullptr; +} + +void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) { + delete tflite::gpu::GetDelegate(delegate); +} diff --git a/tensorflow/lite/delegates/gpu/delegate.h b/tensorflow/lite/delegates/gpu/delegate.h index 633d5a66296..9af586bfd75 100644 --- a/tensorflow/lite/delegates/gpu/delegate.h +++ b/tensorflow/lite/delegates/gpu/delegate.h @@ -111,7 +111,7 @@ typedef struct { TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default(); // Creates a new delegate instance that need to be destroyed with -// TfLiteGpuDelegateV2Delete when delegate is no longer used by TfLite. +// TfLiteGpuDelegateV2Delete when delegate is no longer used by TFLite. // // This delegate encapsulates multiple GPU-acceleration APIs under the hood to // make use of the fastest available on a device. diff --git a/tensorflow/lite/delegates/gpu/gpu_backend.cc b/tensorflow/lite/delegates/gpu/gpu_backend.cc deleted file mode 100644 index d1b5ad744d2..00000000000 --- a/tensorflow/lite/delegates/gpu/gpu_backend.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/delegates/gpu/gpu_backend.h" - -namespace tflite { -namespace gpu { - -InferencePriority GpuBackend::ToPriority(int32_t priority) { - switch (priority) { - case TFLITE_GPU_INFERENCE_PRIORITY_AUTO: - return InferencePriority::AUTO; - case TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION: - return InferencePriority::MAX_PRECISION; - case TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY: - return InferencePriority::MIN_LATENCY; - case TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE: - return InferencePriority::MIN_MEMORY_USAGE; - } - return InferencePriority::UNKNOWN; -} - -InferenceUsage GpuBackend::ToUsage(int32_t usage) { - switch (usage) { - case TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER: - return InferenceUsage::FAST_SINGLE_ANSWER; - case TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED: - return InferenceUsage::SUSTAINED_SPEED; - } - return InferenceUsage::UNKNOWN; -} - -} // namespace gpu -} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/gpu_backend.h b/tensorflow/lite/delegates/gpu/gpu_backend.h deleted file mode 100644 index 7c6615ef447..00000000000 --- a/tensorflow/lite/delegates/gpu/gpu_backend.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_ -#define TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_ - -#include - -#include "tensorflow/lite/delegates/gpu/api.h" -#include "tensorflow/lite/delegates/gpu/common/model.h" -#include "tensorflow/lite/delegates/gpu/delegate.h" - -namespace tflite { -namespace gpu { - -class GpuBackend { - public: - virtual absl::Status Prepare( - const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph, - std::function initialize_graph, - std::unique_ptr* builder) = 0; - - bool enforce_same_thread() const { return enforce_same_thread_; } - - virtual ~GpuBackend() = default; - - static InferencePriority ToPriority(int32_t priority); - static InferenceUsage ToUsage(int32_t usage); - - protected: - bool enforce_same_thread_ = false; -}; - -TfLiteDelegate* TfLiteGpuDelegateCreateInternal( - GpuBackend* backend, const TfLiteGpuDelegateOptionsV2* options); - -// Destroys a delegate created with `TfLiteGpuDelegateCreateInternal` call. -void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate); - -} // namespace gpu -} // namespace tflite - -#endif // TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_ diff --git a/tensorflow/lite/delegates/gpu/gpu_backend_default.cc b/tensorflow/lite/delegates/gpu/gpu_backend_default.cc deleted file mode 100644 index 2925c3945ac..00000000000 --- a/tensorflow/lite/delegates/gpu/gpu_backend_default.cc +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/delegates/gpu/cl/api.h" -#ifndef CL_DELEGATE_NO_GL -#include "tensorflow/lite/delegates/gpu/gl/api2.h" -#endif -#include "tensorflow/lite/delegates/gpu/gpu_backend.h" -#include "tensorflow/lite/minimal_logging.h" - -namespace tflite { -namespace gpu { - -absl::Status InitializeOpenClApi( - GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options, - std::unique_ptr* builder, - std::unique_ptr* inference_environment, - bool* graph_is_destroyed) { - if (graph_is_destroyed) { - *graph_is_destroyed = false; - } - cl::InferenceEnvironmentOptions env_options; - cl::InferenceEnvironmentProperties properties; - RETURN_IF_ERROR(cl::NewInferenceEnvironment( - env_options, inference_environment, &properties)); - cl::InferenceOptions options; - // If is_precision_loss_allowed == -1, then just use priorities instead - // of paying attention to is_precision_loss_allowed value. - if (delegate_options.is_precision_loss_allowed == -1) { - options.priority1 = - GpuBackend::ToPriority(delegate_options.inference_priority1); - options.priority2 = - GpuBackend::ToPriority(delegate_options.inference_priority2); - options.priority3 = - GpuBackend::ToPriority(delegate_options.inference_priority3); - } else { - // Users set is_precision_loss_allowed explicitly, thus use it explicitly. - if (delegate_options.is_precision_loss_allowed == 0) { - options.priority1 = InferencePriority::MAX_PRECISION; - } else { - options.priority1 = InferencePriority::MIN_LATENCY; - } - } - options.usage = GpuBackend::ToUsage(delegate_options.inference_preference); - if (graph_is_destroyed) { - *graph_is_destroyed = true; - } - RETURN_IF_ERROR( - (*inference_environment) - ->NewInferenceBuilder(options, std::move(*graph), builder)); - TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, - "Initialized OpenCL-based API."); - return absl::OkStatus(); -} - -#ifndef CL_DELEGATE_NO_GL -absl::Status InitializeOpenGlApi( - GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options, - std::unique_ptr* builder, - std::unique_ptr* inference_environment) { - gl::InferenceEnvironmentOptions env_options; - gl::InferenceEnvironmentProperties properties; - RETURN_IF_ERROR( - NewInferenceEnvironment(env_options, inference_environment, &properties)); - gl::InferenceOptions options; - options.usage = GpuBackend::ToUsage(delegate_options.inference_preference); - options.priority1 = - GpuBackend::ToPriority(delegate_options.inference_priority1); - options.priority2 = - GpuBackend::ToPriority(delegate_options.inference_priority2); - options.priority3 = - GpuBackend::ToPriority(delegate_options.inference_priority3); - RETURN_IF_ERROR( - (*inference_environment) - ->NewInferenceBuilder(std::move(*graph), options, builder)); - TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, - "Initialized OpenGL-based API."); - return absl::OkStatus(); -} -#endif - -class GpuBackendDefault : public GpuBackend { - public: - GpuBackendDefault() {} - - absl::Status Prepare( - const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph, - std::function initialize_graph, - std::unique_ptr* builder) override { -#ifdef CL_DELEGATE_NO_GL - return InitializeOpenClApi(graph, delegate_options, builder, - &cl_inference_environment_, nullptr); -#else - bool graph_is_destroyed; - const int experimental_flags = delegate_options.experimental_flags; - if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) { - RETURN_IF_ERROR(InitializeOpenClApi(graph, delegate_options, builder, - &cl_inference_environment_, - &graph_is_destroyed)); - } else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) { - RETURN_IF_ERROR(InitializeOpenGlApi(graph, delegate_options, builder, - &gl_inference_environment_)); - } else { - // By default, we try CL first & fall back to GL if that fails. - absl::Status status = - InitializeOpenClApi(graph, delegate_options, builder, - &cl_inference_environment_, &graph_is_destroyed); - if (!status.ok()) { - TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, - std::string(status.message()).c_str()); - TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "Falling back to OpenGL"); - - // Graph needs to be re-created because it is moved above. - GraphFloat32 graph2; - if (graph_is_destroyed) { - RETURN_IF_ERROR(initialize_graph(&graph2)); - } - RETURN_IF_ERROR(InitializeOpenGlApi( - graph_is_destroyed ? &graph2 : graph, delegate_options, builder, - &gl_inference_environment_)); - } - } - return absl::OkStatus(); -#endif - } - - private: - std::unique_ptr cl_inference_environment_; -#ifndef CL_DELEGATE_NO_GL - std::unique_ptr gl_inference_environment_; -#endif -}; - -} // namespace gpu -} // namespace tflite - -extern "C" TfLiteDelegate* TfLiteGpuDelegateV2Create( - const TfLiteGpuDelegateOptionsV2* options) { - return tflite::gpu::TfLiteGpuDelegateCreateInternal( - new tflite::gpu::GpuBackendDefault(), options); -} - -extern "C" void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) { - return tflite::gpu::TfLiteGpuDelegateDeleteInternal(delegate); -}