Add experimental Vulkan support to TFLite delegate.

Note that this is still WIP, and is not yet available at runtime
with the default TFLite GPU prebuilts.

PiperOrigin-RevId: 331029289
Change-Id: I31d1b2cfbe478ca0bf2026b451d20eb3ac86b387
This commit is contained in:
Fabio Riccardi 2020-09-10 15:24:40 -07:00 committed by TensorFlower Gardener
parent ffca41ef1b
commit 5874c1424d
6 changed files with 151 additions and 374 deletions

View File

@ -26,8 +26,6 @@ config_setting(
},
)
cc_library(name = "egl_glsl_headers")
cc_library(
name = "gl_delegate",
srcs = ["gl_delegate.cc"],
@ -44,7 +42,6 @@ cc_library(
"//conditions:default": [],
}),
deps = [
":egl_glsl_headers",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/types:span",
"//tensorflow/lite:kernel_api",
@ -233,65 +230,9 @@ selects.config_setting_group(
)
cc_library(
name = "delegate_header",
hdrs = ["delegate.h"],
deps = [
"//tensorflow/lite/c:common",
"//tensorflow/lite/delegates/gpu:api",
],
)
cc_library(
name = "delegate_base",
name = "delegate",
srcs = ["delegate.cc"],
hdrs = ["delegate.h"],
deps = [
":gpu_backend",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common",
"//tensorflow/lite/delegates/gpu:api",
"//tensorflow/lite/delegates/gpu/common:model",
"//tensorflow/lite/delegates/gpu/common:model_builder",
"//tensorflow/lite/delegates/gpu/common:model_transformer",
"//tensorflow/lite/delegates/gpu/common:quantization_util",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/gl:api2",
"//tensorflow/lite/kernels/internal:optimized_base",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/types:span",
],
)
cc_library(
name = "delegate",
hdrs = ["delegate.h"],
deps = [
":delegate_base",
":gpu_backend_default",
"//tensorflow/lite/c:common",
],
)
cc_library(
name = "gpu_backend",
srcs = ["gpu_backend.cc"],
hdrs = ["gpu_backend.h"],
deps = [
":api",
":delegate_header",
"//tensorflow/lite/delegates/gpu/common:model",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:tensor",
"@com_google_absl//absl/types:any",
"@opencl_headers",
],
)
cc_library(
name = "gpu_backend_default",
srcs = ["gpu_backend_default.cc"],
linkopts = select({
"//tensorflow:android": [
"-lEGL",
@ -303,15 +244,27 @@ cc_library(
],
"//conditions:default": [],
}),
deps = [
":gpu_backend",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/delegates/gpu/cl:api",
] + select({
deps = select({
"//tensorflow/lite/delegates/gpu/cl:opencl_delegate_no_gl": [],
"//conditions:default": [
":egl_glsl_headers",
"//tensorflow/lite/delegates/gpu/gl:api2",
],
}),
}) + [
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/types:span",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common",
"//tensorflow/lite/delegates/gpu:api",
"//tensorflow/lite/delegates/gpu/cl:api",
"//tensorflow/lite/delegates/gpu/cl:opencl_wrapper",
"//tensorflow/lite/delegates/gpu/cl:tensor_type_util",
"//tensorflow/lite/delegates/gpu/common:model",
"//tensorflow/lite/delegates/gpu/common:model_builder",
"//tensorflow/lite/delegates/gpu/common:model_transformer",
"//tensorflow/lite/delegates/gpu/common:quantization_util",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/kernels/internal:optimized_base",
],
)

View File

@ -25,12 +25,15 @@ limitations under the License.
#include "absl/types/span.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/cl/api.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/model_builder.h"
#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
#include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/minimal_logging.h"
@ -40,18 +43,39 @@ limitations under the License.
namespace tflite {
namespace gpu {
namespace {
// Converts a TFLITE_GPU_INFERENCE_PRIORITY_* option value into the typed
// InferencePriority enum; any unrecognized value becomes UNKNOWN.
InferencePriority ToPriority(int32_t priority) {
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_AUTO) {
    return InferencePriority::AUTO;
  }
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION) {
    return InferencePriority::MAX_PRECISION;
  }
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY) {
    return InferencePriority::MIN_LATENCY;
  }
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE) {
    return InferencePriority::MIN_MEMORY_USAGE;
  }
  return InferencePriority::UNKNOWN;
}
// Converts a TFLITE_GPU_INFERENCE_PREFERENCE_* option value into the typed
// InferenceUsage enum; any unrecognized value becomes UNKNOWN.
InferenceUsage ToUsage(int32_t usage) {
  if (usage == TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER) {
    return InferenceUsage::FAST_SINGLE_ANSWER;
  }
  if (usage == TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED) {
    return InferenceUsage::SUSTAINED_SPEED;
  }
  return InferenceUsage::UNKNOWN;
}
// Forward declarations.
TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate);
class Delegate {
public:
explicit Delegate(GpuBackend* gpu_backend,
const TfLiteGpuDelegateOptionsV2* options)
: gpu_backend_(std::unique_ptr<GpuBackend>(gpu_backend)),
num_delegate_kernels_(0) {
explicit Delegate(const TfLiteGpuDelegateOptionsV2* options)
: num_delegate_kernels_(0) {
options_ = options ? *options : TfLiteGpuDelegateOptionsV2Default();
if (options_.max_delegated_partitions <= 0) {
options_.max_delegated_partitions = 1;
@ -70,17 +94,6 @@ class Delegate {
}
int num_delegate_kernels() const { return num_delegate_kernels_; }
absl::Status Prepare(
tflite::gpu::GraphFloat32* graph,
std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
std::unique_ptr<tflite::gpu::InferenceBuilder>* builder) {
return gpu_backend_->Prepare(options_, graph, initialize_graph, builder);
}
bool enforce_same_thread() const {
return gpu_backend_->enforce_same_thread();
}
private:
TfLiteDelegate delegate_ = {
.data_ = reinterpret_cast<void*>(this),
@ -91,8 +104,6 @@ class Delegate {
.flags = kTfLiteDelegateFlagsNone,
};
std::unique_ptr<GpuBackend> gpu_backend_;
TfLiteGpuDelegateOptionsV2 options_;
int num_delegate_kernels_ = 0;
@ -111,7 +122,7 @@ class DelegateKernel {
const TfLiteDelegateParams* delegate_params) {
thread_id_prepare_ = std::this_thread::get_id();
// Extract TfLite delegate execution plan from the context and convert it
// Extract TFLite delegate execution plan from the context and convert it
// into GraphFloat32.
GraphFloat32 graph;
std::vector<uint32_t> input_refs;
@ -120,16 +131,31 @@ class DelegateKernel {
&input_refs, &output_refs));
std::unique_ptr<InferenceBuilder> builder;
RETURN_IF_ERROR(delegate_->Prepare(
&graph,
[&](GraphFloat32* graph) -> absl::Status {
return InitializeGraph(context, delegate_params, graph, &input_refs,
&output_refs);
},
&builder));
bool graph_is_destroyed;
const int experimental_flags = delegate_->options().experimental_flags;
if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) {
RETURN_IF_ERROR(
InitializeOpenClApi(&graph, &builder, &graph_is_destroyed));
} else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) {
RETURN_IF_ERROR(InitializeOpenGlApi(&graph, &builder));
} else {
// By default, we try CL first & fall back to GL if that fails.
absl::Status status =
InitializeOpenClApi(&graph, &builder, &graph_is_destroyed);
if (!status.ok()) {
TF_LITE_KERNEL_LOG(context, std::string(status.message()).c_str());
TF_LITE_KERNEL_LOG(context, "Falling back to OpenGL");
// See if the GPU backend wants us to always run on the same thread.
enforce_same_thread_ = delegate_->enforce_same_thread();
// Graph needs to be re-created because it is moved above.
GraphFloat32 graph2;
if (graph_is_destroyed) {
RETURN_IF_ERROR(InitializeGraph(context, delegate_params, &graph2,
&input_refs, &output_refs));
}
RETURN_IF_ERROR(InitializeOpenGlApi(
graph_is_destroyed ? &graph2 : &graph, &builder));
}
}
// At this point TFLite hasn't allocated tensors yet; therefore, collect
// the indices now and set all input and output tensors from TFLite later.
@ -253,12 +279,71 @@ class DelegateKernel {
for (const auto& output : outputs) {
output_refs->push_back(output->tensor.ref);
}
return absl::OkStatus();
}
// Builds an OpenCL inference environment and an InferenceBuilder for `graph`.
// On success `*graph` has been moved into the builder. `*graph_is_destroyed`
// is set to true just before NewInferenceBuilder runs, so even when that call
// fails the caller must treat the graph as consumed and rebuild it before
// retrying (e.g. before falling back to the GL path).
absl::Status InitializeOpenClApi(GraphFloat32* graph,
std::unique_ptr<InferenceBuilder>* builder,
bool* graph_is_destroyed) {
*graph_is_destroyed = false;
cl::InferenceEnvironmentOptions env_options;
cl::InferenceEnvironmentProperties properties;
RETURN_IF_ERROR(cl::NewInferenceEnvironment(env_options, &cl_environment_,
&properties));
auto delegate_options = delegate_->options();
cl::InferenceOptions options;
// If is_precision_loss_allowed == -1, then just use priorities instead
// of paying attention to is_precision_loss_allowed value.
if (delegate_options.is_precision_loss_allowed == -1) {
options.priority1 = ToPriority(delegate_options.inference_priority1);
options.priority2 = ToPriority(delegate_options.inference_priority2);
options.priority3 = ToPriority(delegate_options.inference_priority3);
} else {
// Users set is_precision_loss_allowed explicitly, thus use it explicitly.
if (delegate_options.is_precision_loss_allowed == 0) {
options.priority1 = InferencePriority::MAX_PRECISION;
} else {
options.priority1 = InferencePriority::MIN_LATENCY;
}
}
options.usage = ToUsage(delegate_options.inference_preference);
// Record the move *before* it happens: NewInferenceBuilder consumes *graph.
*graph_is_destroyed = true;
RETURN_IF_ERROR(cl_environment_->NewInferenceBuilder(
options, std::move(*graph), builder));
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
"Initialized OpenCL-based API.");
return absl::OkStatus();
}
// Builds an OpenGL inference environment and an InferenceBuilder for `graph`;
// `*graph` is moved into the builder on success. Also sets
// enforce_same_thread_ because the GL path below requires all inference calls
// to happen on the preparing thread.
absl::Status InitializeOpenGlApi(GraphFloat32* graph,
                                 std::unique_ptr<InferenceBuilder>* builder) {
#ifndef CL_DELEGATE_NO_GL
  gl::InferenceEnvironmentOptions env_options;
  gl::InferenceEnvironmentProperties properties;
  RETURN_IF_ERROR(
      NewInferenceEnvironment(env_options, &gl_environment_, &properties));
  auto delegate_options = delegate_->options();
  gl::InferenceOptions options;
  options.usage = ToUsage(delegate_options.inference_preference);
  options.priority1 = ToPriority(delegate_options.inference_priority1);
  options.priority2 = ToPriority(delegate_options.inference_priority2);
  options.priority3 = ToPriority(delegate_options.inference_priority3);
  RETURN_IF_ERROR(gl_environment_->NewInferenceBuilder(std::move(*graph),
                                                       options, builder));
  enforce_same_thread_ = true;
  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Initialized OpenGL-based API.");
  return absl::OkStatus();
#else
  // Bug fix: the previous version returned OkStatus() here without
  // populating *builder, so callers would silently proceed with a null
  // builder. Report the compiled-out backend explicitly instead.
  return absl::UnavailableError("OpenGL-based API disabled");
#endif
}
// The Delegate instance that's shared across all DelegateKernel instances.
Delegate* const delegate_;
Delegate* const delegate_; // doesn't own the memory.
std::unique_ptr<cl::InferenceEnvironment> cl_environment_;
#ifndef CL_DELEGATE_NO_GL
std::unique_ptr<gl::InferenceEnvironment> gl_environment_;
#endif
std::unique_ptr<InferenceRunner> runner_;
std::vector<int64_t> input_indices_;
std::vector<int64_t> output_indices_;
@ -285,7 +370,7 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
const auto* params =
reinterpret_cast<const TfLiteDelegateParams*>(buffer);
auto* gpu_delegate = GetDelegate(params->delegate);
// Everything below should happen in prepare function call, but TfLite
// Everything below should happen in prepare function call, but TFLite
// for whatever reason forbids that.
auto gpu_delegate_kernel =
absl::make_unique<DelegateKernel>(gpu_delegate);
@ -351,25 +436,10 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
}
} // namespace
// Takes ownership of `gpu_backend`, wraps it in a Delegate configured from
// `options` (nullptr selects TfLiteGpuDelegateOptionsV2Default), and returns
// the TfLiteDelegate handle to hand to the TFLite interpreter.
TfLiteDelegate* TfLiteGpuDelegateCreateInternal(
GpuBackend* gpu_backend, const TfLiteGpuDelegateOptionsV2* options) {
auto* gpu_delegate = new tflite::gpu::Delegate(gpu_backend, options);
// NOTE(review): a non-nothrow `new` throws instead of returning nullptr, so
// this check and the ternary below are effectively dead code.
if (gpu_delegate) {
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
"Created TensorFlow Lite delegate for GPU.");
}
return gpu_delegate ? gpu_delegate->tflite_delegate() : nullptr;
}
// Destroys a delegate created by TfLiteGpuDelegateCreateInternal, releasing
// the Delegate and the GpuBackend it owns.
void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate) {
delete tflite::gpu::GetDelegate(delegate);
}
} // namespace gpu
} // namespace tflite
extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
TfLiteGpuDelegateOptionsV2 options = {
// set it to -1 to detect whether it was later adjusted.
.is_precision_loss_allowed = -1,
@ -383,3 +453,15 @@ extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
};
return options;
}
// Creates a GPU delegate configured from `options`; pass nullptr to use
// TfLiteGpuDelegateOptionsV2Default(). The returned delegate must be
// destroyed with TfLiteGpuDelegateV2Delete once no longer in use.
TfLiteDelegate* TfLiteGpuDelegateV2Create(
    const TfLiteGpuDelegateOptionsV2* options) {
  auto* gpu_delegate = new tflite::gpu::Delegate(options);
  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Created TensorFlow Lite delegate for GPU.");
  // A non-nothrow `new` throws on allocation failure rather than returning
  // nullptr, so the previous `gpu_delegate ? ... : nullptr` was dead code.
  return gpu_delegate->tflite_delegate();
}
// Destroys a delegate previously created by TfLiteGpuDelegateV2Create.
// `delegate` must not be used after this call.
void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) {
delete tflite::gpu::GetDelegate(delegate);
}

View File

@ -111,7 +111,7 @@ typedef struct {
TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();
// Creates a new delegate instance that needs to be destroyed with
// TfLiteGpuDelegateV2Delete when delegate is no longer used by TfLite.
// TfLiteGpuDelegateV2Delete when delegate is no longer used by TFLite.
//
// This delegate encapsulates multiple GPU-acceleration APIs under the hood to
// make use of the fastest available on a device.

View File

@ -1,46 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
namespace tflite {
namespace gpu {
// Maps a TFLITE_GPU_INFERENCE_PRIORITY_* integer constant to the typed
// InferencePriority enum; values outside the known set map to UNKNOWN.
InferencePriority GpuBackend::ToPriority(int32_t priority) {
switch (priority) {
case TFLITE_GPU_INFERENCE_PRIORITY_AUTO:
return InferencePriority::AUTO;
case TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
return InferencePriority::MAX_PRECISION;
case TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
return InferencePriority::MIN_LATENCY;
case TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE:
return InferencePriority::MIN_MEMORY_USAGE;
}
return InferencePriority::UNKNOWN;
}
// Maps a TFLITE_GPU_INFERENCE_PREFERENCE_* integer constant to the typed
// InferenceUsage enum; values outside the known set map to UNKNOWN.
InferenceUsage GpuBackend::ToUsage(int32_t usage) {
switch (usage) {
case TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER:
return InferenceUsage::FAST_SINGLE_ANSWER;
case TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED:
return InferenceUsage::SUSTAINED_SPEED;
}
return InferenceUsage::UNKNOWN;
}
} // namespace gpu
} // namespace tflite

View File

@ -1,55 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
#include <functional>
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/delegate.h"
namespace tflite {
namespace gpu {
// Abstraction point for GPU API backends (OpenCL/OpenGL; per the commit
// message, experimental Vulkan support plugs in here too). The Delegate
// forwards its Prepare() call to a GpuBackend to build an InferenceBuilder.
class GpuBackend {
public:
// Builds `*builder` for `graph` using `delegate_options`. `initialize_graph`
// re-creates the graph when the backend has consumed (moved) it and needs a
// fresh copy — e.g. when falling back from CL to GL.
virtual absl::Status Prepare(
const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph,
std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
std::unique_ptr<InferenceBuilder>* builder) = 0;
// True when inference must run on the same thread that called Prepare().
bool enforce_same_thread() const { return enforce_same_thread_; }
virtual ~GpuBackend() = default;
// Translate TfLiteGpuDelegateOptionsV2 integer constants into typed enums.
static InferencePriority ToPriority(int32_t priority);
static InferenceUsage ToUsage(int32_t usage);
protected:
// NOTE(review): presumably set during Prepare() by backends whose API is
// thread-affine (e.g. GL) — confirm in concrete subclasses.
bool enforce_same_thread_ = false;
};
TfLiteDelegate* TfLiteGpuDelegateCreateInternal(
GpuBackend* backend, const TfLiteGpuDelegateOptionsV2* options);
// Destroys a delegate created with `TfLiteGpuDelegateCreateInternal` call.
void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate);
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_

View File

@ -1,157 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/cl/api.h"
#ifndef CL_DELEGATE_NO_GL
#include "tensorflow/lite/delegates/gpu/gl/api2.h"
#endif
#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
#include "tensorflow/lite/minimal_logging.h"
namespace tflite {
namespace gpu {
// Builds an OpenCL inference environment and an InferenceBuilder for `graph`.
// `graph_is_destroyed` is optional (may be null); when provided it reports
// whether `*graph` has been moved-from. It is set to true just before
// NewInferenceBuilder runs, so the caller must treat the graph as consumed —
// and rebuild it before retrying — even if that call fails.
absl::Status InitializeOpenClApi(
GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options,
std::unique_ptr<InferenceBuilder>* builder,
std::unique_ptr<cl::InferenceEnvironment>* inference_environment,
bool* graph_is_destroyed) {
if (graph_is_destroyed) {
*graph_is_destroyed = false;
}
cl::InferenceEnvironmentOptions env_options;
cl::InferenceEnvironmentProperties properties;
RETURN_IF_ERROR(cl::NewInferenceEnvironment(
env_options, inference_environment, &properties));
cl::InferenceOptions options;
// If is_precision_loss_allowed == -1, then just use priorities instead
// of paying attention to is_precision_loss_allowed value.
if (delegate_options.is_precision_loss_allowed == -1) {
options.priority1 =
GpuBackend::ToPriority(delegate_options.inference_priority1);
options.priority2 =
GpuBackend::ToPriority(delegate_options.inference_priority2);
options.priority3 =
GpuBackend::ToPriority(delegate_options.inference_priority3);
} else {
// Users set is_precision_loss_allowed explicitly, thus use it explicitly.
if (delegate_options.is_precision_loss_allowed == 0) {
options.priority1 = InferencePriority::MAX_PRECISION;
} else {
options.priority1 = InferencePriority::MIN_LATENCY;
}
}
options.usage = GpuBackend::ToUsage(delegate_options.inference_preference);
// Record the move *before* it happens: the builder consumes *graph below.
if (graph_is_destroyed) {
*graph_is_destroyed = true;
}
RETURN_IF_ERROR(
(*inference_environment)
->NewInferenceBuilder(options, std::move(*graph), builder));
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
"Initialized OpenCL-based API.");
return absl::OkStatus();
}
#ifndef CL_DELEGATE_NO_GL
// Builds an OpenGL inference environment and an InferenceBuilder for `graph`.
// `*graph` is always moved into the builder on this path, so callers needing
// the graph afterwards must re-create it. Only compiled when GL support is
// enabled (CL_DELEGATE_NO_GL not defined).
absl::Status InitializeOpenGlApi(
GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options,
std::unique_ptr<InferenceBuilder>* builder,
std::unique_ptr<gl::InferenceEnvironment>* inference_environment) {
gl::InferenceEnvironmentOptions env_options;
gl::InferenceEnvironmentProperties properties;
RETURN_IF_ERROR(
NewInferenceEnvironment(env_options, inference_environment, &properties));
gl::InferenceOptions options;
options.usage = GpuBackend::ToUsage(delegate_options.inference_preference);
options.priority1 =
GpuBackend::ToPriority(delegate_options.inference_priority1);
options.priority2 =
GpuBackend::ToPriority(delegate_options.inference_priority2);
options.priority3 =
GpuBackend::ToPriority(delegate_options.inference_priority3);
RETURN_IF_ERROR(
(*inference_environment)
->NewInferenceBuilder(std::move(*graph), options, builder));
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
"Initialized OpenGL-based API.");
return absl::OkStatus();
}
#endif
// Default GpuBackend: tries OpenCL first and, unless compiled with
// CL_DELEGATE_NO_GL or constrained by the
// TFLITE_GPU_EXPERIMENTAL_FLAGS_{CL,GL}_ONLY option flags, falls back to
// OpenGL when CL initialization fails.
class GpuBackendDefault : public GpuBackend {
public:
GpuBackendDefault() {}
absl::Status Prepare(
const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph,
std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
std::unique_ptr<InferenceBuilder>* builder) override {
#ifdef CL_DELEGATE_NO_GL
// GL support is compiled out: CL is the only option, no fallback needed,
// so the graph-destroyed flag is not tracked (nullptr).
return InitializeOpenClApi(graph, delegate_options, builder,
&cl_inference_environment_, nullptr);
#else
bool graph_is_destroyed;
const int experimental_flags = delegate_options.experimental_flags;
if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) {
RETURN_IF_ERROR(InitializeOpenClApi(graph, delegate_options, builder,
&cl_inference_environment_,
&graph_is_destroyed));
} else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) {
RETURN_IF_ERROR(InitializeOpenGlApi(graph, delegate_options, builder,
&gl_inference_environment_));
} else {
// By default, we try CL first & fall back to GL if that fails.
absl::Status status =
InitializeOpenClApi(graph, delegate_options, builder,
&cl_inference_environment_, &graph_is_destroyed);
if (!status.ok()) {
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
std::string(status.message()).c_str());
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "Falling back to OpenGL");
// Graph needs to be re-created because it is moved above.
GraphFloat32 graph2;
if (graph_is_destroyed) {
RETURN_IF_ERROR(initialize_graph(&graph2));
}
RETURN_IF_ERROR(InitializeOpenGlApi(
graph_is_destroyed ? &graph2 : graph, delegate_options, builder,
&gl_inference_environment_));
}
}
return absl::OkStatus();
#endif
}
private:
std::unique_ptr<cl::InferenceEnvironment> cl_inference_environment_;
#ifndef CL_DELEGATE_NO_GL
std::unique_ptr<gl::InferenceEnvironment> gl_inference_environment_;
#endif
};
} // namespace gpu
} // namespace tflite
// Public C ABI entry point: creates a GPU delegate backed by the default
// backend (OpenCL with OpenGL fallback). Ownership of the new
// GpuBackendDefault transfers to the created delegate.
extern "C" TfLiteDelegate* TfLiteGpuDelegateV2Create(
const TfLiteGpuDelegateOptionsV2* options) {
return tflite::gpu::TfLiteGpuDelegateCreateInternal(
new tflite::gpu::GpuBackendDefault(), options);
}
// Public C ABI entry point: destroys a delegate created by
// TfLiteGpuDelegateV2Create above. (The `return` of a void call is legal
// but redundant.)
extern "C" void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) {
return tflite::gpu::TfLiteGpuDelegateDeleteInternal(delegate);
}
</return>