Add experimental Vulkan support to TFLite delegate.
Note that this is still WIP, and is not yet available at runtime with the default TFLite GPU prebuilts.
PiperOrigin-RevId: 331029289
Change-Id: I31d1b2cfbe478ca0bf2026b451d20eb3ac86b387
This commit is contained in:
parent
ffca41ef1b
commit
5874c1424d
tensorflow/lite/delegates/gpu
@ -26,8 +26,6 @@ config_setting(
|
||||
},
|
||||
)
|
||||
|
||||
cc_library(name = "egl_glsl_headers")
|
||||
|
||||
cc_library(
|
||||
name = "gl_delegate",
|
||||
srcs = ["gl_delegate.cc"],
|
||||
@ -44,7 +42,6 @@ cc_library(
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
deps = [
|
||||
":egl_glsl_headers",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/types:span",
|
||||
"//tensorflow/lite:kernel_api",
|
||||
@ -233,65 +230,9 @@ selects.config_setting_group(
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "delegate_header",
|
||||
hdrs = ["delegate.h"],
|
||||
deps = [
|
||||
"//tensorflow/lite/c:common",
|
||||
"//tensorflow/lite/delegates/gpu:api",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "delegate_base",
|
||||
name = "delegate",
|
||||
srcs = ["delegate.cc"],
|
||||
hdrs = ["delegate.h"],
|
||||
deps = [
|
||||
":gpu_backend",
|
||||
"//tensorflow/lite:kernel_api",
|
||||
"//tensorflow/lite:minimal_logging",
|
||||
"//tensorflow/lite/c:common",
|
||||
"//tensorflow/lite/delegates/gpu:api",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:model_builder",
|
||||
"//tensorflow/lite/delegates/gpu/common:model_transformer",
|
||||
"//tensorflow/lite/delegates/gpu/common:quantization_util",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/gl:api2",
|
||||
"//tensorflow/lite/kernels/internal:optimized_base",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/types:span",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "delegate",
|
||||
hdrs = ["delegate.h"],
|
||||
deps = [
|
||||
":delegate_base",
|
||||
":gpu_backend_default",
|
||||
"//tensorflow/lite/c:common",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "gpu_backend",
|
||||
srcs = ["gpu_backend.cc"],
|
||||
hdrs = ["gpu_backend.h"],
|
||||
deps = [
|
||||
":api",
|
||||
":delegate_header",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:tensor",
|
||||
"@com_google_absl//absl/types:any",
|
||||
"@opencl_headers",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "gpu_backend_default",
|
||||
srcs = ["gpu_backend_default.cc"],
|
||||
linkopts = select({
|
||||
"//tensorflow:android": [
|
||||
"-lEGL",
|
||||
@ -303,15 +244,27 @@ cc_library(
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
deps = [
|
||||
":gpu_backend",
|
||||
"//tensorflow/lite:minimal_logging",
|
||||
"//tensorflow/lite/delegates/gpu/cl:api",
|
||||
] + select({
|
||||
deps = select({
|
||||
"//tensorflow/lite/delegates/gpu/cl:opencl_delegate_no_gl": [],
|
||||
"//conditions:default": [
|
||||
":egl_glsl_headers",
|
||||
"//tensorflow/lite/delegates/gpu/gl:api2",
|
||||
],
|
||||
}),
|
||||
}) + [
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/types:span",
|
||||
"//tensorflow/lite:kernel_api",
|
||||
"//tensorflow/lite:minimal_logging",
|
||||
"//tensorflow/lite/c:common",
|
||||
"//tensorflow/lite/delegates/gpu:api",
|
||||
"//tensorflow/lite/delegates/gpu/cl:api",
|
||||
"//tensorflow/lite/delegates/gpu/cl:opencl_wrapper",
|
||||
"//tensorflow/lite/delegates/gpu/cl:tensor_type_util",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:model_builder",
|
||||
"//tensorflow/lite/delegates/gpu/common:model_transformer",
|
||||
"//tensorflow/lite/delegates/gpu/common:quantization_util",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/kernels/internal:optimized_base",
|
||||
],
|
||||
)
|
||||
|
@ -25,12 +25,15 @@ limitations under the License.
|
||||
#include "absl/types/span.h"
|
||||
#include "tensorflow/lite/builtin_ops.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/delegates/gpu/api.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/api.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model_builder.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
|
||||
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
|
||||
#include "tensorflow/lite/minimal_logging.h"
|
||||
|
||||
@ -40,18 +43,39 @@ limitations under the License.
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
|
||||
namespace {
|
||||
|
||||
// Translates a TFLITE_GPU_INFERENCE_PRIORITY_* value taken from the public
// delegate options into the internal InferencePriority enum.
//
// Any value that matches none of the four known constants maps to
// InferencePriority::UNKNOWN.
InferencePriority ToPriority(int32_t priority) {
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_AUTO) {
    return InferencePriority::AUTO;
  }
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION) {
    return InferencePriority::MAX_PRECISION;
  }
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY) {
    return InferencePriority::MIN_LATENCY;
  }
  if (priority == TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE) {
    return InferencePriority::MIN_MEMORY_USAGE;
  }
  return InferencePriority::UNKNOWN;
}
|
||||
|
||||
// Translates a TFLITE_GPU_INFERENCE_PREFERENCE_* value taken from the public
// delegate options into the internal InferenceUsage enum.
//
// Values other than the two known preferences map to InferenceUsage::UNKNOWN.
InferenceUsage ToUsage(int32_t usage) {
  InferenceUsage result = InferenceUsage::UNKNOWN;
  switch (usage) {
    case TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER:
      result = InferenceUsage::FAST_SINGLE_ANSWER;
      break;
    case TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED:
      result = InferenceUsage::SUSTAINED_SPEED;
      break;
  }
  return result;
}
|
||||
|
||||
// Forward declarations.
|
||||
TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate);
|
||||
|
||||
class Delegate {
|
||||
public:
|
||||
explicit Delegate(GpuBackend* gpu_backend,
|
||||
const TfLiteGpuDelegateOptionsV2* options)
|
||||
: gpu_backend_(std::unique_ptr<GpuBackend>(gpu_backend)),
|
||||
num_delegate_kernels_(0) {
|
||||
explicit Delegate(const TfLiteGpuDelegateOptionsV2* options)
|
||||
: num_delegate_kernels_(0) {
|
||||
options_ = options ? *options : TfLiteGpuDelegateOptionsV2Default();
|
||||
if (options_.max_delegated_partitions <= 0) {
|
||||
options_.max_delegated_partitions = 1;
|
||||
@ -70,17 +94,6 @@ class Delegate {
|
||||
}
|
||||
int num_delegate_kernels() const { return num_delegate_kernels_; }
|
||||
|
||||
absl::Status Prepare(
|
||||
tflite::gpu::GraphFloat32* graph,
|
||||
std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
|
||||
std::unique_ptr<tflite::gpu::InferenceBuilder>* builder) {
|
||||
return gpu_backend_->Prepare(options_, graph, initialize_graph, builder);
|
||||
}
|
||||
|
||||
bool enforce_same_thread() const {
|
||||
return gpu_backend_->enforce_same_thread();
|
||||
}
|
||||
|
||||
private:
|
||||
TfLiteDelegate delegate_ = {
|
||||
.data_ = reinterpret_cast<void*>(this),
|
||||
@ -91,8 +104,6 @@ class Delegate {
|
||||
.flags = kTfLiteDelegateFlagsNone,
|
||||
};
|
||||
|
||||
std::unique_ptr<GpuBackend> gpu_backend_;
|
||||
|
||||
TfLiteGpuDelegateOptionsV2 options_;
|
||||
int num_delegate_kernels_ = 0;
|
||||
|
||||
@ -111,7 +122,7 @@ class DelegateKernel {
|
||||
const TfLiteDelegateParams* delegate_params) {
|
||||
thread_id_prepare_ = std::this_thread::get_id();
|
||||
|
||||
// Extract TfLite delegate execution plan from the context and convert it
|
||||
// Extract TFLite delegate execution plan from the context and convert it
|
||||
// into GraphFloat32.
|
||||
GraphFloat32 graph;
|
||||
std::vector<uint32_t> input_refs;
|
||||
@ -120,16 +131,31 @@ class DelegateKernel {
|
||||
&input_refs, &output_refs));
|
||||
|
||||
std::unique_ptr<InferenceBuilder> builder;
|
||||
RETURN_IF_ERROR(delegate_->Prepare(
|
||||
&graph,
|
||||
[&](GraphFloat32* graph) -> absl::Status {
|
||||
return InitializeGraph(context, delegate_params, graph, &input_refs,
|
||||
&output_refs);
|
||||
},
|
||||
&builder));
|
||||
bool graph_is_destroyed;
|
||||
const int experimental_flags = delegate_->options().experimental_flags;
|
||||
if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) {
|
||||
RETURN_IF_ERROR(
|
||||
InitializeOpenClApi(&graph, &builder, &graph_is_destroyed));
|
||||
} else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) {
|
||||
RETURN_IF_ERROR(InitializeOpenGlApi(&graph, &builder));
|
||||
} else {
|
||||
// By default, we try CL first & fall back to GL if that fails.
|
||||
absl::Status status =
|
||||
InitializeOpenClApi(&graph, &builder, &graph_is_destroyed);
|
||||
if (!status.ok()) {
|
||||
TF_LITE_KERNEL_LOG(context, std::string(status.message()).c_str());
|
||||
TF_LITE_KERNEL_LOG(context, "Falling back to OpenGL");
|
||||
|
||||
// See if the GPU backend wants us to always be on the same thread.
|
||||
enforce_same_thread_ = delegate_->enforce_same_thread();
|
||||
// Graph needs to be re-created because it is moved above.
|
||||
GraphFloat32 graph2;
|
||||
if (graph_is_destroyed) {
|
||||
RETURN_IF_ERROR(InitializeGraph(context, delegate_params, &graph2,
|
||||
&input_refs, &output_refs));
|
||||
}
|
||||
RETURN_IF_ERROR(InitializeOpenGlApi(
|
||||
graph_is_destroyed ? &graph2 : &graph, &builder));
|
||||
}
|
||||
}
|
||||
|
||||
// At this point tflite didn't allocate tensors yet, therefore, collect
|
||||
// indices and set all input and output tensors from tflite later.
|
||||
@ -253,12 +279,71 @@ class DelegateKernel {
|
||||
for (const auto& output : outputs) {
|
||||
output_refs->push_back(output->tensor.ref);
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// Creates the OpenCL inference environment (stored in cl_environment_) and
// asks it for an inference builder for `graph`, returned through `*builder`.
//
// `*graph_is_destroyed` is reset to false on entry and flipped to true right
// before `*graph` is moved into NewInferenceBuilder, so that a caller who sees
// an error can tell whether the graph has to be rebuilt before it is reused.
//
// Returns the first error produced by environment or builder creation,
// otherwise OkStatus.
absl::Status InitializeOpenClApi(GraphFloat32* graph,
                                 std::unique_ptr<InferenceBuilder>* builder,
                                 bool* graph_is_destroyed) {
  *graph_is_destroyed = false;

  cl::InferenceEnvironmentOptions env_options;
  cl::InferenceEnvironmentProperties properties;
  RETURN_IF_ERROR(cl::NewInferenceEnvironment(env_options, &cl_environment_,
                                              &properties));

  auto delegate_options = delegate_->options();
  cl::InferenceOptions options;
  const auto precision_loss = delegate_options.is_precision_loss_allowed;
  if (precision_loss == -1) {
    // -1 means the user never touched is_precision_loss_allowed, so the
    // three explicit priorities are honoured instead.
    options.priority1 = ToPriority(delegate_options.inference_priority1);
    options.priority2 = ToPriority(delegate_options.inference_priority2);
    options.priority3 = ToPriority(delegate_options.inference_priority3);
  } else if (precision_loss == 0) {
    // Explicitly set by the user: precision loss is not allowed.
    options.priority1 = InferencePriority::MAX_PRECISION;
  } else {
    // Explicitly set by the user: precision loss is allowed.
    options.priority1 = InferencePriority::MIN_LATENCY;
  }
  options.usage = ToUsage(delegate_options.inference_preference);

  // From here on the graph is moved-from, whether or not the call succeeds.
  *graph_is_destroyed = true;
  RETURN_IF_ERROR(cl_environment_->NewInferenceBuilder(
      options, std::move(*graph), builder));

  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Initialized OpenCL-based API.");
  return absl::OkStatus();
}
|
||||
|
||||
// Creates the OpenGL inference environment (stored in gl_environment_) and
// asks it for an inference builder for `graph`, returned through `*builder`.
// `*graph` is moved into the builder. On success enforce_same_thread_ is set
// to true.
//
// When the build defines CL_DELEGATE_NO_GL there is no OpenGL backend; the
// function then returns an Unavailable error instead of reporting success
// with `*builder` left unset, which callers would go on to dereference.
//
// Returns the first error produced by environment or builder creation,
// otherwise OkStatus.
absl::Status InitializeOpenGlApi(GraphFloat32* graph,
                                 std::unique_ptr<InferenceBuilder>* builder) {
#ifndef CL_DELEGATE_NO_GL
  gl::InferenceEnvironmentOptions env_options;
  gl::InferenceEnvironmentProperties properties;
  RETURN_IF_ERROR(
      NewInferenceEnvironment(env_options, &gl_environment_, &properties));

  auto delegate_options = delegate_->options();
  gl::InferenceOptions options;
  options.usage = ToUsage(delegate_options.inference_preference);
  options.priority1 = ToPriority(delegate_options.inference_priority1);
  options.priority2 = ToPriority(delegate_options.inference_priority2);
  options.priority3 = ToPriority(delegate_options.inference_priority3);
  RETURN_IF_ERROR(gl_environment_->NewInferenceBuilder(std::move(*graph),
                                                       options, builder));
  enforce_same_thread_ = true;
  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Initialized OpenGL-based API.");
  return absl::OkStatus();
#else
  // No GL backend was compiled in: fail loudly rather than hand back a
  // "successful" status together with a null builder.
  return absl::UnavailableError("OpenGL-based API disabled");
#endif
}
|
||||
|
||||
// The Delegate instance that's shared across all DelegateKernel instances.
|
||||
Delegate* const delegate_;
|
||||
|
||||
Delegate* const delegate_; // doesn't own the memory.
|
||||
std::unique_ptr<cl::InferenceEnvironment> cl_environment_;
|
||||
#ifndef CL_DELEGATE_NO_GL
|
||||
std::unique_ptr<gl::InferenceEnvironment> gl_environment_;
|
||||
#endif
|
||||
std::unique_ptr<InferenceRunner> runner_;
|
||||
std::vector<int64_t> input_indices_;
|
||||
std::vector<int64_t> output_indices_;
|
||||
@ -285,7 +370,7 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
|
||||
const auto* params =
|
||||
reinterpret_cast<const TfLiteDelegateParams*>(buffer);
|
||||
auto* gpu_delegate = GetDelegate(params->delegate);
|
||||
// Everything below should happen in prepare function call, but TfLite
|
||||
// Everything below should happen in prepare function call, but TFLite
|
||||
// for whatever reason forbids that.
|
||||
auto gpu_delegate_kernel =
|
||||
absl::make_unique<DelegateKernel>(gpu_delegate);
|
||||
@ -351,25 +436,10 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Builds a GPU Delegate around `gpu_backend` and `options` and returns its
// TfLiteDelegate handle, or nullptr if no Delegate object was obtained.
// Logs a one-time informational message when the delegate is created.
//
// NOTE(review): `gpu_backend` is passed straight to the Delegate constructor;
// presumably the Delegate takes ownership of it - confirm against Delegate.
TfLiteDelegate* TfLiteGpuDelegateCreateInternal(
    GpuBackend* gpu_backend, const TfLiteGpuDelegateOptionsV2* options) {
  auto* gpu_delegate = new tflite::gpu::Delegate(gpu_backend, options);
  if (gpu_delegate == nullptr) {
    return nullptr;
  }
  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Created TensorFlow Lite delegate for GPU.");
  return gpu_delegate->tflite_delegate();
}
|
||||
|
||||
// Destroys the GPU Delegate object that backs `delegate`, as resolved by
// GetDelegate().
void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate) {
  auto* gpu_delegate = tflite::gpu::GetDelegate(delegate);
  delete gpu_delegate;
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
|
||||
TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
|
||||
TfLiteGpuDelegateOptionsV2 options = {
|
||||
// set it to -1 to detect whether it was later adjusted.
|
||||
.is_precision_loss_allowed = -1,
|
||||
@ -383,3 +453,15 @@ extern "C" TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default() {
|
||||
};
|
||||
return options;
|
||||
}
|
||||
|
||||
// Allocates a GPU Delegate configured from `options`, logs a one-time
// informational message, and returns the delegate's TfLiteDelegate handle
// (nullptr if no Delegate object was obtained).
//
// The returned handle is released with TfLiteGpuDelegateV2Delete.
TfLiteDelegate* TfLiteGpuDelegateV2Create(
    const TfLiteGpuDelegateOptionsV2* options) {
  auto* gpu_delegate = new tflite::gpu::Delegate(options);
  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Created TensorFlow Lite delegate for GPU.");
  if (gpu_delegate == nullptr) {
    return nullptr;
  }
  return gpu_delegate->tflite_delegate();
}
|
||||
|
||||
// Destroys the GPU Delegate object that backs `delegate`, as resolved by
// GetDelegate().
void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) {
  auto* gpu_delegate = tflite::gpu::GetDelegate(delegate);
  delete gpu_delegate;
}
|
||||
|
@ -111,7 +111,7 @@ typedef struct {
|
||||
TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();
|
||||
|
||||
// Creates a new delegate instance that needs to be destroyed with
|
||||
// TfLiteGpuDelegateV2Delete when delegate is no longer used by TfLite.
|
||||
// TfLiteGpuDelegateV2Delete when delegate is no longer used by TFLite.
|
||||
//
|
||||
// This delegate encapsulates multiple GPU-acceleration APIs under the hood to
|
||||
// make use of the fastest available on a device.
|
||||
|
@ -1,46 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
|
||||
// Converts a TFLITE_GPU_INFERENCE_PRIORITY_* option value into the internal
// InferencePriority enum; unrecognised values become
// InferencePriority::UNKNOWN.
InferencePriority GpuBackend::ToPriority(int32_t priority) {
  InferencePriority mapped = InferencePriority::UNKNOWN;
  switch (priority) {
    case TFLITE_GPU_INFERENCE_PRIORITY_AUTO:
      mapped = InferencePriority::AUTO;
      break;
    case TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
      mapped = InferencePriority::MAX_PRECISION;
      break;
    case TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
      mapped = InferencePriority::MIN_LATENCY;
      break;
    case TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE:
      mapped = InferencePriority::MIN_MEMORY_USAGE;
      break;
  }
  return mapped;
}
|
||||
|
||||
// Converts a TFLITE_GPU_INFERENCE_PREFERENCE_* option value into the internal
// InferenceUsage enum; unrecognised values become InferenceUsage::UNKNOWN.
InferenceUsage GpuBackend::ToUsage(int32_t usage) {
  if (usage == TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER) {
    return InferenceUsage::FAST_SINGLE_ANSWER;
  }
  if (usage == TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED) {
    return InferenceUsage::SUSTAINED_SPEED;
  }
  return InferenceUsage::UNKNOWN;
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
@ -1,55 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/api.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/delegate.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
|
||||
class GpuBackend {
|
||||
public:
|
||||
virtual absl::Status Prepare(
|
||||
const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph,
|
||||
std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
|
||||
std::unique_ptr<InferenceBuilder>* builder) = 0;
|
||||
|
||||
bool enforce_same_thread() const { return enforce_same_thread_; }
|
||||
|
||||
virtual ~GpuBackend() = default;
|
||||
|
||||
static InferencePriority ToPriority(int32_t priority);
|
||||
static InferenceUsage ToUsage(int32_t usage);
|
||||
|
||||
protected:
|
||||
bool enforce_same_thread_ = false;
|
||||
};
|
||||
|
||||
TfLiteDelegate* TfLiteGpuDelegateCreateInternal(
|
||||
GpuBackend* backend, const TfLiteGpuDelegateOptionsV2* options);
|
||||
|
||||
// Destroys a delegate created with `TfLiteGpuDelegateCreateInternal` call.
|
||||
void TfLiteGpuDelegateDeleteInternal(TfLiteDelegate* delegate);
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_DELEGATES_GPU_GPU_BACKEND_INTERNAL_H_
|
@ -1,157 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/api.h"
|
||||
#ifndef CL_DELEGATE_NO_GL
|
||||
#include "tensorflow/lite/delegates/gpu/gl/api2.h"
|
||||
#endif
|
||||
#include "tensorflow/lite/delegates/gpu/gpu_backend.h"
|
||||
#include "tensorflow/lite/minimal_logging.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
|
||||
// Creates an OpenCL inference environment in `*inference_environment` and
// asks it for an inference builder for `graph`, returned through `*builder`.
//
// `graph_is_destroyed` is optional. When non-null it is set to false on entry
// and to true right before `*graph` is moved into NewInferenceBuilder, so the
// caller can tell whether the graph must be rebuilt after a failure.
//
// Returns the first error produced by environment or builder creation,
// otherwise OkStatus.
absl::Status InitializeOpenClApi(
    GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options,
    std::unique_ptr<InferenceBuilder>* builder,
    std::unique_ptr<cl::InferenceEnvironment>* inference_environment,
    bool* graph_is_destroyed) {
  const bool report_graph_state = graph_is_destroyed != nullptr;
  if (report_graph_state) {
    *graph_is_destroyed = false;
  }

  cl::InferenceEnvironmentOptions env_options;
  cl::InferenceEnvironmentProperties properties;
  RETURN_IF_ERROR(cl::NewInferenceEnvironment(
      env_options, inference_environment, &properties));

  cl::InferenceOptions options;
  const auto precision_loss = delegate_options.is_precision_loss_allowed;
  if (precision_loss == -1) {
    // -1 means is_precision_loss_allowed was never set explicitly, so the
    // three priorities from the options are used instead.
    options.priority1 =
        GpuBackend::ToPriority(delegate_options.inference_priority1);
    options.priority2 =
        GpuBackend::ToPriority(delegate_options.inference_priority2);
    options.priority3 =
        GpuBackend::ToPriority(delegate_options.inference_priority3);
  } else if (precision_loss == 0) {
    // Explicitly set by the user: precision loss is not allowed.
    options.priority1 = InferencePriority::MAX_PRECISION;
  } else {
    // Explicitly set by the user: precision loss is allowed.
    options.priority1 = InferencePriority::MIN_LATENCY;
  }
  options.usage = GpuBackend::ToUsage(delegate_options.inference_preference);

  // The graph is moved-from after the next call regardless of its outcome.
  if (report_graph_state) {
    *graph_is_destroyed = true;
  }
  auto& environment = *inference_environment;
  RETURN_IF_ERROR(
      environment->NewInferenceBuilder(options, std::move(*graph), builder));

  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Initialized OpenCL-based API.");
  return absl::OkStatus();
}
|
||||
|
||||
#ifndef CL_DELEGATE_NO_GL
|
||||
// Creates an OpenGL inference environment in `*inference_environment` and
// asks it for an inference builder for `graph`, returned through `*builder`.
// `*graph` is moved into the builder.
//
// Returns the first error produced by environment or builder creation,
// otherwise OkStatus.
absl::Status InitializeOpenGlApi(
    GraphFloat32* graph, const TfLiteGpuDelegateOptionsV2& delegate_options,
    std::unique_ptr<InferenceBuilder>* builder,
    std::unique_ptr<gl::InferenceEnvironment>* inference_environment) {
  gl::InferenceEnvironmentOptions env_options;
  gl::InferenceEnvironmentProperties properties;
  RETURN_IF_ERROR(
      NewInferenceEnvironment(env_options, inference_environment, &properties));

  // Unlike the OpenCL path, the three priorities are always taken verbatim
  // from the delegate options here.
  gl::InferenceOptions options;
  options.usage = GpuBackend::ToUsage(delegate_options.inference_preference);
  options.priority1 =
      GpuBackend::ToPriority(delegate_options.inference_priority1);
  options.priority2 =
      GpuBackend::ToPriority(delegate_options.inference_priority2);
  options.priority3 =
      GpuBackend::ToPriority(delegate_options.inference_priority3);

  auto& environment = *inference_environment;
  RETURN_IF_ERROR(
      environment->NewInferenceBuilder(std::move(*graph), options, builder));

  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Initialized OpenGL-based API.");
  return absl::OkStatus();
}
|
||||
#endif
|
||||
|
||||
class GpuBackendDefault : public GpuBackend {
|
||||
public:
|
||||
GpuBackendDefault() {}
|
||||
|
||||
absl::Status Prepare(
|
||||
const TfLiteGpuDelegateOptionsV2& delegate_options, GraphFloat32* graph,
|
||||
std::function<absl::Status(GraphFloat32* graph)> initialize_graph,
|
||||
std::unique_ptr<InferenceBuilder>* builder) override {
|
||||
#ifdef CL_DELEGATE_NO_GL
|
||||
return InitializeOpenClApi(graph, delegate_options, builder,
|
||||
&cl_inference_environment_, nullptr);
|
||||
#else
|
||||
bool graph_is_destroyed;
|
||||
const int experimental_flags = delegate_options.experimental_flags;
|
||||
if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY) {
|
||||
RETURN_IF_ERROR(InitializeOpenClApi(graph, delegate_options, builder,
|
||||
&cl_inference_environment_,
|
||||
&graph_is_destroyed));
|
||||
} else if (experimental_flags & TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY) {
|
||||
RETURN_IF_ERROR(InitializeOpenGlApi(graph, delegate_options, builder,
|
||||
&gl_inference_environment_));
|
||||
} else {
|
||||
// By default, we try CL first & fall back to GL if that fails.
|
||||
absl::Status status =
|
||||
InitializeOpenClApi(graph, delegate_options, builder,
|
||||
&cl_inference_environment_, &graph_is_destroyed);
|
||||
if (!status.ok()) {
|
||||
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
|
||||
std::string(status.message()).c_str());
|
||||
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "Falling back to OpenGL");
|
||||
|
||||
// Graph needs to be re-created because it is moved above.
|
||||
GraphFloat32 graph2;
|
||||
if (graph_is_destroyed) {
|
||||
RETURN_IF_ERROR(initialize_graph(&graph2));
|
||||
}
|
||||
RETURN_IF_ERROR(InitializeOpenGlApi(
|
||||
graph_is_destroyed ? &graph2 : graph, delegate_options, builder,
|
||||
&gl_inference_environment_));
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<cl::InferenceEnvironment> cl_inference_environment_;
|
||||
#ifndef CL_DELEGATE_NO_GL
|
||||
std::unique_ptr<gl::InferenceEnvironment> gl_inference_environment_;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
// C entry point: creates a GPU delegate that uses a freshly allocated
// GpuBackendDefault and the given `options`, via
// TfLiteGpuDelegateCreateInternal.
extern "C" TfLiteDelegate* TfLiteGpuDelegateV2Create(
    const TfLiteGpuDelegateOptionsV2* options) {
  auto* default_backend = new tflite::gpu::GpuBackendDefault();
  return tflite::gpu::TfLiteGpuDelegateCreateInternal(default_backend,
                                                      options);
}
|
||||
|
||||
// C entry point: destroys `delegate` by forwarding to
// TfLiteGpuDelegateDeleteInternal.
extern "C" void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate) {
  tflite::gpu::TfLiteGpuDelegateDeleteInternal(delegate);
}
|
Loading…
Reference in New Issue
Block a user