Add a define for building an OpenCL-only delegate.

By default the OpenCL delegate supports GL interop and links against the GL/EGL libraries. With this define the delegate can be built without any dependency on the GL libraries.

PiperOrigin-RevId: 322210374
Change-Id: Id02c53747873a474c31971553048c73d3506f4ae
This commit: 6ee2d328fd (parent: ecaf86d96c)
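With this change the GL-free build is opted into from the build command line; the new opencl_delegate_no_gl config_setting below matches on that copt. An illustrative invocation (the exact target and any extra --config flags depend on your build setup) might be:

    bazel build --copt=-DCL_DELEGATE_NO_GL //tensorflow/lite/delegates/gpu:delegate

When the define is not set, cl/api.cc defines CL_DELEGATE_ALLOW_GL for itself and the GL/EGL interop paths are compiled in as before.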
tensorflow/lite/delegates/gpu
@@ -234,7 +234,14 @@ cc_library(
         ],
         "//conditions:default": [],
     }),
-    deps = [
+    deps = select({
+        "//tensorflow/lite/delegates/gpu/cl:opencl_delegate_no_gl": [],
+        "//conditions:default": [
+            "//tensorflow/lite/delegates/gpu/gl:api2",
+        ],
+    }) + [
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/types:span",
         "//tensorflow/lite:kernel_api",
         "//tensorflow/lite:minimal_logging",
         "//tensorflow/lite/c:common",
@@ -247,9 +254,6 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common:model_transformer",
         "//tensorflow/lite/delegates/gpu/common:quantization_util",
         "//tensorflow/lite/delegates/gpu/common:status",
-        "//tensorflow/lite/delegates/gpu/gl:api2",
         "//tensorflow/lite/kernels/internal:optimized_base",
-        "@com_google_absl//absl/memory",
-        "@com_google_absl//absl/types:span",
     ],
 )
@@ -43,9 +43,14 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/util.h"
-#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
 #include <vulkan/vulkan.h>
 
+#define GL_NO_PROTOTYPES
+#define EGL_NO_PROTOTYPES
+#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
+#undef GL_NO_PROTOTYPES
+#undef EGL_NO_PROTOTYPES
+
 namespace tflite {
 namespace gpu {
 
@@ -9,23 +9,34 @@ package(
     licenses = ["notice"],  # Apache 2.0
 )
 
+config_setting(
+    name = "opencl_delegate_no_gl",
+    values = {"copt": "-DCL_DELEGATE_NO_GL"},
+)
+
 cc_library(
     name = "api",
     srcs = ["api.cc"],
     hdrs = ["api.h"],
-    deps = [
+    deps = select({
+        ":opencl_delegate_no_gl": [],
+        "//conditions:default": [
+            ":egl_sync",
+            ":gl_interop",
+        ],
+    }) + [
         ":cl_command_queue",
         ":cl_errors",
         ":cl_event",
-        ":egl_sync",
         ":environment",
-        ":gl_interop",
         ":inference_context",
         ":opencl_wrapper",
         ":precision",
         ":tensor",
         ":tensor_type",
         ":tensor_type_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/types:span",
        "//tensorflow/lite/delegates/gpu:api",
        "//tensorflow/lite/delegates/gpu/cl/kernels:converter",
        "//tensorflow/lite/delegates/gpu/common:data_type",
@@ -33,8 +44,6 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common:shape",
         "//tensorflow/lite/delegates/gpu/common:status",
         "//tensorflow/lite/delegates/gpu/common:tensor",
-        "@com_google_absl//absl/memory",
-        "@com_google_absl//absl/types:span",
     ],
 )
 
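For reference, Bazel resolves each select() at analysis time, so the two configurations of the :api target differ only in whether the GL interop targets are present. Roughly (an illustrative sketch, not the literal BUILD contents):

    # with --copt=-DCL_DELEGATE_NO_GL:
    #   deps = [] + [":cl_command_queue", ":cl_errors", ":cl_event", ...]
    # default configuration:
    #   deps = [":egl_sync", ":gl_interop"] + [":cl_command_queue", ":cl_errors", ":cl_event", ...]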
@@ -15,7 +15,9 @@ limitations under the License.
 
 #include "tensorflow/lite/delegates/gpu/cl/api.h"
 
-#include <EGL/eglext.h>
+#ifndef CL_DELEGATE_NO_GL
+#define CL_DELEGATE_ALLOW_GL
+#endif
 
 #include <algorithm>
 #include <cstring>
@@ -25,9 +27,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
 #include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
-#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
 #include "tensorflow/lite/delegates/gpu/cl/environment.h"
-#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
 #include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
@@ -39,6 +39,13 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
 
+#ifdef CL_DELEGATE_ALLOW_GL
+#include <EGL/eglext.h>
+
+#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
+#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
+#endif
+
 namespace tflite {
 namespace gpu {
 namespace cl {
@@ -87,11 +94,13 @@ class DefaultTensorTie : public TensorTie {
       const TensorTieDef& def,
       const TensorObjectConverterBuilder& converter_builder) {
     auto object_type = def.external_def.object_def.object_type;
+#ifdef CL_DELEGATE_ALLOW_GL
     if (def.external_def.object_def.user_provided &&
         GlClBufferCopier::IsSupported(def.external_def.object_def,
                                       def.internal_def.object_def)) {
       return true;
     }
+#endif
     return (object_type == ObjectType::OPENCL_BUFFER ||
             object_type == ObjectType::OPENCL_TEXTURE ||
             object_type == ObjectType::CPU_MEMORY) &&
@@ -138,6 +147,7 @@ class DefaultTensorTie : public TensorTie {
  private:
   absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                     Environment* env) {
+#ifdef CL_DELEGATE_ALLOW_GL
     if (def().external_def.object_def.user_provided &&
         GlClBufferCopier::IsSupported(def().external_def.object_def,
                                       def().internal_def.object_def)) {
@@ -156,6 +166,12 @@ class DefaultTensorTie : public TensorTie {
       RETURN_IF_ERROR(converter_builder->MakeConverter(
           def().internal_def, def().external_def, &converter_to_));
     }
+#else
+    RETURN_IF_ERROR(converter_builder->MakeConverter(
+        def().external_def, def().internal_def, &converter_from_));
+    RETURN_IF_ERROR(converter_builder->MakeConverter(
+        def().internal_def, def().external_def, &converter_to_));
+#endif
     return MaybeAllocateExternalObject(env);
   }
 
@@ -275,6 +291,7 @@ class TwoStepTensorTie : public TensorTie {
   std::unique_ptr<TensorTie> outer_tie_;
 };
 
+#ifdef CL_DELEGATE_ALLOW_GL
 // Captures GL object into CL context before performing a conversion.
 class GlBufferHolder : public TensorTie {
  public:
@@ -351,6 +368,7 @@ class GlBufferHolder : public TensorTie {
   std::unique_ptr<TensorTie> tie_;
   TensorObject external_obj_;
 };
+#endif
 
 TensorObject TensorToObj(const Tensor& tensor) {
   if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
@@ -365,19 +383,28 @@ TensorObject TensorToObj(const Tensor& tensor) {
 // Responsible for creating new tensor objects.
 class TensorTieFactory {
  public:
-  TensorTieFactory(Environment* env, InferenceContext* context,
-                   GlInteropFabric* gl_interop_fabric)
+  TensorTieFactory(Environment* env, InferenceContext* context
+#ifdef CL_DELEGATE_ALLOW_GL
+                   ,
+                   GlInteropFabric* gl_interop_fabric
+#endif
+                   )
       : env_(*env),
         context_(*context),
+#ifdef CL_DELEGATE_ALLOW_GL
         gl_interop_fabric_(gl_interop_fabric),
-        converter_builder_(NewConverterBuilder(env)) {}
+#endif
+        converter_builder_(NewConverterBuilder(env)) {
+  }
 
   bool IsSupported(const TensorTieDef& def) const {
     return IsValid(def.external_def.object_def) &&
            (NoopTensorTie::IsSupported(def) ||
             DefaultTensorTie::IsSupported(def, *converter_builder_) ||
+#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
              GlBufferHolder::IsSupported(def, *converter_builder_)) ||
+#endif
             TwoStepTensorTie::IsSupported(def, *converter_builder_));
   }
 
@@ -392,10 +419,12 @@ class TensorTieFactory {
     if (DefaultTensorTie::IsSupported(def, *converter)) {
       return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
     }
+#ifdef CL_DELEGATE_ALLOW_GL
     if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
       return GlBufferHolder::New(def, internal_object, converter,
                                  gl_interop_fabric_, &env_, tie);
     }
+#endif
     if (TwoStepTensorTie::IsSupported(def, *converter)) {
       return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
     }
@@ -405,18 +434,29 @@ class TensorTieFactory {
  private:
   Environment& env_;
   InferenceContext& context_;
+#ifdef CL_DELEGATE_ALLOW_GL
   GlInteropFabric* gl_interop_fabric_;
+#endif
   std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
 };
 
 class InferenceRunnerImpl : public InferenceRunner {
  public:
   InferenceRunnerImpl(Environment* environment,
-                      std::unique_ptr<InferenceContext> context,
-                      std::unique_ptr<GlInteropFabric> gl_interop_fabric)
+                      std::unique_ptr<InferenceContext> context
+#ifdef CL_DELEGATE_ALLOW_GL
+                      ,
+                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
+#endif
+                      )
       : queue_(environment->queue()),
-        context_(std::move(context)),
-        gl_interop_fabric_(std::move(gl_interop_fabric)) {}
+        context_(std::move(context))
+#ifdef CL_DELEGATE_ALLOW_GL
+        ,
+        gl_interop_fabric_(std::move(gl_interop_fabric))
+#endif
+  {
+  }
 
   absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                           const std::vector<TensorTieDef>& outputs,
@@ -464,9 +504,11 @@ class InferenceRunnerImpl : public InferenceRunner {
   }
 
   absl::Status Run() override {
+#ifdef CL_DELEGATE_ALLOW_GL
     if (gl_interop_fabric_) {
       RETURN_IF_ERROR(gl_interop_fabric_->Start());
     }
+#endif
     for (auto& obj : inputs_) {
       RETURN_IF_ERROR(obj->CopyFromExternalObject());
     }
@@ -475,9 +517,11 @@ class InferenceRunnerImpl : public InferenceRunner {
     for (auto& obj : outputs_) {
       RETURN_IF_ERROR(obj->CopyToExternalObject());
     }
+#ifdef CL_DELEGATE_ALLOW_GL
     if (gl_interop_fabric_) {
       RETURN_IF_ERROR(gl_interop_fabric_->Finish());
     }
+#endif
     return absl::OkStatus();
   }
 
@@ -506,7 +550,9 @@ class InferenceRunnerImpl : public InferenceRunner {
 
   CLCommandQueue* queue_;
   std::unique_ptr<InferenceContext> context_;
+#ifdef CL_DELEGATE_ALLOW_GL
   std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
+#endif
   std::vector<std::unique_ptr<TensorTie>> inputs_;
   std::vector<std::unique_ptr<TensorTie>> outputs_;
 };
@@ -542,6 +588,7 @@ class InferenceBuilderImpl : public InferenceBuilder {
     }
     RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));
 
+#ifdef CL_DELEGATE_ALLOW_GL
     if (env_options.IsGlAware() &&
         IsGlSharingSupported(environment_->device())) {
       gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
@@ -549,6 +596,10 @@ class InferenceBuilderImpl : public InferenceBuilder {
     }
     tie_factory_ = absl::make_unique<TensorTieFactory>(
         environment_, context_.get(), gl_interop_fabric_.get());
+#else
+    tie_factory_ =
+        absl::make_unique<TensorTieFactory>(environment_, context_.get());
+#endif
 
     inputs_ = LinkTensors(graph, graph.inputs());
     outputs_ = LinkTensors(graph, graph.outputs());
@@ -599,6 +650,7 @@ class InferenceBuilderImpl : public InferenceBuilder {
   }
 
   absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
+#ifdef CL_DELEGATE_ALLOW_GL
     if (gl_interop_fabric_ && !HasGlObjects()) {
       // destroy interop layer when there are no GL objects to avoid
       // extra synchronization cost.
@@ -606,6 +658,10 @@ class InferenceBuilderImpl : public InferenceBuilder {
     }
     auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
         environment_, std::move(context_), std::move(gl_interop_fabric_));
+#else
+    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
+        environment_, std::move(context_));
+#endif
     RETURN_IF_ERROR(
         runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
     *runner = std::move(runner_impl);
@@ -676,6 +732,7 @@ class InferenceBuilderImpl : public InferenceBuilder {
   }
 
   bool HasGlObjects() const {
+#ifdef CL_DELEGATE_ALLOW_GL
     auto is_gl = [](ObjectType t) {
       return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
     };
@@ -689,6 +746,7 @@ class InferenceBuilderImpl : public InferenceBuilder {
         return true;
       }
     }
+#endif
     return false;
   }
 
@@ -703,7 +761,9 @@ class InferenceBuilderImpl : public InferenceBuilder {
   }
 
   std::unique_ptr<InferenceContext> context_;
+#ifdef CL_DELEGATE_ALLOW_GL
   std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
+#endif
   Environment* environment_;
 
   std::vector<TensorTieDef> inputs_;
@@ -730,20 +790,25 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
       RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
     }
 
+#ifdef CL_DELEGATE_ALLOW_GL
     properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
     properties_.is_gl_to_cl_fast_sync_supported =
         IsClEventFromEglSyncSupported(device);
     properties_.is_cl_to_gl_fast_sync_supported =
         IsEglSyncFromClEventSupported();
+#endif
 
     CLContext context;
     if (options_.context) {
+#ifdef CL_DELEGATE_ALLOW_GL
       if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set in the same time.");
      }
+#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
+#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
@@ -753,6 +818,9 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
+#else
+      RETURN_IF_ERROR(CreateCLContext(device, &context));
+#endif
    }
 
    CLCommandQueue queue;
@@ -16,6 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_
 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_
 
+#ifdef CL_DELEGATE_NO_GL
+#define EGL_NO_PROTOTYPES
+#endif
+
 #include <EGL/egl.h>
 
 #include <cstdint>
@@ -16,8 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_GPU_API_DELEGATE_H_
 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_GPU_API_DELEGATE_H_
 
+#define GL_NO_PROTOTYPES
+#define EGL_NO_PROTOTYPES
 #include <EGL/egl.h>
 #include <GLES3/gl31.h>
+#undef GL_NO_PROTOTYPES
+#undef EGL_NO_PROTOTYPES
+
 #include <stdint.h>
 
 #include "tensorflow/lite/c/common.h"
@@ -76,8 +81,8 @@ typedef struct {
 //   .compile_options = {
 //     .precision_loss_allowed = false,
 //   }
-//   .egl_display = eglGetCurrentDisplay(),
-//   .egl_context = eglGetCurrentContext();
+//   .egl_display = EGL_NO_DISPLAY;
+//   .egl_context = EGL_NO_CONTEXT;
 TFL_CAPI_EXPORT TfLiteDelegate* TfLiteGpuDelegateCreate_New(
     const TfLiteGpuDelegateOptions_New* options);
@@ -16,3 +16,21 @@ cc_binary(
         "@com_google_absl//absl/time",
     ],
 )
+
+cc_binary(
+    name = "delegate_testing",
+    srcs = ["delegate_testing.cc"],
+    tags = [
+        "nobuilder",
+        "notap",
+    ],
+    deps = [
+        "//tensorflow/lite/delegates/gpu:delegate",
+        "//tensorflow/lite/delegates/gpu/cl:gpu_api_delegate",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "//tensorflow/lite/delegates/gpu/common/testing:tflite_model_reader",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/kernels:kernel_util",
+        "@com_google_absl//absl/time",
+    ],
+)
tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc (new file, 158 lines)
@@ -0,0 +1,158 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <chrono>  // NOLINT(build/c++11)
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>

#include "absl/time/time.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/testing/tflite_model_reader.h"
#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/register.h"

namespace {

void FillInputTensor(tflite::Interpreter* interpreter) {
  for (int k = 0; k < interpreter->inputs().size(); ++k) {
    float* p = interpreter->typed_input_tensor<float>(k);
    const auto n =
        tflite::NumElements(interpreter->tensor(interpreter->inputs()[k]));
    for (int i = 0; i < n; ++i) {
      p[i] = std::sin(i);
    }
  }
}

void CompareCPUGPUResults(tflite::Interpreter* cpu, tflite::Interpreter* gpu,
                          float eps) {
  for (int i = 0; i < cpu->outputs().size(); ++i) {
    const float* cpu_out = cpu->typed_output_tensor<float>(i);
    const float* gpu_out = gpu->typed_output_tensor<float>(i);
    auto out_n = tflite::NumElements(cpu->tensor(cpu->outputs()[i]));
    const int kMaxPrint = 10;
    int printed = 0;
    int total_different = 0;
    for (int k = 0; k < out_n; ++k) {
      const float abs_diff = fabs(cpu_out[k] - gpu_out[k]);
      if (abs_diff > eps) {
        total_different++;
        if (printed < kMaxPrint) {
          std::cout << "Output #" << i << ": element #" << k << ": CPU value - "
                    << cpu_out[k] << ", GPU value - " << gpu_out[k]
                    << ", abs diff - " << abs_diff << std::endl;
          printed++;
        }
        if (printed == kMaxPrint) {
          std::cout << "Printed " << kMaxPrint
                    << " different elements, threshold - " << eps
                    << ", next different elements skipped" << std::endl;
          printed++;
        }
      }
    }
    std::cout << "Total " << total_different
              << " different elements, for output #" << i << ", threshold - "
              << eps << std::endl;
  }
}

}  // namespace

int main(int argc, char** argv) {
  if (argc <= 1) {
    std::cerr << "Expected model path as second argument." << std::endl;
    return -1;
  }

  auto model = tflite::FlatBufferModel::BuildFromFile(argv[1]);
  if (!model) {
    std::cerr << "FlatBufferModel::BuildFromFile failed, model path - "
              << argv[1] << std::endl;
    return -1;
  }
  tflite::ops::builtin::BuiltinOpResolver op_resolver;
  tflite::InterpreterBuilder builder(*model, op_resolver);

  // CPU.
  std::unique_ptr<tflite::Interpreter> cpu_inference;
  builder(&cpu_inference);
  if (!cpu_inference) {
    std::cerr << "Failed to build CPU inference." << std::endl;
    return -1;
  }
  auto status = cpu_inference->AllocateTensors();
  if (status != kTfLiteOk) {
    std::cerr << "Failed to AllocateTensors for CPU inference." << std::endl;
    return -1;
  }
  FillInputTensor(cpu_inference.get());
  status = cpu_inference->Invoke();
  if (status != kTfLiteOk) {
    std::cerr << "Failed to Invoke CPU inference." << std::endl;
    return -1;
  }

  // GPU.
  std::unique_ptr<tflite::Interpreter> gpu_inference;
  builder(&gpu_inference);
  if (!gpu_inference) {
    std::cerr << "Failed to build GPU inference." << std::endl;
    return -1;
  }
  TfLiteGpuDelegateOptionsV2 options;
  options.is_precision_loss_allowed = -1;
  options.inference_preference =
      TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER;
  options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
  options.inference_priority2 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE;
  options.inference_priority3 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
  auto* gpu_delegate = TfLiteGpuDelegateV2Create(&options);
  status = gpu_inference->ModifyGraphWithDelegate(gpu_delegate);
  if (status != kTfLiteOk) {
    std::cerr << "ModifyGraphWithDelegate failed." << std::endl;
    return -1;
  }
  FillInputTensor(gpu_inference.get());
  status = gpu_inference->Invoke();
  if (status != kTfLiteOk) {
    std::cerr << "Failed to Invoke GPU inference." << std::endl;
    return -1;
  }

  CompareCPUGPUResults(cpu_inference.get(), gpu_inference.get(), 1e-4f);

  // CPU inference latency.
  auto start = std::chrono::high_resolution_clock::now();
  cpu_inference->Invoke();
  auto end = std::chrono::high_resolution_clock::now();
  std::cout << "CPU time - " << (end - start).count() * 1e-6f << "ms"
            << std::endl;

  // GPU inference latency.
  start = std::chrono::high_resolution_clock::now();
  gpu_inference->Invoke();
  end = std::chrono::high_resolution_clock::now();
  std::cout << "GPU time(CPU->GPU->CPU) - " << (end - start).count() * 1e-6f
            << "ms" << std::endl;

  TfLiteGpuDelegateV2Delete(gpu_delegate);
  return EXIT_SUCCESS;
}
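The new delegate_testing tool takes a .tflite model path as its only argument, runs the model on CPU and through the GPU delegate, compares the outputs element-wise, and prints rough single-invocation latencies. A typical invocation might look like the following (illustrative only; on Android the binary would normally be built with the appropriate --config and pushed to a device first):

    bazel run //tensorflow/lite/delegates/gpu/cl/testing:delegate_testing -- /path/to/model.tflite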
@@ -34,10 +34,13 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
 #include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
-#include "tensorflow/lite/delegates/gpu/gl/api2.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/minimal_logging.h"
 
+#ifndef CL_DELEGATE_NO_GL
+#include "tensorflow/lite/delegates/gpu/gl/api2.h"
+#endif
+
 namespace tflite {
 namespace gpu {
 namespace {
@@ -315,6 +318,7 @@ class DelegateKernel {
 
   absl::Status InitializeOpenGlApi(GraphFloat32* graph,
                                    std::unique_ptr<InferenceBuilder>* builder) {
+#ifndef CL_DELEGATE_NO_GL
     gl::InferenceEnvironmentOptions env_options;
     gl::InferenceEnvironmentProperties properties;
     RETURN_IF_ERROR(
@@ -330,13 +334,16 @@ class DelegateKernel {
     enforce_same_thread_ = true;
     TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                          "Initialized OpenGL-based API.");
+#endif
     return absl::OkStatus();
   }
 
   // The Delegate instance that's shared across all DelegateKernel instances.
   Delegate* const delegate_;  // doesn't own the memory.
   std::unique_ptr<cl::InferenceEnvironment> cl_environment_;
+#ifndef CL_DELEGATE_NO_GL
   std::unique_ptr<gl::InferenceEnvironment> gl_environment_;
+#endif
   std::unique_ptr<InferenceRunner> runner_;
   std::vector<int64_t> input_indices_;
   std::vector<int64_t> output_indices_;