Add cmake support of TFLite GPU delegate

Only Android OpenCL support is added.

$ mkdir tf-build; cd tf-build
$ cmake -DCMAKE_TOOLCHAIN_FILE=<NDK path>/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DTFLITE_ENABLE_GPU=ON ../tensorflow/lite/
$ make all benchmark_model -j

You can run the benchmark_model with "--use_gpu=true".

PiperOrigin-RevId: 336785571
Change-Id: I3e875f1ea6eacd1fe2912a62ca8a1301e6f04f19
This commit is contained in:
Terry Heo 2020-10-12 18:14:36 -07:00 committed by TensorFlower Gardener
parent 3d657bfd84
commit a731eb769c
4 changed files with 330 additions and 7 deletions

View File

@ -20,8 +20,6 @@
# This has only been tested on Windows, Linux and macOS.
#
# The following are not currently supported:
# - iOS
# - Micro backend
# - Tests
@ -60,7 +58,7 @@ option(TFLITE_ENABLE_RUY "Enable experimental RUY integration" OFF)
option(TFLITE_ENABLE_RESOURCE "Enable experimental support for resources" ON)
option(TFLITE_ENABLE_NNAPI "Enable NNAPI (Android only)." ON)
option(TFLITE_ENABLE_MMAP "Enable MMAP (unsupported on Windows)" ON)
option(TFLITE_ENABLE_GPU "Enable GPU" OFF)
# This must be enabled when converting from TF models with SELECT_TF_OPS
# enabled.
# https://www.tensorflow.org/lite/guide/ops_select#converting_the_model
@ -192,9 +190,59 @@ if(TFLITE_ENABLE_FLEX)
)
endif()
if(TFLITE_ENABLE_GPU)
  # GPU delegate implementation lives under delegates/gpu.
  # Only the OpenCL backend is built here: GL-interop sources are filtered
  # out and CL_DELEGATE_NO_GL is defined for every consumer of the library.
  find_package(opencl_headers REQUIRED)
  populate_tflite_source_vars(
    "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS
    FILTER "(_test|gl_interop|egl_sync)\\.(cc|h)$"
  )
  populate_tflite_source_vars(
    "delegates/gpu/cl/kernels" TFLITE_DELEGATES_GPU_CL_KERNELS_SRCS
    FILTER "(_test)\\.(cc|h)$"
  )
  populate_tflite_source_vars(
    "delegates/gpu/cl/kernels/special"
    TFLITE_DELEGATES_GPU_CL_KERNELS_SPECIAL_SRCS
    FILTER "(_test)\\.(cc|h)$"
  )
  populate_tflite_source_vars(
    "delegates/gpu/cl/selectors" TFLITE_DELEGATES_GPU_CL_SELECTORS_SRCS
    FILTER "(_test)\\.(cc|h)$"
  )
  populate_tflite_source_vars(
    "delegates/gpu/common" TFLITE_DELEGATES_GPU_COMMON_SRCS
    FILTER "(_test)\\.(cc|h)$"
  )
  populate_tflite_source_vars(
    "delegates/gpu/common/default" TFLITE_DELEGATES_GPU_COMMON_DEFAULT_SRCS
    FILTER "(_test)\\.(cc|h)$"
  )
  populate_tflite_source_vars(
    "delegates/gpu/common/memory_management"
    TFLITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_SRCS
    FILTER "(_test)\\.(cc|h)$"
  )
  populate_tflite_source_vars(
    "delegates/gpu/common/transformations"
    TFLITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_SRCS
    FILTER "(_test)\\.(cc|h)$"
  )
  list(APPEND TFLITE_DELEGATES_GPU_SRCS
    ${TFLITE_SOURCE_DIR}/delegates/gpu/api.cc
    ${TFLITE_SOURCE_DIR}/delegates/gpu/delegate.cc
    ${TFLITE_DELEGATES_GPU_CL_SRCS}
    ${TFLITE_DELEGATES_GPU_CL_KERNELS_SRCS}
    ${TFLITE_DELEGATES_GPU_CL_KERNELS_SPECIAL_SRCS}
    ${TFLITE_DELEGATES_GPU_CL_SELECTORS_SRCS}
    ${TFLITE_SOURCE_DIR}/delegates/gpu/cl/selectors/default/default_selector.cc
    ${TFLITE_DELEGATES_GPU_COMMON_SRCS}
    ${TFLITE_DELEGATES_GPU_COMMON_DEFAULT_SRCS}
    ${TFLITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_SRCS}
    ${TFLITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_SRCS}
  )
  list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DCL_DELEGATE_NO_GL")
  list(APPEND TFLITE_TARGET_DEPENDENCIES
    absl::any
    absl::flat_hash_map
  )
endif()
if(_TFLITE_ENABLE_NNAPI)
@ -281,6 +329,7 @@ add_library(tensorflow-lite
${TFLITE_CORE_SRCS}
${TFLITE_C_SRCS}
${TFLITE_DELEGATES_FLEX_SRCS}
${TFLITE_DELEGATES_GPU_SRCS}
${TFLITE_DELEGATES_NNAPI_SRCS}
${TFLITE_DELEGATES_SRCS}
${TFLITE_DELEGATES_XNNPACK_SRCS}
@ -298,6 +347,7 @@ add_library(tensorflow-lite
${TFLITE_NNAPI_SRCS}
${TFLITE_SRCS}
${TFLITE_SOURCE_DIR}/profiling/platform_profiler.cc
${TFLITE_SOURCE_DIR}/schema/schema_utils.cc
${TFLITE_SOURCE_DIR}/tools/optimize/sparsity/format_converter.cc
)
target_include_directories(tensorflow-lite
@ -362,14 +412,24 @@ endif() # TFLITE_ENABLE_XNNPACK
if(CMAKE_SYSTEM_NAME MATCHES "Android")
  list(APPEND TFLITE_BENCHMARK_SRCS
    ${TFLITE_SOURCE_DIR}/profiling/atrace_profiler.cc
  )
  # The NNAPI delegate provider is only compiled when NNAPI itself is
  # enabled; listing it unconditionally would duplicate the source.
  if(_TFLITE_ENABLE_NNAPI)
    list(APPEND TFLITE_BENCHMARK_SRCS
      ${TFLITE_SOURCE_DIR}/tools/delegates/nnapi_delegate_provider.cc
    )
  endif()  # _TFLITE_ENABLE_NNAPI
  list(APPEND TFLITE_BENCHMARK_LIBS
    ${ANDROID_LOG_LIB}
    absl::strings
  )
endif()  # Android
if(TFLITE_ENABLE_GPU)
  # Exposes --use_gpu=true to benchmark_model.
  list(APPEND TFLITE_BENCHMARK_SRCS
    ${TFLITE_SOURCE_DIR}/tools/delegates/gpu_delegate_provider.cc
  )
endif()  # TFLITE_ENABLE_GPU
add_executable(benchmark_model
EXCLUDE_FROM_ALL
${TFLITE_BENCHMARK_SRCS}

View File

@ -0,0 +1,207 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// automatically generated by the FlatBuffers compiler, do not modify
#ifndef FLATBUFFERS_GENERATED_COMPILEDPROGRAMCACHE_TFLITE_GPU_CL_DATA_H_
#define FLATBUFFERS_GENERATED_COMPILEDPROGRAMCACHE_TFLITE_GPU_CL_DATA_H_
#include "flatbuffers/flatbuffers.h"
namespace tflite {
namespace gpu {
namespace cl {
namespace data {
struct Program;
struct CompiledCache;
// FlatBuffers table: one compiled OpenCL program, keyed by a fingerprint
// with the device-specific compiled binary attached.
// Generated code — do not edit by hand; regenerate from the .fbs schema.
struct Program FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_FINGERPRINT = 4,
VT_BINARY = 6
};
// Fingerprint identifying the program; defaults to 0 when the field is unset.
uint64_t fingerprint() const {
return GetField<uint64_t>(VT_FINGERPRINT, 0);
}
// Compiled program bytes; nullptr when the field is unset.
const flatbuffers::Vector<uint8_t> *binary() const {
return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_BINARY);
}
// Structural validation of this table against the enclosing buffer.
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<uint64_t>(verifier, VT_FINGERPRINT) &&
VerifyOffset(verifier, VT_BINARY) &&
verifier.VerifyVector(binary()) &&
verifier.EndTable();
}
};
// Builder for Program tables. Generated code — field-add order and vtable
// offsets are schema-derived; do not edit by hand.
struct ProgramBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
// Writes the fingerprint field (omitted from the buffer when equal to the default 0).
void add_fingerprint(uint64_t fingerprint) {
fbb_.AddElement<uint64_t>(Program::VT_FINGERPRINT, fingerprint, 0);
}
// Writes the offset to a previously created binary vector.
void add_binary(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> binary) {
fbb_.AddOffset(Program::VT_BINARY, binary);
}
explicit ProgramBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
ProgramBuilder &operator=(const ProgramBuilder &);
// Ends the table and returns its offset within the buffer.
flatbuffers::Offset<Program> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<Program>(end);
return o;
}
};
// Convenience constructor: builds a Program table from already-serialized
// parts. Generated code — do not edit by hand.
inline flatbuffers::Offset<Program> CreateProgram(
flatbuffers::FlatBufferBuilder &_fbb,
uint64_t fingerprint = 0,
flatbuffers::Offset<flatbuffers::Vector<uint8_t>> binary = 0) {
ProgramBuilder builder_(_fbb);
builder_.add_fingerprint(fingerprint);
builder_.add_binary(binary);
return builder_.Finish();
}
// "Direct" variant: serializes the raw std::vector into the buffer first,
// then delegates to CreateProgram. A null vector leaves the field unset.
inline flatbuffers::Offset<Program> CreateProgramDirect(
flatbuffers::FlatBufferBuilder &_fbb,
uint64_t fingerprint = 0,
const std::vector<uint8_t> *binary = nullptr) {
auto binary__ = binary ? _fbb.CreateVector<uint8_t>(*binary) : 0;
return tflite::gpu::cl::data::CreateProgram(
_fbb,
fingerprint,
binary__);
}
// FlatBuffers root table: a cache of compiled programs tagged with the
// OpenCL driver version they were compiled for.
// Generated code — do not edit by hand; regenerate from the .fbs schema.
struct CompiledCache FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_DRIVER_VERSION = 4,
VT_PROGRAMS = 6
};
// Driver version string the cached binaries belong to; nullptr if unset.
const flatbuffers::String *driver_version() const {
return GetPointer<const flatbuffers::String *>(VT_DRIVER_VERSION);
}
// Vector of cached Program tables; nullptr if unset.
const flatbuffers::Vector<flatbuffers::Offset<Program>> *programs() const {
return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Program>> *>(VT_PROGRAMS);
}
// Structural validation, including each nested Program table.
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyOffset(verifier, VT_DRIVER_VERSION) &&
verifier.VerifyString(driver_version()) &&
VerifyOffset(verifier, VT_PROGRAMS) &&
verifier.VerifyVector(programs()) &&
verifier.VerifyVectorOfTables(programs()) &&
verifier.EndTable();
}
};
// Builder for CompiledCache tables. Generated code — do not edit by hand.
struct CompiledCacheBuilder {
flatbuffers::FlatBufferBuilder &fbb_;
flatbuffers::uoffset_t start_;
// Writes the offset to a previously created driver-version string.
void add_driver_version(flatbuffers::Offset<flatbuffers::String> driver_version) {
fbb_.AddOffset(CompiledCache::VT_DRIVER_VERSION, driver_version);
}
// Writes the offset to a previously created vector of Program offsets.
void add_programs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Program>>> programs) {
fbb_.AddOffset(CompiledCache::VT_PROGRAMS, programs);
}
explicit CompiledCacheBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
}
CompiledCacheBuilder &operator=(const CompiledCacheBuilder &);
// Ends the table and returns its offset within the buffer.
flatbuffers::Offset<CompiledCache> Finish() {
const auto end = fbb_.EndTable(start_);
auto o = flatbuffers::Offset<CompiledCache>(end);
return o;
}
};
// Convenience constructor: builds a CompiledCache table from
// already-serialized parts. Generated code — do not edit by hand.
inline flatbuffers::Offset<CompiledCache> CreateCompiledCache(
flatbuffers::FlatBufferBuilder &_fbb,
flatbuffers::Offset<flatbuffers::String> driver_version = 0,
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Program>>> programs = 0) {
CompiledCacheBuilder builder_(_fbb);
builder_.add_programs(programs);
builder_.add_driver_version(driver_version);
return builder_.Finish();
}
// "Direct" variant: serializes the C string and vector into the buffer
// first, then delegates to CreateCompiledCache. Null args leave fields unset.
inline flatbuffers::Offset<CompiledCache> CreateCompiledCacheDirect(
flatbuffers::FlatBufferBuilder &_fbb,
const char *driver_version = nullptr,
const std::vector<flatbuffers::Offset<Program>> *programs = nullptr) {
auto driver_version__ = driver_version ? _fbb.CreateString(driver_version) : 0;
auto programs__ = programs ? _fbb.CreateVector<flatbuffers::Offset<Program>>(*programs) : 0;
return tflite::gpu::cl::data::CreateCompiledCache(
_fbb,
driver_version__,
programs__);
}
// Root-access and verification helpers for CompiledCache buffers.
// Generated code — do not edit by hand.

// Returns the root CompiledCache table of a finished buffer.
inline const tflite::gpu::cl::data::CompiledCache *GetCompiledCache(const void *buf) {
return flatbuffers::GetRoot<tflite::gpu::cl::data::CompiledCache>(buf);
}
// Same, for buffers written with a 4-byte size prefix.
inline const tflite::gpu::cl::data::CompiledCache *GetSizePrefixedCompiledCache(const void *buf) {
return flatbuffers::GetSizePrefixedRoot<tflite::gpu::cl::data::CompiledCache>(buf);
}
// 4-character file identifier embedded in CompiledCache buffers.
inline const char *CompiledCacheIdentifier() {
return "AFCM";
}
// True when buf carries the CompiledCache file identifier.
inline bool CompiledCacheBufferHasIdentifier(const void *buf) {
return flatbuffers::BufferHasIdentifier(
buf, CompiledCacheIdentifier());
}
// Full structural verification of a CompiledCache buffer.
inline bool VerifyCompiledCacheBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifyBuffer<tflite::gpu::cl::data::CompiledCache>(CompiledCacheIdentifier());
}
// Full structural verification of a size-prefixed CompiledCache buffer.
inline bool VerifySizePrefixedCompiledCacheBuffer(
flatbuffers::Verifier &verifier) {
return verifier.VerifySizePrefixedBuffer<tflite::gpu::cl::data::CompiledCache>(CompiledCacheIdentifier());
}
// File extension declared in the schema for serialized caches.
inline const char *CompiledCacheExtension() {
return "jetbin";
}
// Finishes the buffer, writing the root offset and file identifier.
inline void FinishCompiledCacheBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<tflite::gpu::cl::data::CompiledCache> root) {
fbb.Finish(root, CompiledCacheIdentifier());
}
// Same, prepending the 4-byte size prefix.
inline void FinishSizePrefixedCompiledCacheBuffer(
flatbuffers::FlatBufferBuilder &fbb,
flatbuffers::Offset<tflite::gpu::cl::data::CompiledCache> root) {
fbb.FinishSizePrefixed(root, CompiledCacheIdentifier());
}
} // namespace data
} // namespace cl
} // namespace gpu
} // namespace tflite
#endif // FLATBUFFERS_GENERATED_COMPILEDPROGRAMCACHE_TFLITE_GPU_CL_DATA_H_

View File

@ -0,0 +1,16 @@
#
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Find-module shim: satisfies find_package(opencl_headers) by delegating to
# the opencl_headers module, which fetches the Khronos OpenCL headers.
include(opencl_headers)

View File

@ -0,0 +1,40 @@
#
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Fetches the Khronos OpenCL headers and puts them on the include path.
# Idempotent: bail out if the dependency has already been set up.
if(TARGET opencl_headers OR opencl_headers_POPULATED)
  return()
endif()

include(FetchContent)

OverridableFetchContent_Declare(
  opencl_headers
  GIT_REPOSITORY https://github.com/KhronosGroup/OpenCL-Headers
  # GIT_TAG must keep in sync with tensorflow/third_party/opencl_headers/workspace.bzl
  GIT_TAG 0d5f18c6e7196863bc1557a693f1509adfcee056
  GIT_PROGRESS TRUE
  PREFIX "${CMAKE_BINARY_DIR}"
  SOURCE_DIR "${CMAKE_BINARY_DIR}/opencl_headers"
)

OverridableFetchContent_GetProperties(opencl_headers)
# FetchContent defines <name>_POPULATED, not a bare <name> variable; testing
# the bare name was always false, so Populate ran on every configure.
if(NOT opencl_headers_POPULATED)
  OverridableFetchContent_Populate(opencl_headers)
endif()

# Headers-only dependency; exposed via the include path rather than a target.
include_directories(
  AFTER
  "${opencl_headers_SOURCE_DIR}/"
)