Add CMake support for the TFLite GPU delegate

Only Android OpenCL support is added.

$ mkdir tf-build; cd tf-build
$ cmake -DCMAKE_TOOLCHAIN_FILE=<NDK path>/build/cmake/android.toolchain.cmake \
    -DANDROID_ABI=arm64-v8a -DTFLITE_ENABLE_GPU=ON ../tensorflow/lite/
$ make all benchmark_model -j

You can then run benchmark_model with "--use_gpu=true".

PiperOrigin-RevId: 336785571
Change-Id: I3e875f1ea6eacd1fe2912a62ca8a1301e6f04f19
parent 3d657bfd84
commit a731eb769c
tensorflow/lite/CMakeLists.txt

@@ -20,8 +20,6 @@
 # This has only been tested on Windows, Linux and macOS.
 #
 # The following are not currently supported:
-# - GPU acceleration
-# - Android
 # - iOS
 # - Micro backend
 # - Tests
@@ -60,7 +58,7 @@ option(TFLITE_ENABLE_RUY "Enable experimental RUY integration" OFF)
 option(TFLITE_ENABLE_RESOURCE "Enable experimental support for resources" ON)
 option(TFLITE_ENABLE_NNAPI "Enable NNAPI (Android only)." ON)
 option(TFLITE_ENABLE_MMAP "Enable MMAP (unsupported on Windows)" ON)
-option(TFLITE_ENABLE_GPU "Enable GPU (not supported)" OFF)
+option(TFLITE_ENABLE_GPU "Enable GPU" OFF)
 # This must be enabled when converting from TF models with SELECT_TF_OPS
 # enabled.
 # https://www.tensorflow.org/lite/guide/ops_select#converting_the_model
@@ -192,9 +190,59 @@ if(TFLITE_ENABLE_FLEX)
   )
 endif()
 if(TFLITE_ENABLE_GPU)
-  # Implementation is under delegates/gpu.
-  message(FATAL_ERROR
-    "GPU acceleration is not currently supported in CMake builds"
-  )
+  find_package(opencl_headers REQUIRED)
+  populate_tflite_source_vars(
+    "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS
+    FILTER "(_test|gl_interop|egl_sync)\\.(cc|h)$"
+  )
+  populate_tflite_source_vars(
+    "delegates/gpu/cl/kernels" TFLITE_DELEGATES_GPU_CL_KERNELS_SRCS
+    FILTER "(_test)\\.(cc|h)$"
+  )
+  populate_tflite_source_vars(
+    "delegates/gpu/cl/kernels/special"
+    TFLITE_DELEGATES_GPU_CL_KERNELS_SPECIAL_SRCS
+    FILTER "(_test)\\.(cc|h)$"
+  )
+  populate_tflite_source_vars(
+    "delegates/gpu/cl/selectors" TFLITE_DELEGATES_GPU_CL_SELECTORS_SRCS
+    FILTER "(_test)\\.(cc|h)$"
+  )
+  populate_tflite_source_vars(
+    "delegates/gpu/common" TFLITE_DELEGATES_GPU_COMMON_SRCS
+    FILTER "(_test)\\.(cc|h)$"
+  )
+  populate_tflite_source_vars(
+    "delegates/gpu/common/default" TFLITE_DELEGATES_GPU_COMMON_DEFAULT_SRCS
+    FILTER "(_test)\\.(cc|h)$"
+  )
+  populate_tflite_source_vars(
+    "delegates/gpu/common/memory_management"
+    TFLITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_SRCS
+    FILTER "(_test)\\.(cc|h)$"
+  )
+  populate_tflite_source_vars(
+    "delegates/gpu/common/transformations"
+    TFLITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_SRCS
+    FILTER "(_test)\\.(cc|h)$"
+  )
+  list(APPEND TFLITE_DELEGATES_GPU_SRCS
+    ${TFLITE_SOURCE_DIR}/delegates/gpu/api.cc
+    ${TFLITE_SOURCE_DIR}/delegates/gpu/delegate.cc
+    ${TFLITE_DELEGATES_GPU_CL_SRCS}
+    ${TFLITE_DELEGATES_GPU_CL_KERNELS_SRCS}
+    ${TFLITE_DELEGATES_GPU_CL_KERNELS_SPECIAL_SRCS}
+    ${TFLITE_DELEGATES_GPU_CL_SELECTORS_SRCS}
+    ${TFLITE_SOURCE_DIR}/delegates/gpu/cl/selectors/default/default_selector.cc
+    ${TFLITE_DELEGATES_GPU_COMMON_SRCS}
+    ${TFLITE_DELEGATES_GPU_COMMON_DEFAULT_SRCS}
+    ${TFLITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_SRCS}
+    ${TFLITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_SRCS}
+  )
+  list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DCL_DELEGATE_NO_GL")
+  list(APPEND TFLITE_TARGET_DEPENDENCIES
+    absl::any
+    absl::flat_hash_map
+  )
 endif()
 if(_TFLITE_ENABLE_NNAPI)
@@ -281,6 +329,7 @@ add_library(tensorflow-lite
   ${TFLITE_CORE_SRCS}
   ${TFLITE_C_SRCS}
   ${TFLITE_DELEGATES_FLEX_SRCS}
+  ${TFLITE_DELEGATES_GPU_SRCS}
   ${TFLITE_DELEGATES_NNAPI_SRCS}
   ${TFLITE_DELEGATES_SRCS}
   ${TFLITE_DELEGATES_XNNPACK_SRCS}
@@ -298,6 +347,7 @@ add_library(tensorflow-lite
   ${TFLITE_NNAPI_SRCS}
   ${TFLITE_SRCS}
+  ${TFLITE_SOURCE_DIR}/profiling/platform_profiler.cc
   ${TFLITE_SOURCE_DIR}/schema/schema_utils.cc
   ${TFLITE_SOURCE_DIR}/tools/optimize/sparsity/format_converter.cc
 )
 target_include_directories(tensorflow-lite
@@ -362,14 +412,24 @@ endif() # TFLITE_ENABLE_XNNPACK
 if(CMAKE_SYSTEM_NAME MATCHES "Android")
   list(APPEND TFLITE_BENCHMARK_SRCS
     ${TFLITE_SOURCE_DIR}/profiling/atrace_profiler.cc
-    ${TFLITE_SOURCE_DIR}/tools/delegates/nnapi_delegate_provider.cc
   )
+  if(_TFLITE_ENABLE_NNAPI)
+    list(APPEND TFLITE_BENCHMARK_SRCS
+      ${TFLITE_SOURCE_DIR}/tools/delegates/nnapi_delegate_provider.cc
+    )
+  endif() # _TFLITE_ENABLE_NNAPI
   list(APPEND TFLITE_BENCHMARK_LIBS
     ${ANDROID_LOG_LIB}
     absl::strings
   )
 endif() # Android
 
+if(TFLITE_ENABLE_GPU)
+  list(APPEND TFLITE_BENCHMARK_SRCS
+    ${TFLITE_SOURCE_DIR}/tools/delegates/gpu_delegate_provider.cc
+  )
+endif() # TFLITE_ENABLE_GPU
+
 add_executable(benchmark_model
   EXCLUDE_FROM_ALL
   ${TFLITE_BENCHMARK_SRCS}
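Once the library is built with TFLITE_ENABLE_GPU=ON, applying the delegate from application code follows the usual TFLite delegate flow. The sketch below is illustrative only and is not part of this change; it assumes the public GPU delegate API declared in delegates/gpu/delegate.h, and the model path and error handling are placeholders.

// Illustrative sketch, not part of this change: run a model through the
// OpenCL GPU delegate that TFLITE_ENABLE_GPU compiles in, falling back to
// the CPU path if the delegate cannot be applied.
#include <memory>

#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

int RunWithGpuDelegate(const char* model_path) {
  auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
  if (!model) return 1;

  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  if (!interpreter) return 1;

  // Default options pick the fastest (possibly reduced-precision) path.
  TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
  TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);

  if (delegate &&
      interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
    // Delegation failed (e.g. no OpenCL driver); execution stays on CPU.
  }

  int status = 1;
  if (interpreter->AllocateTensors() == kTfLiteOk &&
      interpreter->Invoke() == kTfLiteOk) {
    status = 0;
  }

  // The interpreter must be destroyed before the delegate it references.
  interpreter.reset();
  TfLiteGpuDelegateV2Delete(delegate);
  return status;
}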
tensorflow/lite/delegates/gpu/cl/compiled_program_cache_generated.h (new file, 207 lines)

@@ -0,0 +1,207 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// automatically generated by the FlatBuffers compiler, do not modify


#ifndef FLATBUFFERS_GENERATED_COMPILEDPROGRAMCACHE_TFLITE_GPU_CL_DATA_H_
#define FLATBUFFERS_GENERATED_COMPILEDPROGRAMCACHE_TFLITE_GPU_CL_DATA_H_

#include "flatbuffers/flatbuffers.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace data {

struct Program;

struct CompiledCache;

struct Program FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
    VT_FINGERPRINT = 4,
    VT_BINARY = 6
  };
  uint64_t fingerprint() const {
    return GetField<uint64_t>(VT_FINGERPRINT, 0);
  }
  const flatbuffers::Vector<uint8_t> *binary() const {
    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_BINARY);
  }
  bool Verify(flatbuffers::Verifier &verifier) const {
    return VerifyTableStart(verifier) &&
           VerifyField<uint64_t>(verifier, VT_FINGERPRINT) &&
           VerifyOffset(verifier, VT_BINARY) &&
           verifier.VerifyVector(binary()) &&
           verifier.EndTable();
  }
};

struct ProgramBuilder {
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_;
  void add_fingerprint(uint64_t fingerprint) {
    fbb_.AddElement<uint64_t>(Program::VT_FINGERPRINT, fingerprint, 0);
  }
  void add_binary(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> binary) {
    fbb_.AddOffset(Program::VT_BINARY, binary);
  }
  explicit ProgramBuilder(flatbuffers::FlatBufferBuilder &_fbb)
        : fbb_(_fbb) {
    start_ = fbb_.StartTable();
  }
  ProgramBuilder &operator=(const ProgramBuilder &);
  flatbuffers::Offset<Program> Finish() {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<Program>(end);
    return o;
  }
};

inline flatbuffers::Offset<Program> CreateProgram(
    flatbuffers::FlatBufferBuilder &_fbb,
    uint64_t fingerprint = 0,
    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> binary = 0) {
  ProgramBuilder builder_(_fbb);
  builder_.add_fingerprint(fingerprint);
  builder_.add_binary(binary);
  return builder_.Finish();
}

inline flatbuffers::Offset<Program> CreateProgramDirect(
    flatbuffers::FlatBufferBuilder &_fbb,
    uint64_t fingerprint = 0,
    const std::vector<uint8_t> *binary = nullptr) {
  auto binary__ = binary ? _fbb.CreateVector<uint8_t>(*binary) : 0;
  return tflite::gpu::cl::data::CreateProgram(
      _fbb,
      fingerprint,
      binary__);
}

struct CompiledCache FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
    VT_DRIVER_VERSION = 4,
    VT_PROGRAMS = 6
  };
  const flatbuffers::String *driver_version() const {
    return GetPointer<const flatbuffers::String *>(VT_DRIVER_VERSION);
  }
  const flatbuffers::Vector<flatbuffers::Offset<Program>> *programs() const {
    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Program>> *>(VT_PROGRAMS);
  }
  bool Verify(flatbuffers::Verifier &verifier) const {
    return VerifyTableStart(verifier) &&
           VerifyOffset(verifier, VT_DRIVER_VERSION) &&
           verifier.VerifyString(driver_version()) &&
           VerifyOffset(verifier, VT_PROGRAMS) &&
           verifier.VerifyVector(programs()) &&
           verifier.VerifyVectorOfTables(programs()) &&
           verifier.EndTable();
  }
};

struct CompiledCacheBuilder {
  flatbuffers::FlatBufferBuilder &fbb_;
  flatbuffers::uoffset_t start_;
  void add_driver_version(flatbuffers::Offset<flatbuffers::String> driver_version) {
    fbb_.AddOffset(CompiledCache::VT_DRIVER_VERSION, driver_version);
  }
  void add_programs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Program>>> programs) {
    fbb_.AddOffset(CompiledCache::VT_PROGRAMS, programs);
  }
  explicit CompiledCacheBuilder(flatbuffers::FlatBufferBuilder &_fbb)
        : fbb_(_fbb) {
    start_ = fbb_.StartTable();
  }
  CompiledCacheBuilder &operator=(const CompiledCacheBuilder &);
  flatbuffers::Offset<CompiledCache> Finish() {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<CompiledCache>(end);
    return o;
  }
};

inline flatbuffers::Offset<CompiledCache> CreateCompiledCache(
    flatbuffers::FlatBufferBuilder &_fbb,
    flatbuffers::Offset<flatbuffers::String> driver_version = 0,
    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Program>>> programs = 0) {
  CompiledCacheBuilder builder_(_fbb);
  builder_.add_programs(programs);
  builder_.add_driver_version(driver_version);
  return builder_.Finish();
}

inline flatbuffers::Offset<CompiledCache> CreateCompiledCacheDirect(
    flatbuffers::FlatBufferBuilder &_fbb,
    const char *driver_version = nullptr,
    const std::vector<flatbuffers::Offset<Program>> *programs = nullptr) {
  auto driver_version__ = driver_version ? _fbb.CreateString(driver_version) : 0;
  auto programs__ = programs ? _fbb.CreateVector<flatbuffers::Offset<Program>>(*programs) : 0;
  return tflite::gpu::cl::data::CreateCompiledCache(
      _fbb,
      driver_version__,
      programs__);
}

inline const tflite::gpu::cl::data::CompiledCache *GetCompiledCache(const void *buf) {
  return flatbuffers::GetRoot<tflite::gpu::cl::data::CompiledCache>(buf);
}

inline const tflite::gpu::cl::data::CompiledCache *GetSizePrefixedCompiledCache(const void *buf) {
  return flatbuffers::GetSizePrefixedRoot<tflite::gpu::cl::data::CompiledCache>(buf);
}

inline const char *CompiledCacheIdentifier() {
  return "AFCM";
}

inline bool CompiledCacheBufferHasIdentifier(const void *buf) {
  return flatbuffers::BufferHasIdentifier(
      buf, CompiledCacheIdentifier());
}

inline bool VerifyCompiledCacheBuffer(
    flatbuffers::Verifier &verifier) {
  return verifier.VerifyBuffer<tflite::gpu::cl::data::CompiledCache>(CompiledCacheIdentifier());
}

inline bool VerifySizePrefixedCompiledCacheBuffer(
    flatbuffers::Verifier &verifier) {
  return verifier.VerifySizePrefixedBuffer<tflite::gpu::cl::data::CompiledCache>(CompiledCacheIdentifier());
}

inline const char *CompiledCacheExtension() {
  return "jetbin";
}

inline void FinishCompiledCacheBuffer(
    flatbuffers::FlatBufferBuilder &fbb,
    flatbuffers::Offset<tflite::gpu::cl::data::CompiledCache> root) {
  fbb.Finish(root, CompiledCacheIdentifier());
}

inline void FinishSizePrefixedCompiledCacheBuffer(
    flatbuffers::FlatBufferBuilder &fbb,
    flatbuffers::Offset<tflite::gpu::cl::data::CompiledCache> root) {
  fbb.FinishSizePrefixed(root, CompiledCacheIdentifier());
}

} // namespace data
} // namespace cl
} // namespace gpu
} // namespace tflite

#endif // FLATBUFFERS_GENERATED_COMPILEDPROGRAMCACHE_TFLITE_GPU_CL_DATA_H_
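The generated API above appears to be the serialization layer the OpenCL backend uses to cache compiled program binaries (file identifier "AFCM", extension "jetbin"). A minimal sketch of writing and re-reading such a buffer with this API; the include path, fingerprint, and binary bytes are illustrative assumptions, not taken from this change.

// Illustrative sketch, not part of this change: build a CompiledCache buffer
// with the generated API above and verify/read it back.
#include <cstdint>
#include <string>
#include <vector>

#include "flatbuffers/flatbuffers.h"
#include "tensorflow/lite/delegates/gpu/cl/compiled_program_cache_generated.h"  // path assumed

std::vector<uint8_t> BuildCache(const std::string& driver_version) {
  flatbuffers::FlatBufferBuilder builder;

  // One cached entry: a fingerprint of the kernel source plus its device
  // binary (placeholder bytes here).
  std::vector<uint8_t> binary = {0xDE, 0xAD, 0xBE, 0xEF};
  auto program = tflite::gpu::cl::data::CreateProgramDirect(
      builder, /*fingerprint=*/0x1234567890ABCDEFull, &binary);

  std::vector<flatbuffers::Offset<tflite::gpu::cl::data::Program>> programs = {
      program};
  auto cache = tflite::gpu::cl::data::CreateCompiledCacheDirect(
      builder, driver_version.c_str(), &programs);

  // Finishing with the helper stamps the "AFCM" file identifier.
  tflite::gpu::cl::data::FinishCompiledCacheBuffer(builder, cache);
  const uint8_t* p = builder.GetBufferPointer();
  return std::vector<uint8_t>(p, p + builder.GetSize());
}

bool ReadCache(const std::vector<uint8_t>& data) {
  flatbuffers::Verifier verifier(data.data(), data.size());
  if (!tflite::gpu::cl::data::VerifyCompiledCacheBuffer(verifier)) return false;
  const auto* cache = tflite::gpu::cl::data::GetCompiledCache(data.data());
  return cache->driver_version() != nullptr && cache->programs() != nullptr;
}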
tensorflow/lite/tools/cmake/modules/Findopencl_headers.cmake (new file, 16 lines)

@@ -0,0 +1,16 @@
#
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(opencl_headers)
tensorflow/lite/tools/cmake/modules/opencl_headers.cmake (new file, 40 lines)

@@ -0,0 +1,40 @@
#
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

if(TARGET opencl_headers OR opencl_headers_POPULATED)
  return()
endif()

include(FetchContent)

OverridableFetchContent_Declare(
  opencl_headers
  GIT_REPOSITORY https://github.com/KhronosGroup/OpenCL-Headers
  # GIT_TAG must keep in sync with tensorflow/third_party/opencl_headers/workspace.bzl
  GIT_TAG 0d5f18c6e7196863bc1557a693f1509adfcee056
  GIT_PROGRESS TRUE
  PREFIX "${CMAKE_BINARY_DIR}"
  SOURCE_DIR "${CMAKE_BINARY_DIR}/opencl_headers"
)

OverridableFetchContent_GetProperties(opencl_headers)
if(NOT opencl_headers)
  OverridableFetchContent_Populate(opencl_headers)
endif()

include_directories(
  AFTER
  "${opencl_headers_SOURCE_DIR}/"
)
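Android ships no OpenCL SDK library to link against, which is why only the Khronos headers are fetched here; the delegate loads the vendor driver at runtime. Below is a small illustrative probe for driver availability; the library names are assumptions that vary by device and vendor, and none of this is part of the change.

// Illustrative sketch, not part of this change: probe for an OpenCL driver
// at runtime. The delegate loads the driver dynamically, so a missing
// library means a CPU fallback rather than a link-time failure.
#include <dlfcn.h>

#include <cstdio>

bool HasOpenClDriver() {
  // Common driver library names on Android; adjust for the target device.
  for (const char* name : {"libOpenCL.so", "libOpenCL-pixel.so"}) {
    if (void* lib = dlopen(name, RTLD_NOW | RTLD_LOCAL)) {
      dlclose(lib);
      return true;
    }
  }
  return false;
}

int main() {
  std::printf("OpenCL driver %s\n", HasOpenClDriver() ? "found" : "not found");
  return 0;
}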