Add flag for using optimized TFLite CPU kernels on iOS

This adds new experimental flags to the interpreter options of the TFLite Obj-C
and Swift APIs, which can be used to opt in to a set of highly optimized
floating point kernels provided via the XNNPACK delegate. The flags can be used
as follows.

Obj-C:

    TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
    options.useXNNPACK = YES;
    NSError *error;
    TFLInterpreter *interpreter =
        [[TFLInterpreter alloc] initWithModelPath:@"model/path"
                                          options:options
                                            error:&error];

Swift:

    var options = InterpreterOptions()
    options.isXNNPackEnabled = true
    let interpreter = try Interpreter(modelPath: "model/path", options: options)
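
As the implementation changes below show, the XNNPACK delegate also picks up the
interpreter's thread count: a positive `numberOfThreads` (Obj-C) or `threadCount`
(Swift) value is forwarded to the delegate's `num_threads` option. A minimal
Swift sketch combining the two options (the model path is a placeholder):

    var options = InterpreterOptions()
    options.threadCount = 2  // Also forwarded to the XNNPACK delegate.
    options.isXNNPackEnabled = true
    let interpreter = try Interpreter(modelPath: "model/path", options: options)
    try interpreter.allocateTensors()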

PiperOrigin-RevId: 317270012
Change-Id: I82aae43c3de13ab08af3c70513e2a458e807b0f1
commit 772433a2a2
parent e51b17f458
Author: YoungSeok Yoon
Date: 2020-06-19 01:58:58 -07:00
Committed by: TensorFlower Gardener

11 changed files with 171 additions and 29 deletions


@@ -14,6 +14,10 @@ EMSCRIPTEN_LINKOPTS = [
    "-s TOTAL_MEMORY=134217728",
]

exports_files([
    "xnnpack_delegate.h",
])

cc_library(
    name = "xnnpack_delegate",
    srcs = ["xnnpack_delegate.cc"],


@@ -18,10 +18,26 @@ sh_binary(
    ],
)

# When the static framework is built with bazel, all header files are moved
# to the "Headers" directory with no header path prefixes. This auxiliary rule
# is used to strip the path prefix from the "common.h" include in the
# "xnnpack_delegate.h" header.
genrule(
    name = "strip_xnnpack_include_hdr",
    srcs = ["//tensorflow/lite/delegates/xnnpack:xnnpack_delegate.h"],
    outs = ["xnnpack_delegate.h"],
    cmd = """
    sed 's|#include ".*common.h"|#include "common.h"|' \
        "$(location //tensorflow/lite/delegates/xnnpack:xnnpack_delegate.h)" \
        > "$@"
    """,
)

# bazel build -c opt --config=ios_fat //tensorflow/lite/experimental/ios:TensorFlowLiteC_framework
tflite_ios_static_framework(
    name = "TensorFlowLiteC_framework",
    hdrs = [
        ":xnnpack_delegate.h",
        "//tensorflow/lite/c:c_api.h",
        "//tensorflow/lite/c:common.h",
    ],
@@ -105,6 +121,7 @@ cc_library(
    hdrs = [
        "//tensorflow/lite/c:c_api.h",
        "//tensorflow/lite/c:common.h",
        "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate.h",
    ],
    tags = [
        "nobuilder",
@@ -112,6 +129,7 @@ cc_library(
    ],
    deps = [
        "//tensorflow/lite/c:c_api",
        "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
    ],
)
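
For reference, the genrule's sed substitution above exists because the framework's
"Headers" directory is flat: "xnnpack_delegate.h" normally includes the common
header by its full source-tree path, which would not resolve there. A sketch of
the rewrite's effect (the original include line is quoted from the source tree):

    // Before, in the source tree:
    #include "tensorflow/lite/c/common.h"

    // After, inside the framework's flat "Headers" directory:
    #include "common.h"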


@@ -64,6 +64,7 @@ objc_library(
    visibility = ios_visibility_whitelist(),
    deps = [
        "//tensorflow/lite/c:c_api",
        "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
    ],
    alwayslink = 1,
)


@@ -26,6 +26,7 @@ Pod::Spec.new do |s|
    objc_dir + '{apis,sources}/*.{h,m,mm}',
    tfl_dir + 'c/c_api.h',
    tfl_dir + 'c/common.h',
    tfl_dir + 'delegates/xnnpack/xnnpack_delegate.h',
  ]
  s.module_map = objc_dir + 'apis/framework.modulemap'
  s.dependency 'TensorFlowLiteC', "~> #{s.version}"


@@ -26,6 +26,7 @@ Pod::Spec.new do |s|
    objc_dir + '{apis,sources}/*.{h,m,mm}',
    tfl_dir + 'c/c_api.h',
    tfl_dir + 'c/common.h',
    tfl_dir + 'delegates/xnnpack/xnnpack_delegate.h',
  ]
  s.module_map = objc_dir + 'apis/framework.modulemap'
  s.dependency 'TensorFlowLiteC', "#{s.version}"


@@ -26,6 +26,7 @@ Pod::Spec.new do |s|
    objc_dir + '{apis,sources}/*.{h,m,mm}',
    tfl_dir + 'c/c_api.h',
    tfl_dir + 'c/common.h',
    tfl_dir + 'delegates/xnnpack/xnnpack_delegate.h',
  ]
  s.module_map = objc_dir + 'apis/framework.modulemap'
  s.dependency 'TensorFlowLiteC', '~> 0.0.1-nightly'


@@ -25,6 +25,27 @@ NS_ASSUME_NONNULL_BEGIN
 */
@property(nonatomic) NSUInteger numberOfThreads;

/**
 * Experimental: Enable an optimized set of floating point CPU kernels (provided by XNNPACK).
 *
 * When enabled, the interpreter uses a new, highly optimized set of CPU kernels provided via the
 * XNNPACK delegate. Currently, this is restricted to a subset of floating point operations.
 * Eventually, we plan to enable this by default, as it can provide significant performance benefits
 * for many classes of floating point models. See
 * https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/README.md
 * for more details.
 *
 * Things to keep in mind when enabling this flag:
 *
 * * Startup time and resize time may increase.
 * * Baseline memory consumption may increase.
 * * Compatibility with other delegates (e.g., GPU) has not been fully validated.
 * * Quantized models will not see any benefit.
 *
 * WARNING: This is an experimental interface that is subject to change.
 */
@property(nonatomic) BOOL useXNNPACK;

/**
 * Initializes a new instance of `TFLInterpreterOptions`.
 *


@@ -23,6 +23,7 @@
#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h"

#include "tensorflow/lite/c/c_api.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

NS_ASSUME_NONNULL_BEGIN
@@ -45,6 +46,9 @@ static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_
/** TfLiteInterpreter backed by C API. */
@property(nonatomic, nullable) TfLiteInterpreter *interpreter;

/** TfLiteDelegate backed by C API. */
@property(nonatomic, nullable) TfLiteDelegate *xnnpack_delegate;

@end

@implementation TFLInterpreter
@@ -53,6 +57,7 @@ static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_
- (void)dealloc {
  TfLiteInterpreterDelete(_interpreter);
  TfLiteXNNPackDelegateDelete(_xnnpack_delegate);
}

#pragma mark - Public
@@ -104,6 +109,16 @@ static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_
    }
    TfLiteInterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr);

    if (options.useXNNPACK) {
      TfLiteXNNPackDelegateOptions xnnpack_options = TfLiteXNNPackDelegateOptionsDefault();
      if (options.numberOfThreads > 0) {
        xnnpack_options.num_threads = (int32_t)options.numberOfThreads;
      }
      _xnnpack_delegate = TfLiteXNNPackDelegateCreate(&xnnpack_options);
      TfLiteInterpreterOptionsAddDelegate(cOptions, _xnnpack_delegate);
    }

    _interpreter = TfLiteInterpreterCreate(model, cOptions);
    if (_interpreter == nullptr) {
      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter


@@ -32,6 +32,7 @@ NS_ASSUME_NONNULL_BEGIN
  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
  XCTAssertNotNil(options);
  XCTAssertEqual(options.numberOfThreads, 0);
  XCTAssertFalse(options.useXNNPACK);
}

- (void)testSetNumberOfThread {
@@ -44,6 +45,14 @@ NS_ASSUME_NONNULL_BEGIN
  XCTAssertEqual(options.numberOfThreads, 3);
}

- (void)testUseXNNPACK {
  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
  options.useXNNPACK = YES;
  XCTAssertTrue(options.useXNNPACK);

  options.useXNNPACK = NO;
  XCTAssertFalse(options.useXNNPACK);
}

@end

NS_ASSUME_NONNULL_END


@@ -39,6 +39,9 @@ public final class Interpreter {
  /// The underlying `TfLiteInterpreter` C pointer.
  private var cInterpreter: CInterpreter?

  /// The underlying `TfLiteDelegate` C pointer for XNNPACK delegate.
  private var cXNNPackDelegate: Delegate.CDelegate?

  /// Creates a new instance with the given values.
  ///
  /// - Parameters:
@@ -78,6 +81,14 @@ public final class Interpreter {
      )
    }
    delegates?.forEach { TfLiteInterpreterOptionsAddDelegate(cInterpreterOptions, $0.cDelegate) }

    // Configure the XNNPack delegate after the other delegates explicitly added by the user.
    options.map {
      if $0.isXNNPackEnabled {
        configureXNNPack(options: $0, cInterpreterOptions: cInterpreterOptions)
      }
    }

    guard let cInterpreter = TfLiteInterpreterCreate(model.cModel, cInterpreterOptions) else {
      throw InterpreterError.failedToCreateInterpreter
    }
@@ -86,6 +97,7 @@ public final class Interpreter {
  deinit {
    TfLiteInterpreterDelete(cInterpreter)
    TfLiteXNNPackDelegateDelete(cXNNPackDelegate)
  }

  /// Invokes the interpreter to perform inference from the loaded graph.
@@ -201,7 +213,8 @@ public final class Interpreter {
    guard case 0...maxIndex = index else {
      throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex)
    }
-    guard TfLiteInterpreterResizeInputTensor(
+    guard
+      TfLiteInterpreterResizeInputTensor(
        cInterpreter,
        Int32(index),
        shape.int32Dimensions,
@@ -256,6 +269,18 @@ public final class Interpreter {
      throw InterpreterError.failedToAllocateTensors
    }
  }

  // MARK: - Private

  private func configureXNNPack(options: Options, cInterpreterOptions: OpaquePointer) {
    var cXNNPackOptions = TfLiteXNNPackDelegateOptionsDefault()
    if let threadCount = options.threadCount, threadCount > 0 {
      cXNNPackOptions.num_threads = Int32(threadCount)
    }

    cXNNPackDelegate = TfLiteXNNPackDelegateCreate(&cXNNPackOptions)
    TfLiteInterpreterOptionsAddDelegate(cInterpreterOptions, cXNNPackDelegate)
  }
}

extension Interpreter {
@@ -265,6 +290,28 @@ extension Interpreter {
    /// indicating that the `Interpreter` will decide the number of threads to use.
    public var threadCount: Int? = nil

    /// Indicates whether an optimized set of floating point CPU kernels, provided by XNNPACK, is
    /// enabled.
    ///
    /// - Experiment:
    /// When enabled, the interpreter uses a new, highly optimized set of CPU kernels provided
    /// via the XNNPACK delegate. Currently, this is restricted to a subset of floating point
    /// operations. Eventually, we plan to enable this by default, as it can provide significant
    /// performance benefits for many classes of floating point models. See
    /// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/README.md
    /// for more details.
    ///
    /// - Important:
    /// Things to keep in mind when enabling this flag:
    ///
    /// * Startup time and resize time may increase.
    /// * Baseline memory consumption may increase.
    /// * Compatibility with other delegates (e.g., GPU) has not been fully validated.
    /// * Quantized models will not see any benefit.
    ///
    /// - Warning: This is an experimental interface that is subject to change.
    public var isXNNPackEnabled: Bool = false

    /// Creates a new instance with the default values.
    public init() {}
  }
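
Because the initializer adds the XNNPACK delegate only after any delegates the
user passed in (see the comment in the initializer hunk above), explicitly added
delegates claim their ops first and XNNPACK handles the remaining floating point
operations. A hedged sketch, assuming the initializer's optional `delegates:`
parameter shown above and a caller-created `myDelegate` value:

    var options = Interpreter.Options()
    options.isXNNPackEnabled = true
    // myDelegate is applied first; the XNNPACK delegate is appended last.
    let interpreter = try Interpreter(
      modelPath: "model/path",
      options: options,
      delegates: [myDelegate]
    )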


@@ -142,10 +142,12 @@ class InterpreterTests: XCTestCase {
  }

  func testResizeInputTensorAtIndexToShape_ThrowsInvalidIndex() {
-    XCTAssertThrowsError(try interpreter.resizeInput(
+    XCTAssertThrowsError(
+      try interpreter.resizeInput(
        at: AddModel.invalidIndex,
        to: [2, 2, 3]
-    )) { error in
+      )
+    ) { error in
      let maxIndex = AddModel.inputTensorCount - 1
      self.assertEqualErrors(
        actual: error,
@@ -162,10 +164,12 @@ class InterpreterTests: XCTestCase {
  }

  func testCopyDataToInputTensorAtIndex_ThrowsInvalidIndex() {
-    XCTAssertThrowsError(try interpreter.copy(
+    XCTAssertThrowsError(
+      try interpreter.copy(
        AddModel.inputData,
        toInputAt: AddModel.invalidIndex
-    )) { error in
+      )
+    ) { error in
      let maxIndex = AddModel.inputTensorCount - 1
      self.assertEqualErrors(
        actual: error,
@@ -178,10 +182,12 @@ class InterpreterTests: XCTestCase {
    try interpreter.resizeInput(at: AddModel.validIndex, to: AddModel.shape)
    try interpreter.allocateTensors()
    let invalidData = Data(count: AddModel.dataCount - 1)
-    XCTAssertThrowsError(try interpreter.copy(
+    XCTAssertThrowsError(
+      try interpreter.copy(
        invalidData,
        toInputAt: AddModel.validIndex
-    )) { error in
+      )
+    ) { error in
      self.assertEqualErrors(
        actual: error,
        expected: .invalidTensorDataCount(provided: invalidData.count, required: AddModel.dataCount)
@@ -223,12 +229,20 @@ class InterpreterOptionsTests: XCTestCase {
  func testInitWithDefaultValues() {
    let options = Interpreter.Options()
    XCTAssertNil(options.threadCount)
    XCTAssertFalse(options.isXNNPackEnabled)
  }

  func testInitWithCustomValues() {
    var options = Interpreter.Options()

    options.threadCount = 2
    XCTAssertEqual(options.threadCount, 2)

    options.isXNNPackEnabled = false
    XCTAssertFalse(options.isXNNPackEnabled)

    options.isXNNPackEnabled = true
    XCTAssertTrue(options.isXNNPackEnabled)
  }

  func testEquatable() {
@@ -242,6 +256,15 @@ class InterpreterOptionsTests: XCTestCase {
    options2.threadCount = 3
    XCTAssertNotEqual(options1, options2)

    options2.threadCount = 2
    XCTAssertEqual(options1, options2)

    options2.isXNNPackEnabled = true
    XCTAssertNotEqual(options1, options2)

    options1.isXNNPackEnabled = true
    XCTAssertEqual(options1, options2)
  }
}
@@ -329,7 +352,8 @@ extension Array {
      self = unsafeData.withUnsafeBytes { .init($0.bindMemory(to: Element.self)) }
    #else
      self = unsafeData.withUnsafeBytes {
-        .init(UnsafeBufferPointer<Element>(
+        .init(
+          UnsafeBufferPointer<Element>(
            start: $0,
            count: unsafeData.count / MemoryLayout<Element>.stride
          ))