diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc index 09c386b55f0..2581b58f1e4 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc @@ -87,6 +87,16 @@ OpBuilder* GraphBuilder::AddBuilder( CoreML::Specification::Model* GraphBuilder::BuildModel() { CoreML::Specification::Model* model = new CoreML::Specification::Model(); + if (coreml_version_ == 2) { // Core ML 2, iOS >= 12.0 + model->set_specificationversion(3); + } else if (coreml_version_ == 3) { // Core ML 3, iOS >= 13.0 + model->set_specificationversion(4); + model->mutable_neuralnetwork()->set_arrayinputshapemapping( + CoreML::Specification::EXACT_ARRAY_MAPPING); + } else { + fprintf(stderr, "Unsupported Core ML version: %d\n", coreml_version_); + return nullptr; + } auto* neural_network = model->mutable_neuralnetwork(); for (auto& builder : builders_) { CoreML::Specification::NeuralNetworkLayer* layer = builder->Build(); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h index 5367ae20d2f..c59c30a5a28 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h @@ -52,6 +52,8 @@ class TensorID { // API is experimental and subject to change. class GraphBuilder { public: + explicit GraphBuilder(int coreml_version) : coreml_version_(coreml_version) {} + // Returns pointer to the created builder. Ownership still belongs // to the GraphBuilder. OpBuilder* AddBuilder(int builtin_code, const TfLiteNode* node); @@ -79,6 +81,8 @@ class GraphBuilder { // This information is used to mark constant tensors that are used as input. 
bool IsTensorUsed(int tflite_tensor_index); + const int coreml_version_; + private: std::vector> builders_; // Index in the vector is the tflite_tensor_index, the value diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h b/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h index b0fe24ee288..501a304706c 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h @@ -32,7 +32,8 @@ bool IsFullyConnectedOpSupported(const TfLiteRegistration* registration, const TfLiteNode* node, TfLiteContext* context); bool IsReshapeOpSupported(const TfLiteRegistration* registration, - const TfLiteNode* node, TfLiteContext* context); + const TfLiteNode* node, TfLiteContext* context, + int coreml_version); bool IsResizeBilinearOpSupported(const TfLiteRegistration* registration, const TfLiteNode* node, TfLiteContext* context); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc index 33040e2e070..b7b78653d36 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/reshape_op_builder.cc @@ -114,7 +114,11 @@ TfLiteStatus ReshapeOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs, } bool IsReshapeOpSupported(const TfLiteRegistration* registration, - const TfLiteNode* node, TfLiteContext* context) { + const TfLiteNode* node, TfLiteContext* context, + int coreml_version) { + if (coreml_version >= 3) { + return false; + } if (node->inputs->size == 1) { const auto* params = reinterpret_cast(node->builtin_data); diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h index 0d75afc8e34..8ad81040499 100644 --- 
a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.h @@ -31,6 +31,13 @@ typedef enum { typedef struct { // Only create delegate when Neural Engine is available on the device. TfLiteCoreMlDelegateEnabledDevices enabled_devices; + // Specifies target Core ML version for model conversion. + // Core ML 3 comes with a lot more ops, but some ops (e.g. reshape) are not + // delegated due to input rank constraint. + // If not set to one of the valid versions, the delegate will use the highest + // version possible on the platform. + // Valid versions: (2, 3) + int coreml_version; // This sets the maximum number of Core ML delegates created. // Each graph corresponds to one delegated node subset in the // TFLite model. Set this to 0 to delegate all possible partitions. diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm index 5d0564ebc48..58728659894 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm @@ -36,7 +36,7 @@ constexpr int kMinNodesPerCoreMlDelegate = 2; using delegates::coreml::CoreMlDelegateKernel; bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfLiteNode* node, - TfLiteContext* context) { + TfLiteContext* context, const TfLiteCoreMlDelegateOptions* options) { if (@available(iOS 11.0, *)) { } else { return false; } @@ -120,7 +120,8 @@ bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfL return true; } case kTfLiteBuiltinReshape: { - return delegates::coreml::IsReshapeOpSupported(registration, node, context); + return delegates::coreml::IsReshapeOpSupported(registration, node, context, + options->coreml_version); } case kTfLiteBuiltinResizeBilinear: { return delegates::coreml::IsResizeBilinearOpSupported(registration, node, context); } @@ -142,6 
+143,39 @@ bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfL return false; } +class CoreMlDelegate : public TfLiteDelegate { + public: + explicit CoreMlDelegate(const TfLiteCoreMlDelegateOptions* params) + : params_(params != nullptr ? *params : TfLiteCoreMlDelegateOptions()) { + { + if (@available(iOS 13.0, *)) { + if (params_.coreml_version != 2 && params_.coreml_version != 3) { + NSLog(@"coreml_version must be 2 or 3. Setting to 3."); + params_.coreml_version = 3; + } + } else if (@available(iOS 12.0, *)) { + if (params_.coreml_version != 2) { + NSLog(@"coreml_version must be 2 - using Core ML version 2."); + params_.coreml_version = 2; + } + } + if (params_.max_delegated_partitions <= 0) { + params_.max_delegated_partitions = std::numeric_limits::max(); + } + if (params_.min_nodes_per_partition <= 0) { + params_.min_nodes_per_partition = kMinNodesPerCoreMlDelegate; + } + } + } + + TfLiteCoreMlDelegateOptions* params() { return ¶ms_; } + + bool VerifyDelegate() { return true; } + + private: + TfLiteCoreMlDelegateOptions params_; +}; + TfLiteRegistration GetCoreMlKernelRegistration() { // This is the registration for the Delegate Node that gets added to // the TFLite graph instead of the subGraph it replaces it. 
@@ -158,8 +192,10 @@ TfLiteRegistration GetCoreMlKernelRegistration() { }; kernel_registration.init = [](TfLiteContext* context, const char* buffer, size_t length) -> void* { - const TfLiteDelegateParams* params = reinterpret_cast(buffer); - CoreMlDelegateKernel* coreml_kernel = new CoreMlDelegateKernel(); + const auto* params = reinterpret_cast(buffer); + const auto* coreml_options = + (reinterpret_cast(params->delegate))->params(); + CoreMlDelegateKernel* coreml_kernel = new CoreMlDelegateKernel(coreml_options->coreml_version); if (coreml_kernel->Init(context, params) != kTfLiteOk) { delete coreml_kernel; return nullptr; @@ -187,14 +223,12 @@ TfLiteRegistration GetCoreMlKernelRegistration() { } TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { - const auto* params = - reinterpret_cast(delegate->data_); + const auto* params = reinterpret_cast(delegate->data_); - delegates::IsNodeSupportedFn node_supported_fn = - [=](TfLiteContext* context, TfLiteNode* node, - TfLiteRegistration* registration, - std::string* unsupported_details) -> bool { - return IsNodeSupportedByDelegate(registration, node, context); + delegates::IsNodeSupportedFn node_supported_fn = [=](TfLiteContext* context, TfLiteNode* node, + TfLiteRegistration* registration, + std::string* unsupported_details) -> bool { + return IsNodeSupportedByDelegate(registration, node, context, params); }; delegates::GraphPartitionHelper helper(context, node_supported_fn); @@ -214,7 +248,8 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { // Set first element to the number of nodes to replace. 
supported_nodes[0] = supported_nodes.size() - 1; - TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, "CoreML delegate: %d nodes delegated out of %d nodes, " + TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, + "CoreML delegate: %d nodes delegated out of %d nodes, " "with %d partitions.\n", supported_nodes[0], helper.num_total_nodes(), delegate_partitions.size()); @@ -223,28 +258,6 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { reinterpret_cast(supported_nodes.data()), delegate); } -class CoreMlDelegate : public TfLiteDelegate { - public: - explicit CoreMlDelegate(const TfLiteCoreMlDelegateOptions* params) - : params_(params != nullptr ? *params : TfLiteCoreMlDelegateOptions()) { - { - if (params_.max_delegated_partitions <= 0) { - params_.max_delegated_partitions = std::numeric_limits::max(); - } - if (params_.min_nodes_per_partition <= 0) { - params_.min_nodes_per_partition = kMinNodesPerCoreMlDelegate; - } - } - } - - TfLiteCoreMlDelegateOptions* params() { return ¶ms_; } - - bool VerifyDelegate() { return true; } - - private: - TfLiteCoreMlDelegateOptions params_; -}; - TfLiteDelegate* CreateCoreMlDelegate(const TfLiteCoreMlDelegateOptions* options) { TfLiteDelegate* delegate = new CoreMlDelegate(options); if (!static_cast(delegate)->VerifyDelegate()) { @@ -288,7 +301,7 @@ bool IsNeuralEngineAvailable() { } // namespace TfLiteDelegate* TfLiteCoreMlDelegateCreate(const TfLiteCoreMlDelegateOptions* options) { - if (@available(iOS 11.0, *)) { + if (@available(iOS 12.0, *)) { if (options->enabled_devices == TfLiteCoreMlDelegateDevicesWithNeuralEngine && !IsNeuralEngineAvailable()) { NSLog(@"This device does not have Neural Engine, so Core ML delegate will not be enabled. " @@ -299,7 +312,7 @@ TfLiteDelegate* TfLiteCoreMlDelegateCreate(const TfLiteCoreMlDelegateOptions* op return tflite::CreateCoreMlDelegate(options); } else { NSLog(@"Core ML delegate is not supported in this iOS version. 
" - "Minimum required iOS version is 11.0."); + "Minimum required iOS version is 12.0."); return nullptr; } } diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h index 04053ea81c1..8c983fb11aa 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.h @@ -29,6 +29,8 @@ namespace coreml { // implements Init/Prepare/Invoke as TFLite kernel nodes. class CoreMlDelegateKernel { public: + explicit CoreMlDelegateKernel(int coreml_version) + : coreml_version_(coreml_version) {} // Initialize the delegated graph and add required nodes. TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params); @@ -56,6 +58,7 @@ class CoreMlDelegateKernel { std::unique_ptr builder_; std::unique_ptr model_; ::CoreMlExecutor* executor_; + int coreml_version_; std::vector input_tensor_ids_; std::vector inputs_; diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm index a36837bcc44..6a668bc971b 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate_kernel.mm @@ -60,7 +60,7 @@ void TransposeToHWC(const float* chw, float* hwc, const TfLiteIntArray* hwc_dims TfLiteStatus CoreMlDelegateKernel::Init(TfLiteContext* context, const TfLiteDelegateParams* delegate_params) { - if (@available(iOS 11.0, *)) { + if (@available(iOS 12.0, *)) { executor_ = [[::CoreMlExecutor alloc] init]; TF_LITE_ENSURE_STATUS(BuildModel(context, delegate_params)); // Serialize the model protocol buffer and compile it. 
@@ -76,7 +76,7 @@ TfLiteStatus CoreMlDelegateKernel::Init(TfLiteContext* context, } return kTfLiteOk; } else { - TF_LITE_KERNEL_LOG(context, "Minimum required iOS version is 11.0."); + TF_LITE_KERNEL_LOG(context, "Minimum required iOS version is 12.0."); return kTfLiteError; } } @@ -104,6 +104,9 @@ void CoreMlDelegateKernel::AddOutputTensors(const TfLiteIntArray* output_tensors int batch_size, height_size, width_size, depth_size; GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); multi_array->set_datatype(CoreML::Specification::ArrayFeatureType::FLOAT32); + if (coreml_version_ >= 3) { + multi_array->mutable_shape()->Add(batch_size); + } multi_array->mutable_shape()->Add(depth_size); multi_array->mutable_shape()->Add(height_size); multi_array->mutable_shape()->Add(width_size); @@ -114,7 +117,7 @@ TfLiteStatus CoreMlDelegateKernel::BuildModel(TfLiteContext* context, const TfLiteDelegateParams* delegate_params) { TfLiteNode* node; TfLiteRegistration* reg; - builder_.reset(new delegates::coreml::GraphBuilder()); + builder_.reset(new delegates::coreml::GraphBuilder(coreml_version_)); // Add Inputs AddInputTensors(delegate_params->input_tensors, context); // Build all ops. @@ -144,8 +147,6 @@ TfLiteStatus CoreMlDelegateKernel::BuildModel(TfLiteContext* context, return kTfLiteError; } AddOutputTensors(delegate_params->output_tensors, context); - // TODO(karimnosseir): Set correct version ? 
- model_->set_specificationversion(1); auto* model_description = model_->mutable_description(); for (int i = 0; i < delegate_params->input_tensors->size; ++i) { const int tensor_id = delegate_params->input_tensors->data[i]; @@ -158,6 +159,9 @@ TfLiteStatus CoreMlDelegateKernel::BuildModel(TfLiteContext* context, int batch_size, height_size, width_size, depth_size; GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); multi_array->set_datatype(CoreML::Specification::ArrayFeatureType::FLOAT32); + if (coreml_version_ >= 3) { + multi_array->mutable_shape()->Add(batch_size); + } multi_array->mutable_shape()->Add(depth_size); multi_array->mutable_shape()->Add(height_size); multi_array->mutable_shape()->Add(width_size); @@ -181,9 +185,12 @@ TfLiteStatus CoreMlDelegateKernel::Prepare(TfLiteContext* context, TfLiteNode* n int batch_size, height_size, width_size, depth_size; GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor->dims); - inputs_.push_back({std::vector(input_size), - builder_->GetTensorName(tensor_index), - {depth_size, height_size, width_size}}); + std::vector input_shape = {depth_size, height_size, width_size}; + if (coreml_version_ >= 3) { + input_shape.insert(input_shape.begin(), batch_size); + } + inputs_.push_back( + {std::vector(input_size), builder_->GetTensorName(tensor_index), input_shape}); } outputs_.reserve(node->outputs->size); @@ -222,9 +229,7 @@ TfLiteStatus CoreMlDelegateKernel::Invoke(TfLiteContext* context, TfLiteNode* no } } -CoreMlDelegateKernel::~CoreMlDelegateKernel() { - [executor_ cleanup]; -} +CoreMlDelegateKernel::~CoreMlDelegateKernel() { [executor_ cleanup]; } } // namespace coreml } // namespace delegates diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h index edec3020cbc..5ce0a0ade6c 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h +++ 
b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.h @@ -45,4 +45,5 @@ struct TensorData { @property MLModel* model API_AVAILABLE(ios(11)); @property NSString* mlModelFilePath; @property NSString* compiledModelFilePath; +@property(nonatomic, readonly) int coreMlVersion; @end diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm index 2091c0d7ca0..1f808e08d49 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_executor.mm @@ -39,17 +39,22 @@ NSURL* createTemporaryFile() { NSSet* _featureNames; } -- (instancetype)initWithInputs:(const std::vector*)inputs; +- (instancetype)initWithInputs:(const std::vector*)inputs + coreMlVersion:(int)coreMlVersion; - (MLFeatureValue*)featureValueForName:(NSString*)featureName API_AVAILABLE(ios(11)); - (NSSet*)featureNames; +@property(nonatomic, readonly) int coreMlVersion; + @end @implementation MultiArrayFeatureProvider -- (instancetype)initWithInputs:(const std::vector*)inputs { +- (instancetype)initWithInputs:(const std::vector*)inputs + coreMlVersion:(int)coreMlVersion { self = [super init]; _inputs = inputs; + _coreMlVersion = coreMlVersion; for (auto& input : *_inputs) { if (input.name.empty()) { return nil; @@ -74,8 +79,31 @@ NSURL* createTemporaryFile() { for (auto& input : *_inputs) { if ([featureName cStringUsingEncoding:NSUTF8StringEncoding] == input.name) { // TODO(b/141492326): Update shape handling for higher ranks - NSArray* shape = @[ @(input.shape[0]), @(input.shape[1]), @(input.shape[2]) ]; - NSArray* strides = @[ @(input.shape[1] * input.shape[2]), @(input.shape[2]), @1 ]; + NSArray* shape = @[ + @(input.shape[0]), + @(input.shape[1]), + @(input.shape[2]), + ]; + NSArray* strides = @[ + @(input.shape[1] * input.shape[2]), + @(input.shape[2]), + @1, + ]; + + if ([self coreMlVersion] >= 3) { + shape = @[ + @(input.shape[0]), + 
@(input.shape[1]), + @(input.shape[2]), + @(input.shape[3]), + ]; + strides = @[ + @(input.shape[1] * input.shape[2] * input.shape[3]), + @(input.shape[2] * input.shape[3]), + @(input.shape[3]), + @1, + ]; + }; NSError* error = nil; MLMultiArray* mlArray = [[MLMultiArray alloc] initWithDataPointer:(float*)input.data.data() shape:shape @@ -106,7 +134,7 @@ NSURL* createTemporaryFile() { } NSError* error = nil; MultiArrayFeatureProvider* inputFeature = - [[MultiArrayFeatureProvider alloc] initWithInputs:&inputs]; + [[MultiArrayFeatureProvider alloc] initWithInputs:&inputs coreMlVersion:[self coreMlVersion]]; if (inputFeature == nil) { NSLog(@"inputFeature is not initialized."); return NO; @@ -153,6 +181,14 @@ NSURL* createTemporaryFile() { - (NSURL*)saveModel:(CoreML::Specification::Model*)model { NSURL* modelUrl = createTemporaryFile(); NSString* modelPath = [modelUrl path]; + if (model->specificationversion() == 3) { + _coreMlVersion = 2; + } else if (model->specificationversion() == 4) { + _coreMlVersion = 3; + } else { + NSLog(@"Only Core ML models with specification version 3 or 4 are supported"); + return nil; + } // Flush data to file. // TODO(karimnosseir): Can we mmap this instead of actual writing it to phone ? 
std::ofstream file_stream([modelPath UTF8String], std::ios::out | std::ios::binary); diff --git a/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift b/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift index 9862de31e2c..5a1526d45ea 100644 --- a/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift +++ b/tensorflow/lite/experimental/swift/Sources/CoreMLDelegate.swift @@ -35,6 +35,7 @@ public final class CoreMLDelegate: Delegate { self.options = options var delegateOptions = TfLiteCoreMlDelegateOptions() delegateOptions.enabled_devices = options.enabledDevices.cEnabledDevices + delegateOptions.coreml_version = Int32(options.coreMLVersion) delegateOptions.max_delegated_partitions = Int32(options.maxDelegatedPartitions) delegateOptions.min_nodes_per_partition = Int32(options.minNodesPerPartition) guard let delegate = TfLiteCoreMlDelegateCreate(&delegateOptions) else { return nil } @@ -72,6 +73,9 @@ extension CoreMLDelegate { /// value is `.neuralEngine` indicating that the delegate is enabled for Neural Engine devices /// only. public var enabledDevices: EnabledDevices = .neuralEngine + /// Target Core ML version for the model conversion. When it's not set, Core ML version will + /// be set to highest available version for the platform. + public var coreMLVersion = 0 /// The maximum number of Core ML delegate partitions created. Each graph corresponds to one /// delegated node subset in the TFLite model. The default value is `0` indicating that all /// possible partitions are delegated. diff --git a/tensorflow/lite/g3doc/performance/coreml_delegate.md b/tensorflow/lite/g3doc/performance/coreml_delegate.md index da3b943fd89..c267347cf3f 100644 --- a/tensorflow/lite/g3doc/performance/coreml_delegate.md +++ b/tensorflow/lite/g3doc/performance/coreml_delegate.md @@ -6,7 +6,7 @@ which results in faster model inference on iOS devices. Note: This delegate is in experimental (beta) phase. 
-Note: Core ML delegate is using Core ML version 2.1. +Note: Core ML delegate supports Core ML version 2 and later. **Supported iOS versions and devices:** @@ -158,6 +158,14 @@ for more detail. Alternatively, you can implement your own set of blacklist devices using other libraries such as [DeviceKit](https://github.com/devicekit/DeviceKit). +### Using older Core ML version + +Although iOS 13 supports Core ML 3, the model might work better when it is +converted with Core ML 2 model specification. The target conversion version is +set to the latest version by default, but you can change this by setting +`coreMLVersion` (in Swift, `coreml_version` in C API) in the delegate option to +an older version. + ## Supported ops Following ops are supported by the Core ML delegate. @@ -187,6 +195,8 @@ Following ops are supported by the Core ML delegate. * ReluN1To1 * Relu6 * Reshape + * Only supported when target Core ML version is 2, not supported when + targeting Core ML 3. * ResizeBilinear * SoftMax * Tanh