Juhyun Lee f19161ecb7 TFLite GPU: Replace tflite::gpu::Status with absl::Status.
PiperOrigin-RevId: 302720429
Change-Id: I5b7987e677dad4a335ab4dae9480cba8779706ca
2020-03-24 12:26:00 -07:00

170 lines
7.2 KiB
Plaintext

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
#include <map>
#include <vector>
#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
using ::tflite::gpu::BHWC;
using ::tflite::gpu::metal::ComputeTaskDescriptorPtr;
using ::tflite::gpu::metal::RuntimeOptions;
using ::tflite::gpu::ValueId;
using ::tflite::gpu::AlignByN;
using ::tflite::gpu::HalfBits;
using ::tflite::gpu::MemoryStrategy;
using ::tflite::gpu::TensorUsageRecord;
@implementation TFLInferenceContext {
std::vector<TFLComputeTask*> _computeTasks;
std::vector<ValueId> _outputIds;
id<MTLDevice> _device;
RuntimeOptions _options;
}
- (absl::Status)compileModelWithDevice:(id<MTLDevice>)device
taskDescriptors:(const std::vector<ComputeTaskDescriptorPtr>&)taskDescriptors
outputBufferIDs:(const std::vector<ValueId>&)requestedOutputBufferIDs
runtimeOptions:(const RuntimeOptions&)options {
_device = device;
_outputIds = requestedOutputBufferIDs;
_options = options;
// Metal resources are created here.
for (const auto& node : taskDescriptors) {
TFLComputeTask* task = [[TFLComputeTask alloc] init];
RETURN_IF_ERROR([task compileWithDevice:_device taskDescriptor:node runtimeOptions:_options]);
_computeTasks.emplace_back(task);
}
return absl::OkStatus();
}
- (absl::Status)setInputDimensions:(const std::map<ValueId, BHWC>&)inputDimensions
outputDimensions:(std::map<ValueId, BHWC>*)outputDimensions
taskDescriptors:(const std::vector<ComputeTaskDescriptorPtr>&)taskDescriptors {
// These maps contain all input/output/intermediate buffers shared across model.
std::map<ValueId, BHWC> dimensions = inputDimensions;
std::map<ValueId, id<MTLBuffer>> buffers;
std::set<ValueId> preallocatedIds;
// Insert uninitialized input buffers. This buffers will be set externally.
for (auto dimension : dimensions) {
buffers[dimension.first] = nil;
preallocatedIds.insert(dimension.first);
}
for (const auto& outputId : _outputIds) {
preallocatedIds.insert(outputId);
}
for (auto& task : _computeTasks) {
// The same device must be used here as well as on shader compilation stage.
RETURN_IF_ERROR([task setInputDimensionsWithDevice:_device dimensions:&dimensions]);
}
for (auto id : _outputIds) {
(*outputDimensions)[id] = dimensions[id];
}
// TODO(ypisarchyk): it make sense to move it to separate function
// Generate usage records for each intermediate tensor in order of their first_task
std::vector<TensorUsageRecord<size_t>> usageRecords;
std::map<ValueId, size_t> usageRecordIds;
for (uint32_t i = 0; i < taskDescriptors.size(); ++i) {
auto outputId = taskDescriptors[i]->output_buffer.id;
if (!preallocatedIds.count(outputId)) {
if (!usageRecordIds.count(outputId)) {
const auto it = dimensions.find(outputId);
if (it == dimensions.end()) {
return absl::InternalError("Dimensions for intermediate tensor not found.");
}
usageRecordIds[outputId] = usageRecords.size();
usageRecords.emplace_back(it->second.w * it->second.h * AlignByN(it->second.c, 4), i, i);
} else {
usageRecords[usageRecordIds[outputId]].last_task = i;
}
}
for (auto& buffer : taskDescriptors[i]->input_buffers) {
if (!preallocatedIds.count(buffer.id)) {
usageRecords[usageRecordIds[buffer.id]].last_task = i;
}
}
}
tflite::gpu::ObjectsAssignment<size_t> assignment;
RETURN_IF_ERROR(AssignObjectsToTensors(usageRecords, MemoryStrategy::GREEDY_BEST, &assignment));
auto objectsCount = assignment.object_sizes.size();
std::vector<id<MTLBuffer>> sharedBuffers(objectsCount);
size_t dataTypeSize = _options.storage_precision == RuntimeOptions::Precision::FP32
? sizeof(float)
: sizeof(HalfBits);
// allocate buffers for each shared object
for (size_t i = 0; i < objectsCount; ++i) {
// Initialize metal buffer
NSUInteger bufferSize = dataTypeSize * assignment.object_sizes[i];
#if (defined(__MAC_10_14) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_14) || \
(defined(__IPHONE_12_0) && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_12_0) || \
(defined(__TVOS_12_0) && __TV_OS_VERSION_MIN_REQUIRED >= __TVOS_12_0)
if (bufferSize > [_device maxBufferLength]) {
std::string error("Tensor id: ");
error += std::to_string(assignment.object_ids[i]) +
" with size: " + std::to_string(bufferSize) +
" exceeds MTLDevice maxBufferLength: " + std::to_string([_device maxBufferLength]);
return absl::ResourceExhaustedError(error);
}
#endif
#if defined(__MAC_10_12) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_12
if ([_device currentAllocatedSize] + bufferSize > [_device recommendedMaxWorkingSetSize]) {
std::string error("Out of memory in MTLBuffer allocation. Currently allocated: ");
error += std::to_string([_device currentAllocatedSize]);
return absl::ResourceExhaustedError(error);
}
#endif
sharedBuffers[i] = [_device newBufferWithLength:bufferSize
options:MTLResourceStorageModeShared];
}
for (auto& task : _computeTasks) {
RETURN_IF_ERROR([task assignBuffers:&buffers
outputIds:_outputIds
usageRecordIds:usageRecordIds
sharedBufferIds:assignment.object_ids
sharedBuffers:sharedBuffers]);
}
return absl::OkStatus();
}
- (void)encodeWithEncoder:(id<MTLComputeCommandEncoder>)commandEncoder
inputOutputBuffers:(const std::map<ValueId, id<MTLBuffer>>&)inputOutputBuffers
encoderBlock:(id<MTLComputeCommandEncoder> (^)(bool isLast))encoderBlock {
for (int i = 0; i < _computeTasks.size(); ++i) {
auto& task = _computeTasks[i];
[task encodeWithEncoder:commandEncoder inputOutputBuffers:inputOutputBuffers];
if (encoderBlock != nil) {
commandEncoder = encoderBlock(i == _computeTasks.size() - 1);
}
}
}
@end