RuntimeOptions replaced with CalculationsPrecision.
PiperOrigin-RevId: 346263927 Change-Id: I73b3035df2490633c526a865069c09209de214ed
This commit is contained in:
parent
0939f0b7a8
commit
cb3870d004
@ -92,6 +92,7 @@ objc_library(
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:model_builder",
|
||||
"//tensorflow/lite/delegates/gpu/common:model_transformer",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:quantization_util",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
|
@ -26,10 +26,10 @@ cc_library(
|
||||
deps = [
|
||||
":compiled_model",
|
||||
":compute_task_descriptor",
|
||||
":runtime_options",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:operations",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
@ -163,8 +163,8 @@ objc_library(
|
||||
":common",
|
||||
":compute_task_descriptor",
|
||||
":metal_arguments",
|
||||
":runtime_options",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
@ -211,9 +211,9 @@ objc_library(
|
||||
":compiled_model",
|
||||
":compute_task",
|
||||
":compute_task_descriptor",
|
||||
":runtime_options",
|
||||
"//tensorflow/lite/delegates/gpu/common:memory_management",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
@ -292,11 +292,6 @@ objc_library(
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "runtime_options",
|
||||
hdrs = ["runtime_options.h"],
|
||||
)
|
||||
|
||||
objc_library(
|
||||
name = "TestBinary",
|
||||
testonly = 1,
|
||||
@ -342,7 +337,6 @@ objc_library(
|
||||
"//tensorflow/lite/delegates/gpu/metal:common",
|
||||
"//tensorflow/lite/delegates/gpu/metal:inference_context",
|
||||
"//tensorflow/lite/delegates/gpu/metal:metal_spatial_tensor",
|
||||
"//tensorflow/lite/delegates/gpu/metal:runtime_options",
|
||||
"//tensorflow/lite/delegates/gpu/metal/kernels:test_util",
|
||||
"@com_google_absl//absl/memory",
|
||||
],
|
||||
|
@ -48,7 +48,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/winograd.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
@ -183,7 +182,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
|
||||
const std::vector<ValueId>& inputs,
|
||||
const std::vector<ValueId>& outputs,
|
||||
const GpuInfo& gpu_info,
|
||||
const RuntimeOptions& options,
|
||||
CalculationsPrecision precision,
|
||||
int* last_value_id,
|
||||
std::map<ValueId, BHWC>* tensor_shapes,
|
||||
std::vector<NodeDescriptor>* nodes) {
|
||||
@ -199,15 +198,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
|
||||
node_desc.src_tensors_ids = inputs;
|
||||
node_desc.dst_tensors_ids = outputs;
|
||||
OperationDef op_def;
|
||||
if (options.storage_precision == RuntimeOptions::Precision::FP32) {
|
||||
op_def.precision = CalculationsPrecision::F32;
|
||||
} else {
|
||||
if (options.accumulator_precision == RuntimeOptions::Precision::FP32) {
|
||||
op_def.precision = CalculationsPrecision::F32_F16;
|
||||
} else {
|
||||
op_def.precision = CalculationsPrecision::F16;
|
||||
}
|
||||
}
|
||||
op_def.precision = precision;
|
||||
DataType data_type = DeduceDataTypeFromPrecision(op_def.precision);
|
||||
TensorDescriptor tensor_descriptor =
|
||||
TensorDescriptor{data_type, TensorStorageType::BUFFER, Layout::HWC};
|
||||
@ -536,7 +527,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
|
||||
} // namespace
|
||||
|
||||
absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info,
|
||||
const RuntimeOptions& options,
|
||||
CalculationsPrecision precision,
|
||||
CompiledModel* compiled_model) {
|
||||
int last_value_id = 0;
|
||||
for (const auto& value : graph.values()) {
|
||||
@ -555,11 +546,11 @@ absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info,
|
||||
}
|
||||
std::vector<NodeDescriptor> node_descs;
|
||||
std::vector<ComputeTaskDescriptorPtr> custom_tasks;
|
||||
auto custom_status =
|
||||
RegisterCustomOps(graph, node, inputs, outputs, options, &custom_tasks);
|
||||
auto custom_status = RegisterCustomOps(graph, node, inputs, outputs,
|
||||
precision, &custom_tasks);
|
||||
if (!custom_status.ok()) {
|
||||
auto primary_status = RegisterPrimaryOps(
|
||||
graph, node, inputs, outputs, gpu_info, options, &last_value_id,
|
||||
graph, node, inputs, outputs, gpu_info, precision, &last_value_id,
|
||||
&compiled_model->tensor_shapes, &node_descs);
|
||||
if (!primary_status.ok()) {
|
||||
return absl::UnimplementedError(
|
||||
|
@ -18,9 +18,9 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
@ -28,7 +28,7 @@ namespace metal {
|
||||
|
||||
// Builds CompiledModel out of GraphFloat32 graph using provided RuntimeOptions.
|
||||
absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info,
|
||||
const RuntimeOptions& options,
|
||||
CalculationsPrecision precision,
|
||||
CompiledModel* compiled_model);
|
||||
|
||||
} // namespace metal
|
||||
|
@ -24,17 +24,17 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
@interface TFLComputeTask : NSObject
|
||||
|
||||
/// Returns empty string or error if shader can't be compiled.
|
||||
- (absl::Status)compileWithDevice:(id<MTLDevice>)device
|
||||
taskDescriptor:(const tflite::gpu::metal::NodeDescriptor&)desc
|
||||
runtimeOptions:(const ::tflite::gpu::metal::RuntimeOptions&)options;
|
||||
precision:(tflite::gpu::CalculationsPrecision)precision;
|
||||
|
||||
/// Updates parameters for inputs/outputs/intermediate tensors
|
||||
- (absl::Status)updateParamsWithDevice:(id<MTLDevice>)device
|
||||
|
@ -26,7 +26,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/common.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::AlignByN;
|
||||
using ::tflite::gpu::BHWC;
|
||||
@ -34,7 +33,7 @@ using ::tflite::gpu::HalfBits;
|
||||
using ::tflite::gpu::metal::ComputeTaskDescriptorPtr;
|
||||
using ::tflite::gpu::metal::CreateComputeProgram;
|
||||
using ::tflite::gpu::metal::DispatchParamsFunction;
|
||||
using ::tflite::gpu::metal::RuntimeOptions;
|
||||
using ::tflite::gpu::CalculationsPrecision;
|
||||
using ::tflite::gpu::metal::UniformsFunction;
|
||||
using ::tflite::gpu::uint3;
|
||||
using ::tflite::gpu::ValueId;
|
||||
@ -73,7 +72,7 @@ struct UniformBuffer {
|
||||
|
||||
- (absl::Status)compileWithDevice:(id<MTLDevice>)device
|
||||
taskDescriptor:(const tflite::gpu::metal::NodeDescriptor&)desc
|
||||
runtimeOptions:(const RuntimeOptions&)options {
|
||||
precision:(CalculationsPrecision)precision; {
|
||||
size_t offset = desc.task->src_tensors_names.size() + desc.task->uniform_buffers.size()
|
||||
+ desc.task->immutable_buffers.size() + 1;
|
||||
RETURN_IF_ERROR(_metal_args.Init(device, offset, &desc.task->args, &desc.task->shader_source));
|
||||
@ -90,13 +89,13 @@ struct UniformBuffer {
|
||||
NSString* toAccumulatorType2 = @"";
|
||||
NSString* toAccumulatorType3 = @"";
|
||||
NSString* toAccumulatorType4 = @"";
|
||||
if (options.storage_precision == RuntimeOptions::Precision::FP32) {
|
||||
if (precision == CalculationsPrecision::F32) {
|
||||
storageType = @"float";
|
||||
accumulatorType = @"float";
|
||||
} else {
|
||||
// FP16
|
||||
storageType = @"half";
|
||||
if (options.accumulator_precision == RuntimeOptions::Precision::FP32) {
|
||||
if (precision == CalculationsPrecision::F32_F16) {
|
||||
accumulatorType = @"float";
|
||||
toAccumulatorType = @"float";
|
||||
toAccumulatorType2 = @"float2";
|
||||
@ -136,10 +135,9 @@ struct UniformBuffer {
|
||||
_uniformBuffers.emplace_back(UniformBuffer{{}, uniform.data_function});
|
||||
}
|
||||
_outputBuffers.emplace_back(OutputBuffer{desc.dst_tensors_ids[0], nil});
|
||||
const bool f32_storage = precision == CalculationsPrecision::F32;
|
||||
for (auto& immutable : desc.task->immutable_buffers) {
|
||||
int padding =
|
||||
4 * (options.storage_precision == RuntimeOptions::Precision::FP32 ? sizeof(float)
|
||||
: sizeof(HalfBits));
|
||||
int padding = 4 * (f32_storage ? sizeof(float) : sizeof(HalfBits));
|
||||
int paddedSize = AlignByN(immutable.data.size(), padding);
|
||||
immutable.data.resize(paddedSize);
|
||||
id<MTLBuffer> metalBuffer = [device newBufferWithBytes:immutable.data.data()
|
||||
|
@ -23,11 +23,11 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
/// Stages of model preprocessing:
|
||||
/// 1. Operations' initialization. All operations are initialized and added into
|
||||
@ -56,7 +56,7 @@ limitations under the License.
|
||||
model:(const tflite::gpu::metal::CompiledModel&)compiledModel
|
||||
inputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)inputBufferIDs
|
||||
outputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)outputBufferIDs
|
||||
runtimeOptions:(const tflite::gpu::metal::RuntimeOptions&)options;
|
||||
precision:(tflite::gpu::CalculationsPrecision)precision;
|
||||
|
||||
/// Inserts all GPU compute tasks into the command encoder.
|
||||
/// @param inputOutputBuffers Must be created and passed into the method with pairs ID:buffer
|
||||
|
@ -22,16 +22,16 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::metal::ComputeTaskDescriptorPtr;
|
||||
using ::tflite::gpu::metal::RuntimeOptions;
|
||||
using ::tflite::gpu::CalculationsPrecision;
|
||||
using ::tflite::gpu::ValueId;
|
||||
using ::tflite::gpu::AlignByN;
|
||||
using ::tflite::gpu::HalfBits;
|
||||
@ -45,7 +45,7 @@ using ::tflite::gpu::TensorUsageRecord;
|
||||
std::vector<ValueId> _inputIds;
|
||||
std::vector<ValueId> _outputIds;
|
||||
id<MTLDevice> _device;
|
||||
RuntimeOptions _options;
|
||||
CalculationsPrecision _precision;
|
||||
std::map<ValueId, BHWC> _tensorShapes;
|
||||
}
|
||||
|
||||
@ -53,17 +53,17 @@ using ::tflite::gpu::TensorUsageRecord;
|
||||
model:(const tflite::gpu::metal::CompiledModel&) compiledModel
|
||||
inputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)inputBufferIDs
|
||||
outputBufferIDs:(const std::vector<tflite::gpu::ValueId>&)outputBufferIDs
|
||||
runtimeOptions:(const RuntimeOptions&)options {
|
||||
precision:(tflite::gpu::CalculationsPrecision)precision {
|
||||
_device = device;
|
||||
_inputIds = inputBufferIDs;
|
||||
_outputIds = outputBufferIDs;
|
||||
_options = options;
|
||||
_precision = precision;
|
||||
// Metal resources are created here.
|
||||
for (const auto& node : compiledModel.nodes) {
|
||||
TFLComputeTask* task = [[TFLComputeTask alloc] init];
|
||||
RETURN_IF_ERROR([task compileWithDevice:_device
|
||||
taskDescriptor:node
|
||||
runtimeOptions:_options]);
|
||||
precision:_precision]);
|
||||
[task setDescription:node.description];
|
||||
_computeTasks.emplace_back(task);
|
||||
}
|
||||
@ -119,9 +119,8 @@ using ::tflite::gpu::TensorUsageRecord;
|
||||
RETURN_IF_ERROR(AssignObjectsToTensors(usageRecords, MemoryStrategy::GREEDY_BEST, &assignment));
|
||||
auto objectsCount = assignment.object_sizes.size();
|
||||
std::vector<id<MTLBuffer>> sharedBuffers(objectsCount);
|
||||
size_t dataTypeSize = _options.storage_precision == RuntimeOptions::Precision::FP32
|
||||
? sizeof(float)
|
||||
: sizeof(HalfBits);
|
||||
const bool f32_storage = _precision == CalculationsPrecision::F32;
|
||||
size_t dataTypeSize = f32_storage ? sizeof(float) : sizeof(HalfBits);
|
||||
|
||||
// allocate buffers for each shared object
|
||||
for (size_t i = 0; i < objectsCount; ++i) {
|
||||
|
@ -174,9 +174,9 @@ cc_library(
|
||||
hdrs = ["custom_registry.h"],
|
||||
deps = [
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor",
|
||||
"//tensorflow/lite/delegates/gpu/metal:runtime_options",
|
||||
],
|
||||
)
|
||||
|
||||
@ -814,6 +814,7 @@ objc_library(
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
"//tensorflow/lite/delegates/gpu/common:operations",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:tensor",
|
||||
@ -823,7 +824,6 @@ objc_library(
|
||||
"//tensorflow/lite/delegates/gpu/metal:common",
|
||||
"//tensorflow/lite/delegates/gpu/metal:compiled_model",
|
||||
"//tensorflow/lite/delegates/gpu/metal:inference_context",
|
||||
"//tensorflow/lite/delegates/gpu/metal:runtime_options",
|
||||
"@FP16",
|
||||
"@com_google_absl//absl/memory",
|
||||
],
|
||||
@ -897,12 +897,12 @@ objc_library(
|
||||
deps = [
|
||||
":test_util",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
"//tensorflow/lite/delegates/gpu/metal:common",
|
||||
"//tensorflow/lite/delegates/gpu/metal:inference_context",
|
||||
"//tensorflow/lite/delegates/gpu/metal:runtime_options",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::ElementwiseAttributes;
|
||||
using ::tflite::gpu::BHWC;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::Axis;
|
||||
using ::tflite::gpu::BHWC;
|
||||
|
@ -28,7 +28,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::Axis;
|
||||
using ::tflite::gpu::BHWC;
|
||||
@ -286,9 +285,6 @@ using ::tflite::gpu::metal::SingleOpModel;
|
||||
}
|
||||
|
||||
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
||||
tflite::gpu::metal::RuntimeOptions options;
|
||||
options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
|
||||
std::map<ValueId, TensorFloat32> inputs_v0;
|
||||
inputs_v0[0] = src_tensor;
|
||||
|
@ -18,9 +18,9 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
@ -29,7 +29,7 @@ namespace metal {
|
||||
absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
|
||||
const std::vector<ValueId>& inputs,
|
||||
const std::vector<ValueId>& outputs,
|
||||
const RuntimeOptions& options,
|
||||
CalculationsPrecision precision,
|
||||
std::vector<ComputeTaskDescriptorPtr>* tasks) {
|
||||
return absl::UnimplementedError("Unsupported op: " + node->operation.type);
|
||||
}
|
||||
|
@ -19,9 +19,9 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
@ -31,7 +31,7 @@ namespace metal {
|
||||
absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
|
||||
const std::vector<ValueId>& inputs,
|
||||
const std::vector<ValueId>& outputs,
|
||||
const RuntimeOptions& options,
|
||||
CalculationsPrecision precision,
|
||||
std::vector<ComputeTaskDescriptorPtr>* tasks);
|
||||
|
||||
} // namespace metal
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::Axis;
|
||||
using ::tflite::gpu::BHWC;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::DataType;
|
||||
using ::tflite::gpu::HWC;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::Axis;
|
||||
using ::tflite::gpu::BHWC;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -25,7 +25,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
|
||||
using ::tflite::NudgeQuantizationRange;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::Axis;
|
||||
using ::tflite::gpu::BHWC;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
using ::tflite::gpu::DataType;
|
||||
|
@ -26,7 +26,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
|
@ -33,7 +33,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
|
||||
namespace tflite {
|
||||
@ -84,11 +84,9 @@ absl::Status SingleOpModel::Invoke() {
|
||||
std::string device_name = std::string([[device name] UTF8String]);
|
||||
GpuInfo gpu_info;
|
||||
GetGpuInfoFromDeviceDescription(device_name, GpuApi::kMetal, &gpu_info);
|
||||
RuntimeOptions options;
|
||||
options.storage_precision = RuntimeOptions::Precision::FP32;
|
||||
options.accumulator_precision = RuntimeOptions::Precision::FP32;
|
||||
CalculationsPrecision precision = CalculationsPrecision::F32;
|
||||
CompiledModel compiled_model;
|
||||
RETURN_IF_ERROR(Compile(graph_, gpu_info, options, &compiled_model));
|
||||
RETURN_IF_ERROR(Compile(graph_, gpu_info, precision, &compiled_model));
|
||||
CompiledModel optimized_model;
|
||||
RETURN_IF_ERROR(ValidateOptimizeModel(input_ids, output_ids, compiled_model, &optimized_model));
|
||||
|
||||
@ -97,7 +95,7 @@ absl::Status SingleOpModel::Invoke() {
|
||||
model:optimized_model
|
||||
inputBufferIDs:input_ids
|
||||
outputBufferIDs:output_ids
|
||||
runtimeOptions:options]);
|
||||
precision:precision]);
|
||||
std::map<ValueId, BHWC> input_dimensions;
|
||||
std::map<ValueId, id<MTLBuffer>> input_buffers;
|
||||
for (auto& input : inputs_) {
|
||||
@ -193,16 +191,14 @@ absl::Status RunGraph(const std::vector<NodeDescriptor>& nodes, id<MTLDevice> de
|
||||
RETURN_IF_ERROR(
|
||||
ValidateOptimizeModel(inputBufferIDs, outputBufferIDs, raw_model, &optimized_model));
|
||||
|
||||
RuntimeOptions options;
|
||||
options.storage_precision = RuntimeOptions::Precision::FP32;
|
||||
options.accumulator_precision = RuntimeOptions::Precision::FP32;
|
||||
CalculationsPrecision precision = CalculationsPrecision::F32;
|
||||
|
||||
TFLInferenceContext* graph = [[TFLInferenceContext alloc] init];
|
||||
RETURN_IF_ERROR([graph compileModelWithDevice:device
|
||||
model:optimized_model
|
||||
inputBufferIDs:inputBufferIDs
|
||||
outputBufferIDs:outputBufferIDs
|
||||
runtimeOptions:options]);
|
||||
precision:precision]);
|
||||
std::map<ValueId, BHWC> inputDimensions;
|
||||
std::map<ValueId, std::vector<float>> inputBuffersCPU;
|
||||
std::map<ValueId, id<MTLBuffer>> inputBuffersGPU;
|
||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
|
||||
using ::tflite::gpu::ConvolutionTransposedAttributes;
|
||||
using ::tflite::gpu::BHWC;
|
||||
|
@ -26,7 +26,6 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
|
||||
|
||||
using ::tflite::gpu::BHWC;
|
||||
@ -151,10 +150,6 @@ using ::tflite::gpu::metal::CompareVectors;
|
||||
}
|
||||
}
|
||||
|
||||
tflite::gpu::metal::RuntimeOptions options;
|
||||
options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
|
||||
tflite::gpu::metal::Winograd4x4To36Attributes attr;
|
||||
attr.padding.prepended = tflite::gpu::HW(1, 1);
|
||||
attr.padding.appended = tflite::gpu::HW(1, 1);
|
||||
@ -229,10 +224,6 @@ using ::tflite::gpu::metal::CompareVectors;
|
||||
attr.biases.shape = tflite::gpu::Linear(1);
|
||||
attr.biases.data.resize(1, 0.0f);
|
||||
|
||||
tflite::gpu::metal::RuntimeOptions options;
|
||||
options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
|
||||
tflite::gpu::OperationDef op_def;
|
||||
op_def.precision = tflite::gpu::CalculationsPrecision::F32;
|
||||
tflite::gpu::TensorDescriptor tensor_descriptor = tflite::gpu::TensorDescriptor{
|
||||
@ -304,10 +295,6 @@ using ::tflite::gpu::metal::CompareVectors;
|
||||
attr.biases.shape = tflite::gpu::Linear(1);
|
||||
attr.biases.data.resize(1, 0.0f);
|
||||
|
||||
tflite::gpu::metal::RuntimeOptions options;
|
||||
options.storage_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
options.accumulator_precision = tflite::gpu::metal::RuntimeOptions::Precision::FP32;
|
||||
|
||||
tflite::gpu::OperationDef op_def;
|
||||
op_def.precision = tflite::gpu::CalculationsPrecision::F32;
|
||||
tflite::gpu::TensorDescriptor tensor_descriptor = tflite::gpu::TensorDescriptor{
|
||||
|
@ -1,38 +0,0 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_RUNTIME_OPTIONS_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_RUNTIME_OPTIONS_H_
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace metal {
|
||||
|
||||
struct RuntimeOptions {
|
||||
enum class Precision {
|
||||
FP16,
|
||||
FP32,
|
||||
};
|
||||
// Buffer storage format. If FP32 then accumulator must be FP32.
|
||||
Precision storage_precision = Precision::FP32;
|
||||
// Accumulator precision. Defines the precision for convolutions.
|
||||
Precision accumulator_precision = Precision::FP32;
|
||||
};
|
||||
|
||||
} // namespace metal
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_RUNTIME_OPTIONS_H_
|
@ -45,10 +45,11 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/metal/runtime_options.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/minimal_logging.h"
|
||||
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace metal {
|
||||
@ -338,19 +339,17 @@ class Delegate {
|
||||
GpuInfo gpu_info;
|
||||
GetGpuInfoFromDeviceDescription(device_name, GpuApi::kMetal, &gpu_info);
|
||||
size_t storage_type_size;
|
||||
RuntimeOptions runtime_options;
|
||||
CalculationsPrecision precision;
|
||||
if (options_.allow_precision_loss) {
|
||||
storage_type_size = sizeof(HalfBits);
|
||||
runtime_options.storage_precision = RuntimeOptions::Precision::FP16;
|
||||
if (gpu_info.IsRoundToNearestSupported()) {
|
||||
runtime_options.accumulator_precision = RuntimeOptions::Precision::FP16;
|
||||
precision = CalculationsPrecision::F16;
|
||||
} else {
|
||||
runtime_options.accumulator_precision = RuntimeOptions::Precision::FP32;
|
||||
precision = CalculationsPrecision::F32_F16;
|
||||
}
|
||||
} else {
|
||||
storage_type_size = sizeof(float);
|
||||
runtime_options.storage_precision = RuntimeOptions::Precision::FP32;
|
||||
runtime_options.accumulator_precision = RuntimeOptions::Precision::FP32;
|
||||
precision = CalculationsPrecision::F32;
|
||||
}
|
||||
|
||||
// TODO(impjdi): Merge logic with above.
|
||||
@ -435,7 +434,7 @@ class Delegate {
|
||||
|
||||
// TODO(impjdi): Merge these.
|
||||
CompiledModel compiled_model;
|
||||
RETURN_IF_ERROR(Compile(graph, gpu_info, runtime_options, &compiled_model));
|
||||
RETURN_IF_ERROR(Compile(graph, gpu_info, precision, &compiled_model));
|
||||
CompiledModel optimized_model;
|
||||
RETURN_IF_ERROR(ValidateOptimizeModel(input_ids, output_ids, compiled_model, &optimized_model));
|
||||
|
||||
@ -444,7 +443,7 @@ class Delegate {
|
||||
model:optimized_model
|
||||
inputBufferIDs:input_ids
|
||||
outputBufferIDs:output_ids
|
||||
runtimeOptions:runtime_options]);
|
||||
precision:precision]);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user