Provide builtin_op_kernels target with Ruy and GEMV caching unconditionally enabled
PiperOrigin-RevId: 311776871 Change-Id: I948ea5524fdcf17c36e6219fb1ae18fafdecee4e
This commit is contained in:
parent
73d63eefe6
commit
0b59eaf0bf
@ -235,6 +235,15 @@ cc_library(
|
|||||||
visibility = ["//visibility:private"],
|
visibility = ["//visibility:private"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "tflite_with_ruy_and_caching_enabled",
|
||||||
|
defines = [
|
||||||
|
"TFLITE_WITH_RUY",
|
||||||
|
"TFLITE_WITH_RUY_GEMV",
|
||||||
|
],
|
||||||
|
visibility = ["//visibility:private"],
|
||||||
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "tflite_with_ruy_default",
|
name = "tflite_with_ruy_default",
|
||||||
build_for_embedded = True,
|
build_for_embedded = True,
|
||||||
@ -423,140 +432,157 @@ cc_library(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
BUILTIN_KERNEL_SRCS = [
|
||||||
|
"activations.cc",
|
||||||
|
"add.cc",
|
||||||
|
"add_n.cc",
|
||||||
|
"arg_min_max.cc",
|
||||||
|
"audio_spectrogram.cc",
|
||||||
|
"basic_rnn.cc",
|
||||||
|
"batch_matmul.cc",
|
||||||
|
"batch_to_space_nd.cc",
|
||||||
|
"bidirectional_sequence_lstm.cc",
|
||||||
|
"bidirectional_sequence_rnn.cc",
|
||||||
|
"cast.cc",
|
||||||
|
"ceil.cc",
|
||||||
|
"comparisons.cc",
|
||||||
|
"concatenation.cc",
|
||||||
|
"conv.cc",
|
||||||
|
"densify.cc",
|
||||||
|
"depth_to_space.cc",
|
||||||
|
"depthwise_conv.cc",
|
||||||
|
"dequantize.cc",
|
||||||
|
"detection_postprocess.cc",
|
||||||
|
"div.cc",
|
||||||
|
"elementwise.cc",
|
||||||
|
"embedding_lookup.cc",
|
||||||
|
"embedding_lookup_sparse.cc",
|
||||||
|
"exp.cc",
|
||||||
|
"expand_dims.cc",
|
||||||
|
"fake_quant.cc",
|
||||||
|
"fill.cc",
|
||||||
|
"floor.cc",
|
||||||
|
"floor_div.cc",
|
||||||
|
"floor_mod.cc",
|
||||||
|
"fully_connected.cc",
|
||||||
|
"gather.cc",
|
||||||
|
"gather_nd.cc",
|
||||||
|
"hashtable_lookup.cc",
|
||||||
|
"if.cc",
|
||||||
|
"l2norm.cc",
|
||||||
|
"local_response_norm.cc",
|
||||||
|
"logical.cc",
|
||||||
|
"lsh_projection.cc",
|
||||||
|
"lstm.cc",
|
||||||
|
"matrix_diag.cc",
|
||||||
|
"matrix_set_diag.cc",
|
||||||
|
"maximum_minimum.cc",
|
||||||
|
"mfcc.cc",
|
||||||
|
"mirror_pad.cc",
|
||||||
|
"mul.cc",
|
||||||
|
"neg.cc",
|
||||||
|
"non_max_suppression.cc",
|
||||||
|
"numeric_verify.cc",
|
||||||
|
"one_hot.cc",
|
||||||
|
"pack.cc",
|
||||||
|
"pad.cc",
|
||||||
|
"pooling.cc",
|
||||||
|
"pow.cc",
|
||||||
|
"quantize.cc",
|
||||||
|
"range.cc",
|
||||||
|
"rank.cc",
|
||||||
|
"reduce.cc",
|
||||||
|
"reshape.cc",
|
||||||
|
"resize_bilinear.cc",
|
||||||
|
"resize_nearest_neighbor.cc",
|
||||||
|
"reverse.cc",
|
||||||
|
"reverse_sequence.cc",
|
||||||
|
"round.cc",
|
||||||
|
"scatter_nd.cc",
|
||||||
|
"segment_sum.cc",
|
||||||
|
"select.cc",
|
||||||
|
"shape.cc",
|
||||||
|
"skip_gram.cc",
|
||||||
|
"slice.cc",
|
||||||
|
"space_to_batch_nd.cc",
|
||||||
|
"space_to_depth.cc",
|
||||||
|
"sparse_to_dense.cc",
|
||||||
|
"split.cc",
|
||||||
|
"split_v.cc",
|
||||||
|
"squared_difference.cc",
|
||||||
|
"squeeze.cc",
|
||||||
|
"strided_slice.cc",
|
||||||
|
"sub.cc",
|
||||||
|
"svdf.cc",
|
||||||
|
"tile.cc",
|
||||||
|
"topk_v2.cc",
|
||||||
|
"transpose.cc",
|
||||||
|
"transpose_conv.cc",
|
||||||
|
"unidirectional_sequence_lstm.cc",
|
||||||
|
"unidirectional_sequence_rnn.cc",
|
||||||
|
"unique.cc",
|
||||||
|
"unpack.cc",
|
||||||
|
"where.cc",
|
||||||
|
"while.cc",
|
||||||
|
"zeros_like.cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
BUILTIN_KERNEL_DEPS = [
|
||||||
|
":cpu_backend_context",
|
||||||
|
":cpu_backend_gemm",
|
||||||
|
":cpu_backend_threadpool",
|
||||||
|
":eigen_support",
|
||||||
|
":kernel_util",
|
||||||
|
":lstm_eval",
|
||||||
|
":lstm_shared",
|
||||||
|
":op_macros",
|
||||||
|
":padding",
|
||||||
|
"@com_google_absl//absl/memory",
|
||||||
|
"@com_google_absl//absl/strings",
|
||||||
|
"//third_party/eigen3",
|
||||||
|
"@flatbuffers",
|
||||||
|
"//tensorflow/lite:framework_lib",
|
||||||
|
"//tensorflow/lite:minimal_logging",
|
||||||
|
"//tensorflow/lite:string_util",
|
||||||
|
"//tensorflow/lite/c:common",
|
||||||
|
"//tensorflow/lite/kernels/internal:audio_utils",
|
||||||
|
"//tensorflow/lite/kernels/internal:common",
|
||||||
|
"//tensorflow/lite/kernels/internal:compatibility",
|
||||||
|
"//tensorflow/lite/kernels/internal:cpu_check",
|
||||||
|
"//tensorflow/lite/kernels/internal:kernel_utils",
|
||||||
|
"//tensorflow/lite/kernels/internal:optimized",
|
||||||
|
"//tensorflow/lite/kernels/internal:optimized_base",
|
||||||
|
"//tensorflow/lite/kernels/internal:quantization_util",
|
||||||
|
"//tensorflow/lite/kernels/internal:reference_base",
|
||||||
|
"//tensorflow/lite/kernels/internal:strided_slice_logic",
|
||||||
|
"//tensorflow/lite/kernels/internal:tensor",
|
||||||
|
"//tensorflow/lite/kernels/internal:tensor_utils",
|
||||||
|
"//tensorflow/lite/kernels/internal:types",
|
||||||
|
]
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "builtin_op_kernels",
|
name = "builtin_op_kernels",
|
||||||
srcs = [
|
srcs = BUILTIN_KERNEL_SRCS,
|
||||||
"activations.cc",
|
|
||||||
"add.cc",
|
|
||||||
"add_n.cc",
|
|
||||||
"arg_min_max.cc",
|
|
||||||
"audio_spectrogram.cc",
|
|
||||||
"basic_rnn.cc",
|
|
||||||
"batch_matmul.cc",
|
|
||||||
"batch_to_space_nd.cc",
|
|
||||||
"bidirectional_sequence_lstm.cc",
|
|
||||||
"bidirectional_sequence_rnn.cc",
|
|
||||||
"cast.cc",
|
|
||||||
"ceil.cc",
|
|
||||||
"comparisons.cc",
|
|
||||||
"concatenation.cc",
|
|
||||||
"conv.cc",
|
|
||||||
"densify.cc",
|
|
||||||
"depth_to_space.cc",
|
|
||||||
"depthwise_conv.cc",
|
|
||||||
"dequantize.cc",
|
|
||||||
"detection_postprocess.cc",
|
|
||||||
"div.cc",
|
|
||||||
"elementwise.cc",
|
|
||||||
"embedding_lookup.cc",
|
|
||||||
"embedding_lookup_sparse.cc",
|
|
||||||
"exp.cc",
|
|
||||||
"expand_dims.cc",
|
|
||||||
"fake_quant.cc",
|
|
||||||
"fill.cc",
|
|
||||||
"floor.cc",
|
|
||||||
"floor_div.cc",
|
|
||||||
"floor_mod.cc",
|
|
||||||
"fully_connected.cc",
|
|
||||||
"gather.cc",
|
|
||||||
"gather_nd.cc",
|
|
||||||
"hashtable_lookup.cc",
|
|
||||||
"if.cc",
|
|
||||||
"l2norm.cc",
|
|
||||||
"local_response_norm.cc",
|
|
||||||
"logical.cc",
|
|
||||||
"lsh_projection.cc",
|
|
||||||
"lstm.cc",
|
|
||||||
"matrix_diag.cc",
|
|
||||||
"matrix_set_diag.cc",
|
|
||||||
"maximum_minimum.cc",
|
|
||||||
"mfcc.cc",
|
|
||||||
"mirror_pad.cc",
|
|
||||||
"mul.cc",
|
|
||||||
"neg.cc",
|
|
||||||
"non_max_suppression.cc",
|
|
||||||
"numeric_verify.cc",
|
|
||||||
"one_hot.cc",
|
|
||||||
"pack.cc",
|
|
||||||
"pad.cc",
|
|
||||||
"pooling.cc",
|
|
||||||
"pow.cc",
|
|
||||||
"quantize.cc",
|
|
||||||
"range.cc",
|
|
||||||
"rank.cc",
|
|
||||||
"reduce.cc",
|
|
||||||
"reshape.cc",
|
|
||||||
"resize_bilinear.cc",
|
|
||||||
"resize_nearest_neighbor.cc",
|
|
||||||
"reverse.cc",
|
|
||||||
"reverse_sequence.cc",
|
|
||||||
"round.cc",
|
|
||||||
"scatter_nd.cc",
|
|
||||||
"segment_sum.cc",
|
|
||||||
"select.cc",
|
|
||||||
"shape.cc",
|
|
||||||
"skip_gram.cc",
|
|
||||||
"slice.cc",
|
|
||||||
"space_to_batch_nd.cc",
|
|
||||||
"space_to_depth.cc",
|
|
||||||
"sparse_to_dense.cc",
|
|
||||||
"split.cc",
|
|
||||||
"split_v.cc",
|
|
||||||
"squared_difference.cc",
|
|
||||||
"squeeze.cc",
|
|
||||||
"strided_slice.cc",
|
|
||||||
"sub.cc",
|
|
||||||
"svdf.cc",
|
|
||||||
"tile.cc",
|
|
||||||
"topk_v2.cc",
|
|
||||||
"transpose.cc",
|
|
||||||
"transpose_conv.cc",
|
|
||||||
"unidirectional_sequence_lstm.cc",
|
|
||||||
"unidirectional_sequence_rnn.cc",
|
|
||||||
"unique.cc",
|
|
||||||
"unpack.cc",
|
|
||||||
"where.cc",
|
|
||||||
"while.cc",
|
|
||||||
"zeros_like.cc",
|
|
||||||
],
|
|
||||||
hdrs = [
|
hdrs = [
|
||||||
"dequantize.h",
|
"dequantize.h",
|
||||||
],
|
],
|
||||||
copts = tflite_copts() + tf_opts_nortti_if_android() + EXTRA_EIGEN_COPTS,
|
copts = tflite_copts() + tf_opts_nortti_if_android() + EXTRA_EIGEN_COPTS,
|
||||||
visibility = ["//visibility:private"],
|
visibility = ["//visibility:private"],
|
||||||
deps = [
|
deps = BUILTIN_KERNEL_DEPS + ["@farmhash_archive//:farmhash"],
|
||||||
":cpu_backend_context",
|
)
|
||||||
":cpu_backend_gemm",
|
|
||||||
":cpu_backend_threadpool",
|
# Creates a target where Ruy is unconditionally enabled along with caching
|
||||||
":eigen_support",
|
# on GEMV operations. This is useful for TF Lite deployments where custom
|
||||||
":kernel_util",
|
# copts are not allowed (e.g. b/156119344).
|
||||||
":lstm_eval",
|
cc_library(
|
||||||
":lstm_shared",
|
name = "builtin_op_kernels_ruy_and_caching",
|
||||||
":op_macros",
|
srcs = BUILTIN_KERNEL_SRCS,
|
||||||
":padding",
|
hdrs = [
|
||||||
"//tensorflow/lite:framework_lib",
|
"dequantize.h",
|
||||||
"//tensorflow/lite:minimal_logging",
|
|
||||||
"//tensorflow/lite:string_util",
|
|
||||||
"//tensorflow/lite/c:common",
|
|
||||||
"//tensorflow/lite/kernels/internal:audio_utils",
|
|
||||||
"//tensorflow/lite/kernels/internal:common",
|
|
||||||
"//tensorflow/lite/kernels/internal:compatibility",
|
|
||||||
"//tensorflow/lite/kernels/internal:cpu_check",
|
|
||||||
"//tensorflow/lite/kernels/internal:kernel_utils",
|
|
||||||
"//tensorflow/lite/kernels/internal:optimized",
|
|
||||||
"//tensorflow/lite/kernels/internal:optimized_base",
|
|
||||||
"//tensorflow/lite/kernels/internal:quantization_util",
|
|
||||||
"//tensorflow/lite/kernels/internal:reference_base",
|
|
||||||
"//tensorflow/lite/kernels/internal:strided_slice_logic",
|
|
||||||
"//tensorflow/lite/kernels/internal:tensor",
|
|
||||||
"//tensorflow/lite/kernels/internal:tensor_utils",
|
|
||||||
"//tensorflow/lite/kernels/internal:types",
|
|
||||||
"//third_party/eigen3",
|
|
||||||
"@com_google_absl//absl/memory",
|
|
||||||
"@com_google_absl//absl/strings",
|
|
||||||
"@farmhash_archive//:farmhash",
|
|
||||||
"@flatbuffers",
|
|
||||||
],
|
],
|
||||||
|
copts = tflite_copts() + tf_opts_nortti_if_android() + EXTRA_EIGEN_COPTS,
|
||||||
|
visibility = ["//visibility:private"],
|
||||||
|
deps = BUILTIN_KERNEL_DEPS + ["@farmhash_archive//:farmhash"] + [":tflite_with_ruy_and_caching_enabled"],
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
@ -673,6 +699,22 @@ cc_library(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO(b/156664104): Remove once a runtime flag is available.
|
||||||
|
cc_library(
|
||||||
|
name = "builtin_ops_ruy_and_caching_enabled",
|
||||||
|
srcs = ["register.cc"],
|
||||||
|
hdrs = [
|
||||||
|
"builtin_op_kernels.h",
|
||||||
|
"fully_connected.h",
|
||||||
|
"register.h",
|
||||||
|
],
|
||||||
|
deps = [
|
||||||
|
":builtin_op_kernels_ruy_and_caching",
|
||||||
|
"//tensorflow/lite:framework_lib",
|
||||||
|
"//tensorflow/lite/c:common",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
# The builtin_ops target will resolve to optimized kernels when available. This
|
# The builtin_ops target will resolve to optimized kernels when available. This
|
||||||
# target uses reference kernels only, and is useful for validation and testing.
|
# target uses reference kernels only, and is useful for validation and testing.
|
||||||
# It should *not* generally be used in production.
|
# It should *not* generally be used in production.
|
||||||
|
Loading…
Reference in New Issue
Block a user