Provide builtin_op_kernels target with Ruy and GEMV caching unconditionally enabled

PiperOrigin-RevId: 311776871
Change-Id: I948ea5524fdcf17c36e6219fb1ae18fafdecee4e
This commit is contained in:
T.J. Alumbaugh 2020-05-15 12:09:13 -07:00 committed by TensorFlower Gardener
parent 73d63eefe6
commit 0b59eaf0bf

View File

@ -235,6 +235,15 @@ cc_library(
visibility = ["//visibility:private"],
)
# Source-less helper library: it exists only to carry preprocessor defines.
# Targets that list it in `deps` are built with TFLITE_WITH_RUY and
# TFLITE_WITH_RUY_GEMV set (per the target name: Ruy plus GEMV caching).
cc_library(
    name = "tflite_with_ruy_and_caching_enabled",
    defines = [
        "TFLITE_WITH_RUY",
        "TFLITE_WITH_RUY_GEMV",
    ],
    visibility = ["//visibility:private"],
)
cc_library(
name = "tflite_with_ruy_default",
build_for_embedded = True,
@ -423,140 +432,157 @@ cc_library(
],
)
# Source files of the builtin op kernels, shared by every cc_library flavour
# of the kernels so that they all compile the same set of ops.
# Kept in alphabetical order, one file per line.
BUILTIN_KERNEL_SRCS = [
    "activations.cc",
    "add.cc",
    "add_n.cc",
    "arg_min_max.cc",
    "audio_spectrogram.cc",
    "basic_rnn.cc",
    "batch_matmul.cc",
    "batch_to_space_nd.cc",
    "bidirectional_sequence_lstm.cc",
    "bidirectional_sequence_rnn.cc",
    "cast.cc",
    "ceil.cc",
    "comparisons.cc",
    "concatenation.cc",
    "conv.cc",
    "densify.cc",
    "depth_to_space.cc",
    "depthwise_conv.cc",
    "dequantize.cc",
    "detection_postprocess.cc",
    "div.cc",
    "elementwise.cc",
    "embedding_lookup.cc",
    "embedding_lookup_sparse.cc",
    "exp.cc",
    "expand_dims.cc",
    "fake_quant.cc",
    "fill.cc",
    "floor.cc",
    "floor_div.cc",
    "floor_mod.cc",
    "fully_connected.cc",
    "gather.cc",
    "gather_nd.cc",
    "hashtable_lookup.cc",
    "if.cc",
    "l2norm.cc",
    "local_response_norm.cc",
    "logical.cc",
    "lsh_projection.cc",
    "lstm.cc",
    "matrix_diag.cc",
    "matrix_set_diag.cc",
    "maximum_minimum.cc",
    "mfcc.cc",
    "mirror_pad.cc",
    "mul.cc",
    "neg.cc",
    "non_max_suppression.cc",
    "numeric_verify.cc",
    "one_hot.cc",
    "pack.cc",
    "pad.cc",
    "pooling.cc",
    "pow.cc",
    "quantize.cc",
    "range.cc",
    "rank.cc",
    "reduce.cc",
    "reshape.cc",
    "resize_bilinear.cc",
    "resize_nearest_neighbor.cc",
    "reverse.cc",
    "reverse_sequence.cc",
    "round.cc",
    "scatter_nd.cc",
    "segment_sum.cc",
    "select.cc",
    "shape.cc",
    "skip_gram.cc",
    "slice.cc",
    "space_to_batch_nd.cc",
    "space_to_depth.cc",
    "sparse_to_dense.cc",
    "split.cc",
    "split_v.cc",
    "squared_difference.cc",
    "squeeze.cc",
    "strided_slice.cc",
    "sub.cc",
    "svdf.cc",
    "tile.cc",
    "topk_v2.cc",
    "transpose.cc",
    "transpose_conv.cc",
    "unidirectional_sequence_lstm.cc",
    "unidirectional_sequence_rnn.cc",
    "unique.cc",
    "unpack.cc",
    "where.cc",
    "while.cc",
    "zeros_like.cc",
]
# Dependencies common to every cc_library flavour of the builtin op kernels.
# Each rule concatenates its own extras onto this list, so only put labels here
# that all flavours need. Element order is kept exactly as originally written.
BUILTIN_KERNEL_DEPS = [
    # Targets defined in this package.
    ":cpu_backend_context",
    ":cpu_backend_gemm",
    ":cpu_backend_threadpool",
    ":eigen_support",
    ":kernel_util",
    ":lstm_eval",
    ":lstm_shared",
    ":op_macros",
    ":padding",
    # External / third-party libraries.
    "@com_google_absl//absl/memory",
    "@com_google_absl//absl/strings",
    "//third_party/eigen3",
    "@flatbuffers",
    # TF Lite framework and C API.
    "//tensorflow/lite:framework_lib",
    "//tensorflow/lite:minimal_logging",
    "//tensorflow/lite:string_util",
    "//tensorflow/lite/c:common",
    # Libraries under kernels/internal.
    "//tensorflow/lite/kernels/internal:audio_utils",
    "//tensorflow/lite/kernels/internal:common",
    "//tensorflow/lite/kernels/internal:compatibility",
    "//tensorflow/lite/kernels/internal:cpu_check",
    "//tensorflow/lite/kernels/internal:kernel_utils",
    "//tensorflow/lite/kernels/internal:optimized",
    "//tensorflow/lite/kernels/internal:optimized_base",
    "//tensorflow/lite/kernels/internal:quantization_util",
    "//tensorflow/lite/kernels/internal:reference_base",
    "//tensorflow/lite/kernels/internal:strided_slice_logic",
    "//tensorflow/lite/kernels/internal:tensor",
    "//tensorflow/lite/kernels/internal:tensor_utils",
    "//tensorflow/lite/kernels/internal:types",
]
# Default build of the builtin op kernels.
#
# The sources and the common dependencies come from BUILTIN_KERNEL_SRCS and
# BUILTIN_KERNEL_DEPS rather than from inline lists, so this rule and
# :builtin_op_kernels_ruy_and_caching always compile the same set of kernels.
# Passing `srcs`/`deps` a second time as inline copies of those lists is an
# error (an attribute may only be given once per rule), so only the shared
# constants are used here.
cc_library(
    name = "builtin_op_kernels",
    srcs = BUILTIN_KERNEL_SRCS,
    hdrs = [
        "dequantize.h",
    ],
    copts = tflite_copts() + tf_opts_nortti_if_android() + EXTRA_EIGEN_COPTS,
    visibility = ["//visibility:private"],
    # farmhash is appended per rule; it is not part of BUILTIN_KERNEL_DEPS.
    deps = BUILTIN_KERNEL_DEPS + ["@farmhash_archive//:farmhash"],
)
# Variant of the builtin op kernels in which Ruy is unconditionally enabled,
# together with caching on GEMV operations. Meant for TF Lite deployments that
# are not allowed to pass custom copts, e.g. b/156119344
cc_library(
    name = "builtin_op_kernels_ruy_and_caching",
    srcs = BUILTIN_KERNEL_SRCS,
    hdrs = ["dequantize.h"],
    copts = tflite_copts() + tf_opts_nortti_if_android() + EXTRA_EIGEN_COPTS,
    visibility = ["//visibility:private"],
    deps = BUILTIN_KERNEL_DEPS + [
        "@farmhash_archive//:farmhash",
        # Brings in the TFLITE_WITH_RUY / TFLITE_WITH_RUY_GEMV defines.
        ":tflite_with_ruy_and_caching_enabled",
    ],
)
cc_library(
@ -673,6 +699,22 @@ cc_library(
],
)
# Builds register.cc against :builtin_op_kernels_ruy_and_caching, i.e. the
# kernel library compiled with Ruy and GEMV caching switched on.
# TODO(b/156664104) Remove once runtime flag available.
cc_library(
    name = "builtin_ops_ruy_and_caching_enabled",
    srcs = ["register.cc"],
    hdrs = [
        "builtin_op_kernels.h",
        "fully_connected.h",
        "register.h",
    ],
    deps = [
        ":builtin_op_kernels_ruy_and_caching",
        "//tensorflow/lite:framework_lib",
        "//tensorflow/lite/c:common",
    ],
)
# The builtin_ops target will resolve to optimized kernels when available. This
# target uses reference kernels only, and is useful for validation and testing.
# It should *not* generally be used in production.