Specify -O3 and, on ARM32, -mfpu=neon as rule copts, for all our binary rules.
See the comment above the RUY_COPTS definition. PiperOrigin-RevId: 262564280
This commit is contained in:
parent
a79c52ed09
commit
6c526e012c
@ -7,6 +7,25 @@ load(":ruy_test_ext.bzl", "ruy_test_ext_defines", "ruy_test_ext_deps")
|
|||||||
load(":ruy_test.bzl", "ruy_benchmark", "ruy_benchmark_opt_sets", "ruy_test")
|
load(":ruy_test.bzl", "ruy_benchmark", "ruy_benchmark_opt_sets", "ruy_test")
|
||||||
load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite")
|
load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite")
|
||||||
|
|
||||||
|
# 1. Enable -mfpu=neon unconditionally on ARM32. If it turns out that we need to support
|
||||||
|
# ARM32 without NEON then we'll implement runtime detection and dispatch at that point.
|
||||||
|
# 2. Explicitly pass -O3 on mobile configs where just "-c opt" means "optimize for code size".
|
||||||
|
# We would want to only do that when compilation_mode is "opt", but limitations of
|
||||||
|
# the "select" keyword (no nested selects, no AND boolean) seem to make that difficult
|
||||||
|
# at the moment. For debugging purposes, this can be overridden on the command line, e.g.
|
||||||
|
# bazel build -c dbg --copt=-O0 ...
|
||||||
|
RUY_COPTS = select({
|
||||||
|
"//tensorflow:android_arm64": [
|
||||||
|
"-O3",
|
||||||
|
],
|
||||||
|
"//tensorflow:android_arm": [
|
||||||
|
"-O3",
|
||||||
|
"-mfpu=neon",
|
||||||
|
],
|
||||||
|
"//conditions:default": [
|
||||||
|
],
|
||||||
|
})
|
||||||
|
|
||||||
package(
|
package(
|
||||||
default_visibility = ["//visibility:private"],
|
default_visibility = ["//visibility:private"],
|
||||||
licenses = ["notice"], # Apache 2.0
|
licenses = ["notice"], # Apache 2.0
|
||||||
@ -15,28 +34,33 @@ package(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "platform",
|
name = "platform",
|
||||||
hdrs = ["platform.h"],
|
hdrs = ["platform.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "check_macros",
|
name = "check_macros",
|
||||||
hdrs = ["check_macros.h"],
|
hdrs = ["check_macros.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = ["//tensorflow/lite/kernels/internal:compatibility"],
|
deps = ["//tensorflow/lite/kernels/internal:compatibility"],
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "opt_set",
|
name = "opt_set",
|
||||||
hdrs = ["opt_set.h"],
|
hdrs = ["opt_set.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "time",
|
name = "time",
|
||||||
hdrs = ["time.h"],
|
hdrs = ["time.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "wait",
|
name = "wait",
|
||||||
srcs = ["wait.cc"],
|
srcs = ["wait.cc"],
|
||||||
hdrs = ["wait.h"],
|
hdrs = ["wait.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [":time"],
|
deps = [":time"],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -52,6 +76,7 @@ cc_test(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "size_util",
|
name = "size_util",
|
||||||
hdrs = ["size_util.h"],
|
hdrs = ["size_util.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [":check_macros"],
|
deps = [":check_macros"],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -63,6 +88,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"tune.h",
|
"tune.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":opt_set",
|
":opt_set",
|
||||||
":platform",
|
":platform",
|
||||||
@ -95,6 +121,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"allocator.h",
|
"allocator.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
":size_util",
|
":size_util",
|
||||||
@ -113,6 +140,7 @@ cc_test(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "side_pair",
|
name = "side_pair",
|
||||||
hdrs = ["side_pair.h"],
|
hdrs = ["side_pair.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [":check_macros"],
|
deps = [":check_macros"],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -124,6 +152,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"block_map.h",
|
"block_map.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
":opt_set",
|
":opt_set",
|
||||||
@ -141,6 +170,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"blocking_counter.h",
|
"blocking_counter.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
":wait",
|
":wait",
|
||||||
@ -155,6 +185,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"thread_pool.h",
|
"thread_pool.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
deps = [
|
deps = [
|
||||||
":blocking_counter",
|
":blocking_counter",
|
||||||
@ -171,12 +202,14 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"detect_dotprod.h",
|
"detect_dotprod.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "path",
|
name = "path",
|
||||||
hdrs = ["path.h"],
|
hdrs = ["path.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
deps = [
|
deps = [
|
||||||
":platform",
|
":platform",
|
||||||
@ -192,6 +225,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"trace.h",
|
"trace.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":block_map",
|
":block_map",
|
||||||
":check_macros",
|
":check_macros",
|
||||||
@ -208,6 +242,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"context.h",
|
"context.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
deps = [
|
deps = [
|
||||||
":allocator",
|
":allocator",
|
||||||
@ -223,6 +258,7 @@ cc_library(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "matrix",
|
name = "matrix",
|
||||||
hdrs = ["matrix.h"],
|
hdrs = ["matrix.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
deps = [":check_macros"],
|
deps = [":check_macros"],
|
||||||
)
|
)
|
||||||
@ -230,6 +266,7 @@ cc_library(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "spec",
|
name = "spec",
|
||||||
hdrs = ["spec.h"],
|
hdrs = ["spec.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
deps = [":matrix"],
|
deps = [":matrix"],
|
||||||
)
|
)
|
||||||
@ -237,6 +274,7 @@ cc_library(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "internal_matrix",
|
name = "internal_matrix",
|
||||||
hdrs = ["internal_matrix.h"],
|
hdrs = ["internal_matrix.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
":common",
|
":common",
|
||||||
@ -250,6 +288,7 @@ cc_library(
|
|||||||
hdrs = [
|
hdrs = [
|
||||||
"common.h",
|
"common.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
":matrix",
|
":matrix",
|
||||||
@ -272,6 +311,7 @@ cc_library(
|
|||||||
"kernel_common.h",
|
"kernel_common.h",
|
||||||
"kernel_x86.h",
|
"kernel_x86.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
":common",
|
":common",
|
||||||
@ -301,6 +341,7 @@ cc_library(
|
|||||||
"pack_common.h",
|
"pack_common.h",
|
||||||
"pack_x86.h",
|
"pack_x86.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
":common",
|
":common",
|
||||||
@ -317,6 +358,7 @@ cc_library(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "trmul_params",
|
name = "trmul_params",
|
||||||
hdrs = ["trmul_params.h"],
|
hdrs = ["trmul_params.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":internal_matrix",
|
":internal_matrix",
|
||||||
":side_pair",
|
":side_pair",
|
||||||
@ -328,6 +370,7 @@ cc_library(
|
|||||||
name = "trmul",
|
name = "trmul",
|
||||||
srcs = ["trmul.cc"],
|
srcs = ["trmul.cc"],
|
||||||
hdrs = ["trmul.h"],
|
hdrs = ["trmul.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
":allocator",
|
":allocator",
|
||||||
":block_map",
|
":block_map",
|
||||||
@ -359,6 +402,7 @@ cc_library(
|
|||||||
"ruy.h",
|
"ruy.h",
|
||||||
"ruy_advanced.h",
|
"ruy_advanced.h",
|
||||||
],
|
],
|
||||||
|
copts = RUY_COPTS,
|
||||||
visibility = ruy_visibility(),
|
visibility = ruy_visibility(),
|
||||||
deps = [
|
deps = [
|
||||||
":check_macros",
|
":check_macros",
|
||||||
@ -414,6 +458,7 @@ cc_library(
|
|||||||
testonly = True,
|
testonly = True,
|
||||||
srcs = ["pmu.cc"],
|
srcs = ["pmu.cc"],
|
||||||
hdrs = ["pmu.h"],
|
hdrs = ["pmu.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
deps = [":check_macros"],
|
deps = [":check_macros"],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -422,6 +467,7 @@ cc_library(
|
|||||||
name = "test_lib",
|
name = "test_lib",
|
||||||
testonly = True,
|
testonly = True,
|
||||||
hdrs = ["test.h"],
|
hdrs = ["test.h"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
# need defines, not copts, because it's controlling a header, test.h
|
# need defines, not copts, because it's controlling a header, test.h
|
||||||
defines = ruy_test_ext_defines(),
|
defines = ruy_test_ext_defines(),
|
||||||
linkopts = select({
|
linkopts = select({
|
||||||
@ -442,6 +488,7 @@ cc_library(
|
|||||||
ruy_benchmark(
|
ruy_benchmark(
|
||||||
name = "benchmark",
|
name = "benchmark",
|
||||||
srcs = ["benchmark.cc"],
|
srcs = ["benchmark.cc"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
lhs_rhs_accum_dst = [
|
lhs_rhs_accum_dst = [
|
||||||
("f32", "f32", "f32", "f32"),
|
("f32", "f32", "f32", "f32"),
|
||||||
("u8", "u8", "i32", "u8"),
|
("u8", "u8", "i32", "u8"),
|
||||||
@ -455,6 +502,7 @@ ruy_benchmark(
|
|||||||
ruy_test(
|
ruy_test(
|
||||||
name = "test_fast",
|
name = "test_fast",
|
||||||
srcs = ["test_fast.cc"],
|
srcs = ["test_fast.cc"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
lhs_rhs_accum_dst = [
|
lhs_rhs_accum_dst = [
|
||||||
("f32", "f32", "f32", "f32"),
|
("f32", "f32", "f32", "f32"),
|
||||||
("f64", "f32", "f64", "f32"),
|
("f64", "f32", "f64", "f32"),
|
||||||
@ -470,6 +518,7 @@ ruy_test(
|
|||||||
ruy_test(
|
ruy_test(
|
||||||
name = "test_slow",
|
name = "test_slow",
|
||||||
srcs = ["test_slow.cc"],
|
srcs = ["test_slow.cc"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
lhs_rhs_accum_dst = [
|
lhs_rhs_accum_dst = [
|
||||||
("f32", "f32", "f32", "f32"),
|
("f32", "f32", "f32", "f32"),
|
||||||
("u8", "u8", "i32", "u8"),
|
("u8", "u8", "i32", "u8"),
|
||||||
@ -483,6 +532,7 @@ ruy_test(
|
|||||||
ruy_test(
|
ruy_test(
|
||||||
name = "test_special_specs",
|
name = "test_special_specs",
|
||||||
srcs = ["test_special_specs.cc"],
|
srcs = ["test_special_specs.cc"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
lhs_rhs_accum_dst = [
|
lhs_rhs_accum_dst = [
|
||||||
("f32", "f32", "f32", "f32"),
|
("f32", "f32", "f32", "f32"),
|
||||||
("u8", "u8", "i32", "u8"),
|
("u8", "u8", "i32", "u8"),
|
||||||
@ -493,6 +543,7 @@ ruy_test(
|
|||||||
ruy_benchmark_opt_sets(
|
ruy_benchmark_opt_sets(
|
||||||
name = "benchmark_opt_set",
|
name = "benchmark_opt_set",
|
||||||
srcs = ["benchmark.cc"],
|
srcs = ["benchmark.cc"],
|
||||||
|
copts = RUY_COPTS,
|
||||||
lhs_rhs_accum_dst = [
|
lhs_rhs_accum_dst = [
|
||||||
("f32", "f32", "f32", "f32"),
|
("f32", "f32", "f32", "f32"),
|
||||||
("u8", "u8", "i32", "u8"),
|
("u8", "u8", "i32", "u8"),
|
||||||
|
@ -6,12 +6,12 @@ corresponding to tuples of types for LHS, RHS, accumulator
|
|||||||
and destination.
|
and destination.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def ruy_test(name, srcs, lhs_rhs_accum_dst, tags = []):
|
def ruy_test(name, srcs, lhs_rhs_accum_dst, copts, tags = []):
|
||||||
for (lhs, rhs, accum, dst) in lhs_rhs_accum_dst:
|
for (lhs, rhs, accum, dst) in lhs_rhs_accum_dst:
|
||||||
native.cc_test(
|
native.cc_test(
|
||||||
name = "%s_%s_%s_%s_%s" % (name, lhs, rhs, accum, dst),
|
name = "%s_%s_%s_%s_%s" % (name, lhs, rhs, accum, dst),
|
||||||
srcs = srcs,
|
srcs = srcs,
|
||||||
copts = [
|
copts = copts + [
|
||||||
"-DRUY_TEST_LHSSCALAR=%s" % lhs,
|
"-DRUY_TEST_LHSSCALAR=%s" % lhs,
|
||||||
"-DRUY_TEST_RHSSCALAR=%s" % rhs,
|
"-DRUY_TEST_RHSSCALAR=%s" % rhs,
|
||||||
"-DRUY_TEST_ACCUMSCALAR=%s" % accum,
|
"-DRUY_TEST_ACCUMSCALAR=%s" % accum,
|
||||||
@ -24,14 +24,14 @@ def ruy_test(name, srcs, lhs_rhs_accum_dst, tags = []):
|
|||||||
tags = tags,
|
tags = tags,
|
||||||
)
|
)
|
||||||
|
|
||||||
def ruy_benchmark(name, srcs, lhs_rhs_accum_dst):
|
def ruy_benchmark(name, srcs, lhs_rhs_accum_dst, copts):
|
||||||
tags = ["req_dep=@gemmlowp//:profiler"]
|
tags = ["req_dep=@gemmlowp//:profiler"]
|
||||||
for (lhs, rhs, accum, dst) in lhs_rhs_accum_dst:
|
for (lhs, rhs, accum, dst) in lhs_rhs_accum_dst:
|
||||||
native.cc_binary(
|
native.cc_binary(
|
||||||
name = "%s_%s_%s_%s_%s" % (name, lhs, rhs, accum, dst),
|
name = "%s_%s_%s_%s_%s" % (name, lhs, rhs, accum, dst),
|
||||||
testonly = True,
|
testonly = True,
|
||||||
srcs = srcs,
|
srcs = srcs,
|
||||||
copts = [
|
copts = copts + [
|
||||||
"-DRUY_TEST_LHSSCALAR=%s" % lhs,
|
"-DRUY_TEST_LHSSCALAR=%s" % lhs,
|
||||||
"-DRUY_TEST_RHSSCALAR=%s" % rhs,
|
"-DRUY_TEST_RHSSCALAR=%s" % rhs,
|
||||||
"-DRUY_TEST_ACCUMSCALAR=%s" % accum,
|
"-DRUY_TEST_ACCUMSCALAR=%s" % accum,
|
||||||
@ -44,7 +44,7 @@ def ruy_benchmark(name, srcs, lhs_rhs_accum_dst):
|
|||||||
tags = tags,
|
tags = tags,
|
||||||
)
|
)
|
||||||
|
|
||||||
def ruy_benchmark_opt_sets(name, opt_sets, srcs, lhs_rhs_accum_dst):
|
def ruy_benchmark_opt_sets(name, opt_sets, srcs, lhs_rhs_accum_dst, copts):
|
||||||
tags = ["req_dep=@gemmlowp//:profiler"]
|
tags = ["req_dep=@gemmlowp//:profiler"]
|
||||||
for opt_set in opt_sets:
|
for opt_set in opt_sets:
|
||||||
for (lhs, rhs, accum, dst) in lhs_rhs_accum_dst:
|
for (lhs, rhs, accum, dst) in lhs_rhs_accum_dst:
|
||||||
@ -52,7 +52,7 @@ def ruy_benchmark_opt_sets(name, opt_sets, srcs, lhs_rhs_accum_dst):
|
|||||||
name = "%s_%s_%s_%s_%s_%s" % (name, opt_set, lhs, rhs, accum, dst),
|
name = "%s_%s_%s_%s_%s_%s" % (name, opt_set, lhs, rhs, accum, dst),
|
||||||
testonly = True,
|
testonly = True,
|
||||||
srcs = srcs,
|
srcs = srcs,
|
||||||
copts = [
|
copts = copts + [
|
||||||
"-DRUY_TEST_LHSSCALAR=%s" % lhs,
|
"-DRUY_TEST_LHSSCALAR=%s" % lhs,
|
||||||
"-DRUY_TEST_RHSSCALAR=%s" % rhs,
|
"-DRUY_TEST_RHSSCALAR=%s" % rhs,
|
||||||
"-DRUY_TEST_ACCUMSCALAR=%s" % accum,
|
"-DRUY_TEST_ACCUMSCALAR=%s" % accum,
|
||||||
|
Loading…
Reference in New Issue
Block a user