- Disable tuning on Apple: we don't want to use an in-order-tuned
kernel on an Apple CPU. Even with tuning enabled we shouldn't, as Apple CPUs are out-of-order, but we don't want to risk misdetection by the tuning nanobenchmark.
- Whenever tuning is not enabled, have the tuning resolver return immediately, without even the overhead of querying a timestamp.
PiperOrigin-RevId: 259036253
This commit is contained in:
parent
f14756c25c
commit
d7cb6d0a3f
@ -49,4 +49,11 @@ limitations under the License.
|
||||
#define RUY_DONOTUSEDIRECTLY_NEON_64 \
|
||||
(RUY_DONOTUSEDIRECTLY_NEON && RUY_DONOTUSEDIRECTLY_ARM_64)
|
||||
|
||||
// Detect Apple platforms, keyed on the compiler-predefined __APPLE__ macro.
|
||||
#ifdef __APPLE__
|
||||
#define RUY_DONOTUSEDIRECTLY_APPLE 1
|
||||
#else
|
||||
#define RUY_DONOTUSEDIRECTLY_APPLE 0
|
||||
#endif
|
||||
|
||||
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_
|
||||
|
@ -18,13 +18,11 @@ limitations under the License.
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
||||
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||
#include "tensorflow/lite/experimental/ruy/time.h"
|
||||
|
||||
namespace ruy {
|
||||
|
||||
#if RUY_PLATFORM(NEON_64)
|
||||
#ifdef RUY_IMPLEMENT_TUNING
|
||||
|
||||
namespace {
|
||||
|
||||
@ -131,7 +129,7 @@ Tuning TuningResolver::ResolveNow() {
|
||||
return is_probably_inorder ? Tuning::kInOrder : Tuning::kOutOfOrder;
|
||||
}
|
||||
|
||||
#else // not RUY_PLATFORM(NEON_64)
|
||||
#else // not defined RUY_IMPLEMENT_TUNING
|
||||
|
||||
float TuningResolver::EvalRatio() { return 0; }
|
||||
float TuningResolver::ThresholdRatio() { return 0; }
|
||||
@ -146,9 +144,7 @@ TuningResolver::TuningResolver()
|
||||
: expiry_duration_(DurationFromSeconds(kExpirySecs)) {}
|
||||
|
||||
Tuning TuningResolver::Resolve() {
|
||||
#if !RUY_OPT_ENABLED(RUY_OPT_TUNING)
|
||||
return Tuning::kOutOfOrder;
|
||||
#endif
|
||||
#ifdef RUY_IMPLEMENT_TUNING
|
||||
if (unresolved_tuning_ != Tuning::kAuto) {
|
||||
return unresolved_tuning_;
|
||||
}
|
||||
@ -160,6 +156,9 @@ Tuning TuningResolver::Resolve() {
|
||||
last_resolved_timepoint_ = new_timepoint;
|
||||
last_resolved_tuning_ = ResolveNow();
|
||||
return last_resolved_tuning_;
|
||||
#else
|
||||
return Tuning::kOutOfOrder;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace ruy
|
||||
|
@ -74,8 +74,21 @@ limitations under the License.
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/experimental/ruy/opt_set.h"
|
||||
#include "tensorflow/lite/experimental/ruy/platform.h"
|
||||
#include "tensorflow/lite/experimental/ruy/time.h"
|
||||
|
||||
// Tuning only implemented on NEON_64 at the moment (see assembly code
|
||||
// in the nano-benchmark) and not on Apple (some Apple CPUs produce incorrect
|
||||
// results on in-order-tuned kernels combining ARM and NEON load instructions
|
||||
// and NEON `ins` instructions).
|
||||
//
|
||||
// When tuning is not implemented, we simply always use Tuning::kOutOfOrder.
|
||||
#if RUY_OPT_ENABLED(RUY_OPT_TUNING) && RUY_PLATFORM(NEON_64) && \
|
||||
!RUY_PLATFORM(APPLE)
|
||||
#define RUY_IMPLEMENT_TUNING
|
||||
#endif
|
||||
|
||||
namespace ruy {
|
||||
|
||||
enum class Tuning {
|
||||
|
@ -33,6 +33,7 @@ TEST(TuneTest, TuneTest) {
|
||||
|
||||
tuning_resolver.SetTuning(Tuning::kAuto);
|
||||
|
||||
#ifdef RUY_IMPLEMENT_TUNING
|
||||
for (auto tuning : {Tuning::kOutOfOrder, Tuning::kInOrder}) {
|
||||
tuning_resolver.SetTuning(tuning);
|
||||
ASSERT_TRUE(tuning_resolver.Resolve() == tuning);
|
||||
@ -40,6 +41,7 @@ TEST(TuneTest, TuneTest) {
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
ASSERT_TRUE(tuning_resolver.Resolve() == tuning);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user