- Disable tuning on Apple - we don't want to use an in-order-tuned
  kernel on an Apple CPU. Even with tuning enabled we shouldn't pick
  one, as Apple CPUs are out-of-order, but we don't want to risk a
  misdetection by the tuning nanobenchmark.
- Whenever tuning is not enabled, have the tuning resolver just return
  without even the overhead of querying a timestamp.

PiperOrigin-RevId: 259036253
This commit is contained in:
Benoit Jacob 2019-07-19 14:12:27 -07:00 committed by TensorFlower Gardener
parent f14756c25c
commit d7cb6d0a3f
4 changed files with 28 additions and 7 deletions

View File

@ -49,4 +49,11 @@ limitations under the License.
// Detect NEON_64: the conjunction of the NEON and ARM_64 detection macros.
#define RUY_DONOTUSEDIRECTLY_NEON_64 \
(RUY_DONOTUSEDIRECTLY_NEON && RUY_DONOTUSEDIRECTLY_ARM_64)
// Detect APPLE. The macro is always defined: it expands to 1 when the
// compiler predefines __APPLE__ and to 0 otherwise.
#if !defined(__APPLE__)
#define RUY_DONOTUSEDIRECTLY_APPLE 0
#else
#define RUY_DONOTUSEDIRECTLY_APPLE 1
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PLATFORM_H_

View File

@ -18,13 +18,11 @@ limitations under the License.
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/experimental/ruy/opt_set.h"
#include "tensorflow/lite/experimental/ruy/platform.h"
#include "tensorflow/lite/experimental/ruy/time.h"
namespace ruy {
#if RUY_PLATFORM(NEON_64)
#ifdef RUY_IMPLEMENT_TUNING
namespace {
@ -131,7 +129,7 @@ Tuning TuningResolver::ResolveNow() {
return is_probably_inorder ? Tuning::kInOrder : Tuning::kOutOfOrder;
}
#else // not RUY_PLATFORM(NEON_64)
#else // not defined RUY_IMPLEMENT_TUNING
// Definition used on the #else branch, i.e. when RUY_IMPLEMENT_TUNING is not
// defined: always returns 0.
float TuningResolver::EvalRatio() {
  return 0.0f;
}
// Definition used on the #else branch, i.e. when RUY_IMPLEMENT_TUNING is not
// defined: always returns 0.
float TuningResolver::ThresholdRatio() {
  return 0.0f;
}
@ -146,9 +144,7 @@ TuningResolver::TuningResolver()
: expiry_duration_(DurationFromSeconds(kExpirySecs)) {}
Tuning TuningResolver::Resolve() {
#if !RUY_OPT_ENABLED(RUY_OPT_TUNING)
return Tuning::kOutOfOrder;
#endif
#ifdef RUY_IMPLEMENT_TUNING
if (unresolved_tuning_ != Tuning::kAuto) {
return unresolved_tuning_;
}
@ -160,6 +156,9 @@ Tuning TuningResolver::Resolve() {
last_resolved_timepoint_ = new_timepoint;
last_resolved_tuning_ = ResolveNow();
return last_resolved_tuning_;
#else
return Tuning::kOutOfOrder;
#endif
}
} // namespace ruy

View File

@ -74,8 +74,21 @@ limitations under the License.
#include <cstdint>
#include "tensorflow/lite/experimental/ruy/opt_set.h"
#include "tensorflow/lite/experimental/ruy/platform.h"
#include "tensorflow/lite/experimental/ruy/time.h"
// RUY_IMPLEMENT_TUNING is defined only when all of the following hold:
//   - RUY_OPT_TUNING is enabled in the opt set;
//   - the platform is NEON_64 (tuning is only implemented there at the
//     moment, see assembly code in the nano-benchmark);
//   - the platform is not Apple (some Apple CPUs produce incorrect results
//     on in-order-tuned kernels combining ARM and NEON load instructions
//     and NEON `ins` instructions).
//
// When tuning is not implemented, we simply always use Tuning::kOutOfOrder.
#if RUY_OPT_ENABLED(RUY_OPT_TUNING) && RUY_PLATFORM(NEON_64) && \
!RUY_PLATFORM(APPLE)
#define RUY_IMPLEMENT_TUNING
#endif
namespace ruy {
enum class Tuning {

View File

@ -33,6 +33,7 @@ TEST(TuneTest, TuneTest) {
tuning_resolver.SetTuning(Tuning::kAuto);
#ifdef RUY_IMPLEMENT_TUNING
for (auto tuning : {Tuning::kOutOfOrder, Tuning::kInOrder}) {
tuning_resolver.SetTuning(tuning);
ASSERT_TRUE(tuning_resolver.Resolve() == tuning);
@ -40,6 +41,7 @@ TEST(TuneTest, TuneTest) {
std::this_thread::sleep_for(std::chrono::seconds(1));
ASSERT_TRUE(tuning_resolver.Resolve() == tuning);
}
#endif
}
} // namespace