diff --git a/tensorflow/lite/experimental/ruy/detect_arm.cc b/tensorflow/lite/experimental/ruy/detect_arm.cc index f40a963ef33..5da92f11ba7 100644 --- a/tensorflow/lite/experimental/ruy/detect_arm.cc +++ b/tensorflow/lite/experimental/ruy/detect_arm.cc @@ -13,189 +13,44 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -/* Temporary dotprod-detection until we can rely on proper feature-detection -such as getauxval on Linux (requires a newer Linux kernel than we can -currently rely on on Android). - -There are two main ways that this could be implemented: using a signal -handler or a fork. The current implementation uses a signal handler. -This is because on current Android, an uncaught signal gives a latency -of over 100 ms. In order for the fork approach to be worthwhile, it would -have to save us the hassle of handling signals, and such an approach thus -has an unavoidable 100ms latency. By contrast, the present signal-handling -approach has low latency. - -Downsides of the current signal-handling approach include: - 1. Setting and restoring signal handlers is not thread-safe: we can't - prevent another thread from interfering with us. We at least prevent - other threads from calling our present code concurrently by using a lock, - but we can't do anything about other threads using their own code to - set signal handlers. - 2. Signal handlers are not entirely portable, e.g. b/132973173 showed that - on Apple platform the EXC_BAD_INSTRUCTION signal is not always caught - by a SIGILL handler (difference between Release and Debug builds). - 3. The signal handler approach looks confusing in a debugger (has to - tell the debugger to 'continue' past the signal every time). Fix: - ``` - (gdb) handle SIGILL nostop noprint pass - ``` - -Here is what the nicer fork-based alternative would look like. -Its only downside, as discussed above, is high latency, 100 ms on Android. - -``` -bool try_asm_snippet(bool (*asm_snippet)()) { - int child_pid = fork(); - if (child_pid == -1) { - // Fork failed. - return false; - } - if (child_pid == 0) { - // Child process code path. Pass the raw boolean return value of - // asm_snippet as exit code (unconventional: 1 means true == success). - _exit(asm_snippet()); - } - - int child_status; - waitpid(child_pid, &child_status, 0); - if (WIFSIGNALED(child_status)) { - // Child process terminated by signal, meaning the instruction was - // not supported. - return false; - } - // Return the exit code of the child, which per child code above was - // the return value of asm_snippet(). - return WEXITSTATUS(child_status); -} -``` -*/ +/* Detection of dotprod instructions on ARM. + * The current Linux-specific code relies on sufficiently new Linux kernels: + * At least Linux 4.15 in general; on Android, at least Linux 4.14.111 thanks to + * a late backport. This was backported just before the Android 10 release, so + * this is leaving out pre-release Android 10 builds as well as earlier Android + * versions. + * + * It is possible to detect instructions in other ways that don't rely on + * an OS-provided feature identification mechanism: + * + * (A) We used to have a SIGILL-handler-based method that worked at least + * on Linux. Its downsides were (1) crashes on a few devices where + * signal handler installation didn't work as intended; (2) additional + * complexity to generalize to other Unix-ish operating systems including + * iOS; (3) source code complexity and fragility of anything installing + * and restoring signal handlers; (4) confusing behavior under a debugger. + * + * (B) We also experimented with a fork-ing approach where a subprocess + * tries the instruction. Compared to (A), this is much simpler and more + * reliable and portable, but also much higher latency on Android where + * an uncaught signal typically causes a 100 ms latency. + * + * Should there be interest in either technique again in the future, + * code implementing both (A) and (B) can be found in earlier revisions of this + * file - in actual code for (A) and in a comment for (B). + */ #include "tensorflow/lite/experimental/ruy/detect_arm.h" -#if defined __aarch64__ && defined __linux__ -#define RUY_IMPLEMENT_DETECT_DOTPROD -#endif - -#ifdef RUY_IMPLEMENT_DETECT_DOTPROD - -#include -#include -#include - -#include -#include -#include -#include // NOLINT(build/c++11) - -// Intentionally keep checking for __linux__ here in case we want to -// extend RUY_IMPLEMENT_DETECT_DOTPROD outside of linux in the future. #ifdef __linux__ #include -#include -#endif - #endif namespace ruy { -#ifdef RUY_IMPLEMENT_DETECT_DOTPROD - namespace { -// Waits until there are no pending SIGILL's. -void wait_until_no_pending_sigill() { - sigset_t pending; - do { - sigemptyset(&pending); - sigpending(&pending); - } while (sigismember(&pending, SIGILL)); -} - -// long-jump buffer used to continue execution after a caught SIGILL. -sigjmp_buf& global_sigjmp_buf_just_before_trying_snippet() { - static sigjmp_buf g; - return g; -} - -// SIGILL signal handler. Long-jumps to just before -// we ran the snippet that we know is the only thing that could have generated -// the SIGILL. -void sigill_handler(int) { - siglongjmp(global_sigjmp_buf_just_before_trying_snippet(), 1); -} - -// Try an asm snippet. Returns true if it passed i.e. ran without generating -// a SIGILL and returned true. Returns false if a SIGILL was generated, or -// if it returned false. -// Other signals are not handled. -bool try_asm_snippet(bool (*asm_snippet)()) { - // This function installs and restores signal handlers. The only way it's ever - // going to be reentrant is with a big lock around it. - static std::mutex mutex; - std::lock_guard lock(mutex); - - // Install the SIGILL signal handler. Save any existing signal handler for - // restoring later. - struct sigaction sigill_action; - memset(&sigill_action, 0, sizeof(sigill_action)); - sigill_action.sa_handler = sigill_handler; - sigemptyset(&sigill_action.sa_mask); - struct sigaction old_action; - sigaction(SIGILL, &sigill_action, &old_action); - - // Try the snippet. - bool got_sigill = - sigsetjmp(global_sigjmp_buf_just_before_trying_snippet(), true); - bool snippet_retval = false; - if (!got_sigill) { - snippet_retval = asm_snippet(); - wait_until_no_pending_sigill(); - } - - // Restore the old signal handler. - sigaction(SIGILL, &old_action, nullptr); - - return snippet_retval && !got_sigill; -} - -bool dotprod_asm_snippet() { - // maratek@ mentioned that for some other ISA extensions (fp16) - // there have been implementations that failed to generate SIGILL even - // though they did not correctly implement the instruction. Just in case - // a similar situation might exist here, we do a simple correctness test. - int result = 0; - asm volatile( - "mov w0, #100\n" - "dup v0.16b, w0\n" - "dup v1.4s, w0\n" - ".word 0x6e809401 // udot v1.4s, v0.16b, v0.16b\n" - "mov %w[result], v1.s[0]\n" - : [ result ] "=r"(result) - : - : "x0", "v0", "v1"); - // Expecting 100 (input accumulator value) + 100 * 100 + ... (repeat 4 times) - return result == 40100; -} - -bool DetectDotprodBySigIllMethod() { - return try_asm_snippet(dotprod_asm_snippet); -} - -// Intentionally keep checking for __linux__ here in case we want to -// extend RUY_IMPLEMENT_DETECT_DOTPROD outside of linux in the future. -#ifdef __linux__ -bool IsLinuxAuxvMethodAvailable() { - struct utsname utsbuf; - uname(&utsbuf); - int major, minor, patch; - if (3 != sscanf(utsbuf.release, "%d.%d.%d", &major, &minor, &patch)) { - return false; - } - // This is implemented in linux 4.14.111, not in 4.14.105. - return major > 4 || - (major == 4 && (minor > 14 || (minor == 14 && patch >= 111))); -} - +#if defined __linux__ && defined __aarch64__ bool DetectDotprodByLinuxAuxvMethod() { // This is the value of HWCAP_ASIMDDP in sufficiently recent Linux headers, // however we need to support building against older headers for the time @@ -208,17 +63,11 @@ bool DetectDotprodByLinuxAuxvMethod() { } // namespace bool DetectDotprod() { -#ifdef __linux__ - if (IsLinuxAuxvMethodAvailable()) { - return DetectDotprodByLinuxAuxvMethod(); - } +#if defined __linux__ && defined __aarch64__ + return DetectDotprodByLinuxAuxvMethod(); #endif - return DetectDotprodBySigIllMethod(); + return false; } -#else // RUY_IMPLEMENT_DETECT_DOTPROD -bool DetectDotprod() { return false; } -#endif // RUY_IMPLEMENT_DETECT_DOTPROD - } // namespace ruy