From 354972df8a10436667aed1b9155e73e64f59953c Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Wed, 4 Jan 2017 10:49:06 -0800
Subject: [PATCH] Move SIMD feature warnings to the first use of intensive CPU
 computation, rather than startup. Change: 143570811

---
 tensorflow/core/BUILD                         |   1 +
 .../core/common_runtime/local_device.cc       |   4 +
 tensorflow/core/platform/cpu_feature_guard.cc | 106 +++++++++++-------
 tensorflow/core/platform/cpu_feature_guard.h  |  32 ++++++
 4 files changed, 102 insertions(+), 41 deletions(-)
 create mode 100644 tensorflow/core/platform/cpu_feature_guard.h

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 95b111f23b1..b50ac83d034 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -239,6 +239,7 @@ cc_library(
         "lib/strings/str_util.h",
         "lib/strings/strcat.h",
         "lib/strings/stringprintf.h",
+        "platform/cpu_feature_guard.h",
         "platform/cpu_info.h",
         "platform/dynamic_annotations.h",
         "platform/env.h",
diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc
index e55456c039a..0a6342ed736 100644
--- a/tensorflow/core/common_runtime/local_device.cc
+++ b/tensorflow/core/common_runtime/local_device.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/eigen_thread_pool.h"
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/platform/cpu_feature_guard.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
@@ -63,6 +64,9 @@ LocalDevice::LocalDevice(const SessionOptions& options,
                          Allocator* device_allocator)
     : Device(options.env, attributes, device_allocator),
       owned_tp_info_(nullptr) {
+  // If we're running on the CPU, log warnings if we're not compiled using the
+  // best flags for performance.
+  port::WarnAboutUnusedCPUFeatures();
   LocalDevice::EigenThreadPoolInfo* tp_info;
   if (use_global_threadpool_) {
     // All ThreadPoolDevices in the process will use this single fixed
diff --git a/tensorflow/core/platform/cpu_feature_guard.cc b/tensorflow/core/platform/cpu_feature_guard.cc
index fd38b614c32..1cfeb2580fa 100644
--- a/tensorflow/core/platform/cpu_feature_guard.cc
+++ b/tensorflow/core/platform/cpu_feature_guard.cc
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/core/platform/cpu_feature_guard.h"
+
+#include <mutex>
+
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -20,6 +24,31 @@ namespace tensorflow {
 namespace port {
 namespace {
 
+// If the CPU feature isn't present, log a fatal error (a warning on Android).
+void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) {
+  if (!TestCPUFeature(feature)) {
+#ifdef __ANDROID__
+    // Some Android emulators seem to indicate they don't support SSE, so to
+    // avoid crashes when testing, switch this to a warning.
+    LOG(WARNING)
+#else
+    LOG(FATAL)
+#endif
+        << "The TensorFlow library was compiled to use " << feature_name
+        << " instructions, but these aren't available on your machine.";
+  }
+}
+
+// If the CPU feature is present, log a warning that it is not being used.
+void WarnIfFeatureUnused(CPUFeature feature, const string& feature_name) {
+  if (TestCPUFeature(feature)) {
+    LOG(WARNING) << "The TensorFlow library wasn't compiled to use "
+                 << feature_name
+                 << " instructions, but these are available on your machine "
+                    "and could speed up CPU computations.";
+  }
+}
+
 // Raises an error if the binary has been compiled for a CPU feature (like AVX)
 // that isn't available on the current machine. It also warns of performance
 // loss if there's a feature available that's not being used.
@@ -31,76 +60,71 @@ class CPUFeatureGuard {
   CPUFeatureGuard() {
 #ifdef __SSE__
     CheckFeatureOrDie(CPUFeature::SSE, "SSE");
-#else
-    WarnIfFeatureUnused(CPUFeature::SSE, "SSE");
 #endif  // __SSE__
 #ifdef __SSE2__
     CheckFeatureOrDie(CPUFeature::SSE2, "SSE2");
-#else
-    WarnIfFeatureUnused(CPUFeature::SSE2, "SSE2");
 #endif  // __SSE2__
 #ifdef __SSE3__
     CheckFeatureOrDie(CPUFeature::SSE3, "SSE3");
-#else
-    WarnIfFeatureUnused(CPUFeature::SSE3, "SSE3");
 #endif  // __SSE3__
 #ifdef __SSE4_1__
     CheckFeatureOrDie(CPUFeature::SSE4_1, "SSE4.1");
-#else
-    WarnIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1");
 #endif  // __SSE4_1__
 #ifdef __SSE4_2__
     CheckFeatureOrDie(CPUFeature::SSE4_2, "SSE4.2");
-#else
-    WarnIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2");
 #endif  // __SSE4_2__
 #ifdef __AVX__
     CheckFeatureOrDie(CPUFeature::AVX, "AVX");
-#else
-    WarnIfFeatureUnused(CPUFeature::AVX, "AVX");
 #endif  // __AVX__
 #ifdef __AVX2__
     CheckFeatureOrDie(CPUFeature::AVX2, "AVX2");
-#else
-    WarnIfFeatureUnused(CPUFeature::AVX2, "AVX2");
 #endif  // __AVX2__
 #ifdef __AVX512F__
     CheckFeatureOrDie(CPUFeature::AVX512F, "AVX512F");
-#else
-    WarnIfFeatureUnused(CPUFeature::AVX512F, "AVX512F");
 #endif  // __AVX512F__
 #ifdef __FMA__
     CheckFeatureOrDie(CPUFeature::FMA, "FMA");
-#else
-    WarnIfFeatureUnused(CPUFeature::FMA, "FMA");
 #endif  // __FMA__
   }
-
-  void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) {
-    if (!TestCPUFeature(feature)) {
-#ifdef __ANDROID__
-      // Some Android emulators seem to indicate they don't support SSE, so to
-      // avoid crashes when testing, switch this to a warning.
-      LOG(WARNING)
-#else
-      LOG(FATAL)
-#endif
-          << "The TensorFlow library was compiled to use " << feature_name
-          << " instructions, but these aren't available on your machine.";
-    }
-  }
-
-  void WarnIfFeatureUnused(CPUFeature feature, const string& feature_name) {
-    if (TestCPUFeature(feature)) {
-      LOG(WARNING) << "The TensorFlow library wasn't compiled to use "
-                   << feature_name
-                   << " instructions, but these are available on your machine "
-                      "and could speed up CPU computations.";
-    }
-  }
 };
 
 CPUFeatureGuard g_cpu_feature_guard_singleton;
+
+std::once_flag g_cpu_feature_guard_warn_once_flag;
+
 }  // namespace
+
+void WarnAboutUnusedCPUFeatures() {
+  std::call_once(g_cpu_feature_guard_warn_once_flag, [] {
+#ifndef __SSE__
+    WarnIfFeatureUnused(CPUFeature::SSE, "SSE");
+#endif  // __SSE__
+#ifndef __SSE2__
+    WarnIfFeatureUnused(CPUFeature::SSE2, "SSE2");
+#endif  // __SSE2__
+#ifndef __SSE3__
+    WarnIfFeatureUnused(CPUFeature::SSE3, "SSE3");
+#endif  // __SSE3__
+#ifndef __SSE4_1__
+    WarnIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1");
+#endif  // __SSE4_1__
+#ifndef __SSE4_2__
+    WarnIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2");
+#endif  // __SSE4_2__
+#ifndef __AVX__
+    WarnIfFeatureUnused(CPUFeature::AVX, "AVX");
+#endif  // __AVX__
+#ifndef __AVX2__
+    WarnIfFeatureUnused(CPUFeature::AVX2, "AVX2");
+#endif  // __AVX2__
+#ifndef __AVX512F__
+    WarnIfFeatureUnused(CPUFeature::AVX512F, "AVX512F");
+#endif  // __AVX512F__
+#ifndef __FMA__
+    WarnIfFeatureUnused(CPUFeature::FMA, "FMA");
+#endif  // __FMA__
+  });
+}
+
 }  // namespace port
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cpu_feature_guard.h b/tensorflow/core/platform/cpu_feature_guard.h
new file mode 100644
index 00000000000..fda9f817380
--- /dev/null
+++ b/tensorflow/core/platform/cpu_feature_guard.h
@@ -0,0 +1,32 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_
+#define TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_
+
+namespace tensorflow {
+namespace port {
+
+// Called by the framework when we expect heavy CPU computation and we want to
+// be sure that the code has been compiled to run optimally on the current
+// hardware. The first time it's called it will run lightweight checks of
+// available SIMD acceleration features and log warnings about any that aren't
+// used. Subsequent calls do nothing.
+void WarnAboutUnusedCPUFeatures();
+
+}  // namespace port
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_