From 354972df8a10436667aed1b9155e73e64f59953c Mon Sep 17 00:00:00 2001 From: Pete Warden <petewarden@google.com> Date: Wed, 4 Jan 2017 10:49:06 -0800 Subject: [PATCH] Move SIMD feature warnings to the first use of intensive CPU computation, rather than startup. Change: 143570811 --- tensorflow/core/BUILD | 1 + .../core/common_runtime/local_device.cc | 4 + tensorflow/core/platform/cpu_feature_guard.cc | 106 +++++++++++------- tensorflow/core/platform/cpu_feature_guard.h | 32 ++++++ 4 files changed, 102 insertions(+), 41 deletions(-) create mode 100644 tensorflow/core/platform/cpu_feature_guard.h diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 95b111f23b1..b50ac83d034 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -239,6 +239,7 @@ cc_library( "lib/strings/str_util.h", "lib/strings/strcat.h", "lib/strings/stringprintf.h", + "platform/cpu_feature_guard.h", "platform/cpu_info.h", "platform/dynamic_annotations.h", "platform/env.h", diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc index e55456c039a..0a6342ed736 100644 --- a/tensorflow/core/common_runtime/local_device.cc +++ b/tensorflow/core/common_runtime/local_device.cc @@ -19,6 +19,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/eigen_thread_pool.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/cpu_feature_guard.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -63,6 +64,9 @@ LocalDevice::LocalDevice(const SessionOptions& options, Allocator* device_allocator) : Device(options.env, attributes, device_allocator), owned_tp_info_(nullptr) { + // If we're running on the CPU, log warnings if we're not compiled using the + // best flags for performance. + port::WarnAboutUnusedCPUFeatures(); LocalDevice::EigenThreadPoolInfo* tp_info; if (use_global_threadpool_) { // All ThreadPoolDevices in the process will use this single fixed diff --git a/tensorflow/core/platform/cpu_feature_guard.cc b/tensorflow/core/platform/cpu_feature_guard.cc index fd38b614c32..1cfeb2580fa 100644 --- a/tensorflow/core/platform/cpu_feature_guard.cc +++ b/tensorflow/core/platform/cpu_feature_guard.cc @@ -13,6 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/core/platform/cpu_feature_guard.h" + +#include <mutex> + #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" @@ -20,6 +24,31 @@ namespace tensorflow { namespace port { namespace { +// If the CPU feature isn't present, log a fatal error. +void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) { + if (!TestCPUFeature(feature)) { +#ifdef __ANDROID__ + // Some Android emulators seem to indicate they don't support SSE, so to + // avoid crashes when testing, switch this to a warning. + LOG(WARNING) +#else + LOG(FATAL) +#endif + << "The TensorFlow library was compiled to use " << feature_name + << " instructions, but these aren't available on your machine."; + } +} + +// If the CPU feature is present, log warning. +void WarnIfFeatureUnused(CPUFeature feature, const string& feature_name) { + if (TestCPUFeature(feature)) { + LOG(WARNING) << "The TensorFlow library wasn't compiled to use " + << feature_name + << " instructions, but these are available on your machine " + "and could speed up CPU computations."; + } +} + // Raises an error if the binary has been compiled for a CPU feature (like AVX) // that isn't available on the current machine. It also warns of performance // loss if there's a feature available that's not being used. @@ -31,76 +60,71 @@ class CPUFeatureGuard { CPUFeatureGuard() { #ifdef __SSE__ CheckFeatureOrDie(CPUFeature::SSE, "SSE"); -#else - WarnIfFeatureUnused(CPUFeature::SSE, "SSE"); #endif // __SSE__ #ifdef __SSE2__ CheckFeatureOrDie(CPUFeature::SSE2, "SSE2"); -#else - WarnIfFeatureUnused(CPUFeature::SSE2, "SSE2"); #endif // __SSE2__ #ifdef __SSE3__ CheckFeatureOrDie(CPUFeature::SSE3, "SSE3"); -#else - WarnIfFeatureUnused(CPUFeature::SSE3, "SSE3"); #endif // __SSE3__ #ifdef __SSE4_1__ CheckFeatureOrDie(CPUFeature::SSE4_1, "SSE4.1"); -#else - WarnIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1"); #endif // __SSE4_1__ #ifdef __SSE4_2__ CheckFeatureOrDie(CPUFeature::SSE4_2, "SSE4.2"); -#else - WarnIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2"); #endif // __SSE4_2__ #ifdef __AVX__ CheckFeatureOrDie(CPUFeature::AVX, "AVX"); -#else - WarnIfFeatureUnused(CPUFeature::AVX, "AVX"); #endif // __AVX__ #ifdef __AVX2__ CheckFeatureOrDie(CPUFeature::AVX2, "AVX2"); -#else - WarnIfFeatureUnused(CPUFeature::AVX2, "AVX2"); #endif // __AVX2__ #ifdef __AVX512F__ CheckFeatureOrDie(CPUFeature::AVX512F, "AVX512F"); -#else - WarnIfFeatureUnused(CPUFeature::AVX512F, "AVX512F"); #endif // __AVX512F__ #ifdef __FMA__ CheckFeatureOrDie(CPUFeature::FMA, "FMA"); -#else - WarnIfFeatureUnused(CPUFeature::FMA, "FMA"); #endif // __FMA__ } - - void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) { - if (!TestCPUFeature(feature)) { -#ifdef __ANDROID__ - // Some Android emulators seem to indicate they don't support SSE, so to - // avoid crashes when testing, switch this to a warning. - LOG(WARNING) -#else - LOG(FATAL) -#endif - << "The TensorFlow library was compiled to use " << feature_name - << " instructions, but these aren't available on your machine."; - } - } - - void WarnIfFeatureUnused(CPUFeature feature, const string& feature_name) { - if (TestCPUFeature(feature)) { - LOG(WARNING) << "The TensorFlow library wasn't compiled to use " - << feature_name - << " instructions, but these are available on your machine " - "and could speed up CPU computations."; - } - } }; CPUFeatureGuard g_cpu_feature_guard_singleton; + +std::once_flag g_cpu_feature_guard_warn_once_flag; + } // namespace + +void WarnAboutUnusedCPUFeatures() { + std::call_once(g_cpu_feature_guard_warn_once_flag, [] { +#ifndef __SSE__ + WarnIfFeatureUnused(CPUFeature::SSE, "SSE"); +#endif // __SSE__ +#ifndef __SSE2__ + WarnIfFeatureUnused(CPUFeature::SSE2, "SSE2"); +#endif // __SSE2__ +#ifndef __SSE3__ + WarnIfFeatureUnused(CPUFeature::SSE3, "SSE3"); +#endif // __SSE3__ +#ifndef __SSE4_1__ + WarnIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1"); +#endif // __SSE4_1__ +#ifndef __SSE4_2__ + WarnIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2"); +#endif // __SSE4_2__ +#ifndef __AVX__ + WarnIfFeatureUnused(CPUFeature::AVX, "AVX"); +#endif // __AVX__ +#ifndef __AVX2__ + WarnIfFeatureUnused(CPUFeature::AVX2, "AVX2"); +#endif // __AVX2__ +#ifndef __AVX512F__ + WarnIfFeatureUnused(CPUFeature::AVX512F, "AVX512F"); +#endif // __AVX512F__ +#ifndef __FMA__ + WarnIfFeatureUnused(CPUFeature::FMA, "FMA"); +#endif // __FMA__ + }); +} + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/cpu_feature_guard.h b/tensorflow/core/platform/cpu_feature_guard.h new file mode 100644 index 00000000000..fda9f817380 --- /dev/null +++ b/tensorflow/core/platform/cpu_feature_guard.h @@ -0,0 +1,32 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_ +#define TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_ + +namespace tensorflow { +namespace port { + +// Called by the framework when we expect heavy CPU computation and we want to +// be sure that the code has been compiled to run optimally on the current +// hardware. The first time it's called it will run lightweight checks of +// available SIMD acceleration features and log warnings about any that aren't +// used. +void WarnAboutUnusedCPUFeatures(); + +} // namespace port +} // namespace tensorflow + +#endif // TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_