Move SIMD feature warnings to the first use of intensive CPU computation, rather than startup.
Change: 143570811
This commit is contained in:
parent
ffd7338ecb
commit
354972df8a
@ -239,6 +239,7 @@ cc_library(
|
||||
"lib/strings/str_util.h",
|
||||
"lib/strings/strcat.h",
|
||||
"lib/strings/stringprintf.h",
|
||||
"platform/cpu_feature_guard.h",
|
||||
"platform/cpu_info.h",
|
||||
"platform/dynamic_annotations.h",
|
||||
"platform/env.h",
|
||||
|
@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
|
||||
#include "tensorflow/core/lib/core/threadpool.h"
|
||||
#include "tensorflow/core/platform/cpu_feature_guard.h"
|
||||
#include "tensorflow/core/platform/cpu_info.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
@ -63,6 +64,9 @@ LocalDevice::LocalDevice(const SessionOptions& options,
|
||||
Allocator* device_allocator)
|
||||
: Device(options.env, attributes, device_allocator),
|
||||
owned_tp_info_(nullptr) {
|
||||
// If we're running on the CPU, log warnings if we're not compiled using the
|
||||
// best flags for performance.
|
||||
port::WarnAboutUnusedCPUFeatures();
|
||||
LocalDevice::EigenThreadPoolInfo* tp_info;
|
||||
if (use_global_threadpool_) {
|
||||
// All ThreadPoolDevices in the process will use this single fixed
|
||||
|
@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/platform/cpu_feature_guard.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "tensorflow/core/platform/cpu_info.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
|
||||
@ -20,6 +24,31 @@ namespace tensorflow {
|
||||
namespace port {
|
||||
namespace {
|
||||
|
||||
// If the CPU feature isn't present, log a fatal error.
|
||||
void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) {
|
||||
if (!TestCPUFeature(feature)) {
|
||||
#ifdef __ANDROID__
|
||||
// Some Android emulators seem to indicate they don't support SSE, so to
|
||||
// avoid crashes when testing, switch this to a warning.
|
||||
LOG(WARNING)
|
||||
#else
|
||||
LOG(FATAL)
|
||||
#endif
|
||||
<< "The TensorFlow library was compiled to use " << feature_name
|
||||
<< " instructions, but these aren't available on your machine.";
|
||||
}
|
||||
}
|
||||
|
||||
// If the CPU feature is present, log warning.
|
||||
void WarnIfFeatureUnused(CPUFeature feature, const string& feature_name) {
|
||||
if (TestCPUFeature(feature)) {
|
||||
LOG(WARNING) << "The TensorFlow library wasn't compiled to use "
|
||||
<< feature_name
|
||||
<< " instructions, but these are available on your machine "
|
||||
"and could speed up CPU computations.";
|
||||
}
|
||||
}
|
||||
|
||||
// Raises an error if the binary has been compiled for a CPU feature (like AVX)
|
||||
// that isn't available on the current machine. It also warns of performance
|
||||
// loss if there's a feature available that's not being used.
|
||||
@ -31,76 +60,71 @@ class CPUFeatureGuard {
|
||||
// For every SIMD instruction set the compiler was told to target (detected
// through the corresponding predefined macro), confirms via
// CheckFeatureOrDie() that the feature is available on this machine.
//
// Warnings about features the machine has but the build does not use are
// intentionally NOT emitted here. WarnAboutUnusedCPUFeatures() logs those once
// on its first call; repeating them in this constructor would print every
// warning a second time, and at object-construction time rather than at the
// first use of heavy CPU computation.
CPUFeatureGuard() {
#ifdef __SSE__
  CheckFeatureOrDie(CPUFeature::SSE, "SSE");
#endif  // __SSE__
#ifdef __SSE2__
  CheckFeatureOrDie(CPUFeature::SSE2, "SSE2");
#endif  // __SSE2__
#ifdef __SSE3__
  CheckFeatureOrDie(CPUFeature::SSE3, "SSE3");
#endif  // __SSE3__
#ifdef __SSE4_1__
  CheckFeatureOrDie(CPUFeature::SSE4_1, "SSE4.1");
#endif  // __SSE4_1__
#ifdef __SSE4_2__
  CheckFeatureOrDie(CPUFeature::SSE4_2, "SSE4.2");
#endif  // __SSE4_2__
#ifdef __AVX__
  CheckFeatureOrDie(CPUFeature::AVX, "AVX");
#endif  // __AVX__
#ifdef __AVX2__
  CheckFeatureOrDie(CPUFeature::AVX2, "AVX2");
#endif  // __AVX2__
#ifdef __AVX512F__
  CheckFeatureOrDie(CPUFeature::AVX512F, "AVX512F");
#endif  // __AVX512F__
#ifdef __FMA__
  CheckFeatureOrDie(CPUFeature::FMA, "FMA");
#endif  // __FMA__
}
|
||||
|
||||
// Logs a message naming `feature_name` when TestCPUFeature() says `feature`
// is unavailable. Severity is FATAL unless __ANDROID__ is defined, in which
// case it is WARNING.
void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) {
  const bool feature_available = TestCPUFeature(feature);
  if (feature_available) {
    return;
  }
#ifdef __ANDROID__
  // Some Android emulators seem to indicate they don't support SSE, so to
  // avoid crashes when testing, switch this to a warning.
  LOG(WARNING)
#else
  LOG(FATAL)
#endif
      << "The TensorFlow library was compiled to use " << feature_name
      << " instructions, but these aren't available on your machine.";
}
|
||||
|
||||
// Emits a WARNING naming `feature_name` when TestCPUFeature() reports that
// `feature` is available; does nothing otherwise.
void WarnIfFeatureUnused(CPUFeature feature, const string& feature_name) {
  const bool feature_available = TestCPUFeature(feature);
  if (!feature_available) {
    return;
  }
  LOG(WARNING) << "The TensorFlow library wasn't compiled to use "
               << feature_name
               << " instructions, but these are available on your machine "
                  "and could speed up CPU computations.";
}
|
||||
};
|
||||
|
||||
// Namespace-scope instance: constructing it runs the CPUFeatureGuard
// constructor, and with it the CPU feature checks.
CPUFeatureGuard g_cpu_feature_guard_singleton;
|
||||
|
||||
// Flag handed to std::call_once in WarnAboutUnusedCPUFeatures() so that the
// unused-feature warnings are logged at most once.
std::once_flag g_cpu_feature_guard_warn_once_flag;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Logs one warning per SIMD feature that this binary was not compiled to use
// but that TestCPUFeature() reports as available. The work runs under
// std::call_once, so only the first call produces any output.
void WarnAboutUnusedCPUFeatures() {
  // Each check is compiled in only when the matching compiler macro is absent,
  // i.e. when the build does not target that instruction set.
  const auto warn_for_unused_features = [] {
#if !defined(__SSE__)
    WarnIfFeatureUnused(CPUFeature::SSE, "SSE");
#endif  // __SSE__
#if !defined(__SSE2__)
    WarnIfFeatureUnused(CPUFeature::SSE2, "SSE2");
#endif  // __SSE2__
#if !defined(__SSE3__)
    WarnIfFeatureUnused(CPUFeature::SSE3, "SSE3");
#endif  // __SSE3__
#if !defined(__SSE4_1__)
    WarnIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1");
#endif  // __SSE4_1__
#if !defined(__SSE4_2__)
    WarnIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2");
#endif  // __SSE4_2__
#if !defined(__AVX__)
    WarnIfFeatureUnused(CPUFeature::AVX, "AVX");
#endif  // __AVX__
#if !defined(__AVX2__)
    WarnIfFeatureUnused(CPUFeature::AVX2, "AVX2");
#endif  // __AVX2__
#if !defined(__AVX512F__)
    WarnIfFeatureUnused(CPUFeature::AVX512F, "AVX512F");
#endif  // __AVX512F__
#if !defined(__FMA__)
    WarnIfFeatureUnused(CPUFeature::FMA, "FMA");
#endif  // __FMA__
  };
  std::call_once(g_cpu_feature_guard_warn_once_flag, warn_for_unused_features);
}
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
||||
|
32
tensorflow/core/platform/cpu_feature_guard.h
Normal file
32
tensorflow/core/platform/cpu_feature_guard.h
Normal file
@ -0,0 +1,32 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_
|
||||
#define TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_
|
||||
|
||||
namespace tensorflow {
|
||||
namespace port {
|
||||
|
||||
// Called by the framework when we expect heavy CPU computation and we want to
|
||||
// be sure that the code has been compiled to run optimally on the current
|
||||
// hardware. The first time it's called it will run lightweight checks of
|
||||
// available SIMD acceleration features and log warnings about any that aren't
|
||||
// used. The checks run under std::call_once, so later calls log nothing
// further.
|
||||
void WarnAboutUnusedCPUFeatures();
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_PLATFORM_CPU_FEATURE_GUARD_H_
|
Loading…
Reference in New Issue
Block a user