Step 1 of re-namespacing StreamExecutor into ::stream_executor.

This moves everything inside of stream_executor/... into the new namespace and leaves a namespace alias in ::perftools::gputools. The next steps will clean up users to use the new namespace.

This is mostly a mechanical change, but it also includes a number of non-mechanical changes that ideally would be split out into separate patches. Unfortunately, they all need to be included here, for the following reasons:

- Forward declarations need to be in the same namespace as the actual types, so all forward declarations of StreamExecutor types have to change in this one patch.
- Uses of these forward declarations need to be changed to the new namespace (otherwise we would need to add a namespace alias to the relevant header, which is pretty ugly).
- Various initialization code needs to live in StreamExecutor's "real" namespace, so all of it needs to be changed.

PiperOrigin-RevId: 193256128
102 lines
4.2 KiB
C++
102 lines
4.2 KiB
C++
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==============================================================================*/
|
|
|
|
#ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_
|
|
#define TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_
|
|
|
|
#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <type_traits>
#include <vector>

#include "tensorflow/stream_executor/platform/logging.h"
#include "tensorflow/stream_executor/platform/port.h"
|
|
|
|
namespace stream_executor {
|
|
namespace port {
|
|
|
|
// Integer-division helpers that round toward +infinity (ceil) or -infinity
// (floor), unlike the built-in `/` operator, which truncates toward zero.
//
// Every member is static; the class acts purely as a named scope.
class MathUtil {
 public:
  // Returns the ceiling of numerator / denominator, i.e. the smallest integer
  // that is not less than the exact quotient.
  //
  // Preconditions (checked with assert): denominator != 0 and, for signed
  // types, the pair (numeric_limits::min(), -1) is not allowed.
  template <typename IntegralType>
  static IntegralType CeilOfRatio(IntegralType numerator,
                                  IntegralType denominator) {
    return CeilOrFloorOfRatio<IntegralType, true>(numerator, denominator);
  }

  // Returns the floor of numerator / denominator, i.e. the largest integer
  // that is not greater than the exact quotient.
  //
  // Preconditions are the same as for CeilOfRatio.
  template <typename IntegralType>
  static IntegralType FloorOfRatio(IntegralType numerator,
                                   IntegralType denominator) {
    return CeilOrFloorOfRatio<IntegralType, false>(numerator, denominator);
  }

  // Shared implementation of CeilOfRatio (ceil == true) and FloorOfRatio
  // (ceil == false). Only instantiable for integral types.
  template <typename IntegralType, bool ceil>
  static IntegralType CeilOrFloorOfRatio(IntegralType numerator,
                                         IntegralType denominator);
};

// ---- CeilOrFloorOfRatio ----
// This implementation uses integer arithmetic only and never casts to double.
//
// Casting to double is in general incorrect because of loss of precision
// when casting an int64 into a double.
//
// There are a bunch of 'recipes' to compute an integer ceil (or floor) on the
// web, and most of them are incorrect.
template <typename IntegralType, bool ceil>
IntegralType MathUtil::CeilOrFloorOfRatio(IntegralType numerator,
                                          IntegralType denominator) {
  static_assert(std::is_integral<IntegralType>::value,
                "CeilOfRatio_is_only_defined_for_integral_types");
  assert(denominator != 0);
  // Dividing the smallest signed integer by -1 is not supported: it would
  // SIGFPE. The -1 is cast to IntegralType so that instantiations for unsigned
  // types do not compare an unsigned value against a signed literal.
  assert(!std::is_signed<IntegralType>::value ||
         numerator != std::numeric_limits<IntegralType>::min() ||
         denominator != static_cast<IntegralType>(-1));

  // Built-in division truncates toward zero; multiplying back tells us whether
  // (and in which direction) the truncation discarded a remainder.
  const IntegralType rounded_toward_zero = numerator / denominator;
  const IntegralType intermediate_product = rounded_toward_zero * denominator;

  if (ceil) {  // `ceil` is a template parameter: a compile-time constant.
    // When rounded_toward_zero is negative, no adjustment is ever needed: the
    // real ratio is negative, so rounding toward zero already is the ceil.
    // When rounded_toward_zero is non-negative, an adjustment is needed if the
    // sign of the difference numerator - intermediate_product is the same as
    // the sign of the denominator.
    //
    // Going through a bool and then a static_cast to IntegralType is not
    // strictly necessary, but it keeps the intent explicit.
    const bool needs_adjustment =
        (rounded_toward_zero >= 0) &&
        ((denominator > 0 && numerator > intermediate_product) ||
         (denominator < 0 && numerator < intermediate_product));
    const IntegralType adjustment = static_cast<IntegralType>(needs_adjustment);
    const IntegralType ceil_of_ratio = rounded_toward_zero + adjustment;
    return ceil_of_ratio;
  } else {
    // Floor case, symmetrical to the ceil case: only a non-positive truncated
    // quotient can be too large, and it is adjusted downward by one.
    const bool needs_adjustment =
        (rounded_toward_zero <= 0) &&
        ((denominator > 0 && numerator < intermediate_product) ||
         (denominator < 0 && numerator > intermediate_product));
    const IntegralType adjustment = static_cast<IntegralType>(needs_adjustment);
    const IntegralType floor_of_ratio = rounded_toward_zero - adjustment;
    return floor_of_ratio;
  }
}
|
|
|
|
} // namespace port
|
|
} // namespace stream_executor
|
|
|
|
#endif // TENSORFLOW_STREAM_EXECUTOR_LIB_MATHUTIL_H_
|