diff --git a/tensorflow/lite/experimental/ruy/BUILD b/tensorflow/lite/experimental/ruy/BUILD index b46a67021b5..b8e1cba1c43 100644 --- a/tensorflow/lite/experimental/ruy/BUILD +++ b/tensorflow/lite/experimental/ruy/BUILD @@ -55,6 +55,15 @@ cc_library( deps = [":check_macros"], ) +cc_test( + name = "size_util_test", + srcs = ["size_util_test.cc"], + deps = [ + ":size_util", + "@com_google_googletest//:gtest", + ], +) + cc_library( name = "tune", srcs = [ diff --git a/tensorflow/lite/experimental/ruy/allocator.cc b/tensorflow/lite/experimental/ruy/allocator.cc index 044288847bb..8c4536bdeb1 100644 --- a/tensorflow/lite/experimental/ruy/allocator.cc +++ b/tensorflow/lite/experimental/ruy/allocator.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/lite/experimental/ruy/allocator.h" +#include <cstddef> #include <cstdlib> #ifdef _WIN32 @@ -25,7 +26,7 @@ namespace ruy { namespace detail { -void *AlignedAllocator::SystemAlignedAlloc(std::size_t num_bytes) { +void *AlignedAllocator::SystemAlignedAlloc(std::ptrdiff_t num_bytes) { #ifdef _WIN32 return _aligned_malloc(num_bytes, kAlignment); #else diff --git a/tensorflow/lite/experimental/ruy/allocator.h b/tensorflow/lite/experimental/ruy/allocator.h index ef1db4da269..d4652dfdc46 100644 --- a/tensorflow/lite/experimental/ruy/allocator.h +++ b/tensorflow/lite/experimental/ruy/allocator.h @@ -27,7 +27,8 @@ namespace ruy { namespace detail { -inline void* VoidPtrAdd(void* p, std::size_t offset) { +inline void* VoidPtrAdd(void* p, std::ptrdiff_t offset) { + RUY_DCHECK(p); std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(p) + offset; return reinterpret_cast<void*>(addr); } @@ -62,7 +63,7 @@ class AlignedAllocator { // ARM reference manual mentions that this granule size may be as large // as 2048 bytes, in practice we observe it to be 64 bytes. It can // be queried cheaply, at runtime, from userspace, if needed. 
- static constexpr std::size_t kAlignment = 64; + static constexpr std::ptrdiff_t kAlignment = 64; void operator=(const AlignedAllocator&) = delete; ~AlignedAllocator() { @@ -70,7 +71,7 @@ class AlignedAllocator { SystemAlignedFree(ptr_); } - void* AllocateAlignedBytes(std::size_t num_bytes) { + void* AllocateAlignedBytes(std::ptrdiff_t num_bytes) { RUY_DCHECK(num_bytes > 0); RUY_DCHECK((num_bytes & (kAlignment - 1)) == 0); if (void* p = AllocateFast(num_bytes)) { @@ -85,7 +86,13 @@ class AlignedAllocator { return; } - std::size_t new_size = round_up_pot(size_ + fallback_blocks_total_size_); + // No rounding-up of the size means linear instead of logarithmic + // bound on the number of allocation in some worst-case calling patterns. + // This is considered worth it because minimizing memory usage is important + // and actual calling patterns in applications that we care about still + // reach the no-further-allocations steady state in a small finite number + // of iterations. + std::ptrdiff_t new_size = size_ + fallback_blocks_total_size_; SystemAlignedFree(ptr_); ptr_ = SystemAlignedAlloc(new_size); size_ = new_size; @@ -98,16 +105,16 @@ class AlignedAllocator { } private: - void* AllocateFast(std::size_t num_bytes) { - if (current_ + num_bytes <= size_) { - void* ret = VoidPtrAdd(ptr_, current_); - current_ += num_bytes; - return ret; + void* AllocateFast(std::ptrdiff_t num_bytes) { + if (current_ + num_bytes > size_) { + return nullptr; } - return nullptr; + void* ret = VoidPtrAdd(ptr_, current_); + current_ += num_bytes; + return ret; } - void* AllocateSlow(std::size_t num_bytes) { + void* AllocateSlow(std::ptrdiff_t num_bytes) { void* p = SystemAlignedAlloc(num_bytes); fallback_blocks_total_size_ += num_bytes; fallback_blocks_.push_back(p); @@ -116,7 +123,7 @@ class AlignedAllocator { // Primitive allocation functions obtaining aligned memory from the // operating system. 
- void* SystemAlignedAlloc(std::size_t num_bytes); + void* SystemAlignedAlloc(std::ptrdiff_t num_bytes); void SystemAlignedFree(void* ptr); // Theory of operation: @@ -135,10 +142,10 @@ class AlignedAllocator { // bump-ptr allocator's buffer so that the next sequence of allocations // will hopefully not need any fallback blocks. void* ptr_ = nullptr; - std::size_t current_ = 0; - std::size_t size_ = 0; + std::ptrdiff_t current_ = 0; + std::ptrdiff_t size_ = 0; std::vector<void*> fallback_blocks_; - std::size_t fallback_blocks_total_size_ = 0; + std::ptrdiff_t fallback_blocks_total_size_ = 0; }; } // namespace detail @@ -147,7 +154,7 @@ class AlignedAllocator { // typed buffer. class Allocator { public: - void* AllocateBytes(std::size_t num_bytes) { + void* AllocateBytes(std::ptrdiff_t num_bytes) { if (num_bytes == 0) { return nullptr; } @@ -155,7 +162,7 @@ class Allocator { round_up_pot(num_bytes, detail::AlignedAllocator::kAlignment)); } template <typename Pointer> - void Allocate(std::size_t count, Pointer* out) { + void Allocate(std::ptrdiff_t count, Pointer* out) { using T = typename std::pointer_traits<Pointer>::element_type; *out = static_cast<T*>(AllocateBytes(count * sizeof(T))); } diff --git a/tensorflow/lite/experimental/ruy/size_util.h b/tensorflow/lite/experimental/ruy/size_util.h index 78ff90f62e1..437523b04ac 100644 --- a/tensorflow/lite/experimental/ruy/size_util.h +++ b/tensorflow/lite/experimental/ruy/size_util.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_SIZE_UTIL_H_ #define TENSORFLOW_LITE_EXPERIMENTAL_RUY_SIZE_UTIL_H_ +#include <type_traits> + #include "tensorflow/lite/experimental/ruy/check_macros.h" #ifdef _WIN32 @@ -24,40 +26,64 @@ 
namespace ruy { -inline int floor_log2(int n) { +template <typename Integer> +inline Integer floor_log2(Integer n) { + static_assert(std::is_integral<Integer>::value, ""); + static_assert(std::is_signed<Integer>::value, ""); + static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, ""); + RUY_DCHECK_GE(n, 1); #ifdef _WIN32 unsigned long result; - _BitScanReverse(&result, n); + if (sizeof(Integer) == 4) { + _BitScanReverse(&result, n); + } else { + _BitScanReverse64(&result, n); + } return result; #else - return 31 - __builtin_clz(n); + if (sizeof(Integer) == 4) { + return 31 - __builtin_clz(n); + } else { + return 63 - __builtin_clzll(n); + } #endif } -inline int ceil_log2(int n) { +template <typename Integer> +Integer ceil_log2(Integer n) { RUY_DCHECK_GE(n, 1); return n == 1 ? 0 : floor_log2(n - 1) + 1; } -inline bool is_pot(int value) { +template <typename Integer> +bool is_pot(Integer value) { return (value > 0) && ((value & (value - 1)) == 0); } -inline int round_down_pot(int value) { return 1 << floor_log2(value); } +template <typename Integer> +Integer round_down_pot(Integer value) { + return static_cast<Integer>(1) << floor_log2(value); +} -inline int round_up_pot(int value) { return 1 << ceil_log2(value); } +template <typename Integer> +Integer round_up_pot(Integer value) { + return static_cast<Integer>(1) << ceil_log2(value); +} -inline int round_down_pot(int value, int modulo) { +template <typename Integer, typename Modulo> +Integer round_down_pot(Integer value, Modulo modulo) { RUY_DCHECK_EQ(modulo & (modulo - 1), 0); return value & ~(modulo - 1); } -inline int round_up_pot(int value, int modulo) { +template <typename Integer, typename Modulo> +Integer round_up_pot(Integer value, Modulo modulo) { return round_down_pot(value + modulo - 1, modulo); } -inline int clamp(int x, int lo, int hi) { +template <typename Integer> +Integer clamp(Integer x, Integer lo, Integer hi) { if (x < lo) { return lo; } else if (x > hi) { diff --git a/tensorflow/lite/experimental/ruy/size_util_test.cc b/tensorflow/lite/experimental/ruy/size_util_test.cc new file mode 100644 index 00000000000..bd97e1aae0c --- /dev/null +++ b/tensorflow/lite/experimental/ruy/size_util_test.cc @@ -0,0 +1,100 @@ 
+/* Copyright 2019 Google LLC. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/experimental/ruy/size_util.h" + +#include <cstddef> +#include <cstdint> +#include <limits> + +#include <gtest/gtest.h> + +namespace ruy { +namespace { + +template <typename Integer> +void SizeUtilTestValue(Integer value) { + if (value == 0) { + return; + } + + EXPECT_LE(0, floor_log2(value)); + EXPECT_LE(floor_log2(value), ceil_log2(value)); + EXPECT_LE(ceil_log2(value), 8 * sizeof(Integer)); + + if (is_pot(value)) { + EXPECT_EQ(floor_log2(value), ceil_log2(value)); + } else { + EXPECT_EQ(floor_log2(value) + 1, ceil_log2(value)); + } + EXPECT_EQ(value >> floor_log2(value), 1); + EXPECT_EQ(round_down_pot(value), static_cast<Integer>(1) + << floor_log2(value)); + EXPECT_LE(round_down_pot(value), value); + EXPECT_GE(round_down_pot(value), value >> 1); + EXPECT_TRUE(is_pot(round_down_pot(value))); + + if (ceil_log2(value) < 8 * sizeof(Integer) - 1) { + EXPECT_EQ(value >> ceil_log2(value), is_pot(value) ? 
1 : 0); + EXPECT_EQ(round_up_pot(value), static_cast<Integer>(1) << ceil_log2(value)); + EXPECT_GE(round_up_pot(value), value); + EXPECT_LE(round_up_pot(value) >> 1, value); + EXPECT_TRUE(is_pot(round_up_pot(value))); + } + + for (std::uint8_t modulo : {1, 2, 8, 32, 128}) { + EXPECT_GE(value, round_down_pot(value, modulo)); + EXPECT_EQ(round_down_pot(value, modulo) % modulo, 0); + + if (value <= std::numeric_limits<Integer>::max() - modulo) { + EXPECT_LE(value, round_up_pot(value, modulo)); + EXPECT_EQ(round_up_pot(value, modulo) % modulo, 0); + } + } +} + +template <typename Integer> +void SizeUtilTest() { + for (int exponent = 0; exponent < 8 * sizeof(Integer) - 1; exponent++) { + const Integer pot = static_cast<Integer>(1) << exponent; + SizeUtilTestValue(pot - 1); + SizeUtilTestValue(pot); + SizeUtilTestValue(pot + 1); + SizeUtilTestValue(pot + 12); + SizeUtilTestValue(pot + 123); + } + SizeUtilTestValue(std::numeric_limits<Integer>::max() - 1); + SizeUtilTestValue(std::numeric_limits<Integer>::max()); +} + +TEST(SizeUtilTest, Int) { SizeUtilTest<int>(); } + +TEST(SizeUtilTest, Long) { SizeUtilTest<long>(); } // NOLINT + +TEST(SizeUtilTest, LongLong) { SizeUtilTest<long long>(); } // NOLINT + +TEST(SizeUtilTest, Int32) { SizeUtilTest<std::int32_t>(); } + +TEST(SizeUtilTest, Int64) { SizeUtilTest<std::int64_t>(); } + +TEST(SizeUtilTest, Ptrdiff) { SizeUtilTest<std::ptrdiff_t>(); } + +} // namespace +} // namespace ruy + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}