Implement memory prefetching for PresizedCuckooMap

PiperOrigin-RevId: 227920727
This commit is contained in:
A. Unique TensorFlower 2019-01-04 15:07:06 -08:00 committed by TensorFlower Gardener
parent 58edbcb97b
commit 07777005d9
3 changed files with 59 additions and 1 deletions

View File

@ -3813,6 +3813,7 @@ tf_cc_tests(
"//tensorflow/core/kernels:ops_util",
"//third_party/eigen3",
"@com_google_absl//absl/base",
"@com_google_absl//absl/time",
],
)

View File

@ -20,6 +20,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/prefetch.h"
namespace tensorflow {
@ -132,6 +133,15 @@ class PresizedCuckooMap {
FindInBucket(k, fast_map_to_buckets(h2(tk)), out);
}
// Prefetch memory associated with the key k into cache levels specified by
// hint.
template <port::PrefetchHint hint = port::PREFETCH_HINT_T0>
void PrefetchKey(const key_type k) const {
const uint64 tk = key_transform(k);
port::prefetch<hint>(&buckets_[fast_map_to_buckets(tk)].keys);
port::prefetch<hint>(&buckets_[fast_map_to_buckets(h2(tk))].keys);
}
// Returns the sum of the static sizes of this map type and of
// CuckooPathQueue, in bytes.
// NOTE(review): only sizeof() values are counted; any storage the members
// allocate dynamically does not appear to be included -- confirm intent.
int64 MemoryUsed() const {
  const auto map_bytes = sizeof(PresizedCuckooMap<value>);
  const auto path_queue_bytes = sizeof(CuckooPathQueue);
  return map_bytes + path_queue_bytes;
}

View File

@ -13,12 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/util/presized_cuckoo_map.h"
#include <array>
#include "absl/time/clock.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/fingerprint.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/util/presized_cuckoo_map.h"
namespace tensorflow {
namespace {
@ -50,6 +52,51 @@ TEST(PresizedCuckooMapTest, Basic) {
EXPECT_EQ(out, 2);
}
// Covers PresizedCuckooMap::PrefetchKey in two parts:
//   1. A smoke check that PrefetchKey can be called for keys that are and are
//      not stored in the map.
//   2. In optimized, GCC-compatible, non-sanitizer builds only: a timing
//      comparison of lookups with and without a preceding prefetch.
TEST(PresizedCuckooMapTest, Prefetch) {
  {
    PresizedCuckooMap<int64> pscm(2);
    EXPECT_TRUE(pscm.InsertUnique(1, 2));
    // Works for both present and absent keys.
    pscm.PrefetchKey(1);
    pscm.PrefetchKey(2);
  }

  // Do not run in debug mode, when prefetch is not implemented, or when
  // sanitizers are enabled.
#if defined(NDEBUG) && defined(__GNUC__) && !defined(ADDRESS_SANITIZER) && \
    !defined(MEMORY_SANITIZER) && !defined(THREAD_SANITIZER) &&            \
    !defined(UNDEFINED_BEHAVIOR_SANITIZER)
  const auto now = [] { return absl::Now(); };

  // Make size enough to not fit in L2 cache (16.7 Mb)
  static constexpr int size = 1 << 22;
  PresizedCuckooMap<int64> pscm(size);
  for (int i = 0; i < size; ++i) {
    pscm.InsertUnique(i, i);
  }

  // Accumulated wall time of each variant over all iterations.
  absl::Duration no_prefetch, prefetch;
  int64 out;
  for (int iter = 0; iter < 10; ++iter) {
    // Baseline: plain lookups.
    auto time = now();
    for (int i = 0; i < size; ++i) {
      testing::DoNotOptimize(pscm.Find(i, &out));
    }
    no_prefetch += now() - time;

    // Same lookups, but prefetch the key that will be looked up 20
    // iterations later. The last 20 prefetches use keys that were never
    // inserted, which PrefetchKey accepts.
    time = now();
    for (int i = 0; i < size; ++i) {
      pscm.PrefetchKey(i + 20);
      testing::DoNotOptimize(pscm.Find(i, &out));
    }
    prefetch += now() - time;
  }

  // no_prefetch is at least 30% slower.
  // Dividing one absl::Duration by another with operator/ is integer
  // division (the result is truncated to int64), which would turn the 1.3
  // threshold into an effective 2.0. FDivDuration yields the fractional ratio.
  EXPECT_GE(absl::FDivDuration(no_prefetch, prefetch), 1.3);
#endif
}
TEST(PresizedCuckooMapTest, TooManyItems) {
static constexpr int kTableSize = 1000;
PresizedCuckooMap<int> pscm(kTableSize);