Implement memory prefetching for PresizedCuckooMap
PiperOrigin-RevId: 227920727
This commit is contained in:
parent
58edbcb97b
commit
07777005d9
tensorflow/core
@ -3813,6 +3813,7 @@ tf_cc_tests(
|
||||
"//tensorflow/core/kernels:ops_util",
|
||||
"//third_party/eigen3",
|
||||
"@com_google_absl//absl/base",
|
||||
"@com_google_absl//absl/time",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include <vector>
|
||||
#include "tensorflow/core/framework/types.h"
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
#include "tensorflow/core/platform/prefetch.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
@ -132,6 +133,15 @@ class PresizedCuckooMap {
|
||||
FindInBucket(k, fast_map_to_buckets(h2(tk)), out);
|
||||
}
|
||||
|
||||
// Prefetch memory associated with the key k into cache levels specified by
|
||||
// hint.
|
||||
template <port::PrefetchHint hint = port::PREFETCH_HINT_T0>
|
||||
void PrefetchKey(const key_type k) const {
|
||||
const uint64 tk = key_transform(k);
|
||||
port::prefetch<hint>(&buckets_[fast_map_to_buckets(tk)].keys);
|
||||
port::prefetch<hint>(&buckets_[fast_map_to_buckets(h2(tk))].keys);
|
||||
}
|
||||
|
||||
// Returns the sum of the static object sizes of this map type and of
// CuckooPathQueue, in bytes.
// NOTE(review): the total is purely sizeof-based, so storage allocated at run
// time (e.g. whatever backs buckets_) does not appear to be counted -- confirm
// whether that is intended.
int64 MemoryUsed() const {
  constexpr auto map_object_bytes = sizeof(PresizedCuckooMap<value>);
  constexpr auto path_queue_bytes = sizeof(CuckooPathQueue);
  return map_object_bytes + path_queue_bytes;
}
|
||||
|
@ -13,12 +13,14 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/util/presized_cuckoo_map.h"
|
||||
#include <array>
|
||||
|
||||
#include "absl/time/clock.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/fingerprint.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/platform/test_benchmark.h"
|
||||
#include "tensorflow/core/util/presized_cuckoo_map.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
@ -50,6 +52,51 @@ TEST(PresizedCuckooMapTest, Basic) {
|
||||
EXPECT_EQ(out, 2);
|
||||
}
|
||||
|
||||
// Verifies that PrefetchKey() is safe to call for any key and, in optimized
// non-sanitizer builds, that prefetching ahead of lookups gives a measurable
// speed-up on a large table.
TEST(PresizedCuckooMapTest, Prefetch) {
  {
    PresizedCuckooMap<int64> pscm(2);
    EXPECT_TRUE(pscm.InsertUnique(1, 2));
    // Works for both present and absent keys.
    pscm.PrefetchKey(1);
    pscm.PrefetchKey(2);
  }

  // Do not run in debug mode, when prefetch is not implemented, or when
  // sanitizers are enabled.
#if defined(NDEBUG) && defined(__GNUC__) && !defined(ADDRESS_SANITIZER) && \
    !defined(MEMORY_SANITIZER) && !defined(THREAD_SANITIZER) &&            \
    !defined(UNDEFINED_BEHAVIOR_SANITIZER)
  const auto now = [] { return absl::Now(); };

  // Make size enough to not fit in L2 cache (16.7 Mb)
  static constexpr int size = 1 << 22;
  PresizedCuckooMap<int64> pscm(size);
  for (int i = 0; i < size; ++i) {
    pscm.InsertUnique(i, i);
  }

  // Accumulated wall time of each lookup strategy over all iterations.
  absl::Duration no_prefetch, prefetch;
  int64 out;
  for (int iter = 0; iter < 10; ++iter) {
    // Baseline: plain lookups of every key.
    auto time = now();
    for (int i = 0; i < size; ++i) {
      testing::DoNotOptimize(pscm.Find(i, &out));
    }
    no_prefetch += now() - time;

    // Same lookups, but prefetch the key 20 positions ahead first. Near the
    // end of the range those keys are absent, which PrefetchKey tolerates.
    time = now();
    for (int i = 0; i < size; ++i) {
      pscm.PrefetchKey(i + 20);
      testing::DoNotOptimize(pscm.Find(i, &out));
    }
    prefetch += now() - time;
  }

  // no_prefetch is at least 30% slower.
  // Duration / Duration is integer division in Abseil, which would truncate
  // the ratio (e.g. 1.9 -> 1) and silently turn this into a ">= 2x" check.
  // FDivDuration yields the floating-point ratio the 1.3 threshold expects.
  EXPECT_GE(absl::FDivDuration(no_prefetch, prefetch), 1.3);
#endif
}
|
||||
|
||||
TEST(PresizedCuckooMapTest, TooManyItems) {
|
||||
static constexpr int kTableSize = 1000;
|
||||
PresizedCuckooMap<int> pscm(kTableSize);
|
||||
|
Loading…
Reference in New Issue
Block a user