Implement memory prefetching for PresizedCuckooMap
PiperOrigin-RevId: 227920727
This commit is contained in:
parent
58edbcb97b
commit
07777005d9
@ -3813,6 +3813,7 @@ tf_cc_tests(
|
|||||||
"//tensorflow/core/kernels:ops_util",
|
"//tensorflow/core/kernels:ops_util",
|
||||||
"//third_party/eigen3",
|
"//third_party/eigen3",
|
||||||
"@com_google_absl//absl/base",
|
"@com_google_absl//absl/base",
|
||||||
|
"@com_google_absl//absl/time",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "tensorflow/core/framework/types.h"
|
#include "tensorflow/core/framework/types.h"
|
||||||
#include "tensorflow/core/platform/macros.h"
|
#include "tensorflow/core/platform/macros.h"
|
||||||
|
#include "tensorflow/core/platform/prefetch.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
|
|
||||||
@ -132,6 +133,15 @@ class PresizedCuckooMap {
|
|||||||
FindInBucket(k, fast_map_to_buckets(h2(tk)), out);
|
FindInBucket(k, fast_map_to_buckets(h2(tk)), out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prefetch memory associated with the key k into cache levels specified by
|
||||||
|
// hint.
|
||||||
|
template <port::PrefetchHint hint = port::PREFETCH_HINT_T0>
|
||||||
|
void PrefetchKey(const key_type k) const {
|
||||||
|
const uint64 tk = key_transform(k);
|
||||||
|
port::prefetch<hint>(&buckets_[fast_map_to_buckets(tk)].keys);
|
||||||
|
port::prefetch<hint>(&buckets_[fast_map_to_buckets(h2(tk))].keys);
|
||||||
|
}
|
||||||
|
|
||||||
// Returns sizeof(PresizedCuckooMap<value>) + sizeof(CuckooPathQueue).
// NOTE(review): only these two sizeof terms are counted here; any storage the
// map allocates dynamically does not appear in this sum -- confirm that is
// intended.
int64 MemoryUsed() const {
|
int64 MemoryUsed() const {
|
||||||
return sizeof(PresizedCuckooMap<value>) + sizeof(CuckooPathQueue);
|
return sizeof(PresizedCuckooMap<value>) + sizeof(CuckooPathQueue);
|
||||||
}
|
}
|
||||||
|
@ -13,12 +13,14 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/core/util/presized_cuckoo_map.h"
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
|
#include "absl/time/clock.h"
|
||||||
#include "tensorflow/core/platform/env.h"
|
#include "tensorflow/core/platform/env.h"
|
||||||
#include "tensorflow/core/platform/fingerprint.h"
|
#include "tensorflow/core/platform/fingerprint.h"
|
||||||
#include "tensorflow/core/platform/test.h"
|
#include "tensorflow/core/platform/test.h"
|
||||||
#include "tensorflow/core/platform/test_benchmark.h"
|
#include "tensorflow/core/platform/test_benchmark.h"
|
||||||
|
#include "tensorflow/core/util/presized_cuckoo_map.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace {
|
namespace {
|
||||||
@ -50,6 +52,51 @@ TEST(PresizedCuckooMapTest, Basic) {
|
|||||||
EXPECT_EQ(out, 2);
|
EXPECT_EQ(out, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that PrefetchKey() can be called for present and absent keys and, in
// optimized non-sanitizer builds, that prefetching ahead of lookups makes a
// full scan of a large table measurably faster.
TEST(PresizedCuckooMapTest, Prefetch) {
  {
    PresizedCuckooMap<int64> pscm(2);
    EXPECT_TRUE(pscm.InsertUnique(1, 2));
    // Works for both present and absent keys.
    pscm.PrefetchKey(1);
    pscm.PrefetchKey(2);
  }

  // Do not run in debug mode, when prefetch is not implemented, or when
  // sanitizers are enabled.
#if defined(NDEBUG) && defined(__GNUC__) && !defined(ADDRESS_SANITIZER) && \
    !defined(MEMORY_SANITIZER) && !defined(THREAD_SANITIZER) &&            \
    !defined(UNDEFINED_BEHAVIOR_SANITIZER)
  const auto now = [] { return absl::Now(); };

  // Make the table big enough that it does not fit in L2 cache (16.7 Mb).
  static constexpr int size = 1 << 22;
  PresizedCuckooMap<int64> pscm(size);
  for (int i = 0; i < size; ++i) {
    pscm.InsertUnique(i, i);
  }

  absl::Duration no_prefetch, prefetch;
  // Initialized so that a failed lookup never leaves it indeterminate.
  int64 out = 0;
  for (int iter = 0; iter < 10; ++iter) {
    // Baseline: plain lookups over every key.
    auto time = now();
    for (int i = 0; i < size; ++i) {
      testing::DoNotOptimize(pscm.Find(i, &out));
    }
    no_prefetch += now() - time;

    // Same lookups, but prefetch the key that will be looked up 20
    // iterations later.
    time = now();
    for (int i = 0; i < size; ++i) {
      pscm.PrefetchKey(i + 20);
      testing::DoNotOptimize(pscm.Find(i, &out));
    }
    prefetch += now() - time;
  }

  // no_prefetch is at least 30% slower.
  // absl::Duration / absl::Duration is *integer* division, which would
  // truncate the ratio (e.g. 1.9 -> 1) before the comparison; use the
  // floating-point division helper so the 1.3 threshold means what it says.
  EXPECT_GE(absl::FDivDuration(no_prefetch, prefetch), 1.3);
#endif
}
|
||||||
|
|
||||||
TEST(PresizedCuckooMapTest, TooManyItems) {
|
TEST(PresizedCuckooMapTest, TooManyItems) {
|
||||||
static constexpr int kTableSize = 1000;
|
static constexpr int kTableSize = 1000;
|
||||||
PresizedCuckooMap<int> pscm(kTableSize);
|
PresizedCuckooMap<int> pscm(kTableSize);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user