From 543e52730dd8411928e858c5d89d4663bc40740a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 19 Nov 2022 18:17:48 +0300 Subject: [PATCH] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1=D0=BE?= =?UTF-8?q?=D1=82=D0=BA=D0=B0=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80=D0=B6?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=B0=D0=B2=D1=82=D0=BE-=D1=81=D0=BB=D0=B8?= =?UTF-8?q?=D1=8F=D0=BD=D0=B8=D1=8F=20=D0=B7=D0=B0=D0=BF=D0=B8=D1=81=D0=B5?= =?UTF-8?q?=D0=B9=20GC=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`page=5Fal?= =?UTF-8?q?loc=5Fslowpath()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 110 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 72 insertions(+), 38 deletions(-) diff --git a/src/core.c b/src/core.c index 16a5fe6c..625c291a 100644 --- a/src/core.c +++ b/src/core.c @@ -6595,7 +6595,8 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, #define MDBX_ALLOC_RESERVE 16 #define MDBX_ALLOC_BACKLOG 32 #define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW) -#define MDBX_ALLOC_LIFO 128 +#define MDBX_ALLOC_SHOULD_SCAN 64 /* internal state */ +#define MDBX_ALLOC_LIFO 128 /* internal state */ static __inline bool is_gc_usable(const MDBX_txn *txn) { /* If txn is updating the GC, then the retired-list cannot play catch-up with @@ -6692,13 +6693,18 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num, if (unlikely(!is_gc_usable(txn))) goto no_gc; - eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO)) == 0); + eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO | + MDBX_ALLOC_SHOULD_SCAN)) == 0); flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0; - const unsigned coalesce_threshold = env->me_maxgc_ov1page >> 2; - if (txn->mt_dbs[FREE_DBI].md_branch_pages && - MDBX_PNL_GETSIZE(txn->tw.relist) < coalesce_threshold && num) - flags += MDBX_ALLOC_COALESCE; + if (/* Не коагулируем записи при подготовке резерва для обновления GC. + * Иначе попытка увеличить резерв может приводить к необходимости ещё + * большего резерва из-за увеличения списка переработанных страниц. */ + flags < MDBX_ALLOC_COALESCE) { + if (txn->mt_dbs[FREE_DBI].md_branch_pages && + re_len < env->me_maxgc_ov1page / 2) + flags += MDBX_ALLOC_COALESCE; + } MDBX_cursor recur; ret.err = gc_cursor_init(&recur, txn); @@ -6718,7 +6724,6 @@ retry_gc_have_oldest: const txnid_t detent = oldest + 1; txnid_t id = 0; - bool should_scan = false; MDBX_cursor_op op = MDBX_FIRST; if (flags & MDBX_ALLOC_LIFO) { if (!txn->tw.lifo_reclaimed) { @@ -6790,24 +6795,54 @@ next_gc:; ret.err = MDBX_CORRUPTED; goto fail; } + const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl); - if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE( - txn->tw.relist) >= env->me_options.rp_augment_limit) && - ((/* not a slot-request from gc-update */ - (flags & MDBX_ALLOC_SLOT) == 0 && - /* have enough unallocated space */ txn->mt_geo.upper >= - txn->mt_next_pgno + num) || - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= MDBX_PGL_LIMIT)) { - /* Stop reclaiming to avoid large/overflow the page list. - * This is a rare case while search for a continuously multi-page region - * in a large database. - * https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123 - */ - NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu " - "(chunk) -> %zu", - MDBX_PNL_GETSIZE(txn->tw.relist), gc_len, - gc_len + MDBX_PNL_GETSIZE(txn->tw.relist)); - goto depleted_gc; + TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, + gc_len + re_len); + + eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist)); + if (unlikely(gc_len + re_len >= env->me_maxgc_ov1page)) { + /* Don't try to coalesce too much. */ + if (flags & MDBX_ALLOC_SHOULD_SCAN) { + eASSERT(env, flags & MDBX_ALLOC_COALESCE); + eASSERT(env, num > 0); +#if MDBX_ENABLE_PROFGC + env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1; +#endif /* MDBX_ENABLE_PROFGC */ + TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold"); + if (re_len >= num) { + eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno && + MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno); + range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len); + pgno = *range; + if (num == 1) + goto done; + range = scan4seq(range, re_len, num - 1); + eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1)); + if (likely(range)) { + pgno = *range; + goto done; + } + } + flags -= MDBX_ALLOC_COALESCE | MDBX_ALLOC_SHOULD_SCAN; + } + if (unlikely(/* list is too long already */ re_len >= + env->me_options.rp_augment_limit) && + ((/* not a slot-request from gc-update */ + (flags & MDBX_ALLOC_SLOT) == 0 && + /* have enough unallocated space */ txn->mt_geo.upper >= + txn->mt_next_pgno + num) || + gc_len + re_len >= MDBX_PGL_LIMIT)) { + /* Stop reclaiming to avoid large/overflow the page list. + * This is a rare case while search for a continuously multi-page region + * in a large database. + * https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123 + */ + NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu " + "(chunk) -> %zu", + re_len, gc_len, gc_len + re_len); + goto depleted_gc; + } } /* Remember ID of readed GC record */ @@ -6834,7 +6869,7 @@ next_gc:; /* Merge in descending sorted order */ re_len = pnl_merge(txn->tw.relist, gc_pnl); - should_scan = true; + flags |= MDBX_ALLOC_SHOULD_SCAN; if (AUDIT_ENABLED()) { if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) { ret.err = MDBX_CORRUPTED; @@ -6860,26 +6895,22 @@ next_gc:; /* Done for a kick-reclaim mode, actually no page needed */ if (unlikely(flags & MDBX_ALLOC_SLOT)) { eASSERT(env, ret.err == MDBX_SUCCESS); + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, + re_len); goto early_exit; } /* TODO: delete reclaimed records */ - /* Don't try to coalesce too much. */ eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT); if (flags & MDBX_ALLOC_COALESCE) { - if (re_len /* current size */ < coalesce_threshold) { -#if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1; -#endif /* MDBX_ENABLE_PROFGC */ - goto next_gc; - } - TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold"); - flags &= ~MDBX_ALLOC_COALESCE; + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, + re_len); + goto next_gc; } scan: - eASSERT(env, should_scan); + eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN); if (re_len >= num) { eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno && MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno); @@ -6894,13 +6925,16 @@ scan: goto done; } } - should_scan = false; - if (ret.err == MDBX_SUCCESS) + flags -= MDBX_ALLOC_SHOULD_SCAN; + if (ret.err == MDBX_SUCCESS) { + TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, + re_len); goto next_gc; + } depleted_gc: ret.err = MDBX_NOTFOUND; - if (should_scan) + if (flags & MDBX_ALLOC_SHOULD_SCAN) goto scan; //-------------------------------------------------------------------------