mirror of
https://gitflic.ru/project/erthink/libmdbx.git
synced 2025-06-13 18:48:52 +00:00
mdbx: рефакторинг с удалением cursor_spill() и MDBX_NOSPILL.
This commit is contained in:
parent
fe55f25665
commit
686145ec2e
@ -15,7 +15,7 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD
|
|||||||
12|0000 1000| | | | | | | | |
|
12|0000 1000| | | | | | | | |
|
||||||
13|0000 2000|VALIDATION | | | | | |P_SPILLED | |
|
13|0000 2000|VALIDATION | | | | | |P_SPILLED | |
|
||||||
14|0000 4000|NOSUBDIR | | | | | |P_LOOSE | |
|
14|0000 4000|NOSUBDIR | | | | | |P_LOOSE | |
|
||||||
15|0000 8000| | |DB_VALID |NOSPILL | | |P_FROZEN | |
|
15|0000 8000| | |DB_VALID | | | |P_FROZEN | |
|
||||||
16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE | | |
|
16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE | | |
|
||||||
17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND | | <= |
|
17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND | | <= |
|
||||||
18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP | | | | |
|
18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP | | | | |
|
||||||
|
165
src/core.c
165
src/core.c
@ -3210,8 +3210,8 @@ static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard);
|
|||||||
static pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
|
static pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
|
||||||
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
|
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
|
||||||
static int page_touch(MDBX_cursor *mc);
|
static int page_touch(MDBX_cursor *mc);
|
||||||
static int cursor_touch(MDBX_cursor *mc);
|
static int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key,
|
||||||
static int touch_dbi(MDBX_cursor *mc);
|
const MDBX_val *data);
|
||||||
|
|
||||||
#define MDBX_END_NAMES \
|
#define MDBX_END_NAMES \
|
||||||
{ \
|
{ \
|
||||||
@ -5141,7 +5141,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
for (size_t i = 1; i <= dl->length; ++i) {
|
for (size_t i = 1; i <= dl->length; ++i) {
|
||||||
MDBX_page *dp = dl->items[i].ptr;
|
MDBX_page *dp = dl->items[i].ptr;
|
||||||
VERBOSE(
|
VERBOSE(
|
||||||
"dirtylist[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u",
|
"unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u",
|
||||||
i, dp->mp_pgno, dpl_npages(dl, i), dp->mp_flags, dpl_age(txn, i),
|
i, dp->mp_pgno, dpl_npages(dl, i), dp->mp_flags, dpl_age(txn, i),
|
||||||
spill_prio(txn, i, reciprocal));
|
spill_prio(txn, i, reciprocal));
|
||||||
}
|
}
|
||||||
@ -5168,39 +5168,6 @@ done:
|
|||||||
: MDBX_TXN_FULL;
|
: MDBX_TXN_FULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cursor_spill(MDBX_cursor *mc, const MDBX_val *key,
|
|
||||||
const MDBX_val *data) {
|
|
||||||
MDBX_txn *txn = mc->mc_txn;
|
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
|
||||||
|
|
||||||
/* Estimate how much space this operation will take: */
|
|
||||||
/* 1) Max b-tree height, reasonable enough with including dups' sub-tree */
|
|
||||||
size_t need = CURSOR_STACK + 3;
|
|
||||||
/* 2) GC/FreeDB for any payload */
|
|
||||||
if (mc->mc_dbi > FREE_DBI) {
|
|
||||||
need += txn->mt_dbs[FREE_DBI].md_depth + 3;
|
|
||||||
/* 3) Named DBs also dirty the main DB */
|
|
||||||
if (mc->mc_dbi > MAIN_DBI)
|
|
||||||
need += txn->mt_dbs[MAIN_DBI].md_depth + 3;
|
|
||||||
}
|
|
||||||
#if xMDBX_DEBUG_SPILLING != 2
|
|
||||||
/* production mode */
|
|
||||||
/* 4) Double the page chain estimation
|
|
||||||
* for extensively splitting, rebalance and merging */
|
|
||||||
need += need;
|
|
||||||
/* 5) Factor the key+data which to be put in */
|
|
||||||
need += bytes2pgno(txn->mt_env, node_size(key, data)) + 1;
|
|
||||||
#else
|
|
||||||
/* debug mode */
|
|
||||||
(void)key;
|
|
||||||
(void)data;
|
|
||||||
mc->mc_txn->mt_env->debug_dirtied_est = ++need;
|
|
||||||
mc->mc_txn->mt_env->debug_dirtied_act = 0;
|
|
||||||
#endif /* xMDBX_DEBUG_SPILLING == 2 */
|
|
||||||
|
|
||||||
return txn_spill(txn, mc, need);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static bool meta_bootid_match(const MDBX_meta *meta) {
|
static bool meta_bootid_match(const MDBX_meta *meta) {
|
||||||
@ -7833,7 +7800,7 @@ __hot static int page_touch(MDBX_cursor *mc) {
|
|||||||
txn->tw.dirtylist->items[n].ptr == mp);
|
txn->tw.dirtylist->items[n].ptr == mp);
|
||||||
txn->tw.dirtylist->items[n].mlru =
|
txn->tw.dirtylist->items[n].mlru =
|
||||||
(txn->tw.dirtylist->items[n].mlru & MDBX_dp_multi_mask) +
|
(txn->tw.dirtylist->items[n].mlru & MDBX_dp_multi_mask) +
|
||||||
txn_lru_turn(txn);
|
(txn->tw.dirtylru & MDBX_dp_lru_mask);
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
if (IS_SUBP(mp)) {
|
if (IS_SUBP(mp)) {
|
||||||
@ -9991,8 +9958,12 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int gcu_touch(gcu_context_t *ctx) {
|
static int gcu_touch(gcu_context_t *ctx) {
|
||||||
|
MDBX_val key, val;
|
||||||
|
key.iov_base = val.iov_base = nullptr;
|
||||||
|
key.iov_len = sizeof(txnid_t);
|
||||||
|
val.iov_len = MDBX_PNL_SIZEOF(ctx->cursor.mc_txn->tw.retired_pages);
|
||||||
ctx->cursor.mc_flags |= C_GCU;
|
ctx->cursor.mc_flags |= C_GCU;
|
||||||
int err = cursor_touch(&ctx->cursor);
|
int err = cursor_touch(&ctx->cursor, &key, &val);
|
||||||
ctx->cursor.mc_flags -= C_GCU;
|
ctx->cursor.mc_flags -= C_GCU;
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -10036,18 +10007,7 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) {
|
|||||||
for_all_before_touch, for_relist, for_split, for_cow,
|
for_all_before_touch, for_relist, for_split, for_cow,
|
||||||
for_tree_before_touch);
|
for_tree_before_touch);
|
||||||
|
|
||||||
int err;
|
int err = gcu_touch(ctx);
|
||||||
if (unlikely(for_relist > 2)) {
|
|
||||||
MDBX_val key, val;
|
|
||||||
key.iov_base = val.iov_base = nullptr;
|
|
||||||
key.iov_len = sizeof(txnid_t);
|
|
||||||
val.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
|
|
||||||
err = cursor_spill(&ctx->cursor, &key, &val);
|
|
||||||
if (unlikely(err != MDBX_SUCCESS))
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = gcu_touch(ctx);
|
|
||||||
TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
||||||
|
|
||||||
if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) &&
|
if (!MDBX_ENABLE_BIGFOOT && unlikely(for_relist > 1) &&
|
||||||
@ -15517,7 +15477,8 @@ __hot __noinline static int page_search_root(MDBX_cursor *mc,
|
|||||||
|
|
||||||
ready:
|
ready:
|
||||||
if (flags & MDBX_PS_MODIFY) {
|
if (flags & MDBX_PS_MODIFY) {
|
||||||
if (unlikely((rc = page_touch(mc)) != 0))
|
rc = page_touch(mc);
|
||||||
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
mp = mc->mc_pg[mc->mc_top];
|
mp = mc->mc_pg[mc->mc_top];
|
||||||
}
|
}
|
||||||
@ -15731,8 +15692,6 @@ __hot static int page_search(MDBX_cursor *mc, const MDBX_val *key, int flags) {
|
|||||||
mc->mc_pg[0]->mp_flags);
|
mc->mc_pg[0]->mp_flags);
|
||||||
|
|
||||||
if (flags & MDBX_PS_MODIFY) {
|
if (flags & MDBX_PS_MODIFY) {
|
||||||
if (!(*mc->mc_dbistate & DBI_DIRTY) && unlikely(rc = touch_dbi(mc)))
|
|
||||||
return rc;
|
|
||||||
if (unlikely(rc = page_touch(mc)))
|
if (unlikely(rc = page_touch(mc)))
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -16878,21 +16837,61 @@ static int touch_dbi(MDBX_cursor *mc) {
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Touch all the pages in the cursor stack. Set mc_top.
|
static int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key,
|
||||||
* Makes sure all the pages are writable, before attempting a write operation.
|
const MDBX_val *data) {
|
||||||
* [in] mc The cursor to operate on. */
|
cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
static int cursor_touch(MDBX_cursor *mc) {
|
cASSERT(mc, (mc->mc_flags & C_INITIALIZED) || mc->mc_snum == 0);
|
||||||
int rc = MDBX_SUCCESS;
|
cASSERT(mc, cursor_is_tracked(mc));
|
||||||
|
|
||||||
|
txn_lru_turn(mc->mc_txn);
|
||||||
|
|
||||||
if (unlikely((*mc->mc_dbistate & DBI_DIRTY) == 0)) {
|
if (unlikely((*mc->mc_dbistate & DBI_DIRTY) == 0)) {
|
||||||
rc = touch_dbi(mc);
|
int err = touch_dbi(mc);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(err != MDBX_SUCCESS))
|
||||||
return rc;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((mc->mc_flags & C_SUB) == 0) {
|
||||||
|
MDBX_txn *const txn = mc->mc_txn;
|
||||||
|
/* Estimate how much space this operation will take: */
|
||||||
|
/* 1) Max b-tree height, reasonable enough with including dups' sub-tree */
|
||||||
|
size_t need = CURSOR_STACK + 3;
|
||||||
|
/* 2) GC/FreeDB for any payload */
|
||||||
|
if (mc->mc_dbi > FREE_DBI) {
|
||||||
|
need += txn->mt_dbs[FREE_DBI].md_depth + 3;
|
||||||
|
/* 3) Named DBs also dirty the main DB */
|
||||||
|
if (mc->mc_dbi > MAIN_DBI)
|
||||||
|
need += txn->mt_dbs[MAIN_DBI].md_depth + 3;
|
||||||
|
}
|
||||||
|
#if xMDBX_DEBUG_SPILLING != 2
|
||||||
|
/* production mode */
|
||||||
|
/* 4) Double the page chain estimation
|
||||||
|
* for extensively splitting, rebalance and merging */
|
||||||
|
need += need;
|
||||||
|
/* 5) Factor the key+data which to be put in */
|
||||||
|
need += bytes2pgno(txn->mt_env, node_size(key, data)) + 1;
|
||||||
|
#else
|
||||||
|
/* debug mode */
|
||||||
|
(void)key;
|
||||||
|
(void)data;
|
||||||
|
txn->mt_env->debug_dirtied_est = ++need;
|
||||||
|
txn->mt_env->debug_dirtied_act = 0;
|
||||||
|
#endif /* xMDBX_DEBUG_SPILLING == 2 */
|
||||||
|
|
||||||
|
int err = txn_spill(txn, mc, need);
|
||||||
|
if (unlikely(err != MDBX_SUCCESS))
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
int rc = MDBX_SUCCESS;
|
||||||
if (likely(mc->mc_snum)) {
|
if (likely(mc->mc_snum)) {
|
||||||
mc->mc_top = 0;
|
mc->mc_top = 0;
|
||||||
do {
|
do {
|
||||||
rc = page_touch(mc);
|
rc = page_touch(mc);
|
||||||
} while (!rc && ++(mc->mc_top) < mc->mc_snum);
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
|
break;
|
||||||
|
mc->mc_top += 1;
|
||||||
|
} while (mc->mc_top < mc->mc_snum);
|
||||||
mc->mc_top = mc->mc_snum - 1;
|
mc->mc_top = mc->mc_snum - 1;
|
||||||
}
|
}
|
||||||
return rc;
|
return rc;
|
||||||
@ -16952,9 +16951,6 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|||||||
data->iov_base = nullptr;
|
data->iov_base = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const unsigned nospill = flags & MDBX_NOSPILL;
|
|
||||||
flags -= nospill;
|
|
||||||
|
|
||||||
if (unlikely(mc->mc_txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED)))
|
if (unlikely(mc->mc_txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED)))
|
||||||
return (mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS
|
return (mc->mc_txn->mt_flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS
|
||||||
: MDBX_BAD_TXN;
|
: MDBX_BAD_TXN;
|
||||||
@ -17159,26 +17155,19 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mc->mc_flags &= ~C_DEL;
|
mc->mc_flags &= ~C_DEL;
|
||||||
|
|
||||||
/* Cursor is positioned, check for room in the dirty list */
|
/* Cursor is positioned, check for room in the dirty list */
|
||||||
if (!nospill) {
|
rdata = data;
|
||||||
rdata = data;
|
if (unlikely(flags & MDBX_MULTIPLE)) {
|
||||||
if (unlikely(flags & MDBX_MULTIPLE)) {
|
rdata = &xdata;
|
||||||
rdata = &xdata;
|
xdata.iov_len = data->iov_len * dcount;
|
||||||
xdata.iov_len = data->iov_len * dcount;
|
|
||||||
}
|
|
||||||
if (unlikely(err = cursor_spill(mc, key, rdata)))
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
err = cursor_touch(mc, key, rdata);
|
||||||
|
if (unlikely(err))
|
||||||
|
return err;
|
||||||
|
|
||||||
if (unlikely(rc == MDBX_NO_ROOT)) {
|
if (unlikely(rc == MDBX_NO_ROOT)) {
|
||||||
/* new database, write a root leaf page */
|
/* new database, write a root leaf page */
|
||||||
DEBUG("%s", "allocating new root leaf page");
|
DEBUG("%s", "allocating new root leaf page");
|
||||||
if (unlikely((*mc->mc_dbistate & DBI_DIRTY) == 0)) {
|
|
||||||
err = touch_dbi(mc);
|
|
||||||
if (unlikely(err != MDBX_SUCCESS))
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
pgr_t npr = page_new(mc, P_LEAF);
|
pgr_t npr = page_new(mc, P_LEAF);
|
||||||
if (unlikely(npr.err != MDBX_SUCCESS))
|
if (unlikely(npr.err != MDBX_SUCCESS))
|
||||||
return npr.err;
|
return npr.err;
|
||||||
@ -17205,11 +17194,6 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|||||||
if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED)
|
if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED)
|
||||||
npr.page->mp_flags |= P_LEAF2;
|
npr.page->mp_flags |= P_LEAF2;
|
||||||
mc->mc_flags |= C_INITIALIZED;
|
mc->mc_flags |= C_INITIALIZED;
|
||||||
} else {
|
|
||||||
/* make sure all cursor pages are writable */
|
|
||||||
err = cursor_touch(mc);
|
|
||||||
if (unlikely(err))
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool insert_key, insert_data, do_sub = false;
|
bool insert_key, insert_data, do_sub = false;
|
||||||
@ -17602,9 +17586,8 @@ new_sub:;
|
|||||||
STATIC_ASSERT(
|
STATIC_ASSERT(
|
||||||
(MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) ==
|
(MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) ==
|
||||||
MDBX_NOOVERWRITE);
|
MDBX_NOOVERWRITE);
|
||||||
xflags = MDBX_CURRENT | MDBX_NOSPILL |
|
xflags = MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >>
|
||||||
((flags & MDBX_NODUPDATA) >>
|
SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE);
|
||||||
SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE);
|
|
||||||
if ((flags & MDBX_CURRENT) == 0) {
|
if ((flags & MDBX_CURRENT) == 0) {
|
||||||
xflags -= MDBX_CURRENT;
|
xflags -= MDBX_CURRENT;
|
||||||
err = cursor_xinit1(mc, node, mc->mc_pg[mc->mc_top]);
|
err = cursor_xinit1(mc, node, mc->mc_pg[mc->mc_top]);
|
||||||
@ -17718,11 +17701,7 @@ __hot int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
|
|||||||
if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])))
|
if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])))
|
||||||
return MDBX_NOTFOUND;
|
return MDBX_NOTFOUND;
|
||||||
|
|
||||||
if (likely((flags & MDBX_NOSPILL) == 0) &&
|
rc = cursor_touch(mc, nullptr, nullptr);
|
||||||
unlikely(rc = cursor_spill(mc, NULL, NULL)))
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
rc = cursor_touch(mc);
|
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
@ -17744,7 +17723,7 @@ __hot int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
|
|||||||
} else {
|
} else {
|
||||||
if (!(node_flags(node) & F_SUBDATA))
|
if (!(node_flags(node) & F_SUBDATA))
|
||||||
mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
|
mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
|
||||||
rc = mdbx_cursor_del(&mc->mc_xcursor->mx_cursor, MDBX_NOSPILL);
|
rc = mdbx_cursor_del(&mc->mc_xcursor->mx_cursor, 0);
|
||||||
if (unlikely(rc))
|
if (unlikely(rc))
|
||||||
return rc;
|
return rc;
|
||||||
/* If sub-DB still has entries, we're done */
|
/* If sub-DB still has entries, we're done */
|
||||||
|
@ -1681,9 +1681,6 @@ typedef struct MDBX_node {
|
|||||||
#define CMP2INT(a, b) (((a) > (b)) - ((b) > (a)))
|
#define CMP2INT(a, b) (((a) > (b)) - ((b) > (a)))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Do not spill pages to disk if txn is getting full, may fail instead */
|
|
||||||
#define MDBX_NOSPILL 0x8000
|
|
||||||
|
|
||||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
|
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline pgno_t
|
||||||
int64pgno(int64_t i64) {
|
int64pgno(int64_t i64) {
|
||||||
if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1))
|
if (likely(i64 >= (int64_t)MIN_PAGENO && i64 <= (int64_t)MAX_PAGENO + 1))
|
||||||
|
Loading…
Reference in New Issue
Block a user