From b7d27c1b36b8dfc9ee389401dc2ba19389dc98dc Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Tue, 22 Oct 2019 22:31:06 +0300 Subject: [PATCH] mdbx: rework internals for using C99 `flexible array member` (preparation for -fsanitize=undefined). Change-Id: I0d1836d6108ef379c43231720ef703ff69fc426d --- src/elements/core.c | 373 +++++++++++++++++++-------------------- src/elements/internals.h | 33 ++-- 2 files changed, 193 insertions(+), 213 deletions(-) diff --git a/src/elements/core.c b/src/elements/core.c index 92268a6c..d44deb15 100644 --- a/src/elements/core.c +++ b/src/elements/core.c @@ -129,6 +129,14 @@ static __pure_function __inline void *page_data(MDBX_page *mp) { return mp->mp_ptrs; } +static __pure_function __inline const MDBX_page *data_page(const void *data) { + return container_of(data, MDBX_page, mp_ptrs); +} + +static __pure_function __inline MDBX_meta *page_meta(MDBX_page *mp) { + return (MDBX_meta *)page_data(mp); +} + /* Number of nodes on a page */ static __pure_function __inline unsigned page_numkeys(const MDBX_page *mp) { return mp->mp_lower >> 1; @@ -2158,7 +2166,7 @@ static __maybe_unused void mdbx_page_list(MDBX_page *mp) { return; case P_META: mdbx_verbose("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno, - ((MDBX_meta *)page_data(mp))->mm_txnid_a.inconsistent); + page_meta(mp)->mm_txnid_a.inconsistent); return; default: mdbx_verbose("Bad page %" PRIaPGNO " flags 0x%X\n", pgno, mp->mp_flags); @@ -2621,8 +2629,10 @@ static __hot int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) { mdbx_kill_page(txn->mt_env, mp, pgno, npages); VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ); } - VALGRIND_MAKE_MEM_NOACCESS(&mp->mp_data, txn->mt_env->me_psize - PAGEHDRSZ); - ASAN_POISON_MEMORY_REGION(&mp->mp_data, txn->mt_env->me_psize - PAGEHDRSZ); + VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), + pgno2bytes(txn->mt_env, npages) - PAGEHDRSZ); + ASAN_POISON_MEMORY_REGION(page_data(mp), + pgno2bytes(txn->mt_env, npages) - PAGEHDRSZ); if (unlikely(npages > 1 /* overflow pages doesn't comes to the loose-list */)) { @@ -2919,8 +2929,7 @@ bailout: /*----------------------------------------------------------------------------*/ -#define METAPAGE(env, n) (&pgno2page(env, n)->mp_meta) - +#define METAPAGE(env, n) page_meta(pgno2page(env, n)) #define METAPAGE_END(env) METAPAGE(env, NUM_METAS) static __inline txnid_t meta_txnid(const MDBX_env *env, const MDBX_meta *meta, @@ -3393,10 +3402,8 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp, mdbx_debug("db %d use loose page %" PRIaPGNO, DDBI(mc), np->mp_pgno); mdbx_tassert(txn, np->mp_pgno < txn->mt_next_pgno); mdbx_ensure(env, np->mp_pgno >= NUM_METAS); - VALGRIND_MAKE_MEM_UNDEFINED(&np->mp_data, - txn->mt_env->me_psize - PAGEHDRSZ); - ASAN_UNPOISON_MEMORY_REGION(&np->mp_data, - txn->mt_env->me_psize - PAGEHDRSZ); + VALGRIND_MAKE_MEM_UNDEFINED(page_data(np), page_space(txn->mt_env)); + ASAN_UNPOISON_MEMORY_REGION(page_data(np), page_space(txn->mt_env)); *mp = np; return MDBX_SUCCESS; } @@ -4054,8 +4061,8 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) { if (!META_IS_STEADY(head) || ((flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) == 0 && unsynced_pages)) { mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO, - container_of(head, MDBX_page, mp_data)->mp_pgno, - mdbx_durable_str(head), unsynced_pages); + data_page(head)->mp_pgno, mdbx_durable_str(head), + unsynced_pages); MDBX_meta meta = *head; int err = mdbx_sync_locked(env, flags | MDBX_SHRINK_ALLOWED, &meta); if (unlikely(err != MDBX_SUCCESS)) { @@ -6392,26 +6399,22 @@ fail: /* Read the environment parameters of a DB environment * before mapping it into memory. */ -static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, +static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest, uint64_t *filesize) { - STATIC_ASSERT(offsetof(MDBX_page, mp_meta) == PAGEHDRSZ); - int rc = mdbx_filesize(env->me_fd, filesize); if (unlikely(rc != MDBX_SUCCESS)) return rc; - memset(meta, 0, sizeof(MDBX_meta)); - meta->mm_datasync_sign = MDBX_DATASIGN_WEAK; + memset(dest, 0, sizeof(MDBX_meta)); + dest->mm_datasync_sign = MDBX_DATASIGN_WEAK; rc = MDBX_CORRUPTED; /* Read twice all meta pages so we can find the latest one. */ unsigned loop_limit = NUM_METAS * 2; for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) { - MDBX_page page; - /* We don't know the page size on first time. * So, just guess it. */ - unsigned guess_pagesize = meta->mm_psize; + unsigned guess_pagesize = dest->mm_psize; if (guess_pagesize == 0) guess_pagesize = (loop_count > NUM_METAS) ? env->me_psize : env->me_os_psize; @@ -6419,52 +6422,55 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, const unsigned meta_number = loop_count % NUM_METAS; const unsigned offset = guess_pagesize * meta_number; + char buffer[MIN_PAGESIZE]; unsigned retryleft = 42; while (1) { mdbx_trace("reading meta[%d]: offset %u, bytes %u, retry-left %u", - meta_number, offset, (unsigned)sizeof(page), retryleft); - int err = mdbx_pread(env->me_fd, &page, sizeof(page), offset); + meta_number, offset, MIN_PAGESIZE, retryleft); + int err = mdbx_pread(env->me_fd, buffer, MIN_PAGESIZE, offset); if (err != MDBX_SUCCESS) { if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && *filesize == 0 && (env->me_flags & MDBX_RDONLY) == 0) mdbx_notice("read meta: empty file (%d, %s)", err, mdbx_strerror(err)); else - mdbx_error("read meta[%u,%u]: %i, %s", offset, (unsigned)sizeof(page), - err, mdbx_strerror(err)); + mdbx_error("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err, + mdbx_strerror(err)); return err; } - MDBX_page again; - err = mdbx_pread(env->me_fd, &again, sizeof(again), offset); + char again[MIN_PAGESIZE]; + err = mdbx_pread(env->me_fd, again, MIN_PAGESIZE, offset); if (err != MDBX_SUCCESS) { - mdbx_error("read meta[%u,%u]: %i, %s", offset, (unsigned)sizeof(again), - err, mdbx_strerror(err)); + mdbx_error("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err, + mdbx_strerror(err)); return err; } - if (memcmp(&page, &again, sizeof(page)) == 0 || --retryleft == 0) + if (memcmp(buffer, again, MIN_PAGESIZE) == 0 || --retryleft == 0) break; mdbx_verbose("meta[%u] was updated, re-read it", meta_number); } - if (page.mp_meta.mm_magic_and_version != MDBX_DATA_MAGIC && - page.mp_meta.mm_magic_and_version != MDBX_DATA_MAGIC_DEVEL) { + MDBX_page *const page = (MDBX_page *)buffer; + MDBX_meta *const meta = page_meta(page); + if (meta->mm_magic_and_version != MDBX_DATA_MAGIC && + meta->mm_magic_and_version != MDBX_DATA_MAGIC_DEVEL) { mdbx_error("meta[%u] has invalid magic/version %" PRIx64, meta_number, - page.mp_meta.mm_magic_and_version); - return ((page.mp_meta.mm_magic_and_version >> 8) != MDBX_MAGIC) + meta->mm_magic_and_version); + return ((meta->mm_magic_and_version >> 8) != MDBX_MAGIC) ? MDBX_INVALID : MDBX_VERSION_MISMATCH; } - if (page.mp_pgno != meta_number) { + if (page->mp_pgno != meta_number) { mdbx_error("meta[%u] has invalid pageno %" PRIaPGNO, meta_number, - page.mp_pgno); + page->mp_pgno); return MDBX_INVALID; } - if (page.mp_flags != P_META) { + if (page->mp_flags != P_META) { mdbx_error("page #%u not a meta-page", meta_number); return MDBX_INVALID; } @@ -6475,79 +6481,69 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, } /* LY: check pagesize */ - if (!is_powerof2(page.mp_meta.mm_psize) || - page.mp_meta.mm_psize < MIN_PAGESIZE || - page.mp_meta.mm_psize > MAX_PAGESIZE) { + if (!is_powerof2(meta->mm_psize) || meta->mm_psize < MIN_PAGESIZE || + meta->mm_psize > MAX_PAGESIZE) { mdbx_notice("meta[%u] has invalid pagesize (%u), skip it", meta_number, - page.mp_meta.mm_psize); - rc = is_powerof2(page.mp_meta.mm_psize) ? MDBX_VERSION_MISMATCH - : MDBX_INVALID; + meta->mm_psize); + rc = is_powerof2(meta->mm_psize) ? MDBX_VERSION_MISMATCH : MDBX_INVALID; continue; } - if (meta_number == 0 && guess_pagesize != page.mp_meta.mm_psize) { - meta->mm_psize = page.mp_meta.mm_psize; - mdbx_verbose("meta[%u] took pagesize %u", meta_number, - page.mp_meta.mm_psize); + if (meta_number == 0 && guess_pagesize != meta->mm_psize) { + dest->mm_psize = meta->mm_psize; + mdbx_verbose("meta[%u] took pagesize %u", meta_number, meta->mm_psize); } - if (safe64_read(&page.mp_meta.mm_txnid_a) != - safe64_read(&page.mp_meta.mm_txnid_b)) { + if (safe64_read(&meta->mm_txnid_a) != safe64_read(&meta->mm_txnid_b)) { mdbx_warning("meta[%u] not completely updated, skip it", meta_number); continue; } /* LY: check signature as a checksum */ - if (META_IS_STEADY(&page.mp_meta) && - page.mp_meta.mm_datasync_sign != mdbx_meta_sign(&page.mp_meta)) { + if (META_IS_STEADY(meta) && + meta->mm_datasync_sign != mdbx_meta_sign(meta)) { mdbx_notice("meta[%u] has invalid steady-checksum (0x%" PRIx64 " != 0x%" PRIx64 "), skip it", - meta_number, page.mp_meta.mm_datasync_sign, - mdbx_meta_sign(&page.mp_meta)); + meta_number, meta->mm_datasync_sign, mdbx_meta_sign(meta)); continue; } mdbx_debug("read meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s", - page.mp_pgno, page.mp_meta.mm_dbs[MAIN_DBI].md_root, - page.mp_meta.mm_dbs[FREE_DBI].md_root, page.mp_meta.mm_geo.lower, - page.mp_meta.mm_geo.next, page.mp_meta.mm_geo.now, - page.mp_meta.mm_geo.upper, page.mp_meta.mm_geo.grow, - page.mp_meta.mm_geo.shrink, page.mp_meta.mm_txnid_a.inconsistent, - mdbx_durable_str(&page.mp_meta)); + page->mp_pgno, meta->mm_dbs[MAIN_DBI].md_root, + meta->mm_dbs[FREE_DBI].md_root, meta->mm_geo.lower, + meta->mm_geo.next, meta->mm_geo.now, meta->mm_geo.upper, + meta->mm_geo.grow, meta->mm_geo.shrink, + meta->mm_txnid_a.inconsistent, mdbx_durable_str(meta)); /* LY: check min-pages value */ - if (page.mp_meta.mm_geo.lower < MIN_PAGENO || - page.mp_meta.mm_geo.lower > MAX_PAGENO) { + if (meta->mm_geo.lower < MIN_PAGENO || meta->mm_geo.lower > MAX_PAGENO) { mdbx_notice("meta[%u] has invalid min-pages (%" PRIaPGNO "), skip it", - meta_number, page.mp_meta.mm_geo.lower); + meta_number, meta->mm_geo.lower); rc = MDBX_INVALID; continue; } /* LY: check max-pages value */ - if (page.mp_meta.mm_geo.upper < MIN_PAGENO || - page.mp_meta.mm_geo.upper > MAX_PAGENO || - page.mp_meta.mm_geo.upper < page.mp_meta.mm_geo.lower) { + if (meta->mm_geo.upper < MIN_PAGENO || meta->mm_geo.upper > MAX_PAGENO || + meta->mm_geo.upper < meta->mm_geo.lower) { mdbx_notice("meta[%u] has invalid max-pages (%" PRIaPGNO "), skip it", - meta_number, page.mp_meta.mm_geo.upper); + meta_number, meta->mm_geo.upper); rc = MDBX_INVALID; continue; } /* LY: check last_pgno */ - if (page.mp_meta.mm_geo.next < MIN_PAGENO || - page.mp_meta.mm_geo.next - 1 > MAX_PAGENO) { + if (meta->mm_geo.next < MIN_PAGENO || meta->mm_geo.next - 1 > MAX_PAGENO) { mdbx_notice("meta[%u] has invalid next-pageno (%" PRIaPGNO "), skip it", - meta_number, page.mp_meta.mm_geo.next); + meta_number, meta->mm_geo.next); rc = MDBX_CORRUPTED; continue; } /* LY: check filesize & used_bytes */ - const uint64_t used_bytes = - page.mp_meta.mm_geo.next * (uint64_t)page.mp_meta.mm_psize; + const uint64_t used_bytes = meta->mm_geo.next * (uint64_t)meta->mm_psize; if (used_bytes > *filesize) { /* Here could be a race with DB-shrinking performed by other process */ rc = mdbx_filesize(env->me_fd, filesize); @@ -6563,8 +6559,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, } /* LY: check mapsize limits */ - const uint64_t mapsize_min = - page.mp_meta.mm_geo.lower * (uint64_t)page.mp_meta.mm_psize; + const uint64_t mapsize_min = meta->mm_geo.lower * (uint64_t)meta->mm_psize; STATIC_ASSERT(MAX_MAPSIZE < PTRDIFF_MAX - MAX_PAGESIZE); STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); if (mapsize_min < MIN_MAPSIZE || mapsize_min > MAX_MAPSIZE) { @@ -6574,14 +6569,12 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, continue; } - const uint64_t mapsize_max = - page.mp_meta.mm_geo.upper * (uint64_t)page.mp_meta.mm_psize; + const uint64_t mapsize_max = meta->mm_geo.upper * (uint64_t)meta->mm_psize; STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); if (mapsize_max > MAX_MAPSIZE || MAX_PAGENO < roundup_powerof2((size_t)mapsize_max, env->me_os_psize) / - (size_t)page.mp_meta.mm_psize) { - if (page.mp_meta.mm_geo.next - 1 > MAX_PAGENO || - used_bytes > MAX_MAPSIZE) { + (size_t)meta->mm_psize) { + if (meta->mm_geo.next - 1 > MAX_PAGENO || used_bytes > MAX_MAPSIZE) { mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), skip it", meta_number, mapsize_max); rc = MDBX_TOO_LARGE; @@ -6592,7 +6585,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), " "but size of used space still acceptable (%" PRIu64 ")", meta_number, mapsize_max, used_bytes); - page.mp_meta.mm_geo.upper = (pgno_t)(MAX_MAPSIZE / page.mp_meta.mm_psize); + meta->mm_geo.upper = (pgno_t)(MAX_MAPSIZE / meta->mm_psize); } /* LY: check and silently put mm_geo.now into [geo.lower...geo.upper]. @@ -6602,72 +6595,70 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, * at all. This is not a problem as there is no damage or loss of data. * Therefore it is better not to consider such situation as an error, but * silently correct it. */ - if (page.mp_meta.mm_geo.now < page.mp_meta.mm_geo.lower) - page.mp_meta.mm_geo.now = page.mp_meta.mm_geo.lower; - if (page.mp_meta.mm_geo.now > page.mp_meta.mm_geo.upper) - page.mp_meta.mm_geo.now = page.mp_meta.mm_geo.upper; + if (meta->mm_geo.now < meta->mm_geo.lower) + meta->mm_geo.now = meta->mm_geo.lower; + if (meta->mm_geo.now > meta->mm_geo.upper && + meta->mm_geo.next <= meta->mm_geo.upper) + meta->mm_geo.now = meta->mm_geo.upper; - if (page.mp_meta.mm_geo.next > page.mp_meta.mm_geo.now) { + if (meta->mm_geo.next > meta->mm_geo.now) { mdbx_notice("meta[%u] next-pageno (%" PRIaPGNO ") is beyond end-pgno (%" PRIaPGNO "), skip it", - meta_number, page.mp_meta.mm_geo.next, - page.mp_meta.mm_geo.now); + meta_number, meta->mm_geo.next, meta->mm_geo.now); rc = MDBX_CORRUPTED; continue; } /* LY: GC root */ - if (page.mp_meta.mm_dbs[FREE_DBI].md_root == P_INVALID) { - if (page.mp_meta.mm_dbs[FREE_DBI].md_branch_pages || - page.mp_meta.mm_dbs[FREE_DBI].md_depth || - page.mp_meta.mm_dbs[FREE_DBI].md_entries || - page.mp_meta.mm_dbs[FREE_DBI].md_leaf_pages || - page.mp_meta.mm_dbs[FREE_DBI].md_overflow_pages) { + if (meta->mm_dbs[FREE_DBI].md_root == P_INVALID) { + if (meta->mm_dbs[FREE_DBI].md_branch_pages || + meta->mm_dbs[FREE_DBI].md_depth || + meta->mm_dbs[FREE_DBI].md_entries || + meta->mm_dbs[FREE_DBI].md_leaf_pages || + meta->mm_dbs[FREE_DBI].md_overflow_pages) { mdbx_notice("meta[%u] has false-empty GC, skip it", meta_number); rc = MDBX_CORRUPTED; continue; } - } else if (page.mp_meta.mm_dbs[FREE_DBI].md_root >= - page.mp_meta.mm_geo.next) { + } else if (meta->mm_dbs[FREE_DBI].md_root >= meta->mm_geo.next) { mdbx_notice("meta[%u] has invalid GC-root %" PRIaPGNO ", skip it", - meta_number, page.mp_meta.mm_dbs[FREE_DBI].md_root); + meta_number, meta->mm_dbs[FREE_DBI].md_root); rc = MDBX_CORRUPTED; continue; } /* LY: MainDB root */ - if (page.mp_meta.mm_dbs[MAIN_DBI].md_root == P_INVALID) { - if (page.mp_meta.mm_dbs[MAIN_DBI].md_branch_pages || - page.mp_meta.mm_dbs[MAIN_DBI].md_depth || - page.mp_meta.mm_dbs[MAIN_DBI].md_entries || - page.mp_meta.mm_dbs[MAIN_DBI].md_leaf_pages || - page.mp_meta.mm_dbs[MAIN_DBI].md_overflow_pages) { + if (meta->mm_dbs[MAIN_DBI].md_root == P_INVALID) { + if (meta->mm_dbs[MAIN_DBI].md_branch_pages || + meta->mm_dbs[MAIN_DBI].md_depth || + meta->mm_dbs[MAIN_DBI].md_entries || + meta->mm_dbs[MAIN_DBI].md_leaf_pages || + meta->mm_dbs[MAIN_DBI].md_overflow_pages) { mdbx_notice("meta[%u] has false-empty maindb", meta_number); rc = MDBX_CORRUPTED; continue; } - } else if (page.mp_meta.mm_dbs[MAIN_DBI].md_root >= - page.mp_meta.mm_geo.next) { + } else if (meta->mm_dbs[MAIN_DBI].md_root >= meta->mm_geo.next) { mdbx_notice("meta[%u] has invalid maindb-root %" PRIaPGNO ", skip it", - meta_number, page.mp_meta.mm_dbs[MAIN_DBI].md_root); + meta_number, meta->mm_dbs[MAIN_DBI].md_root); rc = MDBX_CORRUPTED; continue; } - if (safe64_read(&page.mp_meta.mm_txnid_a) == 0) { + if (safe64_read(&meta->mm_txnid_a) == 0) { mdbx_warning("meta[%u] has zero txnid, skip it", meta_number); continue; } - if (mdbx_meta_ot(prefer_noweak, env, meta, &page.mp_meta)) { - *meta = page.mp_meta; - if (META_IS_WEAK(meta)) + if (mdbx_meta_ot(prefer_noweak, env, dest, meta)) { + *dest = *meta; + if (META_IS_WEAK(dest)) loop_limit += 1; /* LY: should re-read to hush race with update */ mdbx_verbose("latch meta[%u]", meta_number); } } - if (META_IS_WEAK(meta)) { + if (META_IS_WEAK(dest)) { mdbx_error("no usable meta-pages, database is corrupted"); return rc; } @@ -6689,49 +6680,49 @@ static MDBX_page *__cold mdbx_meta_model(const MDBX_env *env, MDBX_page *model, memset(model, 0, sizeof(*model)); model->mp_pgno = num; model->mp_flags = P_META; - model->mp_meta.mm_magic_and_version = MDBX_DATA_MAGIC; + MDBX_meta *const model_meta = page_meta(model); + model_meta->mm_magic_and_version = MDBX_DATA_MAGIC; - model->mp_meta.mm_geo.lower = bytes2pgno(env, env->me_dbgeo.lower); - model->mp_meta.mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper); - model->mp_meta.mm_geo.grow = (uint16_t)bytes2pgno(env, env->me_dbgeo.grow); - model->mp_meta.mm_geo.shrink = - (uint16_t)bytes2pgno(env, env->me_dbgeo.shrink); - model->mp_meta.mm_geo.now = bytes2pgno(env, env->me_dbgeo.now); - model->mp_meta.mm_geo.next = NUM_METAS; + model_meta->mm_geo.lower = bytes2pgno(env, env->me_dbgeo.lower); + model_meta->mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper); + model_meta->mm_geo.grow = (uint16_t)bytes2pgno(env, env->me_dbgeo.grow); + model_meta->mm_geo.shrink = (uint16_t)bytes2pgno(env, env->me_dbgeo.shrink); + model_meta->mm_geo.now = bytes2pgno(env, env->me_dbgeo.now); + model_meta->mm_geo.next = NUM_METAS; - mdbx_ensure(env, model->mp_meta.mm_geo.lower >= MIN_PAGENO); - mdbx_ensure(env, model->mp_meta.mm_geo.upper <= MAX_PAGENO); - mdbx_ensure(env, model->mp_meta.mm_geo.now >= model->mp_meta.mm_geo.lower); - mdbx_ensure(env, model->mp_meta.mm_geo.now <= model->mp_meta.mm_geo.upper); - mdbx_ensure(env, model->mp_meta.mm_geo.next >= MIN_PAGENO); - mdbx_ensure(env, model->mp_meta.mm_geo.next <= model->mp_meta.mm_geo.now); - mdbx_ensure(env, model->mp_meta.mm_geo.grow == - bytes2pgno(env, env->me_dbgeo.grow)); - mdbx_ensure(env, model->mp_meta.mm_geo.shrink == + mdbx_ensure(env, model_meta->mm_geo.lower >= MIN_PAGENO); + mdbx_ensure(env, model_meta->mm_geo.upper <= MAX_PAGENO); + mdbx_ensure(env, model_meta->mm_geo.now >= model_meta->mm_geo.lower); + mdbx_ensure(env, model_meta->mm_geo.now <= model_meta->mm_geo.upper); + mdbx_ensure(env, model_meta->mm_geo.next >= MIN_PAGENO); + mdbx_ensure(env, model_meta->mm_geo.next <= model_meta->mm_geo.now); + mdbx_ensure(env, + model_meta->mm_geo.grow == bytes2pgno(env, env->me_dbgeo.grow)); + mdbx_ensure(env, model_meta->mm_geo.shrink == bytes2pgno(env, env->me_dbgeo.shrink)); - model->mp_meta.mm_psize = env->me_psize; - model->mp_meta.mm_flags = (uint16_t)env->me_flags; - model->mp_meta.mm_flags |= + model_meta->mm_psize = env->me_psize; + model_meta->mm_flags = (uint16_t)env->me_flags; + model_meta->mm_flags |= MDBX_INTEGERKEY; /* this is mm_dbs[FREE_DBI].md_flags */ - model->mp_meta.mm_dbs[FREE_DBI].md_root = P_INVALID; - model->mp_meta.mm_dbs[MAIN_DBI].md_root = P_INVALID; - mdbx_meta_set_txnid(env, &model->mp_meta, MIN_TXNID + num); - model->mp_meta.mm_datasync_sign = mdbx_meta_sign(&model->mp_meta); + model_meta->mm_dbs[FREE_DBI].md_root = P_INVALID; + model_meta->mm_dbs[MAIN_DBI].md_root = P_INVALID; + mdbx_meta_set_txnid(env, model_meta, MIN_TXNID + num); + model_meta->mm_datasync_sign = mdbx_meta_sign(model_meta); return (MDBX_page *)((uint8_t *)model + env->me_psize); } /* Fill in most of the zeroed meta-pages for an empty database environment. * Return pointer to recenly (head) meta-page. */ -static MDBX_page *__cold mdbx_init_metas(const MDBX_env *env, void *buffer) { +static MDBX_meta *__cold mdbx_init_metas(const MDBX_env *env, void *buffer) { MDBX_page *page0 = (MDBX_page *)buffer; MDBX_page *page1 = mdbx_meta_model(env, page0, 0); MDBX_page *page2 = mdbx_meta_model(env, page1, 1); mdbx_meta_model(env, page2, 2); - mdbx_assert(env, !mdbx_meta_eq(env, &page0->mp_meta, &page1->mp_meta)); - mdbx_assert(env, !mdbx_meta_eq(env, &page1->mp_meta, &page2->mp_meta)); - mdbx_assert(env, !mdbx_meta_eq(env, &page2->mp_meta, &page0->mp_meta)); - return page2; + mdbx_assert(env, !mdbx_meta_eq(env, page_meta(page0), page_meta(page1))); + mdbx_assert(env, !mdbx_meta_eq(env, page_meta(page1), page_meta(page2))); + mdbx_assert(env, !mdbx_meta_eq(env, page_meta(page2), page_meta(page0))); + return page_meta(page2); } static int mdbx_sync_locked(MDBX_env *env, unsigned flags, @@ -6895,8 +6886,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, mdbx_debug("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s", - container_of(target, MDBX_page, mp_data)->mp_pgno, - pending->mm_dbs[MAIN_DBI].md_root, + data_page(target)->mp_pgno, pending->mm_dbs[MAIN_DBI].md_root, pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.lower, pending->mm_geo.next, pending->mm_geo.now, pending->mm_geo.upper, pending->mm_geo.grow, pending->mm_geo.shrink, @@ -6988,9 +6978,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, if ((flags & (MDBX_NOSYNC | MDBX_NOMETASYNC)) == 0) { mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); if (flags & MDBX_WRITEMAP) { - const size_t offset = - ((uint8_t *)container_of(head, MDBX_page, mp_meta)) - - env->me_dxb_mmap.dxb; + const size_t offset = (uint8_t *)data_page(head) - env->me_dxb_mmap.dxb; const size_t paged_offset = offset & ~(env->me_os_psize - 1); const size_t paged_length = roundup_powerof2( env->me_psize + offset - paged_offset, env->me_os_psize); @@ -7038,7 +7026,7 @@ int __cold mdbx_env_get_maxkeysize(MDBX_env *env) { static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) { STATIC_ASSERT(PTRDIFF_MAX > MAX_MAPSIZE); - STATIC_ASSERT(MIN_PAGESIZE > sizeof(MDBX_page)); + STATIC_ASSERT(MIN_PAGESIZE > sizeof(MDBX_page) + sizeof(MDBX_meta)); mdbx_ensure(env, is_powerof2(pagesize)); mdbx_ensure(env, pagesize >= MIN_PAGESIZE); mdbx_ensure(env, pagesize <= MAX_PAGESIZE); @@ -7613,7 +7601,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) { if (!buffer) return MDBX_ENOMEM; - meta = mdbx_init_metas(env, buffer)->mp_meta; + meta = *mdbx_init_metas(env, buffer); err = mdbx_pwrite(env->me_fd, buffer, env->me_psize * NUM_METAS, 0); mdbx_free(buffer); if (unlikely(err != MDBX_SUCCESS)) @@ -7824,9 +7812,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) { head->mm_txnid_a.inconsistent = undo_txnid; head->mm_datasync_sign = MDBX_DATASIGN_WEAK; head->mm_txnid_b.inconsistent = undo_txnid; - const size_t offset = - ((uint8_t *)container_of(head, MDBX_page, mp_meta)) - - env->me_dxb_mmap.dxb; + const size_t offset = (uint8_t *)data_page(head) - env->me_dxb_mmap.dxb; const size_t paged_offset = offset & ~(env->me_os_psize - 1); const size_t paged_length = roundup_powerof2( env->me_psize + offset - paged_offset, env->me_os_psize); @@ -8015,9 +8001,9 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, goto bailout; if (lck_seize_rc == MDBX_RESULT_TRUE) { - uint64_t wanna = roundup_powerof2( - (env->me_maxreaders - 1) * sizeof(MDBX_reader) + sizeof(MDBX_lockinfo), - env->me_os_psize); + uint64_t wanna = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) + + sizeof(MDBX_lockinfo), + env->me_os_psize); #ifndef NDEBUG err = mdbx_ftruncate(env->me_lfd, size = 0); if (unlikely(err != MDBX_SUCCESS)) @@ -8045,8 +8031,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, } const size_t maxreaders = - ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader) + 1; - if (maxreaders < 2 || maxreaders > MDBX_READERS_LIMIT) { + ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader); + if (size > 65536 || maxreaders < 2 || maxreaders > MDBX_READERS_LIMIT) { mdbx_error("lck-size too big (up to %" PRIuPTR " readers)", maxreaders); err = MDBX_PROBLEM; goto bailout; @@ -8424,8 +8410,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, mdbx_debug("opened database version %u, pagesize %u", (uint8_t)meta->mm_magic_and_version, env->me_psize); mdbx_debug("using meta page %" PRIaPGNO ", txn %" PRIaTXN, - container_of(meta, MDBX_page, mp_data)->mp_pgno, - mdbx_meta_txnid_fluid(env, meta)); + data_page(meta)->mp_pgno, mdbx_meta_txnid_fluid(env, meta)); mdbx_debug("depth: %u", db->md_depth); mdbx_debug("entries: %" PRIu64, db->md_entries); mdbx_debug("branch pages: %" PRIaPGNO, db->md_branch_pages); @@ -13428,34 +13413,31 @@ done: return rc; } -static void compact_fixup_meta(MDBX_env *env, MDBX_page *meta) { +static __cold void compact_fixup_meta(MDBX_env *env, MDBX_meta *meta) { /* Calculate filesize taking in account shrink/growing thresholds */ - if (meta->mp_meta.mm_geo.next > meta->mp_meta.mm_geo.now) { - const pgno_t aligned = - pgno_align2os_pgno(env, pgno_add(meta->mp_meta.mm_geo.next, - meta->mp_meta.mm_geo.grow - - meta->mp_meta.mm_geo.next % - meta->mp_meta.mm_geo.grow)); - meta->mp_meta.mm_geo.now = aligned; - } else if (meta->mp_meta.mm_geo.next < meta->mp_meta.mm_geo.now) { - meta->mp_meta.mm_geo.now = meta->mp_meta.mm_geo.next; - const pgno_t aligner = meta->mp_meta.mm_geo.grow - ? meta->mp_meta.mm_geo.grow - : meta->mp_meta.mm_geo.shrink; - const pgno_t aligned = - pgno_align2os_pgno(env, meta->mp_meta.mm_geo.next + aligner - - meta->mp_meta.mm_geo.next % aligner); - meta->mp_meta.mm_geo.now = aligned; + if (meta->mm_geo.next > meta->mm_geo.now) { + const pgno_t aligned = pgno_align2os_pgno( + env, + pgno_add(meta->mm_geo.next, + meta->mm_geo.grow - meta->mm_geo.next % meta->mm_geo.grow)); + meta->mm_geo.now = aligned; + } else if (meta->mm_geo.next < meta->mm_geo.now) { + meta->mm_geo.now = meta->mm_geo.next; + const pgno_t aligner = + meta->mm_geo.grow ? meta->mm_geo.grow : meta->mm_geo.shrink; + const pgno_t aligned = pgno_align2os_pgno( + env, meta->mm_geo.next + aligner - meta->mm_geo.next % aligner); + meta->mm_geo.now = aligned; } - if (meta->mp_meta.mm_geo.now < meta->mp_meta.mm_geo.lower) - meta->mp_meta.mm_geo.now = meta->mp_meta.mm_geo.lower; - if (meta->mp_meta.mm_geo.now > meta->mp_meta.mm_geo.upper) - meta->mp_meta.mm_geo.now = meta->mp_meta.mm_geo.upper; + if (meta->mm_geo.now < meta->mm_geo.lower) + meta->mm_geo.now = meta->mm_geo.lower; + if (meta->mm_geo.now > meta->mm_geo.upper) + meta->mm_geo.now = meta->mm_geo.upper; /* Update signature */ - assert(meta->mp_meta.mm_geo.now >= meta->mp_meta.mm_geo.next); - meta->mp_meta.mm_datasync_sign = mdbx_meta_sign(&meta->mp_meta); + assert(meta->mm_geo.now >= meta->mm_geo.next); + meta->mm_datasync_sign = mdbx_meta_sign(meta); } /* Copy environment with compaction. */ @@ -13463,12 +13445,13 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, mdbx_filehandle_t fd, uint8_t *buffer, const bool dest_is_pipe) { const size_t meta_bytes = pgno2bytes(env, NUM_METAS); - uint8_t *const data_buffer = buffer + meta_bytes; - MDBX_page *const meta = mdbx_init_metas(env, buffer); + uint8_t *const data_buffer = + buffer + roundup_powerof2(meta_bytes, env->me_os_psize); + MDBX_meta *const meta = mdbx_init_metas(env, buffer); /* copy canary sequenses if present */ if (read_txn->mt_canary.v) { - meta->mp_meta.mm_canary = read_txn->mt_canary; - meta->mp_meta.mm_canary.v = mdbx_meta_txnid_stable(env, &meta->mp_meta); + meta->mm_canary = read_txn->mt_canary; + meta->mm_canary.v = mdbx_meta_txnid_stable(env, meta); } /* Set metapage 1 with current main DB */ @@ -13476,8 +13459,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, if ((new_root = root) == P_INVALID) { /* When the DB is empty, handle it specially to * fix any breakage like page leaks from ITS#8174. */ - meta->mp_meta.mm_dbs[MAIN_DBI].md_flags = - read_txn->mt_dbs[MAIN_DBI].md_flags; + meta->mm_dbs[MAIN_DBI].md_flags = read_txn->mt_dbs[MAIN_DBI].md_flags; compact_fixup_meta(env, meta); if (dest_is_pipe) { int rc = mdbx_write(fd, buffer, meta_bytes); @@ -13504,9 +13486,9 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, read_txn->mt_dbs[FREE_DBI].md_overflow_pages; new_root = read_txn->mt_next_pgno - 1 - freecount; - meta->mp_meta.mm_geo.next = new_root + 1; - meta->mp_meta.mm_dbs[MAIN_DBI] = read_txn->mt_dbs[MAIN_DBI]; - meta->mp_meta.mm_dbs[MAIN_DBI].md_root = new_root; + meta->mm_geo.next = new_root + 1; + meta->mm_dbs[MAIN_DBI] = read_txn->mt_dbs[MAIN_DBI]; + meta->mm_dbs[MAIN_DBI].md_root = new_root; mdbx_copy ctx; memset(&ctx, 0, sizeof(ctx)); @@ -13562,20 +13544,20 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, " LT expected %" PRIaPGNO " (page leak(s) in source DB)", root, new_root); /* fixup meta */ - meta->mp_meta.mm_dbs[MAIN_DBI].md_root = root; - meta->mp_meta.mm_geo.next = root + 1; + meta->mm_dbs[MAIN_DBI].md_root = root; + meta->mm_geo.next = root + 1; } compact_fixup_meta(env, meta); } } /* Extend file if required */ - if (meta->mp_meta.mm_geo.now != meta->mp_meta.mm_geo.next) { - const size_t whole_size = pgno2bytes(env, meta->mp_meta.mm_geo.now); + if (meta->mm_geo.now != meta->mm_geo.next) { + const size_t whole_size = pgno2bytes(env, meta->mm_geo.now); if (!dest_is_pipe) return mdbx_ftruncate(fd, whole_size); - const size_t used_size = pgno2bytes(env, meta->mp_meta.mm_geo.next); + const size_t used_size = pgno2bytes(env, meta->mm_geo.next); memset(data_buffer, 0, MDBX_WBUF); for (size_t offset = used_size; offset < whole_size;) { const size_t chunk = @@ -13630,7 +13612,8 @@ static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, if (dest_is_pipe) rc = mdbx_write(fd, buffer, meta_bytes); - uint8_t *const data_buffer = buffer + meta_bytes; + uint8_t *const data_buffer = + buffer + roundup_powerof2(meta_bytes, env->me_os_psize); for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) { if (dest_is_pipe) { #if defined(__linux__) || defined(__gnu_linux__) @@ -13705,10 +13688,10 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, return rc; } - const size_t buffer_size = roundup_powerof2( - pgno2bytes(env, NUM_METAS) + - ((flags & MDBX_CP_COMPACT) ? MDBX_WBUF * 2 : MDBX_WBUF), - env->me_os_psize); + const size_t buffer_size = + roundup_powerof2(pgno2bytes(env, NUM_METAS), env->me_os_psize) + + roundup_powerof2(((flags & MDBX_CP_COMPACT) ? MDBX_WBUF * 2 : MDBX_WBUF), + env->me_os_psize); uint8_t *buffer = NULL; int rc = mdbx_memalign_alloc(env->me_os_psize, buffer_size, (void **)&buffer); diff --git a/src/elements/internals.h b/src/elements/internals.h index a430c0c7..7e1dbc34 100644 --- a/src/elements/internals.h +++ b/src/elements/internals.h @@ -467,11 +467,7 @@ typedef struct MDBX_page { pgno_t mp_pgno; /* page number */ /* dynamic size */ - union { - indx_t mp_ptrs[1]; - MDBX_meta mp_meta; - uint8_t mp_data[1]; - }; + indx_t mp_ptrs[/* C99 */]; } MDBX_page; /* Size of the page header, excluding dynamic data at the end */ @@ -629,7 +625,7 @@ typedef struct MDBX_lockinfo { volatile unsigned mti_readers_refresh_flag; alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/ - MDBX_reader mti_readers[1]; + MDBX_reader mti_readers[/* C99 */]; } MDBX_lockinfo; /* Lockfile format signature: version, features and field layout */ @@ -637,7 +633,8 @@ typedef struct MDBX_lockinfo { (MDBX_OSAL_LOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 + \ (unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 + \ (unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 + \ - (unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 29) + (unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 37 + \ + (unsigned)offsetof(MDBX_lockinfo, mti_readers) * 29) #define MDBX_DATA_MAGIC ((MDBX_MAGIC << 8) + MDBX_DATA_VERSION) #define MDBX_DATA_MAGIC_DEVEL ((MDBX_MAGIC << 8) + 255) @@ -676,7 +673,7 @@ typedef struct MDBX_lockinfo { #if MDBX_WORDBITS >= 64 #define MAX_MAPSIZE MAX_MAPSIZE64 #define MDBX_READERS_LIMIT \ - ((65536 - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader) + 1) + ((65536 - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader)) #else #define MDBX_READERS_LIMIT 1024 #define MAX_MAPSIZE MAX_MAPSIZE32 @@ -779,7 +776,7 @@ struct MDBX_txn { #define mt_end_pgno mt_geo.now /* Transaction Flags */ -/* mdbx_txn_begin() flags */ + /* mdbx_txn_begin() flags */ #define MDBX_TXN_BEGIN_FLAGS \ (MDBX_NOMETASYNC | MDBX_NOSYNC | MDBX_MAPASYNC | MDBX_RDONLY | MDBX_TRYTXN) /* internal txn flags */ @@ -788,7 +785,7 @@ struct MDBX_txn { #define MDBX_TXN_DIRTY 0x04 /* must write, even if dirty list is empty */ #define MDBX_TXN_SPILLS 0x08 /* txn or a parent has spilled pages */ #define MDBX_TXN_HAS_CHILD 0x10 /* txn has an MDBX_txn.mt_child */ -/* most operations on the txn are currently illegal */ + /* most operations on the txn are currently illegal */ #define MDBX_TXN_BLOCKED \ (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD) unsigned mt_flags; @@ -804,7 +801,7 @@ struct MDBX_txn { /* Array of sequence numbers for each DB handle */ unsigned *mt_dbiseqs; -/* Transaction DB Flags */ + /* Transaction DB Flags */ #define DB_DIRTY MDBX_TBL_DIRTY /* DB was written in this txn */ #define DB_STALE MDBX_TBL_STALE /* Named-DB record is older than txnID */ #define DB_FRESH MDBX_TBL_FRESH /* Named-DB handle opened in this txn */ @@ -948,13 +945,13 @@ struct MDBX_env { #define me_lfd me_lck_mmap.fd #define me_lck me_lck_mmap.lck -/* Failed to update the meta page. Probably an I/O error. */ + /* Failed to update the meta page. Probably an I/O error. */ #define MDBX_FATAL_ERROR UINT32_C(0x80000000) -/* Additional flag for mdbx_sync_locked() */ + /* Additional flag for mdbx_sync_locked() */ #define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000) -/* Some fields are initialized. */ + /* Some fields are initialized. */ #define MDBX_ENV_ACTIVE UINT32_C(0x20000000) -/* me_txkey is set */ + /* me_txkey is set */ #define MDBX_ENV_TXKEY UINT32_C(0x10000000) uint32_t me_flags; /* see mdbx_env */ unsigned me_psize; /* DB page size, inited from me_os_psize */ @@ -1231,7 +1228,7 @@ MDBX_INTERNAL_FUNC void mdbx_rthc_thread_dtor(void *ptr); /* Default size of memory map. * This is certainly too small for any actual applications. Apps should * always set the size explicitly using mdbx_env_set_mapsize(). */ -#define DEFAULT_MAPSIZE 1048576 +#define DEFAULT_MAPSIZE MEGABYTE /* Number of slots in the reader table. * This value was chosen somewhat arbitrarily. The 61 is a prime number, @@ -1290,12 +1287,12 @@ typedef struct MDBX_node { }; #endif -/* mdbx_node Flags */ + /* mdbx_node Flags */ #define F_BIGDATA 0x01 /* data put on overflow page */ #define F_SUBDATA 0x02 /* data is a sub-database */ #define F_DUPDATA 0x04 /* data has duplicates */ -/* valid flags for mdbx_node_add() */ + /* valid flags for mdbx_node_add() */ #define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND) uint8_t mn_data[/* C99 */]; /* key and data are appended here */ } MDBX_node;