mdbx: refactoring of `dxb_resize()` and related code.

Among other things, this eliminates the firing of the assert check `size_bytes == env->me_dxb_mmap.current` in specific multi-threaded usage scenarios. The check fired only in debug builds, when a reading and a writing transaction in different threads overlapped in time while the database size was being changed concurrently. Apart from the assertion firing, there were no other consequences.
commit a484a1f89b
parent 0979a93a78
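For illustration, a minimal standalone C sketch of the assertion policy this refactoring adopts (illustrative only, not libmdbx code; the `mmap_state` struct and the sizes are hypothetical): an implicit grow performed on behalf of a transaction only guarantees that the mapping is at least as large as requested, because another thread or process may already have grown the file, so the strict equality check remains only for an explicit resize requested through the geometry API.

/* sketch: mode-dependent post-resize invariant (illustrative only) */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

enum resize_mode { implicit_grow, impilict_shrink, explicit_resize };

struct mmap_state {
  size_t current; /* bytes the mapping currently covers in this process */
};

/* Check the post-resize invariant the way the patched dxb_resize() does:
 * an exact match only for an explicit resize, "at least as large" otherwise. */
static void check_after_resize(const struct mmap_state *m, size_t size_bytes,
                               enum resize_mode mode) {
  if (mode == explicit_resize)
    assert(size_bytes == m->current);
  else
    assert(size_bytes <= m->current);
}

int main(void) {
  struct mmap_state m = {.current = 8u << 20}; /* 8 MiB already mapped */

  /* Another thread/process grew the file to 8 MiB while this transaction
   * computed a 6 MiB target: an implicit grow must tolerate that. */
  check_after_resize(&m, 6u << 20, implicit_grow);

  /* An explicit geometry change still demands an exact match. */
  m.current = 6u << 20;
  check_after_resize(&m, 6u << 20, explicit_resize);

  puts("invariants hold");
  return 0;
}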
@@ -10,6 +10,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic
 
 - Max <maxc0d3r@protonmail.com> за сообщение о проблеме ERROR_SHARING_VIOLATION
   в режиме MDBX_EXCLUSIVE на Windows.
+- Alisher Ashyrov https://t.me/a1is43ras4 за сообщение о проблеме с assert-проверкой и содействие в отладке.
 
 Исправления (без корректировок новых функций):
 
@ -21,6 +22,13 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic
|
||||||
предотвращении ошибки ERROR_NOT_ENOUGH_MEMORY в Windows, которая совсем
|
предотвращении ошибки ERROR_NOT_ENOUGH_MEMORY в Windows, которая совсем
|
||||||
не информативна для пользователя и возникает в этом случае.
|
не информативна для пользователя и возникает в этом случае.
|
||||||
|
|
||||||
|
- Рефакторинг `dxb_resize()`. В том числе, для устранения срабатывания
|
||||||
|
assert-проверки `size_bytes == env->me_dxb_mmap.current` в специфических
|
||||||
|
многопоточных сценариях использования. Проверка срабатывала только в
|
||||||
|
отладочных сборках, при специфическом наложении во времени читающей и
|
||||||
|
пишущей транзакции в разных потоках, одновременно с изменением размера БД.
|
||||||
|
Кроме срабатывание проверки, каких-либо других последствий не возникало.
|
||||||
|
|
||||||
Ликвидация технических долгов и мелочи:
|
Ликвидация технических долгов и мелочи:
|
||||||
|
|
||||||
- Исправление опечаток.
|
- Исправление опечаток.
|
||||||
|
|
src/core.c: 255 changed lines
@@ -6011,21 +6011,43 @@ static void adjust_defaults(MDBX_env *env) {
       bytes2pgno(env, bytes_align2os_bytes(env, threshold));
 }
 
-__cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
-                             const pgno_t size_pgno, const pgno_t limit_pgno,
-                             const bool implicit) {
-  const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
-  const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
+enum resize_mode { implicit_grow, impilict_shrink, explicit_resize };
+
+__cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno,
+                             const pgno_t size_pgno, pgno_t limit_pgno,
+                             const enum resize_mode mode) {
+  /* Acquire guard to avoid collision between read and write txns
+   * around me_dbgeo and me_dxb_mmap */
+#if defined(_WIN32) || defined(_WIN64)
+  osal_srwlock_AcquireExclusive(&env->me_remap_guard);
+  int rc = MDBX_SUCCESS;
+  mdbx_handle_array_t *suspended = NULL;
+  mdbx_handle_array_t array_onstack;
+#else
+  int rc = osal_fastmutex_acquire(&env->me_remap_guard);
+  if (unlikely(rc != MDBX_SUCCESS))
+    return rc;
+#endif
+
   const size_t prev_size = env->me_dxb_mmap.current;
   const size_t prev_limit = env->me_dxb_mmap.limit;
+  const pgno_t prev_limit_pgno = bytes2pgno(env, prev_limit);
+  eASSERT(env, prev_limit_pgno >= used_pgno);
+  if (mode < explicit_resize && size_pgno <= prev_limit_pgno) {
+    /* The actual mapsize may be less since the geo.upper may be changed
+     * by other process. Avoids remapping until it necessary. */
+    limit_pgno = prev_limit_pgno;
+  }
+  const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
+  const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
 #if MDBX_ENABLE_MADVISE || defined(MDBX_USE_VALGRIND)
   const void *const prev_map = env->me_dxb_mmap.base;
 #endif /* MDBX_ENABLE_MADVISE || MDBX_USE_VALGRIND */
 
-  VERBOSE("resize datafile/mapping: "
+  VERBOSE("resize/%d datafile/mapping: "
           "present %" PRIuPTR " -> %" PRIuPTR ", "
           "limit %" PRIuPTR " -> %" PRIuPTR,
-          prev_size, size_bytes, prev_limit, limit_bytes);
+          mode, prev_size, size_bytes, prev_limit, limit_bytes);
 
   eASSERT(env, limit_bytes >= size_bytes);
   eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno);
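A condensed sketch of how the new `resize_mode` gates the remap behaviour later in `dxb_resize()` (illustrative only: the flag names and values below are stand-ins, and the real code additionally conditions the MAY_UNMAP/MAY_MOVE part on the platform, `MDBX_NOTLS` and the readers table):

/* sketch: deriving remap flags from the resize mode (stand-in flag values) */
#include <stdio.h>

enum resize_mode { implicit_grow, impilict_shrink, explicit_resize };

#define MRESIZE_MAY_UNMAP 0x1u /* stand-in for MDBX_MRESIZE_MAY_UNMAP */
#define MRESIZE_MAY_MOVE 0x2u  /* stand-in for MDBX_MRESIZE_MAY_MOVE */
#define SHRINK_ALLOWED 0x4u    /* stand-in for MDBX_SHRINK_ALLOWED */

static unsigned mresize_flags_for(enum resize_mode mode) {
  unsigned flags = 0;
  if (mode >= impilict_shrink) /* shrink requests may truncate the file */
    flags |= SHRINK_ALLOWED;
  /* remapping that may move the base address is reserved for an explicit
   * resize; an implicit one may at most unmap/extend in place */
  flags |= (mode < explicit_resize) ? MRESIZE_MAY_UNMAP
                                    : MRESIZE_MAY_UNMAP | MRESIZE_MAY_MOVE;
  return flags;
}

int main(void) {
  printf("implicit_grow   -> 0x%x\n", mresize_flags_for(implicit_grow));
  printf("impilict_shrink -> 0x%x\n", mresize_flags_for(impilict_shrink));
  printf("explicit_resize -> 0x%x\n", mresize_flags_for(explicit_resize));
  return 0;
}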
@@ -6033,20 +6055,18 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
 
   unsigned mresize_flags =
       env->me_flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC);
-#if defined(_WIN32) || defined(_WIN64)
-  /* Acquire guard in exclusive mode for:
-   * - to avoid collision between read and write txns around env->me_dbgeo;
-   * - to avoid attachment of new reading threads (see osal_rdt_lock); */
-  osal_srwlock_AcquireExclusive(&env->me_remap_guard);
-  mdbx_handle_array_t *suspended = NULL;
-  mdbx_handle_array_t array_onstack;
-  int rc = MDBX_SUCCESS;
+  if (mode >= impilict_shrink)
+    mresize_flags |= MDBX_SHRINK_ALLOWED;
+
   if (limit_bytes == env->me_dxb_mmap.limit &&
       size_bytes == env->me_dxb_mmap.current &&
       size_bytes == env->me_dxb_mmap.filesize)
     goto bailout;
 
-  if ((env->me_flags & MDBX_NOTLS) == 0) {
+#if defined(_WIN32) || defined(_WIN64)
+  if ((env->me_flags & MDBX_NOTLS) == 0 &&
+      ((size_bytes < env->me_dxb_mmap.current && mode > implicit_grow) ||
+       limit_bytes != env->me_dxb_mmap.limit)) {
     /* 1) Windows allows only extending a read-write section, but not a
      *    corresponding mapped view. Therefore in other cases we must suspend
      *    the local threads for safe remap.
@@ -6064,65 +6084,61 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
       ERROR("failed suspend-for-remap: errcode %d", rc);
       goto bailout;
     }
-    mresize_flags |= implicit ? MDBX_MRESIZE_MAY_UNMAP
-                              : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
+    mresize_flags |= (mode < explicit_resize)
+                         ? MDBX_MRESIZE_MAY_UNMAP
+                         : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
   }
 #else /* Windows */
-  /* Acquire guard to avoid collision between read and write txns
-   * around env->me_dbgeo */
-  int rc = osal_fastmutex_acquire(&env->me_remap_guard);
-  if (unlikely(rc != MDBX_SUCCESS))
-    return rc;
-  if (limit_bytes == env->me_dxb_mmap.limit &&
-      size_bytes == env->me_dxb_mmap.current)
-    goto bailout;
-
   MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
-  if (limit_bytes != env->me_dxb_mmap.limit && !(env->me_flags & MDBX_NOTLS) &&
-      lck && !implicit) {
-    int err = osal_rdt_lock(env) /* lock readers table until remap done */;
-    if (unlikely(MDBX_IS_ERROR(err))) {
-      rc = err;
-      goto bailout;
-    }
-
-    /* looking for readers from this process */
-    const size_t snap_nreaders =
-        atomic_load32(&lck->mti_numreaders, mo_AcquireRelease);
-    eASSERT(env, !implicit);
+  if (mode == explicit_resize && limit_bytes != env->me_dxb_mmap.limit &&
+      !(env->me_flags & MDBX_NOTLS)) {
     mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
-    for (size_t i = 0; i < snap_nreaders; ++i) {
-      if (lck->mti_readers[i].mr_pid.weak == env->me_pid &&
-          lck->mti_readers[i].mr_tid.weak != osal_thread_self()) {
-        /* the base address of the mapping can't be changed since
-         * the other reader thread from this process exists. */
-        osal_rdt_unlock(env);
-        mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE);
-        break;
+    if (lck) {
+      int err = osal_rdt_lock(env) /* lock readers table until remap done */;
+      if (unlikely(MDBX_IS_ERROR(err))) {
+        rc = err;
+        goto bailout;
+      }
+
+      /* looking for readers from this process */
+      const size_t snap_nreaders =
+          atomic_load32(&lck->mti_numreaders, mo_AcquireRelease);
+      eASSERT(env, mode == explicit_resize);
+      for (size_t i = 0; i < snap_nreaders; ++i) {
+        if (lck->mti_readers[i].mr_pid.weak == env->me_pid &&
+            lck->mti_readers[i].mr_tid.weak != osal_thread_self()) {
+          /* the base address of the mapping can't be changed since
+           * the other reader thread from this process exists. */
+          osal_rdt_unlock(env);
+          mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE);
+          break;
+        }
       }
     }
   }
 #endif /* ! Windows */
 
-  if ((env->me_flags & MDBX_WRITEMAP) && env->me_lck->mti_unsynced_pages.weak) {
-#if MDBX_ENABLE_PGOP_STAT
-    env->me_lck->mti_pgop_stat.msync.weak += 1;
-#endif /* MDBX_ENABLE_PGOP_STAT */
-    rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno),
-                    MDBX_SYNC_NONE);
-    if (unlikely(rc != MDBX_SUCCESS))
-      goto bailout;
-  }
-
   const pgno_t aligned_munlock_pgno =
       (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE))
           ? 0
           : bytes2pgno(env, size_bytes);
+  if (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) {
+    mincore_clean_cache(env);
+    if ((env->me_flags & MDBX_WRITEMAP) &&
+        env->me_lck->mti_unsynced_pages.weak) {
+#if MDBX_ENABLE_PGOP_STAT
+      env->me_lck->mti_pgop_stat.msync.weak += 1;
+#endif /* MDBX_ENABLE_PGOP_STAT */
+      rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno),
+                      MDBX_SYNC_NONE);
+      if (unlikely(rc != MDBX_SUCCESS))
+        goto bailout;
+    }
+  }
   munlock_after(env, aligned_munlock_pgno, size_bytes);
-  mincore_clean_cache(env);
 
 #if MDBX_ENABLE_MADVISE
-  if (size_bytes < prev_size) {
+  if (size_bytes < prev_size && mode > implicit_grow) {
     NOTICE("resize-MADV_%s %u..%u",
            (env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno,
            bytes2pgno(env, prev_size));
@@ -6181,7 +6197,10 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
   if (rc == MDBX_SUCCESS) {
     eASSERT(env, limit_bytes == env->me_dxb_mmap.limit);
     eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize);
-    eASSERT(env, size_bytes == env->me_dxb_mmap.current);
+    if (mode == explicit_resize)
+      eASSERT(env, size_bytes == env->me_dxb_mmap.current);
+    else
+      eASSERT(env, size_bytes <= env->me_dxb_mmap.current);
     env->me_lck->mti_discarded_tail.weak = size_pgno;
     const bool readahead =
         !(env->me_flags & MDBX_NORDAHEAD) &&
@@ -6200,7 +6219,10 @@ bailout:
   if (rc == MDBX_SUCCESS) {
     eASSERT(env, limit_bytes == env->me_dxb_mmap.limit);
     eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize);
-    eASSERT(env, size_bytes == env->me_dxb_mmap.current);
+    if (mode == explicit_resize)
+      eASSERT(env, size_bytes == env->me_dxb_mmap.current);
+    else
+      eASSERT(env, size_bytes <= env->me_dxb_mmap.current);
     /* update env-geo to avoid influences */
     env->me_dbgeo.now = env->me_dxb_mmap.current;
     env->me_dbgeo.upper = env->me_dxb_mmap.limit;
@@ -6255,21 +6277,6 @@ bailout:
   return rc;
 }
 
-__cold static int map_resize_implicit(MDBX_env *env, const pgno_t used_pgno,
-                                      const pgno_t size_pgno,
-                                      const pgno_t limit_pgno) {
-  const pgno_t mapped_pgno = bytes2pgno(env, env->me_dxb_mmap.limit);
-  eASSERT(env, mapped_pgno >= used_pgno);
-  return map_resize(
-      env, used_pgno, size_pgno,
-      (size_pgno > mapped_pgno)
-          ? limit_pgno
-          : /* The actual mapsize may be less since the geo.upper may be changed
-               by other process. So, avoids remapping until it necessary. */
-          mapped_pgno,
-      true);
-}
-
 static int meta_unsteady(int err, MDBX_env *env, const txnid_t early_than,
                          const pgno_t pgno) {
   MDBX_meta *const meta = METAPAGE(env, pgno);
@@ -7649,8 +7656,8 @@ no_gc:
 
     VERBOSE("try growth datafile to %zu pages (+%zu)", aligned,
             aligned - txn->mt_end_pgno);
-    ret.err = map_resize_implicit(env, txn->mt_next_pgno, (pgno_t)aligned,
-                                  txn->mt_geo.upper);
+    ret.err = dxb_resize(env, txn->mt_next_pgno, (pgno_t)aligned,
+                         txn->mt_geo.upper, implicit_grow);
     if (ret.err != MDBX_SUCCESS) {
       ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned,
             aligned - txn->mt_end_pgno, ret.err);
@@ -8095,8 +8102,8 @@ retry:;
   if (!inside_txn && locked && (env->me_flags & MDBX_WRITEMAP) &&
       unlikely(head.ptr_c->mm_geo.next >
                bytes2pgno(env, env->me_dxb_mmap.current))) {
-    rc = map_resize_implicit(env, head.ptr_c->mm_geo.next,
-                             head.ptr_c->mm_geo.now, head.ptr_c->mm_geo.upper);
+    rc = dxb_resize(env, head.ptr_c->mm_geo.next, head.ptr_c->mm_geo.now,
+                    head.ptr_c->mm_geo.upper, implicit_grow);
     if (unlikely(rc != MDBX_SUCCESS))
       goto bailout;
   }
@@ -8974,6 +8981,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
   txn->mt_dbistate[MAIN_DBI] = DBI_VALID | DBI_USRVALID;
   rc =
       setup_dbx(&txn->mt_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize);
+  if (unlikely(rc != MDBX_SUCCESS))
+    goto bailout;
   txn->mt_dbistate[FREE_DBI] = DBI_VALID;
   txn->mt_front =
       txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0);
@@ -8982,34 +8991,80 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
     WARNING("%s", "environment had fatal error, must shutdown!");
     rc = MDBX_PANIC;
   } else {
-    const size_t size =
-        pgno2bytes(env, (txn->mt_flags & MDBX_TXN_RDONLY) ? txn->mt_next_pgno
-                                                          : txn->mt_end_pgno);
-    if (unlikely(size > env->me_dxb_mmap.limit)) {
+    const size_t size_bytes = pgno2bytes(env, txn->mt_end_pgno);
+    const size_t used_bytes = pgno2bytes(env, txn->mt_next_pgno);
+    const size_t required_bytes =
+        (txn->mt_flags & MDBX_TXN_RDONLY) ? used_bytes : size_bytes;
+    if (unlikely(required_bytes > env->me_dxb_mmap.current)) {
+      /* Размер БД (для пишущих транзакций) или используемых данных (для
+       * читающих транзакций) больше предыдущего/текущего размера внутри
+       * процесса, увеличиваем. Сюда также попадает случай увеличения верхней
+       * границы размера БД и отображения. В читающих транзакциях нельзя
+       * изменять размер файла, который может быть больше необходимого этой
+       * транзакции. */
       if (txn->mt_geo.upper > MAX_PAGENO + 1 ||
           bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) !=
               txn->mt_geo.upper) {
         rc = MDBX_UNABLE_EXTEND_MAPSIZE;
         goto bailout;
       }
-      rc = map_resize(env, txn->mt_next_pgno, txn->mt_end_pgno,
-                      txn->mt_geo.upper,
-                      (txn->mt_flags & MDBX_TXN_RDONLY) ? true : false);
-      if (rc != MDBX_SUCCESS)
+      rc = dxb_resize(env, txn->mt_next_pgno, txn->mt_end_pgno,
+                      txn->mt_geo.upper, implicit_grow);
+      if (unlikely(rc != MDBX_SUCCESS))
+        goto bailout;
+    } else if (unlikely(size_bytes < env->me_dxb_mmap.current)) {
+      /* Размер БД меньше предыдущего/текущего размера внутри процесса, можно
+       * уменьшить, но всё сложнее:
+       *  - размер файла согласован со всеми читаемыми снимками на момент
+       *    коммита последней транзакции;
+       *  - в читающей транзакции размер файла может быть больше и него нельзя
+       *    изменять, в том числе менять madvise (меньша размера файла нельзя,
+       *    а за размером нет смысла).
+       *  - в пишущей транзакции уменьшать размер файла можно только после
+       *    проверки размера читаемых снимков, но в этом нет смысла, так как
+       *    это будет сделано при фиксации транзакции.
+       *
+       * В сухом остатке, можно только установить dxb_mmap.current равным
+       * размеру файла, а это проще сделать без вызова dxb_resize() и усложения
+       * внутренней логики.
+       *
+       * В этой тактике есть недостаток: если пишущите транзакции не регулярны,
+       * и при завершении такой транзакции файл БД остаётся не-уменьшеным из-за
+       * читающих транзакций использующих предыдущие снимки. */
+#if defined(_WIN32) || defined(_WIN64)
+      osal_srwlock_AcquireShared(&env->me_remap_guard);
+#else
+      rc = osal_fastmutex_acquire(&env->me_remap_guard);
+#endif
+      if (likely(rc == MDBX_SUCCESS)) {
+        rc = osal_filesize(env->me_dxb_mmap.fd, &env->me_dxb_mmap.filesize);
+        if (likely(rc == MDBX_SUCCESS)) {
+          eASSERT(env, env->me_dxb_mmap.filesize >= required_bytes);
+          if (env->me_dxb_mmap.current > env->me_dxb_mmap.filesize)
+            env->me_dxb_mmap.current = (size_t)env->me_dxb_mmap.filesize;
+        }
+#if defined(_WIN32) || defined(_WIN64)
+        osal_srwlock_ReleaseShared(&env->me_remap_guard);
+#else
+        int err = osal_fastmutex_release(&env->me_remap_guard);
+        if (unlikely(err) && likely(rc == MDBX_SUCCESS))
+          rc = err;
+#endif
+      }
+      if (unlikely(rc != MDBX_SUCCESS))
         goto bailout;
-    } else {
-      env->me_dxb_mmap.current = size;
-      env->me_dxb_mmap.filesize =
-          (env->me_dxb_mmap.filesize < size) ? size : env->me_dxb_mmap.filesize;
     }
+    eASSERT(env,
+            pgno2bytes(env, txn->mt_next_pgno) <= env->me_dxb_mmap.current);
+    eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current);
     if (txn->mt_flags & MDBX_TXN_RDONLY) {
 #if defined(_WIN32) || defined(_WIN64)
-      if (((size > env->me_dbgeo.lower && env->me_dbgeo.shrink) ||
+      if (((used_bytes > env->me_dbgeo.lower && env->me_dbgeo.shrink) ||
            (mdbx_RunningUnderWine() &&
             /* under Wine acquisition of remap_guard is always required,
              * since Wine don't support section extending,
              * i.e. in both cases unmap+map are required. */
-            size < env->me_dbgeo.upper && env->me_dbgeo.grow)) &&
+            used_bytes < env->me_dbgeo.upper && env->me_dbgeo.grow)) &&
           /* avoid recursive use SRW */ (txn->mt_flags & MDBX_NOTLS) == 0) {
         txn->mt_flags |= MDBX_SHRINK_ALLOWED;
         osal_srwlock_AcquireShared(&env->me_remap_guard);
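The `txn_renew()` branch added above boils down to: when the datafile is already smaller than the in-process mapping, do not call `dxb_resize()` at all, just re-read the file size under a shared `me_remap_guard` and clamp `me_dxb_mmap.current` down to it. A standalone sketch of that pattern, with hypothetical types and helpers and a pthread rwlock standing in for the guard:

/* sketch: clamp the in-process mapping size to the file size (illustrative) */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct dxb_state {
  pthread_rwlock_t remap_guard; /* stands in for env->me_remap_guard */
  uint64_t filesize;            /* last known size of the datafile */
  size_t current;               /* size the mapping is used up to */
};

/* Stand-in for osal_filesize(): pretend the file shrank to 4 MiB because a
 * writer in another process committed a smaller geometry. */
static int query_filesize(uint64_t *out) {
  *out = 4u << 20;
  return 0;
}

/* Under a shared guard, refresh the file size and clamp `current` down,
 * instead of resizing the file from a read-only transaction. */
static int sync_current_with_file(struct dxb_state *dxb) {
  int rc = pthread_rwlock_rdlock(&dxb->remap_guard);
  if (rc != 0)
    return rc;
  rc = query_filesize(&dxb->filesize);
  if (rc == 0 && dxb->current > dxb->filesize)
    dxb->current = (size_t)dxb->filesize;
  pthread_rwlock_unlock(&dxb->remap_guard);
  return rc;
}

int main(void) {
  struct dxb_state dxb = {.filesize = 8u << 20, .current = 8u << 20};
  pthread_rwlock_init(&dxb.remap_guard, NULL);
  sync_current_with_file(&dxb);
  printf("current clamped to %zu bytes\n", dxb.current);
  pthread_rwlock_destroy(&dxb.remap_guard);
  return 0;
}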
@@ -9799,8 +9854,8 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) {
     if (parent->mt_geo.upper != txn->mt_geo.upper ||
         parent->mt_geo.now != txn->mt_geo.now) {
       /* undo resize performed by child txn */
-      rc = map_resize_implicit(env, parent->mt_next_pgno, parent->mt_geo.now,
-                               parent->mt_geo.upper);
+      rc = dxb_resize(env, parent->mt_next_pgno, parent->mt_geo.now,
+                      parent->mt_geo.upper, impilict_shrink);
       if (rc == MDBX_EPERM) {
         /* unable undo resize (it is regular for Windows),
          * therefore promote size changes from child to the parent txn */
@@ -12859,8 +12914,8 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
   if (unlikely(shrink)) {
     VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")",
             pending->mm_geo.now, shrink);
-    rc = map_resize_implicit(env, pending->mm_geo.next, pending->mm_geo.now,
-                             pending->mm_geo.upper);
+    rc = dxb_resize(env, pending->mm_geo.next, pending->mm_geo.now,
+                    pending->mm_geo.upper, impilict_shrink);
     if (rc != MDBX_SUCCESS && rc != MDBX_EPERM)
       goto fail;
     eASSERT(env, coherency_check_meta(env, target, true));
@@ -13453,8 +13508,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
 
     if (new_geo.now != current_geo->now ||
         new_geo.upper != current_geo->upper) {
-      rc = map_resize(env, current_geo->next, new_geo.now, new_geo.upper,
-                      false);
+      rc = dxb_resize(env, current_geo->next, new_geo.now, new_geo.upper,
+                      explicit_resize);
       if (unlikely(rc != MDBX_SUCCESS))
         goto bailout;
     }
@@ -752,7 +752,7 @@ static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) {
     // If there's a writer already, spin without unnecessarily
     // interlocking the CPUs
     if (srwl->writerCount != 0) {
-      YieldProcessor();
+      SwitchToThread();
       continue;
     }
 
@@ -766,7 +766,7 @@ static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) {
 
     // Remove from the readers list, spin, try again
     _InterlockedDecrement(&srwl->readerCount);
-    YieldProcessor();
+    SwitchToThread();
   }
 }
 
@@ -782,7 +782,7 @@ static void WINAPI stub_srwlock_AcquireExclusive(osal_srwlock_t *srwl) {
     // If there's a writer already, spin without unnecessarily
     // interlocking the CPUs
    if (srwl->writerCount != 0) {
-      YieldProcessor();
+      SwitchToThread();
      continue;
    }
 
@@ -797,7 +797,7 @@ static void WINAPI stub_srwlock_AcquireExclusive(osal_srwlock_t *srwl) {
   // that we're the writer.
   while (srwl->readerCount != 0) {
     assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
-    YieldProcessor();
+    SwitchToThread();
   }
 }
 
src/osal.c: 73 changed lines
@@ -2181,6 +2181,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size,
 
   if ((flags & MDBX_RDONLY) == 0 && (options & MMAP_OPTION_TRUNCATE) != 0) {
     err = osal_ftruncate(map->fd, size);
+    VERBOSE("ftruncate %zu, err %d", size, err);
     if (err != MDBX_SUCCESS)
       return err;
     map->filesize = size;
@@ -2189,6 +2190,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size,
 #endif /* !Windows */
   } else {
     err = osal_filesize(map->fd, &map->filesize);
+    VERBOSE("filesize %" PRIu64 ", err %d", map->filesize, err);
     if (err != MDBX_SUCCESS)
       return err;
 #if defined(_WIN32) || defined(_WIN64)
@@ -2306,8 +2308,7 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
   VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current);
   /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
    * when this memory will re-used by malloc or another mmapping.
-   * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
-   */
+   * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 */
   MDBX_ASAN_UNPOISON_MEMORY_REGION(
       map->base, (map->filesize && map->filesize < map->limit) ? map->filesize
                                                                : map->limit);
@@ -2332,25 +2333,38 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
 
 MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
                                     size_t size, size_t limit) {
+  int rc = osal_filesize(map->fd, &map->filesize);
+  VERBOSE("flags 0x%x, size %zu, limit %zu, filesize %" PRIu64, flags, size,
+          limit, map->filesize);
   assert(size <= limit);
+  if (rc != MDBX_SUCCESS) {
+    map->filesize = 0;
+    return rc;
+  }
+
 #if defined(_WIN32) || defined(_WIN64)
   assert(size != map->current || limit != map->limit || size < map->filesize);
 
   NTSTATUS status;
   LARGE_INTEGER SectionSize;
-  int err, rc = MDBX_SUCCESS;
+  int err;
 
-  if (!(flags & MDBX_RDONLY) && limit == map->limit && size > map->current &&
-      /* workaround for Wine */ mdbx_NtExtendSection) {
-    /* growth rw-section */
-    SectionSize.QuadPart = size;
-    status = mdbx_NtExtendSection(map->section, &SectionSize);
-    if (!NT_SUCCESS(status))
-      return ntstatus2errcode(status);
-    map->current = size;
-    if (map->filesize < size)
-      map->filesize = size;
-    return MDBX_SUCCESS;
+  if (limit == map->limit && size > map->current) {
+    if ((flags & MDBX_RDONLY) && map->filesize >= size) {
+      map->current = size;
+      return MDBX_SUCCESS;
+    } else if (!(flags & MDBX_RDONLY) &&
+               /* workaround for Wine */ mdbx_NtExtendSection) {
+      /* growth rw-section */
+      SectionSize.QuadPart = size;
+      status = mdbx_NtExtendSection(map->section, &SectionSize);
+      if (!NT_SUCCESS(status))
+        return ntstatus2errcode(status);
+      map->current = size;
+      if (map->filesize < size)
+        map->filesize = size;
+      return MDBX_SUCCESS;
+    }
   }
 
   if (limit > map->limit) {
@@ -2379,13 +2393,15 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
   *  - change size of mapped view;
   *  - extend read-only mapping;
   * Therefore we should unmap/map entire section. */
-  if ((flags & MDBX_MRESIZE_MAY_UNMAP) == 0)
+  if ((flags & MDBX_MRESIZE_MAY_UNMAP) == 0) {
+    if (size <= map->current && limit == map->limit)
+      return MDBX_SUCCESS;
     return MDBX_EPERM;
+  }
 
   /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
    * when this memory will re-used by malloc or another mmapping.
-   * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
-   */
+   * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 */
   MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, map->limit);
   status = NtUnmapViewOfSection(GetCurrentProcess(), map->base);
   if (!NT_SUCCESS(status))
@@ -2398,7 +2414,6 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
   if (!NT_SUCCESS(status)) {
   bailout_ntstatus:
     err = ntstatus2errcode(status);
-  bailout:
     map->base = NULL;
     map->current = map->limit = 0;
     if (ReservedAddress) {
@@ -2427,10 +2442,6 @@ retry_file_and_section:
     map->base = NULL;
   }
 
-  err = osal_filesize(map->fd, &map->filesize);
-  if (err != MDBX_SUCCESS)
-    goto bailout;
-
   if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) {
     err = osal_ftruncate(map->fd, size);
     if (err == MDBX_SUCCESS)
@@ -2507,18 +2518,17 @@ retry_mapview:;
 
 #else /* Windows */
 
-  map->filesize = 0;
-  int rc = osal_filesize(map->fd, &map->filesize);
-  if (rc != MDBX_SUCCESS)
-    return rc;
-
   if (flags & MDBX_RDONLY) {
+    if (size > map->filesize)
+      rc = MDBX_UNABLE_EXTEND_MAPSIZE;
+    else if (size < map->filesize && map->filesize > limit)
+      rc = MDBX_EPERM;
     map->current = (map->filesize > limit) ? limit : (size_t)map->filesize;
-    if (map->current != size)
-      rc = (size > map->current) ? MDBX_UNABLE_EXTEND_MAPSIZE : MDBX_EPERM;
   } else {
-    if (map->filesize != size) {
+    if (size > map->filesize ||
+        (size < map->filesize && (flags & MDBX_SHRINK_ALLOWED))) {
       rc = osal_ftruncate(map->fd, size);
+      VERBOSE("ftruncate %zu, err %d", size, rc);
       if (rc != MDBX_SUCCESS)
         return rc;
       map->filesize = size;
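The reworked read-only branch of the POSIX `osal_mresize()` above can be read as a small decision function over the requested size, the mapping limit and the actual file size. A standalone sketch (the error codes are stand-ins for the MDBX_* constants):

/* sketch: resize request against a read-only POSIX mapping (illustrative) */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_SUCCESS 0
#define SKETCH_UNABLE_EXTEND_MAPSIZE (-1) /* stand-in for MDBX_UNABLE_EXTEND_MAPSIZE */
#define SKETCH_EPERM (-2)                 /* stand-in for MDBX_EPERM */

/* Mirrors the patched read-only branch: a read-only mapping cannot grow the
 * file, and cannot honor a shrink while the file exceeds the mapping limit;
 * otherwise the view is simply clamped to min(filesize, limit). */
static int ro_mresize(uint64_t filesize, size_t size, size_t limit,
                      size_t *current) {
  int rc = SKETCH_SUCCESS;
  if (size > filesize)
    rc = SKETCH_UNABLE_EXTEND_MAPSIZE;
  else if (size < filesize && filesize > limit)
    rc = SKETCH_EPERM;
  *current = (filesize > limit) ? limit : (size_t)filesize;
  return rc;
}

int main(void) {
  size_t current = 0;
  int rc;

  rc = ro_mresize(4u << 20, 8u << 20, 16u << 20, &current);
  printf("grow beyond the file: rc=%d\n", rc);

  rc = ro_mresize(16u << 20, 4u << 20, 8u << 20, &current);
  printf("shrink below an over-limit file: rc=%d\n", rc);

  rc = ro_mresize(8u << 20, 8u << 20, 16u << 20, &current);
  printf("within the file: rc=%d, current=%zu\n", rc, current);
  return 0;
}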
@@ -2713,7 +2723,8 @@ retry_mapview:;
 
   assert(rc != MDBX_SUCCESS ||
          (map->base != nullptr && map->base != MAP_FAILED &&
-          map->current == size && map->limit == limit));
+          map->current == size && map->limit == limit &&
+          map->filesize >= size));
   return rc;
 }
 