mdbx: рефакторинг `dxb_resize()` и связанного кода.

В том числе, для устранения срабатывания assert-проверки
`size_bytes == env->me_dxb_mmap.current` в специфических многопоточных
сценариях использования.

Проверка срабатывала только в отладочных сборках, при специфическом
наложении во времени читающей и пишущей транзакции в разных потоках,
одновременно с изменением размера БД.

Кроме срабатывания проверки, каких-либо других последствий не возникало.
This commit is contained in:
Леонид Юрьев (Leonid Yuriev) 2023-01-15 20:35:09 +03:00
parent 0979a93a78
commit a484a1f89b
4 changed files with 209 additions and 135 deletions

View File

@ -10,6 +10,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic
- Max <maxc0d3r@protonmail.com> за сообщение о проблеме ERROR_SHARING_VIOLATION - Max <maxc0d3r@protonmail.com> за сообщение о проблеме ERROR_SHARING_VIOLATION
в режиме MDBX_EXCLUSIVE на Windows. в режиме MDBX_EXCLUSIVE на Windows.
- Alisher Ashyrov https://t.me/a1is43ras4 за сообщение о проблеме с assert-проверкой и содействие в отладке.
Исправления (без корректировок новых функций): Исправления (без корректировок новых функций):
@ -21,6 +22,13 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic
предотвращении ошибки ERROR_NOT_ENOUGH_MEMORY в Windows, которая совсем предотвращении ошибки ERROR_NOT_ENOUGH_MEMORY в Windows, которая совсем
не информативна для пользователя и возникает в этом случае. не информативна для пользователя и возникает в этом случае.
- Рефакторинг `dxb_resize()`. В том числе, для устранения срабатывания
assert-проверки `size_bytes == env->me_dxb_mmap.current` в специфических
многопоточных сценариях использования. Проверка срабатывала только в
отладочных сборках, при специфическом наложении во времени читающей и
пишущей транзакции в разных потоках, одновременно с изменением размера БД.
Кроме срабатывания проверки, каких-либо других последствий не возникало.
Ликвидация технических долгов и мелочи: Ликвидация технических долгов и мелочи:
- Исправление опечаток. - Исправление опечаток.

View File

@ -6011,21 +6011,43 @@ static void adjust_defaults(MDBX_env *env) {
bytes2pgno(env, bytes_align2os_bytes(env, threshold)); bytes2pgno(env, bytes_align2os_bytes(env, threshold));
} }
__cold static int map_resize(MDBX_env *env, const pgno_t used_pgno, enum resize_mode { implicit_grow, impilict_shrink, explicit_resize };
const pgno_t size_pgno, const pgno_t limit_pgno,
const bool implicit) { __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno,
const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno); const pgno_t size_pgno, pgno_t limit_pgno,
const size_t size_bytes = pgno_align2os_bytes(env, size_pgno); const enum resize_mode mode) {
/* Acquire guard to avoid collision between read and write txns
* around me_dbgeo and me_dxb_mmap */
#if defined(_WIN32) || defined(_WIN64)
osal_srwlock_AcquireExclusive(&env->me_remap_guard);
int rc = MDBX_SUCCESS;
mdbx_handle_array_t *suspended = NULL;
mdbx_handle_array_t array_onstack;
#else
int rc = osal_fastmutex_acquire(&env->me_remap_guard);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
#endif
const size_t prev_size = env->me_dxb_mmap.current; const size_t prev_size = env->me_dxb_mmap.current;
const size_t prev_limit = env->me_dxb_mmap.limit; const size_t prev_limit = env->me_dxb_mmap.limit;
const pgno_t prev_limit_pgno = bytes2pgno(env, prev_limit);
eASSERT(env, prev_limit_pgno >= used_pgno);
if (mode < explicit_resize && size_pgno <= prev_limit_pgno) {
/* The actual mapsize may be less since the geo.upper may be changed
* by another process. Avoids remapping until it is necessary. */
limit_pgno = prev_limit_pgno;
}
const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
#if MDBX_ENABLE_MADVISE || defined(MDBX_USE_VALGRIND) #if MDBX_ENABLE_MADVISE || defined(MDBX_USE_VALGRIND)
const void *const prev_map = env->me_dxb_mmap.base; const void *const prev_map = env->me_dxb_mmap.base;
#endif /* MDBX_ENABLE_MADVISE || MDBX_USE_VALGRIND */ #endif /* MDBX_ENABLE_MADVISE || MDBX_USE_VALGRIND */
VERBOSE("resize datafile/mapping: " VERBOSE("resize/%d datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", " "present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR, "limit %" PRIuPTR " -> %" PRIuPTR,
prev_size, size_bytes, prev_limit, limit_bytes); mode, prev_size, size_bytes, prev_limit, limit_bytes);
eASSERT(env, limit_bytes >= size_bytes); eASSERT(env, limit_bytes >= size_bytes);
eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno); eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno);
@ -6033,20 +6055,18 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
unsigned mresize_flags = unsigned mresize_flags =
env->me_flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC); env->me_flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC);
#if defined(_WIN32) || defined(_WIN64) if (mode >= impilict_shrink)
/* Acquire guard in exclusive mode for: mresize_flags |= MDBX_SHRINK_ALLOWED;
* - to avoid collision between read and write txns around env->me_dbgeo;
* - to avoid attachment of new reading threads (see osal_rdt_lock); */
osal_srwlock_AcquireExclusive(&env->me_remap_guard);
mdbx_handle_array_t *suspended = NULL;
mdbx_handle_array_t array_onstack;
int rc = MDBX_SUCCESS;
if (limit_bytes == env->me_dxb_mmap.limit && if (limit_bytes == env->me_dxb_mmap.limit &&
size_bytes == env->me_dxb_mmap.current && size_bytes == env->me_dxb_mmap.current &&
size_bytes == env->me_dxb_mmap.filesize) size_bytes == env->me_dxb_mmap.filesize)
goto bailout; goto bailout;
if ((env->me_flags & MDBX_NOTLS) == 0) { #if defined(_WIN32) || defined(_WIN64)
if ((env->me_flags & MDBX_NOTLS) == 0 &&
((size_bytes < env->me_dxb_mmap.current && mode > implicit_grow) ||
limit_bytes != env->me_dxb_mmap.limit)) {
/* 1) Windows allows only extending a read-write section, but not a /* 1) Windows allows only extending a read-write section, but not a
* corresponding mapped view. Therefore in other cases we must suspend * corresponding mapped view. Therefore in other cases we must suspend
* the local threads for safe remap. * the local threads for safe remap.
@ -6064,65 +6084,61 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
ERROR("failed suspend-for-remap: errcode %d", rc); ERROR("failed suspend-for-remap: errcode %d", rc);
goto bailout; goto bailout;
} }
mresize_flags |= implicit ? MDBX_MRESIZE_MAY_UNMAP mresize_flags |= (mode < explicit_resize)
: MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; ? MDBX_MRESIZE_MAY_UNMAP
: MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
} }
#else /* Windows */ #else /* Windows */
/* Acquire guard to avoid collision between read and write txns
* around env->me_dbgeo */
int rc = osal_fastmutex_acquire(&env->me_remap_guard);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
if (limit_bytes == env->me_dxb_mmap.limit &&
size_bytes == env->me_dxb_mmap.current)
goto bailout;
MDBX_lockinfo *const lck = env->me_lck_mmap.lck; MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
if (limit_bytes != env->me_dxb_mmap.limit && !(env->me_flags & MDBX_NOTLS) && if (mode == explicit_resize && limit_bytes != env->me_dxb_mmap.limit &&
lck && !implicit) { !(env->me_flags & MDBX_NOTLS)) {
int err = osal_rdt_lock(env) /* lock readers table until remap done */;
if (unlikely(MDBX_IS_ERROR(err))) {
rc = err;
goto bailout;
}
/* looking for readers from this process */
const size_t snap_nreaders =
atomic_load32(&lck->mti_numreaders, mo_AcquireRelease);
eASSERT(env, !implicit);
mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE; mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
for (size_t i = 0; i < snap_nreaders; ++i) { if (lck) {
if (lck->mti_readers[i].mr_pid.weak == env->me_pid && int err = osal_rdt_lock(env) /* lock readers table until remap done */;
lck->mti_readers[i].mr_tid.weak != osal_thread_self()) { if (unlikely(MDBX_IS_ERROR(err))) {
/* the base address of the mapping can't be changed since rc = err;
* the other reader thread from this process exists. */ goto bailout;
osal_rdt_unlock(env); }
mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE);
break; /* looking for readers from this process */
const size_t snap_nreaders =
atomic_load32(&lck->mti_numreaders, mo_AcquireRelease);
eASSERT(env, mode == explicit_resize);
for (size_t i = 0; i < snap_nreaders; ++i) {
if (lck->mti_readers[i].mr_pid.weak == env->me_pid &&
lck->mti_readers[i].mr_tid.weak != osal_thread_self()) {
/* the base address of the mapping can't be changed since
* the other reader thread from this process exists. */
osal_rdt_unlock(env);
mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE);
break;
}
} }
} }
} }
#endif /* ! Windows */ #endif /* ! Windows */
if ((env->me_flags & MDBX_WRITEMAP) && env->me_lck->mti_unsynced_pages.weak) {
#if MDBX_ENABLE_PGOP_STAT
env->me_lck->mti_pgop_stat.msync.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno),
MDBX_SYNC_NONE);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
}
const pgno_t aligned_munlock_pgno = const pgno_t aligned_munlock_pgno =
(mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE))
? 0 ? 0
: bytes2pgno(env, size_bytes); : bytes2pgno(env, size_bytes);
if (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) {
mincore_clean_cache(env);
if ((env->me_flags & MDBX_WRITEMAP) &&
env->me_lck->mti_unsynced_pages.weak) {
#if MDBX_ENABLE_PGOP_STAT
env->me_lck->mti_pgop_stat.msync.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno),
MDBX_SYNC_NONE);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
}
}
munlock_after(env, aligned_munlock_pgno, size_bytes); munlock_after(env, aligned_munlock_pgno, size_bytes);
mincore_clean_cache(env);
#if MDBX_ENABLE_MADVISE #if MDBX_ENABLE_MADVISE
if (size_bytes < prev_size) { if (size_bytes < prev_size && mode > implicit_grow) {
NOTICE("resize-MADV_%s %u..%u", NOTICE("resize-MADV_%s %u..%u",
(env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno, (env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno,
bytes2pgno(env, prev_size)); bytes2pgno(env, prev_size));
@ -6181,7 +6197,10 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
if (rc == MDBX_SUCCESS) { if (rc == MDBX_SUCCESS) {
eASSERT(env, limit_bytes == env->me_dxb_mmap.limit); eASSERT(env, limit_bytes == env->me_dxb_mmap.limit);
eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize); eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize);
eASSERT(env, size_bytes == env->me_dxb_mmap.current); if (mode == explicit_resize)
eASSERT(env, size_bytes == env->me_dxb_mmap.current);
else
eASSERT(env, size_bytes <= env->me_dxb_mmap.current);
env->me_lck->mti_discarded_tail.weak = size_pgno; env->me_lck->mti_discarded_tail.weak = size_pgno;
const bool readahead = const bool readahead =
!(env->me_flags & MDBX_NORDAHEAD) && !(env->me_flags & MDBX_NORDAHEAD) &&
@ -6200,7 +6219,10 @@ bailout:
if (rc == MDBX_SUCCESS) { if (rc == MDBX_SUCCESS) {
eASSERT(env, limit_bytes == env->me_dxb_mmap.limit); eASSERT(env, limit_bytes == env->me_dxb_mmap.limit);
eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize); eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize);
eASSERT(env, size_bytes == env->me_dxb_mmap.current); if (mode == explicit_resize)
eASSERT(env, size_bytes == env->me_dxb_mmap.current);
else
eASSERT(env, size_bytes <= env->me_dxb_mmap.current);
/* update env-geo to avoid influences */ /* update env-geo to avoid influences */
env->me_dbgeo.now = env->me_dxb_mmap.current; env->me_dbgeo.now = env->me_dxb_mmap.current;
env->me_dbgeo.upper = env->me_dxb_mmap.limit; env->me_dbgeo.upper = env->me_dxb_mmap.limit;
@ -6255,21 +6277,6 @@ bailout:
return rc; return rc;
} }
__cold static int map_resize_implicit(MDBX_env *env, const pgno_t used_pgno,
const pgno_t size_pgno,
const pgno_t limit_pgno) {
const pgno_t mapped_pgno = bytes2pgno(env, env->me_dxb_mmap.limit);
eASSERT(env, mapped_pgno >= used_pgno);
return map_resize(
env, used_pgno, size_pgno,
(size_pgno > mapped_pgno)
? limit_pgno
: /* The actual mapsize may be less since the geo.upper may be changed
by other process. So, avoids remapping until it necessary. */
mapped_pgno,
true);
}
static int meta_unsteady(int err, MDBX_env *env, const txnid_t early_than, static int meta_unsteady(int err, MDBX_env *env, const txnid_t early_than,
const pgno_t pgno) { const pgno_t pgno) {
MDBX_meta *const meta = METAPAGE(env, pgno); MDBX_meta *const meta = METAPAGE(env, pgno);
@ -7649,8 +7656,8 @@ no_gc:
VERBOSE("try growth datafile to %zu pages (+%zu)", aligned, VERBOSE("try growth datafile to %zu pages (+%zu)", aligned,
aligned - txn->mt_end_pgno); aligned - txn->mt_end_pgno);
ret.err = map_resize_implicit(env, txn->mt_next_pgno, (pgno_t)aligned, ret.err = dxb_resize(env, txn->mt_next_pgno, (pgno_t)aligned,
txn->mt_geo.upper); txn->mt_geo.upper, implicit_grow);
if (ret.err != MDBX_SUCCESS) { if (ret.err != MDBX_SUCCESS) {
ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned, ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned,
aligned - txn->mt_end_pgno, ret.err); aligned - txn->mt_end_pgno, ret.err);
@ -8095,8 +8102,8 @@ retry:;
if (!inside_txn && locked && (env->me_flags & MDBX_WRITEMAP) && if (!inside_txn && locked && (env->me_flags & MDBX_WRITEMAP) &&
unlikely(head.ptr_c->mm_geo.next > unlikely(head.ptr_c->mm_geo.next >
bytes2pgno(env, env->me_dxb_mmap.current))) { bytes2pgno(env, env->me_dxb_mmap.current))) {
rc = map_resize_implicit(env, head.ptr_c->mm_geo.next, rc = dxb_resize(env, head.ptr_c->mm_geo.next, head.ptr_c->mm_geo.now,
head.ptr_c->mm_geo.now, head.ptr_c->mm_geo.upper); head.ptr_c->mm_geo.upper, implicit_grow);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto bailout; goto bailout;
} }
@ -8974,6 +8981,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
txn->mt_dbistate[MAIN_DBI] = DBI_VALID | DBI_USRVALID; txn->mt_dbistate[MAIN_DBI] = DBI_VALID | DBI_USRVALID;
rc = rc =
setup_dbx(&txn->mt_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); setup_dbx(&txn->mt_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
txn->mt_dbistate[FREE_DBI] = DBI_VALID; txn->mt_dbistate[FREE_DBI] = DBI_VALID;
txn->mt_front = txn->mt_front =
txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0);
@ -8982,34 +8991,80 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
WARNING("%s", "environment had fatal error, must shutdown!"); WARNING("%s", "environment had fatal error, must shutdown!");
rc = MDBX_PANIC; rc = MDBX_PANIC;
} else { } else {
const size_t size = const size_t size_bytes = pgno2bytes(env, txn->mt_end_pgno);
pgno2bytes(env, (txn->mt_flags & MDBX_TXN_RDONLY) ? txn->mt_next_pgno const size_t used_bytes = pgno2bytes(env, txn->mt_next_pgno);
: txn->mt_end_pgno); const size_t required_bytes =
if (unlikely(size > env->me_dxb_mmap.limit)) { (txn->mt_flags & MDBX_TXN_RDONLY) ? used_bytes : size_bytes;
if (unlikely(required_bytes > env->me_dxb_mmap.current)) {
/* Размер БД (для пишущих транзакций) или используемых данных (для
* читающих транзакций) больше предыдущего/текущего размера внутри
* процесса, увеличиваем. Сюда также попадает случай увеличения верхней
* границы размера БД и отображения. В читающих транзакциях нельзя
* изменять размер файла, который может быть больше необходимого этой
* транзакции. */
if (txn->mt_geo.upper > MAX_PAGENO + 1 || if (txn->mt_geo.upper > MAX_PAGENO + 1 ||
bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) != bytes2pgno(env, pgno2bytes(env, txn->mt_geo.upper)) !=
txn->mt_geo.upper) { txn->mt_geo.upper) {
rc = MDBX_UNABLE_EXTEND_MAPSIZE; rc = MDBX_UNABLE_EXTEND_MAPSIZE;
goto bailout; goto bailout;
} }
rc = map_resize(env, txn->mt_next_pgno, txn->mt_end_pgno, rc = dxb_resize(env, txn->mt_next_pgno, txn->mt_end_pgno,
txn->mt_geo.upper, txn->mt_geo.upper, implicit_grow);
(txn->mt_flags & MDBX_TXN_RDONLY) ? true : false); if (unlikely(rc != MDBX_SUCCESS))
if (rc != MDBX_SUCCESS) goto bailout;
} else if (unlikely(size_bytes < env->me_dxb_mmap.current)) {
/* Размер БД меньше предыдущего/текущего размера внутри процесса, можно
* уменьшить, но всё сложнее:
* - размер файла согласован со всеми читаемыми снимками на момент
* коммита последней транзакции;
* - в читающей транзакции размер файла может быть больше и его нельзя
* изменять, в том числе менять madvise (меньше размера файла нельзя,
* а за размером нет смысла).
* - в пишущей транзакции уменьшать размер файла можно только после
* проверки размера читаемых снимков, но в этом нет смысла, так как
* это будет сделано при фиксации транзакции.
*
* В сухом остатке, можно только установить dxb_mmap.current равным
* размеру файла, а это проще сделать без вызова dxb_resize() и усложнения
* внутренней логики.
*
* В этой тактике есть недостаток: если пишущие транзакции нерегулярны,
* и при завершении такой транзакции файл БД остаётся неуменьшенным из-за
* читающих транзакций, использующих предыдущие снимки. */
#if defined(_WIN32) || defined(_WIN64)
osal_srwlock_AcquireShared(&env->me_remap_guard);
#else
rc = osal_fastmutex_acquire(&env->me_remap_guard);
#endif
if (likely(rc == MDBX_SUCCESS)) {
rc = osal_filesize(env->me_dxb_mmap.fd, &env->me_dxb_mmap.filesize);
if (likely(rc == MDBX_SUCCESS)) {
eASSERT(env, env->me_dxb_mmap.filesize >= required_bytes);
if (env->me_dxb_mmap.current > env->me_dxb_mmap.filesize)
env->me_dxb_mmap.current = (size_t)env->me_dxb_mmap.filesize;
}
#if defined(_WIN32) || defined(_WIN64)
osal_srwlock_ReleaseShared(&env->me_remap_guard);
#else
int err = osal_fastmutex_release(&env->me_remap_guard);
if (unlikely(err) && likely(rc == MDBX_SUCCESS))
rc = err;
#endif
}
if (unlikely(rc != MDBX_SUCCESS))
goto bailout; goto bailout;
} else {
env->me_dxb_mmap.current = size;
env->me_dxb_mmap.filesize =
(env->me_dxb_mmap.filesize < size) ? size : env->me_dxb_mmap.filesize;
} }
eASSERT(env,
pgno2bytes(env, txn->mt_next_pgno) <= env->me_dxb_mmap.current);
eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current);
if (txn->mt_flags & MDBX_TXN_RDONLY) { if (txn->mt_flags & MDBX_TXN_RDONLY) {
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
if (((size > env->me_dbgeo.lower && env->me_dbgeo.shrink) || if (((used_bytes > env->me_dbgeo.lower && env->me_dbgeo.shrink) ||
(mdbx_RunningUnderWine() && (mdbx_RunningUnderWine() &&
/* under Wine acquisition of remap_guard is always required, /* under Wine acquisition of remap_guard is always required,
* since Wine don't support section extending, * since Wine don't support section extending,
* i.e. in both cases unmap+map are required. */ * i.e. in both cases unmap+map are required. */
size < env->me_dbgeo.upper && env->me_dbgeo.grow)) && used_bytes < env->me_dbgeo.upper && env->me_dbgeo.grow)) &&
/* avoid recursive use SRW */ (txn->mt_flags & MDBX_NOTLS) == 0) { /* avoid recursive use SRW */ (txn->mt_flags & MDBX_NOTLS) == 0) {
txn->mt_flags |= MDBX_SHRINK_ALLOWED; txn->mt_flags |= MDBX_SHRINK_ALLOWED;
osal_srwlock_AcquireShared(&env->me_remap_guard); osal_srwlock_AcquireShared(&env->me_remap_guard);
@ -9799,8 +9854,8 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) {
if (parent->mt_geo.upper != txn->mt_geo.upper || if (parent->mt_geo.upper != txn->mt_geo.upper ||
parent->mt_geo.now != txn->mt_geo.now) { parent->mt_geo.now != txn->mt_geo.now) {
/* undo resize performed by child txn */ /* undo resize performed by child txn */
rc = map_resize_implicit(env, parent->mt_next_pgno, parent->mt_geo.now, rc = dxb_resize(env, parent->mt_next_pgno, parent->mt_geo.now,
parent->mt_geo.upper); parent->mt_geo.upper, impilict_shrink);
if (rc == MDBX_EPERM) { if (rc == MDBX_EPERM) {
/* unable undo resize (it is regular for Windows), /* unable undo resize (it is regular for Windows),
* therefore promote size changes from child to the parent txn */ * therefore promote size changes from child to the parent txn */
@ -12859,8 +12914,8 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
if (unlikely(shrink)) { if (unlikely(shrink)) {
VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")", VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")",
pending->mm_geo.now, shrink); pending->mm_geo.now, shrink);
rc = map_resize_implicit(env, pending->mm_geo.next, pending->mm_geo.now, rc = dxb_resize(env, pending->mm_geo.next, pending->mm_geo.now,
pending->mm_geo.upper); pending->mm_geo.upper, impilict_shrink);
if (rc != MDBX_SUCCESS && rc != MDBX_EPERM) if (rc != MDBX_SUCCESS && rc != MDBX_EPERM)
goto fail; goto fail;
eASSERT(env, coherency_check_meta(env, target, true)); eASSERT(env, coherency_check_meta(env, target, true));
@ -13453,8 +13508,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
if (new_geo.now != current_geo->now || if (new_geo.now != current_geo->now ||
new_geo.upper != current_geo->upper) { new_geo.upper != current_geo->upper) {
rc = map_resize(env, current_geo->next, new_geo.now, new_geo.upper, rc = dxb_resize(env, current_geo->next, new_geo.now, new_geo.upper,
false); explicit_resize);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto bailout; goto bailout;
} }

View File

@ -752,7 +752,7 @@ static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) {
// If there's a writer already, spin without unnecessarily // If there's a writer already, spin without unnecessarily
// interlocking the CPUs // interlocking the CPUs
if (srwl->writerCount != 0) { if (srwl->writerCount != 0) {
YieldProcessor(); SwitchToThread();
continue; continue;
} }
@ -766,7 +766,7 @@ static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) {
// Remove from the readers list, spin, try again // Remove from the readers list, spin, try again
_InterlockedDecrement(&srwl->readerCount); _InterlockedDecrement(&srwl->readerCount);
YieldProcessor(); SwitchToThread();
} }
} }
@ -782,7 +782,7 @@ static void WINAPI stub_srwlock_AcquireExclusive(osal_srwlock_t *srwl) {
// If there's a writer already, spin without unnecessarily // If there's a writer already, spin without unnecessarily
// interlocking the CPUs // interlocking the CPUs
if (srwl->writerCount != 0) { if (srwl->writerCount != 0) {
YieldProcessor(); SwitchToThread();
continue; continue;
} }
@ -797,7 +797,7 @@ static void WINAPI stub_srwlock_AcquireExclusive(osal_srwlock_t *srwl) {
// that we're the writer. // that we're the writer.
while (srwl->readerCount != 0) { while (srwl->readerCount != 0) {
assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
YieldProcessor(); SwitchToThread();
} }
} }

View File

@ -2181,6 +2181,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size,
if ((flags & MDBX_RDONLY) == 0 && (options & MMAP_OPTION_TRUNCATE) != 0) { if ((flags & MDBX_RDONLY) == 0 && (options & MMAP_OPTION_TRUNCATE) != 0) {
err = osal_ftruncate(map->fd, size); err = osal_ftruncate(map->fd, size);
VERBOSE("ftruncate %zu, err %d", size, err);
if (err != MDBX_SUCCESS) if (err != MDBX_SUCCESS)
return err; return err;
map->filesize = size; map->filesize = size;
@ -2189,6 +2190,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size,
#endif /* !Windows */ #endif /* !Windows */
} else { } else {
err = osal_filesize(map->fd, &map->filesize); err = osal_filesize(map->fd, &map->filesize);
VERBOSE("filesize %" PRIu64 ", err %d", map->filesize, err);
if (err != MDBX_SUCCESS) if (err != MDBX_SUCCESS)
return err; return err;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
@ -2306,8 +2308,7 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current); VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current);
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
* when this memory will re-used by malloc or another mmapping. * when this memory will re-used by malloc or another mmapping.
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 */
*/
MDBX_ASAN_UNPOISON_MEMORY_REGION( MDBX_ASAN_UNPOISON_MEMORY_REGION(
map->base, (map->filesize && map->filesize < map->limit) ? map->filesize map->base, (map->filesize && map->filesize < map->limit) ? map->filesize
: map->limit); : map->limit);
@ -2332,25 +2333,38 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map, MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
size_t size, size_t limit) { size_t size, size_t limit) {
int rc = osal_filesize(map->fd, &map->filesize);
VERBOSE("flags 0x%x, size %zu, limit %zu, filesize %" PRIu64, flags, size,
limit, map->filesize);
assert(size <= limit); assert(size <= limit);
if (rc != MDBX_SUCCESS) {
map->filesize = 0;
return rc;
}
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
assert(size != map->current || limit != map->limit || size < map->filesize); assert(size != map->current || limit != map->limit || size < map->filesize);
NTSTATUS status; NTSTATUS status;
LARGE_INTEGER SectionSize; LARGE_INTEGER SectionSize;
int err, rc = MDBX_SUCCESS; int err;
if (!(flags & MDBX_RDONLY) && limit == map->limit && size > map->current && if (limit == map->limit && size > map->current) {
/* workaround for Wine */ mdbx_NtExtendSection) { if ((flags & MDBX_RDONLY) && map->filesize >= size) {
/* growth rw-section */ map->current = size;
SectionSize.QuadPart = size; return MDBX_SUCCESS;
status = mdbx_NtExtendSection(map->section, &SectionSize); } else if (!(flags & MDBX_RDONLY) &&
if (!NT_SUCCESS(status)) /* workaround for Wine */ mdbx_NtExtendSection) {
return ntstatus2errcode(status); /* growth rw-section */
map->current = size; SectionSize.QuadPart = size;
if (map->filesize < size) status = mdbx_NtExtendSection(map->section, &SectionSize);
map->filesize = size; if (!NT_SUCCESS(status))
return MDBX_SUCCESS; return ntstatus2errcode(status);
map->current = size;
if (map->filesize < size)
map->filesize = size;
return MDBX_SUCCESS;
}
} }
if (limit > map->limit) { if (limit > map->limit) {
@ -2379,13 +2393,15 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
* - change size of mapped view; * - change size of mapped view;
* - extend read-only mapping; * - extend read-only mapping;
* Therefore we should unmap/map entire section. */ * Therefore we should unmap/map entire section. */
if ((flags & MDBX_MRESIZE_MAY_UNMAP) == 0) if ((flags & MDBX_MRESIZE_MAY_UNMAP) == 0) {
if (size <= map->current && limit == map->limit)
return MDBX_SUCCESS;
return MDBX_EPERM; return MDBX_EPERM;
}
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
* when this memory will re-used by malloc or another mmapping. * when this memory will re-used by malloc or another mmapping.
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 */
*/
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, map->limit); MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, map->limit);
status = NtUnmapViewOfSection(GetCurrentProcess(), map->base); status = NtUnmapViewOfSection(GetCurrentProcess(), map->base);
if (!NT_SUCCESS(status)) if (!NT_SUCCESS(status))
@ -2398,7 +2414,6 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
if (!NT_SUCCESS(status)) { if (!NT_SUCCESS(status)) {
bailout_ntstatus: bailout_ntstatus:
err = ntstatus2errcode(status); err = ntstatus2errcode(status);
bailout:
map->base = NULL; map->base = NULL;
map->current = map->limit = 0; map->current = map->limit = 0;
if (ReservedAddress) { if (ReservedAddress) {
@ -2427,10 +2442,6 @@ retry_file_and_section:
map->base = NULL; map->base = NULL;
} }
err = osal_filesize(map->fd, &map->filesize);
if (err != MDBX_SUCCESS)
goto bailout;
if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) { if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) {
err = osal_ftruncate(map->fd, size); err = osal_ftruncate(map->fd, size);
if (err == MDBX_SUCCESS) if (err == MDBX_SUCCESS)
@ -2507,18 +2518,17 @@ retry_mapview:;
#else /* Windows */ #else /* Windows */
map->filesize = 0;
int rc = osal_filesize(map->fd, &map->filesize);
if (rc != MDBX_SUCCESS)
return rc;
if (flags & MDBX_RDONLY) { if (flags & MDBX_RDONLY) {
if (size > map->filesize)
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
else if (size < map->filesize && map->filesize > limit)
rc = MDBX_EPERM;
map->current = (map->filesize > limit) ? limit : (size_t)map->filesize; map->current = (map->filesize > limit) ? limit : (size_t)map->filesize;
if (map->current != size)
rc = (size > map->current) ? MDBX_UNABLE_EXTEND_MAPSIZE : MDBX_EPERM;
} else { } else {
if (map->filesize != size) { if (size > map->filesize ||
(size < map->filesize && (flags & MDBX_SHRINK_ALLOWED))) {
rc = osal_ftruncate(map->fd, size); rc = osal_ftruncate(map->fd, size);
VERBOSE("ftruncate %zu, err %d", size, rc);
if (rc != MDBX_SUCCESS) if (rc != MDBX_SUCCESS)
return rc; return rc;
map->filesize = size; map->filesize = size;
@ -2713,7 +2723,8 @@ retry_mapview:;
assert(rc != MDBX_SUCCESS || assert(rc != MDBX_SUCCESS ||
(map->base != nullptr && map->base != MAP_FAILED && (map->base != nullptr && map->base != MAP_FAILED &&
map->current == size && map->limit == limit)); map->current == size && map->limit == limit &&
map->filesize >= size));
return rc; return rc;
} }