mirror of
https://gitflic.ru/project/erthink/libmdbx.git
synced 2025-03-10 23:22:15 +00:00
mdbx: implements remapping of the database file when it is possible.
Change-Id: Ida15ba1f396a33b2c6063e680dff612f39a9608f
This commit is contained in:
parent
2d0a5c42a9
commit
3351c1f869
@ -13,6 +13,8 @@ v0.8.2 2020-07-??:
|
|||||||
- Refined mode bits while auto-creating LCK-file.
|
- Refined mode bits while auto-creating LCK-file.
|
||||||
- Avoids unnecessary database file re-mapping in case the geometry was changed by other process(es).
|
- Avoids unnecessary database file re-mapping in case the geometry was changed by other process(es).
|
||||||
From the user's point of view, the MDBX_UNABLE_EXTEND_MAPSIZE error will now be returned less frequently and only when using the DB in the current process really requires it to be reopened.
|
From the user's point of view, the MDBX_UNABLE_EXTEND_MAPSIZE error will now be returned less frequently and only when using the DB in the current process really requires it to be reopened.
|
||||||
|
- On-the-fly remapping of the database file was implemented.
|
||||||
|
Now remapping with a change of address is performed automatically if there are no dependent readers in the current process.
|
||||||
|
|
||||||
v0.8.1 2020-06-12:
|
v0.8.1 2020-06-12:
|
||||||
- Minor change versioning. The last number in the version now means the number of commits since last release/tag.
|
- Minor change versioning. The last number in the version now means the number of commits since last release/tag.
|
||||||
|
46
src/core.c
46
src/core.c
@ -4658,7 +4658,7 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
|
|||||||
|
|
||||||
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
|
static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
|
||||||
const pgno_t size_pgno,
|
const pgno_t size_pgno,
|
||||||
const pgno_t limit_pgno) {
|
const pgno_t limit_pgno, const bool implicit) {
|
||||||
if ((env->me_flags & MDBX_WRITEMAP) && *env->me_unsynced_pages) {
|
if ((env->me_flags & MDBX_WRITEMAP) && *env->me_unsynced_pages) {
|
||||||
int err = mdbx_msync(&env->me_dxb_mmap, 0,
|
int err = mdbx_msync(&env->me_dxb_mmap, 0,
|
||||||
pgno_align2os_bytes(env, used_pgno), true);
|
pgno_align2os_bytes(env, used_pgno), true);
|
||||||
@ -4711,16 +4711,40 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
|
|||||||
mdbx_error("failed suspend-for-remap: errcode %d", rc);
|
mdbx_error("failed suspend-for-remap: errcode %d", rc);
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
#else
|
const bool mapping_can_be_moved = !implicit;
|
||||||
|
#else /* Windows */
|
||||||
/* Acquire guard to avoid collision between read and write txns
|
/* Acquire guard to avoid collision between read and write txns
|
||||||
* around env->me_dbgeo */
|
* around env->me_dbgeo */
|
||||||
|
bool mapping_can_be_moved = false;
|
||||||
int rc = mdbx_fastmutex_acquire(&env->me_remap_guard);
|
int rc = mdbx_fastmutex_acquire(&env->me_remap_guard);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
if (limit_bytes == env->me_dxb_mmap.limit &&
|
if (limit_bytes == env->me_dxb_mmap.limit &&
|
||||||
size_bytes == env->me_dxb_mmap.current)
|
size_bytes == env->me_dxb_mmap.current)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
#endif /* Windows */
|
|
||||||
|
if (limit_bytes != env->me_dxb_mmap.limit && env->me_lck && !implicit) {
|
||||||
|
rc = mdbx_rdt_lock(env) /* lock readers table until remap done */;
|
||||||
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
|
goto bailout;
|
||||||
|
|
||||||
|
/* looking for readers from this process */
|
||||||
|
MDBX_lockinfo *const lck = env->me_lck;
|
||||||
|
const unsigned snap_nreaders = lck->mti_numreaders;
|
||||||
|
mapping_can_be_moved = true;
|
||||||
|
for (unsigned i = 0; i < snap_nreaders; ++i) {
|
||||||
|
if (lck->mti_readers[i].mr_pid == env->me_pid &&
|
||||||
|
lck->mti_readers[i].mr_tid != mdbx_thread_self()) {
|
||||||
|
/* the base address of the mapping can't be changed since
|
||||||
|
* the other reader thread from this process exists. */
|
||||||
|
mdbx_rdt_unlock(env);
|
||||||
|
mapping_can_be_moved = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* ! Windows */
|
||||||
|
|
||||||
const size_t prev_size = env->me_dxb_mmap.current;
|
const size_t prev_size = env->me_dxb_mmap.current;
|
||||||
if (size_bytes < prev_size) {
|
if (size_bytes < prev_size) {
|
||||||
@ -4758,7 +4782,8 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
|
|||||||
*env->me_discarded_tail = size_pgno;
|
*env->me_discarded_tail = size_pgno;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes);
|
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes,
|
||||||
|
mapping_can_be_moved);
|
||||||
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
|
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
|
||||||
const int readahead = mdbx_is_readahead_reasonable(size_bytes, 0);
|
const int readahead = mdbx_is_readahead_reasonable(size_bytes, 0);
|
||||||
if (readahead == MDBX_RESULT_FALSE)
|
if (readahead == MDBX_RESULT_FALSE)
|
||||||
@ -4829,6 +4854,8 @@ bailout:
|
|||||||
mdbx_free(suspended);
|
mdbx_free(suspended);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
if (env->me_lck && mapping_can_be_moved)
|
||||||
|
mdbx_rdt_unlock(env);
|
||||||
int err = mdbx_fastmutex_release(&env->me_remap_guard);
|
int err = mdbx_fastmutex_release(&env->me_remap_guard);
|
||||||
#endif /* Windows */
|
#endif /* Windows */
|
||||||
if (err != MDBX_SUCCESS) {
|
if (err != MDBX_SUCCESS) {
|
||||||
@ -4849,7 +4876,8 @@ static __cold int mdbx_mapresize_implicit(MDBX_env *env, const pgno_t used_pgno,
|
|||||||
? limit_pgno
|
? limit_pgno
|
||||||
: /* The actual mapsize may be less since the geo.upper may be changed
|
: /* The actual mapsize may be less since the geo.upper may be changed
|
||||||
by other process. So, avoids remapping until it necessary. */
|
by other process. So, avoids remapping until it necessary. */
|
||||||
mapped_pgno);
|
mapped_pgno,
|
||||||
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mdbx_meta_unsteady(MDBX_env *env, const txnid_t last_steady,
|
static int mdbx_meta_unsteady(MDBX_env *env, const txnid_t last_steady,
|
||||||
@ -6115,8 +6143,9 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
|||||||
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
|
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
rc = mdbx_mapresize_implicit(env, txn->mt_next_pgno, txn->mt_end_pgno,
|
rc = mdbx_mapresize(env, txn->mt_next_pgno, txn->mt_end_pgno,
|
||||||
txn->mt_geo.upper);
|
txn->mt_geo.upper,
|
||||||
|
(txn->mt_flags & MDBX_RDONLY) ? true : false);
|
||||||
if (rc != MDBX_SUCCESS)
|
if (rc != MDBX_SUCCESS)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
@ -9192,7 +9221,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
|||||||
|
|
||||||
if (new_geo.now != current_geo->now ||
|
if (new_geo.now != current_geo->now ||
|
||||||
new_geo.upper != current_geo->upper) {
|
new_geo.upper != current_geo->upper) {
|
||||||
rc = mdbx_mapresize(env, current_geo->next, new_geo.now, new_geo.upper);
|
rc = mdbx_mapresize(env, current_geo->next, new_geo.now, new_geo.upper,
|
||||||
|
false);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
mdbx_assert(env, (head == nullptr) == inside_txn);
|
mdbx_assert(env, (head == nullptr) == inside_txn);
|
||||||
|
75
src/osal.c
75
src/osal.c
@ -1403,7 +1403,7 @@ MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
||||||
size_t limit) {
|
size_t limit, const bool may_move) {
|
||||||
assert(size <= limit);
|
assert(size <= limit);
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
assert(size != map->current || limit != map->limit || size < map->filesize);
|
assert(size != map->current || limit != map->limit || size < map->filesize);
|
||||||
@ -1482,9 +1482,9 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
|
|||||||
if (status != /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018)
|
if (status != /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018)
|
||||||
goto bailout_ntstatus /* no way to recovery */;
|
goto bailout_ntstatus /* no way to recovery */;
|
||||||
|
|
||||||
/* assume we can change base address if mapping size changed or prev address
|
if (may_move)
|
||||||
* couldn't be used */
|
/* the base address could be changed */
|
||||||
map->address = NULL;
|
map->address = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
retry_file_and_section:
|
retry_file_and_section:
|
||||||
@ -1541,7 +1541,7 @@ retry_mapview:;
|
|||||||
|
|
||||||
if (!NT_SUCCESS(status)) {
|
if (!NT_SUCCESS(status)) {
|
||||||
if (status == /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 &&
|
if (status == /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 &&
|
||||||
map->address) {
|
map->address && may_move) {
|
||||||
/* try remap at another base address */
|
/* try remap at another base address */
|
||||||
map->address = NULL;
|
map->address = NULL;
|
||||||
goto retry_mapview;
|
goto retry_mapview;
|
||||||
@ -1565,6 +1565,7 @@ retry_mapview:;
|
|||||||
|
|
||||||
map->current = (size_t)SectionSize.QuadPart;
|
map->current = (size_t)SectionSize.QuadPart;
|
||||||
map->limit = ViewSize;
|
map->limit = ViewSize;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
uint64_t filesize = 0;
|
uint64_t filesize = 0;
|
||||||
@ -1585,7 +1586,8 @@ retry_mapview:;
|
|||||||
|
|
||||||
if (limit != map->limit) {
|
if (limit != map->limit) {
|
||||||
#if defined(MREMAP_MAYMOVE)
|
#if defined(MREMAP_MAYMOVE)
|
||||||
void *ptr = mremap(map->address, map->limit, limit, 0);
|
void *ptr =
|
||||||
|
mremap(map->address, map->limit, limit, may_move ? MREMAP_MAYMOVE : 0);
|
||||||
if (ptr == MAP_FAILED) {
|
if (ptr == MAP_FAILED) {
|
||||||
rc = errno;
|
rc = errno;
|
||||||
switch (rc) {
|
switch (rc) {
|
||||||
@ -1596,7 +1598,59 @@ retry_mapview:;
|
|||||||
}
|
}
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
map->address = ptr;
|
#else
|
||||||
|
if (!may_move)
|
||||||
|
/* TODO: Perhaps here it is worth to implement suspend/resume threads
|
||||||
|
* and perform unmap/map as like for Windows. */
|
||||||
|
return MDBX_UNABLE_EXTEND_MAPSIZE;
|
||||||
|
|
||||||
|
if (unlikely(munmap(map->address, map->limit)))
|
||||||
|
return errno;
|
||||||
|
|
||||||
|
unsigned mmap_flags =
|
||||||
|
MAP_CONCEAL | MAP_SHARED | MAP_FILE |
|
||||||
|
(F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0);
|
||||||
|
#ifdef MAP_FIXED
|
||||||
|
if (!may_move)
|
||||||
|
mmap_flags |= MAP_FIXED;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void *ptr =
|
||||||
|
mmap(map->address, limit,
|
||||||
|
(flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
|
||||||
|
mmap_flags, map->fd, 0);
|
||||||
|
if (unlikely(ptr == MAP_FAILED)) {
|
||||||
|
ptr = mmap(map->address, map->limit,
|
||||||
|
(flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
|
||||||
|
mmap_flags, map->fd, 0);
|
||||||
|
if (unlikely(ptr == MAP_FAILED)) {
|
||||||
|
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
|
||||||
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
||||||
|
* when this memory will re-used by malloc or another mmaping.
|
||||||
|
* See https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
|
||||||
|
*/
|
||||||
|
ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
|
||||||
|
map->limit = 0;
|
||||||
|
map->current = 0;
|
||||||
|
map->address = nullptr;
|
||||||
|
return errno;
|
||||||
|
}
|
||||||
|
return MDBX_UNABLE_EXTEND_MAPSIZE;
|
||||||
|
}
|
||||||
|
#endif /* !MREMAP_MAYMOVE */
|
||||||
|
|
||||||
|
if (map->address != ptr) {
|
||||||
|
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
|
||||||
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
||||||
|
* when this memory will re-used by malloc or another mmaping.
|
||||||
|
* See https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
|
||||||
|
*/
|
||||||
|
ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
|
||||||
|
|
||||||
|
VALGRIND_MAKE_MEM_DEFINED(ptr, map->current);
|
||||||
|
ASAN_UNPOISON_MEMORY_REGION(ptr, map->current);
|
||||||
|
map->address = ptr;
|
||||||
|
}
|
||||||
map->limit = limit;
|
map->limit = limit;
|
||||||
|
|
||||||
#ifdef MADV_DONTFORK
|
#ifdef MADV_DONTFORK
|
||||||
@ -1607,14 +1661,9 @@ retry_mapview:;
|
|||||||
#ifdef MADV_NOHUGEPAGE
|
#ifdef MADV_NOHUGEPAGE
|
||||||
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
|
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
|
||||||
#endif /* MADV_NOHUGEPAGE */
|
#endif /* MADV_NOHUGEPAGE */
|
||||||
|
|
||||||
#else /* MREMAP_MAYMOVE */
|
|
||||||
/* TODO: Perhaps here it is worth to implement suspend/resume threads
|
|
||||||
* and perform unmap/map as like for Windows. */
|
|
||||||
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
|
|
||||||
#endif /* !MREMAP_MAYMOVE */
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -623,7 +623,7 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
|
|||||||
const unsigned options);
|
const unsigned options);
|
||||||
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
|
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
|
||||||
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current,
|
MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t current,
|
||||||
size_t wanna);
|
size_t wanna, const bool may_move);
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned limit, count;
|
unsigned limit, count;
|
||||||
|
Loading…
Reference in New Issue
Block a user