mirror of
https://gitflic.ru/project/erthink/libmdbx.git
synced 2025-01-10 10:17:11 +00:00
mdbx: rework API and Docs around Handle-Slow-Readers (no algorithmic changes).
Change-Id: I5b76a8400ce6f5f241f8e4a7f53d746fe39f8e1e
This commit is contained in:
parent
6294e1710a
commit
c8a0951566
3
.github/actions/spelling/expect.txt
vendored
3
.github/actions/spelling/expect.txt
vendored
@ -601,6 +601,7 @@ hpp
|
|||||||
hppa
|
hppa
|
||||||
hpux
|
hpux
|
||||||
hrows
|
hrows
|
||||||
|
hsr
|
||||||
htags
|
htags
|
||||||
htm
|
htm
|
||||||
html
|
html
|
||||||
@ -795,6 +796,7 @@ LOCKNAME
|
|||||||
locktable
|
locktable
|
||||||
LOGFILE
|
LOGFILE
|
||||||
loglevel
|
loglevel
|
||||||
|
longlived
|
||||||
LONGLONG
|
LONGLONG
|
||||||
lowerbound
|
lowerbound
|
||||||
lowerboundvalue
|
lowerboundvalue
|
||||||
@ -1090,7 +1092,6 @@ onstack
|
|||||||
onstask
|
onstask
|
||||||
oom
|
oom
|
||||||
oomfunc
|
oomfunc
|
||||||
oomkick
|
|
||||||
openfile
|
openfile
|
||||||
openldap
|
openldap
|
||||||
openmp
|
openmp
|
||||||
|
@ -30,11 +30,14 @@ Added features:
|
|||||||
- Improved opening large DB (> 4Gb) from 32-bit code.
|
- Improved opening large DB (> 4Gb) from 32-bit code.
|
||||||
- Provided `pure-function` and `const-function` attributes to C API.
|
- Provided `pure-function` and `const-function` attributes to C API.
|
||||||
- Support for user-settable transaction context.
|
- Support for user-settable transaction context.
|
||||||
|
- Revised API and documentation related to Handle-Slow-Readers callback feature.
|
||||||
|
|
||||||
Deprecated functions and flags:
|
Deprecated functions and flags:
|
||||||
|
|
||||||
- For clarity and API simplification the `MDBX_MAPASYNC` flag is deprecated.
|
- For clarity and API simplification the `MDBX_MAPASYNC` flag is deprecated.
|
||||||
Just use `MDBX_SAFE_NOSYNC` or `MDBX_UTTERLY_NOSYNC` instead of it.
|
Just use `MDBX_SAFE_NOSYNC` or `MDBX_UTTERLY_NOSYNC` instead of it.
|
||||||
|
- `MDBX_oom_func`, `mdbx_env_set_oomfunc()` and `mdbx_env_get_oomfunc()`
|
||||||
|
replaced with `MDBX_hsr_func`, `mdbx_env_set_hsr()` and `mdbx_env_get_hsr()`.
|
||||||
|
|
||||||
Fixes:
|
Fixes:
|
||||||
|
|
||||||
|
@ -241,7 +241,7 @@ Since version 0.9.1, the utility supports checking the database using any of the
|
|||||||
|
|
||||||
10. Sequence generation and three persistent 64-bit markers.
|
10. Sequence generation and three persistent 64-bit markers.
|
||||||
|
|
||||||
11. Callback for lack-of-space condition of database that allows you to control and/or resolve such situations.
|
11. Handle-Slow-Readers callback to resolve database full/overflow issues due to long-lived read transaction(s).
|
||||||
|
|
||||||
12. Support for opening databases in the exclusive mode, including on a network share.
|
12. Support for opening databases in the exclusive mode, including on a network share.
|
||||||
|
|
||||||
|
@ -147,9 +147,9 @@ or debugging of a client application while retaining an active read
|
|||||||
transaction. LMDB this results in `MDB_MAP_FULL` error and subsequent write
|
transaction. In LMDB this results in `MDB_MAP_FULL` error and subsequent write
|
||||||
performance degradation.
|
performance degradation.
|
||||||
|
|
||||||
MDBX mostly solve "long-lived" readers issue by the lack-of-space callback
|
MDBX mostly solves the "long-lived" readers issue by using the Handle-Slow-Readers
|
||||||
which allow to aborts long readers, and by the `MDBX_LIFORECLAIM` mode which
|
\ref MDBX_hsr_func callback which allows aborting long-lived read transactions,
|
||||||
addresses subsequent performance degradation.
|
and using the \ref MDBX_LIFORECLAIM mode which addresses subsequent performance degradation.
|
||||||
The "next" version of libmdbx (MithrilDB) will completely solve this.
|
The "next" version of libmdbx (MithrilDB) will completely solve this.
|
||||||
|
|
||||||
- Avoid suspending a process with active transactions. These would then be
|
- Avoid suspending a process with active transactions. These would then be
|
||||||
|
@ -236,6 +236,7 @@ The full \ref c_api documentation lists further details below, like how to:
|
|||||||
- Sstimate size of range query result: \ref c_rqest.
|
- Estimate size of range query result: \ref c_rqest.
|
||||||
- Double performance by LIFO reclaiming on storages with write-back: \ref MDBX_LIFORECLAIM.
|
- Double performance by LIFO reclaiming on storages with write-back: \ref MDBX_LIFORECLAIM.
|
||||||
- Use sequences and canary markers: \ref mdbx_dbi_sequence(), \ref MDBX_canary.
|
- Use sequences and canary markers: \ref mdbx_dbi_sequence(), \ref MDBX_canary.
|
||||||
- Use lack-of-space callback (aka OOM-KICK): \ref mdbx_env_set_oomfunc().
|
- Use Handle-Slow-Readers callback to resolve database full/overflow issues
|
||||||
|
due to long-lived read transactions: \ref mdbx_env_set_hsr().
|
||||||
- Use exclusive mode: \ref MDBX_EXCLUSIVE.
|
- Use exclusive mode: \ref MDBX_EXCLUSIVE.
|
||||||
- Define custom sort orders (but this is recommended to be avoided).
|
- Define custom sort orders (but this is recommended to be avoided).
|
||||||
|
77
mdbx.h
77
mdbx.h
@ -2721,7 +2721,7 @@ struct MDBX_txn_info {
|
|||||||
to the snapshot being read. */
|
to the snapshot being read. */
|
||||||
uint64_t txn_id;
|
uint64_t txn_id;
|
||||||
|
|
||||||
/** For READ-ONLY transaction: the lag from a recent MVCC-snapshot, i.e. the
|
/** For READ-ONLY transaction: the lag from a recent MVCC-snapshot, i.e. the
|
||||||
number of committed transaction since read transaction started. For WRITE
|
number of committed transactions since the read transaction started. For WRITE
|
||||||
transaction (provided if `scan_rlt=true`): the lag of the oldest reader
|
transaction (provided if `scan_rlt=true`): the lag of the oldest reader
|
||||||
from current transaction (i.e. at least 1 if any reader running). */
|
from current transaction (i.e. at least 1 if any reader running). */
|
||||||
@ -2747,9 +2747,9 @@ struct MDBX_txn_info {
|
|||||||
uint64_t txn_space_retired;
|
uint64_t txn_space_retired;
|
||||||
|
|
||||||
/** For READ-ONLY transaction: the space available for writer(s) and that
|
/** For READ-ONLY transaction: the space available for writer(s) and that
|
||||||
must be exhausted for reason to call the OOM-killer for this read
|
must be exhausted for reason to call the Handle-Slow-Readers callback for
|
||||||
transaction. For WRITE transaction: the space inside transaction that left
|
this read transaction. For WRITE transaction: the space inside transaction
|
||||||
to `MDBX_TXN_FULL` error. */
|
that is left before the `MDBX_TXN_FULL` error. */
|
||||||
uint64_t txn_space_leftover;
|
uint64_t txn_space_leftover;
|
||||||
|
|
||||||
/** For READ-ONLY transaction (provided if `scan_rlt=true`): The space that
|
/** For READ-ONLY transaction (provided if `scan_rlt=true`): The space that
|
||||||
@ -4196,14 +4196,25 @@ LIBMDBX_API int mdbx_thread_register(const MDBX_env *env);
|
|||||||
* \ref MDBX_RESULT_TRUE if thread is not registered or already unregistered. */
|
* \ref MDBX_RESULT_TRUE if thread is not registered or already unregistered. */
|
||||||
LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
|
LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
|
||||||
|
|
||||||
/** \brief A lack-of-space callback function to resolve issues with a laggard
|
/** \brief A Handle-Slow-Readers callback function to resolve database
|
||||||
* readers. \ingroup c_err
|
* full/overflow issue due to a reader(s) which prevents the old data from being
|
||||||
|
* recycled.
|
||||||
|
* \ingroup c_err
|
||||||
*
|
*
|
||||||
* Read transactions prevent reuse of pages freed by newer write transactions,
|
* Read transactions prevent reuse of pages freed by newer write transactions,
|
||||||
* thus the database can grow quickly. This callback will be called when there
|
* thus the database can grow quickly. This callback will be called when there
|
||||||
* is not enough space in the database (ie. before increasing the database size
|
* is not enough space in the database (i.e. before increasing the database size
|
||||||
* or before \ref MDBX_MAP_FULL error) and thus can be used to resolve issues
|
* or before \ref MDBX_MAP_FULL error) and thus can be used to resolve issues
|
||||||
* with a "long-lived" read transactions.
|
* with "long-lived" read transactions.
|
||||||
|
* \see long-lived-read
|
||||||
|
*
|
||||||
|
* Using this callback you can choose how to resolve the situation:
|
||||||
|
* - abort the write transaction with an error;
|
||||||
|
* - wait for the read transaction(s) to complete;
|
||||||
|
* - notify a thread performing a long-lived read transaction
|
||||||
|
* and wait for an effect;
|
||||||
|
* - kill the thread or whole process that performs the long-lived read
|
||||||
|
* transaction;
|
||||||
*
|
*
|
||||||
* Depending on the arguments and needs, your implementation may wait,
|
* Depending on the arguments and needs, your implementation may wait,
|
||||||
* terminate a process or thread that is performing a long read, or perform
|
* terminate a process or thread that is performing a long read, or perform
|
||||||
@ -4211,9 +4222,11 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
|
|||||||
* corresponds to the performed action.
|
* corresponds to the performed action.
|
||||||
*
|
*
|
||||||
* \param [in] env An environment handle returned by \ref mdbx_env_create().
|
* \param [in] env An environment handle returned by \ref mdbx_env_create().
|
||||||
|
* \param [in] txn The current write transaction which has internally hit
|
||||||
|
* the \ref MDBX_MAP_FULL condition.
|
||||||
* \param [in] pid A pid of the reader process.
|
* \param [in] pid A pid of the reader process.
|
||||||
* \param [in] tid A thread_id of the reader thread.
|
* \param [in] tid A thread_id of the reader thread.
|
||||||
* \param [in] txn A transaction number on which stalled.
|
* \param [in] laggard The number of the oldest read transaction, on which the reader is stalled.
|
||||||
* \param [in] gap A lag from the last commited txn.
|
* \param [in] gap A lag from the last committed txn.
|
||||||
* \param [in] space A space that actually become available for reuse after
|
* \param [in] space A space that actually becomes available for reuse after
|
||||||
* this reader finished. The callback function can take
|
* this reader finished. The callback function can take
|
||||||
@ -4221,9 +4234,9 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
|
|||||||
* a long-running transaction has.
|
* a long-running transaction has.
|
||||||
* \param [in] retry A retry number starting from 0.
|
* \param [in] retry A retry number starting from 0.
|
||||||
* If callback has returned 0 at least once, then at end
|
* If callback has returned 0 at least once, then at end
|
||||||
* of current OOM-handler loop callback will be called
|
* of current handling loop the callback function will be
|
||||||
* additionally with negative value to notify about the
|
* called additionally with negative value to notify about
|
||||||
* end of loop. The callback function can use this value
|
* the end of loop. The callback function can use this value
|
||||||
* to implement timeout logic while waiting for readers.
|
* to implement timeout logic while waiting for readers.
|
||||||
*
|
*
|
||||||
* \returns The RETURN CODE determines the further actions libmdbx and must
|
* \returns The RETURN CODE determines the further actions of libmdbx and must
|
||||||
@ -4252,36 +4265,42 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
|
|||||||
* \retval 2 or great The reader process was terminated or killed,
|
* \retval 2 or greater The reader process was terminated or killed,
|
||||||
* and libmdbx should entirely reset reader registration.
|
* and libmdbx should entirely reset reader registration.
|
||||||
*
|
*
|
||||||
* \see mdbx_env_set_oomfunc() \see mdbx_env_get_oomfunc()
|
* \see mdbx_env_set_hsr() \see mdbx_env_get_hsr()
|
||||||
*/
|
*/
|
||||||
typedef int(MDBX_oom_func)(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid,
|
typedef int(MDBX_hsr_func)(const MDBX_env *env, const MDBX_txn *txn,
|
||||||
uint64_t txn, unsigned gap, size_t space,
|
mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard,
|
||||||
|
unsigned gap, size_t space,
|
||||||
int retry) MDBX_CXX17_NOEXCEPT;
|
int retry) MDBX_CXX17_NOEXCEPT;
|
||||||
|
|
||||||
/** \brief Set the OOM callback.
|
/** \brief Sets a Handle-Slow-Readers callback to resolve database full/overflow
|
||||||
|
* issue due to a reader(s) which prevents the old data from being recycled.
|
||||||
* \ingroup c_err
|
* \ingroup c_err
|
||||||
*
|
*
|
||||||
* The callback will only be triggered on lack of space to resolve issues with
|
* The callback will only be triggered when the database is full due to a
|
||||||
* lagging reader(s) (i.e. to kill it) for resume reuse pages from the garbage
|
* reader(s) which prevents the old data from being recycled.
|
||||||
* collector.
|
|
||||||
* \see mdbx_env_get_oomfunc()
|
|
||||||
*
|
*
|
||||||
* \param [in] env An environment handle returned
|
* \see mdbx_env_get_hsr()
|
||||||
* by \ref mdbx_env_create().
|
* \see long-lived-read
|
||||||
* \param [in] oom_func A \ref MDBX_oom_func function or NULL to disable.
|
*
|
||||||
|
* \param [in] env An environment handle returned
|
||||||
|
* by \ref mdbx_env_create().
|
||||||
|
* \param [in] hsr_callback A \ref MDBX_hsr_func function
|
||||||
|
* or NULL to disable.
|
||||||
*
|
*
|
||||||
* \returns A non-zero error value on failure and 0 on success. */
|
* \returns A non-zero error value on failure and 0 on success. */
|
||||||
LIBMDBX_API int mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oom_func);
|
LIBMDBX_API int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr_callback);
|
||||||
|
|
||||||
/** \brief Get the current oom_func callback.
|
/** \brief Gets current Handle-Slow-Readers callback used to resolve database
|
||||||
* \ingroup c_settings
|
* full/overflow issue due to a reader(s) which prevents the old data from being
|
||||||
* \see mdbx_env_set_oomfunc()
|
* recycled.
|
||||||
|
* \see mdbx_env_set_hsr()
|
||||||
*
|
*
|
||||||
* \param [in] env An environment handle returned by \ref mdbx_env_create().
|
* \param [in] env An environment handle returned by \ref mdbx_env_create().
|
||||||
*
|
*
|
||||||
* \returns A MDBX_oom_func function or NULL if disabled. */
|
* \returns A MDBX_hsr_func function or NULL if disabled
|
||||||
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_oom_func *
|
* or if something went wrong (e.g. an invalid environment handle). */
|
||||||
mdbx_env_get_oomfunc(const MDBX_env *env);
|
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_hsr_func *
|
||||||
|
mdbx_env_get_hsr(const MDBX_env *env);
|
||||||
|
|
||||||
/** \defgroup btree_traversal B-tree Traversal
|
/** \defgroup btree_traversal B-tree Traversal
|
||||||
* This is internal API for mdbx_chk tool. You should avoid to use it, except
|
* This is internal API for mdbx_chk tool. You should avoid using it, except
|
||||||
|
27
mdbx.h++
27
mdbx.h++
@ -2100,14 +2100,16 @@ public:
|
|||||||
/// return number of cleared slots.
|
/// return number of cleared slots.
|
||||||
inline unsigned check_readers();
|
inline unsigned check_readers();
|
||||||
|
|
||||||
/// \brief Sets the out-of-space callback.
|
/// \brief Sets a Handle-Slow-Readers callback to resolve database
|
||||||
|
/// full/overflow issue due to a reader(s) which prevents the old data from
|
||||||
|
/// being recycled.
|
||||||
///
|
///
|
||||||
/// Such callback will be triggered in a case where there is not enough free
|
/// Such callback will be triggered in a case where there is not enough free
|
||||||
/// space in the database due to long read transaction(s) which impedes
|
/// space in the database due to long read transaction(s) which impedes
|
||||||
/// reusing the pages of an old MVCC snapshot(s).
|
/// reusing the pages of an old MVCC snapshot(s).
|
||||||
///
|
///
|
||||||
/// Using this callback you can choose how to get out of the situation:
|
/// Using this callback you can choose how to resolve the situation:
|
||||||
/// - abort the record transaction with an error;
|
/// - abort the write transaction with an error;
|
||||||
/// - wait for the read transaction(s) to complete;
|
/// - wait for the read transaction(s) to complete;
|
||||||
/// - notify a thread performing a long-lived read transaction
|
/// - notify a thread performing a long-lived read transaction
|
||||||
/// and wait for an effect;
|
/// and wait for an effect;
|
||||||
@ -2115,10 +2117,13 @@ public:
|
|||||||
/// transaction;
|
/// transaction;
|
||||||
///
|
///
|
||||||
/// \see long-lived-read
|
/// \see long-lived-read
|
||||||
inline env &set_OutOfSpace_callback(MDBX_oom_func *);
|
inline env &set_HandleSlowReaders(MDBX_hsr_func *);
|
||||||
/// \brief Returns the current out-of-space callback.
|
|
||||||
/// \see set_OutOfSpace_callback()
|
/// \brief Returns the current Handle-Slow-Readers callback used to resolve
|
||||||
inline MDBX_oom_func *get_OutOfSpace_callback() const noexcept;
|
/// database full/overflow issue due to a reader(s) which prevents the old
|
||||||
|
/// data from being recycled.
|
||||||
|
/// \see set_HandleSlowReaders()
|
||||||
|
inline MDBX_hsr_func *get_HandleSlowReaders() const noexcept;
|
||||||
|
|
||||||
/// \brief Starts read (read-only) transaction.
|
/// \brief Starts read (read-only) transaction.
|
||||||
inline txn_managed start_read() const;
|
inline txn_managed start_read() const;
|
||||||
@ -3499,13 +3504,13 @@ inline unsigned env::check_readers() {
|
|||||||
return static_cast<unsigned>(dead_count);
|
return static_cast<unsigned>(dead_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline env &env::set_OutOfSpace_callback(MDBX_oom_func *cb) {
|
inline env &env::set_HandleSlowReaders(MDBX_hsr_func *cb) {
|
||||||
error::success_or_throw(::mdbx_env_set_oomfunc(handle_, cb));
|
error::success_or_throw(::mdbx_env_set_hsr(handle_, cb));
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline MDBX_oom_func *env::get_OutOfSpace_callback() const noexcept {
|
inline MDBX_hsr_func *env::get_HandleSlowReaders() const noexcept {
|
||||||
return ::mdbx_env_get_oomfunc(handle_);
|
return ::mdbx_env_get_hsr(handle_);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline txn_managed env::start_read() const {
|
inline txn_managed env::start_read() const {
|
||||||
|
65
src/core.c
65
src/core.c
@ -900,7 +900,7 @@ static __always_inline void safe64_reset(mdbx_safe64_t *ptr,
|
|||||||
static __always_inline bool safe64_reset_compare(mdbx_safe64_t *ptr,
|
static __always_inline bool safe64_reset_compare(mdbx_safe64_t *ptr,
|
||||||
txnid_t compare) {
|
txnid_t compare) {
|
||||||
mdbx_compiler_barrier();
|
mdbx_compiler_barrier();
|
||||||
/* LY: This function is used to reset `mr_txnid` from OOM-kick in case
|
/* LY: This function is used to reset `mr_txnid` from hsr-handler in case
|
||||||
* the asynchronously cancellation of read transaction. Therefore,
|
* of asynchronous cancellation of a read transaction. Therefore,
|
||||||
* there may be a collision between the cleanup performed here and
|
* there may be a collision between the cleanup performed here and
|
||||||
* asynchronous termination and restarting of the read transaction
|
* asynchronous termination and restarting of the read transaction
|
||||||
@ -3074,7 +3074,8 @@ static __must_check_result int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp);
|
|||||||
static __must_check_result int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp);
|
static __must_check_result int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp);
|
||||||
static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
|
static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
|
||||||
MDBX_page **const mp, int flags);
|
MDBX_page **const mp, int flags);
|
||||||
static txnid_t mdbx_oomkick(MDBX_env *env, const txnid_t laggard);
|
static txnid_t mdbx_kick_longlived_readers(MDBX_env *env,
|
||||||
|
const txnid_t laggard);
|
||||||
|
|
||||||
static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, unsigned num,
|
static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, unsigned num,
|
||||||
MDBX_page **mp);
|
MDBX_page **mp);
|
||||||
@ -5071,7 +5072,7 @@ skip_cache:
|
|||||||
txnid_t oldest = 0, last = 0;
|
txnid_t oldest = 0, last = 0;
|
||||||
const unsigned wanna_range = num - 1;
|
const unsigned wanna_range = num - 1;
|
||||||
|
|
||||||
while (true) { /* oom-kick retry loop */
|
while (true) { /* hsr-kick retry loop */
|
||||||
/* If our dirty list is already full, we can't do anything */
|
/* If our dirty list is already full, we can't do anything */
|
||||||
if (unlikely(txn->tw.dirtyroom == 0)) {
|
if (unlikely(txn->tw.dirtyroom == 0)) {
|
||||||
rc = MDBX_TXN_FULL;
|
rc = MDBX_TXN_FULL;
|
||||||
@ -5374,7 +5375,7 @@ skip_cache:
|
|||||||
/* it is reasonable check/kick lagging reader(s) here,
|
/* it is reasonable check/kick lagging reader(s) here,
|
||||||
* since we made a new steady point or wipe the last. */
|
* since we made a new steady point or wipe the last. */
|
||||||
if (oldest < txn->mt_txnid - MDBX_TXNID_STEP &&
|
if (oldest < txn->mt_txnid - MDBX_TXNID_STEP &&
|
||||||
mdbx_oomkick(env, oldest) > oldest)
|
mdbx_kick_longlived_readers(env, oldest) > oldest)
|
||||||
continue;
|
continue;
|
||||||
} else if (unlikely(rc != MDBX_RESULT_TRUE))
|
} else if (unlikely(rc != MDBX_RESULT_TRUE))
|
||||||
goto fail;
|
goto fail;
|
||||||
@ -5386,7 +5387,7 @@ skip_cache:
|
|||||||
if ((flags & MDBX_ALLOC_NEW) && next <= txn->mt_end_pgno)
|
if ((flags & MDBX_ALLOC_NEW) && next <= txn->mt_end_pgno)
|
||||||
goto done;
|
goto done;
|
||||||
if ((flags & MDBX_ALLOC_GC) && oldest < txn->mt_txnid - MDBX_TXNID_STEP &&
|
if ((flags & MDBX_ALLOC_GC) && oldest < txn->mt_txnid - MDBX_TXNID_STEP &&
|
||||||
mdbx_oomkick(env, oldest) > oldest)
|
mdbx_kick_longlived_readers(env, oldest) > oldest)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
rc = MDBX_NOTFOUND;
|
rc = MDBX_NOTFOUND;
|
||||||
@ -6005,7 +6006,7 @@ static bind_rslot_result bind_rslot(MDBX_env *env, const uintptr_t tid) {
|
|||||||
if (likely(slot < env->me_maxreaders))
|
if (likely(slot < env->me_maxreaders))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
result.err = mdbx_reader_check0(env, true, NULL);
|
result.err = mdbx_cleanup_dead_readers(env, true, NULL);
|
||||||
if (result.err != MDBX_RESULT_TRUE) {
|
if (result.err != MDBX_RESULT_TRUE) {
|
||||||
mdbx_rdt_unlock(env);
|
mdbx_rdt_unlock(env);
|
||||||
result.err =
|
result.err =
|
||||||
@ -10613,7 +10614,7 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
|
|||||||
if (rc != MDBX_SUCCESS)
|
if (rc != MDBX_SUCCESS)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
} else {
|
} else {
|
||||||
rc = mdbx_reader_check0(env, false, NULL);
|
rc = mdbx_cleanup_dead_readers(env, false, NULL);
|
||||||
if (MDBX_IS_ERROR(rc))
|
if (MDBX_IS_ERROR(rc))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
@ -17612,15 +17613,16 @@ static bool __cold mdbx_pid_insert(uint32_t *ids, uint32_t pid) {
|
|||||||
int __cold mdbx_reader_check(MDBX_env *env, int *dead) {
|
int __cold mdbx_reader_check(MDBX_env *env, int *dead) {
|
||||||
if (dead)
|
if (dead)
|
||||||
*dead = 0;
|
*dead = 0;
|
||||||
return mdbx_reader_check0(env, false, dead);
|
return mdbx_cleanup_dead_readers(env, false, dead);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return:
|
/* Return:
|
||||||
* MDBX_RESULT_TRUE - done and mutex recovered
|
* MDBX_RESULT_TRUE - done and mutex recovered
|
||||||
* MDBX_SUCCESS - done
|
* MDBX_SUCCESS - done
|
||||||
* Otherwise errcode. */
|
* Otherwise errcode. */
|
||||||
MDBX_INTERNAL_FUNC int __cold mdbx_reader_check0(MDBX_env *env, int rdt_locked,
|
MDBX_INTERNAL_FUNC int __cold mdbx_cleanup_dead_readers(MDBX_env *env,
|
||||||
int *dead) {
|
int rdt_locked,
|
||||||
|
int *dead) {
|
||||||
int rc = check_env(env);
|
int rc = check_env(env);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
@ -17737,8 +17739,9 @@ int __cold mdbx_setup_debug(int loglevel, int flags, MDBX_debug_func *logger) {
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
static txnid_t __cold mdbx_kick_longlived_readers(MDBX_env *env,
|
||||||
mdbx_debug("%s", "DB size maxed out");
|
const txnid_t laggard) {
|
||||||
|
mdbx_debug("DB size maxed out by reading #%" PRIaTXN, laggard);
|
||||||
|
|
||||||
int retry;
|
int retry;
|
||||||
for (retry = 0; retry < INT_MAX; ++retry) {
|
for (retry = 0; retry < INT_MAX; ++retry) {
|
||||||
@ -17746,10 +17749,10 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
|||||||
mdbx_assert(env, oldest < env->me_txn0->mt_txnid);
|
mdbx_assert(env, oldest < env->me_txn0->mt_txnid);
|
||||||
mdbx_assert(env, oldest >= laggard);
|
mdbx_assert(env, oldest >= laggard);
|
||||||
mdbx_assert(env, oldest >= *env->me_oldest);
|
mdbx_assert(env, oldest >= *env->me_oldest);
|
||||||
if (oldest == laggard || unlikely(env->me_lck == NULL /* exclusive mode */))
|
if (oldest == laggard || unlikely(!env->me_lck /* without-LCK mode */))
|
||||||
return oldest;
|
return oldest;
|
||||||
|
|
||||||
if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL)))
|
if (MDBX_IS_ERROR(mdbx_cleanup_dead_readers(env, false, NULL)))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
MDBX_reader *asleep = nullptr;
|
MDBX_reader *asleep = nullptr;
|
||||||
@ -17778,20 +17781,20 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (laggard < oldest || !asleep) {
|
if (laggard < oldest || !asleep) {
|
||||||
if (retry && env->me_oom_func) {
|
if (retry && env->me_hsr_callback) {
|
||||||
/* LY: notify end of oom-loop */
|
/* LY: notify end of hsr-loop */
|
||||||
const txnid_t gap = oldest - laggard;
|
const txnid_t gap = oldest - laggard;
|
||||||
env->me_oom_func(env, 0, 0, laggard,
|
env->me_hsr_callback(env, env->me_txn, 0, 0, laggard,
|
||||||
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, 0,
|
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, 0,
|
||||||
-retry);
|
-retry);
|
||||||
}
|
}
|
||||||
mdbx_notice("oom-kick: update oldest %" PRIaTXN " -> %" PRIaTXN,
|
mdbx_notice("hsr-kick: update oldest %" PRIaTXN " -> %" PRIaTXN,
|
||||||
*env->me_oldest, oldest);
|
*env->me_oldest, oldest);
|
||||||
mdbx_assert(env, *env->me_oldest <= oldest);
|
mdbx_assert(env, *env->me_oldest <= oldest);
|
||||||
return *env->me_oldest = oldest;
|
return *env->me_oldest = oldest;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!env->me_oom_func)
|
if (!env->me_hsr_callback)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
uint32_t pid = asleep->mr_pid;
|
uint32_t pid = asleep->mr_pid;
|
||||||
@ -17807,9 +17810,9 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
|||||||
(oldest_retired > head_retired)
|
(oldest_retired > head_retired)
|
||||||
? pgno2bytes(env, (pgno_t)(oldest_retired - head_retired))
|
? pgno2bytes(env, (pgno_t)(oldest_retired - head_retired))
|
||||||
: 0;
|
: 0;
|
||||||
int rc = env->me_oom_func(env, pid, (mdbx_tid_t)tid, laggard,
|
int rc = env->me_hsr_callback(
|
||||||
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX,
|
env, env->me_txn, pid, (mdbx_tid_t)tid, laggard,
|
||||||
space, retry);
|
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, space, retry);
|
||||||
if (rc < 0)
|
if (rc < 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -17827,9 +17830,9 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (retry && env->me_oom_func) {
|
if (retry && env->me_hsr_callback) {
|
||||||
/* LY: notify end of oom-loop */
|
/* LY: notify end of hsr-loop */
|
||||||
env->me_oom_func(env, 0, 0, laggard, 0, 0, -retry);
|
env->me_hsr_callback(env, env->me_txn, 0, 0, laggard, 0, 0, -retry);
|
||||||
}
|
}
|
||||||
return mdbx_find_oldest(env->me_txn);
|
return mdbx_find_oldest(env->me_txn);
|
||||||
}
|
}
|
||||||
@ -17874,18 +17877,18 @@ int __cold mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) {
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int __cold mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oomfunc) {
|
int __cold mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) {
|
||||||
int rc = check_env(env);
|
int rc = check_env(env);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
env->me_oom_func = oomfunc;
|
env->me_hsr_callback = hsr;
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_oom_func *__cold mdbx_env_get_oomfunc(const MDBX_env *env) {
|
MDBX_hsr_func *__cold mdbx_env_get_hsr(const MDBX_env *env) {
|
||||||
return likely(env && env->me_signature == MDBX_ME_SIGNATURE)
|
return likely(env && env->me_signature == MDBX_ME_SIGNATURE)
|
||||||
? env->me_oom_func
|
? env->me_hsr_callback
|
||||||
: NULL;
|
: NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -989,7 +989,7 @@ struct MDBX_env {
|
|||||||
volatile pgno_t *me_autosync_threshold;
|
volatile pgno_t *me_autosync_threshold;
|
||||||
volatile pgno_t *me_discarded_tail;
|
volatile pgno_t *me_discarded_tail;
|
||||||
volatile uint32_t *me_meta_sync_txnid;
|
volatile uint32_t *me_meta_sync_txnid;
|
||||||
MDBX_oom_func *me_oom_func; /* Callback for kicking laggard readers */
|
MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */
|
||||||
struct {
|
struct {
|
||||||
#if MDBX_LOCKING > 0
|
#if MDBX_LOCKING > 0
|
||||||
mdbx_ipclock_t wlock;
|
mdbx_ipclock_t wlock;
|
||||||
@ -1203,8 +1203,8 @@ mdbx_flush_incoherent_mmap(void *addr, size_t nbytes, const intptr_t pagesize) {
|
|||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
/* Internal prototypes */
|
/* Internal prototypes */
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int mdbx_reader_check0(MDBX_env *env, int rlocked,
|
MDBX_INTERNAL_FUNC int mdbx_cleanup_dead_readers(MDBX_env *env, int rlocked,
|
||||||
int *dead);
|
int *dead);
|
||||||
MDBX_INTERNAL_FUNC int mdbx_rthc_alloc(mdbx_thread_key_t *key,
|
MDBX_INTERNAL_FUNC int mdbx_rthc_alloc(mdbx_thread_key_t *key,
|
||||||
MDBX_reader *begin, MDBX_reader *end);
|
MDBX_reader *begin, MDBX_reader *end);
|
||||||
MDBX_INTERNAL_FUNC void mdbx_rthc_remove(const mdbx_thread_key_t key);
|
MDBX_INTERNAL_FUNC void mdbx_rthc_remove(const mdbx_thread_key_t key);
|
||||||
|
@ -702,7 +702,7 @@ static int __cold mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
|
|||||||
mdbx_warning("%clock owner died, %s", (rlocked ? 'r' : 'w'),
|
mdbx_warning("%clock owner died, %s", (rlocked ? 'r' : 'w'),
|
||||||
(rc ? "this process' env is hosed" : "recovering"));
|
(rc ? "this process' env is hosed" : "recovering"));
|
||||||
|
|
||||||
int check_rc = mdbx_reader_check0(env, rlocked, NULL);
|
int check_rc = mdbx_cleanup_dead_readers(env, rlocked, NULL);
|
||||||
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
|
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
|
||||||
|
|
||||||
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
|
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
|
||||||
|
15
test/test.cc
15
test/test.cc
@ -78,16 +78,17 @@ const char *keygencase2str(const keygen_case keycase) {
|
|||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
|
|
||||||
int testcase::oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid,
|
int testcase::hsr_callback(const MDBX_env *env, const MDBX_txn *txn,
|
||||||
uint64_t txn, unsigned gap, size_t space,
|
mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard,
|
||||||
|
unsigned gap, size_t space,
|
||||||
int retry) MDBX_CXX17_NOEXCEPT {
|
int retry) MDBX_CXX17_NOEXCEPT {
|
||||||
|
(void)txn;
|
||||||
testcase *self = (testcase *)mdbx_env_get_userctx(env);
|
testcase *self = (testcase *)mdbx_env_get_userctx(env);
|
||||||
|
|
||||||
if (retry == 0)
|
if (retry == 0)
|
||||||
log_notice("oom_callback: waitfor pid %lu, thread %" PRIuPTR
|
log_notice("hsr_callback: waitfor pid %lu, thread %" PRIuPTR
|
||||||
", txn #%" PRIu64 ", gap %d, scape %zu",
|
", txn #%" PRIu64 ", gap %d, scape %zu",
|
||||||
(long)pid, (size_t)tid, txn, gap, space);
|
(long)pid, (size_t)tid, laggard, gap, space);
|
||||||
|
|
||||||
if (self->should_continue(true)) {
|
if (self->should_continue(true)) {
|
||||||
osal_yield();
|
osal_yield();
|
||||||
@ -123,9 +124,9 @@ void testcase::db_prepare() {
|
|||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
failure_perror("mdbx_env_set_maxdbs()", rc);
|
failure_perror("mdbx_env_set_maxdbs()", rc);
|
||||||
|
|
||||||
rc = mdbx_env_set_oomfunc(env, testcase::oom_callback);
|
rc = mdbx_env_set_hsr(env, testcase::hsr_callback);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
failure_perror("mdbx_env_set_oomfunc()", rc);
|
failure_perror("mdbx_env_set_hsr()", rc);
|
||||||
|
|
||||||
rc = mdbx_env_set_geometry(
|
rc = mdbx_env_set_geometry(
|
||||||
env, config.params.size_lower, config.params.size_now,
|
env, config.params.size_lower, config.params.size_now,
|
||||||
|
@ -166,8 +166,9 @@ protected:
|
|||||||
const keygen::buffer &old_value, MDBX_put_flags_t flags);
|
const keygen::buffer &old_value, MDBX_put_flags_t flags);
|
||||||
int remove(const keygen::buffer &akey, const keygen::buffer &adata);
|
int remove(const keygen::buffer &akey, const keygen::buffer &adata);
|
||||||
|
|
||||||
static int oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid,
|
static int hsr_callback(const MDBX_env *env, const MDBX_txn *txn,
|
||||||
uint64_t txn, unsigned gap, size_t space,
|
mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard,
|
||||||
|
unsigned gap, size_t space,
|
||||||
int retry) MDBX_CXX17_NOEXCEPT;
|
int retry) MDBX_CXX17_NOEXCEPT;
|
||||||
|
|
||||||
MDBX_env_flags_t actual_env_mode{MDBX_ENV_DEFAULTS};
|
MDBX_env_flags_t actual_env_mode{MDBX_ENV_DEFAULTS};
|
||||||
|
Loading…
Reference in New Issue
Block a user