mdbx: rework API and Docs around Handle-Slow-Readers (no algorithmic changes).

Change-Id: I5b76a8400ce6f5f241f8e4a7f53d746fe39f8e1e
This commit is contained in:
Leonid Yuriev 2020-09-29 19:24:57 +03:00
parent 6294e1710a
commit c8a0951566
12 changed files with 124 additions and 90 deletions

View File

@ -601,6 +601,7 @@ hpp
hppa hppa
hpux hpux
hrows hrows
hsr
htags htags
htm htm
html html
@ -795,6 +796,7 @@ LOCKNAME
locktable locktable
LOGFILE LOGFILE
loglevel loglevel
longlived
LONGLONG LONGLONG
lowerbound lowerbound
lowerboundvalue lowerboundvalue
@ -1090,7 +1092,6 @@ onstack
onstask onstask
oom oom
oomfunc oomfunc
oomkick
openfile openfile
openldap openldap
openmp openmp

View File

@ -30,11 +30,14 @@ Added features:
- Improved opening large DB (> 4Gb) from 32-bit code. - Improved opening large DB (> 4Gb) from 32-bit code.
- Provided `pure-function` and `const-function` attributes to C API. - Provided `pure-function` and `const-function` attributes to C API.
- Support for user-settable transaction context. - Support for user-settable transaction context.
- Revised API and documentation related to Handle-Slow-Readers callback feature.
Deprecated functions and flags: Deprecated functions and flags:
- For clarity and API simplification the `MDBX_MAPASYNC` flag is deprecated. - For clarity and API simplification the `MDBX_MAPASYNC` flag is deprecated.
Just use `MDBX_SAFE_NOSYNC` or `MDBX_UTTERLY_NOSYNC` instead of it. Just use `MDBX_SAFE_NOSYNC` or `MDBX_UTTERLY_NOSYNC` instead of it.
- `MDBX_oom_func`, `mdbx_env_set_oomfunc()` and `mdbx_env_get_oomfunc()`
replaced with `MDBX_hsr_func`, `mdbx_env_set_hsr()` and `mdbx_env_get_hsr()`.
Fixes: Fixes:

View File

@ -241,7 +241,7 @@ Since version 0.9.1, the utility supports checking the database using any of the
10. Sequence generation and three persistent 64-bit markers. 10. Sequence generation and three persistent 64-bit markers.
11. Callback for lack-of-space condition of database that allows you to control and/or resolve such situations. 11. Handle-Slow-Readers callback to resolve database full/overflow issues due to long-lived read transaction(s).
12. Support for opening databases in the exclusive mode, including on a network share. 12. Support for opening databases in the exclusive mode, including on a network share.

View File

@ -147,9 +147,9 @@ or debugging of a client application while retaining an active read
transaction. LMDB this results in `MDB_MAP_FULL` error and subsequent write transaction. LMDB this results in `MDB_MAP_FULL` error and subsequent write
performance degradation. performance degradation.
MDBX mostly solve "long-lived" readers issue by the lack-of-space callback MDBX mostly solves the "long-lived" readers issue by using the Handle-Slow-Readers
which allow to aborts long readers, and by the `MDBX_LIFORECLAIM` mode which \ref MDBX_hsr_func callback which allows aborting long-lived read transactions,
addresses subsequent performance degradation. and using the \ref MDBX_LIFORECLAIM mode which addresses subsequent performance degradation.
The "next" version of libmdbx (MithrilDB) will completely solve this. The "next" version of libmdbx (MithrilDB) will completely solve this.
- Avoid suspending a process with active transactions. These would then be - Avoid suspending a process with active transactions. These would then be

View File

@ -236,6 +236,7 @@ The full \ref c_api documentation lists further details below, like how to:
- Estimate size of range query result: \ref c_rqest. - Estimate size of range query result: \ref c_rqest.
- Double performance by LIFO reclaiming on storages with write-back: \ref MDBX_LIFORECLAIM. - Double performance by LIFO reclaiming on storages with write-back: \ref MDBX_LIFORECLAIM.
- Use sequences and canary markers: \ref mdbx_dbi_sequence(), \ref MDBX_canary. - Use sequences and canary markers: \ref mdbx_dbi_sequence(), \ref MDBX_canary.
- Use lack-of-space callback (aka OOM-KICK): \ref mdbx_env_set_oomfunc(). - Use Handle-Slow-Readers callback to resolve database full/overflow issues
due to long-lived read transactions: \ref mdbx_env_set_hsr().
- Use exclusive mode: \ref MDBX_EXCLUSIVE. - Use exclusive mode: \ref MDBX_EXCLUSIVE.
- Define custom sort orders (but this is recommended to be avoided). - Define custom sort orders (but this is recommended to be avoided).

77
mdbx.h
View File

@ -2721,7 +2721,7 @@ struct MDBX_txn_info {
to the snapshot being read. */ to the snapshot being read. */
uint64_t txn_id; uint64_t txn_id;
/** For READ-ONLY transaction: the lag from a recent MVCC-snapshot, i.e. the /** For READ-ONLY transaction: the lag from a recent MVCC-snapshot, i.e. the
number of committed transaction since read transaction started. For WRITE number of committed transaction since read transaction started. For WRITE
transaction (provided if `scan_rlt=true`): the lag of the oldest reader transaction (provided if `scan_rlt=true`): the lag of the oldest reader
from current transaction (i.e. at least 1 if any reader running). */ from current transaction (i.e. at least 1 if any reader running). */
@ -2747,9 +2747,9 @@ struct MDBX_txn_info {
uint64_t txn_space_retired; uint64_t txn_space_retired;
/** For READ-ONLY transaction: the space available for writer(s) and that /** For READ-ONLY transaction: the space available for writer(s) and that
must be exhausted for reason to call the OOM-killer for this read must be exhausted for reason to call the Handle-Slow-Readers callback for
transaction. For WRITE transaction: the space inside transaction that left this read transaction. For WRITE transaction: the space inside transaction
to `MDBX_TXN_FULL` error. */ that left to `MDBX_TXN_FULL` error. */
uint64_t txn_space_leftover; uint64_t txn_space_leftover;
/** For READ-ONLY transaction (provided if `scan_rlt=true`): The space that /** For READ-ONLY transaction (provided if `scan_rlt=true`): The space that
@ -4196,14 +4196,25 @@ LIBMDBX_API int mdbx_thread_register(const MDBX_env *env);
* \ref MDBX_RESULT_TRUE if thread is not registered or already unregistered. */ * \ref MDBX_RESULT_TRUE if thread is not registered or already unregistered. */
LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env); LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
/** \brief A lack-of-space callback function to resolve issues with a laggard /** \brief A Handle-Slow-Readers callback function to resolve database
* readers. \ingroup c_err * full/overflow issue due to a reader(s) which prevents the old data from being
* recycled.
* \ingroup c_err
* *
* Read transactions prevent reuse of pages freed by newer write transactions, * Read transactions prevent reuse of pages freed by newer write transactions,
* thus the database can grow quickly. This callback will be called when there * thus the database can grow quickly. This callback will be called when there
* is not enough space in the database (ie. before increasing the database size * is not enough space in the database (i.e. before increasing the database size
* or before \ref MDBX_MAP_FULL error) and thus can be used to resolve issues * or before \ref MDBX_MAP_FULL error) and thus can be used to resolve issues
* with a "long-lived" read transactions. * with a "long-lived" read transactions.
* \see long-lived-read
*
* Using this callback you can choose how to resolve the situation:
* - abort the write transaction with an error;
* - wait for the read transaction(s) to complete;
* - notify a thread performing a long-lived read transaction
* and wait for an effect;
* - kill the thread or whole process that performs the long-lived read
* transaction;
* *
* Depending on the arguments and needs, your implementation may wait, * Depending on the arguments and needs, your implementation may wait,
* terminate a process or thread that is performing a long read, or perform * terminate a process or thread that is performing a long read, or perform
@ -4211,9 +4222,11 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
* corresponds to the performed action. * corresponds to the performed action.
* *
* \param [in] env An environment handle returned by \ref mdbx_env_create(). * \param [in] env An environment handle returned by \ref mdbx_env_create().
* \param [in] txn The current write transaction which is internally at
* the \ref MDBX_MAP_FULL condition.
* \param [in] pid A pid of the reader process. * \param [in] pid A pid of the reader process.
* \param [in] tid A thread_id of the reader thread. * \param [in] tid A thread_id of the reader thread.
* \param [in] txn A transaction number on which stalled. * \param [in] laggard The oldest read transaction number on which the reader stalled.
* \param [in] gap A lag from the last committed txn. * \param [in] gap A lag from the last committed txn.
* \param [in] space A space that actually become available for reuse after * \param [in] space A space that actually become available for reuse after
* this reader finished. The callback function can take * this reader finished. The callback function can take
@ -4221,9 +4234,9 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
* a long-running transaction has. * a long-running transaction has.
* \param [in] retry A retry number starting from 0. * \param [in] retry A retry number starting from 0.
* If callback has returned 0 at least once, then at end * If callback has returned 0 at least once, then at end
* of current OOM-handler loop callback will be called * of current handling loop the callback function will be
* additionally with negative value to notify about the * called additionally with negative value to notify about
* end of loop. The callback function can use this value * the end of loop. The callback function can use this value
* to implement timeout logic while waiting for readers. * to implement timeout logic while waiting for readers.
* *
* \returns The RETURN CODE determines the further actions libmdbx and must * \returns The RETURN CODE determines the further actions libmdbx and must
@ -4252,36 +4265,42 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
* \retval 2 or greater The reader process was terminated or killed, * \retval 2 or greater The reader process was terminated or killed,
* and libmdbx should entirely reset reader registration. * and libmdbx should entirely reset reader registration.
* *
* \see mdbx_env_set_oomfunc() \see mdbx_env_get_oomfunc() * \see mdbx_env_set_hsr() \see mdbx_env_get_hsr()
*/ */
typedef int(MDBX_oom_func)(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid, typedef int(MDBX_hsr_func)(const MDBX_env *env, const MDBX_txn *txn,
uint64_t txn, unsigned gap, size_t space, mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard,
unsigned gap, size_t space,
int retry) MDBX_CXX17_NOEXCEPT; int retry) MDBX_CXX17_NOEXCEPT;
/** \brief Set the OOM callback. /** \brief Sets a Handle-Slow-Readers callback to resolve database full/overflow
* issue due to a reader(s) which prevents the old data from being recycled.
* \ingroup c_err * \ingroup c_err
* *
* The callback will only be triggered on lack of space to resolve issues with * The callback will only be triggered when the database is full due to a
* lagging reader(s) (i.e. to kill it) for resume reuse pages from the garbage * reader(s) which prevents the old data from being recycled.
* collector.
* \see mdbx_env_get_oomfunc()
* *
* \param [in] env An environment handle returned * \see mdbx_env_get_hsr()
* by \ref mdbx_env_create(). * \see long-lived-read
* \param [in] oom_func A \ref MDBX_oom_func function or NULL to disable. *
* \param [in] env An environment handle returned
* by \ref mdbx_env_create().
* \param [in] hsr_callback A \ref MDBX_hsr_func function
* or NULL to disable.
* *
* \returns A non-zero error value on failure and 0 on success. */ * \returns A non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oom_func); LIBMDBX_API int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr_callback);
/** \brief Get the current oom_func callback. /** \brief Gets current Handle-Slow-Readers callback used to resolve database
* \ingroup c_settings * full/overflow issue due to a reader(s) which prevents the old data from being
* \see mdbx_env_set_oomfunc() * recycled.
* \see mdbx_env_set_hsr()
* *
* \param [in] env An environment handle returned by \ref mdbx_env_create(). * \param [in] env An environment handle returned by \ref mdbx_env_create().
* *
* \returns A MDBX_oom_func function or NULL if disabled. */ * \returns A MDBX_hsr_func function or NULL if disabled
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_oom_func * * or if something went wrong. */
mdbx_env_get_oomfunc(const MDBX_env *env); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_hsr_func *
mdbx_env_get_hsr(const MDBX_env *env);
/** \defgroup btree_traversal B-tree Traversal /** \defgroup btree_traversal B-tree Traversal
* This is internal API for mdbx_chk tool. You should avoid to use it, except * This is internal API for mdbx_chk tool. You should avoid to use it, except

View File

@ -2100,14 +2100,16 @@ public:
/// return number of cleared slots. /// return number of cleared slots.
inline unsigned check_readers(); inline unsigned check_readers();
/// \brief Sets the out-of-space callback. /// \brief Sets a Handle-Slow-Readers callback to resolve database
/// full/overflow issue due to a reader(s) which prevents the old data from
/// being recycled.
/// ///
/// Such callback will be triggered in a case where there is not enough free /// Such callback will be triggered in a case where there is not enough free
/// space in the database due to long read transaction(s) which impedes /// space in the database due to long read transaction(s) which impedes
/// reusing the pages of an old MVCC snapshot(s). /// reusing the pages of an old MVCC snapshot(s).
/// ///
/// Using this callback you can choose how to get out of the situation: /// Using this callback you can choose how to resolve the situation:
/// - abort the record transaction with an error; /// - abort the write transaction with an error;
/// - wait for the read transaction(s) to complete; /// - wait for the read transaction(s) to complete;
/// - notify a thread performing a long-lived read transaction /// - notify a thread performing a long-lived read transaction
/// and wait for an effect; /// and wait for an effect;
@ -2115,10 +2117,13 @@ public:
/// transaction; /// transaction;
/// ///
/// \see long-lived-read /// \see long-lived-read
inline env &set_OutOfSpace_callback(MDBX_oom_func *); inline env &set_HandleSlowReaders(MDBX_hsr_func *);
/// \brief Returns the current out-of-space callback.
/// \see set_OutOfSpace_callback() /// \brief Returns the current Handle-Slow-Readers callback used to resolve
inline MDBX_oom_func *get_OutOfSpace_callback() const noexcept; /// database full/overflow issue due to a reader(s) which prevents the old
/// data from being recycled.
/// \see set_HandleSlowReaders()
inline MDBX_hsr_func *get_HandleSlowReaders() const noexcept;
/// \brief Starts read (read-only) transaction. /// \brief Starts read (read-only) transaction.
inline txn_managed start_read() const; inline txn_managed start_read() const;
@ -3499,13 +3504,13 @@ inline unsigned env::check_readers() {
return static_cast<unsigned>(dead_count); return static_cast<unsigned>(dead_count);
} }
inline env &env::set_OutOfSpace_callback(MDBX_oom_func *cb) { inline env &env::set_HandleSlowReaders(MDBX_hsr_func *cb) {
error::success_or_throw(::mdbx_env_set_oomfunc(handle_, cb)); error::success_or_throw(::mdbx_env_set_hsr(handle_, cb));
return *this; return *this;
} }
inline MDBX_oom_func *env::get_OutOfSpace_callback() const noexcept { inline MDBX_hsr_func *env::get_HandleSlowReaders() const noexcept {
return ::mdbx_env_get_oomfunc(handle_); return ::mdbx_env_get_hsr(handle_);
} }
inline txn_managed env::start_read() const { inline txn_managed env::start_read() const {

View File

@ -900,7 +900,7 @@ static __always_inline void safe64_reset(mdbx_safe64_t *ptr,
static __always_inline bool safe64_reset_compare(mdbx_safe64_t *ptr, static __always_inline bool safe64_reset_compare(mdbx_safe64_t *ptr,
txnid_t compare) { txnid_t compare) {
mdbx_compiler_barrier(); mdbx_compiler_barrier();
/* LY: This function is used to reset `mr_txnid` from OOM-kick in case /* LY: This function is used to reset `mr_txnid` from hsr-handler in case
* of asynchronous cancellation of read transaction. Therefore, * of asynchronous cancellation of read transaction. Therefore,
* there may be a collision between the cleanup performed here and * there may be a collision between the cleanup performed here and
* asynchronous termination and restarting of the read transaction * asynchronous termination and restarting of the read transaction
@ -3074,7 +3074,8 @@ static __must_check_result int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp);
static __must_check_result int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp); static __must_check_result int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp);
static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num, static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
MDBX_page **const mp, int flags); MDBX_page **const mp, int flags);
static txnid_t mdbx_oomkick(MDBX_env *env, const txnid_t laggard); static txnid_t mdbx_kick_longlived_readers(MDBX_env *env,
const txnid_t laggard);
static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, unsigned num, static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, unsigned num,
MDBX_page **mp); MDBX_page **mp);
@ -5071,7 +5072,7 @@ skip_cache:
txnid_t oldest = 0, last = 0; txnid_t oldest = 0, last = 0;
const unsigned wanna_range = num - 1; const unsigned wanna_range = num - 1;
while (true) { /* oom-kick retry loop */ while (true) { /* hsr-kick retry loop */
/* If our dirty list is already full, we can't do anything */ /* If our dirty list is already full, we can't do anything */
if (unlikely(txn->tw.dirtyroom == 0)) { if (unlikely(txn->tw.dirtyroom == 0)) {
rc = MDBX_TXN_FULL; rc = MDBX_TXN_FULL;
@ -5374,7 +5375,7 @@ skip_cache:
/* it is reasonable check/kick lagging reader(s) here, /* it is reasonable check/kick lagging reader(s) here,
* since we made a new steady point or wipe the last. */ * since we made a new steady point or wipe the last. */
if (oldest < txn->mt_txnid - MDBX_TXNID_STEP && if (oldest < txn->mt_txnid - MDBX_TXNID_STEP &&
mdbx_oomkick(env, oldest) > oldest) mdbx_kick_longlived_readers(env, oldest) > oldest)
continue; continue;
} else if (unlikely(rc != MDBX_RESULT_TRUE)) } else if (unlikely(rc != MDBX_RESULT_TRUE))
goto fail; goto fail;
@ -5386,7 +5387,7 @@ skip_cache:
if ((flags & MDBX_ALLOC_NEW) && next <= txn->mt_end_pgno) if ((flags & MDBX_ALLOC_NEW) && next <= txn->mt_end_pgno)
goto done; goto done;
if ((flags & MDBX_ALLOC_GC) && oldest < txn->mt_txnid - MDBX_TXNID_STEP && if ((flags & MDBX_ALLOC_GC) && oldest < txn->mt_txnid - MDBX_TXNID_STEP &&
mdbx_oomkick(env, oldest) > oldest) mdbx_kick_longlived_readers(env, oldest) > oldest)
continue; continue;
rc = MDBX_NOTFOUND; rc = MDBX_NOTFOUND;
@ -6005,7 +6006,7 @@ static bind_rslot_result bind_rslot(MDBX_env *env, const uintptr_t tid) {
if (likely(slot < env->me_maxreaders)) if (likely(slot < env->me_maxreaders))
break; break;
result.err = mdbx_reader_check0(env, true, NULL); result.err = mdbx_cleanup_dead_readers(env, true, NULL);
if (result.err != MDBX_RESULT_TRUE) { if (result.err != MDBX_RESULT_TRUE) {
mdbx_rdt_unlock(env); mdbx_rdt_unlock(env);
result.err = result.err =
@ -10613,7 +10614,7 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
if (rc != MDBX_SUCCESS) if (rc != MDBX_SUCCESS)
goto bailout; goto bailout;
} else { } else {
rc = mdbx_reader_check0(env, false, NULL); rc = mdbx_cleanup_dead_readers(env, false, NULL);
if (MDBX_IS_ERROR(rc)) if (MDBX_IS_ERROR(rc))
goto bailout; goto bailout;
} }
@ -17612,15 +17613,16 @@ static bool __cold mdbx_pid_insert(uint32_t *ids, uint32_t pid) {
int __cold mdbx_reader_check(MDBX_env *env, int *dead) { int __cold mdbx_reader_check(MDBX_env *env, int *dead) {
if (dead) if (dead)
*dead = 0; *dead = 0;
return mdbx_reader_check0(env, false, dead); return mdbx_cleanup_dead_readers(env, false, dead);
} }
/* Return: /* Return:
* MDBX_RESULT_TRUE - done and mutex recovered * MDBX_RESULT_TRUE - done and mutex recovered
* MDBX_SUCCESS - done * MDBX_SUCCESS - done
* Otherwise errcode. */ * Otherwise errcode. */
MDBX_INTERNAL_FUNC int __cold mdbx_reader_check0(MDBX_env *env, int rdt_locked, MDBX_INTERNAL_FUNC int __cold mdbx_cleanup_dead_readers(MDBX_env *env,
int *dead) { int rdt_locked,
int *dead) {
int rc = check_env(env); int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
@ -17737,8 +17739,9 @@ int __cold mdbx_setup_debug(int loglevel, int flags, MDBX_debug_func *logger) {
return rc; return rc;
} }
static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) { static txnid_t __cold mdbx_kick_longlived_readers(MDBX_env *env,
mdbx_debug("%s", "DB size maxed out"); const txnid_t laggard) {
mdbx_debug("DB size maxed out by reading #%" PRIaTXN, laggard);
int retry; int retry;
for (retry = 0; retry < INT_MAX; ++retry) { for (retry = 0; retry < INT_MAX; ++retry) {
@ -17746,10 +17749,10 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
mdbx_assert(env, oldest < env->me_txn0->mt_txnid); mdbx_assert(env, oldest < env->me_txn0->mt_txnid);
mdbx_assert(env, oldest >= laggard); mdbx_assert(env, oldest >= laggard);
mdbx_assert(env, oldest >= *env->me_oldest); mdbx_assert(env, oldest >= *env->me_oldest);
if (oldest == laggard || unlikely(env->me_lck == NULL /* exclusive mode */)) if (oldest == laggard || unlikely(!env->me_lck /* without-LCK mode */))
return oldest; return oldest;
if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL))) if (MDBX_IS_ERROR(mdbx_cleanup_dead_readers(env, false, NULL)))
break; break;
MDBX_reader *asleep = nullptr; MDBX_reader *asleep = nullptr;
@ -17778,20 +17781,20 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
} }
if (laggard < oldest || !asleep) { if (laggard < oldest || !asleep) {
if (retry && env->me_oom_func) { if (retry && env->me_hsr_callback) {
/* LY: notify end of oom-loop */ /* LY: notify end of hsr-loop */
const txnid_t gap = oldest - laggard; const txnid_t gap = oldest - laggard;
env->me_oom_func(env, 0, 0, laggard, env->me_hsr_callback(env, env->me_txn, 0, 0, laggard,
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, 0, (gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, 0,
-retry); -retry);
} }
mdbx_notice("oom-kick: update oldest %" PRIaTXN " -> %" PRIaTXN, mdbx_notice("hsr-kick: update oldest %" PRIaTXN " -> %" PRIaTXN,
*env->me_oldest, oldest); *env->me_oldest, oldest);
mdbx_assert(env, *env->me_oldest <= oldest); mdbx_assert(env, *env->me_oldest <= oldest);
return *env->me_oldest = oldest; return *env->me_oldest = oldest;
} }
if (!env->me_oom_func) if (!env->me_hsr_callback)
break; break;
uint32_t pid = asleep->mr_pid; uint32_t pid = asleep->mr_pid;
@ -17807,9 +17810,9 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
(oldest_retired > head_retired) (oldest_retired > head_retired)
? pgno2bytes(env, (pgno_t)(oldest_retired - head_retired)) ? pgno2bytes(env, (pgno_t)(oldest_retired - head_retired))
: 0; : 0;
int rc = env->me_oom_func(env, pid, (mdbx_tid_t)tid, laggard, int rc = env->me_hsr_callback(
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, env, env->me_txn, pid, (mdbx_tid_t)tid, laggard,
space, retry); (gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, space, retry);
if (rc < 0) if (rc < 0)
break; break;
@ -17827,9 +17830,9 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
} }
} }
if (retry && env->me_oom_func) { if (retry && env->me_hsr_callback) {
/* LY: notify end of oom-loop */ /* LY: notify end of hsr-loop */
env->me_oom_func(env, 0, 0, laggard, 0, 0, -retry); env->me_hsr_callback(env, env->me_txn, 0, 0, laggard, 0, 0, -retry);
} }
return mdbx_find_oldest(env->me_txn); return mdbx_find_oldest(env->me_txn);
} }
@ -17874,18 +17877,18 @@ int __cold mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) {
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
int __cold mdbx_env_set_oomfunc(MDBX_env *env, MDBX_oom_func *oomfunc) { int __cold mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) {
int rc = check_env(env); int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
env->me_oom_func = oomfunc; env->me_hsr_callback = hsr;
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
MDBX_oom_func *__cold mdbx_env_get_oomfunc(const MDBX_env *env) { MDBX_hsr_func *__cold mdbx_env_get_hsr(const MDBX_env *env) {
return likely(env && env->me_signature == MDBX_ME_SIGNATURE) return likely(env && env->me_signature == MDBX_ME_SIGNATURE)
? env->me_oom_func ? env->me_hsr_callback
: NULL; : NULL;
} }

View File

@ -989,7 +989,7 @@ struct MDBX_env {
volatile pgno_t *me_autosync_threshold; volatile pgno_t *me_autosync_threshold;
volatile pgno_t *me_discarded_tail; volatile pgno_t *me_discarded_tail;
volatile uint32_t *me_meta_sync_txnid; volatile uint32_t *me_meta_sync_txnid;
MDBX_oom_func *me_oom_func; /* Callback for kicking laggard readers */ MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */
struct { struct {
#if MDBX_LOCKING > 0 #if MDBX_LOCKING > 0
mdbx_ipclock_t wlock; mdbx_ipclock_t wlock;
@ -1203,8 +1203,8 @@ mdbx_flush_incoherent_mmap(void *addr, size_t nbytes, const intptr_t pagesize) {
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* Internal prototypes */ /* Internal prototypes */
MDBX_INTERNAL_FUNC int mdbx_reader_check0(MDBX_env *env, int rlocked, MDBX_INTERNAL_FUNC int mdbx_cleanup_dead_readers(MDBX_env *env, int rlocked,
int *dead); int *dead);
MDBX_INTERNAL_FUNC int mdbx_rthc_alloc(mdbx_thread_key_t *key, MDBX_INTERNAL_FUNC int mdbx_rthc_alloc(mdbx_thread_key_t *key,
MDBX_reader *begin, MDBX_reader *end); MDBX_reader *begin, MDBX_reader *end);
MDBX_INTERNAL_FUNC void mdbx_rthc_remove(const mdbx_thread_key_t key); MDBX_INTERNAL_FUNC void mdbx_rthc_remove(const mdbx_thread_key_t key);

View File

@ -702,7 +702,7 @@ static int __cold mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
mdbx_warning("%clock owner died, %s", (rlocked ? 'r' : 'w'), mdbx_warning("%clock owner died, %s", (rlocked ? 'r' : 'w'),
(rc ? "this process' env is hosed" : "recovering")); (rc ? "this process' env is hosed" : "recovering"));
int check_rc = mdbx_reader_check0(env, rlocked, NULL); int check_rc = mdbx_cleanup_dead_readers(env, rlocked, NULL);
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc; check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
#if MDBX_LOCKING == MDBX_LOCKING_SYSV #if MDBX_LOCKING == MDBX_LOCKING_SYSV

View File

@ -78,16 +78,17 @@ const char *keygencase2str(const keygen_case keycase) {
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
int testcase::oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid, int testcase::hsr_callback(const MDBX_env *env, const MDBX_txn *txn,
uint64_t txn, unsigned gap, size_t space, mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard,
unsigned gap, size_t space,
int retry) MDBX_CXX17_NOEXCEPT { int retry) MDBX_CXX17_NOEXCEPT {
(void)txn;
testcase *self = (testcase *)mdbx_env_get_userctx(env); testcase *self = (testcase *)mdbx_env_get_userctx(env);
if (retry == 0) if (retry == 0)
log_notice("oom_callback: waitfor pid %lu, thread %" PRIuPTR log_notice("hsr_callback: waitfor pid %lu, thread %" PRIuPTR
", txn #%" PRIu64 ", gap %d, scape %zu", ", txn #%" PRIu64 ", gap %d, scape %zu",
(long)pid, (size_t)tid, txn, gap, space); (long)pid, (size_t)tid, laggard, gap, space);
if (self->should_continue(true)) { if (self->should_continue(true)) {
osal_yield(); osal_yield();
@ -123,9 +124,9 @@ void testcase::db_prepare() {
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_env_set_maxdbs()", rc); failure_perror("mdbx_env_set_maxdbs()", rc);
rc = mdbx_env_set_oomfunc(env, testcase::oom_callback); rc = mdbx_env_set_hsr(env, testcase::hsr_callback);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_env_set_oomfunc()", rc); failure_perror("mdbx_env_set_hsr()", rc);
rc = mdbx_env_set_geometry( rc = mdbx_env_set_geometry(
env, config.params.size_lower, config.params.size_now, env, config.params.size_lower, config.params.size_now,

View File

@ -166,8 +166,9 @@ protected:
const keygen::buffer &old_value, MDBX_put_flags_t flags); const keygen::buffer &old_value, MDBX_put_flags_t flags);
int remove(const keygen::buffer &akey, const keygen::buffer &adata); int remove(const keygen::buffer &akey, const keygen::buffer &adata);
static int oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid, static int hsr_callback(const MDBX_env *env, const MDBX_txn *txn,
uint64_t txn, unsigned gap, size_t space, mdbx_pid_t pid, mdbx_tid_t tid, uint64_t laggard,
unsigned gap, size_t space,
int retry) MDBX_CXX17_NOEXCEPT; int retry) MDBX_CXX17_NOEXCEPT;
MDBX_env_flags_t actual_env_mode{MDBX_ENV_DEFAULTS}; MDBX_env_flags_t actual_env_mode{MDBX_ENV_DEFAULTS};