diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/include')
63 files changed, 15875 insertions, 15655 deletions
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index c78be89d319..6210450927b 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -8,243 +8,234 @@ #ifdef HAVE_DIAGNOSTIC /* - * Capture cases where a single session handle is used by multiple threads - * in parallel. The check isn't trivial because some API calls re-enter - * via public API entry points and the session with ID 0 is the default - * session in the connection handle which can be used across multiple threads. - * It is safe to use the reference count without atomic operations because the - * reference count is only tracking a thread re-entering the API. + * Capture cases where a single session handle is used by multiple threads in parallel. The check + * isn't trivial because some API calls re-enter via public API entry points and the session with ID + * 0 is the default session in the connection handle which can be used across multiple threads. It + * is safe to use the reference count without atomic operations because the reference count is only + * tracking a thread re-entering the API. 
*/ -#define WT_SINGLE_THREAD_CHECK_START(s) \ - { \ - uintmax_t __tmp_api_tid; \ - __wt_thread_id(&__tmp_api_tid); \ - WT_ASSERT(session, (s)->id == 0 || (s)->api_tid == 0 || \ - (s)->api_tid == __tmp_api_tid); \ - if ((s)->api_tid == 0) \ - WT_PUBLISH((s)->api_tid, __tmp_api_tid); \ - ++(s)->api_enter_refcnt; \ - } - -#define WT_SINGLE_THREAD_CHECK_STOP(s) \ - if (--(s)->api_enter_refcnt == 0) \ - WT_PUBLISH((s)->api_tid, 0); +#define WT_SINGLE_THREAD_CHECK_START(s) \ + { \ + uintmax_t __tmp_api_tid; \ + __wt_thread_id(&__tmp_api_tid); \ + WT_ASSERT(session, (s)->id == 0 || (s)->api_tid == 0 || (s)->api_tid == __tmp_api_tid); \ + if ((s)->api_tid == 0) \ + WT_PUBLISH((s)->api_tid, __tmp_api_tid); \ + ++(s)->api_enter_refcnt; \ + } + +#define WT_SINGLE_THREAD_CHECK_STOP(s) \ + if (--(s)->api_enter_refcnt == 0) \ + WT_PUBLISH((s)->api_tid, 0); #else -#define WT_SINGLE_THREAD_CHECK_START(s) -#define WT_SINGLE_THREAD_CHECK_STOP(s) +#define WT_SINGLE_THREAD_CHECK_START(s) +#define WT_SINGLE_THREAD_CHECK_STOP(s) #endif /* Standard entry points to the API: declares/initializes local variables. */ -#define API_SESSION_INIT(s, h, n, dh) \ - WT_TRACK_OP_DECL; \ - WT_DATA_HANDLE *__olddh = (s)->dhandle; \ - const char *__oldname = (s)->name; \ - (s)->dhandle = (dh); \ - (s)->name = (s)->lastop = #h "." #n; \ - /* \ - * No code before this line, otherwise error handling won't be \ - * correct. \ - */ \ - WT_TRACK_OP_INIT(s); \ - WT_SINGLE_THREAD_CHECK_START(s); \ - WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ - /* Reset wait time if this isn't an API reentry. 
*/ \ - if (__oldname == NULL) \ - (s)->cache_wait_us = 0; \ - __wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n) - -#define API_CALL_NOCONF(s, h, n, dh) do { \ - API_SESSION_INIT(s, h, n, dh) - -#define API_CALL(s, h, n, dh, config, cfg) do { \ - const char *(cfg)[] = \ - { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \ - API_SESSION_INIT(s, h, n, dh); \ - if ((config) != NULL) \ - WT_ERR(__wt_config_check((s), \ - WT_CONFIG_REF(session, h##_##n), (config), 0)) - -#define API_END(s, ret) \ - if ((s) != NULL) { \ - WT_TRACK_OP_END(s); \ - WT_SINGLE_THREAD_CHECK_STOP(s); \ - if ((ret) != 0 && \ - (ret) != WT_NOTFOUND && \ - (ret) != WT_DUPLICATE_KEY && \ - (ret) != WT_PREPARE_CONFLICT && \ - F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \ - F_SET(&(s)->txn, WT_TXN_ERROR); \ - /* \ - * No code after this line, otherwise error handling \ - * won't be correct. \ - */ \ - (s)->dhandle = __olddh; \ - (s)->name = __oldname; \ - } \ -} while (0) +#define API_SESSION_INIT(s, h, n, dh) \ + WT_TRACK_OP_DECL; \ + WT_DATA_HANDLE *__olddh = (s)->dhandle; \ + const char *__oldname = (s)->name; \ + (s)->dhandle = (dh); \ + (s)->name = (s)->lastop = #h "." #n; \ + /* \ + * No code before this line, otherwise error handling won't be \ + * correct. \ + */ \ + WT_TRACK_OP_INIT(s); \ + WT_SINGLE_THREAD_CHECK_START(s); \ + WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ + /* Reset wait time if this isn't an API reentry. 
*/ \ + if (__oldname == NULL) \ + (s)->cache_wait_us = 0; \ + __wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n) + +#define API_CALL_NOCONF(s, h, n, dh) \ + do { \ + API_SESSION_INIT(s, h, n, dh) + +#define API_CALL(s, h, n, dh, config, cfg) \ + do { \ + const char *(cfg)[] = {WT_CONFIG_BASE(s, h##_##n), config, NULL}; \ + API_SESSION_INIT(s, h, n, dh); \ + if ((config) != NULL) \ + WT_ERR(__wt_config_check((s), WT_CONFIG_REF(session, h##_##n), (config), 0)) + +#define API_END(s, ret) \ + if ((s) != NULL) { \ + WT_TRACK_OP_END(s); \ + WT_SINGLE_THREAD_CHECK_STOP(s); \ + if ((ret) != 0 && (ret) != WT_NOTFOUND && (ret) != WT_DUPLICATE_KEY && \ + (ret) != WT_PREPARE_CONFLICT && F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \ + F_SET(&(s)->txn, WT_TXN_ERROR); \ + /* \ + * No code after this line, otherwise error handling \ + * won't be correct. \ + */ \ + (s)->dhandle = __olddh; \ + (s)->name = __oldname; \ + } \ + } \ + while (0) /* An API call wrapped in a transaction if necessary. */ -#define TXN_API_CALL(s, h, n, bt, config, cfg) do { \ - bool __autotxn = false, __update = false; \ - API_CALL(s, h, n, bt, config, cfg); \ - __wt_txn_timestamp_flags(s); \ - __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ - if (__autotxn) \ - F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \ - if (__update) \ - F_SET(&(s)->txn, WT_TXN_UPDATE); +#define TXN_API_CALL(s, h, n, bt, config, cfg) \ + do { \ + bool __autotxn = false, __update = false; \ + API_CALL(s, h, n, bt, config, cfg); \ + __wt_txn_timestamp_flags(s); \ + __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \ + if (__autotxn) \ + F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \ + __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \ + if (__update) \ + F_SET(&(s)->txn, WT_TXN_UPDATE); /* An API call wrapped in a transaction if necessary. 
*/ -#define TXN_API_CALL_NOCONF(s, h, n, dh) do { \ - bool __autotxn = false, __update = false; \ - API_CALL_NOCONF(s, h, n, dh); \ - __wt_txn_timestamp_flags(s); \ - __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ - if (__autotxn) \ - F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \ - if (__update) \ - F_SET(&(s)->txn, WT_TXN_UPDATE); +#define TXN_API_CALL_NOCONF(s, h, n, dh) \ + do { \ + bool __autotxn = false, __update = false; \ + API_CALL_NOCONF(s, h, n, dh); \ + __wt_txn_timestamp_flags(s); \ + __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \ + if (__autotxn) \ + F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \ + __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \ + if (__update) \ + F_SET(&(s)->txn, WT_TXN_UPDATE); /* End a transactional API call, optional retry on deadlock. */ -#define TXN_API_END_RETRY(s, ret, retry) \ - API_END(s, ret); \ - if (__update) \ - F_CLR(&(s)->txn, WT_TXN_UPDATE); \ - if (__autotxn) { \ - if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \ - F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - else if ((ret) == 0 && \ - !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ - (ret) = __wt_txn_commit((s), NULL); \ - else { \ - if (retry) \ - WT_TRET(__wt_session_copy_values(s)); \ - WT_TRET(__wt_txn_rollback((s), NULL)); \ - if (((ret) == 0 || (ret) == WT_ROLLBACK) && \ - (retry)) { \ - (ret) = 0; \ - continue; \ - } \ - WT_TRET(__wt_session_reset_cursors(s, false)); \ - } \ - } \ - break; \ -} while (1) +#define TXN_API_END_RETRY(s, ret, retry) \ + API_END(s, ret); \ + if (__update) \ + F_CLR(&(s)->txn, WT_TXN_UPDATE); \ + if (__autotxn) { \ + if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \ + F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \ + else if ((ret) == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ + (ret) = __wt_txn_commit((s), NULL); \ + else { \ + if (retry) \ + WT_TRET(__wt_session_copy_values(s)); \ + WT_TRET(__wt_txn_rollback((s), NULL)); \ + if (((ret) == 0 || (ret) == WT_ROLLBACK) && (retry)) { \ 
+ (ret) = 0; \ + continue; \ + } \ + WT_TRET(__wt_session_reset_cursors(s, false)); \ + } \ + } \ + break; \ + } \ + while (1) /* End a transactional API call, retry on deadlock. */ -#define TXN_API_END(s, ret) TXN_API_END_RETRY(s, ret, 1) +#define TXN_API_END(s, ret) TXN_API_END_RETRY(s, ret, 1) /* - * In almost all cases, API_END is returning immediately, make it simple. - * If a session or connection method is about to return WT_NOTFOUND (some - * underlying object was not found), map it to ENOENT, only cursor methods - * return WT_NOTFOUND. + * In almost all cases, API_END is returning immediately, make it simple. If a session or connection + * method is about to return WT_NOTFOUND (some underlying object was not found), map it to ENOENT, + * only cursor methods return WT_NOTFOUND. */ -#define API_END_RET(s, ret) \ - API_END(s, ret); \ - return (ret) -#define API_END_RET_NOTFOUND_MAP(s, ret) \ - API_END(s, ret); \ - return ((ret) == WT_NOTFOUND ? ENOENT : (ret)) +#define API_END_RET(s, ret) \ + API_END(s, ret); \ + return (ret) +#define API_END_RET_NOTFOUND_MAP(s, ret) \ + API_END(s, ret); \ + return ((ret) == WT_NOTFOUND ? ENOENT : (ret)) /* - * Used in cases where transaction error should not be set, but the error is - * returned from the API. Success is passed to the API_END macro. If the - * method is about to return WT_NOTFOUND map it to ENOENT. + * Used in cases where transaction error should not be set, but the error is returned from the API. + * Success is passed to the API_END macro. If the method is about to return WT_NOTFOUND map it to + * ENOENT. */ -#define API_END_RET_NO_TXN_ERROR(s, ret) \ - API_END(s, 0); \ - return ((ret) == WT_NOTFOUND ? 
ENOENT : (ret)) - -#define CONNECTION_API_CALL(conn, s, n, config, cfg) \ - s = (conn)->default_session; \ - API_CALL(s, WT_CONNECTION, n, NULL, config, cfg) - -#define CONNECTION_API_CALL_NOCONF(conn, s, n) \ - s = (conn)->default_session; \ - API_CALL_NOCONF(s, WT_CONNECTION, n, NULL) - -#define SESSION_API_CALL_PREPARE_ALLOWED(s, n, config, cfg) \ - API_CALL(s, WT_SESSION, n, NULL, config, cfg) - -#define SESSION_API_CALL(s, n, config, cfg) \ - API_CALL(s, WT_SESSION, n, NULL, config, cfg); \ - WT_ERR(__wt_txn_context_prepare_check((s))) - -#define SESSION_API_CALL_NOCONF(s, n) \ - API_CALL_NOCONF(s, WT_SESSION, n, NULL) - -#define SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(s, n) \ - API_CALL_NOCONF(s, WT_SESSION, n, NULL); \ - WT_ERR(__wt_txn_context_prepare_check((s))) - -#define SESSION_TXN_API_CALL(s, n, config, cfg) \ - TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg); \ - WT_ERR(__wt_txn_context_prepare_check((s))) - -#define CURSOR_API_CALL(cur, s, n, bt) \ - (s) = (WT_SESSION_IMPL *)(cur)->session; \ - API_CALL_NOCONF(s, WT_CURSOR, n, \ - ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \ - WT_ERR(__wt_txn_context_prepare_check((s))); \ - if (F_ISSET(cur, WT_CURSTD_CACHED)) \ - WT_ERR(__wt_cursor_cached(cur)) - -#define CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \ - (s) = (WT_SESSION_IMPL *)(cur)->session; \ - API_CALL_NOCONF(s, WT_CURSOR, n, \ - ((bt) == NULL) ? 
NULL : ((WT_BTREE *)(bt))->dhandle); \ - if (F_ISSET(cur, WT_CURSTD_CACHED)) \ - WT_ERR(__wt_cursor_cached(cur)) - -#define JOINABLE_CURSOR_CALL_CHECK(cur) \ - if (F_ISSET(cur, WT_CURSTD_JOINED)) \ - WT_ERR(__wt_curjoin_joined(cur)) - -#define JOINABLE_CURSOR_API_CALL(cur, s, n, bt) \ - CURSOR_API_CALL(cur, s, n, bt); \ - JOINABLE_CURSOR_CALL_CHECK(cur) - -#define JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \ - CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt); \ - JOINABLE_CURSOR_CALL_CHECK(cur) - -#define CURSOR_REMOVE_API_CALL(cur, s, bt) \ - (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, \ - ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \ - WT_ERR(__wt_txn_context_prepare_check((s))) - -#define JOINABLE_CURSOR_REMOVE_API_CALL(cur, s, bt) \ - CURSOR_REMOVE_API_CALL(cur, s, bt); \ - JOINABLE_CURSOR_CALL_CHECK(cur) - -#define CURSOR_UPDATE_API_CALL_BTREE(cur, s, n, bt) \ - (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF( \ - s, WT_CURSOR, n, ((WT_BTREE *)(bt))->dhandle); \ - WT_ERR(__wt_txn_context_prepare_check((s))); \ - if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \ - !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \ - __wt_cache_full(s)) \ - WT_ERR(WT_CACHE_FULL); - -#define CURSOR_UPDATE_API_CALL(cur, s, n) \ - (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, n, NULL); \ - WT_ERR(__wt_txn_context_prepare_check((s))) - -#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n) \ - CURSOR_UPDATE_API_CALL(cur, s, n); \ - JOINABLE_CURSOR_CALL_CHECK(cur) - -#define CURSOR_UPDATE_API_END(s, ret) \ - if ((ret) == WT_PREPARE_CONFLICT) \ - (ret) = WT_ROLLBACK; \ - TXN_API_END(s, ret) - -#define ASYNCOP_API_CALL(conn, s, n) \ - s = (conn)->default_session; \ - API_CALL_NOCONF(s, asyncop, n, NULL) +#define API_END_RET_NO_TXN_ERROR(s, ret) \ + API_END(s, 0); \ + return ((ret) == WT_NOTFOUND ? 
ENOENT : (ret)) + +#define CONNECTION_API_CALL(conn, s, n, config, cfg) \ + s = (conn)->default_session; \ + API_CALL(s, WT_CONNECTION, n, NULL, config, cfg) + +#define CONNECTION_API_CALL_NOCONF(conn, s, n) \ + s = (conn)->default_session; \ + API_CALL_NOCONF(s, WT_CONNECTION, n, NULL) + +#define SESSION_API_CALL_PREPARE_ALLOWED(s, n, config, cfg) \ + API_CALL(s, WT_SESSION, n, NULL, config, cfg) + +#define SESSION_API_CALL(s, n, config, cfg) \ + API_CALL(s, WT_SESSION, n, NULL, config, cfg); \ + WT_ERR(__wt_txn_context_prepare_check((s))) + +#define SESSION_API_CALL_NOCONF(s, n) API_CALL_NOCONF(s, WT_SESSION, n, NULL) + +#define SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(s, n) \ + API_CALL_NOCONF(s, WT_SESSION, n, NULL); \ + WT_ERR(__wt_txn_context_prepare_check((s))) + +#define SESSION_TXN_API_CALL(s, n, config, cfg) \ + TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg); \ + WT_ERR(__wt_txn_context_prepare_check((s))) + +#define CURSOR_API_CALL(cur, s, n, bt) \ + (s) = (WT_SESSION_IMPL *)(cur)->session; \ + API_CALL_NOCONF(s, WT_CURSOR, n, ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \ + WT_ERR(__wt_txn_context_prepare_check((s))); \ + if (F_ISSET(cur, WT_CURSTD_CACHED)) \ + WT_ERR(__wt_cursor_cached(cur)) + +#define CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \ + (s) = (WT_SESSION_IMPL *)(cur)->session; \ + API_CALL_NOCONF(s, WT_CURSOR, n, ((bt) == NULL) ? 
NULL : ((WT_BTREE *)(bt))->dhandle); \ + if (F_ISSET(cur, WT_CURSTD_CACHED)) \ + WT_ERR(__wt_cursor_cached(cur)) + +#define JOINABLE_CURSOR_CALL_CHECK(cur) \ + if (F_ISSET(cur, WT_CURSTD_JOINED)) \ + WT_ERR(__wt_curjoin_joined(cur)) + +#define JOINABLE_CURSOR_API_CALL(cur, s, n, bt) \ + CURSOR_API_CALL(cur, s, n, bt); \ + JOINABLE_CURSOR_CALL_CHECK(cur) + +#define JOINABLE_CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt) \ + CURSOR_API_CALL_PREPARE_ALLOWED(cur, s, n, bt); \ + JOINABLE_CURSOR_CALL_CHECK(cur) + +#define CURSOR_REMOVE_API_CALL(cur, s, bt) \ + (s) = (WT_SESSION_IMPL *)(cur)->session; \ + TXN_API_CALL_NOCONF( \ + s, WT_CURSOR, remove, ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \ + WT_ERR(__wt_txn_context_prepare_check((s))) + +#define JOINABLE_CURSOR_REMOVE_API_CALL(cur, s, bt) \ + CURSOR_REMOVE_API_CALL(cur, s, bt); \ + JOINABLE_CURSOR_CALL_CHECK(cur) + +#define CURSOR_UPDATE_API_CALL_BTREE(cur, s, n, bt) \ + (s) = (WT_SESSION_IMPL *)(cur)->session; \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, ((WT_BTREE *)(bt))->dhandle); \ + WT_ERR(__wt_txn_context_prepare_check((s))); \ + if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \ + __wt_cache_full(s)) \ + WT_ERR(WT_CACHE_FULL); + +#define CURSOR_UPDATE_API_CALL(cur, s, n) \ + (s) = (WT_SESSION_IMPL *)(cur)->session; \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, NULL); \ + WT_ERR(__wt_txn_context_prepare_check((s))) + +#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n) \ + CURSOR_UPDATE_API_CALL(cur, s, n); \ + JOINABLE_CURSOR_CALL_CHECK(cur) + +#define CURSOR_UPDATE_API_END(s, ret) \ + if ((ret) == WT_PREPARE_CONFLICT) \ + (ret) = WT_ROLLBACK; \ + TXN_API_END(s, ret) + +#define ASYNCOP_API_CALL(conn, s, n) \ + s = (conn)->default_session; \ + API_CALL_NOCONF(s, asyncop, n, NULL) diff --git a/src/third_party/wiredtiger/src/include/async.h b/src/third_party/wiredtiger/src/include/async.h index edf61c149a8..9a32ce6e0d2 100644 --- 
a/src/third_party/wiredtiger/src/include/async.h +++ b/src/third_party/wiredtiger/src/include/async.h @@ -6,24 +6,23 @@ * See the file LICENSE for redistribution information. */ -#define MAX_ASYNC_SLEEP_USECS 100000 /* Maximum sleep waiting for work */ -#define MAX_ASYNC_YIELD 200 /* Maximum number of yields for work */ +#define MAX_ASYNC_SLEEP_USECS 100000 /* Maximum sleep waiting for work */ +#define MAX_ASYNC_YIELD 200 /* Maximum number of yields for work */ -#define O2C(op) ((WT_CONNECTION_IMPL *)(op)->iface.connection) -#define O2S(op) \ - (((WT_CONNECTION_IMPL *)(op)->iface.connection)->default_session) +#define O2C(op) ((WT_CONNECTION_IMPL *)(op)->iface.connection) +#define O2S(op) (((WT_CONNECTION_IMPL *)(op)->iface.connection)->default_session) /* * WT_ASYNC_FORMAT -- * The URI/config/format cache. */ struct __wt_async_format { - TAILQ_ENTRY(__wt_async_format) q; - const char *config; - uint64_t cfg_hash; /* Config hash */ - const char *uri; - uint64_t uri_hash; /* URI hash */ - const char *key_format; - const char *value_format; + TAILQ_ENTRY(__wt_async_format) q; + const char *config; + uint64_t cfg_hash; /* Config hash */ + const char *uri; + uint64_t uri_hash; /* URI hash */ + const char *key_format; + const char *value_format; }; /* @@ -31,77 +30,77 @@ struct __wt_async_format { * Implementation of the WT_ASYNC_OP. */ struct __wt_async_op_impl { - WT_ASYNC_OP iface; + WT_ASYNC_OP iface; - WT_ASYNC_CALLBACK *cb; + WT_ASYNC_CALLBACK *cb; - uint32_t internal_id; /* Array position id. */ - uint64_t unique_id; /* Unique identifier. */ + uint32_t internal_id; /* Array position id. */ + uint64_t unique_id; /* Unique identifier. 
*/ - WT_ASYNC_FORMAT *format; /* Format structure */ + WT_ASYNC_FORMAT *format; /* Format structure */ -#define WT_ASYNCOP_ENQUEUED 0 /* Placed on the work queue */ -#define WT_ASYNCOP_FREE 1 /* Able to be allocated to user */ -#define WT_ASYNCOP_READY 2 /* Allocated, ready for user to use */ -#define WT_ASYNCOP_WORKING 3 /* Operation in progress by worker */ - uint32_t state; +#define WT_ASYNCOP_ENQUEUED 0 /* Placed on the work queue */ +#define WT_ASYNCOP_FREE 1 /* Able to be allocated to user */ +#define WT_ASYNCOP_READY 2 /* Allocated, ready for user to use */ +#define WT_ASYNCOP_WORKING 3 /* Operation in progress by worker */ + uint32_t state; - WT_ASYNC_OPTYPE optype; /* Operation type */ + WT_ASYNC_OPTYPE optype; /* Operation type */ }; /* * Definition of the async subsystem. */ struct __wt_async { - /* - * Ops array protected by the ops_lock. - */ - WT_SPINLOCK ops_lock; /* Locked: ops array */ - WT_ASYNC_OP_IMPL *async_ops; /* Async ops */ -#define OPS_INVALID_INDEX 0xffffffff - uint32_t ops_index; /* Active slot index */ - uint64_t op_id; /* Unique ID counter */ - WT_ASYNC_OP_IMPL **async_queue; /* Async ops work queue */ - uint32_t async_qsize; /* Async work queue size */ - /* - * We need to have two head and tail values. All but one is - * maintained as an ever increasing value to ease wrap around. - * - * alloc_head: the next one to allocate for producers. - * head: the current head visible to consumers. - * head is always <= alloc_head. - * alloc_tail: the next slot for consumers to dequeue. - * alloc_tail is always <= head. - * tail_slot: the last slot consumed. - * A producer may need wait for tail_slot to advance. 
- */ - uint64_t alloc_head; /* Next slot to enqueue */ - uint64_t head; /* Head visible to worker */ - uint64_t alloc_tail; /* Next slot to dequeue */ - uint64_t tail_slot; /* Worker slot consumed */ - - TAILQ_HEAD(__wt_async_format_qh, __wt_async_format) formatqh; - uint32_t cur_queue; /* Currently enqueued */ - uint32_t max_queue; /* Maximum enqueued */ - -#define WT_ASYNC_FLUSH_NONE 0 /* No flush in progress */ -#define WT_ASYNC_FLUSH_COMPLETE 1 /* Notify flush caller done */ -#define WT_ASYNC_FLUSH_IN_PROGRESS 2 /* Prevent other callers */ -#define WT_ASYNC_FLUSHING 3 /* Notify workers */ - uint32_t flush_state; - - /* Notify any waiting threads when flushing is done. */ - WT_CONDVAR *flush_cond; - WT_ASYNC_OP_IMPL flush_op; /* Special flush op */ - uint32_t flush_count; /* Worker count */ - uint64_t flush_gen; /* Flush generation number */ - -#define WT_ASYNC_MAX_WORKERS 20 - WT_SESSION_IMPL *worker_sessions[WT_ASYNC_MAX_WORKERS]; - /* Async worker threads */ - wt_thread_t worker_tids[WT_ASYNC_MAX_WORKERS]; - - uint32_t flags; /* Currently unused. */ + /* + * Ops array protected by the ops_lock. + */ + WT_SPINLOCK ops_lock; /* Locked: ops array */ + WT_ASYNC_OP_IMPL *async_ops; /* Async ops */ +#define OPS_INVALID_INDEX 0xffffffff + uint32_t ops_index; /* Active slot index */ + uint64_t op_id; /* Unique ID counter */ + WT_ASYNC_OP_IMPL **async_queue; /* Async ops work queue */ + uint32_t async_qsize; /* Async work queue size */ + /* + * We need to have two head and tail values. All but one is + * maintained as an ever increasing value to ease wrap around. + * + * alloc_head: the next one to allocate for producers. + * head: the current head visible to consumers. + * head is always <= alloc_head. + * alloc_tail: the next slot for consumers to dequeue. + * alloc_tail is always <= head. + * tail_slot: the last slot consumed. + * A producer may need wait for tail_slot to advance. 
+ */ + uint64_t alloc_head; /* Next slot to enqueue */ + uint64_t head; /* Head visible to worker */ + uint64_t alloc_tail; /* Next slot to dequeue */ + uint64_t tail_slot; /* Worker slot consumed */ + + TAILQ_HEAD(__wt_async_format_qh, __wt_async_format) formatqh; + uint32_t cur_queue; /* Currently enqueued */ + uint32_t max_queue; /* Maximum enqueued */ + +#define WT_ASYNC_FLUSH_NONE 0 /* No flush in progress */ +#define WT_ASYNC_FLUSH_COMPLETE 1 /* Notify flush caller done */ +#define WT_ASYNC_FLUSH_IN_PROGRESS 2 /* Prevent other callers */ +#define WT_ASYNC_FLUSHING 3 /* Notify workers */ + uint32_t flush_state; + + /* Notify any waiting threads when flushing is done. */ + WT_CONDVAR *flush_cond; + WT_ASYNC_OP_IMPL flush_op; /* Special flush op */ + uint32_t flush_count; /* Worker count */ + uint64_t flush_gen; /* Flush generation number */ + +#define WT_ASYNC_MAX_WORKERS 20 + WT_SESSION_IMPL *worker_sessions[WT_ASYNC_MAX_WORKERS]; + /* Async worker threads */ + wt_thread_t worker_tids[WT_ASYNC_MAX_WORKERS]; + + uint32_t flags; /* Currently unused. */ }; /* @@ -110,10 +109,10 @@ struct __wt_async { * has a cache of async cursors to reuse for operations. */ struct __wt_async_cursor { - TAILQ_ENTRY(__wt_async_cursor) q; /* Worker cache */ - uint64_t cfg_hash; /* Config hash */ - uint64_t uri_hash; /* URI hash */ - WT_CURSOR *c; /* WT cursor */ + TAILQ_ENTRY(__wt_async_cursor) q; /* Worker cache */ + uint64_t cfg_hash; /* Config hash */ + uint64_t uri_hash; /* URI hash */ + WT_CURSOR *c; /* WT cursor */ }; /* @@ -121,7 +120,7 @@ struct __wt_async_cursor { * State for an async worker thread. 
*/ struct __wt_async_worker_state { - uint32_t id; - TAILQ_HEAD(__wt_cursor_qh, __wt_async_cursor) cursorqh; - uint32_t num_cursors; + uint32_t id; + TAILQ_HEAD(__wt_cursor_qh, __wt_async_cursor) cursorqh; + uint32_t num_cursors; }; diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index 4cfe07f759d..cc045ae20b4 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -11,10 +11,10 @@ */ /* - * The file's description is written into the first block of the file, which - * means we can use an offset of 0 as an invalid offset. + * The file's description is written into the first block of the file, which means we can use an + * offset of 0 as an invalid offset. */ -#define WT_BLOCK_INVALID_OFFSET 0 +#define WT_BLOCK_INVALID_OFFSET 0 /* * The block manager maintains three per-checkpoint extent lists: @@ -46,21 +46,21 @@ * An extent list. */ struct __wt_extlist { - char *name; /* Name */ + char *name; /* Name */ - uint64_t bytes; /* Byte count */ - uint32_t entries; /* Entry count */ + uint64_t bytes; /* Byte count */ + uint32_t entries; /* Entry count */ - wt_off_t offset; /* Written extent offset */ - uint32_t checksum; /* Written extent checksum */ - uint32_t size; /* Written extent size */ + wt_off_t offset; /* Written extent offset */ + uint32_t checksum; /* Written extent checksum */ + uint32_t size; /* Written extent size */ - bool track_size; /* Maintain per-size skiplist */ + bool track_size; /* Maintain per-size skiplist */ - WT_EXT *last; /* Cached last element */ + WT_EXT *last; /* Cached last element */ - WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Size/offset skiplists */ - WT_SIZE *sz[WT_SKIP_MAXDEPTH]; + WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Size/offset skiplists */ + WT_SIZE *sz[WT_SKIP_MAXDEPTH]; }; /* @@ -69,17 +69,17 @@ struct __wt_extlist { * checkpoint. 
*/ struct __wt_ext { - wt_off_t off; /* Extent's file offset */ - wt_off_t size; /* Extent's Size */ + wt_off_t off; /* Extent's file offset */ + wt_off_t size; /* Extent's Size */ - uint8_t depth; /* Skip list depth */ + uint8_t depth; /* Skip list depth */ - /* - * Variable-length array, sized by the number of skiplist elements. - * The first depth array entries are the address skiplist elements, - * the second depth array entries are the size skiplist. - */ - WT_EXT *next[0]; /* Offset, size skiplists */ + /* + * Variable-length array, sized by the number of skiplist elements. The first depth array + * entries are the address skiplist elements, the second depth array entries are the size + * skiplist. + */ + WT_EXT *next[0]; /* Offset, size skiplists */ }; /* @@ -87,18 +87,18 @@ struct __wt_ext { * Encapsulation of a block size skiplist entry. */ struct __wt_size { - wt_off_t size; /* Size */ + wt_off_t size; /* Size */ - uint8_t depth; /* Skip list depth */ + uint8_t depth; /* Skip list depth */ - WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Per-size offset skiplist */ + WT_EXT *off[WT_SKIP_MAXDEPTH]; /* Per-size offset skiplist */ - /* - * We don't use a variable-length array for the size skiplist, we want - * to be able to use any cached WT_SIZE structure as the head of a list, - * and we don't know the related WT_EXT structure's depth. - */ - WT_SIZE *next[WT_SKIP_MAXDEPTH]; /* Size skiplist */ + /* + * We don't use a variable-length array for the size skiplist, we want to be able to use any + * cached WT_SIZE structure as the head of a list, and we don't know the related WT_EXT + * structure's depth. + */ + WT_SIZE *next[WT_SKIP_MAXDEPTH]; /* Size skiplist */ }; /* @@ -108,12 +108,10 @@ struct __wt_size { * Walk a block manager skiplist where the WT_EXT.next entries are offset * by the depth. 
*/ -#define WT_EXT_FOREACH(skip, head) \ - for ((skip) = (head)[0]; \ - (skip) != NULL; (skip) = (skip)->next[0]) -#define WT_EXT_FOREACH_OFF(skip, head) \ - for ((skip) = (head)[0]; \ - (skip) != NULL; (skip) = (skip)->next[(skip)->depth]) +#define WT_EXT_FOREACH(skip, head) \ + for ((skip) = (head)[0]; (skip) != NULL; (skip) = (skip)->next[0]) +#define WT_EXT_FOREACH_OFF(skip, head) \ + for ((skip) = (head)[0]; (skip) != NULL; (skip) = (skip)->next[(skip)->depth]) /* * Checkpoint cookie: carries a version number as I don't want to rev the schema @@ -123,46 +121,44 @@ struct __wt_size { * [1] [root addr] [alloc addr] [avail addr] [discard addr] * [file size] [checkpoint size] [write generation] */ -#define WT_BM_CHECKPOINT_VERSION 1 /* Checkpoint format version */ -#define WT_BLOCK_EXTLIST_MAGIC 71002 /* Identify a list */ +#define WT_BM_CHECKPOINT_VERSION 1 /* Checkpoint format version */ +#define WT_BLOCK_EXTLIST_MAGIC 71002 /* Identify a list */ /* - * There are two versions of the extent list blocks: the original, and a second - * version where current checkpoint information is appended to the avail extent - * list. + * There are two versions of the extent list blocks: the original, and a second version where + * current checkpoint information is appended to the avail extent list. */ -#define WT_BLOCK_EXTLIST_VERSION_ORIG 0 /* Original version */ -#define WT_BLOCK_EXTLIST_VERSION_CKPT 1 /* Checkpoint in avail output */ +#define WT_BLOCK_EXTLIST_VERSION_ORIG 0 /* Original version */ +#define WT_BLOCK_EXTLIST_VERSION_CKPT 1 /* Checkpoint in avail output */ /* * Maximum buffer required to store a checkpoint: 1 version byte followed by * 14 packed 8B values. 
*/ -#define WT_BLOCK_CHECKPOINT_BUFFER (1 + 14 * WT_INTPACK64_MAXSIZE) +#define WT_BLOCK_CHECKPOINT_BUFFER (1 + 14 * WT_INTPACK64_MAXSIZE) struct __wt_block_ckpt { - uint8_t version; /* Version */ + uint8_t version; /* Version */ - wt_off_t root_offset; /* The root */ - uint32_t root_checksum, root_size; + wt_off_t root_offset; /* The root */ + uint32_t root_checksum, root_size; - WT_EXTLIST alloc; /* Extents allocated */ - WT_EXTLIST avail; /* Extents available */ - WT_EXTLIST discard; /* Extents discarded */ + WT_EXTLIST alloc; /* Extents allocated */ + WT_EXTLIST avail; /* Extents available */ + WT_EXTLIST discard; /* Extents discarded */ - wt_off_t file_size; /* Checkpoint file size */ - uint64_t ckpt_size; /* Checkpoint byte count */ + wt_off_t file_size; /* Checkpoint file size */ + uint64_t ckpt_size; /* Checkpoint byte count */ - WT_EXTLIST ckpt_avail; /* Checkpoint free'd extents */ + WT_EXTLIST ckpt_avail; /* Checkpoint free'd extents */ - /* - * Checkpoint archive: the block manager may potentially free a lot of - * memory from the allocation and discard extent lists when checkpoint - * completes. Put it off until the checkpoint resolves, that lets the - * upper btree layer continue eviction sooner. - */ - WT_EXTLIST ckpt_alloc; /* Checkpoint archive */ - WT_EXTLIST ckpt_discard; /* Checkpoint archive */ + /* + * Checkpoint archive: the block manager may potentially free a lot of memory from the + * allocation and discard extent lists when checkpoint completes. Put it off until the + * checkpoint resolves, that lets the upper btree layer continue eviction sooner. + */ + WT_EXTLIST ckpt_alloc; /* Checkpoint archive */ + WT_EXTLIST ckpt_discard; /* Checkpoint archive */ }; /* @@ -170,62 +166,51 @@ struct __wt_block_ckpt { * Block manager handle, references a single checkpoint in a file. 
*/ struct __wt_bm { - /* Methods */ - int (*addr_invalid) - (WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); - int (*addr_string) - (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); - u_int (*block_header)(WT_BM *); - int (*checkpoint) - (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, WT_CKPT *, bool); - int (*checkpoint_last) - (WT_BM *, WT_SESSION_IMPL *, char **, char **, WT_ITEM *); - int (*checkpoint_load)(WT_BM *, WT_SESSION_IMPL *, - const uint8_t *, size_t, uint8_t *, size_t *, bool); - int (*checkpoint_resolve)(WT_BM *, WT_SESSION_IMPL *, bool); - int (*checkpoint_start)(WT_BM *, WT_SESSION_IMPL *); - int (*checkpoint_unload)(WT_BM *, WT_SESSION_IMPL *); - int (*close)(WT_BM *, WT_SESSION_IMPL *); - int (*compact_end)(WT_BM *, WT_SESSION_IMPL *); - int (*compact_page_skip) - (WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, bool *); - int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *); - int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); - int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); - int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); - bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); - int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t); - int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); - int (*read) - (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); - int (*salvage_end)(WT_BM *, WT_SESSION_IMPL *); - int (*salvage_next) - (WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t *, bool *); - int (*salvage_start)(WT_BM *, WT_SESSION_IMPL *); - int (*salvage_valid) - (WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool); - int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *); - int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats); - int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool); - int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); - int (*verify_end)(WT_BM *, WT_SESSION_IMPL *); - int (*verify_start) - (WT_BM *, WT_SESSION_IMPL 
*, WT_CKPT *, const char *[]); - int (*write) (WT_BM *, - WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool); - int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *); - - WT_BLOCK *block; /* Underlying file */ - - void *map; /* Mapped region */ - size_t maplen; - void *mapped_cookie; - - /* - * There's only a single block manager handle that can be written, all - * others are checkpoints. - */ - bool is_live; /* The live system */ + /* Methods */ + int (*addr_invalid)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + int (*addr_string)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); + u_int (*block_header)(WT_BM *); + int (*checkpoint)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, WT_CKPT *, bool); + int (*checkpoint_last)(WT_BM *, WT_SESSION_IMPL *, char **, char **, WT_ITEM *); + int (*checkpoint_load)( + WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, uint8_t *, size_t *, bool); + int (*checkpoint_resolve)(WT_BM *, WT_SESSION_IMPL *, bool); + int (*checkpoint_start)(WT_BM *, WT_SESSION_IMPL *); + int (*checkpoint_unload)(WT_BM *, WT_SESSION_IMPL *); + int (*close)(WT_BM *, WT_SESSION_IMPL *); + int (*compact_end)(WT_BM *, WT_SESSION_IMPL *); + int (*compact_page_skip)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, bool *); + int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *); + int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); + int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); + int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t); + int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + int (*read)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); + int (*salvage_end)(WT_BM *, WT_SESSION_IMPL *); + int (*salvage_next)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t *, bool *); + int (*salvage_start)(WT_BM *, WT_SESSION_IMPL *); + int 
(*salvage_valid)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool); + int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *); + int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats); + int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool); + int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + int (*verify_end)(WT_BM *, WT_SESSION_IMPL *); + int (*verify_start)(WT_BM *, WT_SESSION_IMPL *, WT_CKPT *, const char *[]); + int (*write)(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool); + int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *); + + WT_BLOCK *block; /* Underlying file */ + + void *map; /* Mapped region */ + size_t maplen; + void *mapped_cookie; + + /* + * There's only a single block manager handle that can be written, all others are checkpoints. + */ + bool is_live; /* The live system */ }; /* @@ -233,64 +218,67 @@ struct __wt_bm { * Block manager handle, references a single file. */ struct __wt_block { - const char *name; /* Name */ - uint64_t name_hash; /* Hash of name */ - - /* A list of block manager handles, sharing a file descriptor. */ - uint32_t ref; /* References */ - TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */ - TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */ - - WT_FH *fh; /* Backing file handle */ - wt_off_t size; /* File size */ - wt_off_t extend_size; /* File extended size */ - wt_off_t extend_len; /* File extend chunk size */ - - /* Configuration information, set when the file is opened. */ - uint32_t allocfirst; /* Allocation is first-fit */ - uint32_t allocsize; /* Allocation size */ - size_t os_cache; /* System buffer cache flush max */ - size_t os_cache_max; - size_t os_cache_dirty_max; - - u_int block_header; /* Header length */ - - /* - * There is only a single checkpoint in a file that can be written. 
The - * information could logically live in the WT_BM structure, but then we - * would be re-creating it every time we opened a new checkpoint and I'd - * rather not do that. So, it's stored here, only accessed by one WT_BM - * handle. - */ - WT_SPINLOCK live_lock; /* Live checkpoint lock */ - WT_BLOCK_CKPT live; /* Live checkpoint */ + const char *name; /* Name */ + uint64_t name_hash; /* Hash of name */ + + /* A list of block manager handles, sharing a file descriptor. */ + uint32_t ref; /* References */ + TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */ + TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */ + + WT_FH *fh; /* Backing file handle */ + wt_off_t size; /* File size */ + wt_off_t extend_size; /* File extended size */ + wt_off_t extend_len; /* File extend chunk size */ + + /* Configuration information, set when the file is opened. */ + uint32_t allocfirst; /* Allocation is first-fit */ + uint32_t allocsize; /* Allocation size */ + size_t os_cache; /* System buffer cache flush max */ + size_t os_cache_max; + size_t os_cache_dirty_max; + + u_int block_header; /* Header length */ + + /* + * There is only a single checkpoint in a file that can be written. The information could + * logically live in the WT_BM structure, but then we would be re-creating it every time we + * opened a new checkpoint and I'd rather not do that. So, it's stored here, only accessed by + * one WT_BM handle. 
+ */ + WT_SPINLOCK live_lock; /* Live checkpoint lock */ + WT_BLOCK_CKPT live; /* Live checkpoint */ #ifdef HAVE_DIAGNOSTIC - bool live_open; /* Live system is open */ + bool live_open; /* Live system is open */ #endif - /* Live checkpoint status */ - enum { WT_CKPT_NONE=0, WT_CKPT_INPROGRESS, - WT_CKPT_PANIC_ON_FAILURE, WT_CKPT_SALVAGE } ckpt_state; - - WT_CKPT *final_ckpt; /* Final live checkpoint write */ - - /* Compaction support */ - int compact_pct_tenths; /* Percent to compact */ - uint64_t compact_pages_reviewed;/* Pages reviewed */ - uint64_t compact_pages_skipped; /* Pages skipped */ - uint64_t compact_pages_written; /* Pages rewritten */ - - /* Salvage support */ - wt_off_t slvg_off; /* Salvage file offset */ - - /* Verification support */ - bool verify; /* If performing verification */ - bool verify_layout; /* Print out file layout information */ - bool verify_strict; /* Fail hard on any error */ - wt_off_t verify_size; /* Checkpoint's file size */ - WT_EXTLIST verify_alloc; /* Verification allocation list */ - uint64_t frags; /* Maximum frags in the file */ - uint8_t *fragfile; /* Per-file frag tracking list */ - uint8_t *fragckpt; /* Per-checkpoint frag tracking list */ + /* Live checkpoint status */ + enum { + WT_CKPT_NONE = 0, + WT_CKPT_INPROGRESS, + WT_CKPT_PANIC_ON_FAILURE, + WT_CKPT_SALVAGE + } ckpt_state; + + WT_CKPT *final_ckpt; /* Final live checkpoint write */ + + /* Compaction support */ + int compact_pct_tenths; /* Percent to compact */ + uint64_t compact_pages_reviewed; /* Pages reviewed */ + uint64_t compact_pages_skipped; /* Pages skipped */ + uint64_t compact_pages_written; /* Pages rewritten */ + + /* Salvage support */ + wt_off_t slvg_off; /* Salvage file offset */ + + /* Verification support */ + bool verify; /* If performing verification */ + bool verify_layout; /* Print out file layout information */ + bool verify_strict; /* Fail hard on any error */ + wt_off_t verify_size; /* Checkpoint's file size */ + WT_EXTLIST verify_alloc; /* 
Verification allocation list */ + uint64_t frags; /* Maximum frags in the file */ + uint8_t *fragfile; /* Per-file frag tracking list */ + uint8_t *fragckpt; /* Per-checkpoint frag tracking list */ }; /* @@ -298,39 +286,38 @@ struct __wt_block { * The file's description. */ struct __wt_block_desc { -#define WT_BLOCK_MAGIC 120897 - uint32_t magic; /* 00-03: Magic number */ -#define WT_BLOCK_MAJOR_VERSION 1 - uint16_t majorv; /* 04-05: Major version */ -#define WT_BLOCK_MINOR_VERSION 0 - uint16_t minorv; /* 06-07: Minor version */ +#define WT_BLOCK_MAGIC 120897 + uint32_t magic; /* 00-03: Magic number */ +#define WT_BLOCK_MAJOR_VERSION 1 + uint16_t majorv; /* 04-05: Major version */ +#define WT_BLOCK_MINOR_VERSION 0 + uint16_t minorv; /* 06-07: Minor version */ - uint32_t checksum; /* 08-11: Description block checksum */ + uint32_t checksum; /* 08-11: Description block checksum */ - uint32_t unused; /* 12-15: Padding */ + uint32_t unused; /* 12-15: Padding */ }; /* - * WT_BLOCK_DESC_SIZE is the expected structure size -- we verify the build to - * ensure the compiler hasn't inserted padding (padding won't cause failure, - * we reserve the first allocation-size block of the file for this information, - * but it would be worth investigation, regardless). + * WT_BLOCK_DESC_SIZE is the expected structure size -- we verify the build to ensure the compiler + * hasn't inserted padding (padding won't cause failure, we reserve the first allocation-size block + * of the file for this information, but it would be worth investigation, regardless). */ -#define WT_BLOCK_DESC_SIZE 16 +#define WT_BLOCK_DESC_SIZE 16 /* * __wt_block_desc_byteswap -- - * Handle big- and little-endian transformation of a description block. + * Handle big- and little-endian transformation of a description block. 
*/ static inline void __wt_block_desc_byteswap(WT_BLOCK_DESC *desc) { #ifdef WORDS_BIGENDIAN - desc->magic = __wt_bswap32(desc->magic); - desc->majorv = __wt_bswap16(desc->majorv); - desc->minorv = __wt_bswap16(desc->minorv); - desc->checksum = __wt_bswap32(desc->checksum); + desc->magic = __wt_bswap32(desc->magic); + desc->majorv = __wt_bswap16(desc->majorv); + desc->minorv = __wt_bswap16(desc->minorv); + desc->checksum = __wt_bswap32(desc->checksum); #else - WT_UNUSED(desc); + WT_UNUSED(desc); #endif } @@ -340,72 +327,67 @@ __wt_block_desc_byteswap(WT_BLOCK_DESC *desc) * block-manager specific structure: WT_BLOCK_HEADER is WiredTiger's default. */ struct __wt_block_header { - /* - * We write the page size in the on-disk page header because it makes - * salvage easier. (If we don't know the expected page length, we'd - * have to read increasingly larger chunks from the file until we find - * one that checksums, and that's going to be harsh given WiredTiger's - * potentially large page sizes.) - */ - uint32_t disk_size; /* 00-03: on-disk page size */ - - /* - * Page checksums are stored in two places. First, the page checksum - * is written within the internal page that references it as part of - * the address cookie. This is done to improve the chances of detecting - * not only disk corruption but other bugs (for example, overwriting a - * page with another valid page image). Second, a page's checksum is - * stored in the disk header. This is for salvage, so salvage knows it - * has found a page that may be useful. - */ - uint32_t checksum; /* 04-07: checksum */ - - /* - * No automatic generation: flag values cannot change, they're written - * to disk. - */ -#define WT_BLOCK_DATA_CKSUM 0x1u /* Block data is part of the checksum */ - uint8_t flags; /* 08: flags */ - - /* - * End the structure with 3 bytes of padding: it wastes space, but it - * leaves the structure 32-bit aligned and having a few bytes to play - * with in the future can't hurt. 
- */ - uint8_t unused[3]; /* 09-11: unused padding */ + /* + * We write the page size in the on-disk page header because it makes salvage easier. (If we + * don't know the expected page length, we'd have to read increasingly larger chunks from the + * file until we find one that checksums, and that's going to be harsh given WiredTiger's + * potentially large page sizes.) + */ + uint32_t disk_size; /* 00-03: on-disk page size */ + + /* + * Page checksums are stored in two places. First, the page checksum is written within the + * internal page that references it as part of the address cookie. This is done to improve the + * chances of detecting not only disk corruption but other bugs (for example, overwriting a page + * with another valid page image). Second, a page's checksum is stored in the disk header. This + * is for salvage, so salvage knows it has found a page that may be useful. + */ + uint32_t checksum; /* 04-07: checksum */ + +/* + * No automatic generation: flag values cannot change, they're written to disk. + */ +#define WT_BLOCK_DATA_CKSUM 0x1u /* Block data is part of the checksum */ + uint8_t flags; /* 08: flags */ + + /* + * End the structure with 3 bytes of padding: it wastes space, but it leaves the structure + * 32-bit aligned and having a few bytes to play with in the future can't hurt. + */ + uint8_t unused[3]; /* 09-11: unused padding */ }; /* - * WT_BLOCK_HEADER_SIZE is the number of bytes we allocate for the structure: if - * the compiler inserts padding it will break the world. + * WT_BLOCK_HEADER_SIZE is the number of bytes we allocate for the structure: if the compiler + * inserts padding it will break the world. */ -#define WT_BLOCK_HEADER_SIZE 12 +#define WT_BLOCK_HEADER_SIZE 12 /* * __wt_block_header_byteswap_copy -- - * Handle big- and little-endian transformation of a header block, - * copying from a source to a target. + * Handle big- and little-endian transformation of a header block, copying from a source to a + * target. 
*/ static inline void __wt_block_header_byteswap_copy(WT_BLOCK_HEADER *from, WT_BLOCK_HEADER *to) { - *to = *from; + *to = *from; #ifdef WORDS_BIGENDIAN - to->disk_size = __wt_bswap32(from->disk_size); - to->checksum = __wt_bswap32(from->checksum); + to->disk_size = __wt_bswap32(from->disk_size); + to->checksum = __wt_bswap32(from->checksum); #endif } /* * __wt_block_header_byteswap -- - * Handle big- and little-endian transformation of a header block. + * Handle big- and little-endian transformation of a header block. */ static inline void __wt_block_header_byteswap(WT_BLOCK_HEADER *blk) { #ifdef WORDS_BIGENDIAN - __wt_block_header_byteswap_copy(blk, blk); + __wt_block_header_byteswap_copy(blk, blk); #else - WT_UNUSED(blk); + WT_UNUSED(blk); #endif } @@ -414,33 +396,29 @@ __wt_block_header_byteswap(WT_BLOCK_HEADER *blk) * WT_BLOCK_HEADER_BYTE_SIZE -- * The first usable data byte on the block (past the combined headers). */ -#define WT_BLOCK_HEADER_BYTE_SIZE \ - (WT_PAGE_HEADER_SIZE + WT_BLOCK_HEADER_SIZE) -#define WT_BLOCK_HEADER_BYTE(dsk) \ - ((void *)((uint8_t *)(dsk) + WT_BLOCK_HEADER_BYTE_SIZE)) +#define WT_BLOCK_HEADER_BYTE_SIZE (WT_PAGE_HEADER_SIZE + WT_BLOCK_HEADER_SIZE) +#define WT_BLOCK_HEADER_BYTE(dsk) ((void *)((uint8_t *)(dsk) + WT_BLOCK_HEADER_BYTE_SIZE)) /* - * We don't compress or encrypt the block's WT_PAGE_HEADER or WT_BLOCK_HEADER - * structures because we need both available with decompression or decryption. - * We use the WT_BLOCK_HEADER checksum and on-disk size during salvage to - * figure out where the blocks are, and we use the WT_PAGE_HEADER in-memory - * size during decompression and decryption to know how large a target buffer - * to allocate. We can only skip the header information when doing encryption, - * but we skip the first 64B when doing compression; a 64B boundary may offer - * better alignment for the underlying compression engine, and skipping 64B - * shouldn't make any difference in terms of compression efficiency. 
+ * We don't compress or encrypt the block's WT_PAGE_HEADER or WT_BLOCK_HEADER structures because we + * need both available with decompression or decryption. We use the WT_BLOCK_HEADER checksum and + * on-disk size during salvage to figure out where the blocks are, and we use the WT_PAGE_HEADER + * in-memory size during decompression and decryption to know how large a target buffer to allocate. + * We can only skip the header information when doing encryption, but we skip the first 64B when + * doing compression; a 64B boundary may offer better alignment for the underlying compression + * engine, and skipping 64B shouldn't make any difference in terms of compression efficiency. */ -#define WT_BLOCK_COMPRESS_SKIP 64 -#define WT_BLOCK_ENCRYPT_SKIP WT_BLOCK_HEADER_BYTE_SIZE +#define WT_BLOCK_COMPRESS_SKIP 64 +#define WT_BLOCK_ENCRYPT_SKIP WT_BLOCK_HEADER_BYTE_SIZE /* * __wt_block_header -- - * Return the size of the block-specific header. + * Return the size of the block-specific header. */ static inline u_int __wt_block_header(WT_BLOCK *block) { - WT_UNUSED(block); + WT_UNUSED(block); - return ((u_int)WT_BLOCK_HEADER_SIZE); + return ((u_int)WT_BLOCK_HEADER_SIZE); } diff --git a/src/third_party/wiredtiger/src/include/block.i b/src/third_party/wiredtiger/src/include/block.i index 3b9183a19fa..d504a981b97 100644 --- a/src/third_party/wiredtiger/src/include/block.i +++ b/src/third_party/wiredtiger/src/include/block.i @@ -12,28 +12,28 @@ /* * __wt_extlist_write_pair -- - * Write an extent list pair. + * Write an extent list pair. */ static inline int __wt_extlist_write_pair(uint8_t **p, wt_off_t off, wt_off_t size) { - WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(off))); - WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(size))); - return (0); + WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(off))); + WT_RET(__wt_vpack_uint(p, 0, (uint64_t)(size))); + return (0); } /* * __wt_extlist_read_pair -- - * Read an extent list pair. + * Read an extent list pair. 
*/ static inline int __wt_extlist_read_pair(const uint8_t **p, wt_off_t *offp, wt_off_t *sizep) { - uint64_t v; + uint64_t v; - WT_RET(__wt_vunpack_uint(p, 0, &v)); - *offp = (wt_off_t)v; - WT_RET(__wt_vunpack_uint(p, 0, &v)); - *sizep = (wt_off_t)v; - return (0); + WT_RET(__wt_vunpack_uint(p, 0, &v)); + *offp = (wt_off_t)v; + WT_RET(__wt_vunpack_uint(p, 0, &v)); + *sizep = (wt_off_t)v; + return (0); } diff --git a/src/third_party/wiredtiger/src/include/bloom.h b/src/third_party/wiredtiger/src/include/bloom.h index f9f071cd2f7..aa47e4b1289 100644 --- a/src/third_party/wiredtiger/src/include/bloom.h +++ b/src/third_party/wiredtiger/src/include/bloom.h @@ -7,18 +7,18 @@ */ struct __wt_bloom { - const char *uri; - char *config; - uint8_t *bitstring; /* For in memory representation. */ - WT_SESSION_IMPL *session; - WT_CURSOR *c; + const char *uri; + char *config; + uint8_t *bitstring; /* For in memory representation. */ + WT_SESSION_IMPL *session; + WT_CURSOR *c; - uint32_t k; /* The number of hash functions used. */ - uint32_t factor; /* The number of bits per item inserted. */ - uint64_t m; /* The number of slots in the bit string. */ - uint64_t n; /* The number of items to be inserted. */ + uint32_t k; /* The number of hash functions used. */ + uint32_t factor; /* The number of bits per item inserted. */ + uint64_t m; /* The number of slots in the bit string. */ + uint64_t n; /* The number of items to be inserted. */ }; struct __wt_bloom_hash { - uint64_t h1, h2; /* The two hashes used to calculate bits. */ + uint64_t h1, h2; /* The two hashes used to calculate bits. */ }; diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 03643f473e1..729264c6c65 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -6,34 +6,34 @@ * See the file LICENSE for redistribution information. 
*/ -#define WT_RECNO_OOB 0 /* Illegal record number */ +#define WT_RECNO_OOB 0 /* Illegal record number */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_READ_CACHE 0x0001u -#define WT_READ_DELETED_CHECK 0x0002u -#define WT_READ_DELETED_SKIP 0x0004u -#define WT_READ_IGNORE_CACHE_SIZE 0x0008u -#define WT_READ_LOOKASIDE 0x0010u -#define WT_READ_NOTFOUND_OK 0x0020u -#define WT_READ_NO_GEN 0x0040u -#define WT_READ_NO_SPLIT 0x0080u -#define WT_READ_NO_WAIT 0x0100u -#define WT_READ_PREV 0x0200u -#define WT_READ_RESTART_OK 0x0400u -#define WT_READ_SKIP_INTL 0x0800u -#define WT_READ_TRUNCATE 0x1000u -#define WT_READ_WONT_NEED 0x2000u +#define WT_READ_CACHE 0x0001u +#define WT_READ_DELETED_CHECK 0x0002u +#define WT_READ_DELETED_SKIP 0x0004u +#define WT_READ_IGNORE_CACHE_SIZE 0x0008u +#define WT_READ_LOOKASIDE 0x0010u +#define WT_READ_NOTFOUND_OK 0x0020u +#define WT_READ_NO_GEN 0x0040u +#define WT_READ_NO_SPLIT 0x0080u +#define WT_READ_NO_WAIT 0x0100u +#define WT_READ_PREV 0x0200u +#define WT_READ_RESTART_OK 0x0400u +#define WT_READ_SKIP_INTL 0x0800u +#define WT_READ_TRUNCATE 0x1000u +#define WT_READ_WONT_NEED 0x2000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_REC_CHECKPOINT 0x01u -#define WT_REC_EVICT 0x02u -#define WT_REC_IN_MEMORY 0x04u -#define WT_REC_LOOKASIDE 0x08u -#define WT_REC_SCRUB 0x10u -#define WT_REC_UPDATE_RESTORE 0x20u -#define WT_REC_VISIBILITY_ERR 0x40u -#define WT_REC_VISIBLE_ALL 0x80u +#define WT_REC_CHECKPOINT 0x01u +#define WT_REC_EVICT 0x02u +#define WT_REC_IN_MEMORY 0x04u +#define WT_REC_LOOKASIDE 0x08u +#define WT_REC_SCRUB 0x10u +#define WT_REC_UPDATE_RESTORE 0x20u +#define WT_REC_VISIBILITY_ERR 0x40u +#define WT_REC_VISIBLE_ALL 0x80u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* @@ -42,164 +42,153 @@ * block-manager specific structure. 
*/ struct __wt_page_header { - /* - * The record number of the first record of the page is stored on disk - * so we can figure out where the column-store leaf page fits into the - * key space during salvage. - */ - uint64_t recno; /* 00-07: column-store starting recno */ - - /* - * We maintain page write-generations in the non-transactional case - * as that's how salvage can determine the most recent page between - * pages overlapping the same key range. - */ - uint64_t write_gen; /* 08-15: write generation */ - - /* - * The page's in-memory size isn't rounded or aligned, it's the actual - * number of bytes the disk-image consumes when instantiated in memory. - */ - uint32_t mem_size; /* 16-19: in-memory page size */ - - union { - uint32_t entries; /* 20-23: number of cells on page */ - uint32_t datalen; /* 20-23: overflow data length */ - } u; - - uint8_t type; /* 24: page type */ - - /* - * No automatic generation: flag values cannot change, they're written - * to disk. - */ -#define WT_PAGE_COMPRESSED 0x01u /* Page is compressed on disk */ -#define WT_PAGE_EMPTY_V_ALL 0x02u /* Page has all zero-length values */ -#define WT_PAGE_EMPTY_V_NONE 0x04u /* Page has no zero-length values */ -#define WT_PAGE_ENCRYPTED 0x08u /* Page is encrypted on disk */ -#define WT_PAGE_LAS_UPDATE 0x10u /* Page updates in lookaside store */ - uint8_t flags; /* 25: flags */ - - /* A byte of padding, positioned to be added to the flags. */ - uint8_t unused; /* 26: unused padding */ - -#define WT_PAGE_VERSION_ORIG 0 /* Original version */ -#define WT_PAGE_VERSION_TS 1 /* Timestamps added */ - uint8_t version; /* 27: version */ + /* + * The record number of the first record of the page is stored on disk so we can figure out + * where the column-store leaf page fits into the key space during salvage. 
+ */ + uint64_t recno; /* 00-07: column-store starting recno */ + + /* + * We maintain page write-generations in the non-transactional case as that's how salvage can + * determine the most recent page between pages overlapping the same key range. + */ + uint64_t write_gen; /* 08-15: write generation */ + + /* + * The page's in-memory size isn't rounded or aligned, it's the actual number of bytes the + * disk-image consumes when instantiated in memory. + */ + uint32_t mem_size; /* 16-19: in-memory page size */ + + union { + uint32_t entries; /* 20-23: number of cells on page */ + uint32_t datalen; /* 20-23: overflow data length */ + } u; + + uint8_t type; /* 24: page type */ + +/* + * No automatic generation: flag values cannot change, they're written to disk. + */ +#define WT_PAGE_COMPRESSED 0x01u /* Page is compressed on disk */ +#define WT_PAGE_EMPTY_V_ALL 0x02u /* Page has all zero-length values */ +#define WT_PAGE_EMPTY_V_NONE 0x04u /* Page has no zero-length values */ +#define WT_PAGE_ENCRYPTED 0x08u /* Page is encrypted on disk */ +#define WT_PAGE_LAS_UPDATE 0x10u /* Page updates in lookaside store */ + uint8_t flags; /* 25: flags */ + + /* A byte of padding, positioned to be added to the flags. */ + uint8_t unused; /* 26: unused padding */ + +#define WT_PAGE_VERSION_ORIG 0 /* Original version */ +#define WT_PAGE_VERSION_TS 1 /* Timestamps added */ + uint8_t version; /* 27: version */ }; /* - * WT_PAGE_HEADER_SIZE is the number of bytes we allocate for the structure: if - * the compiler inserts padding it will break the world. + * WT_PAGE_HEADER_SIZE is the number of bytes we allocate for the structure: if the compiler inserts + * padding it will break the world. */ -#define WT_PAGE_HEADER_SIZE 28 +#define WT_PAGE_HEADER_SIZE 28 /* * __wt_page_header_byteswap -- - * Handle big- and little-endian transformation of a page header. + * Handle big- and little-endian transformation of a page header. 
*/ static inline void __wt_page_header_byteswap(WT_PAGE_HEADER *dsk) { #ifdef WORDS_BIGENDIAN - dsk->recno = __wt_bswap64(dsk->recno); - dsk->write_gen = __wt_bswap64(dsk->write_gen); - dsk->mem_size = __wt_bswap32(dsk->mem_size); - dsk->u.entries = __wt_bswap32(dsk->u.entries); + dsk->recno = __wt_bswap64(dsk->recno); + dsk->write_gen = __wt_bswap64(dsk->write_gen); + dsk->mem_size = __wt_bswap32(dsk->mem_size); + dsk->u.entries = __wt_bswap32(dsk->u.entries); #else - WT_UNUSED(dsk); + WT_UNUSED(dsk); #endif } /* - * The block-manager specific information immediately follows the WT_PAGE_HEADER - * structure. + * The block-manager specific information immediately follows the WT_PAGE_HEADER structure. */ -#define WT_BLOCK_HEADER_REF(dsk) \ - ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_SIZE)) +#define WT_BLOCK_HEADER_REF(dsk) ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_SIZE)) /* * WT_PAGE_HEADER_BYTE -- * WT_PAGE_HEADER_BYTE_SIZE -- * The first usable data byte on the block (past the combined headers). */ -#define WT_PAGE_HEADER_BYTE_SIZE(btree) \ - ((u_int)(WT_PAGE_HEADER_SIZE + (btree)->block_header)) -#define WT_PAGE_HEADER_BYTE(btree, dsk) \ - ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_BYTE_SIZE(btree))) +#define WT_PAGE_HEADER_BYTE_SIZE(btree) ((u_int)(WT_PAGE_HEADER_SIZE + (btree)->block_header)) +#define WT_PAGE_HEADER_BYTE(btree, dsk) \ + ((void *)((uint8_t *)(dsk) + WT_PAGE_HEADER_BYTE_SIZE(btree))) /* * WT_ADDR -- * An in-memory structure to hold a block's location. 
*/ struct __wt_addr { - /* Validity window */ - wt_timestamp_t newest_durable_ts; - wt_timestamp_t oldest_start_ts; - uint64_t oldest_start_txn; - wt_timestamp_t newest_stop_ts; - uint64_t newest_stop_txn; - - uint8_t *addr; /* Block-manager's cookie */ - uint8_t size; /* Block-manager's cookie length */ - -#define WT_ADDR_INT 1 /* Internal page */ -#define WT_ADDR_LEAF 2 /* Leaf page */ -#define WT_ADDR_LEAF_NO 3 /* Leaf page, no overflow */ - uint8_t type; - - /* - * If an address is both as an address for the previous and the current - * multi-block reconciliations, that is, a block we're writing matches - * the block written the last time, it will appear in both the current - * boundary points as well as the page modification's list of previous - * blocks. The reuse flag is how we know that's happening so the block - * is treated correctly (not free'd on error, for example). - */ - uint8_t reuse; + /* Validity window */ + wt_timestamp_t newest_durable_ts; + wt_timestamp_t oldest_start_ts; + uint64_t oldest_start_txn; + wt_timestamp_t newest_stop_ts; + uint64_t newest_stop_txn; + + uint8_t *addr; /* Block-manager's cookie */ + uint8_t size; /* Block-manager's cookie length */ + +#define WT_ADDR_INT 1 /* Internal page */ +#define WT_ADDR_LEAF 2 /* Leaf page */ +#define WT_ADDR_LEAF_NO 3 /* Leaf page, no overflow */ + uint8_t type; + + /* + * If an address is both as an address for the previous and the current multi-block + * reconciliations, that is, a block we're writing matches the block written the last time, it + * will appear in both the current boundary points as well as the page modification's list of + * previous blocks. The reuse flag is how we know that's happening so the block is treated + * correctly (not free'd on error, for example). + */ + uint8_t reuse; }; /* - * Overflow tracking for reuse: When a page is reconciled, we write new K/V - * overflow items. 
If pages are reconciled multiple times, we need to know - * if we've already written a particular overflow record (so we don't write - * it again), as well as if we've modified an overflow record previously - * written (in which case we want to write a new record and discard blocks - * used by the previously written record). Track overflow records written - * for the page, storing the values in a skiplist with the record's value as - * the "key". + * Overflow tracking for reuse: When a page is reconciled, we write new K/V overflow items. If pages + * are reconciled multiple times, we need to know if we've already written a particular overflow + * record (so we don't write it again), as well as if we've modified an overflow record previously + * written (in which case we want to write a new record and discard blocks used by the previously + * written record). Track overflow records written for the page, storing the values in a skiplist + * with the record's value as the "key". */ struct __wt_ovfl_reuse { - uint32_t value_offset; /* Overflow value offset */ - uint32_t value_size; /* Overflow value size */ - uint8_t addr_offset; /* Overflow addr offset */ - uint8_t addr_size; /* Overflow addr size */ - - /* - * On each page reconciliation, we clear the entry's in-use flag, and - * reset it as the overflow record is re-used. After reconciliation - * completes, unused skiplist entries are discarded, along with their - * underlying blocks. - * - * On each page reconciliation, set the just-added flag for each new - * skiplist entry; if reconciliation fails for any reason, discard the - * newly added skiplist entries, along with their underlying blocks. 
- */ + uint32_t value_offset; /* Overflow value offset */ + uint32_t value_size; /* Overflow value size */ + uint8_t addr_offset; /* Overflow addr offset */ + uint8_t addr_size; /* Overflow addr size */ + +/* + * On each page reconciliation, we clear the entry's in-use flag, and + * reset it as the overflow record is re-used. After reconciliation + * completes, unused skiplist entries are discarded, along with their + * underlying blocks. + * + * On each page reconciliation, set the just-added flag for each new + * skiplist entry; if reconciliation fails for any reason, discard the + * newly added skiplist entries, along with their underlying blocks. + */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_OVFL_REUSE_INUSE 0x1u -#define WT_OVFL_REUSE_JUST_ADDED 0x2u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags; - - /* - * The untyped address immediately follows the WT_OVFL_REUSE structure, - * the untyped value immediately follows the address. - */ -#define WT_OVFL_REUSE_ADDR(p) \ - ((void *)((uint8_t *)(p) + (p)->addr_offset)) -#define WT_OVFL_REUSE_VALUE(p) \ - ((void *)((uint8_t *)(p) + (p)->value_offset)) - - WT_OVFL_REUSE *next[0]; /* Forward-linked skip list */ +#define WT_OVFL_REUSE_INUSE 0x1u +#define WT_OVFL_REUSE_JUST_ADDED 0x2u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags; + +/* + * The untyped address immediately follows the WT_OVFL_REUSE structure, the untyped value + * immediately follows the address. + */ +#define WT_OVFL_REUSE_ADDR(p) ((void *)((uint8_t *)(p) + (p)->addr_offset)) +#define WT_OVFL_REUSE_VALUE(p) ((void *)((uint8_t *)(p) + (p)->value_offset)) + + WT_OVFL_REUSE *next[0]; /* Forward-linked skip list */ }; /* @@ -229,34 +218,33 @@ struct __wt_ovfl_reuse { * the row-store key is relatively large. 
*/ #ifdef HAVE_BUILTIN_EXTENSION_SNAPPY -#define WT_LOOKASIDE_COMPRESSOR "snappy" +#define WT_LOOKASIDE_COMPRESSOR "snappy" #else -#define WT_LOOKASIDE_COMPRESSOR "none" +#define WT_LOOKASIDE_COMPRESSOR "none" #endif -#define WT_LAS_CONFIG \ - "key_format=" WT_UNCHECKED_STRING(QIQu) \ - ",value_format=" WT_UNCHECKED_STRING(QQQBBu) \ - ",block_compressor=" WT_LOOKASIDE_COMPRESSOR \ - ",leaf_value_max=64MB" \ - ",prefix_compression=true" +#define WT_LAS_CONFIG \ + "key_format=" WT_UNCHECKED_STRING(QIQu) ",value_format=" WT_UNCHECKED_STRING( \ + QQQBBu) ",block_compressor=" WT_LOOKASIDE_COMPRESSOR \ + ",leaf_value_max=64MB" \ + ",prefix_compression=true" /* * WT_PAGE_LOOKASIDE -- * Related information for on-disk pages with lookaside entries. */ struct __wt_page_lookaside { - uint64_t las_pageid; /* Page ID in lookaside */ - uint64_t max_txn; /* Maximum transaction ID */ - uint64_t unstable_txn; /* First transaction ID not on page */ - wt_timestamp_t max_timestamp; /* Maximum timestamp */ - wt_timestamp_t unstable_timestamp;/* First timestamp not on page */ - wt_timestamp_t unstable_durable_timestamp; - /* First durable timestamp not on - * page */ - bool eviction_to_lookaside; /* Revert to lookaside on eviction */ - bool has_prepares; /* One or more updates are prepared */ - bool resolved; /* History has been read into cache */ - bool skew_newest; /* Page image has newest versions */ + uint64_t las_pageid; /* Page ID in lookaside */ + uint64_t max_txn; /* Maximum transaction ID */ + uint64_t unstable_txn; /* First transaction ID not on page */ + wt_timestamp_t max_timestamp; /* Maximum timestamp */ + wt_timestamp_t unstable_timestamp; /* First timestamp not on page */ + wt_timestamp_t unstable_durable_timestamp; + /* First durable timestamp not on + * page */ + bool eviction_to_lookaside; /* Revert to lookaside on eviction */ + bool has_prepares; /* One or more updates are prepared */ + bool resolved; /* History has been read into cache */ + bool skew_newest; /* Page 
image has newest versions */ }; /* @@ -264,484 +252,476 @@ struct __wt_page_lookaside { * When a page is modified, there's additional information to maintain. */ struct __wt_page_modify { - /* The first unwritten transaction ID (approximate). */ - uint64_t first_dirty_txn; + /* The first unwritten transaction ID (approximate). */ + uint64_t first_dirty_txn; - /* The transaction state last time eviction was attempted. */ - uint64_t last_evict_pass_gen; - uint64_t last_eviction_id; - wt_timestamp_t last_eviction_timestamp; + /* The transaction state last time eviction was attempted. */ + uint64_t last_evict_pass_gen; + uint64_t last_eviction_id; + wt_timestamp_t last_eviction_timestamp; #ifdef HAVE_DIAGNOSTIC - /* Check that transaction time moves forward. */ - uint64_t last_oldest_id; + /* Check that transaction time moves forward. */ + uint64_t last_oldest_id; #endif - /* Avoid checking for obsolete updates during checkpoints. */ - uint64_t obsolete_check_txn; - wt_timestamp_t obsolete_check_timestamp; - - /* The largest transaction seen on the page by reconciliation. */ - uint64_t rec_max_txn; - wt_timestamp_t rec_max_timestamp; - - /* Stable timestamp at last reconciliation. */ - wt_timestamp_t last_stable_timestamp; - - /* The largest update transaction ID (approximate). */ - uint64_t update_txn; - - /* Dirty bytes added to the cache. */ - size_t bytes_dirty; - - /* - * When pages are reconciled, the result is one or more replacement - * blocks. A replacement block can be in one of two states: it was - * written to disk, and so we have a block address, or it contained - * unresolved modifications and we have a disk image for it with a - * list of those unresolved modifications. The former is the common - * case: we only build lists of unresolved modifications when we're - * evicting a page, and we only expect to see unresolved modifications - * on a page being evicted in the case of a hot page that's too large - * to keep in memory as it is. 
In other words, checkpoints will skip - * unresolved modifications, and will write the blocks rather than - * build lists of unresolved modifications. - * - * Ugly union/struct layout to conserve memory, we never have both - * a replace address and multiple replacement blocks. - */ - union { - struct { /* Single, written replacement block */ - WT_ADDR replace; - - /* - * A disk image that may or may not have been written, used to - * re-instantiate the page in memory. - */ - void *disk_image; - - /* The page has lookaside entries. */ - WT_PAGE_LOOKASIDE page_las; - } r; -#undef mod_replace -#define mod_replace u1.r.replace -#undef mod_disk_image -#define mod_disk_image u1.r.disk_image -#undef mod_page_las -#define mod_page_las u1.r.page_las - - struct { /* Multiple replacement blocks */ - struct __wt_multi { - /* - * Block's key: either a column-store record number or a - * row-store variable length byte string. - */ - union { - uint64_t recno; - WT_IKEY *ikey; - } key; - - /* - * A disk image that may or may not have been written, used to - * re-instantiate the page in memory. - */ - void *disk_image; - - /* - * List of unresolved updates. Updates are either a row-store - * insert or update list, or column-store insert list. When - * creating lookaside records, there is an additional value, - * the committed item's transaction information. - * - * If there are unresolved updates, the block wasn't written and - * there will always be a disk image. - */ - struct __wt_save_upd { - WT_INSERT *ins; /* Insert list reference */ - WT_ROW *ripcip; /* Original on-page reference */ - WT_UPDATE *onpage_upd; - } *supd; - uint32_t supd_entries; - - /* - * Disk image was written: address, size and checksum. - * On subsequent reconciliations of this page, we avoid writing - * the block if it's unchanged by comparing size and checksum; - * the reuse flag is set when the block is unchanged and we're - * reusing a previous address. 
- */ - WT_ADDR addr; - uint32_t size; - uint32_t checksum; - - WT_PAGE_LOOKASIDE page_las; - } *multi; - uint32_t multi_entries; /* Multiple blocks element count */ - } m; -#undef mod_multi -#define mod_multi u1.m.multi -#undef mod_multi_entries -#define mod_multi_entries u1.m.multi_entries - } u1; - - /* - * Internal pages need to be able to chain root-page splits and have a - * special transactional eviction requirement. Column-store leaf pages - * need update and append lists. - * - * Ugly union/struct layout to conserve memory, a page is either a leaf - * page or an internal page. - */ - union { - struct { - /* - * When a root page splits, we create a new page and write it; - * the new page can also split and so on, and we continue this - * process until we write a single replacement root page. We - * use the root split field to track the list of created pages - * so they can be discarded when no longer needed. - */ - WT_PAGE *root_split; /* Linked list of root split pages */ - } intl; -#undef mod_root_split -#define mod_root_split u2.intl.root_split - struct { - /* - * Appended items to column-stores: there is only a single one - * of these active at a time per column-store tree. - */ - WT_INSERT_HEAD **append; - - /* - * Updated items in column-stores: variable-length RLE entries - * can expand to multiple entries which requires some kind of - * list we can expand on demand. Updated items in fixed-length - * files could be done based on an WT_UPDATE array as in - * row-stores, but there can be a very large number of bits on - * a single page, and the cost of the WT_UPDATE array would be - * huge. - */ - WT_INSERT_HEAD **update; - - /* - * Split-saved last column-store page record. If a column-store - * page is split, we save the first record number moved so that - * during reconciliation we know the page's last record and can - * write any implicitly created deleted records for the page. 
- */ - uint64_t split_recno; - } column_leaf; -#undef mod_col_append -#define mod_col_append u2.column_leaf.append -#undef mod_col_update -#define mod_col_update u2.column_leaf.update -#undef mod_col_split_recno -#define mod_col_split_recno u2.column_leaf.split_recno - struct { - /* Inserted items for row-store. */ - WT_INSERT_HEAD **insert; - - /* Updated items for row-stores. */ - WT_UPDATE **update; - } row_leaf; -#undef mod_row_insert -#define mod_row_insert u2.row_leaf.insert -#undef mod_row_update -#define mod_row_update u2.row_leaf.update - } u2; - - /* - * Overflow record tracking for reconciliation. We assume overflow - * records are relatively rare, so we don't allocate the structures - * to track them until we actually see them in the data. - */ - struct __wt_ovfl_track { - /* - * Overflow key/value address/byte-string pairs we potentially - * reuse each time we reconcile the page. - */ - WT_OVFL_REUSE *ovfl_reuse[WT_SKIP_MAXDEPTH]; - - /* - * Overflow key/value addresses to be discarded from the block - * manager after reconciliation completes successfully. - */ - WT_CELL **discard; - size_t discard_entries; - size_t discard_allocated; - - /* Cached overflow value cell/update address pairs. */ - struct { - WT_CELL *cell; - uint8_t *data; - size_t size; - } *remove; - size_t remove_allocated; - uint32_t remove_next; - } *ovfl_track; - -#define WT_PAGE_LOCK(s, p) \ - __wt_spin_lock((s), &(p)->modify->page_lock) -#define WT_PAGE_TRYLOCK(s, p) \ - __wt_spin_trylock((s), &(p)->modify->page_lock) -#define WT_PAGE_UNLOCK(s, p) \ - __wt_spin_unlock((s), &(p)->modify->page_lock) - WT_SPINLOCK page_lock; /* Page's spinlock */ - - /* - * The page state is incremented when a page is modified. - * - * WT_PAGE_CLEAN -- - * The page is clean. - * WT_PAGE_DIRTY_FIRST -- - * The page is in this state after the first operation that marks a - * page dirty, or when reconciliation is checking to see if it has - * done enough work to be able to mark the page clean. 
- * WT_PAGE_DIRTY -- - * Two or more updates have been added to the page. - */ -#define WT_PAGE_CLEAN 0 -#define WT_PAGE_DIRTY_FIRST 1 -#define WT_PAGE_DIRTY 2 - uint32_t page_state; - -#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */ -#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */ -#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */ - uint8_t rec_result; /* Reconciliation state */ - -#define WT_PAGE_RS_LOOKASIDE 0x1 -#define WT_PAGE_RS_RESTORED 0x2 - uint8_t restore_state; /* Created by restoring updates */ + /* Avoid checking for obsolete updates during checkpoints. */ + uint64_t obsolete_check_txn; + wt_timestamp_t obsolete_check_timestamp; + + /* The largest transaction seen on the page by reconciliation. */ + uint64_t rec_max_txn; + wt_timestamp_t rec_max_timestamp; + + /* Stable timestamp at last reconciliation. */ + wt_timestamp_t last_stable_timestamp; + + /* The largest update transaction ID (approximate). */ + uint64_t update_txn; + + /* Dirty bytes added to the cache. */ + size_t bytes_dirty; + + /* + * When pages are reconciled, the result is one or more replacement + * blocks. A replacement block can be in one of two states: it was + * written to disk, and so we have a block address, or it contained + * unresolved modifications and we have a disk image for it with a + * list of those unresolved modifications. The former is the common + * case: we only build lists of unresolved modifications when we're + * evicting a page, and we only expect to see unresolved modifications + * on a page being evicted in the case of a hot page that's too large + * to keep in memory as it is. In other words, checkpoints will skip + * unresolved modifications, and will write the blocks rather than + * build lists of unresolved modifications. + * + * Ugly union/struct layout to conserve memory, we never have both + * a replace address and multiple replacement blocks. 
+ */ + union { + struct { /* Single, written replacement block */ + WT_ADDR replace; + + /* + * A disk image that may or may not have been written, used to re-instantiate the page + * in memory. + */ + void *disk_image; + + /* The page has lookaside entries. */ + WT_PAGE_LOOKASIDE page_las; + } r; +#undef mod_replace +#define mod_replace u1.r.replace +#undef mod_disk_image +#define mod_disk_image u1.r.disk_image +#undef mod_page_las +#define mod_page_las u1.r.page_las + + struct { /* Multiple replacement blocks */ + struct __wt_multi { + /* + * Block's key: either a column-store record number or a row-store variable length + * byte string. + */ + union { + uint64_t recno; + WT_IKEY *ikey; + } key; + + /* + * A disk image that may or may not have been written, used to re-instantiate the + * page in memory. + */ + void *disk_image; + + /* + * List of unresolved updates. Updates are either a row-store + * insert or update list, or column-store insert list. When + * creating lookaside records, there is an additional value, + * the committed item's transaction information. + * + * If there are unresolved updates, the block wasn't written and + * there will always be a disk image. + */ + struct __wt_save_upd { + WT_INSERT *ins; /* Insert list reference */ + WT_ROW *ripcip; /* Original on-page reference */ + WT_UPDATE *onpage_upd; + } * supd; + uint32_t supd_entries; + + /* + * Disk image was written: address, size and checksum. On subsequent reconciliations + * of this page, we avoid writing the block if it's unchanged by comparing size and + * checksum; the reuse flag is set when the block is unchanged and we're reusing a + * previous address. 
+ */ + WT_ADDR addr; + uint32_t size; + uint32_t checksum; + + WT_PAGE_LOOKASIDE page_las; + } * multi; + uint32_t multi_entries; /* Multiple blocks element count */ + } m; +#undef mod_multi +#define mod_multi u1.m.multi +#undef mod_multi_entries +#define mod_multi_entries u1.m.multi_entries + } u1; + + /* + * Internal pages need to be able to chain root-page splits and have a + * special transactional eviction requirement. Column-store leaf pages + * need update and append lists. + * + * Ugly union/struct layout to conserve memory, a page is either a leaf + * page or an internal page. + */ + union { + struct { + /* + * When a root page splits, we create a new page and write it; the new page can also + * split and so on, and we continue this process until we write a single replacement + * root page. We use the root split field to track the list of created pages so they can + * be discarded when no longer needed. + */ + WT_PAGE *root_split; /* Linked list of root split pages */ + } intl; +#undef mod_root_split +#define mod_root_split u2.intl.root_split + struct { + /* + * Appended items to column-stores: there is only a single one of these active at a time + * per column-store tree. + */ + WT_INSERT_HEAD **append; + + /* + * Updated items in column-stores: variable-length RLE entries can expand to multiple + * entries which requires some kind of list we can expand on demand. Updated items in + * fixed-length files could be done based on an WT_UPDATE array as in row-stores, but + * there can be a very large number of bits on a single page, and the cost of the + * WT_UPDATE array would be huge. + */ + WT_INSERT_HEAD **update; + + /* + * Split-saved last column-store page record. If a column-store page is split, we save + * the first record number moved so that during reconciliation we know the page's last + * record and can write any implicitly created deleted records for the page. 
+ */ + uint64_t split_recno; + } column_leaf; +#undef mod_col_append +#define mod_col_append u2.column_leaf.append +#undef mod_col_update +#define mod_col_update u2.column_leaf.update +#undef mod_col_split_recno +#define mod_col_split_recno u2.column_leaf.split_recno + struct { + /* Inserted items for row-store. */ + WT_INSERT_HEAD **insert; + + /* Updated items for row-stores. */ + WT_UPDATE **update; + } row_leaf; +#undef mod_row_insert +#define mod_row_insert u2.row_leaf.insert +#undef mod_row_update +#define mod_row_update u2.row_leaf.update + } u2; + + /* + * Overflow record tracking for reconciliation. We assume overflow records are relatively rare, + * so we don't allocate the structures to track them until we actually see them in the data. + */ + struct __wt_ovfl_track { + /* + * Overflow key/value address/byte-string pairs we potentially reuse each time we reconcile + * the page. + */ + WT_OVFL_REUSE *ovfl_reuse[WT_SKIP_MAXDEPTH]; + + /* + * Overflow key/value addresses to be discarded from the block manager after reconciliation + * completes successfully. + */ + WT_CELL **discard; + size_t discard_entries; + size_t discard_allocated; + + /* Cached overflow value cell/update address pairs. */ + struct { + WT_CELL *cell; + uint8_t *data; + size_t size; + } * remove; + size_t remove_allocated; + uint32_t remove_next; + } * ovfl_track; + +#define WT_PAGE_LOCK(s, p) __wt_spin_lock((s), &(p)->modify->page_lock) +#define WT_PAGE_TRYLOCK(s, p) __wt_spin_trylock((s), &(p)->modify->page_lock) +#define WT_PAGE_UNLOCK(s, p) __wt_spin_unlock((s), &(p)->modify->page_lock) + WT_SPINLOCK page_lock; /* Page's spinlock */ + +/* + * The page state is incremented when a page is modified. + * + * WT_PAGE_CLEAN -- + * The page is clean. + * WT_PAGE_DIRTY_FIRST -- + * The page is in this state after the first operation that marks a + * page dirty, or when reconciliation is checking to see if it has + * done enough work to be able to mark the page clean. 
+ * WT_PAGE_DIRTY -- + * Two or more updates have been added to the page. + */ +#define WT_PAGE_CLEAN 0 +#define WT_PAGE_DIRTY_FIRST 1 +#define WT_PAGE_DIRTY 2 + uint32_t page_state; + +#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */ +#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */ +#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */ + uint8_t rec_result; /* Reconciliation state */ + +#define WT_PAGE_RS_LOOKASIDE 0x1 +#define WT_PAGE_RS_RESTORED 0x2 + uint8_t restore_state; /* Created by restoring updates */ }; /* * WT_COL_RLE -- - * Variable-length column-store pages have an array of page entries with RLE - * counts greater than 1 when reading the page, so it's not necessary to walk - * the page counting records to find a specific entry. We can do a binary search - * in this array, then an offset calculation to find the cell. + * Variable-length column-store pages have an array of page entries with + * RLE counts greater than 1 when reading the page, so it's not necessary + * to walk the page counting records to find a specific entry. We can do a + * binary search in this array, then an offset calculation to find the + * cell. */ WT_PACKED_STRUCT_BEGIN(__wt_col_rle) - uint64_t recno; /* Record number of first repeat. */ - uint64_t rle; /* Repeat count. */ - uint32_t indx; /* Slot of entry in col_var. */ + uint64_t recno; /* Record number of first repeat. */ + uint64_t rle; /* Repeat count. */ + uint32_t indx; /* Slot of entry in col_var. */ WT_PACKED_STRUCT_END /* * WT_PAGE -- - * The WT_PAGE structure describes the in-memory page information. + * The WT_PAGE structure describes the in-memory page information. */ struct __wt_page { - /* Per page-type information. */ - union { - /* - * Internal pages (both column- and row-store). - * - * In-memory internal pages have an array of pointers to child - * structures, maintained in collated order. 
- * - * Multiple threads of control may be searching the in-memory - * internal page and a child page of the internal page may - * cause a split at any time. When a page splits, a new array - * is allocated and atomically swapped into place. Threads in - * the old array continue without interruption (the old array is - * still valid), but have to avoid racing. No barrier is needed - * because the array reference is updated atomically, but code - * reading the fields multiple times would be a very bad idea. - * Specifically, do not do this: - * WT_REF **refp = page->u.intl__index->index; - * uint32_t entries = page->u.intl__index->entries; - * - * The field is declared volatile (so the compiler knows not to - * read it multiple times), and we obscure the field name and - * use a copy macro in all references to the field (so the code - * doesn't read it multiple times). - */ - struct { - WT_REF *parent_ref; /* Parent reference */ - uint64_t split_gen; /* Generation of last split */ - - struct __wt_page_index { - uint32_t entries; - uint32_t deleted_entries; - WT_REF **index; - } * volatile __index; /* Collated children */ - } intl; -#undef pg_intl_parent_ref -#define pg_intl_parent_ref u.intl.parent_ref -#undef pg_intl_split_gen -#define pg_intl_split_gen u.intl.split_gen - - /* - * Macros to copy/set the index because the name is obscured to ensure - * the field isn't read multiple times. - * - * There are two versions of WT_INTL_INDEX_GET because the session split - * generation is usually set, but it's not always required: for example, - * if a page is locked for splitting, or being created or destroyed. 
- */ -#define WT_INTL_INDEX_GET_SAFE(page) \ - ((page)->u.intl.__index) -#define WT_INTL_INDEX_GET(session, page, pindex) do { \ - WT_ASSERT(session, \ - __wt_session_gen(session, WT_GEN_SPLIT) != 0); \ - (pindex) = WT_INTL_INDEX_GET_SAFE(page); \ -} while (0) -#define WT_INTL_INDEX_SET(page, v) do { \ - WT_WRITE_BARRIER(); \ - ((page)->u.intl.__index) = (v); \ -} while (0) - - /* - * Macro to walk the list of references in an internal page. - */ -#define WT_INTL_FOREACH_BEGIN(session, page, ref) do { \ - WT_PAGE_INDEX *__pindex; \ - WT_REF **__refp; \ - uint32_t __entries; \ - WT_INTL_INDEX_GET(session, page, __pindex); \ - for (__refp = __pindex->index, \ - __entries = __pindex->entries; __entries > 0; --__entries) {\ - (ref) = *__refp++; -#define WT_INTL_FOREACH_REVERSE_BEGIN(session, page, ref) do { \ - WT_PAGE_INDEX *__pindex; \ - WT_REF **__refp; \ - uint32_t __entries; \ - WT_INTL_INDEX_GET(session, page, __pindex); \ - for (__refp = __pindex->index + __pindex->entries, \ - __entries = __pindex->entries; __entries > 0; --__entries) {\ - (ref) = *--__refp; -#define WT_INTL_FOREACH_END \ - } \ -} while (0) - - /* Row-store leaf page. */ - WT_ROW *row; /* Key/value pairs */ -#undef pg_row -#define pg_row u.row - - /* Fixed-length column-store leaf page. */ - uint8_t *fix_bitf; /* Values */ -#undef pg_fix_bitf -#define pg_fix_bitf u.fix_bitf - - /* Variable-length column-store leaf page. */ - struct { - WT_COL *col_var; /* Values */ - - /* - * Variable-length column-store pages have an array - * of page entries with RLE counts greater than 1 when - * reading the page, so it's not necessary to walk the - * page counting records to find a specific entry. We - * can do a binary search in this array, then an offset - * calculation to find the cell. - * - * It's a separate structure to keep the page structure - * as small as possible. 
- */ - struct __wt_col_var_repeat { - uint32_t nrepeats; /* repeat slots */ - WT_COL_RLE repeats[0]; /* lookup RLE array */ - } *repeats; -#define WT_COL_VAR_REPEAT_SET(page) \ - ((page)->u.col_var.repeats != NULL) - } col_var; -#undef pg_var -#define pg_var u.col_var.col_var -#undef pg_var_repeats -#define pg_var_repeats u.col_var.repeats->repeats -#undef pg_var_nrepeats -#define pg_var_nrepeats u.col_var.repeats->nrepeats - } u; - - /* - * Page entries, type and flags are positioned at the end of the WT_PAGE - * union to reduce cache misses in the row-store search function. - * - * The entries field only applies to leaf pages, internal pages use the - * page-index entries instead. - */ - uint32_t entries; /* Leaf page entries */ - -#define WT_PAGE_IS_INTERNAL(page) \ - ((page)->type == WT_PAGE_COL_INT || (page)->type == WT_PAGE_ROW_INT) -#define WT_PAGE_INVALID 0 /* Invalid page */ -#define WT_PAGE_BLOCK_MANAGER 1 /* Block-manager page */ -#define WT_PAGE_COL_FIX 2 /* Col-store fixed-len leaf */ -#define WT_PAGE_COL_INT 3 /* Col-store internal page */ -#define WT_PAGE_COL_VAR 4 /* Col-store var-length leaf page */ -#define WT_PAGE_OVFL 5 /* Overflow page */ -#define WT_PAGE_ROW_INT 6 /* Row-store internal page */ -#define WT_PAGE_ROW_LEAF 7 /* Row-store leaf page */ - uint8_t type; /* Page type */ + /* Per page-type information. */ + union { + /* + * Internal pages (both column- and row-store). + * + * In-memory internal pages have an array of pointers to child + * structures, maintained in collated order. + * + * Multiple threads of control may be searching the in-memory + * internal page and a child page of the internal page may + * cause a split at any time. When a page splits, a new array + * is allocated and atomically swapped into place. Threads in + * the old array continue without interruption (the old array is + * still valid), but have to avoid racing. 
No barrier is needed + * because the array reference is updated atomically, but code + * reading the fields multiple times would be a very bad idea. + * Specifically, do not do this: + * WT_REF **refp = page->u.intl__index->index; + * uint32_t entries = page->u.intl__index->entries; + * + * The field is declared volatile (so the compiler knows not to + * read it multiple times), and we obscure the field name and + * use a copy macro in all references to the field (so the code + * doesn't read it multiple times). + */ + struct { + WT_REF *parent_ref; /* Parent reference */ + uint64_t split_gen; /* Generation of last split */ + + struct __wt_page_index { + uint32_t entries; + uint32_t deleted_entries; + WT_REF **index; + } * volatile __index; /* Collated children */ + } intl; +#undef pg_intl_parent_ref +#define pg_intl_parent_ref u.intl.parent_ref +#undef pg_intl_split_gen +#define pg_intl_split_gen u.intl.split_gen + +/* + * Macros to copy/set the index because the name is obscured to ensure + * the field isn't read multiple times. + * + * There are two versions of WT_INTL_INDEX_GET because the session split + * generation is usually set, but it's not always required: for example, + * if a page is locked for splitting, or being created or destroyed. + */ +#define WT_INTL_INDEX_GET_SAFE(page) ((page)->u.intl.__index) +#define WT_INTL_INDEX_GET(session, page, pindex) \ + do { \ + WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) != 0); \ + (pindex) = WT_INTL_INDEX_GET_SAFE(page); \ + } while (0) +#define WT_INTL_INDEX_SET(page, v) \ + do { \ + WT_WRITE_BARRIER(); \ + ((page)->u.intl.__index) = (v); \ + } while (0) + +/* + * Macro to walk the list of references in an internal page. 
+ */ +#define WT_INTL_FOREACH_BEGIN(session, page, ref) \ + do { \ + WT_PAGE_INDEX *__pindex; \ + WT_REF **__refp; \ + uint32_t __entries; \ + WT_INTL_INDEX_GET(session, page, __pindex); \ + for (__refp = __pindex->index, __entries = __pindex->entries; __entries > 0; \ + --__entries) { \ + (ref) = *__refp++; +#define WT_INTL_FOREACH_REVERSE_BEGIN(session, page, ref) \ + do { \ + WT_PAGE_INDEX *__pindex; \ + WT_REF **__refp; \ + uint32_t __entries; \ + WT_INTL_INDEX_GET(session, page, __pindex); \ + for (__refp = __pindex->index + __pindex->entries, __entries = __pindex->entries; \ + __entries > 0; --__entries) { \ + (ref) = *--__refp; +#define WT_INTL_FOREACH_END \ + } \ + } \ + while (0) + + /* Row-store leaf page. */ + WT_ROW *row; /* Key/value pairs */ +#undef pg_row +#define pg_row u.row + + /* Fixed-length column-store leaf page. */ + uint8_t *fix_bitf; /* Values */ +#undef pg_fix_bitf +#define pg_fix_bitf u.fix_bitf + + /* Variable-length column-store leaf page. */ + struct { + WT_COL *col_var; /* Values */ + + /* + * Variable-length column-store pages have an array + * of page entries with RLE counts greater than 1 when + * reading the page, so it's not necessary to walk the + * page counting records to find a specific entry. We + * can do a binary search in this array, then an offset + * calculation to find the cell. + * + * It's a separate structure to keep the page structure + * as small as possible. 
+ */ + struct __wt_col_var_repeat { + uint32_t nrepeats; /* repeat slots */ + WT_COL_RLE repeats[0]; /* lookup RLE array */ + } * repeats; +#define WT_COL_VAR_REPEAT_SET(page) ((page)->u.col_var.repeats != NULL) + } col_var; +#undef pg_var +#define pg_var u.col_var.col_var +#undef pg_var_repeats +#define pg_var_repeats u.col_var.repeats->repeats +#undef pg_var_nrepeats +#define pg_var_nrepeats u.col_var.repeats->nrepeats + } u; + + /* + * Page entries, type and flags are positioned at the end of the WT_PAGE + * union to reduce cache misses in the row-store search function. + * + * The entries field only applies to leaf pages, internal pages use the + * page-index entries instead. + */ + uint32_t entries; /* Leaf page entries */ + +#define WT_PAGE_IS_INTERNAL(page) \ + ((page)->type == WT_PAGE_COL_INT || (page)->type == WT_PAGE_ROW_INT) +#define WT_PAGE_INVALID 0 /* Invalid page */ +#define WT_PAGE_BLOCK_MANAGER 1 /* Block-manager page */ +#define WT_PAGE_COL_FIX 2 /* Col-store fixed-len leaf */ +#define WT_PAGE_COL_INT 3 /* Col-store internal page */ +#define WT_PAGE_COL_VAR 4 /* Col-store var-length leaf page */ +#define WT_PAGE_OVFL 5 /* Overflow page */ +#define WT_PAGE_ROW_INT 6 /* Row-store internal page */ +#define WT_PAGE_ROW_LEAF 7 /* Row-store leaf page */ + uint8_t type; /* Page type */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_PAGE_BUILD_KEYS 0x01u /* Keys have been built in memory */ -#define WT_PAGE_DISK_ALLOC 0x02u /* Disk image in allocated memory */ -#define WT_PAGE_DISK_MAPPED 0x04u /* Disk image in mapped memory */ -#define WT_PAGE_EVICT_LRU 0x08u /* Page is on the LRU queue */ -#define WT_PAGE_EVICT_NO_PROGRESS 0x10u /* Eviction doesn't count as progress */ -#define WT_PAGE_OVERFLOW_KEYS 0x20u /* Page has overflow keys */ -#define WT_PAGE_SPLIT_INSERT 0x40u /* A leaf page was split for append */ -#define WT_PAGE_UPDATE_IGNORE 0x80u /* Ignore updates on page discard */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t 
flags_atomic; /* Atomic flags, use F_*_ATOMIC */ - - uint8_t unused[2]; /* Unused padding */ - - /* - * The page's read generation acts as an LRU value for each page in the - * tree; it is used by the eviction server thread to select pages to be - * discarded from the in-memory tree. - * - * The read generation is a 64-bit value, if incremented frequently, a - * 32-bit value could overflow. - * - * The read generation is a piece of shared memory potentially read - * by many threads. We don't want to update page read generations for - * in-cache workloads and suffer the cache misses, so we don't simply - * increment the read generation value on every access. Instead, the - * read generation is incremented by the eviction server each time it - * becomes active. To avoid incrementing a page's read generation too - * frequently, it is set to a future point. - * - * Because low read generation values have special meaning, and there - * are places where we manipulate the value, use an initial value well - * outside of the special range. - */ -#define WT_READGEN_NOTSET 0 -#define WT_READGEN_OLDEST 1 -#define WT_READGEN_WONT_NEED 2 -#define WT_READGEN_EVICT_SOON(readgen) \ - ((readgen) != WT_READGEN_NOTSET && (readgen) < WT_READGEN_START_VALUE) -#define WT_READGEN_START_VALUE 100 -#define WT_READGEN_STEP 100 - uint64_t read_gen; - - size_t memory_footprint; /* Memory attached to the page */ - - /* Page's on-disk representation: NULL for pages created in memory. */ - const WT_PAGE_HEADER *dsk; - - /* If/when the page is modified, we need lots more information. */ - WT_PAGE_MODIFY *modify; - - /* This is the 64 byte boundary, try to keep hot fields above here. 
*/ - - uint64_t cache_create_gen; /* Page create timestamp */ - uint64_t evict_pass_gen; /* Eviction pass generation */ +#define WT_PAGE_BUILD_KEYS 0x01u /* Keys have been built in memory */ +#define WT_PAGE_DISK_ALLOC 0x02u /* Disk image in allocated memory */ +#define WT_PAGE_DISK_MAPPED 0x04u /* Disk image in mapped memory */ +#define WT_PAGE_EVICT_LRU 0x08u /* Page is on the LRU queue */ +#define WT_PAGE_EVICT_NO_PROGRESS 0x10u /* Eviction doesn't count as progress */ +#define WT_PAGE_OVERFLOW_KEYS 0x20u /* Page has overflow keys */ +#define WT_PAGE_SPLIT_INSERT 0x40u /* A leaf page was split for append */ +#define WT_PAGE_UPDATE_IGNORE 0x80u /* Ignore updates on page discard */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ + + uint8_t unused[2]; /* Unused padding */ + +/* + * The page's read generation acts as an LRU value for each page in the + * tree; it is used by the eviction server thread to select pages to be + * discarded from the in-memory tree. + * + * The read generation is a 64-bit value, if incremented frequently, a + * 32-bit value could overflow. + * + * The read generation is a piece of shared memory potentially read + * by many threads. We don't want to update page read generations for + * in-cache workloads and suffer the cache misses, so we don't simply + * increment the read generation value on every access. Instead, the + * read generation is incremented by the eviction server each time it + * becomes active. To avoid incrementing a page's read generation too + * frequently, it is set to a future point. + * + * Because low read generation values have special meaning, and there + * are places where we manipulate the value, use an initial value well + * outside of the special range. 
+ */ +#define WT_READGEN_NOTSET 0 +#define WT_READGEN_OLDEST 1 +#define WT_READGEN_WONT_NEED 2 +#define WT_READGEN_EVICT_SOON(readgen) \ + ((readgen) != WT_READGEN_NOTSET && (readgen) < WT_READGEN_START_VALUE) +#define WT_READGEN_START_VALUE 100 +#define WT_READGEN_STEP 100 + uint64_t read_gen; + + size_t memory_footprint; /* Memory attached to the page */ + + /* Page's on-disk representation: NULL for pages created in memory. */ + const WT_PAGE_HEADER *dsk; + + /* If/when the page is modified, we need lots more information. */ + WT_PAGE_MODIFY *modify; + + /* This is the 64 byte boundary, try to keep hot fields above here. */ + + uint64_t cache_create_gen; /* Page create timestamp */ + uint64_t evict_pass_gen; /* Eviction pass generation */ }; /* * WT_PAGE_DISK_OFFSET, WT_PAGE_REF_OFFSET -- * Return the offset/pointer of a pointer/offset in a page disk image. */ -#define WT_PAGE_DISK_OFFSET(page, p) \ - WT_PTRDIFF32(p, (page)->dsk) -#define WT_PAGE_REF_OFFSET(page, o) \ - ((void *)((uint8_t *)((page)->dsk) + (o))) +#define WT_PAGE_DISK_OFFSET(page, p) WT_PTRDIFF32(p, (page)->dsk) +#define WT_PAGE_REF_OFFSET(page, o) ((void *)((uint8_t *)((page)->dsk) + (o))) /* * Prepare update states. @@ -775,12 +755,13 @@ struct __wt_page { * Prepare state will not be updated during rollback and will continue to * have the state as INPROGRESS. */ -#define WT_PREPARE_INIT 0 /* Must be 0, as structures - will be default initialized - with 0. */ -#define WT_PREPARE_INPROGRESS 1 -#define WT_PREPARE_LOCKED 2 -#define WT_PREPARE_RESOLVED 3 +#define WT_PREPARE_INIT \ + 0 /* Must be 0, as structures \ + will be default initialized \ + with 0. */ +#define WT_PREPARE_INPROGRESS 1 +#define WT_PREPARE_LOCKED 2 +#define WT_PREPARE_RESOLVED 3 /* * Page state. @@ -852,20 +833,20 @@ struct __wt_page { * Related information for truncated pages. 
*/ struct __wt_page_deleted { - volatile uint64_t txnid; /* Transaction ID */ + volatile uint64_t txnid; /* Transaction ID */ - wt_timestamp_t timestamp; /* Timestamps */ - wt_timestamp_t durable_timestamp; + wt_timestamp_t timestamp; /* Timestamps */ + wt_timestamp_t durable_timestamp; - /* - * The state is used for transaction prepare to manage visibility - * and inheriting prepare state to update_list. - */ - volatile uint8_t prepare_state; /* Prepare state. */ + /* + * The state is used for transaction prepare to manage visibility and inheriting prepare state + * to update_list. + */ + volatile uint8_t prepare_state; /* Prepare state. */ - uint32_t previous_state; /* Previous state */ + uint32_t previous_state; /* Previous state */ - WT_UPDATE **update_list; /* List of updates for abort */ + WT_UPDATE **update_list; /* List of updates for abort */ }; /* @@ -874,95 +855,94 @@ struct __wt_page_deleted { * it's OK to dereference the pointer to the page. */ struct __wt_ref { - WT_PAGE *page; /* Page */ - - /* - * When the tree deepens as a result of a split, the home page value - * changes. Don't cache it, we need to see that change when looking - * up our slot in the page's index structure. - */ - WT_PAGE * volatile home; /* Reference page */ - volatile uint32_t pindex_hint; /* Reference page index hint */ - -#define WT_REF_DISK 0 /* Page is on disk */ -#define WT_REF_DELETED 1 /* Page is on disk, but deleted */ -#define WT_REF_LIMBO 2 /* Page is in cache without history */ -#define WT_REF_LOCKED 3 /* Page locked for exclusive access */ -#define WT_REF_LOOKASIDE 4 /* Page is on disk with lookaside */ -#define WT_REF_MEM 5 /* Page is in cache and valid */ -#define WT_REF_READING 6 /* Page being read */ -#define WT_REF_SPLIT 7 /* Parent page split (WT_REF dead) */ - volatile uint32_t state; /* Page state */ - - /* - * Address: on-page cell if read from backing block, off-page WT_ADDR - * if instantiated in-memory, or NULL if page created in-memory. 
- */ - void *addr; - - /* - * The child page's key. Do NOT change this union without reviewing - * __wt_ref_key. - */ - union { - uint64_t recno; /* Column-store: starting recno */ - void *ikey; /* Row-store: key */ - } key; -#undef ref_recno -#define ref_recno key.recno -#undef ref_ikey -#define ref_ikey key.ikey - - WT_PAGE_DELETED *page_del; /* Deleted page information */ - WT_PAGE_LOOKASIDE *page_las; /* Lookaside information */ - - /* - * In DIAGNOSTIC mode we overwrite the WT_REF on free to force failures. - * Don't clear the history in that case. - */ -#define WT_REF_CLEAR_SIZE (offsetof(WT_REF, hist)) - -#define WT_REF_SAVE_STATE_MAX 3 + WT_PAGE *page; /* Page */ + + /* + * When the tree deepens as a result of a split, the home page value changes. Don't cache it, we + * need to see that change when looking up our slot in the page's index structure. + */ + WT_PAGE *volatile home; /* Reference page */ + volatile uint32_t pindex_hint; /* Reference page index hint */ + +#define WT_REF_DISK 0 /* Page is on disk */ +#define WT_REF_DELETED 1 /* Page is on disk, but deleted */ +#define WT_REF_LIMBO 2 /* Page is in cache without history */ +#define WT_REF_LOCKED 3 /* Page locked for exclusive access */ +#define WT_REF_LOOKASIDE 4 /* Page is on disk with lookaside */ +#define WT_REF_MEM 5 /* Page is in cache and valid */ +#define WT_REF_READING 6 /* Page being read */ +#define WT_REF_SPLIT 7 /* Parent page split (WT_REF dead) */ + volatile uint32_t state; /* Page state */ + + /* + * Address: on-page cell if read from backing block, off-page WT_ADDR if instantiated in-memory, + * or NULL if page created in-memory. + */ + void *addr; + + /* + * The child page's key. Do NOT change this union without reviewing + * __wt_ref_key. 
+ */ + union { + uint64_t recno; /* Column-store: starting recno */ + void *ikey; /* Row-store: key */ + } key; +#undef ref_recno +#define ref_recno key.recno +#undef ref_ikey +#define ref_ikey key.ikey + + WT_PAGE_DELETED *page_del; /* Deleted page information */ + WT_PAGE_LOOKASIDE *page_las; /* Lookaside information */ + +/* + * In DIAGNOSTIC mode we overwrite the WT_REF on free to force failures. Don't clear the history in + * that case. + */ +#define WT_REF_CLEAR_SIZE (offsetof(WT_REF, hist)) + +#define WT_REF_SAVE_STATE_MAX 3 #ifdef HAVE_DIAGNOSTIC - /* Capture history of ref state changes. */ - struct __wt_ref_hist { - WT_SESSION_IMPL *session; - const char *name; - const char *func; - uint16_t line; - uint16_t state; - } hist[WT_REF_SAVE_STATE_MAX]; - uint64_t histoff; -#define WT_REF_SAVE_STATE(ref, s, f, l) do { \ - (ref)->hist[(ref)->histoff].session = session; \ - (ref)->hist[(ref)->histoff].name = session->name; \ - (ref)->hist[(ref)->histoff].func = (f); \ - (ref)->hist[(ref)->histoff].line = (uint16_t)(l); \ - (ref)->hist[(ref)->histoff].state = (uint16_t)(s); \ - (ref)->histoff = \ - ((ref)->histoff + 1) % WT_ELEMENTS((ref)->hist); \ -} while (0) -#define WT_REF_SET_STATE(ref, s) do { \ - WT_REF_SAVE_STATE(ref, s, __func__, __LINE__); \ - WT_PUBLISH((ref)->state, s); \ -} while (0) + /* Capture history of ref state changes. 
*/ + struct __wt_ref_hist { + WT_SESSION_IMPL *session; + const char *name; + const char *func; + uint16_t line; + uint16_t state; + } hist[WT_REF_SAVE_STATE_MAX]; + uint64_t histoff; +#define WT_REF_SAVE_STATE(ref, s, f, l) \ + do { \ + (ref)->hist[(ref)->histoff].session = session; \ + (ref)->hist[(ref)->histoff].name = session->name; \ + (ref)->hist[(ref)->histoff].func = (f); \ + (ref)->hist[(ref)->histoff].line = (uint16_t)(l); \ + (ref)->hist[(ref)->histoff].state = (uint16_t)(s); \ + (ref)->histoff = ((ref)->histoff + 1) % WT_ELEMENTS((ref)->hist); \ + } while (0) +#define WT_REF_SET_STATE(ref, s) \ + do { \ + WT_REF_SAVE_STATE(ref, s, __func__, __LINE__); \ + WT_PUBLISH((ref)->state, s); \ + } while (0) #else -#define WT_REF_SET_STATE(ref, s) WT_PUBLISH((ref)->state, s) +#define WT_REF_SET_STATE(ref, s) WT_PUBLISH((ref)->state, s) #endif /* A macro wrapper allowing us to remember the callers code location */ -#define WT_REF_CAS_STATE(session, ref, old_state, new_state) \ - __wt_ref_cas_state_int( \ - session, ref, old_state, new_state, __func__, __LINE__) +#define WT_REF_CAS_STATE(session, ref, old_state, new_state) \ + __wt_ref_cas_state_int(session, ref, old_state, new_state, __func__, __LINE__) }; /* - * WT_REF_SIZE is the expected structure size -- we verify the build to ensure - * the compiler hasn't inserted padding which would break the world. + * WT_REF_SIZE is the expected structure size -- we verify the build to ensure the compiler hasn't + * inserted padding which would break the world. */ #ifdef HAVE_DIAGNOSTIC -#define WT_REF_SIZE (56 + WT_REF_SAVE_STATE_MAX * sizeof(WT_REF_HIST) + 8) +#define WT_REF_SIZE (56 + WT_REF_SAVE_STATE_MAX * sizeof(WT_REF_HIST) + 8) #else -#define WT_REF_SIZE 56 +#define WT_REF_SIZE 56 #endif /* @@ -989,49 +969,45 @@ struct __wt_ref { * references to the field (so the code doesn't read it multiple times), all * to make sure we don't introduce this bug (again). 
*/ -struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */ - void * volatile __key; +struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */ + void *volatile __key; }; -#define WT_ROW_KEY_COPY(rip) ((rip)->__key) -#define WT_ROW_KEY_SET(rip, v) ((rip)->__key) = (void *)(v) +#define WT_ROW_KEY_COPY(rip) ((rip)->__key) +#define WT_ROW_KEY_SET(rip, v) ((rip)->__key) = (void *)(v) /* * WT_ROW_FOREACH -- * Walk the entries of an in-memory row-store leaf page. */ -#define WT_ROW_FOREACH(page, rip, i) \ - for ((i) = (page)->entries, \ - (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i)) -#define WT_ROW_FOREACH_REVERSE(page, rip, i) \ - for ((i) = (page)->entries, \ - (rip) = (page)->pg_row + ((page)->entries - 1); \ - (i) > 0; --(rip), --(i)) +#define WT_ROW_FOREACH(page, rip, i) \ + for ((i) = (page)->entries, (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i)) +#define WT_ROW_FOREACH_REVERSE(page, rip, i) \ + for ((i) = (page)->entries, (rip) = (page)->pg_row + ((page)->entries - 1); (i) > 0; \ + --(rip), --(i)) /* * WT_ROW_SLOT -- * Return the 0-based array offset based on a WT_ROW reference. */ -#define WT_ROW_SLOT(page, rip) \ - ((uint32_t)(((WT_ROW *)(rip)) - (page)->pg_row)) +#define WT_ROW_SLOT(page, rip) ((uint32_t)(((WT_ROW *)(rip)) - (page)->pg_row)) /* - * WT_COL -- - * Each in-memory variable-length column-store leaf page has an array of WT_COL - * structures: this is created from on-page data when a page is read from the - * file. It's fixed in size, and references data on the page. + * WT_COL -- Each in-memory variable-length column-store leaf page has an array of WT_COL + * structures: this is created from on-page data when a page is read from the file. It's fixed in + * size, and references data on the page. */ struct __wt_col { - /* - * Variable-length column-store data references are page offsets, not - * pointers (we boldly re-invent short pointers). The trade-off is 4B - * per K/V pair on a 64-bit machine vs. 
a single cycle for the addition - * of a base pointer. The on-page data is a WT_CELL (same as row-store - * pages). - * - * Obscure the field name, code shouldn't use WT_COL->__col_value, the - * public interface is WT_COL_PTR and WT_COL_PTR_SET. - */ - uint32_t __col_value; + /* + * Variable-length column-store data references are page offsets, not + * pointers (we boldly re-invent short pointers). The trade-off is 4B + * per K/V pair on a 64-bit machine vs. a single cycle for the addition + * of a base pointer. The on-page data is a WT_CELL (same as row-store + * pages). + * + * Obscure the field name, code shouldn't use WT_COL->__col_value, the + * public interface is WT_COL_PTR and WT_COL_PTR_SET. + */ + uint32_t __col_value; }; /* @@ -1039,112 +1015,103 @@ struct __wt_col { * Return/Set a pointer corresponding to the data offset. (If the item does * not exist on the page, return a NULL.) */ -#define WT_COL_PTR(page, cip) \ - WT_PAGE_REF_OFFSET(page, (cip)->__col_value) -#define WT_COL_PTR_SET(cip, value) \ - (cip)->__col_value = (value) +#define WT_COL_PTR(page, cip) WT_PAGE_REF_OFFSET(page, (cip)->__col_value) +#define WT_COL_PTR_SET(cip, value) (cip)->__col_value = (value) /* * WT_COL_FOREACH -- * Walk the entries of variable-length column-store leaf page. */ -#define WT_COL_FOREACH(page, cip, i) \ - for ((i) = (page)->entries, \ - (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i)) +#define WT_COL_FOREACH(page, cip, i) \ + for ((i) = (page)->entries, (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i)) /* * WT_COL_SLOT -- * Return the 0-based array offset based on a WT_COL reference. */ -#define WT_COL_SLOT(page, cip) \ - ((uint32_t)(((WT_COL *)(cip)) - (page)->pg_var)) +#define WT_COL_SLOT(page, cip) ((uint32_t)(((WT_COL *)(cip)) - (page)->pg_var)) /* * WT_IKEY -- - * Instantiated key: row-store keys are usually prefix compressed and sometimes - * Huffman encoded or overflow objects. 
Normally, a row-store page in-memory - * key points to the on-page WT_CELL, but in some cases, we instantiate the key - * in memory, in which case the row-store page in-memory key points to a WT_IKEY - * structure. + * Instantiated key: row-store keys are usually prefix compressed and + * sometimes Huffman encoded or overflow objects. Normally, a row-store + * page in-memory key points to the on-page WT_CELL, but in some cases, + * we instantiate the key in memory, in which case the row-store page + * in-memory key points to a WT_IKEY structure. */ struct __wt_ikey { - uint32_t size; /* Key length */ - - /* - * If we no longer point to the key's on-page WT_CELL, we can't find its - * related value. Save the offset of the key cell in the page. - * - * Row-store cell references are page offsets, not pointers (we boldly - * re-invent short pointers). The trade-off is 4B per K/V pair on a - * 64-bit machine vs. a single cycle for the addition of a base pointer. - */ - uint32_t cell_offset; - - /* The key bytes immediately follow the WT_IKEY structure. */ -#define WT_IKEY_DATA(ikey) \ - ((void *)((uint8_t *)(ikey) + sizeof(WT_IKEY))) + uint32_t size; /* Key length */ + + /* + * If we no longer point to the key's on-page WT_CELL, we can't find its + * related value. Save the offset of the key cell in the page. + * + * Row-store cell references are page offsets, not pointers (we boldly + * re-invent short pointers). The trade-off is 4B per K/V pair on a + * 64-bit machine vs. a single cycle for the addition of a base pointer. + */ + uint32_t cell_offset; + +/* The key bytes immediately follow the WT_IKEY structure. */ +#define WT_IKEY_DATA(ikey) ((void *)((uint8_t *)(ikey) + sizeof(WT_IKEY))) }; /* * WT_UPDATE -- - * Entries on leaf pages can be updated, either modified or deleted. Updates - * to entries referenced from the WT_ROW and WT_COL arrays are stored in the - * page's WT_UPDATE array. 
When the first element on a page is updated, the - * WT_UPDATE array is allocated, with one slot for every existing element in - * the page. A slot points to a WT_UPDATE structure; if more than one update - * is done for an entry, WT_UPDATE structures are formed into a forward-linked - * list. + * Entries on leaf pages can be updated, either modified or deleted. + * Updates to entries referenced from the WT_ROW and WT_COL arrays are + * stored in the page's WT_UPDATE array. When the first element on a page + * is updated, the WT_UPDATE array is allocated, with one slot for every + * existing element in the page. A slot points to a WT_UPDATE structure; + * if more than one update is done for an entry, WT_UPDATE structures are + * formed into a forward-linked list. */ struct __wt_update { - volatile uint64_t txnid; /* transaction ID */ - - wt_timestamp_t durable_ts; /* timestamps */ - wt_timestamp_t start_ts; - - WT_UPDATE *next; /* forward-linked list */ - - uint32_t size; /* data length */ - -#define WT_UPDATE_INVALID 0 /* diagnostic check */ -#define WT_UPDATE_BIRTHMARK 1 /* transaction for on-page value */ -#define WT_UPDATE_MODIFY 2 /* partial-update modify value */ -#define WT_UPDATE_RESERVE 3 /* reserved */ -#define WT_UPDATE_STANDARD 4 /* complete value */ -#define WT_UPDATE_TOMBSTONE 5 /* deleted */ - uint8_t type; /* type (one byte to conserve memory) */ - - /* If the update includes a complete value. */ -#define WT_UPDATE_DATA_VALUE(upd) \ - ((upd)->type == WT_UPDATE_STANDARD || \ - (upd)->type == WT_UPDATE_TOMBSTONE) - - /* - * The update state is used for transaction prepare to manage - * visibility and transitioning update structure state safely. - */ - volatile uint8_t prepare_state; /* prepare state */ - - /* - * Zero or more bytes of value (the payload) immediately follows the - * WT_UPDATE structure. We use a C99 flexible array member which has - * the semantics we want. 
- */ - uint8_t data[]; /* start of the data */ + volatile uint64_t txnid; /* transaction ID */ + + wt_timestamp_t durable_ts; /* timestamps */ + wt_timestamp_t start_ts; + + WT_UPDATE *next; /* forward-linked list */ + + uint32_t size; /* data length */ + +#define WT_UPDATE_INVALID 0 /* diagnostic check */ +#define WT_UPDATE_BIRTHMARK 1 /* transaction for on-page value */ +#define WT_UPDATE_MODIFY 2 /* partial-update modify value */ +#define WT_UPDATE_RESERVE 3 /* reserved */ +#define WT_UPDATE_STANDARD 4 /* complete value */ +#define WT_UPDATE_TOMBSTONE 5 /* deleted */ + uint8_t type; /* type (one byte to conserve memory) */ + +/* If the update includes a complete value. */ +#define WT_UPDATE_DATA_VALUE(upd) \ + ((upd)->type == WT_UPDATE_STANDARD || (upd)->type == WT_UPDATE_TOMBSTONE) + + /* + * The update state is used for transaction prepare to manage visibility and transitioning + * update structure state safely. + */ + volatile uint8_t prepare_state; /* prepare state */ + + /* + * Zero or more bytes of value (the payload) immediately follows the WT_UPDATE structure. We use + * a C99 flexible array member which has the semantics we want. + */ + uint8_t data[]; /* start of the data */ }; /* - * WT_UPDATE_SIZE is the expected structure size excluding the payload data -- - * we verify the build to ensure the compiler hasn't inserted padding. + * WT_UPDATE_SIZE is the expected structure size excluding the payload data -- we verify the build + * to ensure the compiler hasn't inserted padding. */ -#define WT_UPDATE_SIZE 38 +#define WT_UPDATE_SIZE 38 /* - * The memory size of an update: include some padding because this is such a - * common case that overhead of tiny allocations can swamp our cache overhead - * calculation. + * The memory size of an update: include some padding because this is such a common case that + * overhead of tiny allocations can swamp our cache overhead calculation. 
*/ -#define WT_UPDATE_MEMSIZE(upd) \ - WT_ALIGN(WT_UPDATE_SIZE + (upd)->size, 32) +#define WT_UPDATE_MEMSIZE(upd) WT_ALIGN(WT_UPDATE_SIZE + (upd)->size, 32) /* * WT_MAX_MODIFY_UPDATE -- @@ -1153,13 +1120,13 @@ struct __wt_update { * when history has to be maintained, resulting in multiplying cache * pressure. */ -#define WT_MAX_MODIFY_UPDATE 10 +#define WT_MAX_MODIFY_UPDATE 10 /* * WT_MODIFY_MEM_FACTOR -- * Limit update chains to a fraction of the base document size. */ -#define WT_MODIFY_MEM_FRACTION 10 +#define WT_MODIFY_MEM_FRACTION 10 /* * WT_INSERT -- @@ -1193,123 +1160,114 @@ struct __wt_update { * scale and it isn't useful enough to re-implement, IMNSHO.) */ struct __wt_insert { - WT_UPDATE *upd; /* value */ - - union { - uint64_t recno; /* column-store record number */ - struct { - uint32_t offset; /* row-store key data start */ - uint32_t size; /* row-store key data size */ - } key; - } u; - -#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)(ins))->u.key.size) -#define WT_INSERT_KEY(ins) \ - ((void *)((uint8_t *)(ins) + ((WT_INSERT *)(ins))->u.key.offset)) -#define WT_INSERT_RECNO(ins) (((WT_INSERT *)(ins))->u.recno) - - WT_INSERT *next[0]; /* forward-linked skip list */ + WT_UPDATE *upd; /* value */ + + union { + uint64_t recno; /* column-store record number */ + struct { + uint32_t offset; /* row-store key data start */ + uint32_t size; /* row-store key data size */ + } key; + } u; + +#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)(ins))->u.key.size) +#define WT_INSERT_KEY(ins) ((void *)((uint8_t *)(ins) + ((WT_INSERT *)(ins))->u.key.offset)) +#define WT_INSERT_RECNO(ins) (((WT_INSERT *)(ins))->u.recno) + + WT_INSERT *next[0]; /* forward-linked skip list */ }; /* * Skiplist helper macros. */ -#define WT_SKIP_FIRST(ins_head) \ - (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->head[0]) -#define WT_SKIP_LAST(ins_head) \ - (((ins_head) == NULL) ? 
NULL : ((WT_INSERT_HEAD *)(ins_head))->tail[0]) -#define WT_SKIP_NEXT(ins) ((ins)->next[0]) -#define WT_SKIP_FOREACH(ins, ins_head) \ - for ((ins) = WT_SKIP_FIRST(ins_head); \ - (ins) != NULL; \ - (ins) = WT_SKIP_NEXT(ins)) +#define WT_SKIP_FIRST(ins_head) \ + (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->head[0]) +#define WT_SKIP_LAST(ins_head) \ + (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->tail[0]) +#define WT_SKIP_NEXT(ins) ((ins)->next[0]) +#define WT_SKIP_FOREACH(ins, ins_head) \ + for ((ins) = WT_SKIP_FIRST(ins_head); (ins) != NULL; (ins) = WT_SKIP_NEXT(ins)) /* * Atomically allocate and swap a structure or array into place. */ -#define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) do { \ - if (((v) = (dest)) == NULL) { \ - WT_ERR(__wt_calloc_def(s, count, &(v))); \ - if (__wt_atomic_cas_ptr(&(dest), NULL, v)) \ - __wt_cache_page_inmem_incr( \ - s, page, (count) * sizeof(*(v))); \ - else \ - __wt_free(s, v); \ - } \ -} while (0) +#define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) \ + do { \ + if (((v) = (dest)) == NULL) { \ + WT_ERR(__wt_calloc_def(s, count, &(v))); \ + if (__wt_atomic_cas_ptr(&(dest), NULL, v)) \ + __wt_cache_page_inmem_incr(s, page, (count) * sizeof(*(v))); \ + else \ + __wt_free(s, v); \ + } \ + } while (0) /* * WT_INSERT_HEAD -- * The head of a skiplist of WT_INSERT items. */ struct __wt_insert_head { - WT_INSERT *head[WT_SKIP_MAXDEPTH]; /* first item on skiplists */ - WT_INSERT *tail[WT_SKIP_MAXDEPTH]; /* last item on skiplists */ + WT_INSERT *head[WT_SKIP_MAXDEPTH]; /* first item on skiplists */ + WT_INSERT *tail[WT_SKIP_MAXDEPTH]; /* last item on skiplists */ }; /* - * The row-store leaf page insert lists are arrays of pointers to structures, - * and may not exist. The following macros return an array entry if the array - * of pointers and the specific structure exist, else NULL. + * The row-store leaf page insert lists are arrays of pointers to structures, and may not exist. 
The + * following macros return an array entry if the array of pointers and the specific structure exist, + * else NULL. */ -#define WT_ROW_INSERT_SLOT(page, slot) \ - ((page)->modify == NULL || \ - (page)->modify->mod_row_insert == NULL ? \ - NULL : (page)->modify->mod_row_insert[slot]) -#define WT_ROW_INSERT(page, ip) \ - WT_ROW_INSERT_SLOT(page, WT_ROW_SLOT(page, ip)) -#define WT_ROW_UPDATE(page, ip) \ - ((page)->modify == NULL || \ - (page)->modify->mod_row_update == NULL ? \ - NULL : (page)->modify->mod_row_update[WT_ROW_SLOT(page, ip)]) +#define WT_ROW_INSERT_SLOT(page, slot) \ + ((page)->modify == NULL || (page)->modify->mod_row_insert == NULL ? \ + NULL : \ + (page)->modify->mod_row_insert[slot]) +#define WT_ROW_INSERT(page, ip) WT_ROW_INSERT_SLOT(page, WT_ROW_SLOT(page, ip)) +#define WT_ROW_UPDATE(page, ip) \ + ((page)->modify == NULL || (page)->modify->mod_row_update == NULL ? \ + NULL : \ + (page)->modify->mod_row_update[WT_ROW_SLOT(page, ip)]) /* * WT_ROW_INSERT_SMALLEST references an additional slot past the end of the - * the "one per WT_ROW slot" insert array. That's because the insert array - * requires an extra slot to hold keys that sort before any key found on the - * original page. + * "one per WT_ROW slot" insert array. That's because the insert array requires + * an extra slot to hold keys that sort before any key found on the original + * page. */ -#define WT_ROW_INSERT_SMALLEST(page) \ - ((page)->modify == NULL || \ - (page)->modify->mod_row_insert == NULL ? \ - NULL : (page)->modify->mod_row_insert[(page)->entries]) +#define WT_ROW_INSERT_SMALLEST(page) \ + ((page)->modify == NULL || (page)->modify->mod_row_insert == NULL ? \ + NULL : \ + (page)->modify->mod_row_insert[(page)->entries]) /* - * The column-store leaf page update lists are arrays of pointers to structures, - * and may not exist. The following macros return an array entry if the array - * of pointers and the specific structure exist, else NULL. 
+ * The column-store leaf page update lists are arrays of pointers to structures, and may not exist. + * The following macros return an array entry if the array of pointers and the specific structure + * exist, else NULL. */ -#define WT_COL_UPDATE_SLOT(page, slot) \ - ((page)->modify == NULL || \ - (page)->modify->mod_col_update == NULL ? \ - NULL : (page)->modify->mod_col_update[slot]) -#define WT_COL_UPDATE(page, ip) \ - WT_COL_UPDATE_SLOT(page, WT_COL_SLOT(page, ip)) +#define WT_COL_UPDATE_SLOT(page, slot) \ + ((page)->modify == NULL || (page)->modify->mod_col_update == NULL ? \ + NULL : \ + (page)->modify->mod_col_update[slot]) +#define WT_COL_UPDATE(page, ip) WT_COL_UPDATE_SLOT(page, WT_COL_SLOT(page, ip)) /* - * WT_COL_UPDATE_SINGLE is a single WT_INSERT list, used for any fixed-length - * column-store updates for a page. + * WT_COL_UPDATE_SINGLE is a single WT_INSERT list, used for any fixed-length column-store updates + * for a page. */ -#define WT_COL_UPDATE_SINGLE(page) \ - WT_COL_UPDATE_SLOT(page, 0) +#define WT_COL_UPDATE_SINGLE(page) WT_COL_UPDATE_SLOT(page, 0) /* - * WT_COL_APPEND is an WT_INSERT list, used for fixed- and variable-length - * appends. + * WT_COL_APPEND is an WT_INSERT list, used for fixed- and variable-length appends. */ -#define WT_COL_APPEND(page) \ - ((page)->modify == NULL || \ - (page)->modify->mod_col_append == NULL ? \ - NULL : (page)->modify->mod_col_append[0]) +#define WT_COL_APPEND(page) \ + ((page)->modify == NULL || (page)->modify->mod_col_append == NULL ? \ + NULL : \ + (page)->modify->mod_col_append[0]) /* WT_FIX_FOREACH walks fixed-length bit-fields on a disk page. */ -#define WT_FIX_FOREACH(btree, dsk, v, i) \ - for ((i) = 0, \ - (v) = (i) < (dsk)->u.entries ? 
\ - __bit_getv( \ - WT_PAGE_HEADER_BYTE(btree, dsk), 0, (btree)->bitcnt) : 0; \ - (i) < (dsk)->u.entries; ++(i), \ - (v) = __bit_getv( \ - WT_PAGE_HEADER_BYTE(btree, dsk), i, (btree)->bitcnt)) +#define WT_FIX_FOREACH(btree, dsk, v, i) \ + for ((i) = 0, (v) = (i) < (dsk)->u.entries ? \ + __bit_getv(WT_PAGE_HEADER_BYTE(btree, dsk), 0, (btree)->bitcnt) : \ + 0; \ + (i) < (dsk)->u.entries; \ + ++(i), (v) = __bit_getv(WT_PAGE_HEADER_BYTE(btree, dsk), i, (btree)->bitcnt)) /* * Manage split generation numbers. Splits walk the list of sessions to check @@ -1324,18 +1282,19 @@ struct __wt_insert_head { * an index, we don't want the oldest split generation to move forward and * potentially free it. */ -#define WT_ENTER_PAGE_INDEX(session) do { \ - uint64_t __prev_split_gen = \ - __wt_session_gen(session, WT_GEN_SPLIT); \ - if (__prev_split_gen == 0) \ - __wt_session_gen_enter(session, WT_GEN_SPLIT); - -#define WT_LEAVE_PAGE_INDEX(session) \ - if (__prev_split_gen == 0) \ - __wt_session_gen_leave(session, WT_GEN_SPLIT); \ - } while (0) - -#define WT_WITH_PAGE_INDEX(session, e) \ - WT_ENTER_PAGE_INDEX(session); \ - (e); \ - WT_LEAVE_PAGE_INDEX(session) +#define WT_ENTER_PAGE_INDEX(session) \ + do { \ + uint64_t __prev_split_gen = __wt_session_gen(session, WT_GEN_SPLIT); \ + if (__prev_split_gen == 0) \ + __wt_session_gen_enter(session, WT_GEN_SPLIT); + +#define WT_LEAVE_PAGE_INDEX(session) \ + if (__prev_split_gen == 0) \ + __wt_session_gen_leave(session, WT_GEN_SPLIT); \ + } \ + while (0) + +#define WT_WITH_PAGE_INDEX(session, e) \ + WT_ENTER_PAGE_INDEX(session); \ + (e); \ + WT_LEAVE_PAGE_INDEX(session) diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 44fae885ae1..248297e6f26 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -7,26 +7,23 @@ */ /* - * Supported btree formats: the "current" version is the maximum supported - * major/minor versions. 
+ * Supported btree formats: the "current" version is the maximum supported major/minor versions. */ -#define WT_BTREE_MAJOR_VERSION_MIN 1 /* Oldest version supported */ -#define WT_BTREE_MINOR_VERSION_MIN 1 +#define WT_BTREE_MAJOR_VERSION_MIN 1 /* Oldest version supported */ +#define WT_BTREE_MINOR_VERSION_MIN 1 -#define WT_BTREE_MAJOR_VERSION_MAX 1 /* Newest version supported */ -#define WT_BTREE_MINOR_VERSION_MAX 1 +#define WT_BTREE_MAJOR_VERSION_MAX 1 /* Newest version supported */ +#define WT_BTREE_MINOR_VERSION_MAX 1 -#define WT_BTREE_MIN_ALLOC_SIZE 512 +#define WT_BTREE_MIN_ALLOC_SIZE 512 /* - * The maximum btree leaf and internal page size is 512MB (2^29). The limit - * is enforced in software, it could be larger, specifically, the underlying - * default block manager can support 4GB (2^32). Currently, the maximum page - * size must accommodate our dependence on the maximum page size fitting into - * a number of bits less than 32; see the row-store page key-lookup functions - * for the magic. + * The maximum btree leaf and internal page size is 512MB (2^29). The limit is enforced in software, + * it could be larger, specifically, the underlying default block manager can support 4GB (2^32). + * Currently, the maximum page size must accommodate our dependence on the maximum page size fitting + * into a number of bits less than 32; see the row-store page key-lookup functions for the magic. */ -#define WT_BTREE_PAGE_SIZE_MAX (512 * WT_MEGABYTE) +#define WT_BTREE_PAGE_SIZE_MAX (512 * WT_MEGABYTE) /* * The length of variable-length column-store values and row-store keys/values @@ -46,233 +43,229 @@ * Record numbers are stored in 64-bit unsigned integers, meaning the largest * record number is "really, really big". 
*/ -#define WT_BTREE_MAX_OBJECT_SIZE ((uint32_t)(UINT32_MAX - 1024)) +#define WT_BTREE_MAX_OBJECT_SIZE ((uint32_t)(UINT32_MAX - 1024)) /* - * A location in a file is a variable-length cookie, but it has a maximum size - * so it's easy to create temporary space in which to store them. (Locations - * can't be much larger than this anyway, they must fit onto the minimum size - * page because a reference to an overflow page is itself a location.) + * A location in a file is a variable-length cookie, but it has a maximum size so it's easy to + * create temporary space in which to store them. (Locations can't be much larger than this anyway, + * they must fit onto the minimum size page because a reference to an overflow page is itself a + * location.) */ -#define WT_BTREE_MAX_ADDR_COOKIE 255 /* Maximum address cookie */ +#define WT_BTREE_MAX_ADDR_COOKIE 255 /* Maximum address cookie */ /* Evict pages if we see this many consecutive deleted records. */ -#define WT_BTREE_DELETE_THRESHOLD 1000 +#define WT_BTREE_DELETE_THRESHOLD 1000 /* - * Minimum size of the chunks (in percentage of the page size) a page gets split - * into during reconciliation. + * Minimum size of the chunks (in percentage of the page size) a page gets split into during + * reconciliation. */ -#define WT_BTREE_MIN_SPLIT_PCT 50 +#define WT_BTREE_MIN_SPLIT_PCT 50 /* * WT_BTREE -- * A btree handle. 
*/ struct __wt_btree { - WT_DATA_HANDLE *dhandle; + WT_DATA_HANDLE *dhandle; - WT_CKPT *ckpt; /* Checkpoint information */ + WT_CKPT *ckpt; /* Checkpoint information */ - enum { BTREE_COL_FIX=1, /* Fixed-length column store */ - BTREE_COL_VAR=2, /* Variable-length column store */ - BTREE_ROW=3 /* Row-store */ - } type; /* Type */ + enum { + BTREE_COL_FIX = 1, /* Fixed-length column store */ + BTREE_COL_VAR = 2, /* Variable-length column store */ + BTREE_ROW = 3 /* Row-store */ + } type; /* Type */ - const char *key_format; /* Key format */ - const char *value_format; /* Value format */ - uint8_t bitcnt; /* Fixed-length field size in bits */ + const char *key_format; /* Key format */ + const char *value_format; /* Value format */ + uint8_t bitcnt; /* Fixed-length field size in bits */ - WT_COLLATOR *collator; /* Row-store comparator */ - int collator_owned; /* The collator needs to be freed */ + WT_COLLATOR *collator; /* Row-store comparator */ + int collator_owned; /* The collator needs to be freed */ - uint32_t id; /* File ID, for logging */ + uint32_t id; /* File ID, for logging */ - uint32_t key_gap; /* Row-store prefix key gap */ + uint32_t key_gap; /* Row-store prefix key gap */ - uint32_t allocsize; /* Allocation size */ - uint32_t maxintlpage; /* Internal page max size */ - uint32_t maxintlkey; /* Internal page max key size */ - uint32_t maxleafpage; /* Leaf page max size */ - uint32_t maxleafkey; /* Leaf page max key size */ - uint32_t maxleafvalue; /* Leaf page max value size */ - uint64_t maxmempage; /* In-memory page max size */ - uint32_t maxmempage_image; /* In-memory page image max size */ - uint64_t splitmempage; /* In-memory split trigger size */ + uint32_t allocsize; /* Allocation size */ + uint32_t maxintlpage; /* Internal page max size */ + uint32_t maxintlkey; /* Internal page max key size */ + uint32_t maxleafpage; /* Leaf page max size */ + uint32_t maxleafkey; /* Leaf page max key size */ + uint32_t maxleafvalue; /* Leaf page max value size 
*/ + uint64_t maxmempage; /* In-memory page max size */ + uint32_t maxmempage_image; /* In-memory page image max size */ + uint64_t splitmempage; /* In-memory split trigger size */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_ASSERT_COMMIT_TS_ALWAYS 0x01u -#define WT_ASSERT_COMMIT_TS_KEYS 0x02u -#define WT_ASSERT_COMMIT_TS_NEVER 0x04u -#define WT_ASSERT_DURABLE_TS_ALWAYS 0x08u -#define WT_ASSERT_DURABLE_TS_KEYS 0x10u -#define WT_ASSERT_DURABLE_TS_NEVER 0x20u -#define WT_ASSERT_READ_TS_ALWAYS 0x40u -#define WT_ASSERT_READ_TS_NEVER 0x80u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t assert_flags; /* Debugging assertion information */ - - void *huffman_key; /* Key huffman encoding */ - void *huffman_value; /* Value huffman encoding */ - - enum { CKSUM_ON=1, /* On */ - CKSUM_OFF=2, /* Off */ - CKSUM_UNCOMPRESSED=3 /* Uncompressed blocks only */ - } checksum; /* Checksum configuration */ - - /* - * Reconciliation... - */ - u_int dictionary; /* Dictionary slots */ - bool internal_key_truncate; /* Internal key truncate */ - bool prefix_compression; /* Prefix compression */ - u_int prefix_compression_min; /* Prefix compression min */ - -#define WT_SPLIT_DEEPEN_MIN_CHILD_DEF 10000 - u_int split_deepen_min_child; /* Minimum entries to deepen tree */ -#define WT_SPLIT_DEEPEN_PER_CHILD_DEF 100 - u_int split_deepen_per_child; /* Entries per child when deepened */ - int split_pct; /* Split page percent */ - - WT_COMPRESSOR *compressor; /* Page compressor */ - /* - * When doing compression, the pre-compression in-memory byte size is - * optionally adjusted based on previous compression results. - * It's an 8B value because it's updated without a lock. 
- */ - bool leafpage_compadjust; /* Run-time compression adjustment */ - uint64_t maxleafpage_precomp; /* Leaf page pre-compression size */ - bool intlpage_compadjust; /* Run-time compression adjustment */ - uint64_t maxintlpage_precomp; /* Internal page pre-compression size */ - - WT_KEYED_ENCRYPTOR *kencryptor; /* Page encryptor */ - - WT_RWLOCK ovfl_lock; /* Overflow lock */ - - int maximum_depth; /* Maximum tree depth during search */ - u_int rec_multiblock_max; /* Maximum blocks written for a page */ - - uint64_t last_recno; /* Column-store last record number */ - - WT_REF root; /* Root page reference */ - bool modified; /* If the tree ever modified */ - uint8_t original; /* Newly created: bulk-load possible - (want a bool but needs atomic cas) */ - - bool lookaside_entries; /* Has entries in the lookaside table */ - bool lsm_primary; /* Handle is/was the LSM primary */ - - WT_BM *bm; /* Block manager reference */ - u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ - - uint64_t write_gen; /* Write generation */ - uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ - wt_timestamp_t rec_max_timestamp; - - uint64_t checkpoint_gen; /* Checkpoint generation */ - WT_SESSION_IMPL *sync_session; /* Syncing session */ - volatile enum { - WT_BTREE_SYNC_OFF, WT_BTREE_SYNC_WAIT, WT_BTREE_SYNC_RUNNING - } syncing; /* Sync status */ - - /* - * Helper macros: - * WT_BTREE_SYNCING indicates if a sync is active (either waiting to - * start or already running), so no new operations should start that - * would conflict with the sync. - * WT_SESSION_BTREE_SYNC indicates if the session is performing a sync - * on its current tree. - * WT_SESSION_BTREE_SYNC_SAFE checks whether it is safe to perform an - * operation that would conflict with a sync. 
- */ -#define WT_BTREE_SYNCING(btree) \ - ((btree)->syncing != WT_BTREE_SYNC_OFF) -#define WT_SESSION_BTREE_SYNC(session) \ - (S2BT(session)->sync_session == (session)) -#define WT_SESSION_BTREE_SYNC_SAFE(session, btree) \ - ((btree)->syncing != WT_BTREE_SYNC_RUNNING || \ - (btree)->sync_session == (session)) - - uint64_t bytes_inmem; /* Cache bytes in memory. */ - uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ - uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */ - uint64_t bytes_dirty_total; /* Bytes ever dirtied in cache. */ - - /* - * The maximum bytes allowed to be used for the table on disk. This is - * currently only used for the lookaside table. - */ - uint64_t file_max; - - /* - * We flush pages from the tree (in order to make checkpoint faster), - * without a high-level lock. To avoid multiple threads flushing at - * the same time, lock the tree. - */ - WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ - - /* - * All of the following fields live at the end of the structure so it's - * easier to clear everything but the fields that persist. - */ -#define WT_BTREE_CLEAR_SIZE (offsetof(WT_BTREE, evict_ref)) - - /* - * Eviction information is maintained in the btree handle, but owned by - * eviction, not the btree code. 
- */ - WT_REF *evict_ref; /* Eviction thread's location */ - uint64_t evict_priority; /* Relative priority of cached pages */ - uint32_t evict_walk_progress;/* Eviction walk progress */ - uint32_t evict_walk_target; /* Eviction walk target */ - u_int evict_walk_period; /* Skip this many LRU walks */ - u_int evict_walk_saved; /* Saved walk skips for checkpoints */ - u_int evict_walk_skips; /* Number of walks skipped */ - int32_t evict_disabled; /* Eviction disabled count */ - bool evict_disabled_open;/* Eviction disabled on open */ - volatile uint32_t evict_busy; /* Count of threads in eviction */ - enum { /* Start position for eviction walk */ - WT_EVICT_WALK_NEXT, - WT_EVICT_WALK_PREV, - WT_EVICT_WALK_RAND_NEXT, - WT_EVICT_WALK_RAND_PREV - } evict_start_type; - - /* - * Flag values up to 0xff are reserved for WT_DHANDLE_XXX. We don't - * automatically generate these flag values for that reason, there's - * no way to start at an offset. - */ -#define WT_BTREE_ALTER 0x000100u /* Handle is for alter */ -#define WT_BTREE_BULK 0x000200u /* Bulk-load handle */ -#define WT_BTREE_CLOSED 0x000400u /* Handle closed */ -#define WT_BTREE_IGNORE_CACHE 0x000800u /* Cache-resident object */ -#define WT_BTREE_IN_MEMORY 0x001000u /* Cache-resident object */ -#define WT_BTREE_LOOKASIDE 0x002000u /* Look-aside table */ -#define WT_BTREE_NO_CHECKPOINT 0x004000u /* Disable checkpoints */ -#define WT_BTREE_NO_LOGGING 0x008000u /* Disable logging */ -#define WT_BTREE_READONLY 0x010000u /* Handle is readonly */ -#define WT_BTREE_REBALANCE 0x020000u /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x040000u /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x080000u /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x100000u /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x200000u /* Handle is for verify */ - uint32_t flags; +#define WT_ASSERT_COMMIT_TS_ALWAYS 0x01u +#define WT_ASSERT_COMMIT_TS_KEYS 0x02u +#define WT_ASSERT_COMMIT_TS_NEVER 0x04u +#define 
WT_ASSERT_DURABLE_TS_ALWAYS 0x08u +#define WT_ASSERT_DURABLE_TS_KEYS 0x10u +#define WT_ASSERT_DURABLE_TS_NEVER 0x20u +#define WT_ASSERT_READ_TS_ALWAYS 0x40u +#define WT_ASSERT_READ_TS_NEVER 0x80u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t assert_flags; /* Debugging assertion information */ + + void *huffman_key; /* Key huffman encoding */ + void *huffman_value; /* Value huffman encoding */ + + enum { + CKSUM_ON = 1, /* On */ + CKSUM_OFF = 2, /* Off */ + CKSUM_UNCOMPRESSED = 3 /* Uncompressed blocks only */ + } checksum; /* Checksum configuration */ + + /* + * Reconciliation... + */ + u_int dictionary; /* Dictionary slots */ + bool internal_key_truncate; /* Internal key truncate */ + bool prefix_compression; /* Prefix compression */ + u_int prefix_compression_min; /* Prefix compression min */ + +#define WT_SPLIT_DEEPEN_MIN_CHILD_DEF 10000 + u_int split_deepen_min_child; /* Minimum entries to deepen tree */ +#define WT_SPLIT_DEEPEN_PER_CHILD_DEF 100 + u_int split_deepen_per_child; /* Entries per child when deepened */ + int split_pct; /* Split page percent */ + + WT_COMPRESSOR *compressor; /* Page compressor */ + /* + * When doing compression, the pre-compression in-memory byte size + * is optionally adjusted based on previous compression results. + * It's an 8B value because it's updated without a lock. 
+ */ + bool leafpage_compadjust; /* Run-time compression adjustment */ + uint64_t maxleafpage_precomp; /* Leaf page pre-compression size */ + bool intlpage_compadjust; /* Run-time compression adjustment */ + uint64_t maxintlpage_precomp; /* Internal page pre-compression size */ + + WT_KEYED_ENCRYPTOR *kencryptor; /* Page encryptor */ + + WT_RWLOCK ovfl_lock; /* Overflow lock */ + + int maximum_depth; /* Maximum tree depth during search */ + u_int rec_multiblock_max; /* Maximum blocks written for a page */ + + uint64_t last_recno; /* Column-store last record number */ + + WT_REF root; /* Root page reference */ + bool modified; /* If the tree ever modified */ + uint8_t original; /* Newly created: bulk-load possible + (want a bool but needs atomic cas) */ + + bool lookaside_entries; /* Has entries in the lookaside table */ + bool lsm_primary; /* Handle is/was the LSM primary */ + + WT_BM *bm; /* Block manager reference */ + u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ + + uint64_t write_gen; /* Write generation */ + uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ + wt_timestamp_t rec_max_timestamp; + + uint64_t checkpoint_gen; /* Checkpoint generation */ + WT_SESSION_IMPL *sync_session; /* Syncing session */ + volatile enum { + WT_BTREE_SYNC_OFF, + WT_BTREE_SYNC_WAIT, + WT_BTREE_SYNC_RUNNING + } syncing; /* Sync status */ + +/* + * Helper macros: WT_BTREE_SYNCING indicates if a sync is active (either waiting to start or already + * running), so no new operations should start that would conflict with the sync. + * WT_SESSION_BTREE_SYNC indicates if the session is performing a sync on its current tree. + * WT_SESSION_BTREE_SYNC_SAFE checks whether it is safe to perform an operation that would conflict + * with a sync. 
+ */ +#define WT_BTREE_SYNCING(btree) ((btree)->syncing != WT_BTREE_SYNC_OFF) +#define WT_SESSION_BTREE_SYNC(session) (S2BT(session)->sync_session == (session)) +#define WT_SESSION_BTREE_SYNC_SAFE(session, btree) \ + ((btree)->syncing != WT_BTREE_SYNC_RUNNING || (btree)->sync_session == (session)) + + uint64_t bytes_inmem; /* Cache bytes in memory. */ + uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ + uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */ + uint64_t bytes_dirty_total; /* Bytes ever dirtied in cache. */ + + /* + * The maximum bytes allowed to be used for the table on disk. This is currently only used for + * the lookaside table. + */ + uint64_t file_max; + + /* + * We flush pages from the tree (in order to make checkpoint faster), without a high-level lock. + * To avoid multiple threads flushing at the same time, lock the tree. + */ + WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ + +/* + * All of the following fields live at the end of the structure so it's easier to clear everything + * but the fields that persist. + */ +#define WT_BTREE_CLEAR_SIZE (offsetof(WT_BTREE, evict_ref)) + + /* + * Eviction information is maintained in the btree handle, but owned by eviction, not the btree + * code. 
+ */ + WT_REF *evict_ref; /* Eviction thread's location */ + uint64_t evict_priority; /* Relative priority of cached pages */ + uint32_t evict_walk_progress; /* Eviction walk progress */ + uint32_t evict_walk_target; /* Eviction walk target */ + u_int evict_walk_period; /* Skip this many LRU walks */ + u_int evict_walk_saved; /* Saved walk skips for checkpoints */ + u_int evict_walk_skips; /* Number of walks skipped */ + int32_t evict_disabled; /* Eviction disabled count */ + bool evict_disabled_open; /* Eviction disabled on open */ + volatile uint32_t evict_busy; /* Count of threads in eviction */ + enum { /* Start position for eviction walk */ + WT_EVICT_WALK_NEXT, + WT_EVICT_WALK_PREV, + WT_EVICT_WALK_RAND_NEXT, + WT_EVICT_WALK_RAND_PREV + } evict_start_type; + +/* + * Flag values up to 0xff are reserved for WT_DHANDLE_XXX. We don't automatically generate these + * flag values for that reason, there's no way to start at an offset. + */ +#define WT_BTREE_ALTER 0x000100u /* Handle is for alter */ +#define WT_BTREE_BULK 0x000200u /* Bulk-load handle */ +#define WT_BTREE_CLOSED 0x000400u /* Handle closed */ +#define WT_BTREE_IGNORE_CACHE 0x000800u /* Cache-resident object */ +#define WT_BTREE_IN_MEMORY 0x001000u /* Cache-resident object */ +#define WT_BTREE_LOOKASIDE 0x002000u /* Look-aside table */ +#define WT_BTREE_NO_CHECKPOINT 0x004000u /* Disable checkpoints */ +#define WT_BTREE_NO_LOGGING 0x008000u /* Disable logging */ +#define WT_BTREE_READONLY 0x010000u /* Handle is readonly */ +#define WT_BTREE_REBALANCE 0x020000u /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x040000u /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x080000u /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x100000u /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x200000u /* Handle is for verify */ + uint32_t flags; }; /* Flags that make a btree handle special (not for normal use). 
*/ -#define WT_BTREE_SPECIAL_FLAGS \ - (WT_BTREE_ALTER | WT_BTREE_BULK | WT_BTREE_REBALANCE | \ - WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) +#define WT_BTREE_SPECIAL_FLAGS \ + (WT_BTREE_ALTER | WT_BTREE_BULK | WT_BTREE_REBALANCE | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | \ + WT_BTREE_VERIFY) /* * WT_SALVAGE_COOKIE -- * Encapsulation of salvage information for reconciliation. */ struct __wt_salvage_cookie { - uint64_t missing; /* Initial items to create */ - uint64_t skip; /* Initial items to skip */ - uint64_t take; /* Items to take */ + uint64_t missing; /* Initial items to create */ + uint64_t skip; /* Initial items to skip */ + uint64_t take; /* Items to take */ - bool done; /* Ignore the rest */ + bool done; /* Ignore the rest */ }; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 3fa5d60f1f1..3f80ee5cda7 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -8,1769 +8,1674 @@ /* * __wt_ref_is_root -- - * Return if the page reference is for the root page. + * Return if the page reference is for the root page. */ static inline bool __wt_ref_is_root(WT_REF *ref) { - return (ref->home == NULL); + return (ref->home == NULL); } /* * __wt_page_is_empty -- - * Return if the page is empty. + * Return if the page is empty. */ static inline bool __wt_page_is_empty(WT_PAGE *page) { - return (page->modify != NULL && - page->modify->rec_result == WT_PM_REC_EMPTY); + return (page->modify != NULL && page->modify->rec_result == WT_PM_REC_EMPTY); } /* * __wt_page_evict_clean -- - * Return if the page can be evicted without dirtying the tree. + * Return if the page can be evicted without dirtying the tree. 
*/ static inline bool __wt_page_evict_clean(WT_PAGE *page) { - return (page->modify == NULL || - (page->modify->page_state == WT_PAGE_CLEAN && - page->modify->rec_result == 0)); + return (page->modify == NULL || + (page->modify->page_state == WT_PAGE_CLEAN && page->modify->rec_result == 0)); } /* * __wt_page_is_modified -- - * Return if the page is dirty. + * Return if the page is dirty. */ static inline bool __wt_page_is_modified(WT_PAGE *page) { - return (page->modify != NULL && - page->modify->page_state != WT_PAGE_CLEAN); + return (page->modify != NULL && page->modify->page_state != WT_PAGE_CLEAN); } /* * __wt_btree_block_free -- - * Helper function to free a block from the current tree. + * Helper function to free a block from the current tree. */ static inline int -__wt_btree_block_free( - WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +__wt_btree_block_free(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) { - WT_BM *bm; - WT_BTREE *btree; + WT_BM *bm; + WT_BTREE *btree; - btree = S2BT(session); - bm = btree->bm; + btree = S2BT(session); + bm = btree->bm; - return (bm->free(bm, session, addr, addr_size)); + return (bm->free(bm, session, addr, addr_size)); } /* * __wt_btree_bytes_inuse -- - * Return the number of bytes in use. + * Return the number of bytes in use. */ static inline uint64_t __wt_btree_bytes_inuse(WT_SESSION_IMPL *session) { - WT_BTREE *btree; - WT_CACHE *cache; + WT_BTREE *btree; + WT_CACHE *cache; - btree = S2BT(session); - cache = S2C(session)->cache; + btree = S2BT(session); + cache = S2C(session)->cache; - return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_inmem)); + return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_inmem)); } /* * __wt_btree_bytes_evictable -- - * Return the number of bytes that can be evicted (i.e. bytes apart from - * the pinned root page). + * Return the number of bytes that can be evicted (i.e. bytes apart from the pinned root page). 
*/ static inline uint64_t __wt_btree_bytes_evictable(WT_SESSION_IMPL *session) { - WT_BTREE *btree; - WT_CACHE *cache; - WT_PAGE *root_page; - uint64_t bytes_inmem, bytes_root; + WT_BTREE *btree; + WT_CACHE *cache; + WT_PAGE *root_page; + uint64_t bytes_inmem, bytes_root; - btree = S2BT(session); - cache = S2C(session)->cache; - root_page = btree->root.page; + btree = S2BT(session); + cache = S2C(session)->cache; + root_page = btree->root.page; - bytes_inmem = btree->bytes_inmem; - bytes_root = root_page == NULL ? 0 : root_page->memory_footprint; + bytes_inmem = btree->bytes_inmem; + bytes_root = root_page == NULL ? 0 : root_page->memory_footprint; - return (bytes_inmem <= bytes_root ? 0 : - __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_root)); + return (bytes_inmem <= bytes_root ? 0 : __wt_cache_bytes_plus_overhead( + cache, bytes_inmem - bytes_root)); } /* * __wt_btree_dirty_inuse -- - * Return the number of dirty bytes in use. + * Return the number of dirty bytes in use. */ static inline uint64_t __wt_btree_dirty_inuse(WT_SESSION_IMPL *session) { - WT_BTREE *btree; - WT_CACHE *cache; + WT_BTREE *btree; + WT_CACHE *cache; - btree = S2BT(session); - cache = S2C(session)->cache; + btree = S2BT(session); + cache = S2C(session)->cache; - return (__wt_cache_bytes_plus_overhead(cache, - btree->bytes_dirty_intl + btree->bytes_dirty_leaf)); + return ( + __wt_cache_bytes_plus_overhead(cache, btree->bytes_dirty_intl + btree->bytes_dirty_leaf)); } /* * __wt_btree_dirty_leaf_inuse -- - * Return the number of bytes in use by dirty leaf pages. + * Return the number of bytes in use by dirty leaf pages. 
*/ static inline uint64_t __wt_btree_dirty_leaf_inuse(WT_SESSION_IMPL *session) { - WT_BTREE *btree; - WT_CACHE *cache; + WT_BTREE *btree; + WT_CACHE *cache; - btree = S2BT(session); - cache = S2C(session)->cache; + btree = S2BT(session); + cache = S2C(session)->cache; - return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_dirty_leaf)); + return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_dirty_leaf)); } /* * __wt_cache_page_inmem_incr -- - * Increment a page's memory footprint in the cache. + * Increment a page's memory footprint in the cache. */ static inline void __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { - WT_BTREE *btree; - WT_CACHE *cache; - - WT_ASSERT(session, size < WT_EXABYTE); - btree = S2BT(session); - cache = S2C(session)->cache; - - (void)__wt_atomic_add64(&btree->bytes_inmem, size); - (void)__wt_atomic_add64(&cache->bytes_inmem, size); - (void)__wt_atomic_addsize(&page->memory_footprint, size); - if (__wt_page_is_modified(page)) { - (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); - if (WT_PAGE_IS_INTERNAL(page)) { - (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); - } else if (!btree->lsm_primary) { - (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); - } - } - /* Track internal size in cache. 
*/ - if (WT_PAGE_IS_INTERNAL(page)) - (void)__wt_atomic_add64(&cache->bytes_internal, size); + WT_BTREE *btree; + WT_CACHE *cache; + + WT_ASSERT(session, size < WT_EXABYTE); + btree = S2BT(session); + cache = S2C(session)->cache; + + (void)__wt_atomic_add64(&btree->bytes_inmem, size); + (void)__wt_atomic_add64(&cache->bytes_inmem, size); + (void)__wt_atomic_addsize(&page->memory_footprint, size); + if (__wt_page_is_modified(page)) { + (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); + if (WT_PAGE_IS_INTERNAL(page)) { + (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size); + (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); + } else if (!btree->lsm_primary) { + (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); + (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); + } + } + /* Track internal size in cache. */ + if (WT_PAGE_IS_INTERNAL(page)) + (void)__wt_atomic_add64(&cache->bytes_internal, size); } /* * __wt_cache_decr_check_size -- - * Decrement a size_t cache value and check for underflow. + * Decrement a size_t cache value and check for underflow. */ static inline void -__wt_cache_decr_check_size( - WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld) +__wt_cache_decr_check_size(WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld) { - if (v == 0 || __wt_atomic_subsize(vp, v) < WT_EXABYTE) - return; + if (v == 0 || __wt_atomic_subsize(vp, v) < WT_EXABYTE) + return; - /* - * It's a bug if this accounting underflowed but allow the application - * to proceed - the consequence is we use more cache than configured. - */ - *vp = 0; - __wt_errx(session, - "%s went negative with decrement of %" WT_SIZET_FMT, fld, v); + /* + * It's a bug if this accounting underflowed but allow the application to proceed - the + * consequence is we use more cache than configured. 
+ */ + *vp = 0; + __wt_errx(session, "%s went negative with decrement of %" WT_SIZET_FMT, fld, v); #ifdef HAVE_DIAGNOSTIC - __wt_abort(session); + __wt_abort(session); #endif } /* * __wt_cache_decr_check_uint64 -- - * Decrement a uint64_t cache value and check for underflow. + * Decrement a uint64_t cache value and check for underflow. */ static inline void -__wt_cache_decr_check_uint64( - WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld) +__wt_cache_decr_check_uint64(WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld) { - uint64_t orig = *vp; + uint64_t orig = *vp; - if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE) - return; + if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE) + return; - /* - * It's a bug if this accounting underflowed but allow the application - * to proceed - the consequence is we use more cache than configured. - */ - *vp = 0; - __wt_errx(session, - "%s was %" PRIu64 ", went negative with decrement of %" PRIu64, fld, - orig, v); + /* + * It's a bug if this accounting underflowed but allow the application to proceed - the + * consequence is we use more cache than configured. + */ + *vp = 0; + __wt_errx( + session, "%s was %" PRIu64 ", went negative with decrement of %" PRIu64, fld, orig, v); #ifdef HAVE_DIAGNOSTIC - __wt_abort(session); + __wt_abort(session); #endif } /* * __wt_cache_page_byte_dirty_decr -- - * Decrement the page's dirty byte count, guarding from underflow. + * Decrement the page's dirty byte count, guarding from underflow. 
*/ static inline void -__wt_cache_page_byte_dirty_decr( - WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) +__wt_cache_page_byte_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { - WT_BTREE *btree; - WT_CACHE *cache; - size_t decr, orig; - int i; - - btree = S2BT(session); - cache = S2C(session)->cache; - decr = 0; /* [-Wconditional-uninitialized] */ - - /* - * We don't have exclusive access and there are ways of decrementing the - * page's dirty byte count by a too-large value. For example: - * T1: __wt_cache_page_inmem_incr(page, size) - * page is clean, don't increment dirty byte count - * T2: mark page dirty - * T1: __wt_cache_page_inmem_decr(page, size) - * page is dirty, decrement dirty byte count - * and, of course, the reverse where the page is dirty at the increment - * and clean at the decrement. - * - * The page's dirty-byte value always reflects bytes represented in the - * cache's dirty-byte count, decrement the page/cache as much as we can - * without underflow. If we can't decrement the dirty byte counts after - * few tries, give up: the cache's value will be wrong, but consistent, - * and we'll fix it the next time this page is marked clean, or evicted. - */ - for (i = 0; i < 5; ++i) { - /* - * Take care to read the dirty-byte count only once in case - * we're racing with updates. 
- */ - WT_ORDERED_READ(orig, page->modify->bytes_dirty); - decr = WT_MIN(size, orig); - if (__wt_atomic_cassize( - &page->modify->bytes_dirty, orig, orig - decr)) - break; - } - - if (i == 5) - return; - - if (WT_PAGE_IS_INTERNAL(page)) { - __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_intl, - decr, "WT_BTREE.bytes_dirty_intl"); - __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_intl, - decr, "WT_CACHE.bytes_dirty_intl"); - } else if (!btree->lsm_primary) { - __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_leaf, - decr, "WT_BTREE.bytes_dirty_leaf"); - __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_leaf, - decr, "WT_CACHE.bytes_dirty_leaf"); - } + WT_BTREE *btree; + WT_CACHE *cache; + size_t decr, orig; + int i; + + btree = S2BT(session); + cache = S2C(session)->cache; + decr = 0; /* [-Wconditional-uninitialized] */ + + /* + * We don't have exclusive access and there are ways of decrementing the + * page's dirty byte count by a too-large value. For example: + * T1: __wt_cache_page_inmem_incr(page, size) + * page is clean, don't increment dirty byte count + * T2: mark page dirty + * T1: __wt_cache_page_inmem_decr(page, size) + * page is dirty, decrement dirty byte count + * and, of course, the reverse where the page is dirty at the increment + * and clean at the decrement. + * + * The page's dirty-byte value always reflects bytes represented in the + * cache's dirty-byte count, decrement the page/cache as much as we can + * without underflow. If we can't decrement the dirty byte counts after + * few tries, give up: the cache's value will be wrong, but consistent, + * and we'll fix it the next time this page is marked clean, or evicted. + */ + for (i = 0; i < 5; ++i) { + /* + * Take care to read the dirty-byte count only once in case we're racing with updates. 
+ */ + WT_ORDERED_READ(orig, page->modify->bytes_dirty); + decr = WT_MIN(size, orig); + if (__wt_atomic_cassize(&page->modify->bytes_dirty, orig, orig - decr)) + break; + } + + if (i == 5) + return; + + if (WT_PAGE_IS_INTERNAL(page)) { + __wt_cache_decr_check_uint64( + session, &btree->bytes_dirty_intl, decr, "WT_BTREE.bytes_dirty_intl"); + __wt_cache_decr_check_uint64( + session, &cache->bytes_dirty_intl, decr, "WT_CACHE.bytes_dirty_intl"); + } else if (!btree->lsm_primary) { + __wt_cache_decr_check_uint64( + session, &btree->bytes_dirty_leaf, decr, "WT_BTREE.bytes_dirty_leaf"); + __wt_cache_decr_check_uint64( + session, &cache->bytes_dirty_leaf, decr, "WT_CACHE.bytes_dirty_leaf"); + } } /* * __wt_cache_page_inmem_decr -- - * Decrement a page's memory footprint in the cache. + * Decrement a page's memory footprint in the cache. */ static inline void __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { - WT_CACHE *cache; - - cache = S2C(session)->cache; - - WT_ASSERT(session, size < WT_EXABYTE); - - __wt_cache_decr_check_uint64( - session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem"); - __wt_cache_decr_check_uint64( - session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); - __wt_cache_decr_check_size( - session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); - if (__wt_page_is_modified(page)) - __wt_cache_page_byte_dirty_decr(session, page, size); - /* Track internal size in cache. 
*/ - if (WT_PAGE_IS_INTERNAL(page)) - __wt_cache_decr_check_uint64(session, - &cache->bytes_internal, size, "WT_CACHE.bytes_internal"); + WT_CACHE *cache; + + cache = S2C(session)->cache; + + WT_ASSERT(session, size < WT_EXABYTE); + + __wt_cache_decr_check_uint64( + session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); + __wt_cache_decr_check_size(session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); + if (__wt_page_is_modified(page)) + __wt_cache_page_byte_dirty_decr(session, page, size); + /* Track internal size in cache. */ + if (WT_PAGE_IS_INTERNAL(page)) + __wt_cache_decr_check_uint64( + session, &cache->bytes_internal, size, "WT_CACHE.bytes_internal"); } /* * __wt_cache_dirty_incr -- - * Page switch from clean to dirty: increment the cache dirty page/byte - * counts. + * Page switch from clean to dirty: increment the cache dirty page/byte counts. */ static inline void __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_BTREE *btree; - WT_CACHE *cache; - size_t size; - - btree = S2BT(session); - cache = S2C(session)->cache; - - /* - * Take care to read the memory_footprint once in case we are racing - * with updates. 
- */ - size = page->memory_footprint; - if (WT_PAGE_IS_INTERNAL(page)) { - (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); - (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1); - } else { - if (!btree->lsm_primary) { - (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); - } - (void)__wt_atomic_add64(&cache->pages_dirty_leaf, 1); - } - (void)__wt_atomic_add64(&btree->bytes_dirty_total, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_total, size); - (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); + WT_BTREE *btree; + WT_CACHE *cache; + size_t size; + + btree = S2BT(session); + cache = S2C(session)->cache; + + /* + * Take care to read the memory_footprint once in case we are racing with updates. + */ + size = page->memory_footprint; + if (WT_PAGE_IS_INTERNAL(page)) { + (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size); + (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); + (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1); + } else { + if (!btree->lsm_primary) { + (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); + (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); + } + (void)__wt_atomic_add64(&cache->pages_dirty_leaf, 1); + } + (void)__wt_atomic_add64(&btree->bytes_dirty_total, size); + (void)__wt_atomic_add64(&cache->bytes_dirty_total, size); + (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); } /* * __wt_cache_dirty_decr -- - * Page switch from dirty to clean: decrement the cache dirty page/byte - * counts. + * Page switch from dirty to clean: decrement the cache dirty page/byte counts. 
*/ static inline void __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_CACHE *cache; - WT_PAGE_MODIFY *modify; - - cache = S2C(session)->cache; - - if (WT_PAGE_IS_INTERNAL(page)) - __wt_cache_decr_check_uint64(session, - &cache->pages_dirty_intl, 1, "dirty internal page count"); - else - __wt_cache_decr_check_uint64(session, - &cache->pages_dirty_leaf, 1, "dirty leaf page count"); - - modify = page->modify; - if (modify != NULL && modify->bytes_dirty != 0) - __wt_cache_page_byte_dirty_decr( - session, page, modify->bytes_dirty); + WT_CACHE *cache; + WT_PAGE_MODIFY *modify; + + cache = S2C(session)->cache; + + if (WT_PAGE_IS_INTERNAL(page)) + __wt_cache_decr_check_uint64( + session, &cache->pages_dirty_intl, 1, "dirty internal page count"); + else + __wt_cache_decr_check_uint64(session, &cache->pages_dirty_leaf, 1, "dirty leaf page count"); + + modify = page->modify; + if (modify != NULL && modify->bytes_dirty != 0) + __wt_cache_page_byte_dirty_decr(session, page, modify->bytes_dirty); } /* * __wt_cache_page_image_decr -- - * Decrement a page image's size to the cache. + * Decrement a page image's size to the cache. */ static inline void __wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size) { - WT_CACHE *cache; + WT_CACHE *cache; - cache = S2C(session)->cache; + cache = S2C(session)->cache; - __wt_cache_decr_check_uint64( - session, &cache->bytes_image, size, "WT_CACHE.image_inmem"); + __wt_cache_decr_check_uint64(session, &cache->bytes_image, size, "WT_CACHE.image_inmem"); } /* * __wt_cache_page_image_incr -- - * Increment a page image's size to the cache. + * Increment a page image's size to the cache. 
*/ static inline void __wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size) { - WT_CACHE *cache; + WT_CACHE *cache; - cache = S2C(session)->cache; - (void)__wt_atomic_add64(&cache->bytes_image, size); + cache = S2C(session)->cache; + (void)__wt_atomic_add64(&cache->bytes_image, size); } /* * __wt_cache_page_evict -- - * Evict pages from the cache. + * Evict pages from the cache. */ static inline void __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_BTREE *btree; - WT_CACHE *cache; - WT_PAGE_MODIFY *modify; - - btree = S2BT(session); - cache = S2C(session)->cache; - modify = page->modify; - - /* Update the bytes in-memory to reflect the eviction. */ - __wt_cache_decr_check_uint64(session, &btree->bytes_inmem, - page->memory_footprint, "WT_BTREE.bytes_inmem"); - __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, - page->memory_footprint, "WT_CACHE.bytes_inmem"); - - /* Update the bytes_internal value to reflect the eviction */ - if (WT_PAGE_IS_INTERNAL(page)) - __wt_cache_decr_check_uint64(session, - &cache->bytes_internal, - page->memory_footprint, "WT_CACHE.bytes_internal"); - - /* Update the cache's dirty-byte count. */ - if (modify != NULL && modify->bytes_dirty != 0) { - if (WT_PAGE_IS_INTERNAL(page)) { - __wt_cache_decr_check_uint64(session, - &btree->bytes_dirty_intl, - modify->bytes_dirty, "WT_BTREE.bytes_dirty_intl"); - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty_intl, - modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl"); - } else if (!btree->lsm_primary) { - __wt_cache_decr_check_uint64(session, - &btree->bytes_dirty_leaf, - modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf"); - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty_leaf, - modify->bytes_dirty, "WT_CACHE.bytes_dirty_leaf"); - } - } - - /* Update bytes and pages evicted. 
*/ - (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint); - (void)__wt_atomic_addv64(&cache->pages_evicted, 1); - - /* - * Track if eviction makes progress. This is used in various places to - * determine whether eviction is stuck. - */ - if (!F_ISSET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS)) - (void)__wt_atomic_addv64(&cache->eviction_progress, 1); + WT_BTREE *btree; + WT_CACHE *cache; + WT_PAGE_MODIFY *modify; + + btree = S2BT(session); + cache = S2C(session)->cache; + modify = page->modify; + + /* Update the bytes in-memory to reflect the eviction. */ + __wt_cache_decr_check_uint64( + session, &btree->bytes_inmem, page->memory_footprint, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64( + session, &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem"); + + /* Update the bytes_internal value to reflect the eviction */ + if (WT_PAGE_IS_INTERNAL(page)) + __wt_cache_decr_check_uint64( + session, &cache->bytes_internal, page->memory_footprint, "WT_CACHE.bytes_internal"); + + /* Update the cache's dirty-byte count. */ + if (modify != NULL && modify->bytes_dirty != 0) { + if (WT_PAGE_IS_INTERNAL(page)) { + __wt_cache_decr_check_uint64( + session, &btree->bytes_dirty_intl, modify->bytes_dirty, "WT_BTREE.bytes_dirty_intl"); + __wt_cache_decr_check_uint64( + session, &cache->bytes_dirty_intl, modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl"); + } else if (!btree->lsm_primary) { + __wt_cache_decr_check_uint64( + session, &btree->bytes_dirty_leaf, modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf"); + __wt_cache_decr_check_uint64( + session, &cache->bytes_dirty_leaf, modify->bytes_dirty, "WT_CACHE.bytes_dirty_leaf"); + } + } + + /* Update bytes and pages evicted. */ + (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint); + (void)__wt_atomic_addv64(&cache->pages_evicted, 1); + + /* + * Track if eviction makes progress. This is used in various places to determine whether + * eviction is stuck. 
+ */ + if (!F_ISSET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS)) + (void)__wt_atomic_addv64(&cache->eviction_progress, 1); } /* * __wt_update_list_memsize -- - * The size in memory of a list of updates. + * The size in memory of a list of updates. */ static inline size_t __wt_update_list_memsize(WT_UPDATE *upd) { - size_t upd_size; + size_t upd_size; - for (upd_size = 0; upd != NULL; upd = upd->next) - upd_size += WT_UPDATE_MEMSIZE(upd); + for (upd_size = 0; upd != NULL; upd = upd->next) + upd_size += WT_UPDATE_MEMSIZE(upd); - return (upd_size); + return (upd_size); } /* * __wt_page_modify_init -- - * A page is about to be modified, allocate the modification structure. + * A page is about to be modified, allocate the modification structure. */ static inline int __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page) { - return (page->modify == NULL ? - __wt_page_modify_alloc(session, page) : 0); + return (page->modify == NULL ? __wt_page_modify_alloc(session, page) : 0); } /* * __wt_page_only_modify_set -- - * Mark the page (but only the page) dirty. + * Mark the page (but only the page) dirty. */ static inline void __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) { - uint64_t last_running; - - WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD)); - - last_running = 0; - if (page->modify->page_state == WT_PAGE_CLEAN) - last_running = S2C(session)->txn_global.last_running; - - /* - * We depend on the atomic operation being a write barrier, that is, a - * barrier to ensure all changes to the page are flushed before updating - * the page state and/or marking the tree dirty, otherwise checkpoints - * and/or page reconciliation might be looking at a clean page/tree. - * - * Every time the page transitions from clean to dirty, update the cache - * and transactional information. 
- * - * The page state can only ever be incremented above dirty by the number - * of concurrently running threads, so the counter will never approach - * the point where it would wrap. - */ - if (page->modify->page_state < WT_PAGE_DIRTY && - __wt_atomic_add32(&page->modify->page_state, 1) == - WT_PAGE_DIRTY_FIRST) { - __wt_cache_dirty_incr(session, page); - - /* - * We won the race to dirty the page, but another thread could - * have committed in the meantime, and the last_running field - * been updated past it. That is all very unlikely, but not - * impossible, so we take care to read the global state before - * the atomic increment. - * - * If the page was dirty on entry, then last_running == 0. The - * page could have become clean since then, if reconciliation - * completed. In that case, we leave the previous value for - * first_dirty_txn rather than potentially racing to update it, - * at worst, we'll unnecessarily write a page in a checkpoint. - */ - if (last_running != 0) - page->modify->first_dirty_txn = last_running; - } - - /* Check if this is the largest transaction ID to update the page. */ - if (WT_TXNID_LT(page->modify->update_txn, session->txn.id)) - page->modify->update_txn = session->txn.id; + uint64_t last_running; + + WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD)); + + last_running = 0; + if (page->modify->page_state == WT_PAGE_CLEAN) + last_running = S2C(session)->txn_global.last_running; + + /* + * We depend on the atomic operation being a write barrier, that is, a + * barrier to ensure all changes to the page are flushed before updating + * the page state and/or marking the tree dirty, otherwise checkpoints + * and/or page reconciliation might be looking at a clean page/tree. + * + * Every time the page transitions from clean to dirty, update the cache + * and transactional information. 
+ * + * The page state can only ever be incremented above dirty by the number + * of concurrently running threads, so the counter will never approach + * the point where it would wrap. + */ + if (page->modify->page_state < WT_PAGE_DIRTY && + __wt_atomic_add32(&page->modify->page_state, 1) == WT_PAGE_DIRTY_FIRST) { + __wt_cache_dirty_incr(session, page); + + /* + * We won the race to dirty the page, but another thread could + * have committed in the meantime, and the last_running field + * been updated past it. That is all very unlikely, but not + * impossible, so we take care to read the global state before + * the atomic increment. + * + * If the page was dirty on entry, then last_running == 0. The + * page could have become clean since then, if reconciliation + * completed. In that case, we leave the previous value for + * first_dirty_txn rather than potentially racing to update it, + * at worst, we'll unnecessarily write a page in a checkpoint. + */ + if (last_running != 0) + page->modify->first_dirty_txn = last_running; + } + + /* Check if this is the largest transaction ID to update the page. */ + if (WT_TXNID_LT(page->modify->update_txn, session->txn.id)) + page->modify->update_txn = session->txn.id; } /* * __wt_tree_modify_set -- - * Mark the tree dirty. + * Mark the tree dirty. */ static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session) { - /* - * Test before setting the dirty flag, it's a hot cache line. - * - * The tree's modified flag is cleared by the checkpoint thread: set it - * and insert a barrier before dirtying the page. (I don't think it's - * a problem if the tree is marked dirty with all the pages clean, it - * might result in an extra checkpoint that doesn't do any work but it - * shouldn't cause problems; regardless, let's play it safe.) - */ - if (!S2BT(session)->modified) { - /* Assert we never dirty a checkpoint handle. 
*/ - WT_ASSERT(session, session->dhandle->checkpoint == NULL); - - S2BT(session)->modified = true; - WT_FULL_BARRIER(); - } - - /* - * The btree may already be marked dirty while the connection is still - * clean; mark the connection dirty outside the test of the btree state. - */ - if (!S2C(session)->modified) - S2C(session)->modified = true; + /* + * Test before setting the dirty flag, it's a hot cache line. + * + * The tree's modified flag is cleared by the checkpoint thread: set it + * and insert a barrier before dirtying the page. (I don't think it's + * a problem if the tree is marked dirty with all the pages clean, it + * might result in an extra checkpoint that doesn't do any work but it + * shouldn't cause problems; regardless, let's play it safe.) + */ + if (!S2BT(session)->modified) { + /* Assert we never dirty a checkpoint handle. */ + WT_ASSERT(session, session->dhandle->checkpoint == NULL); + + S2BT(session)->modified = true; + WT_FULL_BARRIER(); + } + + /* + * The btree may already be marked dirty while the connection is still clean; mark the + * connection dirty outside the test of the btree state. + */ + if (!S2C(session)->modified) + S2C(session)->modified = true; } /* * __wt_page_modify_clear -- - * Clean a modified page. + * Clean a modified page. */ static inline void __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page) { - /* - * The page must be held exclusive when this call is made, this call - * can only be used when the page is owned by a single thread. - * - * Allow the call to be made on clean pages. - */ - if (__wt_page_is_modified(page)) { - /* - * The only part where ordering matters is during - * reconciliation where updates on other threads are performing - * writes to the page state that need to be visible to the - * reconciliation thread. - * - * Since clearing of the page state is not going to be happening - * during reconciliation on a separate thread, there's no write - * barrier needed here. 
- */ - page->modify->page_state = WT_PAGE_CLEAN; - __wt_cache_dirty_decr(session, page); - } + /* + * The page must be held exclusive when this call is made, this call + * can only be used when the page is owned by a single thread. + * + * Allow the call to be made on clean pages. + */ + if (__wt_page_is_modified(page)) { + /* + * The only part where ordering matters is during + * reconciliation where updates on other threads are performing + * writes to the page state that need to be visible to the + * reconciliation thread. + * + * Since clearing of the page state is not going to be happening + * during reconciliation on a separate thread, there's no write + * barrier needed here. + */ + page->modify->page_state = WT_PAGE_CLEAN; + __wt_cache_dirty_decr(session, page); + } } /* * __wt_page_modify_set -- - * Mark the page and tree dirty. + * Mark the page and tree dirty. */ static inline void __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) { - /* - * Mark the tree dirty (even if the page is already marked dirty), newly - * created pages to support "empty" files are dirty, but the file isn't - * marked dirty until there's a real change needing to be written. - */ - __wt_tree_modify_set(session); - - __wt_page_only_modify_set(session, page); + /* + * Mark the tree dirty (even if the page is already marked dirty), newly created pages to + * support "empty" files are dirty, but the file isn't marked dirty until there's a real change + * needing to be written. + */ + __wt_tree_modify_set(session); + + __wt_page_only_modify_set(session, page); } /* * __wt_page_parent_modify_set -- - * Mark the parent page, and optionally the tree, dirty. + * Mark the parent page, and optionally the tree, dirty. 
*/ static inline int -__wt_page_parent_modify_set( - WT_SESSION_IMPL *session, WT_REF *ref, bool page_only) +__wt_page_parent_modify_set(WT_SESSION_IMPL *session, WT_REF *ref, bool page_only) { - WT_PAGE *parent; - - /* - * This function exists as a place to stash this comment. There are a - * few places where we need to dirty a page's parent. The trick is the - * page's parent might split at any point, and the page parent might be - * the wrong parent at any particular time. We ignore this and dirty - * whatever page the page's reference structure points to. This is safe - * because if we're pointing to the wrong parent, that parent must have - * split, deepening the tree, which implies marking the original parent - * and all of the newly-created children as dirty. In other words, if - * we have the wrong parent page, everything was marked dirty already. - */ - parent = ref->home; - WT_RET(__wt_page_modify_init(session, parent)); - if (page_only) - __wt_page_only_modify_set(session, parent); - else - __wt_page_modify_set(session, parent); - return (0); + WT_PAGE *parent; + + /* + * This function exists as a place to stash this comment. There are a few places where we need + * to dirty a page's parent. The trick is the page's parent might split at any point, and the + * page parent might be the wrong parent at any particular time. We ignore this and dirty + * whatever page the page's reference structure points to. This is safe because if we're + * pointing to the wrong parent, that parent must have split, deepening the tree, which implies + * marking the original parent and all of the newly-created children as dirty. In other words, + * if we have the wrong parent page, everything was marked dirty already. 
+ */ + parent = ref->home; + WT_RET(__wt_page_modify_init(session, parent)); + if (page_only) + __wt_page_only_modify_set(session, parent); + else + __wt_page_modify_set(session, parent); + return (0); } /* * __wt_off_page -- - * Return if a pointer references off-page data. + * Return if a pointer references off-page data. */ static inline bool __wt_off_page(WT_PAGE *page, const void *p) { - /* - * There may be no underlying page, in which case the reference is - * off-page by definition. - */ - return (page->dsk == NULL || - p < (void *)page->dsk || - p >= (void *)((uint8_t *)page->dsk + page->dsk->mem_size)); + /* + * There may be no underlying page, in which case the reference is off-page by definition. + */ + return (page->dsk == NULL || p < (void *)page->dsk || + p >= (void *)((uint8_t *)page->dsk + page->dsk->mem_size)); } /* * __wt_ref_addr_free -- - * Free the address in a reference, if necessary. + * Free the address in a reference, if necessary. */ static inline void __wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref) { - if (ref->addr == NULL) - return; - - if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } - ref->addr = NULL; + if (ref->addr == NULL) + return; + + if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) { + __wt_free(session, ((WT_ADDR *)ref->addr)->addr); + __wt_free(session, ref->addr); + } + ref->addr = NULL; } /* * __wt_ref_key -- - * Return a reference to a row-store internal page key as cheaply as - * possible. + * Return a reference to a row-store internal page key as cheaply as possible. 
*/ static inline void __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep) { - uintptr_t v; - - /* - * An internal page key is in one of two places: if we instantiated the - * key (for example, when reading the page), WT_REF.ref_ikey references - * a WT_IKEY structure, otherwise WT_REF.ref_ikey references an on-page - * key offset/length pair. - * - * Now the magic: allocated memory must be aligned to store any standard - * type, and we expect some standard type to require at least quad-byte - * alignment, so allocated memory should have some clear low-order bits. - * On-page objects consist of an offset/length pair: the maximum page - * size currently fits into 29 bits, so we use the low-order bits of the - * pointer to mark the other bits of the pointer as encoding the key's - * location and length. This breaks if allocated memory isn't aligned, - * of course. - * - * In this specific case, we use bit 0x01 to mark an on-page key, else - * it's a WT_IKEY reference. The bit pattern for internal row-store - * on-page keys is: - * 32 bits key length - * 31 bits page offset of the key's bytes, - * 1 bits flags - */ -#define WT_IK_FLAG 0x01 -#define WT_IK_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32) -#define WT_IK_DECODE_KEY_LEN(v) ((v) >> 32) -#define WT_IK_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 1) -#define WT_IK_DECODE_KEY_OFFSET(v) (((v) & 0xFFFFFFFF) >> 1) - v = (uintptr_t)ref->ref_ikey; - if (v & WT_IK_FLAG) { - *(void **)keyp = - WT_PAGE_REF_OFFSET(page, WT_IK_DECODE_KEY_OFFSET(v)); - *sizep = WT_IK_DECODE_KEY_LEN(v); - } else { - *(void **)keyp = WT_IKEY_DATA(ref->ref_ikey); - *sizep = ((WT_IKEY *)ref->ref_ikey)->size; - } + uintptr_t v; + +/* + * An internal page key is in one of two places: if we instantiated the + * key (for example, when reading the page), WT_REF.ref_ikey references + * a WT_IKEY structure, otherwise WT_REF.ref_ikey references an on-page + * key offset/length pair. 
+ * + * Now the magic: allocated memory must be aligned to store any standard + * type, and we expect some standard type to require at least quad-byte + * alignment, so allocated memory should have some clear low-order bits. + * On-page objects consist of an offset/length pair: the maximum page + * size currently fits into 29 bits, so we use the low-order bits of the + * pointer to mark the other bits of the pointer as encoding the key's + * location and length. This breaks if allocated memory isn't aligned, + * of course. + * + * In this specific case, we use bit 0x01 to mark an on-page key, else + * it's a WT_IKEY reference. The bit pattern for internal row-store + * on-page keys is: + * 32 bits key length + * 31 bits page offset of the key's bytes, + * 1 bits flags + */ +#define WT_IK_FLAG 0x01 +#define WT_IK_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32) +#define WT_IK_DECODE_KEY_LEN(v) ((v) >> 32) +#define WT_IK_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 1) +#define WT_IK_DECODE_KEY_OFFSET(v) (((v)&0xFFFFFFFF) >> 1) + v = (uintptr_t)ref->ref_ikey; + if (v & WT_IK_FLAG) { + *(void **)keyp = WT_PAGE_REF_OFFSET(page, WT_IK_DECODE_KEY_OFFSET(v)); + *sizep = WT_IK_DECODE_KEY_LEN(v); + } else { + *(void **)keyp = WT_IKEY_DATA(ref->ref_ikey); + *sizep = ((WT_IKEY *)ref->ref_ikey)->size; + } } /* * __wt_ref_key_onpage_set -- - * Set a WT_REF to reference an on-page key. + * Set a WT_REF to reference an on-page key. */ static inline void __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack) { - uintptr_t v; - - /* - * See the comment in __wt_ref_key for an explanation of the magic. - */ - v = WT_IK_ENCODE_KEY_LEN(unpack->size) | - WT_IK_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) | - WT_IK_FLAG; - ref->ref_ikey = (void *)v; + uintptr_t v; + + /* + * See the comment in __wt_ref_key for an explanation of the magic. 
+ */ + v = WT_IK_ENCODE_KEY_LEN(unpack->size) | + WT_IK_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) | WT_IK_FLAG; + ref->ref_ikey = (void *)v; } /* * __wt_ref_key_instantiated -- - * Return if a WT_REF key is instantiated. + * Return if a WT_REF key is instantiated. */ static inline WT_IKEY * __wt_ref_key_instantiated(WT_REF *ref) { - uintptr_t v; + uintptr_t v; - /* - * See the comment in __wt_ref_key for an explanation of the magic. - */ - v = (uintptr_t)ref->ref_ikey; - return (v & WT_IK_FLAG ? NULL : ref->ref_ikey); + /* + * See the comment in __wt_ref_key for an explanation of the magic. + */ + v = (uintptr_t)ref->ref_ikey; + return (v & WT_IK_FLAG ? NULL : ref->ref_ikey); } /* * __wt_ref_key_clear -- - * Clear a WT_REF key. + * Clear a WT_REF key. */ static inline void __wt_ref_key_clear(WT_REF *ref) { - /* - * The key union has 2 8B fields; this is equivalent to: - * - * ref->ref_recno = WT_RECNO_OOB; - * ref->ref_ikey = NULL; - */ - ref->ref_recno = 0; + /* + * The key union has 2 8B fields; this is equivalent to: + * + * ref->ref_recno = WT_RECNO_OOB; + * ref->ref_ikey = NULL; + */ + ref->ref_recno = 0; } /* * __wt_row_leaf_key_info -- - * Return a row-store leaf page key referenced by a WT_ROW if it can be - * had without unpacking a cell, and information about the cell, if the key - * isn't cheaply available. + * Return a row-store leaf page key referenced by a WT_ROW if it can be had without unpacking a + * cell, and information about the cell, if the key isn't cheaply available. 
*/ static inline bool -__wt_row_leaf_key_info(WT_PAGE *page, void *copy, - WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep) +__wt_row_leaf_key_info( + WT_PAGE *page, void *copy, WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep) { - WT_IKEY *ikey; - uintptr_t v; - - v = (uintptr_t)copy; - - /* - * A row-store leaf page key is in one of two places: if instantiated, - * the WT_ROW pointer references a WT_IKEY structure, otherwise, it - * references an on-page offset. Further, on-page keys are in one of - * two states: if the key is a simple key (not an overflow key, prefix - * compressed or Huffman encoded, all of which are likely), the key's - * offset/size is encoded in the pointer. Otherwise, the offset is to - * the key's on-page cell. - * - * Now the magic: allocated memory must be aligned to store any standard - * type, and we expect some standard type to require at least quad-byte - * alignment, so allocated memory should have some clear low-order bits. - * On-page objects consist of an offset/length pair: the maximum page - * size currently fits into 29 bits, so we use the low-order bits of the - * pointer to mark the other bits of the pointer as encoding the key's - * location and length. This breaks if allocated memory isn't aligned, - * of course. - * - * In this specific case, we use bit 0x01 to mark an on-page cell, bit - * 0x02 to mark an on-page key, 0x03 to mark an on-page key/value pair, - * otherwise it's a WT_IKEY reference. The bit pattern for on-page cells - * is: - * 29 bits page offset of the key's cell, - * 2 bits flags - * - * The bit pattern for on-page keys is: - * 32 bits key length, - * 29 bits page offset of the key's bytes, - * 2 bits flags - * - * But, while that allows us to skip decoding simple key cells, we also - * want to skip decoding the value cell in the case where the value cell - * is also simple/short. We use bit 0x03 to mark an encoded on-page key - * and value pair. 
The bit pattern for on-page key/value pairs is: - * 9 bits key length, - * 13 bits value length, - * 20 bits page offset of the key's bytes, - * 20 bits page offset of the value's bytes, - * 2 bits flags - * - * These bit patterns are in-memory only, of course, so can be modified - * (we could even tune for specific workloads). Generally, the fields - * are larger than the anticipated values being stored (512B keys, 8KB - * values, 1MB pages), hopefully that won't be necessary. - * - * This function returns a list of things about the key (instantiation - * reference, cell reference and key/length pair). Our callers know - * the order in which we look things up and the information returned; - * for example, the cell will never be returned if we are working with - * an on-page key. - */ -#define WT_CELL_FLAG 0x01 -#define WT_CELL_ENCODE_OFFSET(v) ((uintptr_t)(v) << 2) -#define WT_CELL_DECODE_OFFSET(v) (((v) & 0xFFFFFFFF) >> 2) - -#define WT_K_FLAG 0x02 -#define WT_K_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32) -#define WT_K_DECODE_KEY_LEN(v) ((v) >> 32) -#define WT_K_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 2) -#define WT_K_DECODE_KEY_OFFSET(v) (((v) & 0xFFFFFFFF) >> 2) - -#define WT_KV_FLAG 0x03 -#define WT_KV_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 55) -#define WT_KV_DECODE_KEY_LEN(v) ((v) >> 55) -#define WT_KV_MAX_KEY_LEN (0x200 - 1) -#define WT_KV_ENCODE_VALUE_LEN(v) ((uintptr_t)(v) << 42) -#define WT_KV_DECODE_VALUE_LEN(v) (((v) & 0x007FFC0000000000) >> 42) -#define WT_KV_MAX_VALUE_LEN (0x2000 - 1) -#define WT_KV_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 22) -#define WT_KV_DECODE_KEY_OFFSET(v) (((v) & 0x000003FFFFC00000) >> 22) -#define WT_KV_MAX_KEY_OFFSET (0x100000 - 1) -#define WT_KV_ENCODE_VALUE_OFFSET(v) ((uintptr_t)(v) << 2) -#define WT_KV_DECODE_VALUE_OFFSET(v) (((v) & 0x00000000003FFFFC) >> 2) -#define WT_KV_MAX_VALUE_OFFSET (0x100000 - 1) - switch (v & 0x03) { - case WT_CELL_FLAG: - /* On-page cell: no instantiated key. 
*/ - if (ikeyp != NULL) - *ikeyp = NULL; - if (cellp != NULL) - *cellp = - WT_PAGE_REF_OFFSET(page, WT_CELL_DECODE_OFFSET(v)); - if (datap != NULL) { - *(void **)datap = NULL; - *sizep = 0; - } - return (false); - case WT_K_FLAG: - /* Encoded key: no instantiated key, no cell. */ - if (cellp != NULL) - *cellp = NULL; - if (ikeyp != NULL) - *ikeyp = NULL; - if (datap != NULL) { - *(void **)datap = - WT_PAGE_REF_OFFSET(page, WT_K_DECODE_KEY_OFFSET(v)); - *sizep = WT_K_DECODE_KEY_LEN(v); - return (true); - } - return (false); - case WT_KV_FLAG: - /* Encoded key/value pair: no instantiated key, no cell. */ - if (cellp != NULL) - *cellp = NULL; - if (ikeyp != NULL) - *ikeyp = NULL; - if (datap != NULL) { - *(void **)datap = WT_PAGE_REF_OFFSET( - page, WT_KV_DECODE_KEY_OFFSET(v)); - *sizep = WT_KV_DECODE_KEY_LEN(v); - return (true); - } - return (false); - - } - - /* Instantiated key. */ - ikey = copy; - if (ikeyp != NULL) - *ikeyp = copy; - if (cellp != NULL) - *cellp = WT_PAGE_REF_OFFSET(page, ikey->cell_offset); - if (datap != NULL) { - *(void **)datap = WT_IKEY_DATA(ikey); - *sizep = ikey->size; - return (true); - } - return (false); + WT_IKEY *ikey; + uintptr_t v; + + v = (uintptr_t)copy; + +/* + * A row-store leaf page key is in one of two places: if instantiated, + * the WT_ROW pointer references a WT_IKEY structure, otherwise, it + * references an on-page offset. Further, on-page keys are in one of + * two states: if the key is a simple key (not an overflow key, prefix + * compressed or Huffman encoded, all of which are likely), the key's + * offset/size is encoded in the pointer. Otherwise, the offset is to + * the key's on-page cell. + * + * Now the magic: allocated memory must be aligned to store any standard + * type, and we expect some standard type to require at least quad-byte + * alignment, so allocated memory should have some clear low-order bits. 
+ * On-page objects consist of an offset/length pair: the maximum page + * size currently fits into 29 bits, so we use the low-order bits of the + * pointer to mark the other bits of the pointer as encoding the key's + * location and length. This breaks if allocated memory isn't aligned, + * of course. + * + * In this specific case, we use bit 0x01 to mark an on-page cell, bit + * 0x02 to mark an on-page key, 0x03 to mark an on-page key/value pair, + * otherwise it's a WT_IKEY reference. The bit pattern for on-page cells + * is: + * 29 bits page offset of the key's cell, + * 2 bits flags + * + * The bit pattern for on-page keys is: + * 32 bits key length, + * 29 bits page offset of the key's bytes, + * 2 bits flags + * + * But, while that allows us to skip decoding simple key cells, we also + * want to skip decoding the value cell in the case where the value cell + * is also simple/short. We use bit 0x03 to mark an encoded on-page key + * and value pair. The bit pattern for on-page key/value pairs is: + * 9 bits key length, + * 13 bits value length, + * 20 bits page offset of the key's bytes, + * 20 bits page offset of the value's bytes, + * 2 bits flags + * + * These bit patterns are in-memory only, of course, so can be modified + * (we could even tune for specific workloads). Generally, the fields + * are larger than the anticipated values being stored (512B keys, 8KB + * values, 1MB pages), hopefully that won't be necessary. + * + * This function returns a list of things about the key (instantiation + * reference, cell reference and key/length pair). Our callers know + * the order in which we look things up and the information returned; + * for example, the cell will never be returned if we are working with + * an on-page key. 
+ */ +#define WT_CELL_FLAG 0x01 +#define WT_CELL_ENCODE_OFFSET(v) ((uintptr_t)(v) << 2) +#define WT_CELL_DECODE_OFFSET(v) (((v)&0xFFFFFFFF) >> 2) + +#define WT_K_FLAG 0x02 +#define WT_K_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 32) +#define WT_K_DECODE_KEY_LEN(v) ((v) >> 32) +#define WT_K_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 2) +#define WT_K_DECODE_KEY_OFFSET(v) (((v)&0xFFFFFFFF) >> 2) + +#define WT_KV_FLAG 0x03 +#define WT_KV_ENCODE_KEY_LEN(v) ((uintptr_t)(v) << 55) +#define WT_KV_DECODE_KEY_LEN(v) ((v) >> 55) +#define WT_KV_MAX_KEY_LEN (0x200 - 1) +#define WT_KV_ENCODE_VALUE_LEN(v) ((uintptr_t)(v) << 42) +#define WT_KV_DECODE_VALUE_LEN(v) (((v)&0x007FFC0000000000) >> 42) +#define WT_KV_MAX_VALUE_LEN (0x2000 - 1) +#define WT_KV_ENCODE_KEY_OFFSET(v) ((uintptr_t)(v) << 22) +#define WT_KV_DECODE_KEY_OFFSET(v) (((v)&0x000003FFFFC00000) >> 22) +#define WT_KV_MAX_KEY_OFFSET (0x100000 - 1) +#define WT_KV_ENCODE_VALUE_OFFSET(v) ((uintptr_t)(v) << 2) +#define WT_KV_DECODE_VALUE_OFFSET(v) (((v)&0x00000000003FFFFC) >> 2) +#define WT_KV_MAX_VALUE_OFFSET (0x100000 - 1) + switch (v & 0x03) { + case WT_CELL_FLAG: + /* On-page cell: no instantiated key. */ + if (ikeyp != NULL) + *ikeyp = NULL; + if (cellp != NULL) + *cellp = WT_PAGE_REF_OFFSET(page, WT_CELL_DECODE_OFFSET(v)); + if (datap != NULL) { + *(void **)datap = NULL; + *sizep = 0; + } + return (false); + case WT_K_FLAG: + /* Encoded key: no instantiated key, no cell. */ + if (cellp != NULL) + *cellp = NULL; + if (ikeyp != NULL) + *ikeyp = NULL; + if (datap != NULL) { + *(void **)datap = WT_PAGE_REF_OFFSET(page, WT_K_DECODE_KEY_OFFSET(v)); + *sizep = WT_K_DECODE_KEY_LEN(v); + return (true); + } + return (false); + case WT_KV_FLAG: + /* Encoded key/value pair: no instantiated key, no cell. 
*/ + if (cellp != NULL) + *cellp = NULL; + if (ikeyp != NULL) + *ikeyp = NULL; + if (datap != NULL) { + *(void **)datap = WT_PAGE_REF_OFFSET(page, WT_KV_DECODE_KEY_OFFSET(v)); + *sizep = WT_KV_DECODE_KEY_LEN(v); + return (true); + } + return (false); + } + + /* Instantiated key. */ + ikey = copy; + if (ikeyp != NULL) + *ikeyp = copy; + if (cellp != NULL) + *cellp = WT_PAGE_REF_OFFSET(page, ikey->cell_offset); + if (datap != NULL) { + *(void **)datap = WT_IKEY_DATA(ikey); + *sizep = ikey->size; + return (true); + } + return (false); } /* * __wt_row_leaf_key_set_cell -- - * Set a WT_ROW to reference an on-page row-store leaf cell. + * Set a WT_ROW to reference an on-page row-store leaf cell. */ static inline void __wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell) { - uintptr_t v; - - /* - * See the comment in __wt_row_leaf_key_info for an explanation of the - * magic. - */ - v = WT_CELL_ENCODE_OFFSET(WT_PAGE_DISK_OFFSET(page, cell)) | - WT_CELL_FLAG; - WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries); - WT_ROW_KEY_SET(rip, v); + uintptr_t v; + + /* + * See the comment in __wt_row_leaf_key_info for an explanation of the magic. + */ + v = WT_CELL_ENCODE_OFFSET(WT_PAGE_DISK_OFFSET(page, cell)) | WT_CELL_FLAG; + WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries); + WT_ROW_KEY_SET(rip, v); } /* * __wt_row_leaf_key_set -- - * Set a WT_ROW to reference an on-page row-store leaf key. + * Set a WT_ROW to reference an on-page row-store leaf key. */ static inline void __wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack) { - uintptr_t v; - - /* - * See the comment in __wt_row_leaf_key_info for an explanation of the - * magic. 
- */ - v = WT_K_ENCODE_KEY_LEN(unpack->size) | - WT_K_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) | - WT_K_FLAG; - WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries); - WT_ROW_KEY_SET(rip, v); + uintptr_t v; + + /* + * See the comment in __wt_row_leaf_key_info for an explanation of the magic. + */ + v = WT_K_ENCODE_KEY_LEN(unpack->size) | + WT_K_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) | WT_K_FLAG; + WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries); + WT_ROW_KEY_SET(rip, v); } /* * __wt_row_leaf_value_set -- - * Set a WT_ROW to reference an on-page row-store leaf value. + * Set a WT_ROW to reference an on-page row-store leaf value. */ static inline void __wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack) { - uintptr_t key_len, key_offset, value_offset, v; - - v = (uintptr_t)WT_ROW_KEY_COPY(rip); - - /* - * See the comment in __wt_row_leaf_key_info for an explanation of the - * magic. - */ - if (!(v & WT_K_FLAG)) /* Already an encoded key */ - return; - - key_len = WT_K_DECODE_KEY_LEN(v); /* Key length */ - if (key_len > WT_KV_MAX_KEY_LEN) - return; - if (unpack->size > WT_KV_MAX_VALUE_LEN) /* Value length */ - return; - - key_offset = WT_K_DECODE_KEY_OFFSET(v); /* Page offsets */ - if (key_offset > WT_KV_MAX_KEY_OFFSET) - return; - value_offset = WT_PAGE_DISK_OFFSET(page, unpack->data); - if (value_offset > WT_KV_MAX_VALUE_OFFSET) - return; - - v = WT_KV_ENCODE_KEY_LEN(key_len) | - WT_KV_ENCODE_VALUE_LEN(unpack->size) | - WT_KV_ENCODE_KEY_OFFSET(key_offset) | - WT_KV_ENCODE_VALUE_OFFSET(value_offset) | WT_KV_FLAG; - WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries); - WT_ROW_KEY_SET(rip, v); + uintptr_t key_len, key_offset, value_offset, v; + + v = (uintptr_t)WT_ROW_KEY_COPY(rip); + + /* + * See the comment in __wt_row_leaf_key_info for an explanation of the magic. 
+ */ + if (!(v & WT_K_FLAG)) /* Already an encoded key */ + return; + + key_len = WT_K_DECODE_KEY_LEN(v); /* Key length */ + if (key_len > WT_KV_MAX_KEY_LEN) + return; + if (unpack->size > WT_KV_MAX_VALUE_LEN) /* Value length */ + return; + + key_offset = WT_K_DECODE_KEY_OFFSET(v); /* Page offsets */ + if (key_offset > WT_KV_MAX_KEY_OFFSET) + return; + value_offset = WT_PAGE_DISK_OFFSET(page, unpack->data); + if (value_offset > WT_KV_MAX_VALUE_OFFSET) + return; + + v = WT_KV_ENCODE_KEY_LEN(key_len) | WT_KV_ENCODE_VALUE_LEN(unpack->size) | + WT_KV_ENCODE_KEY_OFFSET(key_offset) | WT_KV_ENCODE_VALUE_OFFSET(value_offset) | WT_KV_FLAG; + WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries); + WT_ROW_KEY_SET(rip, v); } /* * __wt_row_leaf_key -- - * Set a buffer to reference a row-store leaf page key as cheaply as - * possible. + * Set a buffer to reference a row-store leaf page key as cheaply as possible. */ static inline int -__wt_row_leaf_key(WT_SESSION_IMPL *session, - WT_PAGE *page, WT_ROW *rip, WT_ITEM *key, bool instantiate) +__wt_row_leaf_key( + WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key, bool instantiate) { - void *copy; - - /* - * A front-end for __wt_row_leaf_key_work, here to inline fast paths. - * - * The row-store key can change underfoot; explicitly take a copy. - */ - copy = WT_ROW_KEY_COPY(rip); - - /* - * All we handle here are on-page keys (which should be a common case), - * and instantiated keys (which start out rare, but become more common - * as a leaf page is searched, instantiating prefix-compressed keys). - */ - if (__wt_row_leaf_key_info( - page, copy, NULL, NULL, &key->data, &key->size)) - return (0); - - /* - * The alternative is an on-page cell with some kind of compressed or - * overflow key that's never been instantiated. Call the underlying - * worker function to figure it out. 
- */ - return (__wt_row_leaf_key_work(session, page, rip, key, instantiate)); + void *copy; + + /* + * A front-end for __wt_row_leaf_key_work, here to inline fast paths. + * + * The row-store key can change underfoot; explicitly take a copy. + */ + copy = WT_ROW_KEY_COPY(rip); + + /* + * All we handle here are on-page keys (which should be a common case), and instantiated keys + * (which start out rare, but become more common as a leaf page is searched, instantiating + * prefix-compressed keys). + */ + if (__wt_row_leaf_key_info(page, copy, NULL, NULL, &key->data, &key->size)) + return (0); + + /* + * The alternative is an on-page cell with some kind of compressed or overflow key that's never + * been instantiated. Call the underlying worker function to figure it out. + */ + return (__wt_row_leaf_key_work(session, page, rip, key, instantiate)); } /* * __wt_row_leaf_value_cell -- - * Return the unpacked value for a row-store leaf page key. + * Return the unpacked value for a row-store leaf page key. */ static inline void -__wt_row_leaf_value_cell(WT_SESSION_IMPL *session, - WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack) +__wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, + WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack) { - WT_CELL *kcell, *vcell; - WT_CELL_UNPACK unpack; - size_t size; - void *copy, *key; - - /* If we already have an unpacked key cell, use it. */ - if (kpack != NULL) - vcell = (WT_CELL *) - ((uint8_t *)kpack->cell + __wt_cell_total_len(kpack)); - else { - /* - * The row-store key can change underfoot; explicitly take a - * copy. - */ - copy = WT_ROW_KEY_COPY(rip); - - /* - * Figure out where the key is, step past it to the value cell. - * The test for a cell not being set tells us that we have an - * on-page key, otherwise we're looking at an instantiated key - * or on-page cell, both of which require an unpack of the key's - * cell to find the value cell that follows. 
- */ - if (__wt_row_leaf_key_info( - page, copy, NULL, &kcell, &key, &size) && kcell == NULL) - vcell = (WT_CELL *)((uint8_t *)key + size); - else { - __wt_cell_unpack(session, page, kcell, &unpack); - vcell = (WT_CELL *)((uint8_t *) - unpack.cell + __wt_cell_total_len(&unpack)); - } - } - - __wt_cell_unpack(session, - page, __wt_cell_leaf_value_parse(page, vcell), vpack); + WT_CELL *kcell, *vcell; + WT_CELL_UNPACK unpack; + size_t size; + void *copy, *key; + + /* If we already have an unpacked key cell, use it. */ + if (kpack != NULL) + vcell = (WT_CELL *)((uint8_t *)kpack->cell + __wt_cell_total_len(kpack)); + else { + /* + * The row-store key can change underfoot; explicitly take a copy. + */ + copy = WT_ROW_KEY_COPY(rip); + + /* + * Figure out where the key is, step past it to the value cell. The test for a cell not + * being set tells us that we have an on-page key, otherwise we're looking at an + * instantiated key or on-page cell, both of which require an unpack of the key's cell to + * find the value cell that follows. + */ + if (__wt_row_leaf_key_info(page, copy, NULL, &kcell, &key, &size) && kcell == NULL) + vcell = (WT_CELL *)((uint8_t *)key + size); + else { + __wt_cell_unpack(session, page, kcell, &unpack); + vcell = (WT_CELL *)((uint8_t *)unpack.cell + __wt_cell_total_len(&unpack)); + } + } + + __wt_cell_unpack(session, page, __wt_cell_leaf_value_parse(page, vcell), vpack); } /* * __wt_row_leaf_value -- - * Return the value for a row-store leaf page encoded key/value pair. + * Return the value for a row-store leaf page encoded key/value pair. */ static inline bool __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) { - uintptr_t v; - - /* The row-store key can change underfoot; explicitly take a copy. */ - v = (uintptr_t)WT_ROW_KEY_COPY(rip); - - /* - * See the comment in __wt_row_leaf_key_info for an explanation of the - * magic. 
- */ - if ((v & 0x03) == WT_KV_FLAG) { - value->data = - WT_PAGE_REF_OFFSET(page, WT_KV_DECODE_VALUE_OFFSET(v)); - value->size = WT_KV_DECODE_VALUE_LEN(v); - return (true); - } - return (false); + uintptr_t v; + + /* The row-store key can change underfoot; explicitly take a copy. */ + v = (uintptr_t)WT_ROW_KEY_COPY(rip); + + /* + * See the comment in __wt_row_leaf_key_info for an explanation of the magic. + */ + if ((v & 0x03) == WT_KV_FLAG) { + value->data = WT_PAGE_REF_OFFSET(page, WT_KV_DECODE_VALUE_OFFSET(v)); + value->size = WT_KV_DECODE_VALUE_LEN(v); + return (true); + } + return (false); } /* * __wt_ref_info -- - * Return the addr/size and type triplet for a reference. + * Return the addr/size and type triplet for a reference. */ static inline void -__wt_ref_info(WT_SESSION_IMPL *session, - WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) +__wt_ref_info( + WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) { - WT_ADDR *addr; - WT_CELL_UNPACK *unpack, _unpack; - WT_PAGE *page; - - addr = ref->addr; - unpack = &_unpack; - page = ref->home; - - /* - * If NULL, there is no location. - * If off-page, the pointer references a WT_ADDR structure. - * If on-page, the pointer references a cell. - * - * The type is of a limited set: internal, leaf or no-overflow leaf. 
- */ - if (addr == NULL) { - *addrp = NULL; - *sizep = 0; - if (typep != NULL) - *typep = 0; - } else if (__wt_off_page(page, addr)) { - *addrp = addr->addr; - *sizep = addr->size; - if (typep != NULL) - switch (addr->type) { - case WT_ADDR_INT: - *typep = WT_CELL_ADDR_INT; - break; - case WT_ADDR_LEAF: - *typep = WT_CELL_ADDR_LEAF; - break; - case WT_ADDR_LEAF_NO: - *typep = WT_CELL_ADDR_LEAF_NO; - break; - default: - *typep = 0; - break; - } - } else { - __wt_cell_unpack(session, page, (WT_CELL *)addr, unpack); - *addrp = unpack->data; - *sizep = unpack->size; - if (typep != NULL) - *typep = unpack->type; - } + WT_ADDR *addr; + WT_CELL_UNPACK *unpack, _unpack; + WT_PAGE *page; + + addr = ref->addr; + unpack = &_unpack; + page = ref->home; + + /* + * If NULL, there is no location. + * If off-page, the pointer references a WT_ADDR structure. + * If on-page, the pointer references a cell. + * + * The type is of a limited set: internal, leaf or no-overflow leaf. + */ + if (addr == NULL) { + *addrp = NULL; + *sizep = 0; + if (typep != NULL) + *typep = 0; + } else if (__wt_off_page(page, addr)) { + *addrp = addr->addr; + *sizep = addr->size; + if (typep != NULL) + switch (addr->type) { + case WT_ADDR_INT: + *typep = WT_CELL_ADDR_INT; + break; + case WT_ADDR_LEAF: + *typep = WT_CELL_ADDR_LEAF; + break; + case WT_ADDR_LEAF_NO: + *typep = WT_CELL_ADDR_LEAF_NO; + break; + default: + *typep = 0; + break; + } + } else { + __wt_cell_unpack(session, page, (WT_CELL *)addr, unpack); + *addrp = unpack->data; + *sizep = unpack->size; + if (typep != NULL) + *typep = unpack->type; + } } /* * __wt_ref_block_free -- - * Free the on-disk block for a reference and clear the address. + * Free the on-disk block for a reference and clear the address. 
*/ static inline int __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) { - size_t addr_size; - const uint8_t *addr; + size_t addr_size; + const uint8_t *addr; - if (ref->addr == NULL) - return (0); + if (ref->addr == NULL) + return (0); - __wt_ref_info(session, ref, &addr, &addr_size, NULL); - WT_RET(__wt_btree_block_free(session, addr, addr_size)); + __wt_ref_info(session, ref, &addr, &addr_size, NULL); + WT_RET(__wt_btree_block_free(session, addr, addr_size)); - /* Clear the address (so we don't free it twice). */ - __wt_ref_addr_free(session, ref); - return (0); + /* Clear the address (so we don't free it twice). */ + __wt_ref_addr_free(session, ref); + return (0); } /* * __wt_page_del_active -- - * Return if a truncate operation is active. + * Return if a truncate operation is active. */ static inline bool __wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) { - WT_PAGE_DELETED *page_del; - uint8_t prepare_state; - - if ((page_del = ref->page_del) == NULL) - return (false); - if (page_del->txnid == WT_TXN_ABORTED) - return (false); - WT_ORDERED_READ(prepare_state, page_del->prepare_state); - if (prepare_state == WT_PREPARE_INPROGRESS || - prepare_state == WT_PREPARE_LOCKED) - return (true); - return (visible_all ? - !__wt_txn_visible_all(session, - page_del->txnid, page_del->timestamp) : - !__wt_txn_visible(session, page_del->txnid, page_del->timestamp)); + WT_PAGE_DELETED *page_del; + uint8_t prepare_state; + + if ((page_del = ref->page_del) == NULL) + return (false); + if (page_del->txnid == WT_TXN_ABORTED) + return (false); + WT_ORDERED_READ(prepare_state, page_del->prepare_state); + if (prepare_state == WT_PREPARE_INPROGRESS || prepare_state == WT_PREPARE_LOCKED) + return (true); + return (visible_all ? 
!__wt_txn_visible_all(session, page_del->txnid, page_del->timestamp) : + !__wt_txn_visible(session, page_del->txnid, page_del->timestamp)); } /* * __wt_page_las_active -- - * Return if lookaside data for a page is still required. + * Return if lookaside data for a page is still required. */ static inline bool __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref) { - WT_PAGE_LOOKASIDE *page_las; - - if ((page_las = ref->page_las) == NULL) - return (false); - if (page_las->resolved) - return (false); - if (!page_las->skew_newest || page_las->has_prepares) - return (true); - if (__wt_txn_visible_all(session, page_las->max_txn, - page_las->max_timestamp)) - return (false); - - return (true); + WT_PAGE_LOOKASIDE *page_las; + + if ((page_las = ref->page_las) == NULL) + return (false); + if (page_las->resolved) + return (false); + if (!page_las->skew_newest || page_las->has_prepares) + return (true); + if (__wt_txn_visible_all(session, page_las->max_txn, page_las->max_timestamp)) + return (false); + + return (true); } /* * __wt_btree_can_evict_dirty -- - * Check whether eviction of dirty pages or splits are permitted in the - * current tree. - * - * We cannot evict dirty pages or split while a checkpoint is in progress, - * unless the checkpoint thread is doing the work. - * - * Also, during connection close, if we take a checkpoint as of a - * timestamp, eviction should not write dirty pages to avoid updates newer - * than the checkpoint timestamp leaking to disk. + * Check whether eviction of dirty pages or splits are permitted in the current tree. We cannot + * evict dirty pages or split while a checkpoint is in progress, unless the checkpoint thread is + * doing the work. Also, during connection close, if we take a checkpoint as of a timestamp, + * eviction should not write dirty pages to avoid updates newer than the checkpoint timestamp + * leaking to disk. 
*/ static inline bool __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session) { - WT_BTREE *btree; + WT_BTREE *btree; - btree = S2BT(session); - return ((!WT_BTREE_SYNCING(btree) || WT_SESSION_BTREE_SYNC(session)) && - !F_ISSET(S2C(session), WT_CONN_CLOSING_TIMESTAMP)); + btree = S2BT(session); + return ((!WT_BTREE_SYNCING(btree) || WT_SESSION_BTREE_SYNC(session)) && + !F_ISSET(S2C(session), WT_CONN_CLOSING_TIMESTAMP)); } /* * __wt_leaf_page_can_split -- - * Check whether a page can be split in memory. + * Check whether a page can be split in memory. */ static inline bool __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_BTREE *btree; - WT_INSERT *ins; - WT_INSERT_HEAD *ins_head; - size_t size; - int count; - - btree = S2BT(session); - - /* - * Checkpoints can't do in-memory splits in the tree they are walking: - * that can lead to corruption when the parent internal page is - * updated. - */ - if (WT_SESSION_BTREE_SYNC(session)) - return (false); - - /* - * Only split a page once, otherwise workloads that update in the middle - * of the page could continually split without benefit. - */ - if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT)) - return (false); - - /* - * Check for pages with append-only workloads. A common application - * pattern is to have multiple threads frantically appending to the - * tree. We want to reconcile and evict this page, but we'd like to - * do it without making the appending threads wait. See if it's worth - * doing a split to let the threads continue before doing eviction. - * - * Ignore anything other than large, dirty leaf pages. We depend on the - * page being dirty for correctness (the page must be reconciled again - * before being evicted after the split, information from a previous - * reconciliation will be wrong, so we can't evict immediately). 
- */ - if (page->memory_footprint < btree->splitmempage) - return (false); - if (WT_PAGE_IS_INTERNAL(page)) - return (false); - if (!__wt_page_is_modified(page)) - return (false); - - /* - * There is no point doing an in-memory split unless there is a lot of - * data in the last skiplist on the page. Split if there are enough - * items and the skiplist does not fit within a single disk page. - */ - ins_head = page->type == WT_PAGE_ROW_LEAF ? - (page->entries == 0 ? - WT_ROW_INSERT_SMALLEST(page) : - WT_ROW_INSERT_SLOT(page, page->entries - 1)) : - WT_COL_APPEND(page); - if (ins_head == NULL) - return (false); - - /* - * In the extreme case, where the page is much larger than the maximum - * size, split as soon as there are 5 items on the page. - */ -#define WT_MAX_SPLIT_COUNT 5 - if (page->memory_footprint > (size_t)btree->maxleafpage * 2) { - for (count = 0, ins = ins_head->head[0]; - ins != NULL; - ins = ins->next[0]) { - if (++count < WT_MAX_SPLIT_COUNT) - continue; - - WT_STAT_CONN_INCR(session, cache_inmem_splittable); - WT_STAT_DATA_INCR(session, cache_inmem_splittable); - return (true); - } - - return (false); - } - - /* - * Rather than scanning the whole list, walk a higher level, which - * gives a sample of the items -- at level 0 we have all the items, at - * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more - * than 30 items and more data than would fit in a disk page, split. 
- */ -#define WT_MIN_SPLIT_DEPTH 2 -#define WT_MIN_SPLIT_COUNT 30 -#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */ - - for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; - ins != NULL; - ins = ins->next[WT_MIN_SPLIT_DEPTH]) { - count += WT_MIN_SPLIT_MULTIPLIER; - size += WT_MIN_SPLIT_MULTIPLIER * - (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd)); - if (count > WT_MIN_SPLIT_COUNT && - size > (size_t)btree->maxleafpage) { - WT_STAT_CONN_INCR(session, cache_inmem_splittable); - WT_STAT_DATA_INCR(session, cache_inmem_splittable); - return (true); - } - } - return (false); + WT_BTREE *btree; + WT_INSERT *ins; + WT_INSERT_HEAD *ins_head; + size_t size; + int count; + + btree = S2BT(session); + + /* + * Checkpoints can't do in-memory splits in the tree they are walking: that can lead to + * corruption when the parent internal page is updated. + */ + if (WT_SESSION_BTREE_SYNC(session)) + return (false); + + /* + * Only split a page once, otherwise workloads that update in the middle of the page could + * continually split without benefit. + */ + if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT)) + return (false); + + /* + * Check for pages with append-only workloads. A common application + * pattern is to have multiple threads frantically appending to the + * tree. We want to reconcile and evict this page, but we'd like to + * do it without making the appending threads wait. See if it's worth + * doing a split to let the threads continue before doing eviction. + * + * Ignore anything other than large, dirty leaf pages. We depend on the + * page being dirty for correctness (the page must be reconciled again + * before being evicted after the split, information from a previous + * reconciliation will be wrong, so we can't evict immediately). 
+ */ + if (page->memory_footprint < btree->splitmempage) + return (false); + if (WT_PAGE_IS_INTERNAL(page)) + return (false); + if (!__wt_page_is_modified(page)) + return (false); + + /* + * There is no point doing an in-memory split unless there is a lot of data in the last skiplist + * on the page. Split if there are enough items and the skiplist does not fit within a single + * disk page. + */ + ins_head = page->type == WT_PAGE_ROW_LEAF ? + (page->entries == 0 ? WT_ROW_INSERT_SMALLEST(page) : + WT_ROW_INSERT_SLOT(page, page->entries - 1)) : + WT_COL_APPEND(page); + if (ins_head == NULL) + return (false); + +/* + * In the extreme case, where the page is much larger than the maximum size, split as soon as there + * are 5 items on the page. + */ +#define WT_MAX_SPLIT_COUNT 5 + if (page->memory_footprint > (size_t)btree->maxleafpage * 2) { + for (count = 0, ins = ins_head->head[0]; ins != NULL; ins = ins->next[0]) { + if (++count < WT_MAX_SPLIT_COUNT) + continue; + + WT_STAT_CONN_INCR(session, cache_inmem_splittable); + WT_STAT_DATA_INCR(session, cache_inmem_splittable); + return (true); + } + + return (false); + } + +/* + * Rather than scanning the whole list, walk a higher level, which gives a sample of the items -- at + * level 0 we have all the items, at level 1 we have 1/4 and at level 2 we have 1/16th. If we see + * more than 30 items and more data than would fit in a disk page, split. 
+ */ +#define WT_MIN_SPLIT_DEPTH 2 +#define WT_MIN_SPLIT_COUNT 30 +#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */ + + for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; ins != NULL; + ins = ins->next[WT_MIN_SPLIT_DEPTH]) { + count += WT_MIN_SPLIT_MULTIPLIER; + size += WT_MIN_SPLIT_MULTIPLIER * (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd)); + if (count > WT_MIN_SPLIT_COUNT && size > (size_t)btree->maxleafpage) { + WT_STAT_CONN_INCR(session, cache_inmem_splittable); + WT_STAT_DATA_INCR(session, cache_inmem_splittable); + return (true); + } + } + return (false); } /* * __wt_page_evict_retry -- - * Avoid busy-spinning attempting to evict the same page all the time. + * Avoid busy-spinning attempting to evict the same page all the time. */ static inline bool __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_PAGE_MODIFY *mod; - WT_TXN_GLOBAL *txn_global; - wt_timestamp_t pinned_ts; - - txn_global = &S2C(session)->txn_global; - - /* - * If the page hasn't been through one round of update/restore, give it - * a try. - */ - if ((mod = page->modify) == NULL || - !FLD_ISSET(mod->restore_state, WT_PAGE_RS_RESTORED)) - return (true); - - /* - * Retry if a reasonable amount of eviction time has passed, the - * choice of 5 eviction passes as a reasonable amount of time is - * currently pretty arbitrary. - */ - if (__wt_cache_aggressive(session) || - mod->last_evict_pass_gen + 5 < S2C(session)->cache->evict_pass_gen) - return (true); - - /* Retry if the global transaction state has moved forward. 
*/ - if (txn_global->current == txn_global->oldest_id || - mod->last_eviction_id != __wt_txn_oldest_id(session)) - return (true); - - if (mod->last_eviction_timestamp == WT_TS_NONE) - return (true); - - __wt_txn_pinned_timestamp(session, &pinned_ts); - if (pinned_ts > mod->last_eviction_timestamp) - return (true); - - return (false); + WT_PAGE_MODIFY *mod; + WT_TXN_GLOBAL *txn_global; + wt_timestamp_t pinned_ts; + + txn_global = &S2C(session)->txn_global; + + /* + * If the page hasn't been through one round of update/restore, give it a try. + */ + if ((mod = page->modify) == NULL || !FLD_ISSET(mod->restore_state, WT_PAGE_RS_RESTORED)) + return (true); + + /* + * Retry if a reasonable amount of eviction time has passed, the choice of 5 eviction passes as + * a reasonable amount of time is currently pretty arbitrary. + */ + if (__wt_cache_aggressive(session) || + mod->last_evict_pass_gen + 5 < S2C(session)->cache->evict_pass_gen) + return (true); + + /* Retry if the global transaction state has moved forward. */ + if (txn_global->current == txn_global->oldest_id || + mod->last_eviction_id != __wt_txn_oldest_id(session)) + return (true); + + if (mod->last_eviction_timestamp == WT_TS_NONE) + return (true); + + __wt_txn_pinned_timestamp(session, &pinned_ts); + if (pinned_ts > mod->last_eviction_timestamp) + return (true); + + return (false); } /* * __wt_page_can_evict -- - * Check whether a page can be evicted. + * Check whether a page can be evicted. */ static inline bool __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) { - WT_PAGE *page; - WT_PAGE_MODIFY *mod; - bool modified; - - if (inmem_splitp != NULL) - *inmem_splitp = false; - - page = ref->page; - mod = page->modify; - - /* A truncated page can't be evicted until the truncate completes. */ - if (__wt_page_del_active(session, ref, true)) - return (false); - - /* Otherwise, never modified pages can always be evicted. 
*/ - if (mod == NULL) - return (true); - - /* - * We can't split or evict multiblock row-store pages where the parent's - * key for the page is an overflow item, because the split into the - * parent frees the backing blocks for any no-longer-used overflow keys, - * which will corrupt the checkpoint's block management. - */ - if (!__wt_btree_can_evict_dirty(session) && - F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS)) - return (false); - - /* - * Check for in-memory splits before other eviction tests. If the page - * should split in-memory, return success immediately and skip more - * detailed eviction tests. We don't need further tests since the page - * won't be written or discarded from the cache. - */ - if (__wt_leaf_page_can_split(session, page)) { - if (inmem_splitp != NULL) - *inmem_splitp = true; - return (true); - } - - modified = __wt_page_is_modified(page); - - /* - * If the file is being checkpointed, other threads can't evict dirty - * pages: if a page is written and the previous version freed, that - * previous version might be referenced by an internal page already - * written in the checkpoint, leaving the checkpoint inconsistent. - */ - if (modified && !__wt_btree_can_evict_dirty(session)) { - WT_STAT_CONN_INCR(session, cache_eviction_checkpoint); - WT_STAT_DATA_INCR(session, cache_eviction_checkpoint); - return (false); - } - - /* - * If a split created new internal pages, those newly created internal - * pages cannot be evicted until all threads are known to have exited - * the original parent page's index, because evicting an internal page - * discards its WT_REF array, and a thread traversing the original - * parent page index might see a freed WT_REF. - * - * One special case where we know this is safe is if the handle is - * locked exclusive (e.g., when the whole tree is being evicted). In - * that case, no readers can be looking at an old index. 
- */ - if (WT_PAGE_IS_INTERNAL(page) && - !F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) && - __wt_gen_active(session, WT_GEN_SPLIT, page->pg_intl_split_gen)) - return (false); - - /* - * If the page is clean but has modifications that appear too new to - * evict, skip it. - */ - if (!modified && !__wt_txn_visible_all(session, - mod->rec_max_txn, mod->rec_max_timestamp)) - return (false); - - return (true); + WT_PAGE *page; + WT_PAGE_MODIFY *mod; + bool modified; + + if (inmem_splitp != NULL) + *inmem_splitp = false; + + page = ref->page; + mod = page->modify; + + /* A truncated page can't be evicted until the truncate completes. */ + if (__wt_page_del_active(session, ref, true)) + return (false); + + /* Otherwise, never modified pages can always be evicted. */ + if (mod == NULL) + return (true); + + /* + * We can't split or evict multiblock row-store pages where the parent's key for the page is an + * overflow item, because the split into the parent frees the backing blocks for any + * no-longer-used overflow keys, which will corrupt the checkpoint's block management. + */ + if (!__wt_btree_can_evict_dirty(session) && F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS)) + return (false); + + /* + * Check for in-memory splits before other eviction tests. If the page should split in-memory, + * return success immediately and skip more detailed eviction tests. We don't need further tests + * since the page won't be written or discarded from the cache. + */ + if (__wt_leaf_page_can_split(session, page)) { + if (inmem_splitp != NULL) + *inmem_splitp = true; + return (true); + } + + modified = __wt_page_is_modified(page); + + /* + * If the file is being checkpointed, other threads can't evict dirty pages: if a page is + * written and the previous version freed, that previous version might be referenced by an + * internal page already written in the checkpoint, leaving the checkpoint inconsistent. 
+ */ + if (modified && !__wt_btree_can_evict_dirty(session)) { + WT_STAT_CONN_INCR(session, cache_eviction_checkpoint); + WT_STAT_DATA_INCR(session, cache_eviction_checkpoint); + return (false); + } + + /* + * If a split created new internal pages, those newly created internal + * pages cannot be evicted until all threads are known to have exited + * the original parent page's index, because evicting an internal page + * discards its WT_REF array, and a thread traversing the original + * parent page index might see a freed WT_REF. + * + * One special case where we know this is safe is if the handle is + * locked exclusive (e.g., when the whole tree is being evicted). In + * that case, no readers can be looking at an old index. + */ + if (WT_PAGE_IS_INTERNAL(page) && !F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) && + __wt_gen_active(session, WT_GEN_SPLIT, page->pg_intl_split_gen)) + return (false); + + /* + * If the page is clean but has modifications that appear too new to evict, skip it. + */ + if (!modified && !__wt_txn_visible_all(session, mod->rec_max_txn, mod->rec_max_timestamp)) + return (false); + + return (true); } /* * __wt_page_release -- - * Release a reference to a page. + * Release a reference to a page. */ static inline int __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) { - WT_BTREE *btree; - WT_PAGE *page; - bool inmem_split; - - btree = S2BT(session); - - /* - * Discard our hazard pointer. Ignore pages we don't have and the root - * page, which sticks in memory, regardless. - */ - if (ref == NULL || ref->page == NULL || __wt_ref_is_root(ref)) - return (0); - - /* - * If hazard pointers aren't necessary for this file, we can't be - * evicting, we're done. - */ - if (F_ISSET(btree, WT_BTREE_IN_MEMORY)) - return (0); - - /* - * Attempt to evict pages with the special "oldest" read generation. 
- * This is set for pages that grow larger than the configured - * memory_page_max setting, when we see many deleted items, and when we - * are attempting to scan without trashing the cache. - * - * Checkpoint should not queue pages for urgent eviction if they require - * dirty eviction: there is a special exemption that allows checkpoint - * to evict dirty pages in a tree that is being checkpointed, and no - * other thread can help with that. Checkpoints don't rely on this code - * for dirty eviction: that is handled explicitly in __wt_sync_file. - * - * If the operation has disabled eviction or splitting, or the session - * is preventing from reconciling, then just queue the page for urgent - * eviction. Otherwise, attempt to release and evict it. - */ - page = ref->page; - if (WT_READGEN_EVICT_SOON(page->read_gen) && - btree->evict_disabled == 0 && - __wt_page_can_evict(session, ref, &inmem_split) && - (!WT_SESSION_IS_CHECKPOINT(session) || - __wt_page_evict_clean(page))) { - if (LF_ISSET(WT_READ_NO_EVICT) || - (inmem_split ? LF_ISSET(WT_READ_NO_SPLIT) : - F_ISSET(session, WT_SESSION_NO_RECONCILE))) - WT_IGNORE_RET_BOOL( - __wt_page_evict_urgent(session, ref)); - else { - WT_RET_BUSY_OK( - __wt_page_release_evict(session, ref, flags)); - return (0); - } - } - - return (__wt_hazard_clear(session, ref)); + WT_BTREE *btree; + WT_PAGE *page; + bool inmem_split; + + btree = S2BT(session); + + /* + * Discard our hazard pointer. Ignore pages we don't have and the root page, which sticks in + * memory, regardless. + */ + if (ref == NULL || ref->page == NULL || __wt_ref_is_root(ref)) + return (0); + + /* + * If hazard pointers aren't necessary for this file, we can't be evicting, we're done. + */ + if (F_ISSET(btree, WT_BTREE_IN_MEMORY)) + return (0); + + /* + * Attempt to evict pages with the special "oldest" read generation. 
+ * This is set for pages that grow larger than the configured + * memory_page_max setting, when we see many deleted items, and when we + * are attempting to scan without trashing the cache. + * + * Checkpoint should not queue pages for urgent eviction if they require + * dirty eviction: there is a special exemption that allows checkpoint + * to evict dirty pages in a tree that is being checkpointed, and no + * other thread can help with that. Checkpoints don't rely on this code + * for dirty eviction: that is handled explicitly in __wt_sync_file. + * + * If the operation has disabled eviction or splitting, or the session + * is preventing from reconciling, then just queue the page for urgent + * eviction. Otherwise, attempt to release and evict it. + */ + page = ref->page; + if (WT_READGEN_EVICT_SOON(page->read_gen) && btree->evict_disabled == 0 && + __wt_page_can_evict(session, ref, &inmem_split) && + (!WT_SESSION_IS_CHECKPOINT(session) || __wt_page_evict_clean(page))) { + if (LF_ISSET(WT_READ_NO_EVICT) || + (inmem_split ? LF_ISSET(WT_READ_NO_SPLIT) : F_ISSET(session, WT_SESSION_NO_RECONCILE))) + WT_IGNORE_RET_BOOL(__wt_page_evict_urgent(session, ref)); + else { + WT_RET_BUSY_OK(__wt_page_release_evict(session, ref, flags)); + return (0); + } + } + + return (__wt_hazard_clear(session, ref)); } /* * __wt_skip_choose_depth -- - * Randomly choose a depth for a skiplist insert. + * Randomly choose a depth for a skiplist insert. */ static inline u_int __wt_skip_choose_depth(WT_SESSION_IMPL *session) { - u_int d; + u_int d; - for (d = 1; d < WT_SKIP_MAXDEPTH && - __wt_random(&session->rnd) < WT_SKIP_PROBABILITY; d++) - ; - return (d); + for (d = 1; d < WT_SKIP_MAXDEPTH && __wt_random(&session->rnd) < WT_SKIP_PROBABILITY; d++) + ; + return (d); } /* * __wt_btree_lsm_over_size -- - * Return if the size of an in-memory tree with a single leaf page is over - * a specified maximum. 
If called on anything other than a simple tree with a - * single leaf page, returns true so our LSM caller will switch to a new tree. + * Return if the size of an in-memory tree with a single leaf page is over a specified maximum. + * If called on anything other than a simple tree with a single leaf page, returns true so our + * LSM caller will switch to a new tree. */ static inline bool __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) { - WT_BTREE *btree; - WT_PAGE *child, *root; - WT_PAGE_INDEX *pindex; - WT_REF *first; - - btree = S2BT(session); - root = btree->root.page; - - /* Check for a non-existent tree. */ - if (root == NULL) - return (false); - - /* A tree that can be evicted always requires a switch. */ - if (btree->evict_disabled == 0) - return (true); - - /* Check for a tree with a single leaf page. */ - WT_INTL_INDEX_GET(session, root, pindex); - if (pindex->entries != 1) /* > 1 child page, switch */ - return (true); - - first = pindex->index[0]; - if (first->state != WT_REF_MEM) /* no child page, ignore */ - return (false); - - /* - * We're reaching down into the page without a hazard pointer, but - * that's OK because we know that no-eviction is set and so the page - * cannot disappear. - */ - child = first->page; - if (child->type != WT_PAGE_ROW_LEAF) /* not a single leaf page */ - return (true); - - return (child->memory_footprint > maxsize); + WT_BTREE *btree; + WT_PAGE *child, *root; + WT_PAGE_INDEX *pindex; + WT_REF *first; + + btree = S2BT(session); + root = btree->root.page; + + /* Check for a non-existent tree. */ + if (root == NULL) + return (false); + + /* A tree that can be evicted always requires a switch. */ + if (btree->evict_disabled == 0) + return (true); + + /* Check for a tree with a single leaf page. 
*/ + WT_INTL_INDEX_GET(session, root, pindex); + if (pindex->entries != 1) /* > 1 child page, switch */ + return (true); + + first = pindex->index[0]; + if (first->state != WT_REF_MEM) /* no child page, ignore */ + return (false); + + /* + * We're reaching down into the page without a hazard pointer, but that's OK because we know + * that no-eviction is set and so the page cannot disappear. + */ + child = first->page; + if (child->type != WT_PAGE_ROW_LEAF) /* not a single leaf page */ + return (true); + + return (child->memory_footprint > maxsize); } /* * __wt_split_descent_race -- - * Return if we raced with an internal page split when descending the tree. + * Return if we raced with an internal page split when descending the tree. */ static inline bool -__wt_split_descent_race( - WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex) +__wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex) { - WT_PAGE_INDEX *pindex; - - /* No test when starting the descent (there's no home to check). */ - if (__wt_ref_is_root(ref)) - return (false); - - /* - * A place to hang this comment... - * - * There's a page-split race when we walk the tree: if we're splitting - * an internal page into its parent, we update the parent's page index - * before updating the split page's page index, and it's not an atomic - * update. A thread can read the parent page's original page index and - * then read the split page's replacement index. - * - * For example, imagine a search descending the tree. - * - * Because internal page splits work by truncating the original page to - * the initial part of the original page, the result of this race is we - * will have a search key that points past the end of the current page. 
- * This is only an issue when we search past the end of the page, if we - * find a WT_REF in the page with the namespace we're searching for, we - * don't care if the WT_REF moved or not while we were searching, we - * have the correct page. - * - * For example, imagine an internal page with 3 child pages, with the - * namespaces a-f, g-h and i-j; the first child page splits. The parent - * starts out with the following page-index: - * - * | ... | a | g | i | ... | - * - * which changes to this: - * - * | ... | a | c | e | g | i | ... | - * - * The child starts out with the following page-index: - * - * | a | b | c | d | e | f | - * - * which changes to this: - * - * | a | b | - * - * The thread searches the original parent page index for the key "cat", - * it couples to the "a" child page; if it uses the replacement child - * page index, it will search past the end of the page and couple to the - * "b" page, which is wrong. - * - * To detect the problem, we remember the parent page's page index used - * to descend the tree. Whenever we search past the end of a page, we - * check to see if the parent's page index has changed since our use of - * it during descent. As the problem only appears if we read the split - * page's replacement index, the parent page's index must already have - * changed, ensuring we detect the problem. - * - * It's possible for the opposite race to happen (a thread could read - * the parent page's replacement page index and then read the split - * page's original index). This isn't a problem because internal splits - * work by truncating the split page, so the split page search is for - * content the split page retains after the split, and we ignore this - * race. - * - * This code is a general purpose check for a descent race and we call - * it in other cases, for example, a cursor traversing backwards through - * the tree. 
- * - * Presumably we acquired a page index on the child page before calling - * this code, don't re-order that acquisition with this check. - */ - WT_BARRIER(); - WT_INTL_INDEX_GET(session, ref->home, pindex); - return (pindex != saved_pindex); + WT_PAGE_INDEX *pindex; + + /* No test when starting the descent (there's no home to check). */ + if (__wt_ref_is_root(ref)) + return (false); + + /* + * A place to hang this comment... + * + * There's a page-split race when we walk the tree: if we're splitting + * an internal page into its parent, we update the parent's page index + * before updating the split page's page index, and it's not an atomic + * update. A thread can read the parent page's original page index and + * then read the split page's replacement index. + * + * For example, imagine a search descending the tree. + * + * Because internal page splits work by truncating the original page to + * the initial part of the original page, the result of this race is we + * will have a search key that points past the end of the current page. + * This is only an issue when we search past the end of the page, if we + * find a WT_REF in the page with the namespace we're searching for, we + * don't care if the WT_REF moved or not while we were searching, we + * have the correct page. + * + * For example, imagine an internal page with 3 child pages, with the + * namespaces a-f, g-h and i-j; the first child page splits. The parent + * starts out with the following page-index: + * + * | ... | a | g | i | ... | + * + * which changes to this: + * + * | ... | a | c | e | g | i | ... | + * + * The child starts out with the following page-index: + * + * | a | b | c | d | e | f | + * + * which changes to this: + * + * | a | b | + * + * The thread searches the original parent page index for the key "cat", + * it couples to the "a" child page; if it uses the replacement child + * page index, it will search past the end of the page and couple to the + * "b" page, which is wrong. 
+ * + * To detect the problem, we remember the parent page's page index used + * to descend the tree. Whenever we search past the end of a page, we + * check to see if the parent's page index has changed since our use of + * it during descent. As the problem only appears if we read the split + * page's replacement index, the parent page's index must already have + * changed, ensuring we detect the problem. + * + * It's possible for the opposite race to happen (a thread could read + * the parent page's replacement page index and then read the split + * page's original index). This isn't a problem because internal splits + * work by truncating the split page, so the split page search is for + * content the split page retains after the split, and we ignore this + * race. + * + * This code is a general purpose check for a descent race and we call + * it in other cases, for example, a cursor traversing backwards through + * the tree. + * + * Presumably we acquired a page index on the child page before calling + * this code, don't re-order that acquisition with this check. + */ + WT_BARRIER(); + WT_INTL_INDEX_GET(session, ref->home, pindex); + return (pindex != saved_pindex); } /* * __wt_page_swap_func -- - * Swap one page's hazard pointer for another one when hazard pointer - * coupling up/down the tree. + * Swap one page's hazard pointer for another one when hazard pointer coupling up/down the tree. */ static inline int -__wt_page_swap_func( - WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags +__wt_page_swap_func(WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags #ifdef HAVE_DIAGNOSTIC - , const char *func, int line + , + const char *func, int line #endif - ) + ) { - WT_DECL_RET; - bool acquired; - - /* - * This function is here to simplify the error handling during hazard - * pointer coupling so we never leave a hazard pointer dangling. 
The - * assumption is we're holding a hazard pointer on "held", and want to - * acquire a hazard pointer on "want", releasing the hazard pointer on - * "held" when we're done. - * - * When walking the tree, we sometimes swap to the same page. Fast-path - * that to avoid thinking about error handling. - */ - if (held == want) - return (0); - - /* Get the wanted page. */ - ret = __wt_page_in_func(session, want, flags + WT_DECL_RET; + bool acquired; + + /* + * This function is here to simplify the error handling during hazard + * pointer coupling so we never leave a hazard pointer dangling. The + * assumption is we're holding a hazard pointer on "held", and want to + * acquire a hazard pointer on "want", releasing the hazard pointer on + * "held" when we're done. + * + * When walking the tree, we sometimes swap to the same page. Fast-path + * that to avoid thinking about error handling. + */ + if (held == want) + return (0); + + /* Get the wanted page. */ + ret = __wt_page_in_func(session, want, flags #ifdef HAVE_DIAGNOSTIC - , func, line + , + func, line #endif - ); - - /* - * Expected failures: page not found or restart. Our callers list the - * errors they're expecting to handle. - */ - if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND) - return (WT_NOTFOUND); - if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART) - return (WT_RESTART); - - /* Discard the original held page on either success or error. */ - acquired = ret == 0; - WT_TRET(__wt_page_release(session, held, flags)); - - /* Fast-path expected success. */ - if (ret == 0) - return (0); - - /* - * If there was an error at any point that our caller isn't prepared to - * handle, discard any page we acquired. - */ - if (acquired) - WT_TRET(__wt_page_release(session, want, flags)); - - /* - * If we're returning an error, don't let it be one our caller expects - * to handle as returned by page-in: the expectation includes the held - * page not having been released, and that's not the case. 
- */ - if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND) - WT_RET_MSG(session, - EINVAL, "page-release WT_NOTFOUND error mapped to EINVAL"); - if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART) - WT_RET_MSG(session, - EINVAL, "page-release WT_RESTART error mapped to EINVAL"); - - return (ret); + ); + + /* + * Expected failures: page not found or restart. Our callers list the errors they're expecting + * to handle. + */ + if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND) + return (WT_NOTFOUND); + if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART) + return (WT_RESTART); + + /* Discard the original held page on either success or error. */ + acquired = ret == 0; + WT_TRET(__wt_page_release(session, held, flags)); + + /* Fast-path expected success. */ + if (ret == 0) + return (0); + + /* + * If there was an error at any point that our caller isn't prepared to handle, discard any page + * we acquired. + */ + if (acquired) + WT_TRET(__wt_page_release(session, want, flags)); + + /* + * If we're returning an error, don't let it be one our caller expects to handle as returned by + * page-in: the expectation includes the held page not having been released, and that's not the + * case. 
+ */ + if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND) + WT_RET_MSG(session, EINVAL, "page-release WT_NOTFOUND error mapped to EINVAL"); + if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART) + WT_RET_MSG(session, EINVAL, "page-release WT_RESTART error mapped to EINVAL"); + + return (ret); } diff --git a/src/third_party/wiredtiger/src/include/btree_cmp.i b/src/third_party/wiredtiger/src/include/btree_cmp.i index 70e3e376e8d..2f0596bed13 100644 --- a/src/third_party/wiredtiger/src/include/btree_cmp.i +++ b/src/third_party/wiredtiger/src/include/btree_cmp.i @@ -15,311 +15,279 @@ #if defined(HAVE_ARM_NEON_INTRIN_H) #include <arm_neon.h> #endif - /* 16B alignment */ -#define WT_ALIGNED_16(p) (((uintptr_t)(p) & 0x0f) == 0) -#define WT_VECTOR_SIZE 16 /* chunk size */ +/* 16B alignment */ +#define WT_ALIGNED_16(p) (((uintptr_t)(p)&0x0f) == 0) +#define WT_VECTOR_SIZE 16 /* chunk size */ /* * __wt_lex_compare -- - * Lexicographic comparison routine. - * - * Returns: - * < 0 if user_item is lexicographically < tree_item - * = 0 if user_item is lexicographically = tree_item - * > 0 if user_item is lexicographically > tree_item - * - * We use the names "user" and "tree" so it's clear in the btree code which - * the application is looking at when we call its comparison function. + * Lexicographic comparison routine. Returns: < 0 if user_item is lexicographically < tree_item + * = 0 if user_item is lexicographically = tree_item > 0 if user_item is lexicographically > + * tree_item We use the names "user" and "tree" so it's clear in the btree code which the + * application is looking at when we call its comparison function. 
*/ static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) { - size_t len, usz, tsz; - const uint8_t *userp, *treep; + size_t len, usz, tsz; + const uint8_t *userp, *treep; - usz = user_item->size; - tsz = tree_item->size; - len = WT_MIN(usz, tsz); + usz = user_item->size; + tsz = tree_item->size; + len = WT_MIN(usz, tsz); - userp = user_item->data; - treep = tree_item->data; + userp = user_item->data; + treep = tree_item->data; #ifdef HAVE_X86INTRIN_H - /* Use vector instructions if we'll execute at least 2 of them. */ - if (len >= WT_VECTOR_SIZE * 2) { - size_t remain; - __m128i res_eq, u, t; + /* Use vector instructions if we'll execute at least 2 of them. */ + if (len >= WT_VECTOR_SIZE * 2) { + size_t remain; + __m128i res_eq, u, t; - remain = len % WT_VECTOR_SIZE; - len -= remain; - if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) - for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { - u = _mm_load_si128((const __m128i *)userp); - t = _mm_load_si128((const __m128i *)treep); - res_eq = _mm_cmpeq_epi8(u, t); - if (_mm_movemask_epi8(res_eq) != 65535) - break; - } - else - for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { - u = _mm_loadu_si128((const __m128i *)userp); - t = _mm_loadu_si128((const __m128i *)treep); - res_eq = _mm_cmpeq_epi8(u, t); - if (_mm_movemask_epi8(res_eq) != 65535) - break; - } - len += remain; - } + remain = len % WT_VECTOR_SIZE; + len -= remain; + if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) + for (; len > 0; + len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { + u = _mm_load_si128((const __m128i *)userp); + t = _mm_load_si128((const __m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + else + for (; len > 0; + len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { + u = _mm_loadu_si128((const __m128i *)userp); + t = 
_mm_loadu_si128((const __m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + len += remain; + } #elif defined(HAVE_ARM_NEON_INTRIN_H) - /* Use vector instructions if we'll execute at least 1 of them. */ - if (len >= WT_VECTOR_SIZE) { - size_t remain; - uint8x16_t res_eq, u, t; - remain = len % WT_VECTOR_SIZE; - len -= remain; - for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { - u = vld1q_u8(userp); - t = vld1q_u8(treep); - res_eq = vceqq_u8(u, t); - if (vminvq_u8(res_eq) != 255) - break; - } - len += remain; - } + /* Use vector instructions if we'll execute at least 1 of them. */ + if (len >= WT_VECTOR_SIZE) { + size_t remain; + uint8x16_t res_eq, u, t; + remain = len % WT_VECTOR_SIZE; + len -= remain; + for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { + u = vld1q_u8(userp); + t = vld1q_u8(treep); + res_eq = vceqq_u8(u, t); + if (vminvq_u8(res_eq) != 255) + break; + } + len += remain; + } #endif - /* - * Use the non-vectorized version for the remaining bytes and for the - * small key sizes. - */ - for (; len > 0; --len, ++userp, ++treep) - if (*userp != *treep) - return (*userp < *treep ? -1 : 1); + /* + * Use the non-vectorized version for the remaining bytes and for the small key sizes. + */ + for (; len > 0; --len, ++userp, ++treep) + if (*userp != *treep) + return (*userp < *treep ? -1 : 1); - /* Contents are equal up to the smallest length. */ - return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); + /* Contents are equal up to the smallest length. */ + return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); } /* * __wt_compare -- - * The same as __wt_lex_compare, but using the application's collator - * function when configured. + * The same as __wt_lex_compare, but using the application's collator function when configured. 
*/ static inline int -__wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, - const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp) +__wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item, + const WT_ITEM *tree_item, int *cmpp) { - if (collator == NULL) { - *cmpp = __wt_lex_compare(user_item, tree_item); - return (0); - } - return (collator->compare( - collator, &session->iface, user_item, tree_item, cmpp)); + if (collator == NULL) { + *cmpp = __wt_lex_compare(user_item, tree_item); + return (0); + } + return (collator->compare(collator, &session->iface, user_item, tree_item, cmpp)); } /* * __wt_lex_compare_skip -- - * Lexicographic comparison routine, skipping leading bytes. - * - * Returns: - * < 0 if user_item is lexicographically < tree_item - * = 0 if user_item is lexicographically = tree_item - * > 0 if user_item is lexicographically > tree_item - * - * We use the names "user" and "tree" so it's clear in the btree code which - * the application is looking at when we call its comparison function. + * Lexicographic comparison routine, skipping leading bytes. Returns: < 0 if user_item is + * lexicographically < tree_item = 0 if user_item is lexicographically = tree_item > 0 if + * user_item is lexicographically > tree_item We use the names "user" and "tree" so it's clear + * in the btree code which the application is looking at when we call its comparison function. 
*/ static inline int -__wt_lex_compare_skip( - const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp) +__wt_lex_compare_skip(const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp) { - size_t len, usz, tsz; - const uint8_t *userp, *treep; + size_t len, usz, tsz; + const uint8_t *userp, *treep; - usz = user_item->size; - tsz = tree_item->size; - len = WT_MIN(usz, tsz) - *matchp; + usz = user_item->size; + tsz = tree_item->size; + len = WT_MIN(usz, tsz) - *matchp; - userp = (const uint8_t *)user_item->data + *matchp; - treep = (const uint8_t *)tree_item->data + *matchp; + userp = (const uint8_t *)user_item->data + *matchp; + treep = (const uint8_t *)tree_item->data + *matchp; #ifdef HAVE_X86INTRIN_H - /* Use vector instructions if we'll execute at least 2 of them. */ - if (len >= WT_VECTOR_SIZE * 2) { - size_t remain; - __m128i res_eq, u, t; + /* Use vector instructions if we'll execute at least 2 of them. */ + if (len >= WT_VECTOR_SIZE * 2) { + size_t remain; + __m128i res_eq, u, t; - remain = len % WT_VECTOR_SIZE; - len -= remain; - if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) - for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, - *matchp += WT_VECTOR_SIZE) { - u = _mm_load_si128((const __m128i *)userp); - t = _mm_load_si128((const __m128i *)treep); - res_eq = _mm_cmpeq_epi8(u, t); - if (_mm_movemask_epi8(res_eq) != 65535) - break; - } - else - for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, - *matchp += WT_VECTOR_SIZE) { - u = _mm_loadu_si128((const __m128i *)userp); - t = _mm_loadu_si128((const __m128i *)treep); - res_eq = _mm_cmpeq_epi8(u, t); - if (_mm_movemask_epi8(res_eq) != 65535) - break; - } - len += remain; - } + remain = len % WT_VECTOR_SIZE; + len -= remain; + if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) + for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, + *matchp += WT_VECTOR_SIZE) { + u = 
_mm_load_si128((const __m128i *)userp); + t = _mm_load_si128((const __m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + else + for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, + *matchp += WT_VECTOR_SIZE) { + u = _mm_loadu_si128((const __m128i *)userp); + t = _mm_loadu_si128((const __m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + len += remain; + } #elif defined(HAVE_ARM_NEON_INTRIN_H) - /* Use vector instructions if we'll execute at least 1 of them. */ - if (len >= WT_VECTOR_SIZE) { - size_t remain; - uint8x16_t res_eq, u, t; - remain = len % WT_VECTOR_SIZE; - len -= remain; - if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) - for (; len > 0; - len -= WT_VECTOR_SIZE, - userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, - *matchp += WT_VECTOR_SIZE) { - u = vld1q_u8(userp); - t = vld1q_u8(treep); - res_eq = vceqq_u8(u, t); - if (vminvq_u8(res_eq) != 255) - break; - } - len += remain; - } + /* Use vector instructions if we'll execute at least 1 of them. */ + if (len >= WT_VECTOR_SIZE) { + size_t remain; + uint8x16_t res_eq, u, t; + remain = len % WT_VECTOR_SIZE; + len -= remain; + if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) + for (; len > 0; len -= WT_VECTOR_SIZE, userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, + *matchp += WT_VECTOR_SIZE) { + u = vld1q_u8(userp); + t = vld1q_u8(treep); + res_eq = vceqq_u8(u, t); + if (vminvq_u8(res_eq) != 255) + break; + } + len += remain; + } #endif - /* - * Use the non-vectorized version for the remaining bytes and for the - * small key sizes. - */ - for (; len > 0; --len, ++userp, ++treep, ++*matchp) - if (*userp != *treep) - return (*userp < *treep ? -1 : 1); + /* + * Use the non-vectorized version for the remaining bytes and for the small key sizes. + */ + for (; len > 0; --len, ++userp, ++treep, ++*matchp) + if (*userp != *treep) + return (*userp < *treep ? 
-1 : 1); - /* Contents are equal up to the smallest length. */ - return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); + /* Contents are equal up to the smallest length. */ + return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); } /* * __wt_compare_skip -- - * The same as __wt_lex_compare_skip, but using the application's collator - * function when configured. + * The same as __wt_lex_compare_skip, but using the application's collator function when + * configured. */ static inline int -__wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, - const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp, - size_t *matchp) +__wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item, + const WT_ITEM *tree_item, int *cmpp, size_t *matchp) { - if (collator == NULL) { - *cmpp = __wt_lex_compare_skip(user_item, tree_item, matchp); - return (0); - } - return (collator->compare( - collator, &session->iface, user_item, tree_item, cmpp)); + if (collator == NULL) { + *cmpp = __wt_lex_compare_skip(user_item, tree_item, matchp); + return (0); + } + return (collator->compare(collator, &session->iface, user_item, tree_item, cmpp)); } /* * __wt_lex_compare_short -- - * Lexicographic comparison routine for short keys. - * - * Returns: - * < 0 if user_item is lexicographically < tree_item - * = 0 if user_item is lexicographically = tree_item - * > 0 if user_item is lexicographically > tree_item - * - * We use the names "user" and "tree" so it's clear in the btree code which - * the application is looking at when we call its comparison function. + * Lexicographic comparison routine for short keys. Returns: < 0 if user_item is + * lexicographically < tree_item = 0 if user_item is lexicographically = tree_item > 0 if + * user_item is lexicographically > tree_item We use the names "user" and "tree" so it's clear + * in the btree code which the application is looking at when we call its comparison function. 
*/ static inline int __wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item) { - size_t len, usz, tsz; - const uint8_t *userp, *treep; + size_t len, usz, tsz; + const uint8_t *userp, *treep; - usz = user_item->size; - tsz = tree_item->size; - len = WT_MIN(usz, tsz); + usz = user_item->size; + tsz = tree_item->size; + len = WT_MIN(usz, tsz); - userp = user_item->data; - treep = tree_item->data; + userp = user_item->data; + treep = tree_item->data; - /* - * The maximum packed uint64_t is 9B, catch row-store objects using - * packed record numbers as keys. - * - * Don't use a #define to compress this case statement: gcc7 complains - * about implicit fallthrough and doesn't support explicit fallthrough - * comments in macros. - */ -#define WT_COMPARE_SHORT_MAXLEN 9 - switch (len) { - case 9: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 8: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 7: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 6: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 5: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 4: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 3: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 2: - if (*userp != *treep) - break; - ++userp; - ++treep; - /* FALLTHROUGH */ - case 1: - if (*userp != *treep) - break; +/* + * The maximum packed uint64_t is 9B, catch row-store objects using + * packed record numbers as keys. + * + * Don't use a #define to compress this case statement: gcc7 complains + * about implicit fallthrough and doesn't support explicit fallthrough + * comments in macros. 
+ */ +#define WT_COMPARE_SHORT_MAXLEN 9 + switch (len) { + case 9: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 8: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 7: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 6: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 5: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 4: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 3: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 2: + if (*userp != *treep) + break; + ++userp; + ++treep; + /* FALLTHROUGH */ + case 1: + if (*userp != *treep) + break; - /* Contents are equal up to the smallest length. */ - return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); - } - return (*userp < *treep ? -1 : 1); + /* Contents are equal up to the smallest length. */ + return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); + } + return (*userp < *treep ? -1 : 1); } diff --git a/src/third_party/wiredtiger/src/include/buf.i b/src/third_party/wiredtiger/src/include/buf.i index 138288c2075..fbd771ec711 100644 --- a/src/third_party/wiredtiger/src/include/buf.i +++ b/src/third_party/wiredtiger/src/include/buf.i @@ -8,126 +8,120 @@ /* * __wt_buf_grow -- - * Grow a buffer that may be in-use, and ensure that all data is local to - * the buffer. + * Grow a buffer that may be in-use, and ensure that all data is local to the buffer. */ static inline int __wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - return (size > buf->memsize || !WT_DATA_IN_ITEM(buf) ? - __wt_buf_grow_worker(session, buf, size) : 0); + return ( + size > buf->memsize || !WT_DATA_IN_ITEM(buf) ? __wt_buf_grow_worker(session, buf, size) : 0); } /* * __wt_buf_extend -- - * Grow a buffer that's currently in-use. + * Grow a buffer that's currently in-use. 
*/ static inline int __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - /* - * The difference between __wt_buf_grow and __wt_buf_extend is that the - * latter is expected to be called repeatedly for the same buffer, and - * so grows the buffer exponentially to avoid repeated costly calls to - * realloc. - */ - return (size > buf->memsize ? - __wt_buf_grow(session, buf, WT_MAX(size, 2 * buf->memsize)) : 0); + /* + * The difference between __wt_buf_grow and __wt_buf_extend is that the latter is expected to be + * called repeatedly for the same buffer, and so grows the buffer exponentially to avoid + * repeated costly calls to realloc. + */ + return (size > buf->memsize ? __wt_buf_grow(session, buf, WT_MAX(size, 2 * buf->memsize)) : 0); } /* * __wt_buf_init -- - * Create an empty buffer at a specific size. + * Create an empty buffer at a specific size. */ static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - /* - * The buffer grow function does what we need, but anticipates data - * referenced by the buffer. Avoid any data copy by setting data to - * reference the buffer's allocated memory, and clearing it. - */ - buf->data = buf->mem; - buf->size = 0; - return (__wt_buf_grow(session, buf, size)); + /* + * The buffer grow function does what we need, but anticipates data referenced by the buffer. + * Avoid any data copy by setting data to reference the buffer's allocated memory, and clearing + * it. + */ + buf->data = buf->mem; + buf->size = 0; + return (__wt_buf_grow(session, buf, size)); } /* * __wt_buf_initsize -- - * Create an empty buffer at a specific size, and set the data length. + * Create an empty buffer at a specific size, and set the data length. */ static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - WT_RET(__wt_buf_init(session, buf, size)); + WT_RET(__wt_buf_init(session, buf, size)); - buf->size = size; /* Set the data length. 
*/ + buf->size = size; /* Set the data length. */ - return (0); + return (0); } /* * __wt_buf_set -- - * Set the contents of the buffer. + * Set the contents of the buffer. */ static inline int -__wt_buf_set( - WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size) +__wt_buf_set(WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size) { - /* - * The buffer grow function does what we need, but expects the data to - * be referenced by the buffer. If we're copying data from outside the - * buffer, set it up so it makes sense to the buffer grow function. (No - * test needed, this works if WT_ITEM.data is already set to "data".) - */ - buf->data = data; - buf->size = size; - return (__wt_buf_grow(session, buf, size)); + /* + * The buffer grow function does what we need, but expects the data to be referenced by the + * buffer. If we're copying data from outside the buffer, set it up so it makes sense to the + * buffer grow function. (No test needed, this works if WT_ITEM.data is already set to "data".) + */ + buf->data = data; + buf->size = size; + return (__wt_buf_grow(session, buf, size)); } /* * __wt_buf_setstr -- - * Set the contents of the buffer to a NUL-terminated string. + * Set the contents of the buffer to a NUL-terminated string. */ static inline int __wt_buf_setstr(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *s) { - return (__wt_buf_set(session, buf, s, strlen(s) + 1)); + return (__wt_buf_set(session, buf, s, strlen(s) + 1)); } /* * __wt_buf_free -- - * Free a buffer. + * Free a buffer. */ static inline void __wt_buf_free(WT_SESSION_IMPL *session, WT_ITEM *buf) { - __wt_free(session, buf->mem); + __wt_free(session, buf->mem); - memset(buf, 0, sizeof(WT_ITEM)); + memset(buf, 0, sizeof(WT_ITEM)); } /* * __wt_scr_free -- - * Release a scratch buffer. + * Release a scratch buffer. 
*/ static inline void __wt_scr_free(WT_SESSION_IMPL *session, WT_ITEM **bufp) { - WT_ITEM *buf; + WT_ITEM *buf; - if ((buf = *bufp) == NULL) - return; - *bufp = NULL; + if ((buf = *bufp) == NULL) + return; + *bufp = NULL; - if (session->scratch_cached + buf->memsize >= - S2C(session)->session_scratch_max) { - __wt_free(session, buf->mem); - buf->memsize = 0; - } else - session->scratch_cached += buf->memsize; + if (session->scratch_cached + buf->memsize >= S2C(session)->session_scratch_max) { + __wt_free(session, buf->mem); + buf->memsize = 0; + } else + session->scratch_cached += buf->memsize; - buf->data = NULL; - buf->size = 0; - F_CLR(buf, WT_ITEM_INUSE); + buf->data = NULL; + buf->size = 0; + F_CLR(buf, WT_ITEM_INUSE); } diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 46718d9aba2..881733a88de 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -7,294 +7,290 @@ */ /* - * Helper: in order to read without any calls to eviction, we have to ignore - * the cache size and disable splits. + * Helper: in order to read without any calls to eviction, we have to ignore the cache size and + * disable splits. */ -#define WT_READ_NO_EVICT (WT_READ_IGNORE_CACHE_SIZE | WT_READ_NO_SPLIT) +#define WT_READ_NO_EVICT (WT_READ_IGNORE_CACHE_SIZE | WT_READ_NO_SPLIT) /* - * Tuning constants: I hesitate to call this tuning, but we want to review some - * number of pages from each file's in-memory tree for each page we evict. + * Tuning constants: I hesitate to call this tuning, but we want to review some number of pages from + * each file's in-memory tree for each page we evict. 
*/ -#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ -#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ -#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ +#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ +#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ +#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ /* * WT_EVICT_ENTRY -- * Encapsulation of an eviction candidate. */ struct __wt_evict_entry { - WT_BTREE *btree; /* Enclosing btree object */ - WT_REF *ref; /* Page to flush/evict */ - uint64_t score; /* Relative eviction priority */ + WT_BTREE *btree; /* Enclosing btree object */ + WT_REF *ref; /* Page to flush/evict */ + uint64_t score; /* Relative eviction priority */ }; -#define WT_EVICT_QUEUE_MAX 3 /* Two ordinary queues plus urgent */ -#define WT_EVICT_URGENT_QUEUE 2 /* Urgent queue index */ +#define WT_EVICT_QUEUE_MAX 3 /* Two ordinary queues plus urgent */ +#define WT_EVICT_URGENT_QUEUE 2 /* Urgent queue index */ /* * WT_EVICT_QUEUE -- * Encapsulation of an eviction candidate queue. */ struct __wt_evict_queue { - WT_SPINLOCK evict_lock; /* Eviction LRU queue */ - WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */ - WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */ - uint32_t evict_candidates; /* LRU list pages to evict */ - uint32_t evict_entries; /* LRU entries in the queue */ - volatile uint32_t evict_max; /* LRU maximum eviction slot used */ + WT_SPINLOCK evict_lock; /* Eviction LRU queue */ + WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */ + WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */ + uint32_t evict_candidates; /* LRU list pages to evict */ + uint32_t evict_entries; /* LRU entries in the queue */ + volatile uint32_t evict_max; /* LRU maximum eviction slot used */ }; /* Cache operations. 
*/ typedef enum __wt_cache_op { - WT_SYNC_CHECKPOINT, - WT_SYNC_CLOSE, - WT_SYNC_DISCARD, - WT_SYNC_WRITE_LEAVES + WT_SYNC_CHECKPOINT, + WT_SYNC_CLOSE, + WT_SYNC_DISCARD, + WT_SYNC_WRITE_LEAVES } WT_CACHE_OP; -#define WT_LAS_FILE_MIN (100 * WT_MEGABYTE) -#define WT_LAS_NUM_SESSIONS 5 -#define WT_LAS_SWEEP_ENTRIES (20 * WT_THOUSAND) -#define WT_LAS_SWEEP_SEC 2 +#define WT_LAS_FILE_MIN (100 * WT_MEGABYTE) +#define WT_LAS_NUM_SESSIONS 5 +#define WT_LAS_SWEEP_ENTRIES (20 * WT_THOUSAND) +#define WT_LAS_SWEEP_SEC 2 /* * WiredTiger cache structure. */ struct __wt_cache { - /* - * Different threads read/write pages to/from the cache and create pages - * in the cache, so we cannot know precisely how much memory is in use - * at any specific time. However, even though the values don't have to - * be exact, they can't be garbage, we track what comes in and what goes - * out and calculate the difference as needed. - */ - uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */ - uint64_t pages_dirty_intl; - uint64_t bytes_dirty_leaf; - uint64_t bytes_dirty_total; - uint64_t pages_dirty_leaf; - uint64_t bytes_evict; /* Bytes/pages discarded by eviction */ - uint64_t pages_evicted; - uint64_t bytes_image; /* Bytes of disk images */ - uint64_t bytes_inmem; /* Bytes/pages in memory */ - uint64_t pages_inmem; - uint64_t bytes_internal; /* Bytes of internal pages */ - uint64_t bytes_read; /* Bytes read into memory */ - uint64_t bytes_written; - - uint64_t bytes_lookaside; /* Lookaside bytes inmem */ - - volatile uint64_t eviction_progress; /* Eviction progress count */ - uint64_t last_eviction_progress;/* Tracked eviction progress */ - - uint64_t app_waits; /* User threads waited for cache */ - uint64_t app_evicts; /* Pages evicted by user threads */ - - uint64_t evict_max_page_size; /* Largest page seen at eviction */ - struct timespec stuck_time; /* Stuck time */ - - /* - * Read information. 
- */ - uint64_t read_gen; /* Current page read generation */ - uint64_t read_gen_oldest; /* Oldest read generation the eviction - * server saw in its last queue load */ - uint64_t evict_pass_gen; /* Number of eviction passes */ - - /* - * Eviction thread information. - */ - WT_CONDVAR *evict_cond; /* Eviction server condition */ - WT_SPINLOCK evict_walk_lock; /* Eviction walk location */ - - /* - * Eviction threshold percentages use double type to allow for - * specifying percentages less than one. - */ - double eviction_dirty_target; /* Percent to allow dirty */ - double eviction_dirty_trigger; /* Percent to trigger dirty eviction */ - double eviction_trigger; /* Percent to trigger eviction */ - double eviction_target; /* Percent to end eviction */ - - double eviction_checkpoint_target;/* Percent to reduce dirty - to during checkpoint scrubs */ - double eviction_scrub_target; /* Current scrub target */ - - u_int overhead_pct; /* Cache percent adjustment */ - uint64_t cache_max_wait_us; /* Maximum time an operation waits for - * space in cache */ - - /* - * Eviction thread tuning information. - */ - uint32_t evict_tune_datapts_needed; /* Data needed to tune */ - struct timespec evict_tune_last_action_time;/* Time of last action */ - struct timespec evict_tune_last_time; /* Time of last check */ - uint32_t evict_tune_num_points; /* Number of values tried */ - uint64_t evict_tune_progress_last; /* Progress counter */ - uint64_t evict_tune_progress_rate_max; /* Max progress rate */ - bool evict_tune_stable; /* Are we stable? */ - uint32_t evict_tune_workers_best; /* Best performing value */ - - /* - * Pass interrupt counter. - */ - volatile uint32_t pass_intr; /* Interrupt eviction pass. */ - - /* - * LRU eviction list information. 
- */ - WT_SPINLOCK evict_pass_lock; /* Eviction pass lock */ - WT_SESSION_IMPL *walk_session; /* Eviction pass session */ - WT_DATA_HANDLE *walk_tree; /* LRU walk current tree */ - - WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */ - WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX]; - WT_EVICT_QUEUE *evict_current_queue; /* LRU current queue in use */ - WT_EVICT_QUEUE *evict_fill_queue; /* LRU next queue to fill. - This is usually the same as the - "other" queue but under heavy - load the eviction server will - start filling the current queue - before it switches. */ - WT_EVICT_QUEUE *evict_other_queue; /* LRU queue not in use */ - WT_EVICT_QUEUE *evict_urgent_queue; /* LRU urgent queue */ - uint32_t evict_slots; /* LRU list eviction slots */ - -#define WT_EVICT_SCORE_BUMP 10 -#define WT_EVICT_SCORE_CUTOFF 10 -#define WT_EVICT_SCORE_MAX 100 - /* - * Score of how aggressive eviction should be about selecting eviction - * candidates. If eviction is struggling to make progress, this score - * rises (up to a maximum of 100), at which point the cache is "stuck" - * and transactions will be rolled back. - */ - uint32_t evict_aggressive_score; - - /* - * Score of how often LRU queues are empty on refill. This score varies - * between 0 (if the queue hasn't been empty for a long time) and 100 - * (if the queue has been empty the last 10 times we filled up. - */ - uint32_t evict_empty_score; - - /* - * Score of how much pressure storing historical versions is having on - * eviction. This score varies between 0, if reconciliation always - * sees updates that are globally visible and hence can be discarded, - * to 100 if no updates are globally visible. - */ - int32_t evict_lookaside_score; - - /* - * Shared lookaside lock, session and cursor, used by threads accessing - * the lookaside table (other than eviction server and worker threads - * and the sweep thread, all of which have their own lookaside cursors). 
- */ - WT_SPINLOCK las_lock; - WT_SESSION_IMPL *las_session[WT_LAS_NUM_SESSIONS]; - bool las_session_inuse[WT_LAS_NUM_SESSIONS]; - - uint32_t las_fileid; /* Lookaside table file ID */ - uint64_t las_insert_count; /* Count of inserts to lookaside */ - uint64_t las_remove_count; /* Count of removes from lookaside */ - uint64_t las_pageid; /* Lookaside table page ID counter */ - - bool las_reader; /* Indicate an LAS reader to sweep */ - WT_RWLOCK las_sweepwalk_lock; - WT_SPINLOCK las_sweep_lock; - WT_ITEM las_sweep_key; /* Track sweep position. */ - uint32_t las_sweep_dropmin; /* Minimum btree ID in current set. */ - uint8_t *las_sweep_dropmap; /* Bitmap of dropped btree IDs. */ - uint32_t las_sweep_dropmax; /* Maximum btree ID in current set. */ - uint64_t las_sweep_max_pageid; /* Maximum page ID for sweep. */ - - uint32_t *las_dropped; /* List of dropped btree IDs. */ - size_t las_dropped_next; /* Next index into drop list. */ - size_t las_dropped_alloc; /* Allocated size of drop list. */ - - /* - * The "lookaside_activity" verbose messages are throttled to once per - * checkpoint. To accomplish this we track the checkpoint generation - * for the most recent read and write verbose messages. - */ - uint64_t las_verb_gen_read; - uint64_t las_verb_gen_write; - - /* - * Cache pool information. - */ - uint64_t cp_pass_pressure; /* Calculated pressure from this pass */ - uint64_t cp_quota; /* Maximum size for this cache */ - uint64_t cp_reserved; /* Base size for this cache */ - WT_SESSION_IMPL *cp_session; /* May be used for cache management */ - uint32_t cp_skip_count; /* Post change stabilization */ - wt_thread_t cp_tid; /* Thread ID for cache pool manager */ - /* State seen at the last pass of the shared cache manager */ - uint64_t cp_saved_app_evicts; /* User eviction count at last review */ - uint64_t cp_saved_app_waits; /* User wait count at last review */ - uint64_t cp_saved_read; /* Read count at last review */ - - /* - * Flags. 
- */ + /* + * Different threads read/write pages to/from the cache and create pages in the cache, so we + * cannot know precisely how much memory is in use at any specific time. However, even though + * the values don't have to be exact, they can't be garbage, we track what comes in and what + * goes out and calculate the difference as needed. + */ + uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */ + uint64_t pages_dirty_intl; + uint64_t bytes_dirty_leaf; + uint64_t bytes_dirty_total; + uint64_t pages_dirty_leaf; + uint64_t bytes_evict; /* Bytes/pages discarded by eviction */ + uint64_t pages_evicted; + uint64_t bytes_image; /* Bytes of disk images */ + uint64_t bytes_inmem; /* Bytes/pages in memory */ + uint64_t pages_inmem; + uint64_t bytes_internal; /* Bytes of internal pages */ + uint64_t bytes_read; /* Bytes read into memory */ + uint64_t bytes_written; + + uint64_t bytes_lookaside; /* Lookaside bytes inmem */ + + volatile uint64_t eviction_progress; /* Eviction progress count */ + uint64_t last_eviction_progress; /* Tracked eviction progress */ + + uint64_t app_waits; /* User threads waited for cache */ + uint64_t app_evicts; /* Pages evicted by user threads */ + + uint64_t evict_max_page_size; /* Largest page seen at eviction */ + struct timespec stuck_time; /* Stuck time */ + + /* + * Read information. + */ + uint64_t read_gen; /* Current page read generation */ + uint64_t read_gen_oldest; /* Oldest read generation the eviction + * server saw in its last queue load */ + uint64_t evict_pass_gen; /* Number of eviction passes */ + + /* + * Eviction thread information. + */ + WT_CONDVAR *evict_cond; /* Eviction server condition */ + WT_SPINLOCK evict_walk_lock; /* Eviction walk location */ + + /* + * Eviction threshold percentages use double type to allow for specifying percentages less than + * one. 
+ */ + double eviction_dirty_target; /* Percent to allow dirty */ + double eviction_dirty_trigger; /* Percent to trigger dirty eviction */ + double eviction_trigger; /* Percent to trigger eviction */ + double eviction_target; /* Percent to end eviction */ + + double eviction_checkpoint_target; /* Percent to reduce dirty + to during checkpoint scrubs */ + double eviction_scrub_target; /* Current scrub target */ + + u_int overhead_pct; /* Cache percent adjustment */ + uint64_t cache_max_wait_us; /* Maximum time an operation waits for + * space in cache */ + + /* + * Eviction thread tuning information. + */ + uint32_t evict_tune_datapts_needed; /* Data needed to tune */ + struct timespec evict_tune_last_action_time; /* Time of last action */ + struct timespec evict_tune_last_time; /* Time of last check */ + uint32_t evict_tune_num_points; /* Number of values tried */ + uint64_t evict_tune_progress_last; /* Progress counter */ + uint64_t evict_tune_progress_rate_max; /* Max progress rate */ + bool evict_tune_stable; /* Are we stable? */ + uint32_t evict_tune_workers_best; /* Best performing value */ + + /* + * Pass interrupt counter. + */ + volatile uint32_t pass_intr; /* Interrupt eviction pass. */ + + /* + * LRU eviction list information. + */ + WT_SPINLOCK evict_pass_lock; /* Eviction pass lock */ + WT_SESSION_IMPL *walk_session; /* Eviction pass session */ + WT_DATA_HANDLE *walk_tree; /* LRU walk current tree */ + + WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */ + WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX]; + WT_EVICT_QUEUE *evict_current_queue; /* LRU current queue in use */ + WT_EVICT_QUEUE *evict_fill_queue; /* LRU next queue to fill. + This is usually the same as the + "other" queue but under heavy + load the eviction server will + start filling the current queue + before it switches. 
*/ + WT_EVICT_QUEUE *evict_other_queue; /* LRU queue not in use */ + WT_EVICT_QUEUE *evict_urgent_queue; /* LRU urgent queue */ + uint32_t evict_slots; /* LRU list eviction slots */ + +#define WT_EVICT_SCORE_BUMP 10 +#define WT_EVICT_SCORE_CUTOFF 10 +#define WT_EVICT_SCORE_MAX 100 + /* + * Score of how aggressive eviction should be about selecting eviction candidates. If eviction + * is struggling to make progress, this score rises (up to a maximum of 100), at which point the + * cache is "stuck" and transactions will be rolled back. + */ + uint32_t evict_aggressive_score; + + /* + * Score of how often LRU queues are empty on refill. This score varies + * between 0 (if the queue hasn't been empty for a long time) and 100 + * (if the queue has been empty the last 10 times we filled up. + */ + uint32_t evict_empty_score; + + /* + * Score of how much pressure storing historical versions is having on eviction. This score + * varies between 0, if reconciliation always sees updates that are globally visible and hence + * can be discarded, to 100 if no updates are globally visible. + */ + int32_t evict_lookaside_score; + + /* + * Shared lookaside lock, session and cursor, used by threads accessing the lookaside table + * (other than eviction server and worker threads and the sweep thread, all of which have their + * own lookaside cursors). + */ + WT_SPINLOCK las_lock; + WT_SESSION_IMPL *las_session[WT_LAS_NUM_SESSIONS]; + bool las_session_inuse[WT_LAS_NUM_SESSIONS]; + + uint32_t las_fileid; /* Lookaside table file ID */ + uint64_t las_insert_count; /* Count of inserts to lookaside */ + uint64_t las_remove_count; /* Count of removes from lookaside */ + uint64_t las_pageid; /* Lookaside table page ID counter */ + + bool las_reader; /* Indicate an LAS reader to sweep */ + WT_RWLOCK las_sweepwalk_lock; + WT_SPINLOCK las_sweep_lock; + WT_ITEM las_sweep_key; /* Track sweep position. */ + uint32_t las_sweep_dropmin; /* Minimum btree ID in current set. 
*/ + uint8_t *las_sweep_dropmap; /* Bitmap of dropped btree IDs. */ + uint32_t las_sweep_dropmax; /* Maximum btree ID in current set. */ + uint64_t las_sweep_max_pageid; /* Maximum page ID for sweep. */ + + uint32_t *las_dropped; /* List of dropped btree IDs. */ + size_t las_dropped_next; /* Next index into drop list. */ + size_t las_dropped_alloc; /* Allocated size of drop list. */ + + /* + * The "lookaside_activity" verbose messages are throttled to once per checkpoint. To accomplish + * this we track the checkpoint generation for the most recent read and write verbose messages. + */ + uint64_t las_verb_gen_read; + uint64_t las_verb_gen_write; + + /* + * Cache pool information. + */ + uint64_t cp_pass_pressure; /* Calculated pressure from this pass */ + uint64_t cp_quota; /* Maximum size for this cache */ + uint64_t cp_reserved; /* Base size for this cache */ + WT_SESSION_IMPL *cp_session; /* May be used for cache management */ + uint32_t cp_skip_count; /* Post change stabilization */ + wt_thread_t cp_tid; /* Thread ID for cache pool manager */ + /* State seen at the last pass of the shared cache manager */ + uint64_t cp_saved_app_evicts; /* User eviction count at last review */ + uint64_t cp_saved_app_waits; /* User wait count at last review */ + uint64_t cp_saved_read; /* Read count at last review */ + +/* + * Flags. 
+ */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CACHE_POOL_MANAGER 0x1u /* The active cache pool manager */ -#define WT_CACHE_POOL_RUN 0x2u /* Cache pool thread running */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t pool_flags; /* Cache pool flags */ +#define WT_CACHE_POOL_MANAGER 0x1u /* The active cache pool manager */ +#define WT_CACHE_POOL_RUN 0x2u /* Cache pool thread running */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t pool_flags; /* Cache pool flags */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CACHE_EVICT_CLEAN 0x001u /* Evict clean pages */ -#define WT_CACHE_EVICT_CLEAN_HARD 0x002u /* Clean % blocking app threads */ -#define WT_CACHE_EVICT_DEBUG_MODE 0x004u /* Aggressive debugging mode */ -#define WT_CACHE_EVICT_DIRTY 0x008u /* Evict dirty pages */ -#define WT_CACHE_EVICT_DIRTY_HARD 0x010u /* Dirty % blocking app threads */ -#define WT_CACHE_EVICT_LOOKASIDE 0x020u /* Try lookaside eviction */ -#define WT_CACHE_EVICT_NOKEEP 0x040u /* Don't add read pages to cache */ -#define WT_CACHE_EVICT_SCRUB 0x080u /* Scrub dirty pages */ -#define WT_CACHE_EVICT_URGENT 0x100u /* Pages are in the urgent queue */ +#define WT_CACHE_EVICT_CLEAN 0x001u /* Evict clean pages */ +#define WT_CACHE_EVICT_CLEAN_HARD 0x002u /* Clean % blocking app threads */ +#define WT_CACHE_EVICT_DEBUG_MODE 0x004u /* Aggressive debugging mode */ +#define WT_CACHE_EVICT_DIRTY 0x008u /* Evict dirty pages */ +#define WT_CACHE_EVICT_DIRTY_HARD 0x010u /* Dirty % blocking app threads */ +#define WT_CACHE_EVICT_LOOKASIDE 0x020u /* Try lookaside eviction */ +#define WT_CACHE_EVICT_NOKEEP 0x040u /* Don't add read pages to cache */ +#define WT_CACHE_EVICT_SCRUB 0x080u /* Scrub dirty pages */ +#define WT_CACHE_EVICT_URGENT 0x100u /* Pages are in the urgent queue */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ -#define WT_CACHE_EVICT_ALL (WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_DIRTY) - uint32_t flags; +#define WT_CACHE_EVICT_ALL (WT_CACHE_EVICT_CLEAN | 
WT_CACHE_EVICT_DIRTY) + uint32_t flags; }; -#define WT_WITH_PASS_LOCK(session, op) do { \ - WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \ - WT_WITH_LOCK_WAIT(session, \ - &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \ -} while (0) +#define WT_WITH_PASS_LOCK(session, op) \ + do { \ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \ + WT_WITH_LOCK_WAIT(session, &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \ + } while (0) /* * WT_CACHE_POOL -- * A structure that represents a shared cache. */ struct __wt_cache_pool { - WT_SPINLOCK cache_pool_lock; - WT_CONDVAR *cache_pool_cond; - const char *name; - uint64_t size; - uint64_t chunk; - uint64_t quota; - uint64_t currently_used; - uint32_t refs; /* Reference count for structure. */ - /* Locked: List of connections participating in the cache pool. */ - TAILQ_HEAD(__wt_cache_pool_qh, __wt_connection_impl) cache_pool_qh; - - uint8_t pool_managed; /* Cache pool has a manager thread */ + WT_SPINLOCK cache_pool_lock; + WT_CONDVAR *cache_pool_cond; + const char *name; + uint64_t size; + uint64_t chunk; + uint64_t quota; + uint64_t currently_used; + uint32_t refs; /* Reference count for structure. */ + /* Locked: List of connections participating in the cache pool. 
*/ + TAILQ_HEAD(__wt_cache_pool_qh, __wt_connection_impl) cache_pool_qh; + + uint8_t pool_managed; /* Cache pool has a manager thread */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CACHE_POOL_ACTIVE 0x1u /* Cache pool is active */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags; +#define WT_CACHE_POOL_ACTIVE 0x1u /* Cache pool is active */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags; }; /* Flags used with __wt_evict */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_EVICT_CALL_CLOSING 0x1u /* Closing connection or tree */ -#define WT_EVICT_CALL_NO_SPLIT 0x2u /* Splits not allowed */ -#define WT_EVICT_CALL_URGENT 0x4u /* Urgent eviction */ +#define WT_EVICT_CALL_CLOSING 0x1u /* Closing connection or tree */ +#define WT_EVICT_CALL_NO_SPLIT 0x2u /* Splits not allowed */ +#define WT_EVICT_CALL_URGENT 0x4u /* Urgent eviction */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index 3ea38faee5f..a4a762eae7f 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -8,447 +8,423 @@ /* * __wt_cache_aggressive -- - * Indicate if the cache is operating in aggressive mode. + * Indicate if the cache is operating in aggressive mode. */ static inline bool __wt_cache_aggressive(WT_SESSION_IMPL *session) { - return (S2C(session)->cache->evict_aggressive_score >= - WT_EVICT_SCORE_CUTOFF); + return (S2C(session)->cache->evict_aggressive_score >= WT_EVICT_SCORE_CUTOFF); } /* * __wt_cache_read_gen -- - * Get the current read generation number. + * Get the current read generation number. */ static inline uint64_t __wt_cache_read_gen(WT_SESSION_IMPL *session) { - return (S2C(session)->cache->read_gen); + return (S2C(session)->cache->read_gen); } /* * __wt_cache_read_gen_incr -- - * Increment the current read generation number. + * Increment the current read generation number. 
*/ static inline void __wt_cache_read_gen_incr(WT_SESSION_IMPL *session) { - ++S2C(session)->cache->read_gen; + ++S2C(session)->cache->read_gen; } /* * __wt_cache_read_gen_bump -- - * Update the page's read generation. + * Update the page's read generation. */ static inline void __wt_cache_read_gen_bump(WT_SESSION_IMPL *session, WT_PAGE *page) { - /* Ignore pages set for forcible eviction. */ - if (page->read_gen == WT_READGEN_OLDEST) - return; - - /* Ignore pages already in the future. */ - if (page->read_gen > __wt_cache_read_gen(session)) - return; - - /* - * We set read-generations in the future (where "the future" is measured - * by increments of the global read generation). The reason is because - * when acquiring a new hazard pointer for a page, we can check its read - * generation, and if the read generation isn't less than the current - * global generation, we don't bother updating the page. In other - * words, the goal is to avoid some number of updates immediately after - * each update we have to make. - */ - page->read_gen = __wt_cache_read_gen(session) + WT_READGEN_STEP; + /* Ignore pages set for forcible eviction. */ + if (page->read_gen == WT_READGEN_OLDEST) + return; + + /* Ignore pages already in the future. */ + if (page->read_gen > __wt_cache_read_gen(session)) + return; + + /* + * We set read-generations in the future (where "the future" is measured by increments of the + * global read generation). The reason is because when acquiring a new hazard pointer for a + * page, we can check its read generation, and if the read generation isn't less than the + * current global generation, we don't bother updating the page. In other words, the goal is to + * avoid some number of updates immediately after each update we have to make. + */ + page->read_gen = __wt_cache_read_gen(session) + WT_READGEN_STEP; } /* * __wt_cache_read_gen_new -- - * Get the read generation for a new page in memory. + * Get the read generation for a new page in memory. 
*/ static inline void __wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_CACHE *cache; + WT_CACHE *cache; - cache = S2C(session)->cache; - page->read_gen = - (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2; + cache = S2C(session)->cache; + page->read_gen = (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2; } /* * __wt_cache_stuck -- - * Indicate if the cache is stuck (i.e., not making progress). + * Indicate if the cache is stuck (i.e., not making progress). */ static inline bool __wt_cache_stuck(WT_SESSION_IMPL *session) { - WT_CACHE *cache; + WT_CACHE *cache; - cache = S2C(session)->cache; - return (cache->evict_aggressive_score == WT_EVICT_SCORE_MAX && - F_ISSET(cache, - WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)); + cache = S2C(session)->cache; + return (cache->evict_aggressive_score == WT_EVICT_SCORE_MAX && + F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)); } /* * __wt_page_evict_soon -- - * Set a page to be evicted as soon as possible. + * Set a page to be evicted as soon as possible. */ static inline void __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref) { - WT_UNUSED(session); + WT_UNUSED(session); - ref->page->read_gen = WT_READGEN_OLDEST; + ref->page->read_gen = WT_READGEN_OLDEST; } /* * __wt_cache_pages_inuse -- - * Return the number of pages in use. + * Return the number of pages in use. */ static inline uint64_t __wt_cache_pages_inuse(WT_CACHE *cache) { - return (cache->pages_inmem - cache->pages_evicted); + return (cache->pages_inmem - cache->pages_evicted); } /* * __wt_cache_bytes_plus_overhead -- - * Apply the cache overhead to a size in bytes. + * Apply the cache overhead to a size in bytes. 
*/ static inline uint64_t __wt_cache_bytes_plus_overhead(WT_CACHE *cache, uint64_t sz) { - if (cache->overhead_pct != 0) - sz += (sz * (uint64_t)cache->overhead_pct) / 100; + if (cache->overhead_pct != 0) + sz += (sz * (uint64_t)cache->overhead_pct) / 100; - return (sz); + return (sz); } /* * __wt_cache_bytes_inuse -- - * Return the number of bytes in use. + * Return the number of bytes in use. */ static inline uint64_t __wt_cache_bytes_inuse(WT_CACHE *cache) { - return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_inmem)); + return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_inmem)); } /* * __wt_cache_dirty_inuse -- - * Return the number of dirty bytes in use. + * Return the number of dirty bytes in use. */ static inline uint64_t __wt_cache_dirty_inuse(WT_CACHE *cache) { - return (__wt_cache_bytes_plus_overhead(cache, - cache->bytes_dirty_intl + cache->bytes_dirty_leaf)); + return ( + __wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_intl + cache->bytes_dirty_leaf)); } /* * __wt_cache_dirty_leaf_inuse -- - * Return the number of dirty bytes in use by leaf pages. + * Return the number of dirty bytes in use by leaf pages. */ static inline uint64_t __wt_cache_dirty_leaf_inuse(WT_CACHE *cache) { - return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_leaf)); + return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_leaf)); } /* * __wt_cache_bytes_image -- - * Return the number of page image bytes in use. + * Return the number of page image bytes in use. */ static inline uint64_t __wt_cache_bytes_image(WT_CACHE *cache) { - return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_image)); + return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_image)); } /* * __wt_cache_bytes_other -- - * Return the number of bytes in use not for page images. + * Return the number of bytes in use not for page images. 
*/ static inline uint64_t __wt_cache_bytes_other(WT_CACHE *cache) { - uint64_t bytes_image, bytes_inmem; - - /* - * Reads can race with changes to the values, so only read once and - * check for the race. - */ - bytes_image = *(volatile uint64_t *)&cache->bytes_image; - bytes_inmem = *(volatile uint64_t *)&cache->bytes_inmem; - return ((bytes_image > bytes_inmem) ? 0 : - __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_image)); + uint64_t bytes_image, bytes_inmem; + + /* + * Reads can race with changes to the values, so only read once and check for the race. + */ + bytes_image = *(volatile uint64_t *)&cache->bytes_image; + bytes_inmem = *(volatile uint64_t *)&cache->bytes_inmem; + return ((bytes_image > bytes_inmem) ? 0 : __wt_cache_bytes_plus_overhead( + cache, bytes_inmem - bytes_image)); } /* * __wt_cache_lookaside_score -- - * Get the current lookaside score (between 0 and 100). + * Get the current lookaside score (between 0 and 100). */ static inline uint32_t __wt_cache_lookaside_score(WT_CACHE *cache) { - int32_t global_score; + int32_t global_score; - global_score = cache->evict_lookaside_score; - return ((uint32_t)WT_MIN(WT_MAX(global_score, 0), 100)); + global_score = cache->evict_lookaside_score; + return ((uint32_t)WT_MIN(WT_MAX(global_score, 0), 100)); } /* * __wt_cache_update_lookaside_score -- - * Update the lookaside score based how many unstable updates are seen. + * Update the lookaside score based how many unstable updates are seen. 
*/ static inline void __wt_cache_update_lookaside_score( - WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable) + WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable) { - WT_CACHE *cache; - int32_t global_score, score; + WT_CACHE *cache; + int32_t global_score, score; - if (updates_seen == 0) - return; + if (updates_seen == 0) + return; - cache = S2C(session)->cache; - score = (int32_t)((100 * updates_unstable) / updates_seen); - global_score = cache->evict_lookaside_score; + cache = S2C(session)->cache; + score = (int32_t)((100 * updates_unstable) / updates_seen); + global_score = cache->evict_lookaside_score; - if (score > global_score && global_score < 100) - (void)__wt_atomic_addi32(&cache->evict_lookaside_score, 1); - else if (score < global_score && global_score > 0) - (void)__wt_atomic_subi32(&cache->evict_lookaside_score, 1); + if (score > global_score && global_score < 100) + (void)__wt_atomic_addi32(&cache->evict_lookaside_score, 1); + else if (score < global_score && global_score > 0) + (void)__wt_atomic_subi32(&cache->evict_lookaside_score, 1); } /* * __wt_session_can_wait -- - * Return if a session available for a potentially slow operation. + * Return if a session available for a potentially slow operation. */ static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session) { - /* - * Return if a session available for a potentially slow operation; - * for example, used by the block manager in the case of flushing - * the system cache. - */ - if (!F_ISSET(session, WT_SESSION_CAN_WAIT)) - return (false); - - /* - * LSM sets the "ignore cache size" flag when holding the LSM tree - * lock, in that case, or when holding the schema lock, we don't want - * this thread to block for eviction. 
- */ - return (!F_ISSET(session, - WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_SCHEMA)); + /* + * Return if a session available for a potentially slow operation; for example, used by the + * block manager in the case of flushing the system cache. + */ + if (!F_ISSET(session, WT_SESSION_CAN_WAIT)) + return (false); + + /* + * LSM sets the "ignore cache size" flag when holding the LSM tree lock, in that case, or when + * holding the schema lock, we don't want this thread to block for eviction. + */ + return (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_SCHEMA)); } /* * __wt_eviction_clean_needed -- - * Return if an application thread should do eviction due to the total - * volume of data in cache. + * Return if an application thread should do eviction due to the total volume of data in cache. */ static inline bool __wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp) { - WT_CACHE *cache; - uint64_t bytes_inuse, bytes_max; + WT_CACHE *cache; + uint64_t bytes_inuse, bytes_max; - cache = S2C(session)->cache; + cache = S2C(session)->cache; - /* - * Avoid division by zero if the cache size has not yet been set in a - * shared cache. - */ - bytes_max = S2C(session)->cache_size + 1; - bytes_inuse = __wt_cache_bytes_inuse(cache); + /* + * Avoid division by zero if the cache size has not yet been set in a shared cache. + */ + bytes_max = S2C(session)->cache_size + 1; + bytes_inuse = __wt_cache_bytes_inuse(cache); - if (pct_fullp != NULL) - *pct_fullp = ((100.0 * bytes_inuse) / bytes_max); + if (pct_fullp != NULL) + *pct_fullp = ((100.0 * bytes_inuse) / bytes_max); - return (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100); + return (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100); } /* * __wt_eviction_dirty_target -- - * Return the effective dirty target (including checkpoint scrubbing). + * Return the effective dirty target (including checkpoint scrubbing). 
*/ static inline double __wt_eviction_dirty_target(WT_CACHE *cache) { - double dirty_target, scrub_target; + double dirty_target, scrub_target; - dirty_target = cache->eviction_dirty_target; - scrub_target = cache->eviction_scrub_target; + dirty_target = cache->eviction_dirty_target; + scrub_target = cache->eviction_scrub_target; - return (scrub_target > 0 && scrub_target < dirty_target ? - scrub_target : dirty_target); + return (scrub_target > 0 && scrub_target < dirty_target ? scrub_target : dirty_target); } /* * __wt_eviction_dirty_needed -- - * Return if an application thread should do eviction due to the total - * volume of dirty data in cache. + * Return if an application thread should do eviction due to the total volume of dirty data in + * cache. */ static inline bool __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp) { - WT_CACHE *cache; - uint64_t dirty_inuse, bytes_max; + WT_CACHE *cache; + uint64_t dirty_inuse, bytes_max; - cache = S2C(session)->cache; + cache = S2C(session)->cache; - /* - * Avoid division by zero if the cache size has not yet been set in a - * shared cache. - */ - bytes_max = S2C(session)->cache_size + 1; - dirty_inuse = __wt_cache_dirty_leaf_inuse(cache); + /* + * Avoid division by zero if the cache size has not yet been set in a shared cache. + */ + bytes_max = S2C(session)->cache_size + 1; + dirty_inuse = __wt_cache_dirty_leaf_inuse(cache); - if (pct_fullp != NULL) - *pct_fullp = ((100.0 * dirty_inuse) / bytes_max); + if (pct_fullp != NULL) + *pct_fullp = ((100.0 * dirty_inuse) / bytes_max); - return (dirty_inuse > (uint64_t)( - cache->eviction_dirty_trigger * bytes_max) / 100); + return (dirty_inuse > (uint64_t)(cache->eviction_dirty_trigger * bytes_max) / 100); } /* * __wt_eviction_needed -- - * Return if an application thread should do eviction, and the cache full - * percentage as a side-effect. + * Return if an application thread should do eviction, and the cache full percentage as a + * side-effect. 
*/ static inline bool -__wt_eviction_needed( - WT_SESSION_IMPL *session, bool busy, bool readonly, double *pct_fullp) +__wt_eviction_needed(WT_SESSION_IMPL *session, bool busy, bool readonly, double *pct_fullp) { - WT_CACHE *cache; - double pct_dirty, pct_full; - bool clean_needed, dirty_needed; - - cache = S2C(session)->cache; - - /* - * If the connection is closing we do not need eviction from an - * application thread. The eviction subsystem is already closed. - */ - if (F_ISSET(S2C(session), WT_CONN_CLOSING)) - return (false); - - clean_needed = __wt_eviction_clean_needed(session, &pct_full); - if (readonly) { - dirty_needed = false; - pct_dirty = 0.0; - } else - dirty_needed = __wt_eviction_dirty_needed(session, &pct_dirty); - - /* - * Calculate the cache full percentage; anything over the trigger means - * we involve the application thread. - */ - if (pct_fullp != NULL) - *pct_fullp = WT_MAX(0.0, 100.0 - WT_MIN( - cache->eviction_trigger - pct_full, - cache->eviction_dirty_trigger - pct_dirty)); - - /* - * Only check the dirty trigger when the session is not busy. - * - * In other words, once we are pinning resources, try to finish the - * operation as quickly as possible without exceeding the cache size. - * The next transaction in this session will not be able to start until - * the cache is under the limit. - */ - return (clean_needed || (!busy && dirty_needed)); + WT_CACHE *cache; + double pct_dirty, pct_full; + bool clean_needed, dirty_needed; + + cache = S2C(session)->cache; + + /* + * If the connection is closing we do not need eviction from an application thread. The eviction + * subsystem is already closed. 
+ */ + if (F_ISSET(S2C(session), WT_CONN_CLOSING)) + return (false); + + clean_needed = __wt_eviction_clean_needed(session, &pct_full); + if (readonly) { + dirty_needed = false; + pct_dirty = 0.0; + } else + dirty_needed = __wt_eviction_dirty_needed(session, &pct_dirty); + + /* + * Calculate the cache full percentage; anything over the trigger means we involve the + * application thread. + */ + if (pct_fullp != NULL) + *pct_fullp = WT_MAX(0.0, 100.0 - + WT_MIN(cache->eviction_trigger - pct_full, cache->eviction_dirty_trigger - pct_dirty)); + + /* + * Only check the dirty trigger when the session is not busy. + * + * In other words, once we are pinning resources, try to finish the + * operation as quickly as possible without exceeding the cache size. + * The next transaction in this session will not be able to start until + * the cache is under the limit. + */ + return (clean_needed || (!busy && dirty_needed)); } /* * __wt_cache_full -- - * Return if the cache is at (or over) capacity. + * Return if the cache is at (or over) capacity. */ static inline bool __wt_cache_full(WT_SESSION_IMPL *session) { - WT_CACHE *cache; - WT_CONNECTION_IMPL *conn; + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; - conn = S2C(session); - cache = conn->cache; + conn = S2C(session); + cache = conn->cache; - return (__wt_cache_bytes_inuse(cache) >= conn->cache_size); + return (__wt_cache_bytes_inuse(cache) >= conn->cache_size); } /* * __wt_cache_eviction_check -- - * Evict pages if the cache crosses its boundaries. + * Evict pages if the cache crosses its boundaries. 
*/ static inline int -__wt_cache_eviction_check( - WT_SESSION_IMPL *session, bool busy, bool readonly, bool *didworkp) +__wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, bool *didworkp) { - WT_BTREE *btree; - WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; - double pct_full; - - if (didworkp != NULL) - *didworkp = false; - - /* - * If the current transaction is keeping the oldest ID pinned, it is in - * the middle of an operation. This may prevent the oldest ID from - * moving forward, leading to deadlock, so only evict what we can. - * Otherwise, we are at a transaction boundary and we can work harder - * to make sure there is free space in the cache. - */ - txn_global = &S2C(session)->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); - busy = busy || txn_state->id != WT_TXN_NONE || - session->nhazard > 0 || - (txn_state->pinned_id != WT_TXN_NONE && - txn_global->current != txn_global->oldest_id); - - /* - * LSM sets the "ignore cache size" flag when holding the LSM tree - * lock, in that case, or when holding the handle list, schema or table - * locks (which can block checkpoints and eviction), don't block the - * thread for eviction. - */ - if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE | - WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA | - WT_SESSION_LOCKED_TABLE)) - return (0); - - /* In memory configurations don't block when the cache is full. */ - if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - return (0); - - /* - * Threads operating on cache-resident trees are ignored because - * they're not contributing to the problem. We also don't block while - * reading metadata because we're likely to be holding some other - * resources that could block checkpoints or eviction. - */ - btree = S2BT_SAFE(session); - if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) || - WT_IS_METADATA(session->dhandle))) - return (0); - - /* Check if eviction is needed. 
*/ - if (!__wt_eviction_needed(session, busy, readonly, &pct_full)) - return (0); - - /* - * Some callers (those waiting for slow operations), will sleep if there - * was no cache work to do. After this point, let them skip the sleep. - */ - if (didworkp != NULL) - *didworkp = true; - - return (__wt_cache_eviction_worker(session, busy, readonly, pct_full)); + WT_BTREE *btree; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + double pct_full; + + if (didworkp != NULL) + *didworkp = false; + + /* + * If the current transaction is keeping the oldest ID pinned, it is in the middle of an + * operation. This may prevent the oldest ID from moving forward, leading to deadlock, so only + * evict what we can. Otherwise, we are at a transaction boundary and we can work harder to make + * sure there is free space in the cache. + */ + txn_global = &S2C(session)->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); + busy = busy || txn_state->id != WT_TXN_NONE || session->nhazard > 0 || + (txn_state->pinned_id != WT_TXN_NONE && txn_global->current != txn_global->oldest_id); + + /* + * LSM sets the "ignore cache size" flag when holding the LSM tree lock, in that case, or when + * holding the handle list, schema or table locks (which can block checkpoints and eviction), + * don't block the thread for eviction. + */ + if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_HANDLE_LIST | + WT_SESSION_LOCKED_SCHEMA | WT_SESSION_LOCKED_TABLE)) + return (0); + + /* In memory configurations don't block when the cache is full. */ + if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) + return (0); + + /* + * Threads operating on cache-resident trees are ignored because they're not contributing to the + * problem. We also don't block while reading metadata because we're likely to be holding some + * other resources that could block checkpoints or eviction. 
+ */ + btree = S2BT_SAFE(session); + if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) || WT_IS_METADATA(session->dhandle))) + return (0); + + /* Check if eviction is needed. */ + if (!__wt_eviction_needed(session, busy, readonly, &pct_full)) + return (0); + + /* + * Some callers (those waiting for slow operations), will sleep if there was no cache work to + * do. After this point, let them skip the sleep. + */ + if (didworkp != NULL) + *didworkp = true; + + return (__wt_cache_eviction_worker(session, busy, readonly, pct_full)); } diff --git a/src/third_party/wiredtiger/src/include/capacity.h b/src/third_party/wiredtiger/src/include/capacity.h index 1fb42f5b435..b0db7c78561 100644 --- a/src/third_party/wiredtiger/src/include/capacity.h +++ b/src/third_party/wiredtiger/src/include/capacity.h @@ -7,68 +7,63 @@ */ typedef enum { - WT_THROTTLE_CKPT, /* Checkpoint throttle */ - WT_THROTTLE_EVICT, /* Eviction throttle */ - WT_THROTTLE_LOG, /* Logging throttle */ - WT_THROTTLE_READ /* Read throttle */ + WT_THROTTLE_CKPT, /* Checkpoint throttle */ + WT_THROTTLE_EVICT, /* Eviction throttle */ + WT_THROTTLE_LOG, /* Logging throttle */ + WT_THROTTLE_READ /* Read throttle */ } WT_THROTTLE_TYPE; -#define WT_THROTTLE_MIN WT_MEGABYTE /* Config minimum size */ +#define WT_THROTTLE_MIN WT_MEGABYTE /* Config minimum size */ /* - * The per-file threshold means we won't start the background fsync on a file - * until it crosses the per-file threshold of data written. The other minimum - * threshold defines a minimum threshold for the background thread. Otherwise - * we compute a percentage of the given capacity. + * The per-file threshold means we won't start the background fsync on a file until it crosses the + * per-file threshold of data written. The other minimum threshold defines a minimum threshold for + * the background thread. Otherwise we compute a percentage of the given capacity. 
*/ -#define WT_CAPACITY_FILE_THRESHOLD (WT_MEGABYTE / 2) -#define WT_CAPACITY_MIN_THRESHOLD (10 * WT_MEGABYTE) -#define WT_CAPACITY_PCT 10 +#define WT_CAPACITY_FILE_THRESHOLD (WT_MEGABYTE / 2) +#define WT_CAPACITY_MIN_THRESHOLD (10 * WT_MEGABYTE) +#define WT_CAPACITY_PCT 10 /* - * If we're being asked to sleep a short amount of time, ignore it. - * A non-zero value means there may be a temporary violation of the - * capacity limitation, but one that would even out. That is, possibly - * fewer sleeps with the risk of more choppy behavior as this number - * is larger. + * If we're being asked to sleep a short amount of time, ignore it. A non-zero value means there may + * be a temporary violation of the capacity limitation, but one that would even out. That is, + * possibly fewer sleeps with the risk of more choppy behavior as this number is larger. */ -#define WT_CAPACITY_SLEEP_CUTOFF_US 100 +#define WT_CAPACITY_SLEEP_CUTOFF_US 100 /* - * When given a total capacity, divide it up for each subsystem. These defines - * represent the percentage of the total capacity that we allow for each - * subsystem capacity. We allow and expect the sum of the subsystems to - * exceed 100, as often they are not at their maximum at the same time. In any - * event, we track the total capacity separately, so it is never exceeded. + * When given a total capacity, divide it up for each subsystem. These defines represent the + * percentage of the total capacity that we allow for each subsystem capacity. We allow and expect + * the sum of the subsystems to exceed 100, as often they are not at their maximum at the same time. + * In any event, we track the total capacity separately, so it is never exceeded. 
*/ -#define WT_CAPACITY_SYS(total, pct) ((total) * (pct) / 100) -#define WT_CAP_CKPT 5 -#define WT_CAP_EVICT 50 -#define WT_CAP_LOG 30 -#define WT_CAP_READ 55 +#define WT_CAPACITY_SYS(total, pct) ((total) * (pct) / 100) +#define WT_CAP_CKPT 5 +#define WT_CAP_EVICT 50 +#define WT_CAP_LOG 30 +#define WT_CAP_READ 55 struct __wt_capacity { - uint64_t ckpt; /* Bytes/sec checkpoint capacity */ - uint64_t evict; /* Bytes/sec eviction capacity */ - uint64_t log; /* Bytes/sec logging capacity */ - uint64_t read; /* Bytes/sec read capacity */ - uint64_t total; /* Bytes/sec total capacity */ - uint64_t threshold; /* Capacity size period */ + uint64_t ckpt; /* Bytes/sec checkpoint capacity */ + uint64_t evict; /* Bytes/sec eviction capacity */ + uint64_t log; /* Bytes/sec logging capacity */ + uint64_t read; /* Bytes/sec read capacity */ + uint64_t total; /* Bytes/sec total capacity */ + uint64_t threshold; /* Capacity size period */ - volatile uint64_t written; /* Written this period */ - volatile bool signalled; /* Capacity signalled */ + volatile uint64_t written; /* Written this period */ + volatile bool signalled; /* Capacity signalled */ - /* - * A reservation is a point in time when a read or write for a subsystem - * can be scheduled, so as not to overrun the given capacity. These - * values hold the next available reservation, in nanoseconds since - * the epoch. Getting a reservation with a future time implies sleeping - * until that time; getting a reservation with a past time implies that - * the operation can be done immediately. 
- */ - uint64_t reservation_ckpt; /* Atomic: next checkpoint write */ - uint64_t reservation_evict; /* Atomic: next eviction write */ - uint64_t reservation_log; /* Atomic: next logging write */ - uint64_t reservation_read; /* Atomic: next read */ - uint64_t reservation_total; /* Atomic: next operation of any kind */ + /* + * A reservation is a point in time when a read or write for a subsystem can be scheduled, so as + * not to overrun the given capacity. These values hold the next available reservation, in + * nanoseconds since the epoch. Getting a reservation with a future time implies sleeping until + * that time; getting a reservation with a past time implies that the operation can be done + * immediately. + */ + uint64_t reservation_ckpt; /* Atomic: next checkpoint write */ + uint64_t reservation_evict; /* Atomic: next eviction write */ + uint64_t reservation_log; /* Atomic: next logging write */ + uint64_t reservation_read; /* Atomic: next read */ + uint64_t reservation_total; /* Atomic: next operation of any kind */ }; diff --git a/src/third_party/wiredtiger/src/include/cell.h b/src/third_party/wiredtiger/src/include/cell.h index 2b9427a3095..ca9e8e50e91 100644 --- a/src/third_party/wiredtiger/src/include/cell.h +++ b/src/third_party/wiredtiger/src/include/cell.h @@ -66,22 +66,22 @@ * * Bits 5-8 are cell "types". 
*/ -#define WT_CELL_KEY_SHORT 0x01 /* Short key */ -#define WT_CELL_KEY_SHORT_PFX 0x02 /* Short key with prefix byte */ -#define WT_CELL_VALUE_SHORT 0x03 /* Short data */ -#define WT_CELL_SHORT_TYPE(v) ((v) & 0x03U) +#define WT_CELL_KEY_SHORT 0x01 /* Short key */ +#define WT_CELL_KEY_SHORT_PFX 0x02 /* Short key with prefix byte */ +#define WT_CELL_VALUE_SHORT 0x03 /* Short data */ +#define WT_CELL_SHORT_TYPE(v) ((v)&0x03U) -#define WT_CELL_SHORT_MAX 63 /* Maximum short key/value */ -#define WT_CELL_SHORT_SHIFT 2 /* Shift for short key/value */ +#define WT_CELL_SHORT_MAX 63 /* Maximum short key/value */ +#define WT_CELL_SHORT_SHIFT 2 /* Shift for short key/value */ -#define WT_CELL_64V 0x04 /* Associated value */ -#define WT_CELL_SECOND_DESC 0x08 /* Second descriptor byte */ +#define WT_CELL_64V 0x04 /* Associated value */ +#define WT_CELL_SECOND_DESC 0x08 /* Second descriptor byte */ -#define WT_CELL_TS_DURABLE 0x01 /* Newest-durable timestamp */ -#define WT_CELL_TS_START 0x02 /* Oldest-start timestamp */ -#define WT_CELL_TS_STOP 0x04 /* Newest-stop timestamp */ -#define WT_CELL_TXN_START 0x08 /* Oldest-start txn ID */ -#define WT_CELL_TXN_STOP 0x10 /* Newest-stop txn ID */ +#define WT_CELL_TS_DURABLE 0x01 /* Newest-durable timestamp */ +#define WT_CELL_TS_START 0x02 /* Oldest-start timestamp */ +#define WT_CELL_TS_STOP 0x04 /* Newest-stop timestamp */ +#define WT_CELL_TXN_START 0x08 /* Oldest-start txn ID */ +#define WT_CELL_TXN_STOP 0x10 /* Newest-stop txn ID */ /* * WT_CELL_ADDR_INT is an internal block location, WT_CELL_ADDR_LEAF is a leaf @@ -96,53 +96,51 @@ * value dictionaries: if the two values are the same, we only store them once * and have any second and subsequent uses reference the original. 
*/ -#define WT_CELL_ADDR_DEL (0) /* Address: deleted */ -#define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */ -#define WT_CELL_ADDR_LEAF (2 << 4) /* Address: leaf */ -#define WT_CELL_ADDR_LEAF_NO (3 << 4) /* Address: leaf no overflow */ -#define WT_CELL_DEL (4 << 4) /* Deleted value */ -#define WT_CELL_KEY (5 << 4) /* Key */ -#define WT_CELL_KEY_OVFL (6 << 4) /* Overflow key */ -#define WT_CELL_KEY_OVFL_RM (12 << 4) /* Overflow key (removed) */ -#define WT_CELL_KEY_PFX (7 << 4) /* Key with prefix byte */ -#define WT_CELL_VALUE (8 << 4) /* Value */ -#define WT_CELL_VALUE_COPY (9 << 4) /* Value copy */ -#define WT_CELL_VALUE_OVFL (10 << 4) /* Overflow value */ -#define WT_CELL_VALUE_OVFL_RM (11 << 4) /* Overflow value (removed) */ - -#define WT_CELL_TYPE_MASK (0x0fU << 4) /* Maximum 16 cell types */ -#define WT_CELL_TYPE(v) ((v) & WT_CELL_TYPE_MASK) +#define WT_CELL_ADDR_DEL (0) /* Address: deleted */ +#define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */ +#define WT_CELL_ADDR_LEAF (2 << 4) /* Address: leaf */ +#define WT_CELL_ADDR_LEAF_NO (3 << 4) /* Address: leaf no overflow */ +#define WT_CELL_DEL (4 << 4) /* Deleted value */ +#define WT_CELL_KEY (5 << 4) /* Key */ +#define WT_CELL_KEY_OVFL (6 << 4) /* Overflow key */ +#define WT_CELL_KEY_OVFL_RM (12 << 4) /* Overflow key (removed) */ +#define WT_CELL_KEY_PFX (7 << 4) /* Key with prefix byte */ +#define WT_CELL_VALUE (8 << 4) /* Value */ +#define WT_CELL_VALUE_COPY (9 << 4) /* Value copy */ +#define WT_CELL_VALUE_OVFL (10 << 4) /* Overflow value */ +#define WT_CELL_VALUE_OVFL_RM (11 << 4) /* Overflow value (removed) */ + +#define WT_CELL_TYPE_MASK (0x0fU << 4) /* Maximum 16 cell types */ +#define WT_CELL_TYPE(v) ((v)&WT_CELL_TYPE_MASK) /* - * When unable to create a short key or value (and where it wasn't an associated - * RLE or validity window that prevented creating a short value), the data must - * be at least 64B, else we'd have used a short cell. 
When packing/unpacking the - * size, decrement/increment the size, in the hopes that a smaller size will - * pack into a single byte instead of two. + * When unable to create a short key or value (and where it wasn't an associated RLE or validity + * window that prevented creating a short value), the data must be at least 64B, else we'd have used + * a short cell. When packing/unpacking the size, decrement/increment the size, in the hopes that a + * smaller size will pack into a single byte instead of two. */ -#define WT_CELL_SIZE_ADJUST (WT_CELL_SHORT_MAX + 1) +#define WT_CELL_SIZE_ADJUST (WT_CELL_SHORT_MAX + 1) /* * WT_CELL -- * Variable-length, on-page cell header. */ struct __wt_cell { - /* - * Maximum of 62 bytes: - * 1: cell descriptor byte - * 1: prefix compression count - * 1: secondary descriptor byte - * 27: 3 timestamps (uint64_t encoding, max 9 bytes) - * 18: 2 transaction IDs (uint64_t encoding, max 9 bytes) - * 9: associated 64-bit value (uint64_t encoding, max 9 bytes) - * 5: data length (uint32_t encoding, max 5 bytes) - * - * This calculation is extremely pessimistic: the prefix compression - * count and 64V value overlap, and the validity window, 64V value - * and data length are all optional in some cases. - */ - uint8_t __chunk[1 + 1 + 1 + - 6 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE]; + /* + * Maximum of 62 bytes: + * 1: cell descriptor byte + * 1: prefix compression count + * 1: secondary descriptor byte + * 27: 3 timestamps (uint64_t encoding, max 9 bytes) + * 18: 2 transaction IDs (uint64_t encoding, max 9 bytes) + * 9: associated 64-bit value (uint64_t encoding, max 9 bytes) + * 5: data length (uint32_t encoding, max 5 bytes) + * + * This calculation is extremely pessimistic: the prefix compression + * count and 64V value overlap, and the validity window, 64V value + * and data length are all optional in some cases. 
+ */ + uint8_t __chunk[1 + 1 + 1 + 6 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE]; }; /* @@ -150,36 +148,36 @@ struct __wt_cell { * Unpacked cell. */ struct __wt_cell_unpack { - WT_CELL *cell; /* Cell's disk image address */ + WT_CELL *cell; /* Cell's disk image address */ - uint64_t v; /* RLE count or recno */ + uint64_t v; /* RLE count or recno */ - wt_timestamp_t start_ts; /* Value validity window */ - uint64_t start_txn; - wt_timestamp_t stop_ts; - uint64_t stop_txn; + wt_timestamp_t start_ts; /* Value validity window */ + uint64_t start_txn; + wt_timestamp_t stop_ts; + uint64_t stop_txn; - /* Address validity window */ - wt_timestamp_t newest_durable_ts; - wt_timestamp_t oldest_start_ts; - uint64_t oldest_start_txn; - wt_timestamp_t newest_stop_ts; - uint64_t newest_stop_txn; + /* Address validity window */ + wt_timestamp_t newest_durable_ts; + wt_timestamp_t oldest_start_ts; + uint64_t oldest_start_txn; + wt_timestamp_t newest_stop_ts; + uint64_t newest_stop_txn; - /* - * !!! - * The size and __len fields are reasonably type size_t; don't change - * the type, performance drops significantly if they're type size_t. - */ - const void *data; /* Data */ - uint32_t size; /* Data size */ + /* + * !!! + * The size and __len fields are reasonably type size_t; don't change + * the type, performance drops significantly if they're type size_t. 
+ */ + const void *data; /* Data */ + uint32_t size; /* Data size */ - uint32_t __len; /* Cell + data length (usually) */ + uint32_t __len; /* Cell + data length (usually) */ - uint8_t prefix; /* Cell prefix length */ + uint8_t prefix; /* Cell prefix length */ - uint8_t raw; /* Raw cell type (include "shorts") */ - uint8_t type; /* Cell type */ + uint8_t raw; /* Raw cell type (include "shorts") */ + uint8_t type; /* Cell type */ - uint8_t ovfl; /* boolean: cell is an overflow */ + uint8_t ovfl; /* boolean: cell is an overflow */ }; diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index cceadbf8c0c..f8f7f670392 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -8,1098 +8,1033 @@ /* * __cell_check_value_validity -- - * Check the value's validity window for sanity. + * Check the value's validity window for sanity. */ static inline void -__cell_check_value_validity(WT_SESSION_IMPL *session, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn) +__cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_ts, uint64_t start_txn, + wt_timestamp_t stop_ts, uint64_t stop_txn) { #ifdef HAVE_DIAGNOSTIC - char ts_string[2][WT_TS_INT_STRING_SIZE]; - - if (stop_ts == WT_TS_NONE) { - __wt_errx(session, "stop timestamp of 0"); - WT_ASSERT(session, stop_ts != WT_TS_NONE); - } - if (start_ts > stop_ts) { - __wt_errx(session, - "a start timestamp %s newer than its stop timestamp %s", - __wt_timestamp_to_string(start_ts, ts_string[0]), - __wt_timestamp_to_string(stop_ts, ts_string[1])); - WT_ASSERT(session, start_ts <= stop_ts); - } - - if (stop_txn == WT_TXN_NONE) { - __wt_errx(session, "stop transaction ID of 0"); - WT_ASSERT(session, stop_txn != WT_TXN_NONE); - } - if (start_txn > stop_txn) { - __wt_errx(session, - "a start transaction ID %" PRIu64 " newer than its stop " - "transaction ID %" PRIu64, - 
start_txn, stop_txn); - WT_ASSERT(session, start_txn <= stop_txn); - } + char ts_string[2][WT_TS_INT_STRING_SIZE]; + + if (stop_ts == WT_TS_NONE) { + __wt_errx(session, "stop timestamp of 0"); + WT_ASSERT(session, stop_ts != WT_TS_NONE); + } + if (start_ts > stop_ts) { + __wt_errx(session, "a start timestamp %s newer than its stop timestamp %s", + __wt_timestamp_to_string(start_ts, ts_string[0]), + __wt_timestamp_to_string(stop_ts, ts_string[1])); + WT_ASSERT(session, start_ts <= stop_ts); + } + + if (stop_txn == WT_TXN_NONE) { + __wt_errx(session, "stop transaction ID of 0"); + WT_ASSERT(session, stop_txn != WT_TXN_NONE); + } + if (start_txn > stop_txn) { + __wt_errx(session, "a start transaction ID %" PRIu64 + " newer than its stop " + "transaction ID %" PRIu64, + start_txn, stop_txn); + WT_ASSERT(session, start_txn <= stop_txn); + } #else - WT_UNUSED(session); - WT_UNUSED(start_ts); - WT_UNUSED(start_txn); - WT_UNUSED(stop_ts); - WT_UNUSED(stop_txn); + WT_UNUSED(session); + WT_UNUSED(start_ts); + WT_UNUSED(start_txn); + WT_UNUSED(stop_ts); + WT_UNUSED(stop_txn); #endif } /* * __cell_pack_value_validity -- - * Pack the validity window for a value. + * Pack the validity window for a value. */ static inline void -__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn) +__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t start_ts, + uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn) { - uint8_t flags, *flagsp; - - __cell_check_value_validity( - session, start_ts, start_txn, stop_ts, stop_txn); - - /* - * Historic page versions and globally visible values have no associated - * validity window, else set a flag bit and store them. 
- */ - if (!__wt_process.page_version_ts || - (start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE && - stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX)) - ++*pp; - else { - **pp |= WT_CELL_SECOND_DESC; - ++*pp; - flagsp = *pp; - ++*pp; - - flags = 0; - if (start_ts != WT_TS_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_ts)); - LF_SET(WT_CELL_TS_START); - } - if (start_txn != WT_TXN_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_txn)); - LF_SET(WT_CELL_TXN_START); - } - if (stop_ts != WT_TS_MAX) { - /* Store differences, not absolutes. */ - WT_IGNORE_RET( - __wt_vpack_uint(pp, 0, stop_ts - start_ts)); - LF_SET(WT_CELL_TS_STOP); - } - if (stop_txn != WT_TXN_MAX) { - /* Store differences, not absolutes. */ - WT_IGNORE_RET( - __wt_vpack_uint(pp, 0, stop_txn - start_txn)); - LF_SET(WT_CELL_TXN_STOP); - } - *flagsp = flags; - } + uint8_t flags, *flagsp; + + __cell_check_value_validity(session, start_ts, start_txn, stop_ts, stop_txn); + + /* + * Historic page versions and globally visible values have no associated validity window, else + * set a flag bit and store them. + */ + if (!__wt_process.page_version_ts || (start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE && + stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX)) + ++*pp; + else { + **pp |= WT_CELL_SECOND_DESC; + ++*pp; + flagsp = *pp; + ++*pp; + + flags = 0; + if (start_ts != WT_TS_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_ts)); + LF_SET(WT_CELL_TS_START); + } + if (start_txn != WT_TXN_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_txn)); + LF_SET(WT_CELL_TXN_START); + } + if (stop_ts != WT_TS_MAX) { + /* Store differences, not absolutes. */ + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_ts - start_ts)); + LF_SET(WT_CELL_TS_STOP); + } + if (stop_txn != WT_TXN_MAX) { + /* Store differences, not absolutes. 
*/ + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_txn - start_txn)); + LF_SET(WT_CELL_TXN_STOP); + } + *flagsp = flags; + } } /* * __wt_check_addr_validity -- - * Check the address' validity window for sanity. + * Check the address' validity window for sanity. */ static inline void -__wt_check_addr_validity(WT_SESSION_IMPL *session, - wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, - wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn) +__wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t oldest_start_ts, + uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn) { #ifdef HAVE_DIAGNOSTIC - char ts_string[2][WT_TS_INT_STRING_SIZE]; - - if (newest_stop_ts == WT_TS_NONE) { - __wt_errx(session, "newest stop timestamp of 0"); - WT_ASSERT(session, newest_stop_ts != WT_TS_NONE); - } - if (oldest_start_ts > newest_stop_ts) { - __wt_errx(session, - "an oldest start timestamp %s newer than its newest " - "stop timestamp %s", - __wt_timestamp_to_string(oldest_start_ts, ts_string[0]), - __wt_timestamp_to_string(newest_stop_ts, ts_string[1])); - WT_ASSERT(session, oldest_start_ts <= newest_stop_ts); - } - if (newest_stop_txn == WT_TXN_NONE) { - __wt_errx(session, "newest stop transaction of 0"); - WT_ASSERT(session, newest_stop_txn != WT_TXN_NONE); - } - if (oldest_start_txn > newest_stop_txn) { - __wt_errx(session, - "an oldest start transaction %" PRIu64 " newer than its " - "newest stop transaction %" PRIu64, - oldest_start_txn, newest_stop_txn); - WT_ASSERT(session, oldest_start_txn <= newest_stop_txn); - } + char ts_string[2][WT_TS_INT_STRING_SIZE]; + + if (newest_stop_ts == WT_TS_NONE) { + __wt_errx(session, "newest stop timestamp of 0"); + WT_ASSERT(session, newest_stop_ts != WT_TS_NONE); + } + if (oldest_start_ts > newest_stop_ts) { + __wt_errx(session, + "an oldest start timestamp %s newer than its newest " + "stop timestamp %s", + __wt_timestamp_to_string(oldest_start_ts, ts_string[0]), + 
__wt_timestamp_to_string(newest_stop_ts, ts_string[1])); + WT_ASSERT(session, oldest_start_ts <= newest_stop_ts); + } + if (newest_stop_txn == WT_TXN_NONE) { + __wt_errx(session, "newest stop transaction of 0"); + WT_ASSERT(session, newest_stop_txn != WT_TXN_NONE); + } + if (oldest_start_txn > newest_stop_txn) { + __wt_errx(session, "an oldest start transaction %" PRIu64 + " newer than its " + "newest stop transaction %" PRIu64, + oldest_start_txn, newest_stop_txn); + WT_ASSERT(session, oldest_start_txn <= newest_stop_txn); + } #else - WT_UNUSED(session); - WT_UNUSED(oldest_start_ts); - WT_UNUSED(oldest_start_txn); - WT_UNUSED(newest_stop_ts); - WT_UNUSED(newest_stop_txn); + WT_UNUSED(session); + WT_UNUSED(oldest_start_ts); + WT_UNUSED(oldest_start_txn); + WT_UNUSED(newest_stop_ts); + WT_UNUSED(newest_stop_txn); #endif } /* * __cell_pack_addr_validity -- - * Pack the validity window for an address. + * Pack the validity window for an address. */ static inline void -__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, - wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, - uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, - uint64_t newest_stop_txn) +__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t newest_durable_ts, + wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, + uint64_t newest_stop_txn) { - uint8_t flags, *flagsp; - - __wt_check_addr_validity(session, - oldest_start_ts, oldest_start_txn, newest_stop_ts, newest_stop_txn); - - /* - * Historic page versions and globally visible values have no associated - * validity window, else set a flag bit and store them. 
- */ - if (!__wt_process.page_version_ts || - (newest_durable_ts == WT_TS_NONE && - oldest_start_ts == WT_TS_NONE && oldest_start_txn == WT_TXN_NONE && - newest_stop_ts == WT_TS_MAX && newest_stop_txn == WT_TXN_MAX)) - ++*pp; - else { - **pp |= WT_CELL_SECOND_DESC; - ++*pp; - flagsp = *pp; - ++*pp; - - flags = 0; - if (newest_durable_ts != WT_TS_NONE) { - WT_IGNORE_RET( - __wt_vpack_uint(pp, 0, newest_durable_ts)); - LF_SET(WT_CELL_TS_DURABLE); - } - if (oldest_start_ts != WT_TS_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_ts)); - LF_SET(WT_CELL_TS_START); - } - if (oldest_start_txn != WT_TXN_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_txn)); - LF_SET(WT_CELL_TXN_START); - } - if (newest_stop_ts != WT_TS_MAX) { - /* Store differences, not absolutes. */ - WT_IGNORE_RET(__wt_vpack_uint( - pp, 0, newest_stop_ts - oldest_start_ts)); - LF_SET(WT_CELL_TS_STOP); - } - if (newest_stop_txn != WT_TXN_MAX) { - /* Store differences, not absolutes. */ - WT_IGNORE_RET(__wt_vpack_uint( - pp, 0, newest_stop_txn - oldest_start_txn)); - LF_SET(WT_CELL_TXN_STOP); - } - *flagsp = flags; - } + uint8_t flags, *flagsp; + + __wt_check_addr_validity( + session, oldest_start_ts, oldest_start_txn, newest_stop_ts, newest_stop_txn); + + /* + * Historic page versions and globally visible values have no associated validity window, else + * set a flag bit and store them. 
+ */ + if (!__wt_process.page_version_ts || + (newest_durable_ts == WT_TS_NONE && oldest_start_ts == WT_TS_NONE && + oldest_start_txn == WT_TXN_NONE && newest_stop_ts == WT_TS_MAX && + newest_stop_txn == WT_TXN_MAX)) + ++*pp; + else { + **pp |= WT_CELL_SECOND_DESC; + ++*pp; + flagsp = *pp; + ++*pp; + + flags = 0; + if (newest_durable_ts != WT_TS_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_durable_ts)); + LF_SET(WT_CELL_TS_DURABLE); + } + if (oldest_start_ts != WT_TS_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_ts)); + LF_SET(WT_CELL_TS_START); + } + if (oldest_start_txn != WT_TXN_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_txn)); + LF_SET(WT_CELL_TXN_START); + } + if (newest_stop_ts != WT_TS_MAX) { + /* Store differences, not absolutes. */ + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_ts - oldest_start_ts)); + LF_SET(WT_CELL_TS_STOP); + } + if (newest_stop_txn != WT_TXN_MAX) { + /* Store differences, not absolutes. */ + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_txn - oldest_start_txn)); + LF_SET(WT_CELL_TXN_STOP); + } + *flagsp = flags; + } } /* * __wt_cell_pack_addr -- - * Pack an address cell. + * Pack an address cell. */ static inline size_t -__wt_cell_pack_addr(WT_SESSION_IMPL *session, - WT_CELL *cell, u_int cell_type, uint64_t recno, - wt_timestamp_t newest_durable_ts, - wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, - wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, size_t size) +__wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, uint64_t recno, + wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, + wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, size_t size) { - uint8_t *p; - - /* Start building a cell: the descriptor byte starts zero. 
*/ - p = cell->__chunk; - *p = '\0'; - - __cell_pack_addr_validity(session, &p, - newest_durable_ts, oldest_start_ts, - oldest_start_txn, newest_stop_ts, newest_stop_txn); - - if (recno == WT_RECNO_OOB) - cell->__chunk[0] |= (uint8_t)cell_type; /* Type */ - else { - cell->__chunk[0] |= (uint8_t)(cell_type | WT_CELL_64V); - /* Record number */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, recno)); - } - /* Length */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); - return (WT_PTRDIFF(p, cell)); + uint8_t *p; + + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + __cell_pack_addr_validity(session, &p, newest_durable_ts, oldest_start_ts, oldest_start_txn, + newest_stop_ts, newest_stop_txn); + + if (recno == WT_RECNO_OOB) + cell->__chunk[0] |= (uint8_t)cell_type; /* Type */ + else { + cell->__chunk[0] |= (uint8_t)(cell_type | WT_CELL_64V); + /* Record number */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, recno)); + } + /* Length */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); + return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_pack_value -- - * Set a value item's WT_CELL contents. + * Set a value item's WT_CELL contents. */ static inline size_t -__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) +__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, + uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) { - uint8_t byte, *p; - bool validity; - - /* Start building a cell: the descriptor byte starts zero. */ - p = cell->__chunk; - *p = '\0'; - - __cell_pack_value_validity( - session, &p, start_ts, start_txn, stop_ts, stop_txn); - - /* - * Short data cells without a validity window or run-length encoding - * have 6 bits of data length in the descriptor byte. 
- */ - validity = (cell->__chunk[0] & WT_CELL_SECOND_DESC) != 0; - if (!validity && rle < 2 && size <= WT_CELL_SHORT_MAX) { - byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = (uint8_t) - ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT); - } else { - /* - * If the size was what prevented us from using a short cell, - * it's larger than the adjustment size. Decrement/increment - * it when packing/unpacking so it takes up less room. - */ - if (!validity && rle < 2) { - size -= WT_CELL_SIZE_ADJUST; - cell->__chunk[0] |= WT_CELL_VALUE; /* Type */ - } else { - cell->__chunk[0] |= WT_CELL_VALUE | WT_CELL_64V; - /* RLE */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); - } - /* Length */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); - } - return (WT_PTRDIFF(p, cell)); + uint8_t byte, *p; + bool validity; + + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn); + + /* + * Short data cells without a validity window or run-length encoding have 6 bits of data length + * in the descriptor byte. + */ + validity = (cell->__chunk[0] & WT_CELL_SECOND_DESC) != 0; + if (!validity && rle < 2 && size <= WT_CELL_SHORT_MAX) { + byte = (uint8_t)size; /* Type + length */ + cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT); + } else { + /* + * If the size was what prevented us from using a short cell, it's larger than the + * adjustment size. Decrement/increment it when packing/unpacking so it takes up less room. 
+ */ + if (!validity && rle < 2) { + size -= WT_CELL_SIZE_ADJUST; + cell->__chunk[0] |= WT_CELL_VALUE; /* Type */ + } else { + cell->__chunk[0] |= WT_CELL_VALUE | WT_CELL_64V; + /* RLE */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); + } + /* Length */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); + } + return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_pack_value_match -- - * Return if two value items would have identical WT_CELLs (except for - * their validity window and any RLE). + * Return if two value items would have identical WT_CELLs (except for their validity window and + * any RLE). */ static inline int -__wt_cell_pack_value_match(WT_CELL *page_cell, - WT_CELL *val_cell, const uint8_t *val_data, bool *matchp) +__wt_cell_pack_value_match( + WT_CELL *page_cell, WT_CELL *val_cell, const uint8_t *val_data, bool *matchp) { - uint64_t alen, blen, v; - const uint8_t *a, *b; - uint8_t flags; - bool rle, validity; - - *matchp = false; /* Default to no-match */ - - /* - * This is a special-purpose function used by reconciliation to support - * dictionary lookups. We're passed an on-page cell and a created cell - * plus a chunk of data we're about to write on the page, and we return - * if they would match on the page. Ignore the validity window and the - * column-store RLE because the copied cell will have its own. 
- */ - a = (uint8_t *)page_cell; - b = (uint8_t *)val_cell; - - if (WT_CELL_SHORT_TYPE(a[0]) == WT_CELL_VALUE_SHORT) { - alen = a[0] >> WT_CELL_SHORT_SHIFT; - ++a; - } else if (WT_CELL_TYPE(a[0]) == WT_CELL_VALUE) { - rle = (a[0] & WT_CELL_64V) != 0; - validity = (a[0] & WT_CELL_SECOND_DESC) != 0; - ++a; - if (validity) { /* Skip validity window */ - flags = *a; - ++a; - if (LF_ISSET(WT_CELL_TS_START)) - WT_RET(__wt_vunpack_uint(&a, 0, &v)); - if (LF_ISSET(WT_CELL_TS_STOP)) - WT_RET(__wt_vunpack_uint(&a, 0, &v)); - if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint(&a, 0, &v)); - if (LF_ISSET(WT_CELL_TXN_STOP)) - WT_RET(__wt_vunpack_uint(&a, 0, &v)); - } - if (rle) /* Skip RLE */ - WT_RET(__wt_vunpack_uint(&a, 0, &v)); - WT_RET(__wt_vunpack_uint(&a, 0, &alen)); /* Length */ - } else - return (0); - - if (WT_CELL_SHORT_TYPE(b[0]) == WT_CELL_VALUE_SHORT) { - blen = b[0] >> WT_CELL_SHORT_SHIFT; - ++b; - } else if (WT_CELL_TYPE(b[0]) == WT_CELL_VALUE) { - rle = (b[0] & WT_CELL_64V) != 0; - validity = (b[0] & WT_CELL_SECOND_DESC) != 0; - ++b; - if (validity) { /* Skip validity window */ - flags = *b; - ++b; - if (LF_ISSET(WT_CELL_TS_START)) - WT_RET(__wt_vunpack_uint(&b, 0, &v)); - if (LF_ISSET(WT_CELL_TS_STOP)) - WT_RET(__wt_vunpack_uint(&b, 0, &v)); - if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint(&b, 0, &v)); - if (LF_ISSET(WT_CELL_TXN_STOP)) - WT_RET(__wt_vunpack_uint(&b, 0, &v)); - } - if (rle) /* Skip RLE */ - WT_RET(__wt_vunpack_uint(&b, 0, &v)); - WT_RET(__wt_vunpack_uint(&b, 0, &blen)); /* Length */ - } else - return (0); - - if (alen == blen) - *matchp = memcmp(a, val_data, alen) == 0; - return (0); + uint64_t alen, blen, v; + uint8_t flags; + const uint8_t *a, *b; + bool rle, validity; + + *matchp = false; /* Default to no-match */ + + /* + * This is a special-purpose function used by reconciliation to support dictionary lookups. 
+ * We're passed an on-page cell and a created cell plus a chunk of data we're about to write on + * the page, and we return if they would match on the page. Ignore the validity window and the + * column-store RLE because the copied cell will have its own. + */ + a = (uint8_t *)page_cell; + b = (uint8_t *)val_cell; + + if (WT_CELL_SHORT_TYPE(a[0]) == WT_CELL_VALUE_SHORT) { + alen = a[0] >> WT_CELL_SHORT_SHIFT; + ++a; + } else if (WT_CELL_TYPE(a[0]) == WT_CELL_VALUE) { + rle = (a[0] & WT_CELL_64V) != 0; + validity = (a[0] & WT_CELL_SECOND_DESC) != 0; + ++a; + if (validity) { /* Skip validity window */ + flags = *a; + ++a; + if (LF_ISSET(WT_CELL_TS_START)) + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + if (LF_ISSET(WT_CELL_TS_STOP)) + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + if (LF_ISSET(WT_CELL_TXN_START)) + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + if (LF_ISSET(WT_CELL_TXN_STOP)) + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + } + if (rle) /* Skip RLE */ + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + WT_RET(__wt_vunpack_uint(&a, 0, &alen)); /* Length */ + } else + return (0); + + if (WT_CELL_SHORT_TYPE(b[0]) == WT_CELL_VALUE_SHORT) { + blen = b[0] >> WT_CELL_SHORT_SHIFT; + ++b; + } else if (WT_CELL_TYPE(b[0]) == WT_CELL_VALUE) { + rle = (b[0] & WT_CELL_64V) != 0; + validity = (b[0] & WT_CELL_SECOND_DESC) != 0; + ++b; + if (validity) { /* Skip validity window */ + flags = *b; + ++b; + if (LF_ISSET(WT_CELL_TS_START)) + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + if (LF_ISSET(WT_CELL_TS_STOP)) + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + if (LF_ISSET(WT_CELL_TXN_START)) + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + if (LF_ISSET(WT_CELL_TXN_STOP)) + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + } + if (rle) /* Skip RLE */ + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + WT_RET(__wt_vunpack_uint(&b, 0, &blen)); /* Length */ + } else + return (0); + + if (alen == blen) + *matchp = memcmp(a, val_data, alen) == 0; + return (0); } /* * __wt_cell_pack_copy -- - * Write a copy value cell. + * Write a copy value cell. 
*/ static inline size_t -__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, uint64_t v) +__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, + uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, uint64_t v) { - uint8_t *p; - - /* Start building a cell: the descriptor byte starts zero. */ - p = cell->__chunk; - *p = '\0'; - - __cell_pack_value_validity( - session, &p, start_ts, start_txn, stop_ts, stop_txn); - - if (rle < 2) - cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */ - else { - cell->__chunk[0] |= /* Type */ - WT_CELL_VALUE_COPY | WT_CELL_64V; - /* RLE */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); - } - /* Copy offset */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, v)); - return (WT_PTRDIFF(p, cell)); + uint8_t *p; + + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn); + + if (rle < 2) + cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */ + else { + cell->__chunk[0] |= /* Type */ + WT_CELL_VALUE_COPY | WT_CELL_64V; + /* RLE */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); + } + /* Copy offset */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, v)); + return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_pack_del -- - * Write a deleted value cell. + * Write a deleted value cell. */ static inline size_t -__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) +__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, + uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) { - uint8_t *p; - - /* Start building a cell: the descriptor byte starts zero. 
*/ - p = cell->__chunk; - *p = '\0'; - - __cell_pack_value_validity( - session, &p, start_ts, start_txn, stop_ts, stop_txn); - - if (rle < 2) - cell->__chunk[0] |= WT_CELL_DEL; /* Type */ - else { - /* Type */ - cell->__chunk[0] |= WT_CELL_DEL | WT_CELL_64V; - /* RLE */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); - } - return (WT_PTRDIFF(p, cell)); + uint8_t *p; + + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn); + + if (rle < 2) + cell->__chunk[0] |= WT_CELL_DEL; /* Type */ + else { + /* Type */ + cell->__chunk[0] |= WT_CELL_DEL | WT_CELL_64V; + /* RLE */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); + } + return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_pack_int_key -- - * Set a row-store internal page key's WT_CELL contents. + * Set a row-store internal page key's WT_CELL contents. */ static inline size_t __wt_cell_pack_int_key(WT_CELL *cell, size_t size) { - uint8_t byte, *p; - - /* Short keys have 6 bits of data length in the descriptor byte. */ - if (size <= WT_CELL_SHORT_MAX) { - byte = (uint8_t)size; - cell->__chunk[0] = (uint8_t) - ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); - return (1); - } - - cell->__chunk[0] = WT_CELL_KEY; /* Type */ - p = cell->__chunk + 1; - - /* - * If the size prevented us from using a short cell, it's larger than - * the adjustment size. Decrement/increment it when packing/unpacking - * so it takes up less room. - */ - size -= WT_CELL_SIZE_ADJUST; /* Length */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); - return (WT_PTRDIFF(p, cell)); + uint8_t byte, *p; + + /* Short keys have 6 bits of data length in the descriptor byte. 
*/ + if (size <= WT_CELL_SHORT_MAX) { + byte = (uint8_t)size; + cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); + return (1); + } + + cell->__chunk[0] = WT_CELL_KEY; /* Type */ + p = cell->__chunk + 1; + + /* + * If the size prevented us from using a short cell, it's larger than the adjustment size. + * Decrement/increment it when packing/unpacking so it takes up less room. + */ + size -= WT_CELL_SIZE_ADJUST; /* Length */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); + return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_pack_leaf_key -- - * Set a row-store leaf page key's WT_CELL contents. + * Set a row-store leaf page key's WT_CELL contents. */ static inline size_t __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) { - uint8_t byte, *p; - - /* Short keys have 6 bits of data length in the descriptor byte. */ - if (size <= WT_CELL_SHORT_MAX) { - if (prefix == 0) { - byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = (uint8_t) - ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); - return (1); - } - byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = (uint8_t) - ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX); - cell->__chunk[1] = prefix; /* Prefix */ - return (2); - } - - if (prefix == 0) { - cell->__chunk[0] = WT_CELL_KEY; /* Type */ - p = cell->__chunk + 1; - } else { - cell->__chunk[0] = WT_CELL_KEY_PFX; /* Type */ - cell->__chunk[1] = prefix; /* Prefix */ - p = cell->__chunk + 2; - } - - /* - * If the size prevented us from using a short cell, it's larger than - * the adjustment size. Decrement/increment it when packing/unpacking - * so it takes up less room. - */ - size -= WT_CELL_SIZE_ADJUST; /* Length */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); - return (WT_PTRDIFF(p, cell)); + uint8_t byte, *p; + + /* Short keys have 6 bits of data length in the descriptor byte. 
*/ + if (size <= WT_CELL_SHORT_MAX) { + if (prefix == 0) { + byte = (uint8_t)size; /* Type + length */ + cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); + return (1); + } + byte = (uint8_t)size; /* Type + length */ + cell->__chunk[0] = (uint8_t)((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX); + cell->__chunk[1] = prefix; /* Prefix */ + return (2); + } + + if (prefix == 0) { + cell->__chunk[0] = WT_CELL_KEY; /* Type */ + p = cell->__chunk + 1; + } else { + cell->__chunk[0] = WT_CELL_KEY_PFX; /* Type */ + cell->__chunk[1] = prefix; /* Prefix */ + p = cell->__chunk + 2; + } + + /* + * If the size prevented us from using a short cell, it's larger than the adjustment size. + * Decrement/increment it when packing/unpacking so it takes up less room. + */ + size -= WT_CELL_SIZE_ADJUST; /* Length */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); + return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_pack_ovfl -- - * Pack an overflow cell. + * Pack an overflow cell. */ static inline size_t -__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) +__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, wt_timestamp_t start_ts, + uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) { - uint8_t *p; - - /* Start building a cell: the descriptor byte starts zero. 
*/ - p = cell->__chunk; - *p = '\0'; - - switch (type) { - case WT_CELL_KEY_OVFL: - case WT_CELL_KEY_OVFL_RM: - ++p; - break; - case WT_CELL_VALUE_OVFL: - case WT_CELL_VALUE_OVFL_RM: - __cell_pack_value_validity( - session, &p, start_ts, start_txn, stop_ts, stop_txn); - break; - } - - if (rle < 2) - cell->__chunk[0] |= type; /* Type */ - else { - cell->__chunk[0] |= type | WT_CELL_64V; /* Type */ - /* RLE */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); - } - /* Length */ - WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); - return (WT_PTRDIFF(p, cell)); + uint8_t *p; + + /* Start building a cell: the descriptor byte starts zero. */ + p = cell->__chunk; + *p = '\0'; + + switch (type) { + case WT_CELL_KEY_OVFL: + case WT_CELL_KEY_OVFL_RM: + ++p; + break; + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_OVFL_RM: + __cell_pack_value_validity(session, &p, start_ts, start_txn, stop_ts, stop_txn); + break; + } + + if (rle < 2) + cell->__chunk[0] |= type; /* Type */ + else { + cell->__chunk[0] |= type | WT_CELL_64V; /* Type */ + /* RLE */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, rle)); + } + /* Length */ + WT_IGNORE_RET(__wt_vpack_uint(&p, 0, (uint64_t)size)); + return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_rle -- - * Return the cell's RLE value. + * Return the cell's RLE value. */ static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK *unpack) { - /* - * Any item with only 1 occurrence is stored with an RLE of 0, that is, - * without any RLE at all. This code is a single place to handle that - * correction, for simplicity. - */ - return (unpack->v < 2 ? 1 : unpack->v); + /* + * Any item with only 1 occurrence is stored with an RLE of 0, that is, without any RLE at all. + * This code is a single place to handle that correction, for simplicity. + */ + return (unpack->v < 2 ? 1 : unpack->v); } /* * __wt_cell_total_len -- - * Return the cell's total length, including data. + * Return the cell's total length, including data. 
*/ static inline size_t __wt_cell_total_len(WT_CELL_UNPACK *unpack) { - /* - * The length field is specially named because it's dangerous to use it: - * it represents the length of the current cell (normally used for the - * loop that walks through cells on the page), but occasionally we want - * to copy a cell directly from the page, and what we need is the cell's - * total length. The problem is dictionary-copy cells, because in that - * case, the __len field is the length of the current cell, not the cell - * for which we're returning data. To use the __len field, you must be - * sure you're not looking at a copy cell. - */ - return (unpack->__len); + /* + * The length field is specially named because it's dangerous to use it: it represents the + * length of the current cell (normally used for the loop that walks through cells on the page), + * but occasionally we want to copy a cell directly from the page, and what we need is the + * cell's total length. The problem is dictionary-copy cells, because in that case, the __len + * field is the length of the current cell, not the cell for which we're returning data. To use + * the __len field, you must be sure you're not looking at a copy cell. + */ + return (unpack->__len); } /* * __wt_cell_type -- - * Return the cell's type (collapsing special types). + * Return the cell's type (collapsing special types). 
*/ static inline u_int __wt_cell_type(WT_CELL *cell) { - u_int type; - - switch (WT_CELL_SHORT_TYPE(cell->__chunk[0])) { - case WT_CELL_KEY_SHORT: - case WT_CELL_KEY_SHORT_PFX: - return (WT_CELL_KEY); - case WT_CELL_VALUE_SHORT: - return (WT_CELL_VALUE); - } - - switch (type = WT_CELL_TYPE(cell->__chunk[0])) { - case WT_CELL_KEY_PFX: - return (WT_CELL_KEY); - case WT_CELL_KEY_OVFL_RM: - return (WT_CELL_KEY_OVFL); - case WT_CELL_VALUE_OVFL_RM: - return (WT_CELL_VALUE_OVFL); - } - return (type); + u_int type; + + switch (WT_CELL_SHORT_TYPE(cell->__chunk[0])) { + case WT_CELL_KEY_SHORT: + case WT_CELL_KEY_SHORT_PFX: + return (WT_CELL_KEY); + case WT_CELL_VALUE_SHORT: + return (WT_CELL_VALUE); + } + + switch (type = WT_CELL_TYPE(cell->__chunk[0])) { + case WT_CELL_KEY_PFX: + return (WT_CELL_KEY); + case WT_CELL_KEY_OVFL_RM: + return (WT_CELL_KEY_OVFL); + case WT_CELL_VALUE_OVFL_RM: + return (WT_CELL_VALUE_OVFL); + } + return (type); } /* * __wt_cell_type_raw -- - * Return the cell's type. + * Return the cell's type. */ static inline u_int __wt_cell_type_raw(WT_CELL *cell) { - return (WT_CELL_SHORT_TYPE(cell->__chunk[0]) == 0 ? - WT_CELL_TYPE(cell->__chunk[0]) : - WT_CELL_SHORT_TYPE(cell->__chunk[0])); + return (WT_CELL_SHORT_TYPE(cell->__chunk[0]) == 0 ? WT_CELL_TYPE(cell->__chunk[0]) : + WT_CELL_SHORT_TYPE(cell->__chunk[0])); } /* * __wt_cell_type_reset -- - * Reset the cell's type. + * Reset the cell's type. */ static inline void -__wt_cell_type_reset( - WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type) +__wt_cell_type_reset(WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type) { - /* - * For all current callers of this function, this should happen once - * and only once, assert we're setting what we think we're setting. 
- */ - WT_ASSERT(session, old_type == 0 || old_type == __wt_cell_type(cell)); - WT_UNUSED(old_type); - - cell->__chunk[0] = - (cell->__chunk[0] & ~WT_CELL_TYPE_MASK) | WT_CELL_TYPE(new_type); + /* + * For all current callers of this function, this should happen once and only once, assert we're + * setting what we think we're setting. + */ + WT_ASSERT(session, old_type == 0 || old_type == __wt_cell_type(cell)); + WT_UNUSED(old_type); + + cell->__chunk[0] = (cell->__chunk[0] & ~WT_CELL_TYPE_MASK) | WT_CELL_TYPE(new_type); } /* * __wt_cell_leaf_value_parse -- - * Return the cell if it's a row-store leaf page value, otherwise return - * NULL. + * Return the cell if it's a row-store leaf page value, otherwise return NULL. */ static inline WT_CELL * __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell) { - /* - * This function exists so there's a place for this comment. - * - * Row-store leaf pages may have a single data cell between each key, or - * keys may be adjacent (when the data cell is empty). - * - * One special case: if the last key on a page is a key without a value, - * don't walk off the end of the page: the size of the underlying disk - * image is exact, which means the end of the last cell on the page plus - * the length of the cell should be the byte immediately after the page - * disk image. - * - * !!! - * This line of code is really a call to __wt_off_page, but we know the - * cell we're given will either be on the page or past the end of page, - * so it's a simpler check. (I wouldn't bother, but the real problem is - * we can't call __wt_off_page directly, it's in btree.i which requires - * this file be included first.) 
- */ - if (cell >= (WT_CELL *)((uint8_t *)page->dsk + page->dsk->mem_size)) - return (NULL); - - switch (__wt_cell_type_raw(cell)) { - case WT_CELL_KEY: - case WT_CELL_KEY_OVFL: - case WT_CELL_KEY_OVFL_RM: - case WT_CELL_KEY_PFX: - case WT_CELL_KEY_SHORT: - case WT_CELL_KEY_SHORT_PFX: - return (NULL); - default: - return (cell); - } + /* + * This function exists so there's a place for this comment. + * + * Row-store leaf pages may have a single data cell between each key, or + * keys may be adjacent (when the data cell is empty). + * + * One special case: if the last key on a page is a key without a value, + * don't walk off the end of the page: the size of the underlying disk + * image is exact, which means the end of the last cell on the page plus + * the length of the cell should be the byte immediately after the page + * disk image. + * + * !!! + * This line of code is really a call to __wt_off_page, but we know the + * cell we're given will either be on the page or past the end of page, + * so it's a simpler check. (I wouldn't bother, but the real problem is + * we can't call __wt_off_page directly, it's in btree.i which requires + * this file be included first.) + */ + if (cell >= (WT_CELL *)((uint8_t *)page->dsk + page->dsk->mem_size)) + return (NULL); + + switch (__wt_cell_type_raw(cell)) { + case WT_CELL_KEY: + case WT_CELL_KEY_OVFL: + case WT_CELL_KEY_OVFL_RM: + case WT_CELL_KEY_PFX: + case WT_CELL_KEY_SHORT: + case WT_CELL_KEY_SHORT_PFX: + return (NULL); + default: + return (cell); + } } /* * __wt_cell_unpack_safe -- - * Unpack a WT_CELL into a structure, with optional boundary checks. + * Unpack a WT_CELL into a structure, with optional boundary checks. 
*/ static inline int -__wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, - WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end) +__wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, + WT_CELL_UNPACK *unpack, const void *end) { - struct { - uint64_t v; - wt_timestamp_t start_ts; - uint64_t start_txn; - wt_timestamp_t stop_ts; - uint64_t stop_txn; - uint32_t len; - } copy; - uint64_t v; - const uint8_t *p; - uint8_t flags; - - copy.v = 0; /* -Werror=maybe-uninitialized */ - copy.start_ts = WT_TS_NONE; - copy.start_txn = WT_TXN_NONE; - copy.stop_ts = WT_TS_MAX; - copy.stop_txn = WT_TXN_MAX; - copy.len = 0; - - /* - * The verification code specifies an end argument, a pointer to 1B past - * the end-of-page. In which case, make sure all reads are inside the - * page image. If an error occurs, return an error code but don't output - * messages, our caller handles that. - */ -#define WT_CELL_LEN_CHK(t, len) do { \ - if (end != NULL && \ - ((uint8_t *)(t) < (uint8_t *)dsk || \ - (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \ - return (WT_ERROR); \ -} while (0) - - /* - * NB: when unpacking a WT_CELL_VALUE_COPY cell, unpack.cell is returned - * as the original cell, not the copied cell (in other words, data from - * the copied cell must be available from unpack after we return, as our - * caller has no way to find the copied cell). - */ - unpack->cell = cell; + struct { + uint64_t v; + wt_timestamp_t start_ts; + uint64_t start_txn; + wt_timestamp_t stop_ts; + uint64_t stop_txn; + uint32_t len; + } copy; + uint64_t v; + const uint8_t *p; + uint8_t flags; + + copy.v = 0; /* -Werror=maybe-uninitialized */ + copy.start_ts = WT_TS_NONE; + copy.start_txn = WT_TXN_NONE; + copy.stop_ts = WT_TS_MAX; + copy.stop_txn = WT_TXN_MAX; + copy.len = 0; + +/* + * The verification code specifies an end argument, a pointer to 1B past the end-of-page. In which + * case, make sure all reads are inside the page image. 
If an error occurs, return an error code but + * don't output messages, our caller handles that. + */ +#define WT_CELL_LEN_CHK(t, len) \ + do { \ + if (end != NULL && \ + ((uint8_t *)(t) < (uint8_t *)dsk || (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \ + return (WT_ERROR); \ + } while (0) + + /* + * NB: when unpacking a WT_CELL_VALUE_COPY cell, unpack.cell is returned as the original cell, + * not the copied cell (in other words, data from the copied cell must be available from unpack + * after we return, as our caller has no way to find the copied cell). + */ + unpack->cell = cell; restart: - WT_CELL_LEN_CHK(cell, 0); - - /* - * This path is performance critical for read-only trees, we're parsing - * on-page structures. For that reason we don't clear the unpacked cell - * structure (although that would be simpler), instead we make sure we - * initialize all structure elements either here or in the immediately - * following switch. All validity windows default to durability. - */ - unpack->v = 0; - unpack->start_ts = WT_TS_NONE; - unpack->start_txn = WT_TXN_NONE; - unpack->stop_ts = WT_TS_MAX; - unpack->stop_txn = WT_TXN_MAX; - unpack->newest_durable_ts = WT_TS_NONE; - unpack->oldest_start_ts = WT_TS_NONE; - unpack->oldest_start_txn = WT_TXN_NONE; - unpack->newest_stop_ts = WT_TS_MAX; - unpack->newest_stop_txn = WT_TXN_MAX; - unpack->raw = (uint8_t)__wt_cell_type_raw(cell); - unpack->type = (uint8_t)__wt_cell_type(cell); - unpack->ovfl = 0; - - /* - * Handle cells with none of RLE counts, validity window or data length: - * short key/data cells have 6 bits of data length in the descriptor - * byte and nothing else. 
- */ - switch (unpack->raw) { - case WT_CELL_KEY_SHORT_PFX: - WT_CELL_LEN_CHK(cell, 1); /* skip prefix */ - unpack->prefix = cell->__chunk[1]; - unpack->data = cell->__chunk + 2; - unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; - unpack->__len = 2 + unpack->size; - goto done; - case WT_CELL_KEY_SHORT: - case WT_CELL_VALUE_SHORT: - unpack->prefix = 0; - unpack->data = cell->__chunk + 1; - unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; - unpack->__len = 1 + unpack->size; - goto done; - } - - unpack->prefix = 0; - unpack->data = NULL; - unpack->size = 0; - unpack->__len = 0; - - p = (uint8_t *)cell + 1; /* skip cell */ - - /* - * Check for a prefix byte that optionally follows the cell descriptor - * byte in keys on row-store leaf pages. - */ - if (unpack->raw == WT_CELL_KEY_PFX) { - unpack->prefix = *p++; /* skip prefix */ - WT_CELL_LEN_CHK(p, 0); - } - - /* Check for a validity window. */ - switch (unpack->raw) { - case WT_CELL_ADDR_DEL: - case WT_CELL_ADDR_INT: - case WT_CELL_ADDR_LEAF: - case WT_CELL_ADDR_LEAF_NO: - if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0) - break; - flags = *p++; /* skip second descriptor byte */ - - if (LF_ISSET(WT_CELL_TS_DURABLE)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : - WT_PTRDIFF(end, p), &unpack->newest_durable_ts)); - if (LF_ISSET(WT_CELL_TS_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : - WT_PTRDIFF(end, p), &unpack->oldest_start_ts)); - if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : - WT_PTRDIFF(end, p), &unpack->oldest_start_txn)); - if (LF_ISSET(WT_CELL_TS_STOP)) { - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : - WT_PTRDIFF(end, p), &unpack->newest_stop_ts)); - unpack->newest_stop_ts += unpack->oldest_start_ts; - } - if (LF_ISSET(WT_CELL_TXN_STOP)) { - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 
0 : - WT_PTRDIFF(end, p), &unpack->newest_stop_txn)); - unpack->newest_stop_txn += unpack->oldest_start_txn; - } - __wt_check_addr_validity(session, - unpack->oldest_start_ts, unpack->oldest_start_txn, - unpack->newest_stop_ts, unpack->newest_stop_txn); - break; - case WT_CELL_DEL: - case WT_CELL_VALUE: - case WT_CELL_VALUE_COPY: - case WT_CELL_VALUE_OVFL: - case WT_CELL_VALUE_OVFL_RM: - if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0) - break; - flags = *p++; /* skip second descriptor byte */ - - if (LF_ISSET(WT_CELL_TS_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? - 0 : WT_PTRDIFF(end, p), &unpack->start_ts)); - if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? - 0 : WT_PTRDIFF(end, p), &unpack->start_txn)); - if (LF_ISSET(WT_CELL_TS_STOP)) { - WT_RET(__wt_vunpack_uint(&p, end == NULL ? - 0 : WT_PTRDIFF(end, p), &unpack->stop_ts)); - unpack->stop_ts += unpack->start_ts; - } - if (LF_ISSET(WT_CELL_TXN_STOP)) { - WT_RET(__wt_vunpack_uint(&p, end == NULL ? - 0 : WT_PTRDIFF(end, p), &unpack->stop_txn)); - unpack->stop_txn += unpack->start_txn; - } - __cell_check_value_validity(session, - unpack->start_ts, unpack->start_txn, - unpack->stop_ts, unpack->stop_txn); - break; - } - - /* - * Check for an RLE count or record number that optionally follows the - * cell descriptor byte on column-store variable-length pages. - */ - if (cell->__chunk[0] & WT_CELL_64V) /* skip value */ - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->v)); - - /* - * Handle special actions for a few different cell types and set the - * data length (deleted cells are fixed-size without length bytes, - * almost everything else has data length bytes). - */ - switch (unpack->raw) { - case WT_CELL_VALUE_COPY: - /* - * The cell is followed by an offset to a cell written earlier - * in the page. 
Save/restore the length and RLE of this cell, - * we need the length to step through the set of cells on the - * page and this RLE is probably different from the RLE of the - * earlier cell. - */ - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); - copy.v = unpack->v; - copy.start_ts = unpack->start_ts; - copy.start_txn = unpack->start_txn; - copy.stop_ts = unpack->stop_ts; - copy.stop_txn = unpack->stop_txn; - copy.len = WT_PTRDIFF32(p, cell); - cell = (WT_CELL *)((uint8_t *)cell - v); - goto restart; - - case WT_CELL_KEY_OVFL: - case WT_CELL_KEY_OVFL_RM: - case WT_CELL_VALUE_OVFL: - case WT_CELL_VALUE_OVFL_RM: - /* - * Set overflow flag. - */ - unpack->ovfl = 1; - /* FALLTHROUGH */ - - case WT_CELL_ADDR_DEL: - case WT_CELL_ADDR_INT: - case WT_CELL_ADDR_LEAF: - case WT_CELL_ADDR_LEAF_NO: - case WT_CELL_KEY: - case WT_CELL_KEY_PFX: - case WT_CELL_VALUE: - /* - * The cell is followed by a 4B data length and a chunk of - * data. - */ - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); - - /* - * If the size was what prevented us from using a short cell, - * it's larger than the adjustment size. Decrement/increment - * it when packing/unpacking so it takes up less room. - */ - if (unpack->raw == WT_CELL_KEY || - unpack->raw == WT_CELL_KEY_PFX || - (unpack->raw == WT_CELL_VALUE && - unpack->v == 0 && - (cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)) - v += WT_CELL_SIZE_ADJUST; - - unpack->data = p; - unpack->size = (uint32_t)v; - unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size; - break; - - case WT_CELL_DEL: - unpack->__len = WT_PTRDIFF32(p, cell); - break; - default: - return (WT_ERROR); /* Unknown cell type. */ - } - - /* - * Check the original cell against the full cell length (this is a - * diagnostic as well, we may be copying the cell from the page and - * we need the right length). 
- */ -done: WT_CELL_LEN_CHK(cell, unpack->__len); - if (copy.len != 0) { - unpack->raw = WT_CELL_VALUE_COPY; - unpack->v = copy.v; - unpack->start_ts = copy.start_ts; - unpack->start_txn = copy.start_txn; - unpack->stop_ts = copy.stop_ts; - unpack->stop_txn = copy.stop_txn; - unpack->__len = copy.len; - } - - return (0); + WT_CELL_LEN_CHK(cell, 0); + + /* + * This path is performance critical for read-only trees, we're parsing on-page structures. For + * that reason we don't clear the unpacked cell structure (although that would be simpler), + * instead we make sure we initialize all structure elements either here or in the immediately + * following switch. All validity windows default to durability. + */ + unpack->v = 0; + unpack->start_ts = WT_TS_NONE; + unpack->start_txn = WT_TXN_NONE; + unpack->stop_ts = WT_TS_MAX; + unpack->stop_txn = WT_TXN_MAX; + unpack->newest_durable_ts = WT_TS_NONE; + unpack->oldest_start_ts = WT_TS_NONE; + unpack->oldest_start_txn = WT_TXN_NONE; + unpack->newest_stop_ts = WT_TS_MAX; + unpack->newest_stop_txn = WT_TXN_MAX; + unpack->raw = (uint8_t)__wt_cell_type_raw(cell); + unpack->type = (uint8_t)__wt_cell_type(cell); + unpack->ovfl = 0; + + /* + * Handle cells with none of RLE counts, validity window or data length: short key/data cells + * have 6 bits of data length in the descriptor byte and nothing else. 
+ */ + switch (unpack->raw) { + case WT_CELL_KEY_SHORT_PFX: + WT_CELL_LEN_CHK(cell, 1); /* skip prefix */ + unpack->prefix = cell->__chunk[1]; + unpack->data = cell->__chunk + 2; + unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; + unpack->__len = 2 + unpack->size; + goto done; + case WT_CELL_KEY_SHORT: + case WT_CELL_VALUE_SHORT: + unpack->prefix = 0; + unpack->data = cell->__chunk + 1; + unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; + unpack->__len = 1 + unpack->size; + goto done; + } + + unpack->prefix = 0; + unpack->data = NULL; + unpack->size = 0; + unpack->__len = 0; + + p = (uint8_t *)cell + 1; /* skip cell */ + + /* + * Check for a prefix byte that optionally follows the cell descriptor byte in keys on row-store + * leaf pages. + */ + if (unpack->raw == WT_CELL_KEY_PFX) { + unpack->prefix = *p++; /* skip prefix */ + WT_CELL_LEN_CHK(p, 0); + } + + /* Check for a validity window. */ + switch (unpack->raw) { + case WT_CELL_ADDR_DEL: + case WT_CELL_ADDR_INT: + case WT_CELL_ADDR_LEAF: + case WT_CELL_ADDR_LEAF_NO: + if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0) + break; + flags = *p++; /* skip second descriptor byte */ + + if (LF_ISSET(WT_CELL_TS_DURABLE)) + WT_RET(__wt_vunpack_uint( + &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_durable_ts)); + if (LF_ISSET(WT_CELL_TS_START)) + WT_RET(__wt_vunpack_uint( + &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_ts)); + if (LF_ISSET(WT_CELL_TXN_START)) + WT_RET(__wt_vunpack_uint( + &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_txn)); + if (LF_ISSET(WT_CELL_TS_STOP)) { + WT_RET( + __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_ts)); + unpack->newest_stop_ts += unpack->oldest_start_ts; + } + if (LF_ISSET(WT_CELL_TXN_STOP)) { + WT_RET(__wt_vunpack_uint( + &p, end == NULL ? 
0 : WT_PTRDIFF(end, p), &unpack->newest_stop_txn)); + unpack->newest_stop_txn += unpack->oldest_start_txn; + } + __wt_check_addr_validity(session, unpack->oldest_start_ts, unpack->oldest_start_txn, + unpack->newest_stop_ts, unpack->newest_stop_txn); + break; + case WT_CELL_DEL: + case WT_CELL_VALUE: + case WT_CELL_VALUE_COPY: + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_OVFL_RM: + if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0) + break; + flags = *p++; /* skip second descriptor byte */ + + if (LF_ISSET(WT_CELL_TS_START)) + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_ts)); + if (LF_ISSET(WT_CELL_TXN_START)) + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_txn)); + if (LF_ISSET(WT_CELL_TS_STOP)) { + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_ts)); + unpack->stop_ts += unpack->start_ts; + } + if (LF_ISSET(WT_CELL_TXN_STOP)) { + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_txn)); + unpack->stop_txn += unpack->start_txn; + } + __cell_check_value_validity( + session, unpack->start_ts, unpack->start_txn, unpack->stop_ts, unpack->stop_txn); + break; + } + + /* + * Check for an RLE count or record number that optionally follows the cell descriptor byte on + * column-store variable-length pages. + */ + if (cell->__chunk[0] & WT_CELL_64V) /* skip value */ + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->v)); + + /* + * Handle special actions for a few different cell types and set the data length (deleted cells + * are fixed-size without length bytes, almost everything else has data length bytes). + */ + switch (unpack->raw) { + case WT_CELL_VALUE_COPY: + /* + * The cell is followed by an offset to a cell written earlier in the page. 
Save/restore the + * length and RLE of this cell, we need the length to step through the set of cells on the + * page and this RLE is probably different from the RLE of the earlier cell. + */ + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); + copy.v = unpack->v; + copy.start_ts = unpack->start_ts; + copy.start_txn = unpack->start_txn; + copy.stop_ts = unpack->stop_ts; + copy.stop_txn = unpack->stop_txn; + copy.len = WT_PTRDIFF32(p, cell); + cell = (WT_CELL *)((uint8_t *)cell - v); + goto restart; + + case WT_CELL_KEY_OVFL: + case WT_CELL_KEY_OVFL_RM: + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_OVFL_RM: + /* + * Set overflow flag. + */ + unpack->ovfl = 1; + /* FALLTHROUGH */ + + case WT_CELL_ADDR_DEL: + case WT_CELL_ADDR_INT: + case WT_CELL_ADDR_LEAF: + case WT_CELL_ADDR_LEAF_NO: + case WT_CELL_KEY: + case WT_CELL_KEY_PFX: + case WT_CELL_VALUE: + /* + * The cell is followed by a 4B data length and a chunk of data. + */ + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); + + /* + * If the size was what prevented us from using a short cell, it's larger than the + * adjustment size. Decrement/increment it when packing/unpacking so it takes up less room. + */ + if (unpack->raw == WT_CELL_KEY || unpack->raw == WT_CELL_KEY_PFX || + (unpack->raw == WT_CELL_VALUE && unpack->v == 0 && + (cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)) + v += WT_CELL_SIZE_ADJUST; + + unpack->data = p; + unpack->size = (uint32_t)v; + unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size; + break; + + case WT_CELL_DEL: + unpack->__len = WT_PTRDIFF32(p, cell); + break; + default: + return (WT_ERROR); /* Unknown cell type. */ + } + +/* + * Check the original cell against the full cell length (this is a diagnostic as well, we may be + * copying the cell from the page and we need the right length). 
+ */ +done: + WT_CELL_LEN_CHK(cell, unpack->__len); + if (copy.len != 0) { + unpack->raw = WT_CELL_VALUE_COPY; + unpack->v = copy.v; + unpack->start_ts = copy.start_ts; + unpack->start_txn = copy.start_txn; + unpack->stop_ts = copy.stop_ts; + unpack->stop_txn = copy.stop_txn; + unpack->__len = copy.len; + } + + return (0); } /* * __wt_cell_unpack_dsk -- - * Unpack a WT_CELL into a structure. + * Unpack a WT_CELL into a structure. */ static inline void -__wt_cell_unpack_dsk(WT_SESSION_IMPL *session, - const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack) +__wt_cell_unpack_dsk( + WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack) { - /* - * Row-store doesn't store zero-length values on pages, but this allows - * us to pretend. - */ - if (cell == NULL) { - unpack->cell = NULL; - unpack->v = 0; - /* - * If there isn't any value validity window (which is what it - * will take to get to a zero-length item), the value must be - * stable. - */ - unpack->start_ts = WT_TS_NONE; - unpack->start_txn = WT_TXN_NONE; - unpack->stop_ts = WT_TS_MAX; - unpack->stop_txn = WT_TXN_MAX; - unpack->newest_durable_ts = WT_TS_NONE; - unpack->oldest_start_ts = WT_TS_NONE; - unpack->oldest_start_txn = WT_TXN_NONE; - unpack->newest_stop_ts = WT_TS_MAX; - unpack->newest_stop_txn = WT_TXN_MAX; - unpack->data = ""; - unpack->size = 0; - unpack->__len = 0; - unpack->prefix = 0; - unpack->raw = unpack->type = WT_CELL_VALUE; - unpack->ovfl = 0; - return; - } - - WT_IGNORE_RET(__wt_cell_unpack_safe(session, dsk, cell, unpack, NULL)); + /* + * Row-store doesn't store zero-length values on pages, but this allows us to pretend. + */ + if (cell == NULL) { + unpack->cell = NULL; + unpack->v = 0; + /* + * If there isn't any value validity window (which is what it will take to get to a + * zero-length item), the value must be stable. 
+ */ + unpack->start_ts = WT_TS_NONE; + unpack->start_txn = WT_TXN_NONE; + unpack->stop_ts = WT_TS_MAX; + unpack->stop_txn = WT_TXN_MAX; + unpack->newest_durable_ts = WT_TS_NONE; + unpack->oldest_start_ts = WT_TS_NONE; + unpack->oldest_start_txn = WT_TXN_NONE; + unpack->newest_stop_ts = WT_TS_MAX; + unpack->newest_stop_txn = WT_TXN_MAX; + unpack->data = ""; + unpack->size = 0; + unpack->__len = 0; + unpack->prefix = 0; + unpack->raw = unpack->type = WT_CELL_VALUE; + unpack->ovfl = 0; + return; + } + + WT_IGNORE_RET(__wt_cell_unpack_safe(session, dsk, cell, unpack, NULL)); } /* * __wt_cell_unpack -- - * Unpack a WT_CELL into a structure. + * Unpack a WT_CELL into a structure. */ static inline void -__wt_cell_unpack(WT_SESSION_IMPL *session, - WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack) +__wt_cell_unpack(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack) { - __wt_cell_unpack_dsk(session, page->dsk, cell, unpack); + __wt_cell_unpack_dsk(session, page->dsk, cell, unpack); } /* * __cell_data_ref -- - * Set a buffer to reference the data from an unpacked cell. + * Set a buffer to reference the data from an unpacked cell. */ static inline int -__cell_data_ref(WT_SESSION_IMPL *session, - WT_PAGE *page, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store) +__cell_data_ref( + WT_SESSION_IMPL *session, WT_PAGE *page, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store) { - WT_BTREE *btree; - bool decoded; - void *huffman; - - btree = S2BT(session); - - /* Reference the cell's data, optionally decode it. 
*/ - switch (unpack->type) { - case WT_CELL_KEY: - store->data = unpack->data; - store->size = unpack->size; - if (page_type == WT_PAGE_ROW_INT) - return (0); - - huffman = btree->huffman_key; - break; - case WT_CELL_VALUE: - store->data = unpack->data; - store->size = unpack->size; - huffman = btree->huffman_value; - break; - case WT_CELL_KEY_OVFL: - WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded)); - if (page_type == WT_PAGE_ROW_INT || decoded) - return (0); - - huffman = btree->huffman_key; - break; - case WT_CELL_VALUE_OVFL: - WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded)); - if (decoded) - return (0); - huffman = btree->huffman_value; - break; - default: - return (__wt_illegal_value(session, unpack->type)); - } - - return (huffman == NULL || store->size == 0 ? 0 : - __wt_huffman_decode( - session, huffman, store->data, store->size, store)); + WT_BTREE *btree; + bool decoded; + void *huffman; + + btree = S2BT(session); + + /* Reference the cell's data, optionally decode it. */ + switch (unpack->type) { + case WT_CELL_KEY: + store->data = unpack->data; + store->size = unpack->size; + if (page_type == WT_PAGE_ROW_INT) + return (0); + + huffman = btree->huffman_key; + break; + case WT_CELL_VALUE: + store->data = unpack->data; + store->size = unpack->size; + huffman = btree->huffman_value; + break; + case WT_CELL_KEY_OVFL: + WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded)); + if (page_type == WT_PAGE_ROW_INT || decoded) + return (0); + + huffman = btree->huffman_key; + break; + case WT_CELL_VALUE_OVFL: + WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded)); + if (decoded) + return (0); + huffman = btree->huffman_value; + break; + default: + return (__wt_illegal_value(session, unpack->type)); + } + + return (huffman == NULL || store->size == 0 ? 
0 : __wt_huffman_decode(session, huffman, + store->data, store->size, store)); } /* * __wt_dsk_cell_data_ref -- - * Set a buffer to reference the data from an unpacked cell. - * - * There are two versions because of WT_CELL_VALUE_OVFL_RM type cells. When an - * overflow item is deleted, its backing blocks are removed; if there are still - * running transactions that might need to see the overflow item, we cache a - * copy of the item and reset the item's cell to WT_CELL_VALUE_OVFL_RM. If we - * find a WT_CELL_VALUE_OVFL_RM cell when reading an overflow item, we use the - * page reference to look aside into the cache. So, calling the "dsk" version - * of the function declares the cell cannot be of type WT_CELL_VALUE_OVFL_RM, - * and calling the "page" version means it might be. + * Set a buffer to reference the data from an unpacked cell. There are two versions because of + * WT_CELL_VALUE_OVFL_RM type cells. When an overflow item is deleted, its backing blocks are + * removed; if there are still running transactions that might need to see the overflow item, we + * cache a copy of the item and reset the item's cell to WT_CELL_VALUE_OVFL_RM. If we find a + * WT_CELL_VALUE_OVFL_RM cell when reading an overflow item, we use the page reference to look + * aside into the cache. So, calling the "dsk" version of the function declares the cell cannot + * be of type WT_CELL_VALUE_OVFL_RM, and calling the "page" version means it might be. 
*/ static inline int -__wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, - int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store) +__wt_dsk_cell_data_ref( + WT_SESSION_IMPL *session, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store) { - WT_ASSERT(session, - __wt_cell_type_raw(unpack->cell) != WT_CELL_VALUE_OVFL_RM); - return (__cell_data_ref(session, NULL, page_type, unpack, store)); + WT_ASSERT(session, __wt_cell_type_raw(unpack->cell) != WT_CELL_VALUE_OVFL_RM); + return (__cell_data_ref(session, NULL, page_type, unpack, store)); } /* * __wt_page_cell_data_ref -- - * Set a buffer to reference the data from an unpacked cell. + * Set a buffer to reference the data from an unpacked cell. */ static inline int -__wt_page_cell_data_ref(WT_SESSION_IMPL *session, - WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store) +__wt_page_cell_data_ref( + WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store) { - return (__cell_data_ref(session, page, page->type, unpack, store)); + return (__cell_data_ref(session, page, page->type, unpack, store)); } /* * WT_CELL_FOREACH -- * Walk the cells on a page. 
*/ -#define WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) do { \ - uint32_t __i; \ - uint8_t *__cell; \ - for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), \ - __i = (dsk)->u.entries; \ - __i > 0; __cell += (unpack).__len, --__i) { \ - __wt_cell_unpack_dsk( \ - session, dsk, (WT_CELL *)__cell, &(unpack)); \ - -#define WT_CELL_FOREACH_END \ - } } while (0) +#define WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) \ + do { \ + uint32_t __i; \ + uint8_t *__cell; \ + for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), __i = (dsk)->u.entries; __i > 0; \ + __cell += (unpack).__len, --__i) { \ + __wt_cell_unpack_dsk(session, dsk, (WT_CELL *)__cell, &(unpack)); + +#define WT_CELL_FOREACH_END \ + } \ + } \ + while (0) diff --git a/src/third_party/wiredtiger/src/include/column.i b/src/third_party/wiredtiger/src/include/column.i index 608d2bffc97..d039386245c 100644 --- a/src/third_party/wiredtiger/src/include/column.i +++ b/src/third_party/wiredtiger/src/include/column.i @@ -8,335 +8,325 @@ /* * __col_insert_search_gt -- - * Search a column-store insert list for the next larger record. + * Search a column-store insert list for the next larger record. */ static inline WT_INSERT * __col_insert_search_gt(WT_INSERT_HEAD *ins_head, uint64_t recno) { - WT_INSERT *ins, **insp, *ret_ins; - int i; - - /* If there's no insert chain to search, we're done. */ - if ((ins = WT_SKIP_LAST(ins_head)) == NULL) - return (NULL); - - /* Fast path check for targets past the end of the skiplist. */ - if (recno >= WT_INSERT_RECNO(ins)) - return (NULL); - - /* - * The insert list is a skip list: start at the highest skip level, then - * go as far as possible at each level before stepping down to the next. - */ - ret_ins = NULL; - for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) { - /* - * Use a local variable to access the insert because the skip - * list can change across references. 
- */ - WT_ORDERED_READ(ins, *insp); - if (ins != NULL && recno >= WT_INSERT_RECNO(ins)) { - /* GTE: keep going at this level */ - insp = &ins->next[i]; - ret_ins = ins; - } else { - --i; /* LT: drop down a level */ - --insp; - } - } - - /* - * If we didn't find any records greater than or equal to the target, - * we never set the return value, set it to the first record in the - * list. - * - * Otherwise, it references a record less-than-or-equal to the target, - * move to a later record, that is, a subsequent record greater than - * the target. Because inserts happen concurrently, additional records - * might be inserted after the searched-for record that are still - * smaller than the target, continue to move forward until reaching a - * record larger than the target. There isn't any safety testing - * because we confirmed such a record exists before searching. - */ - if ((ins = ret_ins) == NULL) - ins = WT_SKIP_FIRST(ins_head); - while (recno >= WT_INSERT_RECNO(ins)) - ins = WT_SKIP_NEXT(ins); - return (ins); + WT_INSERT *ins, **insp, *ret_ins; + int i; + + /* If there's no insert chain to search, we're done. */ + if ((ins = WT_SKIP_LAST(ins_head)) == NULL) + return (NULL); + + /* Fast path check for targets past the end of the skiplist. */ + if (recno >= WT_INSERT_RECNO(ins)) + return (NULL); + + /* + * The insert list is a skip list: start at the highest skip level, then go as far as possible + * at each level before stepping down to the next. + */ + ret_ins = NULL; + for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) { + /* + * Use a local variable to access the insert because the skip list can change across + * references. 
+ */ + WT_ORDERED_READ(ins, *insp); + if (ins != NULL && recno >= WT_INSERT_RECNO(ins)) { + /* GTE: keep going at this level */ + insp = &ins->next[i]; + ret_ins = ins; + } else { + --i; /* LT: drop down a level */ + --insp; + } + } + + /* + * If we didn't find any records greater than or equal to the target, + * we never set the return value, set it to the first record in the + * list. + * + * Otherwise, it references a record less-than-or-equal to the target, + * move to a later record, that is, a subsequent record greater than + * the target. Because inserts happen concurrently, additional records + * might be inserted after the searched-for record that are still + * smaller than the target, continue to move forward until reaching a + * record larger than the target. There isn't any safety testing + * because we confirmed such a record exists before searching. + */ + if ((ins = ret_ins) == NULL) + ins = WT_SKIP_FIRST(ins_head); + while (recno >= WT_INSERT_RECNO(ins)) + ins = WT_SKIP_NEXT(ins); + return (ins); } /* * __col_insert_search_lt -- - * Search a column-store insert list for the next smaller record. + * Search a column-store insert list for the next smaller record. */ static inline WT_INSERT * __col_insert_search_lt(WT_INSERT_HEAD *ins_head, uint64_t recno) { - WT_INSERT *ins, **insp, *ret_ins; - int i; - - /* If there's no insert chain to search, we're done. */ - if ((ins = WT_SKIP_FIRST(ins_head)) == NULL) - return (NULL); - - /* Fast path check for targets before the skiplist. */ - if (recno <= WT_INSERT_RECNO(ins)) - return (NULL); - - /* - * The insert list is a skip list: start at the highest skip level, then - * go as far as possible at each level before stepping down to the next. - */ - ret_ins = NULL; - for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) { - /* - * Use a local variable to access the insert because the skip - * list can change across references. 
- */ - WT_ORDERED_READ(ins, *insp); - if (ins != NULL && recno > WT_INSERT_RECNO(ins)) { - /* GT: keep going at this level */ - insp = &ins->next[i]; - ret_ins = ins; - } else { - --i; /* LTE: drop down a level */ - --insp; - } - } - - return (ret_ins); + WT_INSERT *ins, **insp, *ret_ins; + int i; + + /* If there's no insert chain to search, we're done. */ + if ((ins = WT_SKIP_FIRST(ins_head)) == NULL) + return (NULL); + + /* Fast path check for targets before the skiplist. */ + if (recno <= WT_INSERT_RECNO(ins)) + return (NULL); + + /* + * The insert list is a skip list: start at the highest skip level, then go as far as possible + * at each level before stepping down to the next. + */ + ret_ins = NULL; + for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) { + /* + * Use a local variable to access the insert because the skip list can change across + * references. + */ + WT_ORDERED_READ(ins, *insp); + if (ins != NULL && recno > WT_INSERT_RECNO(ins)) { + /* GT: keep going at this level */ + insp = &ins->next[i]; + ret_ins = ins; + } else { + --i; /* LTE: drop down a level */ + --insp; + } + } + + return (ret_ins); } /* * __col_insert_search_match -- - * Search a column-store insert list for an exact match. + * Search a column-store insert list for an exact match. */ static inline WT_INSERT * __col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno) { - WT_INSERT *ins, **insp; - uint64_t ins_recno; - int cmp, i; - - /* If there's no insert chain to search, we're done. */ - if ((ins = WT_SKIP_LAST(ins_head)) == NULL) - return (NULL); - - /* Fast path the check for values at the end of the skiplist. */ - if (recno > WT_INSERT_RECNO(ins)) - return (NULL); - if (recno == WT_INSERT_RECNO(ins)) - return (ins); - - /* - * The insert list is a skip list: start at the highest skip level, then - * go as far as possible at each level before stepping down to the next. 
- */ - for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) { - /* - * Use a local variable to access the insert because the skip - * list can change across references. - */ - WT_ORDERED_READ(ins, *insp); - if (ins == NULL) { - --i; - --insp; - continue; - } - - ins_recno = WT_INSERT_RECNO(ins); - cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1; - - if (cmp == 0) /* Exact match: return */ - return (ins); - if (cmp > 0) /* Keep going at this level */ - insp = &ins->next[i]; - else { /* Drop down a level */ - --i; - --insp; - } - } - - return (NULL); + WT_INSERT *ins, **insp; + uint64_t ins_recno; + int cmp, i; + + /* If there's no insert chain to search, we're done. */ + if ((ins = WT_SKIP_LAST(ins_head)) == NULL) + return (NULL); + + /* Fast path the check for values at the end of the skiplist. */ + if (recno > WT_INSERT_RECNO(ins)) + return (NULL); + if (recno == WT_INSERT_RECNO(ins)) + return (ins); + + /* + * The insert list is a skip list: start at the highest skip level, then go as far as possible + * at each level before stepping down to the next. + */ + for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) { + /* + * Use a local variable to access the insert because the skip list can change across + * references. + */ + WT_ORDERED_READ(ins, *insp); + if (ins == NULL) { + --i; + --insp; + continue; + } + + ins_recno = WT_INSERT_RECNO(ins); + cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1; + + if (cmp == 0) /* Exact match: return */ + return (ins); + if (cmp > 0) /* Keep going at this level */ + insp = &ins->next[i]; + else { /* Drop down a level */ + --i; + --insp; + } + } + + return (NULL); } /* * __col_insert_search -- - * Search a column-store insert list, creating a skiplist stack as we go. + * Search a column-store insert list, creating a skiplist stack as we go. 
*/ static inline WT_INSERT * -__col_insert_search(WT_INSERT_HEAD *ins_head, - WT_INSERT ***ins_stack, WT_INSERT **next_stack, uint64_t recno) +__col_insert_search( + WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **next_stack, uint64_t recno) { - WT_INSERT **insp, *ret_ins; - uint64_t ins_recno; - int cmp, i; - - /* If there's no insert chain to search, we're done. */ - if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL) - return (NULL); - - /* Fast path appends. */ - if (recno >= WT_INSERT_RECNO(ret_ins)) { - for (i = 0; i < WT_SKIP_MAXDEPTH; i++) { - ins_stack[i] = (i == 0) ? &ret_ins->next[0] : - (ins_head->tail[i] != NULL) ? - &ins_head->tail[i]->next[i] : &ins_head->head[i]; - next_stack[i] = NULL; - } - return (ret_ins); - } - - /* - * The insert list is a skip list: start at the highest skip level, then - * go as far as possible at each level before stepping down to the next. - */ - for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) { - if ((ret_ins = *insp) == NULL) { - next_stack[i] = NULL; - ins_stack[i--] = insp--; - continue; - } - - /* - * When no exact match is found, the search returns the smallest - * key larger than the searched-for key, or the largest key - * smaller than the searched-for key, if there is no larger key. - * Our callers depend on that: specifically, the fixed-length - * column store cursor code interprets returning a key smaller - * than the searched-for key to mean the searched-for key is - * larger than any key on the page. Don't change that behavior, - * things will break. - */ - ins_recno = WT_INSERT_RECNO(ret_ins); - cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? 
-1 : 1; - - if (cmp > 0) /* Keep going at this level */ - insp = &ret_ins->next[i]; - else if (cmp == 0) /* Exact match: return */ - for (; i >= 0; i--) { - next_stack[i] = ret_ins->next[i]; - ins_stack[i] = &ret_ins->next[i]; - } - else { /* Drop down a level */ - next_stack[i] = ret_ins; - ins_stack[i--] = insp--; - } - } - return (ret_ins); + WT_INSERT **insp, *ret_ins; + uint64_t ins_recno; + int cmp, i; + + /* If there's no insert chain to search, we're done. */ + if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL) + return (NULL); + + /* Fast path appends. */ + if (recno >= WT_INSERT_RECNO(ret_ins)) { + for (i = 0; i < WT_SKIP_MAXDEPTH; i++) { + ins_stack[i] = (i == 0) ? &ret_ins->next[0] : (ins_head->tail[i] != NULL) ? + &ins_head->tail[i]->next[i] : + &ins_head->head[i]; + next_stack[i] = NULL; + } + return (ret_ins); + } + + /* + * The insert list is a skip list: start at the highest skip level, then go as far as possible + * at each level before stepping down to the next. + */ + for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) { + if ((ret_ins = *insp) == NULL) { + next_stack[i] = NULL; + ins_stack[i--] = insp--; + continue; + } + + /* + * When no exact match is found, the search returns the smallest key larger than the + * searched-for key, or the largest key smaller than the searched-for key, if there is no + * larger key. Our callers depend on that: specifically, the fixed-length column store + * cursor code interprets returning a key smaller than the searched-for key to mean the + * searched-for key is larger than any key on the page. Don't change that behavior, things + * will break. + */ + ins_recno = WT_INSERT_RECNO(ret_ins); + cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? 
-1 : 1; + + if (cmp > 0) /* Keep going at this level */ + insp = &ret_ins->next[i]; + else if (cmp == 0) /* Exact match: return */ + for (; i >= 0; i--) { + next_stack[i] = ret_ins->next[i]; + ins_stack[i] = &ret_ins->next[i]; + } + else { /* Drop down a level */ + next_stack[i] = ret_ins; + ins_stack[i--] = insp--; + } + } + return (ret_ins); } /* * __col_var_last_recno -- - * Return the last record number for a variable-length column-store page. + * Return the last record number for a variable-length column-store page. */ static inline uint64_t __col_var_last_recno(WT_REF *ref) { - WT_COL_RLE *repeat; - WT_PAGE *page; - - page = ref->page; - - /* - * If there's an append list, there may be more records on the page. - * This function ignores those records, our callers must handle that - * explicitly, if they care. - */ - if (!WT_COL_VAR_REPEAT_SET(page)) - return (page->entries == 0 ? 0 : - ref->ref_recno + (page->entries - 1)); - - repeat = &page->pg_var_repeats[page->pg_var_nrepeats - 1]; - return ((repeat->recno + repeat->rle) - 1 + - (page->entries - (repeat->indx + 1))); + WT_COL_RLE *repeat; + WT_PAGE *page; + + page = ref->page; + + /* + * If there's an append list, there may be more records on the page. This function ignores those + * records, our callers must handle that explicitly, if they care. + */ + if (!WT_COL_VAR_REPEAT_SET(page)) + return (page->entries == 0 ? 0 : ref->ref_recno + (page->entries - 1)); + + repeat = &page->pg_var_repeats[page->pg_var_nrepeats - 1]; + return ((repeat->recno + repeat->rle) - 1 + (page->entries - (repeat->indx + 1))); } /* * __col_fix_last_recno -- - * Return the last record number for a fixed-length column-store page. + * Return the last record number for a fixed-length column-store page. */ static inline uint64_t __col_fix_last_recno(WT_REF *ref) { - WT_PAGE *page; + WT_PAGE *page; - page = ref->page; + page = ref->page; - /* - * If there's an append list, there may be more records on the page. 
- * This function ignores those records, our callers must handle that - * explicitly, if they care. - */ - return (page->entries == 0 ? 0 : ref->ref_recno + (page->entries - 1)); + /* + * If there's an append list, there may be more records on the page. This function ignores those + * records, our callers must handle that explicitly, if they care. + */ + return (page->entries == 0 ? 0 : ref->ref_recno + (page->entries - 1)); } /* * __col_var_search -- - * Search a variable-length column-store page for a record. + * Search a variable-length column-store page for a record. */ static inline WT_COL * __col_var_search(WT_REF *ref, uint64_t recno, uint64_t *start_recnop) { - WT_COL_RLE *repeat; - WT_PAGE *page; - uint64_t start_recno; - uint32_t base, indx, limit, start_indx; - - page = ref->page; - - /* - * Find the matching slot. - * - * This is done in two stages: first, we do a binary search among any - * repeating records to find largest repeating less than the search key. - * Once there, we can do a simple offset calculation to find the correct - * slot for this record number, because we know any intervening records - * have repeat counts of 1. - */ - for (base = 0, - limit = WT_COL_VAR_REPEAT_SET(page) ? page->pg_var_nrepeats : 0; - limit != 0; limit >>= 1) { - indx = base + (limit >> 1); - - repeat = page->pg_var_repeats + indx; - if (recno >= repeat->recno && - recno < repeat->recno + repeat->rle) { - if (start_recnop != NULL) - *start_recnop = repeat->recno; - return (page->pg_var + repeat->indx); - } - if (recno < repeat->recno) - continue; - base = indx + 1; - --limit; - } - - /* - * We didn't find an exact match, move forward from the largest repeat - * less than the search key. - */ - if (base == 0) { - start_indx = 0; - start_recno = ref->ref_recno; - } else { - repeat = page->pg_var_repeats + (base - 1); - start_indx = repeat->indx + 1; - start_recno = repeat->recno + repeat->rle; - } - - /* - * !!! 
- * The test could be written more simply as: - * - * (recno >= start_recno + (page->entries - start_indx)) - * - * It's split into two parts because the simpler test will overflow if - * searching for large record numbers. - */ - if (recno >= start_recno && - recno - start_recno >= page->entries - start_indx) - return (NULL); - - return (page->pg_var + start_indx + (uint32_t)(recno - start_recno)); + WT_COL_RLE *repeat; + WT_PAGE *page; + uint64_t start_recno; + uint32_t base, indx, limit, start_indx; + + page = ref->page; + + /* + * Find the matching slot. + * + * This is done in two stages: first, we do a binary search among any + * repeating records to find largest repeating less than the search key. + * Once there, we can do a simple offset calculation to find the correct + * slot for this record number, because we know any intervening records + * have repeat counts of 1. + */ + for (base = 0, limit = WT_COL_VAR_REPEAT_SET(page) ? page->pg_var_nrepeats : 0; limit != 0; + limit >>= 1) { + indx = base + (limit >> 1); + + repeat = page->pg_var_repeats + indx; + if (recno >= repeat->recno && recno < repeat->recno + repeat->rle) { + if (start_recnop != NULL) + *start_recnop = repeat->recno; + return (page->pg_var + repeat->indx); + } + if (recno < repeat->recno) + continue; + base = indx + 1; + --limit; + } + + /* + * We didn't find an exact match, move forward from the largest repeat less than the search key. + */ + if (base == 0) { + start_indx = 0; + start_recno = ref->ref_recno; + } else { + repeat = page->pg_var_repeats + (base - 1); + start_indx = repeat->indx + 1; + start_recno = repeat->recno + repeat->rle; + } + + /* + * !!! + * The test could be written more simply as: + * + * (recno >= start_recno + (page->entries - start_indx)) + * + * It's split into two parts because the simpler test will overflow if + * searching for large record numbers. 
+ */ + if (recno >= start_recno && recno - start_recno >= page->entries - start_indx) + return (NULL); + + return (page->pg_var + start_indx + (uint32_t)(recno - start_recno)); } diff --git a/src/third_party/wiredtiger/src/include/compact.h b/src/third_party/wiredtiger/src/include/compact.h index 3a1f54ca294..bc5875c27e2 100644 --- a/src/third_party/wiredtiger/src/include/compact.h +++ b/src/third_party/wiredtiger/src/include/compact.h @@ -7,10 +7,10 @@ */ struct __wt_compact_state { - uint32_t lsm_count; /* Number of LSM trees seen */ - uint32_t file_count; /* Number of files seen */ - uint64_t max_time; /* Configured timeout */ - uint64_t prog_msg_count; /* Progress message count */ + uint32_t lsm_count; /* Number of LSM trees seen */ + uint32_t file_count; /* Number of files seen */ + uint64_t max_time; /* Configured timeout */ + uint64_t prog_msg_count; /* Progress message count */ - struct timespec begin; /* Starting time */ + struct timespec begin; /* Starting time */ }; diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h index 847ddef1b2e..a4b7204f8a5 100644 --- a/src/third_party/wiredtiger/src/include/config.h +++ b/src/third_party/wiredtiger/src/include/config.h @@ -7,109 +7,105 @@ */ struct __wt_config { - WT_SESSION_IMPL *session; - const char *orig; - const char *end; - const char *cur; + WT_SESSION_IMPL *session; + const char *orig; + const char *end; + const char *cur; - int depth, top; - const int8_t *go; + int depth, top; + const int8_t *go; }; struct __wt_config_check { - const char *name; - const char *type; - int (*checkf)(WT_SESSION_IMPL *, WT_CONFIG_ITEM *); - const char *checks; - const WT_CONFIG_CHECK *subconfigs; - u_int subconfigs_entries; + const char *name; + const char *type; + int (*checkf)(WT_SESSION_IMPL *, WT_CONFIG_ITEM *); + const char *checks; + const WT_CONFIG_CHECK *subconfigs; + u_int subconfigs_entries; }; -#define WT_CONFIG_REF(session, n) \ - 
(S2C(session)->config_entries[WT_CONFIG_ENTRY_##n]) +#define WT_CONFIG_REF(session, n) (S2C(session)->config_entries[WT_CONFIG_ENTRY_##n]) struct __wt_config_entry { - const char *method; /* method name */ + const char *method; /* method name */ -#define WT_CONFIG_BASE(session, n) (WT_CONFIG_REF(session, n)->base) - const char *base; /* configuration base */ +#define WT_CONFIG_BASE(session, n) (WT_CONFIG_REF(session, n)->base) + const char *base; /* configuration base */ - const WT_CONFIG_CHECK *checks; /* check array */ - u_int checks_entries; + const WT_CONFIG_CHECK *checks; /* check array */ + u_int checks_entries; }; struct __wt_config_parser_impl { - WT_CONFIG_PARSER iface; + WT_CONFIG_PARSER iface; - WT_SESSION_IMPL *session; - WT_CONFIG config; - WT_CONFIG_ITEM config_item; + WT_SESSION_IMPL *session; + WT_CONFIG config; + WT_CONFIG_ITEM config_item; }; -#define WT_CONFIG_ITEM_STATIC_INIT(n) \ - static const WT_CONFIG_ITEM n = { \ - "", 0, 0, WT_CONFIG_ITEM_NUM \ - } +#define WT_CONFIG_ITEM_STATIC_INIT(n) static const WT_CONFIG_ITEM n = {"", 0, 0, WT_CONFIG_ITEM_NUM} -#define WT_CONFIG_UNSET (-1) +#define WT_CONFIG_UNSET (-1) /* * DO NOT EDIT: automatically built by dist/api_config.py. 
* configuration section: BEGIN */ -#define WT_CONFIG_ENTRY_WT_CONNECTION_add_collator 0 -#define WT_CONFIG_ENTRY_WT_CONNECTION_add_compressor 1 -#define WT_CONFIG_ENTRY_WT_CONNECTION_add_data_source 2 -#define WT_CONFIG_ENTRY_WT_CONNECTION_add_encryptor 3 -#define WT_CONFIG_ENTRY_WT_CONNECTION_add_extractor 4 -#define WT_CONFIG_ENTRY_WT_CONNECTION_async_new_op 5 -#define WT_CONFIG_ENTRY_WT_CONNECTION_close 6 -#define WT_CONFIG_ENTRY_WT_CONNECTION_debug_info 7 -#define WT_CONFIG_ENTRY_WT_CONNECTION_load_extension 8 -#define WT_CONFIG_ENTRY_WT_CONNECTION_open_session 9 -#define WT_CONFIG_ENTRY_WT_CONNECTION_query_timestamp 10 -#define WT_CONFIG_ENTRY_WT_CONNECTION_reconfigure 11 -#define WT_CONFIG_ENTRY_WT_CONNECTION_rollback_to_stable 12 -#define WT_CONFIG_ENTRY_WT_CONNECTION_set_file_system 13 -#define WT_CONFIG_ENTRY_WT_CONNECTION_set_timestamp 14 -#define WT_CONFIG_ENTRY_WT_CURSOR_close 15 -#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 16 -#define WT_CONFIG_ENTRY_WT_SESSION_alter 17 -#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 18 -#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 19 -#define WT_CONFIG_ENTRY_WT_SESSION_close 20 -#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 21 -#define WT_CONFIG_ENTRY_WT_SESSION_compact 22 -#define WT_CONFIG_ENTRY_WT_SESSION_create 23 -#define WT_CONFIG_ENTRY_WT_SESSION_drop 24 -#define WT_CONFIG_ENTRY_WT_SESSION_import 25 -#define WT_CONFIG_ENTRY_WT_SESSION_join 26 -#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 27 -#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 28 -#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 29 -#define WT_CONFIG_ENTRY_WT_SESSION_prepare_transaction 30 -#define WT_CONFIG_ENTRY_WT_SESSION_query_timestamp 31 -#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 32 -#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 33 -#define WT_CONFIG_ENTRY_WT_SESSION_rename 34 -#define WT_CONFIG_ENTRY_WT_SESSION_reset 35 -#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 36 -#define WT_CONFIG_ENTRY_WT_SESSION_salvage 37 
-#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 38 -#define WT_CONFIG_ENTRY_WT_SESSION_strerror 39 -#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 40 -#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 41 -#define WT_CONFIG_ENTRY_WT_SESSION_truncate 42 -#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 43 -#define WT_CONFIG_ENTRY_WT_SESSION_verify 44 -#define WT_CONFIG_ENTRY_colgroup_meta 45 -#define WT_CONFIG_ENTRY_file_config 46 -#define WT_CONFIG_ENTRY_file_meta 47 -#define WT_CONFIG_ENTRY_index_meta 48 -#define WT_CONFIG_ENTRY_lsm_meta 49 -#define WT_CONFIG_ENTRY_table_meta 50 -#define WT_CONFIG_ENTRY_wiredtiger_open 51 -#define WT_CONFIG_ENTRY_wiredtiger_open_all 52 -#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 53 -#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 54 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_collator 0 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_compressor 1 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_data_source 2 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_encryptor 3 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_extractor 4 +#define WT_CONFIG_ENTRY_WT_CONNECTION_async_new_op 5 +#define WT_CONFIG_ENTRY_WT_CONNECTION_close 6 +#define WT_CONFIG_ENTRY_WT_CONNECTION_debug_info 7 +#define WT_CONFIG_ENTRY_WT_CONNECTION_load_extension 8 +#define WT_CONFIG_ENTRY_WT_CONNECTION_open_session 9 +#define WT_CONFIG_ENTRY_WT_CONNECTION_query_timestamp 10 +#define WT_CONFIG_ENTRY_WT_CONNECTION_reconfigure 11 +#define WT_CONFIG_ENTRY_WT_CONNECTION_rollback_to_stable 12 +#define WT_CONFIG_ENTRY_WT_CONNECTION_set_file_system 13 +#define WT_CONFIG_ENTRY_WT_CONNECTION_set_timestamp 14 +#define WT_CONFIG_ENTRY_WT_CURSOR_close 15 +#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 16 +#define WT_CONFIG_ENTRY_WT_SESSION_alter 17 +#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 18 +#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 19 +#define WT_CONFIG_ENTRY_WT_SESSION_close 20 +#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 21 +#define 
WT_CONFIG_ENTRY_WT_SESSION_compact 22 +#define WT_CONFIG_ENTRY_WT_SESSION_create 23 +#define WT_CONFIG_ENTRY_WT_SESSION_drop 24 +#define WT_CONFIG_ENTRY_WT_SESSION_import 25 +#define WT_CONFIG_ENTRY_WT_SESSION_join 26 +#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 27 +#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 28 +#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 29 +#define WT_CONFIG_ENTRY_WT_SESSION_prepare_transaction 30 +#define WT_CONFIG_ENTRY_WT_SESSION_query_timestamp 31 +#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 32 +#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 33 +#define WT_CONFIG_ENTRY_WT_SESSION_rename 34 +#define WT_CONFIG_ENTRY_WT_SESSION_reset 35 +#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 36 +#define WT_CONFIG_ENTRY_WT_SESSION_salvage 37 +#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 38 +#define WT_CONFIG_ENTRY_WT_SESSION_strerror 39 +#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 40 +#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 41 +#define WT_CONFIG_ENTRY_WT_SESSION_truncate 42 +#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 43 +#define WT_CONFIG_ENTRY_WT_SESSION_verify 44 +#define WT_CONFIG_ENTRY_colgroup_meta 45 +#define WT_CONFIG_ENTRY_file_config 46 +#define WT_CONFIG_ENTRY_file_meta 47 +#define WT_CONFIG_ENTRY_index_meta 48 +#define WT_CONFIG_ENTRY_lsm_meta 49 +#define WT_CONFIG_ENTRY_table_meta 50 +#define WT_CONFIG_ENTRY_wiredtiger_open 51 +#define WT_CONFIG_ENTRY_wiredtiger_open_all 52 +#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 53 +#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 54 /* * configuration section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 5d70aa5f14c..174263c3949 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -14,22 +14,22 @@ * Per-process information for the library. 
*/ struct __wt_process { - WT_SPINLOCK spinlock; /* Per-process spinlock */ + WT_SPINLOCK spinlock; /* Per-process spinlock */ - /* Locked: connection queue */ - TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh; + /* Locked: connection queue */ + TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh; - bool page_version_ts; /* timestamp version page formats */ + bool page_version_ts; /* timestamp version page formats */ - /* Checksum functions */ -#define __wt_checksum(chunk, len) __wt_process.checksum(chunk, len) - uint32_t (*checksum)(const void *, size_t); +/* Checksum functions */ +#define __wt_checksum(chunk, len) __wt_process.checksum(chunk, len) + uint32_t (*checksum)(const void *, size_t); -#define WT_TSC_DEFAULT_RATIO 1.0 - double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */ - bool use_epochtime; /* use expensive time */ +#define WT_TSC_DEFAULT_RATIO 1.0 + double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */ + bool use_epochtime; /* use expensive time */ - WT_CACHE_POOL *cache_pool; /* shared cache information */ + WT_CACHE_POOL *cache_pool; /* shared cache information */ }; extern WT_PROCESS __wt_process; @@ -38,13 +38,13 @@ extern WT_PROCESS __wt_process; * An list entry for an encryptor with a unique (name, keyid). 
*/ struct __wt_keyed_encryptor { - const char *keyid; /* Key id of encryptor */ - int owned; /* Encryptor needs to be terminated */ - size_t size_const; /* The result of the sizing callback */ - WT_ENCRYPTOR *encryptor; /* User supplied callbacks */ - /* Linked list of encryptors */ - TAILQ_ENTRY(__wt_keyed_encryptor) hashq; - TAILQ_ENTRY(__wt_keyed_encryptor) q; + const char *keyid; /* Key id of encryptor */ + int owned; /* Encryptor needs to be terminated */ + size_t size_const; /* The result of the sizing callback */ + WT_ENCRYPTOR *encryptor; /* User supplied callbacks */ + /* Linked list of encryptors */ + TAILQ_ENTRY(__wt_keyed_encryptor) hashq; + TAILQ_ENTRY(__wt_keyed_encryptor) q; }; /* @@ -52,9 +52,9 @@ struct __wt_keyed_encryptor { * A collator list entry */ struct __wt_named_collator { - const char *name; /* Name of collator */ - WT_COLLATOR *collator; /* User supplied object */ - TAILQ_ENTRY(__wt_named_collator) q; /* Linked list of collators */ + const char *name; /* Name of collator */ + WT_COLLATOR *collator; /* User supplied object */ + TAILQ_ENTRY(__wt_named_collator) q; /* Linked list of collators */ }; /* @@ -62,10 +62,10 @@ struct __wt_named_collator { * A compressor list entry */ struct __wt_named_compressor { - const char *name; /* Name of compressor */ - WT_COMPRESSOR *compressor; /* User supplied callbacks */ - /* Linked list of compressors */ - TAILQ_ENTRY(__wt_named_compressor) q; + const char *name; /* Name of compressor */ + WT_COMPRESSOR *compressor; /* User supplied callbacks */ + /* Linked list of compressors */ + TAILQ_ENTRY(__wt_named_compressor) q; }; /* @@ -73,10 +73,10 @@ struct __wt_named_compressor { * A data source list entry */ struct __wt_named_data_source { - const char *prefix; /* Name of data source */ - WT_DATA_SOURCE *dsrc; /* User supplied callbacks */ - /* Linked list of data sources */ - TAILQ_ENTRY(__wt_named_data_source) q; + const char *prefix; /* Name of data source */ + WT_DATA_SOURCE *dsrc; /* User supplied 
callbacks */ + /* Linked list of data sources */ + TAILQ_ENTRY(__wt_named_data_source) q; }; /* @@ -84,14 +84,14 @@ struct __wt_named_data_source { * An encryptor list entry */ struct __wt_named_encryptor { - const char *name; /* Name of encryptor */ - WT_ENCRYPTOR *encryptor; /* User supplied callbacks */ - /* Locked: list of encryptors by key */ - TAILQ_HEAD(__wt_keyedhash, __wt_keyed_encryptor) - keyedhashqh[WT_HASH_ARRAY_SIZE]; - TAILQ_HEAD(__wt_keyed_qh, __wt_keyed_encryptor) keyedqh; - /* Linked list of encryptors */ - TAILQ_ENTRY(__wt_named_encryptor) q; + const char *name; /* Name of encryptor */ + WT_ENCRYPTOR *encryptor; /* User supplied callbacks */ + /* Locked: list of encryptors by key */ + TAILQ_HEAD(__wt_keyedhash, __wt_keyed_encryptor) + keyedhashqh[WT_HASH_ARRAY_SIZE]; + TAILQ_HEAD(__wt_keyed_qh, __wt_keyed_encryptor) keyedqh; + /* Linked list of encryptors */ + TAILQ_ENTRY(__wt_named_encryptor) q; }; /* @@ -99,445 +99,441 @@ struct __wt_named_encryptor { * An extractor list entry */ struct __wt_named_extractor { - const char *name; /* Name of extractor */ - WT_EXTRACTOR *extractor; /* User supplied object */ - TAILQ_ENTRY(__wt_named_extractor) q; /* Linked list of extractors */ + const char *name; /* Name of extractor */ + WT_EXTRACTOR *extractor; /* User supplied object */ + TAILQ_ENTRY(__wt_named_extractor) q; /* Linked list of extractors */ }; /* * WT_CONN_CHECK_PANIC -- * Check if we've panicked and return the appropriate error. */ -#define WT_CONN_CHECK_PANIC(conn) \ - (F_ISSET(conn, WT_CONN_PANIC) ? WT_PANIC : 0) -#define WT_SESSION_CHECK_PANIC(session) \ - WT_CONN_CHECK_PANIC(S2C(session)) +#define WT_CONN_CHECK_PANIC(conn) (F_ISSET(conn, WT_CONN_PANIC) ? WT_PANIC : 0) +#define WT_SESSION_CHECK_PANIC(session) WT_CONN_CHECK_PANIC(S2C(session)) /* - * Macros to ensure the dhandle is inserted or removed from both the - * main queue and the hashed queue. 
+ * Macros to ensure the dhandle is inserted or removed from both the main queue and the hashed + * queue. */ -#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) do { \ - WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ - TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ - TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ - ++(conn)->dh_bucket_count[bucket]; \ - ++(conn)->dhandle_count; \ -} while (0) - -#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \ - WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ - TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ - TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ - --(conn)->dh_bucket_count[bucket]; \ - --(conn)->dhandle_count; \ -} while (0) +#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) \ + do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ + TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ + TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ + ++(conn)->dh_bucket_count[bucket]; \ + ++(conn)->dhandle_count; \ + } while (0) + +#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) \ + do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ + TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ + TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ + --(conn)->dh_bucket_count[bucket]; \ + --(conn)->dhandle_count; \ + } while (0) /* - * Macros to ensure the block is inserted or removed from both the - * main queue and the hashed queue. + * Macros to ensure the block is inserted or removed from both the main queue and the hashed queue. 
*/ -#define WT_CONN_BLOCK_INSERT(conn, block, bucket) do { \ - TAILQ_INSERT_HEAD(&(conn)->blockqh, block, q); \ - TAILQ_INSERT_HEAD(&(conn)->blockhash[bucket], block, hashq); \ -} while (0) - -#define WT_CONN_BLOCK_REMOVE(conn, block, bucket) do { \ - TAILQ_REMOVE(&(conn)->blockqh, block, q); \ - TAILQ_REMOVE(&(conn)->blockhash[bucket], block, hashq); \ -} while (0) +#define WT_CONN_BLOCK_INSERT(conn, block, bucket) \ + do { \ + TAILQ_INSERT_HEAD(&(conn)->blockqh, block, q); \ + TAILQ_INSERT_HEAD(&(conn)->blockhash[bucket], block, hashq); \ + } while (0) + +#define WT_CONN_BLOCK_REMOVE(conn, block, bucket) \ + do { \ + TAILQ_REMOVE(&(conn)->blockqh, block, q); \ + TAILQ_REMOVE(&(conn)->blockhash[bucket], block, hashq); \ + } while (0) /* * WT_CONN_HOTBACKUP_START -- * Macro to set connection data appropriately for when we commence hot * backup. */ -#define WT_CONN_HOTBACKUP_START(conn) do { \ - (conn)->hot_backup = true; \ - (conn)->hot_backup_list = NULL; \ -} while (0) +#define WT_CONN_HOTBACKUP_START(conn) \ + do { \ + (conn)->hot_backup = true; \ + (conn)->hot_backup_list = NULL; \ + } while (0) /* * WT_CONNECTION_IMPL -- * Implementation of WT_CONNECTION */ struct __wt_connection_impl { - WT_CONNECTION iface; - - /* For operations without an application-supplied session */ - WT_SESSION_IMPL *default_session; - WT_SESSION_IMPL dummy_session; - - const char *cfg; /* Connection configuration */ - - WT_SPINLOCK api_lock; /* Connection API spinlock */ - WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */ - WT_SPINLOCK fh_lock; /* File handle queue spinlock */ - WT_SPINLOCK metadata_lock; /* Metadata update spinlock */ - WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */ - WT_SPINLOCK schema_lock; /* Schema operation spinlock */ - WT_RWLOCK table_lock; /* Table list lock */ - WT_SPINLOCK turtle_lock; /* Turtle file spinlock */ - WT_RWLOCK dhandle_lock; /* Data handle list lock */ - - /* Connection queue */ - TAILQ_ENTRY(__wt_connection_impl) q; - /* Cache 
pool queue */ - TAILQ_ENTRY(__wt_connection_impl) cpq; - - const char *home; /* Database home */ - const char *error_prefix; /* Database error prefix */ - int is_new; /* Connection created database */ - - uint16_t compat_major; /* Compatibility major version */ - uint16_t compat_minor; /* Compatibility minor version */ -#define WT_CONN_COMPAT_NONE UINT16_MAX - uint16_t req_max_major; /* Compatibility maximum major */ - uint16_t req_max_minor; /* Compatibility maximum minor */ - uint16_t req_min_major; /* Compatibility minimum major */ - uint16_t req_min_minor; /* Compatibility minimum minor */ - - WT_EXTENSION_API extension_api; /* Extension API */ - - /* Configuration */ - const WT_CONFIG_ENTRY **config_entries; - - const char *optrack_path; /* Directory for operation logs */ - WT_FH *optrack_map_fh; /* Name to id translation file. */ - WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */ - uintmax_t optrack_pid; /* Cache the process ID. */ - - WT_LSN *debug_ckpt; /* Debug mode checkpoint LSNs. */ - uint32_t debug_ckpt_cnt;/* Checkpoint retention number */ - - void **foc; /* Free-on-close array */ - size_t foc_cnt; /* Array entries */ - size_t foc_size; /* Array size */ - - WT_FH *lock_fh; /* Lock file handle */ - - /* - * The connection keeps a cache of data handles. The set of handles - * can grow quite large so we maintain both a simple list and a hash - * table of lists. The hash table key is based on a hash of the table - * URI. - */ - /* Locked: data handle hash array */ - TAILQ_HEAD(__wt_dhhash, __wt_data_handle) dhhash[WT_HASH_ARRAY_SIZE]; - /* Locked: data handle list */ - TAILQ_HEAD(__wt_dhandle_qh, __wt_data_handle) dhqh; - /* Locked: LSM handle list. 
*/ - TAILQ_HEAD(__wt_lsm_qh, __wt_lsm_tree) lsmqh; - /* Locked: file list */ - TAILQ_HEAD(__wt_fhhash, __wt_fh) fhhash[WT_HASH_ARRAY_SIZE]; - TAILQ_HEAD(__wt_fh_qh, __wt_fh) fhqh; - /* Locked: library list */ - TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh; - - WT_SPINLOCK block_lock; /* Locked: block manager list */ - TAILQ_HEAD(__wt_blockhash, __wt_block) blockhash[WT_HASH_ARRAY_SIZE]; - TAILQ_HEAD(__wt_block_qh, __wt_block) blockqh; - - /* Locked: handles in each bucket */ - u_int dh_bucket_count[WT_HASH_ARRAY_SIZE]; - u_int dhandle_count; /* Locked: handles in the queue */ - u_int open_btree_count; /* Locked: open writable btree count */ - uint32_t next_file_id; /* Locked: file ID counter */ - uint32_t open_file_count; /* Atomic: open file handle count */ - uint32_t open_cursor_count; /* Atomic: open cursor handle count */ - - /* - * WiredTiger allocates space for 50 simultaneous sessions (threads of - * control) by default. Growing the number of threads dynamically is - * possible, but tricky since server threads are walking the array - * without locking it. - * - * There's an array of WT_SESSION_IMPL pointers that reference the - * allocated array; we do it that way because we want an easy way for - * the server thread code to avoid walking the entire array when only a - * few threads are running. - */ - WT_SESSION_IMPL *sessions; /* Session reference */ - uint32_t session_size; /* Session array size */ - uint32_t session_cnt; /* Session count */ - - size_t session_scratch_max; /* Max scratch memory per session */ - - WT_CACHE *cache; /* Page cache */ - volatile uint64_t cache_size; /* Cache size (either statically - configured or the current size - within a cache pool). 
*/ - - WT_TXN_GLOBAL txn_global; /* Global transaction state */ - - WT_RWLOCK hot_backup_lock; /* Hot backup serialization */ - bool hot_backup; /* Hot backup in progress */ - char **hot_backup_list; /* Hot backup file list */ - - WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */ - wt_thread_t ckpt_tid; /* Checkpoint thread */ - bool ckpt_tid_set; /* Checkpoint thread set */ - WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */ -#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0) - wt_off_t ckpt_logsize; /* Checkpoint log size period */ - bool ckpt_signalled;/* Checkpoint signalled */ - - uint64_t ckpt_usecs; /* Checkpoint timer */ - uint64_t ckpt_time_max; /* Checkpoint time min/max */ - uint64_t ckpt_time_min; - uint64_t ckpt_time_recent; /* Checkpoint time recent/total */ - uint64_t ckpt_time_total; - - /* Checkpoint stats and verbosity timers */ - struct timespec ckpt_timer_start; - struct timespec ckpt_timer_scrub_end; - - /* Checkpoint progress message data */ - uint64_t ckpt_progress_msg_count; - uint64_t ckpt_write_bytes; - uint64_t ckpt_write_pages; - - /* Connection's maximum and base write generations. 
*/ - uint64_t max_write_gen; - uint64_t base_write_gen; - - uint32_t stat_flags; /* Options declared in flags.py */ - - /* Connection statistics */ - WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS]; - WT_CONNECTION_STATS *stat_array; - - WT_ASYNC *async; /* Async structure */ - bool async_cfg; /* Global async configuration */ - uint32_t async_size; /* Async op array size */ - uint32_t async_workers; /* Number of async workers */ - - WT_CAPACITY capacity; /* Capacity structure */ - WT_SESSION_IMPL *capacity_session; /* Capacity thread session */ - wt_thread_t capacity_tid; /* Capacity thread */ - bool capacity_tid_set; /* Capacity thread set */ - WT_CONDVAR *capacity_cond; /* Capacity wait mutex */ - - WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */ - - WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */ - - bool evict_server_running;/* Eviction server operating */ - - WT_THREAD_GROUP evict_threads; - uint32_t evict_threads_max;/* Max eviction threads */ - uint32_t evict_threads_min;/* Min eviction threads */ - -#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H" - WT_SESSION_IMPL *stat_session; /* Statistics log session */ - wt_thread_t stat_tid; /* Statistics log thread */ - bool stat_tid_set; /* Statistics log thread set */ - WT_CONDVAR *stat_cond; /* Statistics log wait mutex */ - const char *stat_format; /* Statistics log timestamp format */ - WT_FSTREAM *stat_fs; /* Statistics log stream */ - /* Statistics log json table printing state flag */ - bool stat_json_tables; - char *stat_path; /* Statistics log path format */ - char **stat_sources; /* Statistics log list of objects */ - const char *stat_stamp; /* Statistics log entry timestamp */ - uint64_t stat_usecs; /* Statistics log period */ + WT_CONNECTION iface; + + /* For operations without an application-supplied session */ + WT_SESSION_IMPL *default_session; + WT_SESSION_IMPL dummy_session; + + const char *cfg; /* Connection configuration */ + + WT_SPINLOCK api_lock; /* 
Connection API spinlock */ + WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */ + WT_SPINLOCK fh_lock; /* File handle queue spinlock */ + WT_SPINLOCK metadata_lock; /* Metadata update spinlock */ + WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */ + WT_SPINLOCK schema_lock; /* Schema operation spinlock */ + WT_RWLOCK table_lock; /* Table list lock */ + WT_SPINLOCK turtle_lock; /* Turtle file spinlock */ + WT_RWLOCK dhandle_lock; /* Data handle list lock */ + + /* Connection queue */ + TAILQ_ENTRY(__wt_connection_impl) q; + /* Cache pool queue */ + TAILQ_ENTRY(__wt_connection_impl) cpq; + + const char *home; /* Database home */ + const char *error_prefix; /* Database error prefix */ + int is_new; /* Connection created database */ + + uint16_t compat_major; /* Compatibility major version */ + uint16_t compat_minor; /* Compatibility minor version */ +#define WT_CONN_COMPAT_NONE UINT16_MAX + uint16_t req_max_major; /* Compatibility maximum major */ + uint16_t req_max_minor; /* Compatibility maximum minor */ + uint16_t req_min_major; /* Compatibility minimum major */ + uint16_t req_min_minor; /* Compatibility minimum minor */ + + WT_EXTENSION_API extension_api; /* Extension API */ + + /* Configuration */ + const WT_CONFIG_ENTRY **config_entries; + + const char *optrack_path; /* Directory for operation logs */ + WT_FH *optrack_map_fh; /* Name to id translation file. */ + WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */ + uintmax_t optrack_pid; /* Cache the process ID. */ + + WT_LSN *debug_ckpt; /* Debug mode checkpoint LSNs. */ + uint32_t debug_ckpt_cnt; /* Checkpoint retention number */ + + void **foc; /* Free-on-close array */ + size_t foc_cnt; /* Array entries */ + size_t foc_size; /* Array size */ + + WT_FH *lock_fh; /* Lock file handle */ + + /* + * The connection keeps a cache of data handles. The set of handles can grow quite large so we + * maintain both a simple list and a hash table of lists. 
The hash table key is based on a hash + * of the table URI. + */ + /* Locked: data handle hash array */ + TAILQ_HEAD(__wt_dhhash, __wt_data_handle) dhhash[WT_HASH_ARRAY_SIZE]; + /* Locked: data handle list */ + TAILQ_HEAD(__wt_dhandle_qh, __wt_data_handle) dhqh; + /* Locked: LSM handle list. */ + TAILQ_HEAD(__wt_lsm_qh, __wt_lsm_tree) lsmqh; + /* Locked: file list */ + TAILQ_HEAD(__wt_fhhash, __wt_fh) fhhash[WT_HASH_ARRAY_SIZE]; + TAILQ_HEAD(__wt_fh_qh, __wt_fh) fhqh; + /* Locked: library list */ + TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh; + + WT_SPINLOCK block_lock; /* Locked: block manager list */ + TAILQ_HEAD(__wt_blockhash, __wt_block) blockhash[WT_HASH_ARRAY_SIZE]; + TAILQ_HEAD(__wt_block_qh, __wt_block) blockqh; + + /* Locked: handles in each bucket */ + u_int dh_bucket_count[WT_HASH_ARRAY_SIZE]; + u_int dhandle_count; /* Locked: handles in the queue */ + u_int open_btree_count; /* Locked: open writable btree count */ + uint32_t next_file_id; /* Locked: file ID counter */ + uint32_t open_file_count; /* Atomic: open file handle count */ + uint32_t open_cursor_count; /* Atomic: open cursor handle count */ + + /* + * WiredTiger allocates space for 50 simultaneous sessions (threads of + * control) by default. Growing the number of threads dynamically is + * possible, but tricky since server threads are walking the array + * without locking it. + * + * There's an array of WT_SESSION_IMPL pointers that reference the + * allocated array; we do it that way because we want an easy way for + * the server thread code to avoid walking the entire array when only a + * few threads are running. + */ + WT_SESSION_IMPL *sessions; /* Session reference */ + uint32_t session_size; /* Session array size */ + uint32_t session_cnt; /* Session count */ + + size_t session_scratch_max; /* Max scratch memory per session */ + + WT_CACHE *cache; /* Page cache */ + volatile uint64_t cache_size; /* Cache size (either statically + configured or the current size + within a cache pool). 
*/ + + WT_TXN_GLOBAL txn_global; /* Global transaction state */ + + WT_RWLOCK hot_backup_lock; /* Hot backup serialization */ + bool hot_backup; /* Hot backup in progress */ + char **hot_backup_list; /* Hot backup file list */ + + WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */ + wt_thread_t ckpt_tid; /* Checkpoint thread */ + bool ckpt_tid_set; /* Checkpoint thread set */ + WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */ +#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0) + wt_off_t ckpt_logsize; /* Checkpoint log size period */ + bool ckpt_signalled; /* Checkpoint signalled */ + + uint64_t ckpt_usecs; /* Checkpoint timer */ + uint64_t ckpt_time_max; /* Checkpoint time min/max */ + uint64_t ckpt_time_min; + uint64_t ckpt_time_recent; /* Checkpoint time recent/total */ + uint64_t ckpt_time_total; + + /* Checkpoint stats and verbosity timers */ + struct timespec ckpt_timer_start; + struct timespec ckpt_timer_scrub_end; + + /* Checkpoint progress message data */ + uint64_t ckpt_progress_msg_count; + uint64_t ckpt_write_bytes; + uint64_t ckpt_write_pages; + + /* Connection's maximum and base write generations. 
*/ + uint64_t max_write_gen; + uint64_t base_write_gen; + + uint32_t stat_flags; /* Options declared in flags.py */ + + /* Connection statistics */ + WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS]; + WT_CONNECTION_STATS *stat_array; + + WT_ASYNC *async; /* Async structure */ + bool async_cfg; /* Global async configuration */ + uint32_t async_size; /* Async op array size */ + uint32_t async_workers; /* Number of async workers */ + + WT_CAPACITY capacity; /* Capacity structure */ + WT_SESSION_IMPL *capacity_session; /* Capacity thread session */ + wt_thread_t capacity_tid; /* Capacity thread */ + bool capacity_tid_set; /* Capacity thread set */ + WT_CONDVAR *capacity_cond; /* Capacity wait mutex */ + + WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */ + + WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */ + + bool evict_server_running; /* Eviction server operating */ + + WT_THREAD_GROUP evict_threads; + uint32_t evict_threads_max; /* Max eviction threads */ + uint32_t evict_threads_min; /* Min eviction threads */ + +#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H" + WT_SESSION_IMPL *stat_session; /* Statistics log session */ + wt_thread_t stat_tid; /* Statistics log thread */ + bool stat_tid_set; /* Statistics log thread set */ + WT_CONDVAR *stat_cond; /* Statistics log wait mutex */ + const char *stat_format; /* Statistics log timestamp format */ + WT_FSTREAM *stat_fs; /* Statistics log stream */ + /* Statistics log json table printing state flag */ + bool stat_json_tables; + char *stat_path; /* Statistics log path format */ + char **stat_sources; /* Statistics log list of objects */ + const char *stat_stamp; /* Statistics log entry timestamp */ + uint64_t stat_usecs; /* Statistics log period */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CONN_LOG_ARCHIVE 0x001u /* Archive is enabled */ -#define WT_CONN_LOG_DEBUG_MODE 0x002u /* Debug-mode logging enabled */ -#define WT_CONN_LOG_DOWNGRADED 0x004u /* Running older version */ 
-#define WT_CONN_LOG_ENABLED 0x008u /* Logging is enabled */ -#define WT_CONN_LOG_EXISTED 0x010u /* Log files found */ -#define WT_CONN_LOG_FORCE_DOWNGRADE 0x020u /* Force downgrade */ -#define WT_CONN_LOG_RECOVER_DIRTY 0x040u /* Recovering unclean */ -#define WT_CONN_LOG_RECOVER_DONE 0x080u /* Recovery completed */ -#define WT_CONN_LOG_RECOVER_ERR 0x100u /* Error if recovery required */ -#define WT_CONN_LOG_RECOVER_FAILED 0x200u /* Recovery failed */ -#define WT_CONN_LOG_ZERO_FILL 0x400u /* Manually zero files */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t log_flags; /* Global logging configuration */ - WT_CONDVAR *log_cond; /* Log server wait mutex */ - WT_SESSION_IMPL *log_session; /* Log server session */ - wt_thread_t log_tid; /* Log server thread */ - bool log_tid_set; /* Log server thread set */ - WT_CONDVAR *log_file_cond; /* Log file thread wait mutex */ - WT_SESSION_IMPL *log_file_session;/* Log file thread session */ - wt_thread_t log_file_tid; /* Log file thread */ - bool log_file_tid_set;/* Log file thread set */ - WT_CONDVAR *log_wrlsn_cond;/* Log write lsn thread wait mutex */ - WT_SESSION_IMPL *log_wrlsn_session;/* Log write lsn thread session */ - wt_thread_t log_wrlsn_tid; /* Log write lsn thread */ - bool log_wrlsn_tid_set;/* Log write lsn thread set */ - WT_LOG *log; /* Logging structure */ - WT_COMPRESSOR *log_compressor;/* Logging compressor */ - uint32_t log_cursors; /* Log cursor count */ - wt_off_t log_dirty_max; /* Log dirty system cache max size */ - wt_off_t log_file_max; /* Log file max size */ - const char *log_path; /* Logging path format */ - uint32_t log_prealloc; /* Log file pre-allocation */ - uint16_t log_req_max; /* Max required log version */ - uint16_t log_req_min; /* Min required log version */ - uint32_t txn_logsync; /* Log sync configuration */ - - WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */ - - /* - * Is there a data/schema change that needs to be the part of a - * checkpoint. 
- */ - bool modified; - - WT_SESSION_IMPL *sweep_session; /* Handle sweep session */ - wt_thread_t sweep_tid; /* Handle sweep thread */ - int sweep_tid_set; /* Handle sweep thread set */ - WT_CONDVAR *sweep_cond; /* Handle sweep wait mutex */ - uint64_t sweep_idle_time; /* Handle sweep idle time */ - uint64_t sweep_interval; /* Handle sweep interval */ - uint64_t sweep_handles_min;/* Handle sweep minimum open */ - - /* Set of btree IDs not being rolled back */ - uint8_t *stable_rollback_bitstring; - uint32_t stable_rollback_maxfile; - - /* Locked: collator list */ - TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh; - - /* Locked: compressor list */ - TAILQ_HEAD(__wt_comp_qh, __wt_named_compressor) compqh; - - /* Locked: data source list */ - TAILQ_HEAD(__wt_dsrc_qh, __wt_named_data_source) dsrcqh; - - /* Locked: encryptor list */ - WT_SPINLOCK encryptor_lock; /* Encryptor list lock */ - TAILQ_HEAD(__wt_encrypt_qh, __wt_named_encryptor) encryptqh; - - /* Locked: extractor list */ - TAILQ_HEAD(__wt_extractor_qh, __wt_named_extractor) extractorqh; - - void *lang_private; /* Language specific private storage */ - - /* If non-zero, all buffers used for I/O will be aligned to this. 
*/ - size_t buffer_alignment; - - uint64_t stashed_bytes; /* Atomic: stashed memory statistics */ - uint64_t stashed_objects; - /* Generations manager */ - volatile uint64_t generations[WT_GENERATIONS]; - - wt_off_t data_extend_len; /* file_extend data length */ - wt_off_t log_extend_len; /* file_extend log length */ +#define WT_CONN_LOG_ARCHIVE 0x001u /* Archive is enabled */ +#define WT_CONN_LOG_DEBUG_MODE 0x002u /* Debug-mode logging enabled */ +#define WT_CONN_LOG_DOWNGRADED 0x004u /* Running older version */ +#define WT_CONN_LOG_ENABLED 0x008u /* Logging is enabled */ +#define WT_CONN_LOG_EXISTED 0x010u /* Log files found */ +#define WT_CONN_LOG_FORCE_DOWNGRADE 0x020u /* Force downgrade */ +#define WT_CONN_LOG_RECOVER_DIRTY 0x040u /* Recovering unclean */ +#define WT_CONN_LOG_RECOVER_DONE 0x080u /* Recovery completed */ +#define WT_CONN_LOG_RECOVER_ERR 0x100u /* Error if recovery required */ +#define WT_CONN_LOG_RECOVER_FAILED 0x200u /* Recovery failed */ +#define WT_CONN_LOG_ZERO_FILL 0x400u /* Manually zero files */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t log_flags; /* Global logging configuration */ + WT_CONDVAR *log_cond; /* Log server wait mutex */ + WT_SESSION_IMPL *log_session; /* Log server session */ + wt_thread_t log_tid; /* Log server thread */ + bool log_tid_set; /* Log server thread set */ + WT_CONDVAR *log_file_cond; /* Log file thread wait mutex */ + WT_SESSION_IMPL *log_file_session; /* Log file thread session */ + wt_thread_t log_file_tid; /* Log file thread */ + bool log_file_tid_set; /* Log file thread set */ + WT_CONDVAR *log_wrlsn_cond; /* Log write lsn thread wait mutex */ + WT_SESSION_IMPL *log_wrlsn_session; /* Log write lsn thread session */ + wt_thread_t log_wrlsn_tid; /* Log write lsn thread */ + bool log_wrlsn_tid_set; /* Log write lsn thread set */ + WT_LOG *log; /* Logging structure */ + WT_COMPRESSOR *log_compressor; /* Logging compressor */ + uint32_t log_cursors; /* Log cursor count */ + wt_off_t log_dirty_max; 
/* Log dirty system cache max size */ + wt_off_t log_file_max; /* Log file max size */ + const char *log_path; /* Logging path format */ + uint32_t log_prealloc; /* Log file pre-allocation */ + uint16_t log_req_max; /* Max required log version */ + uint16_t log_req_min; /* Min required log version */ + uint32_t txn_logsync; /* Log sync configuration */ + + WT_SESSION_IMPL *meta_ckpt_session; /* Metadata checkpoint session */ + + /* + * Is there a data/schema change that needs to be the part of a checkpoint. + */ + bool modified; + + WT_SESSION_IMPL *sweep_session; /* Handle sweep session */ + wt_thread_t sweep_tid; /* Handle sweep thread */ + int sweep_tid_set; /* Handle sweep thread set */ + WT_CONDVAR *sweep_cond; /* Handle sweep wait mutex */ + uint64_t sweep_idle_time; /* Handle sweep idle time */ + uint64_t sweep_interval; /* Handle sweep interval */ + uint64_t sweep_handles_min; /* Handle sweep minimum open */ + + /* Set of btree IDs not being rolled back */ + uint8_t *stable_rollback_bitstring; + uint32_t stable_rollback_maxfile; + + /* Locked: collator list */ + TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh; + + /* Locked: compressor list */ + TAILQ_HEAD(__wt_comp_qh, __wt_named_compressor) compqh; + + /* Locked: data source list */ + TAILQ_HEAD(__wt_dsrc_qh, __wt_named_data_source) dsrcqh; + + /* Locked: encryptor list */ + WT_SPINLOCK encryptor_lock; /* Encryptor list lock */ + TAILQ_HEAD(__wt_encrypt_qh, __wt_named_encryptor) encryptqh; + + /* Locked: extractor list */ + TAILQ_HEAD(__wt_extractor_qh, __wt_named_extractor) extractorqh; + + void *lang_private; /* Language specific private storage */ + + /* If non-zero, all buffers used for I/O will be aligned to this. 
*/ + size_t buffer_alignment; + + uint64_t stashed_bytes; /* Atomic: stashed memory statistics */ + uint64_t stashed_objects; + /* Generations manager */ + volatile uint64_t generations[WT_GENERATIONS]; + + wt_off_t data_extend_len; /* file_extend data length */ + wt_off_t log_extend_len; /* file_extend log length */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_DIRECT_IO_CHECKPOINT 0x1u /* Checkpoints */ -#define WT_DIRECT_IO_DATA 0x2u /* Data files */ -#define WT_DIRECT_IO_LOG 0x4u /* Log files */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint64_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */ - uint64_t write_through; /* FILE_FLAG_WRITE_THROUGH */ +#define WT_DIRECT_IO_CHECKPOINT 0x1u /* Checkpoints */ +#define WT_DIRECT_IO_DATA 0x2u /* Data files */ +#define WT_DIRECT_IO_LOG 0x4u /* Log files */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint64_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */ + uint64_t write_through; /* FILE_FLAG_WRITE_THROUGH */ - bool mmap; /* mmap configuration */ - int page_size; /* OS page size for mmap alignment */ + bool mmap; /* mmap configuration */ + int page_size; /* OS page size for mmap alignment */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_VERB_API 0x000000001u -#define WT_VERB_BLOCK 0x000000002u -#define WT_VERB_CHECKPOINT 0x000000004u -#define WT_VERB_CHECKPOINT_PROGRESS 0x000000008u -#define WT_VERB_COMPACT 0x000000010u -#define WT_VERB_COMPACT_PROGRESS 0x000000020u -#define WT_VERB_ERROR_RETURNS 0x000000040u -#define WT_VERB_EVICT 0x000000080u -#define WT_VERB_EVICTSERVER 0x000000100u -#define WT_VERB_EVICT_STUCK 0x000000200u -#define WT_VERB_FILEOPS 0x000000400u -#define WT_VERB_HANDLEOPS 0x000000800u -#define WT_VERB_LOG 0x000001000u -#define WT_VERB_LOOKASIDE 0x000002000u -#define WT_VERB_LOOKASIDE_ACTIVITY 0x000004000u -#define WT_VERB_LSM 0x000008000u -#define WT_VERB_LSM_MANAGER 0x000010000u -#define WT_VERB_METADATA 0x000020000u -#define WT_VERB_MUTEX 0x000040000u -#define 
WT_VERB_OVERFLOW 0x000080000u -#define WT_VERB_READ 0x000100000u -#define WT_VERB_REBALANCE 0x000200000u -#define WT_VERB_RECONCILE 0x000400000u -#define WT_VERB_RECOVERY 0x000800000u -#define WT_VERB_RECOVERY_PROGRESS 0x001000000u -#define WT_VERB_SALVAGE 0x002000000u -#define WT_VERB_SHARED_CACHE 0x004000000u -#define WT_VERB_SPLIT 0x008000000u -#define WT_VERB_TEMPORARY 0x010000000u -#define WT_VERB_THREAD_GROUP 0x020000000u -#define WT_VERB_TIMESTAMP 0x040000000u -#define WT_VERB_TRANSACTION 0x080000000u -#define WT_VERB_VERIFY 0x100000000u -#define WT_VERB_VERSION 0x200000000u -#define WT_VERB_WRITE 0x400000000u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint64_t verbose; - - /* - * Variable with flags for which subsystems the diagnostic stress timing - * delays have been requested. - */ +#define WT_VERB_API 0x000000001u +#define WT_VERB_BLOCK 0x000000002u +#define WT_VERB_CHECKPOINT 0x000000004u +#define WT_VERB_CHECKPOINT_PROGRESS 0x000000008u +#define WT_VERB_COMPACT 0x000000010u +#define WT_VERB_COMPACT_PROGRESS 0x000000020u +#define WT_VERB_ERROR_RETURNS 0x000000040u +#define WT_VERB_EVICT 0x000000080u +#define WT_VERB_EVICTSERVER 0x000000100u +#define WT_VERB_EVICT_STUCK 0x000000200u +#define WT_VERB_FILEOPS 0x000000400u +#define WT_VERB_HANDLEOPS 0x000000800u +#define WT_VERB_LOG 0x000001000u +#define WT_VERB_LOOKASIDE 0x000002000u +#define WT_VERB_LOOKASIDE_ACTIVITY 0x000004000u +#define WT_VERB_LSM 0x000008000u +#define WT_VERB_LSM_MANAGER 0x000010000u +#define WT_VERB_METADATA 0x000020000u +#define WT_VERB_MUTEX 0x000040000u +#define WT_VERB_OVERFLOW 0x000080000u +#define WT_VERB_READ 0x000100000u +#define WT_VERB_REBALANCE 0x000200000u +#define WT_VERB_RECONCILE 0x000400000u +#define WT_VERB_RECOVERY 0x000800000u +#define WT_VERB_RECOVERY_PROGRESS 0x001000000u +#define WT_VERB_SALVAGE 0x002000000u +#define WT_VERB_SHARED_CACHE 0x004000000u +#define WT_VERB_SPLIT 0x008000000u +#define WT_VERB_TEMPORARY 0x010000000u +#define WT_VERB_THREAD_GROUP 
0x020000000u +#define WT_VERB_TIMESTAMP 0x040000000u +#define WT_VERB_TRANSACTION 0x080000000u +#define WT_VERB_VERIFY 0x100000000u +#define WT_VERB_VERSION 0x200000000u +#define WT_VERB_WRITE 0x400000000u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint64_t verbose; + +/* + * Variable with flags for which subsystems the diagnostic stress timing delays have been requested. + */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x001u -#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x002u -#define WT_TIMING_STRESS_LOOKASIDE_SWEEP 0x004u -#define WT_TIMING_STRESS_SPLIT_1 0x008u -#define WT_TIMING_STRESS_SPLIT_2 0x010u -#define WT_TIMING_STRESS_SPLIT_3 0x020u -#define WT_TIMING_STRESS_SPLIT_4 0x040u -#define WT_TIMING_STRESS_SPLIT_5 0x080u -#define WT_TIMING_STRESS_SPLIT_6 0x100u -#define WT_TIMING_STRESS_SPLIT_7 0x200u -#define WT_TIMING_STRESS_SPLIT_8 0x400u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint64_t timing_stress_flags; - -#define WT_STDERR(s) (&S2C(s)->wt_stderr) -#define WT_STDOUT(s) (&S2C(s)->wt_stdout) - WT_FSTREAM wt_stderr, wt_stdout; - - /* - * File system interface abstracted to support alternative file system - * implementations. 
- */ - WT_FILE_SYSTEM *file_system; +#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x001u +#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x002u +#define WT_TIMING_STRESS_LOOKASIDE_SWEEP 0x004u +#define WT_TIMING_STRESS_SPLIT_1 0x008u +#define WT_TIMING_STRESS_SPLIT_2 0x010u +#define WT_TIMING_STRESS_SPLIT_3 0x020u +#define WT_TIMING_STRESS_SPLIT_4 0x040u +#define WT_TIMING_STRESS_SPLIT_5 0x080u +#define WT_TIMING_STRESS_SPLIT_6 0x100u +#define WT_TIMING_STRESS_SPLIT_7 0x200u +#define WT_TIMING_STRESS_SPLIT_8 0x400u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint64_t timing_stress_flags; + +#define WT_STDERR(s) (&S2C(s)->wt_stderr) +#define WT_STDOUT(s) (&S2C(s)->wt_stdout) + WT_FSTREAM wt_stderr, wt_stdout; + + /* + * File system interface abstracted to support alternative file system implementations. + */ + WT_FILE_SYSTEM *file_system; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CONN_CACHE_CURSORS 0x0000001u -#define WT_CONN_CACHE_POOL 0x0000002u -#define WT_CONN_CKPT_SYNC 0x0000004u -#define WT_CONN_CLOSING 0x0000008u -#define WT_CONN_CLOSING_NO_MORE_OPENS 0x0000010u -#define WT_CONN_CLOSING_TIMESTAMP 0x0000020u -#define WT_CONN_COMPATIBILITY 0x0000040u -#define WT_CONN_DATA_CORRUPTION 0x0000080u -#define WT_CONN_EVICTION_NO_LOOKASIDE 0x0000100u -#define WT_CONN_EVICTION_RUN 0x0000200u -#define WT_CONN_IN_MEMORY 0x0000400u -#define WT_CONN_LEAK_MEMORY 0x0000800u -#define WT_CONN_LOOKASIDE_OPEN 0x0001000u -#define WT_CONN_LSM_MERGE 0x0002000u -#define WT_CONN_OPTRACK 0x0004000u -#define WT_CONN_PANIC 0x0008000u -#define WT_CONN_READONLY 0x0010000u -#define WT_CONN_RECONFIGURING 0x0020000u -#define WT_CONN_RECOVERING 0x0040000u -#define WT_CONN_SALVAGE 0x0080000u -#define WT_CONN_SERVER_ASYNC 0x0100000u -#define WT_CONN_SERVER_CAPACITY 0x0200000u -#define WT_CONN_SERVER_CHECKPOINT 0x0400000u -#define WT_CONN_SERVER_LOG 0x0800000u -#define WT_CONN_SERVER_LSM 0x1000000u -#define WT_CONN_SERVER_STATISTICS 0x2000000u -#define WT_CONN_SERVER_SWEEP 0x4000000u 
-#define WT_CONN_WAS_BACKUP 0x8000000u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_CONN_CACHE_CURSORS 0x0000001u +#define WT_CONN_CACHE_POOL 0x0000002u +#define WT_CONN_CKPT_SYNC 0x0000004u +#define WT_CONN_CLOSING 0x0000008u +#define WT_CONN_CLOSING_NO_MORE_OPENS 0x0000010u +#define WT_CONN_CLOSING_TIMESTAMP 0x0000020u +#define WT_CONN_COMPATIBILITY 0x0000040u +#define WT_CONN_DATA_CORRUPTION 0x0000080u +#define WT_CONN_EVICTION_NO_LOOKASIDE 0x0000100u +#define WT_CONN_EVICTION_RUN 0x0000200u +#define WT_CONN_IN_MEMORY 0x0000400u +#define WT_CONN_LEAK_MEMORY 0x0000800u +#define WT_CONN_LOOKASIDE_OPEN 0x0001000u +#define WT_CONN_LSM_MERGE 0x0002000u +#define WT_CONN_OPTRACK 0x0004000u +#define WT_CONN_PANIC 0x0008000u +#define WT_CONN_READONLY 0x0010000u +#define WT_CONN_RECONFIGURING 0x0020000u +#define WT_CONN_RECOVERING 0x0040000u +#define WT_CONN_SALVAGE 0x0080000u +#define WT_CONN_SERVER_ASYNC 0x0100000u +#define WT_CONN_SERVER_CAPACITY 0x0200000u +#define WT_CONN_SERVER_CHECKPOINT 0x0400000u +#define WT_CONN_SERVER_LOG 0x0800000u +#define WT_CONN_SERVER_LSM 0x1000000u +#define WT_CONN_SERVER_STATISTICS 0x2000000u +#define WT_CONN_SERVER_SWEEP 0x4000000u +#define WT_CONN_WAS_BACKUP 0x8000000u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/ctype.i b/src/third_party/wiredtiger/src/include/ctype.i index 5493128c81f..3fc3ba91fac 100644 --- a/src/third_party/wiredtiger/src/include/ctype.i +++ b/src/third_party/wiredtiger/src/include/ctype.i @@ -10,60 +10,60 @@ /* * __wt_isalnum -- - * Wrap the ctype function without sign extension. + * Wrap the ctype function without sign extension. */ static inline bool __wt_isalnum(u_char c) { - return (isalnum(c) != 0); + return (isalnum(c) != 0); } /* * __wt_isalpha -- - * Wrap the ctype function without sign extension. + * Wrap the ctype function without sign extension. 
*/ static inline bool __wt_isalpha(u_char c) { - return (isalpha(c) != 0); + return (isalpha(c) != 0); } /* * __wt_isdigit -- - * Wrap the ctype function without sign extension. + * Wrap the ctype function without sign extension. */ static inline bool __wt_isdigit(u_char c) { - return (isdigit(c) != 0); + return (isdigit(c) != 0); } /* * __wt_isprint -- - * Wrap the ctype function without sign extension. + * Wrap the ctype function without sign extension. */ static inline bool __wt_isprint(u_char c) { - return (isprint(c) != 0); + return (isprint(c) != 0); } /* * __wt_isspace -- - * Wrap the ctype function without sign extension. + * Wrap the ctype function without sign extension. */ static inline bool __wt_isspace(u_char c) { - return (isspace(c) != 0); + return (isspace(c) != 0); } /* * __wt_tolower -- - * Wrap the ctype function without sign extension. + * Wrap the ctype function without sign extension. */ static inline u_char __wt_tolower(u_char c) { - return ((u_char)tolower(c)); + return ((u_char)tolower(c)); } diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 33d6660e687..18b17a3bebd 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -9,309 +9,257 @@ /* * Initialize a static WT_CURSOR structure. 
*/ -#define WT_CURSOR_STATIC_INIT(n, \ - get_key, \ - get_value, \ - set_key, \ - set_value, \ - compare, \ - equals, \ - next, \ - prev, \ - reset, \ - search, \ - search_near, \ - insert, \ - modify, \ - update, \ - remove, \ - reserve, \ - reconfigure, \ - cache, \ - reopen, \ - close) \ - static const WT_CURSOR n = { \ - NULL, /* session */ \ - NULL, /* uri */ \ - NULL, /* key_format */ \ - NULL, /* value_format */ \ - get_key, \ - get_value, \ - set_key, \ - set_value, \ - compare, \ - equals, \ - next, \ - prev, \ - reset, \ - search, \ - search_near, \ - insert, \ - modify, \ - update, \ - remove, \ - reserve, \ - close, \ - reconfigure, \ - cache, \ - reopen, \ - 0, /* uri_hash */ \ - { NULL, NULL }, /* TAILQ_ENTRY q */ \ - 0, /* recno key */ \ - { 0 }, /* recno raw buffer */ \ - NULL, /* json_private */ \ - NULL, /* lang_private */ \ - { NULL, 0, NULL, 0, 0 }, /* WT_ITEM key */ \ - { NULL, 0, NULL, 0, 0 }, /* WT_ITEM value */ \ - 0, /* int saved_err */ \ - NULL, /* internal_uri */ \ - 0 /* uint32_t flags */ \ -} +#define WT_CURSOR_STATIC_INIT(n, get_key, get_value, set_key, set_value, compare, equals, next, \ + prev, reset, search, search_near, insert, modify, update, remove, \ + reserve, reconfigure, cache, reopen, close) \ + static const WT_CURSOR n = { \ + NULL, /* session */ \ + NULL, /* uri */ \ + NULL, /* key_format */ \ + NULL, /* value_format */ \ + get_key, get_value, set_key, set_value, compare, equals, next, prev, reset, search, \ + search_near, insert, modify, update, remove, reserve, close, reconfigure, cache, reopen, \ + 0, /* uri_hash */ \ + {NULL, NULL}, /* TAILQ_ENTRY q */ \ + 0, /* recno key */ \ + {0}, /* recno raw buffer */ \ + NULL, /* json_private */ \ + NULL, /* lang_private */ \ + {NULL, 0, NULL, 0, 0}, /* WT_ITEM key */ \ + {NULL, 0, NULL, 0, 0}, /* WT_ITEM value */ \ + 0, /* int saved_err */ \ + NULL, /* internal_uri */ \ + 0 /* uint32_t flags */ \ + } struct __wt_cursor_backup { - WT_CURSOR iface; + WT_CURSOR iface; - size_t 
next; /* Cursor position */ - WT_FSTREAM *bfs; /* Backup file stream */ - uint32_t maxid; /* Maximum log file ID seen */ + size_t next; /* Cursor position */ + WT_FSTREAM *bfs; /* Backup file stream */ + uint32_t maxid; /* Maximum log file ID seen */ - char **list; /* List of files to be copied. */ - size_t list_allocated; - size_t list_next; + char **list; /* List of files to be copied. */ + size_t list_allocated; + size_t list_next; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CURBACKUP_DUP 0x1u /* Duplicated backup cursor */ -#define WT_CURBACKUP_LOCKER 0x2u /* Hot-backup started */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags; +#define WT_CURBACKUP_DUP 0x1u /* Duplicated backup cursor */ +#define WT_CURBACKUP_LOCKER 0x2u /* Hot-backup started */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags; }; -#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)(cursor))->maxid) +#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)(cursor))->maxid) struct __wt_cursor_btree { - WT_CURSOR iface; - - /* - * The btree field is safe to use when the cursor is open. When the - * cursor is cached, the btree may be closed, so it is only safe - * initially to look at the underlying data handle. - */ - WT_BTREE *btree; /* Enclosing btree */ - WT_DATA_HANDLE *dhandle; /* Data handle for the btree */ - - /* - * The following fields are set by the search functions as a precursor - * to page modification: we have a page, a WT_COL/WT_ROW slot on the - * page, an insert head, insert list and a skiplist stack (the stack of - * skiplist entries leading to the insert point). The search functions - * also return the relationship of the search key to the found key. 
- */ - WT_REF *ref; /* Current page */ - uint32_t slot; /* WT_COL/WT_ROW 0-based slot */ - - WT_INSERT_HEAD *ins_head; /* Insert chain head */ - WT_INSERT *ins; /* Current insert node */ - /* Search stack */ - WT_INSERT **ins_stack[WT_SKIP_MAXDEPTH]; - - /* Next item(s) found during search */ - WT_INSERT *next_stack[WT_SKIP_MAXDEPTH]; - - uint32_t page_deleted_count; /* Deleted items on the page */ - - uint64_t recno; /* Record number */ - - /* - * Next-random cursors can optionally be configured to step through a - * percentage of the total leaf pages to their next value. Note the - * configured value and the calculated number of leaf pages to skip. - */ - uint64_t next_random_leaf_skip; - u_int next_random_sample_size; - - /* - * The search function sets compare to: - * < 1 if the found key is less than the specified key - * 0 if the found key matches the specified key - * > 1 if the found key is larger than the specified key - */ - int compare; - - /* - * A key returned from a binary search or cursor movement on a row-store - * page; if we find an exact match on a row-store leaf page in a search - * operation, keep a copy of key we built during the search to avoid - * doing the additional work of getting the key again for return to the - * application. Note, this only applies to exact matches when searching - * disk-image structures, so it's not, for example, a key from an insert - * list. Additionally, this structure is used to build keys when moving - * a cursor through a row-store leaf page. - */ - WT_ITEM *row_key, _row_key; - - /* - * It's relatively expensive to calculate the last record on a variable- - * length column-store page because of the repeat values. Calculate it - * once per page and cache it. This value doesn't include the skiplist - * of appended entries on the last page. 
- */ - uint64_t last_standard_recno; - - /* - * For row-store pages, we need a single item that tells us the part of - * the page we're walking (otherwise switching from next to prev and - * vice-versa is just too complicated), so we map the WT_ROW and - * WT_INSERT_HEAD insert array slots into a single name space: slot 1 - * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is - * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are - * odd-numbered slots, and WT_ROW array slots are even-numbered slots. - */ - uint32_t row_iteration_slot; /* Row-store iteration slot */ - - /* - * Variable-length column-store values are run-length encoded and may - * be overflow values or Huffman encoded. To avoid repeatedly reading - * overflow values or decompressing encoded values, process it once and - * store the result in a temporary buffer. The cip_saved field is used - * to determine if we've switched columns since our last cursor call. - */ - WT_COL *cip_saved; /* Last iteration reference */ - - /* - * We don't instantiate prefix-compressed keys on pages where there's no - * Huffman encoding because we don't want to waste memory if only moving - * a cursor through the page, and it's faster to build keys while moving - * through the page than to roll-forward from a previously instantiated - * key (we don't instantiate all of the keys, just the ones at binary - * search points). We can't use the application's WT_CURSOR key field - * as a copy of the last-returned key because it may have been altered - * by the API layer, for example, dump cursors. Instead we store the - * last-returned key in a temporary buffer. The rip_saved field is used - * to determine if the key in the temporary buffer has the prefix needed - * for building the current key. 
- */ - WT_ROW *rip_saved; /* Last-returned key reference */ - - /* - * A temporary buffer for caching RLE values for column-store files (if - * RLE is non-zero, then we don't unpack the value every time we move - * to the next cursor position, we re-use the unpacked value we stored - * here the first time we hit the value). - * - * A temporary buffer for building on-page keys when searching row-store - * files. - */ - WT_ITEM *tmp, _tmp; - - /* - * The update structure allocated by the row- and column-store modify - * functions, used to avoid a data copy in the WT_CURSOR.update call. - */ - WT_UPDATE *modify_update; - - /* - * Fixed-length column-store items are a single byte, and it's simpler - * and cheaper to allocate the space for it now than keep checking to - * see if we need to grow the buffer. - */ - uint8_t v; /* Fixed-length return value */ - - uint8_t append_tree; /* Cursor appended to the tree */ - - /* - * We have to restart cursor next/prev after a prepare conflict. Keep - * the state of the cursor separately so we can restart at exactly the - * right point. - */ - enum { WT_CBT_RETRY_NOTSET=0, - WT_CBT_RETRY_INSERT, WT_CBT_RETRY_PAGE } iter_retry; + WT_CURSOR iface; + + /* + * The btree field is safe to use when the cursor is open. When the cursor is cached, the btree + * may be closed, so it is only safe initially to look at the underlying data handle. + */ + WT_BTREE *btree; /* Enclosing btree */ + WT_DATA_HANDLE *dhandle; /* Data handle for the btree */ + + /* + * The following fields are set by the search functions as a precursor to page modification: we + * have a page, a WT_COL/WT_ROW slot on the page, an insert head, insert list and a skiplist + * stack (the stack of skiplist entries leading to the insert point). The search functions also + * return the relationship of the search key to the found key. 
+ */ + WT_REF *ref; /* Current page */ + uint32_t slot; /* WT_COL/WT_ROW 0-based slot */ + + WT_INSERT_HEAD *ins_head; /* Insert chain head */ + WT_INSERT *ins; /* Current insert node */ + /* Search stack */ + WT_INSERT **ins_stack[WT_SKIP_MAXDEPTH]; + + /* Next item(s) found during search */ + WT_INSERT *next_stack[WT_SKIP_MAXDEPTH]; + + uint32_t page_deleted_count; /* Deleted items on the page */ + + uint64_t recno; /* Record number */ + + /* + * Next-random cursors can optionally be configured to step through a percentage of the total + * leaf pages to their next value. Note the configured value and the calculated number of leaf + * pages to skip. + */ + uint64_t next_random_leaf_skip; + u_int next_random_sample_size; + + /* + * The search function sets compare to: + * < 1 if the found key is less than the specified key + * 0 if the found key matches the specified key + * > 1 if the found key is larger than the specified key + */ + int compare; + + /* + * A key returned from a binary search or cursor movement on a row-store page; if we find an + * exact match on a row-store leaf page in a search operation, keep a copy of key we built + * during the search to avoid doing the additional work of getting the key again for return to + * the application. Note, this only applies to exact matches when searching disk-image + * structures, so it's not, for example, a key from an insert list. Additionally, this structure + * is used to build keys when moving a cursor through a row-store leaf page. + */ + WT_ITEM *row_key, _row_key; + + /* + * It's relatively expensive to calculate the last record on a variable- length column-store + * page because of the repeat values. Calculate it once per page and cache it. This value + * doesn't include the skiplist of appended entries on the last page. 
+ */ + uint64_t last_standard_recno; + + /* + * For row-store pages, we need a single item that tells us the part of the page we're walking + * (otherwise switching from next to prev and vice-versa is just too complicated), so we map the + * WT_ROW and WT_INSERT_HEAD insert array slots into a single name space: slot 1 is the + * "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is WT_INSERT_HEAD[0], and so on. This + * means WT_INSERT lists are odd-numbered slots, and WT_ROW array slots are even-numbered slots. + */ + uint32_t row_iteration_slot; /* Row-store iteration slot */ + + /* + * Variable-length column-store values are run-length encoded and may be overflow values or + * Huffman encoded. To avoid repeatedly reading overflow values or decompressing encoded values, + * process it once and store the result in a temporary buffer. The cip_saved field is used to + * determine if we've switched columns since our last cursor call. + */ + WT_COL *cip_saved; /* Last iteration reference */ + + /* + * We don't instantiate prefix-compressed keys on pages where there's no Huffman encoding + * because we don't want to waste memory if only moving a cursor through the page, and it's + * faster to build keys while moving through the page than to roll-forward from a previously + * instantiated key (we don't instantiate all of the keys, just the ones at binary search + * points). We can't use the application's WT_CURSOR key field as a copy of the last-returned + * key because it may have been altered by the API layer, for example, dump cursors. Instead we + * store the last-returned key in a temporary buffer. The rip_saved field is used to determine + * if the key in the temporary buffer has the prefix needed for building the current key. 
+ */ + WT_ROW *rip_saved; /* Last-returned key reference */ + + /* + * A temporary buffer for caching RLE values for column-store files (if + * RLE is non-zero, then we don't unpack the value every time we move + * to the next cursor position, we re-use the unpacked value we stored + * here the first time we hit the value). + * + * A temporary buffer for building on-page keys when searching row-store + * files. + */ + WT_ITEM *tmp, _tmp; + + /* + * The update structure allocated by the row- and column-store modify functions, used to avoid a + * data copy in the WT_CURSOR.update call. + */ + WT_UPDATE *modify_update; + + /* + * Fixed-length column-store items are a single byte, and it's simpler and cheaper to allocate + * the space for it now than keep checking to see if we need to grow the buffer. + */ + uint8_t v; /* Fixed-length return value */ + + uint8_t append_tree; /* Cursor appended to the tree */ + + /* + * We have to restart cursor next/prev after a prepare conflict. Keep the state of the cursor + * separately so we can restart at exactly the right point. + */ + enum { WT_CBT_RETRY_NOTSET = 0, WT_CBT_RETRY_INSERT, WT_CBT_RETRY_PAGE } iter_retry; #ifdef HAVE_DIAGNOSTIC - /* Check that cursor next/prev never returns keys out-of-order. */ - WT_ITEM *lastkey, _lastkey; - uint64_t lastrecno; + /* Check that cursor next/prev never returns keys out-of-order. */ + WT_ITEM *lastkey, _lastkey; + uint64_t lastrecno; #endif /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CBT_ACTIVE 0x001u /* Active in the tree */ -#define WT_CBT_ITERATE_APPEND 0x002u /* Col-store: iterating append list */ -#define WT_CBT_ITERATE_NEXT 0x004u /* Next iteration configuration */ -#define WT_CBT_ITERATE_PREV 0x008u /* Prev iteration configuration */ -#define WT_CBT_ITERATE_RETRY_NEXT 0x010u /* Prepare conflict by next. */ -#define WT_CBT_ITERATE_RETRY_PREV 0x020u /* Prepare conflict by prev. */ -#define WT_CBT_NO_TXN 0x040u /* Non-txn cursor (e.g. 
a checkpoint) */ -#define WT_CBT_READ_ONCE 0x080u /* Page in with WT_READ_WONT_NEED */ -#define WT_CBT_SEARCH_SMALLEST 0x100u /* Row-store: small-key insert list */ -#define WT_CBT_VAR_ONPAGE_MATCH 0x200u /* Var-store: on-page recno match */ +#define WT_CBT_ACTIVE 0x001u /* Active in the tree */ +#define WT_CBT_ITERATE_APPEND 0x002u /* Col-store: iterating append list */ +#define WT_CBT_ITERATE_NEXT 0x004u /* Next iteration configuration */ +#define WT_CBT_ITERATE_PREV 0x008u /* Prev iteration configuration */ +#define WT_CBT_ITERATE_RETRY_NEXT 0x010u /* Prepare conflict by next. */ +#define WT_CBT_ITERATE_RETRY_PREV 0x020u /* Prepare conflict by prev. */ +#define WT_CBT_NO_TXN 0x040u /* Non-txn cursor (e.g. a checkpoint) */ +#define WT_CBT_READ_ONCE 0x080u /* Page in with WT_READ_WONT_NEED */ +#define WT_CBT_SEARCH_SMALLEST 0x100u /* Row-store: small-key insert list */ +#define WT_CBT_VAR_ONPAGE_MATCH 0x200u /* Var-store: on-page recno match */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ -#define WT_CBT_POSITION_MASK /* Flags associated with position */ \ - (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \ - WT_CBT_ITERATE_RETRY_NEXT | WT_CBT_ITERATE_RETRY_PREV | \ - WT_CBT_SEARCH_SMALLEST | WT_CBT_VAR_ONPAGE_MATCH) +#define WT_CBT_POSITION_MASK /* Flags associated with position */ \ + (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \ + WT_CBT_ITERATE_RETRY_NEXT | WT_CBT_ITERATE_RETRY_PREV | WT_CBT_SEARCH_SMALLEST | \ + WT_CBT_VAR_ONPAGE_MATCH) - uint32_t flags; + uint32_t flags; }; struct __wt_cursor_bulk { - WT_CURSOR_BTREE cbt; - - /* - * Variable-length column store compares values during bulk load as - * part of RLE compression, row-store compares keys during bulk load - * to avoid corruption. - */ - bool first_insert; /* First insert */ - WT_ITEM last; /* Last key/value inserted */ - - /* - * Additional column-store bulk load support. 
- */ - uint64_t recno; /* Record number */ - uint64_t rle; /* Variable-length RLE counter */ - - /* - * Additional fixed-length column store bitmap bulk load support: - * current entry in memory chunk count, and the maximum number of - * records per chunk. - */ - bool bitmap; /* Bitmap bulk load */ - uint32_t entry; /* Entry count */ - uint32_t nrecs; /* Max records per chunk */ - - void *reconcile; /* Reconciliation support */ - WT_REF *ref; /* The leaf page */ - WT_PAGE *leaf; + WT_CURSOR_BTREE cbt; + + /* + * Variable-length column store compares values during bulk load as part of RLE compression, + * row-store compares keys during bulk load to avoid corruption. + */ + bool first_insert; /* First insert */ + WT_ITEM last; /* Last key/value inserted */ + + /* + * Additional column-store bulk load support. + */ + uint64_t recno; /* Record number */ + uint64_t rle; /* Variable-length RLE counter */ + + /* + * Additional fixed-length column store bitmap bulk load support: current entry in memory chunk + * count, and the maximum number of records per chunk. 
+ */ + bool bitmap; /* Bitmap bulk load */ + uint32_t entry; /* Entry count */ + uint32_t nrecs; /* Max records per chunk */ + + void *reconcile; /* Reconciliation support */ + WT_REF *ref; /* The leaf page */ + WT_PAGE *leaf; }; struct __wt_cursor_config { - WT_CURSOR iface; + WT_CURSOR iface; }; struct __wt_cursor_data_source { - WT_CURSOR iface; + WT_CURSOR iface; - WT_COLLATOR *collator; /* Configured collator */ - int collator_owned; /* Collator needs to be terminated */ + WT_COLLATOR *collator; /* Configured collator */ + int collator_owned; /* Collator needs to be terminated */ - WT_CURSOR *source; /* Application-owned cursor */ + WT_CURSOR *source; /* Application-owned cursor */ }; struct __wt_cursor_dump { - WT_CURSOR iface; + WT_CURSOR iface; - WT_CURSOR *child; + WT_CURSOR *child; }; struct __wt_cursor_index { - WT_CURSOR iface; + WT_CURSOR iface; - WT_TABLE *table; - WT_INDEX *index; - const char *key_plan, *value_plan; + WT_TABLE *table; + WT_INDEX *index; + const char *key_plan, *value_plan; - WT_CURSOR *child; - WT_CURSOR **cg_cursors; - uint8_t *cg_needvalue; + WT_CURSOR *child; + WT_CURSOR **cg_cursors; + uint8_t *cg_needvalue; }; /* @@ -337,206 +285,199 @@ struct __wt_cursor_index { * are nested, a similarly deep stack of iterators is created. */ struct __wt_cursor_join_iter { - WT_SESSION_IMPL *session; - WT_CURSOR_JOIN *cjoin; - WT_CURSOR_JOIN_ENTRY *entry; - WT_CURSOR_JOIN_ITER *child; - WT_CURSOR *cursor; /* has null projection */ - WT_ITEM *curkey; /* primary key */ - WT_ITEM idxkey; - u_int entry_pos; /* the current entry */ - u_int entry_count; /* entries to walk */ - u_int end_pos; /* the current endpoint */ - u_int end_count; /* endpoints to walk */ - u_int end_skip; /* when testing for inclusion */ - /* can we skip current end? 
*/ - bool positioned; - bool is_equal; + WT_SESSION_IMPL *session; + WT_CURSOR_JOIN *cjoin; + WT_CURSOR_JOIN_ENTRY *entry; + WT_CURSOR_JOIN_ITER *child; + WT_CURSOR *cursor; /* has null projection */ + WT_ITEM *curkey; /* primary key */ + WT_ITEM idxkey; + u_int entry_pos; /* the current entry */ + u_int entry_count; /* entries to walk */ + u_int end_pos; /* the current endpoint */ + u_int end_count; /* endpoints to walk */ + u_int end_skip; /* when testing for inclusion */ + /* can we skip current end? */ + bool positioned; + bool is_equal; }; /* - * A join endpoint represents a positioned cursor that is 'captured' by a - * WT_SESSION::join call. + * A join endpoint represents a positioned cursor that is 'captured' by a WT_SESSION::join call. */ struct __wt_cursor_join_endpoint { - WT_ITEM key; - uint8_t recno_buf[10]; /* holds packed recno */ - WT_CURSOR *cursor; + WT_ITEM key; + uint8_t recno_buf[10]; /* holds packed recno */ + WT_CURSOR *cursor; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CURJOIN_END_EQ 0x1u /* include values == cursor */ -#define WT_CURJOIN_END_GT 0x2u /* include values > cursor */ -#define WT_CURJOIN_END_LT 0x4u /* include values < cursor */ -#define WT_CURJOIN_END_OWN_CURSOR 0x8u /* must close cursor */ +#define WT_CURJOIN_END_EQ 0x1u /* include values == cursor */ +#define WT_CURJOIN_END_GT 0x2u /* include values > cursor */ +#define WT_CURJOIN_END_LT 0x4u /* include values < cursor */ +#define WT_CURJOIN_END_OWN_CURSOR 0x8u /* must close cursor */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ -#define WT_CURJOIN_END_GE (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ) -#define WT_CURJOIN_END_LE (WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ) - uint8_t flags; /* range for this endpoint */ +#define WT_CURJOIN_END_GE (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ) +#define WT_CURJOIN_END_LE (WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ) + uint8_t flags; /* range for this endpoint */ }; -#define WT_CURJOIN_END_RANGE(endp) \ - ((endp)->flags & \ - (WT_CURJOIN_END_GT | 
WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT)) +#define WT_CURJOIN_END_RANGE(endp) \ + ((endp)->flags & (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT)) /* - * Each join entry typically represents an index's participation in a join. - * For example, if 'k' is an index, then "t.k > 10 && t.k < 20" would be - * represented by a single entry, with two endpoints. When the index and - * subjoin fields are NULL, the join is on the main table. When subjoin is + * Each join entry typically represents an index's participation in a join. For example, if 'k' is + * an index, then "t.k > 10 && t.k < 20" would be represented by a single entry, with two endpoints. + * When the index and subjoin fields are NULL, the join is on the main table. When subjoin is * non-NULL, there is a nested join clause. */ struct __wt_cursor_join_entry { - WT_INDEX *index; - WT_CURSOR *main; /* raw main table cursor */ - WT_CURSOR_JOIN *subjoin; /* a nested join clause */ - WT_BLOOM *bloom; /* Bloom filter handle */ - char *repack_format; /* target format for repack */ - uint32_t bloom_bit_count; /* bits per item in bloom */ - uint32_t bloom_hash_count; /* hash functions in bloom */ - uint64_t count; /* approx number of matches */ + WT_INDEX *index; + WT_CURSOR *main; /* raw main table cursor */ + WT_CURSOR_JOIN *subjoin; /* a nested join clause */ + WT_BLOOM *bloom; /* Bloom filter handle */ + char *repack_format; /* target format for repack */ + uint32_t bloom_bit_count; /* bits per item in bloom */ + uint32_t bloom_hash_count; /* hash functions in bloom */ + uint64_t count; /* approx number of matches */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CURJOIN_ENTRY_BLOOM 0x1u /* use a bloom filter */ -#define WT_CURJOIN_ENTRY_DISJUNCTION 0x2u /* endpoints are or-ed */ -#define WT_CURJOIN_ENTRY_FALSE_POSITIVES 0x4u /* don't filter false pos */ -#define WT_CURJOIN_ENTRY_OWN_BLOOM 0x8u /* this entry owns the bloom */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags; - - 
WT_CURSOR_JOIN_ENDPOINT *ends; /* reference endpoints */ - size_t ends_allocated; - u_int ends_next; - - WT_JOIN_STATS stats; /* Join statistics */ +#define WT_CURJOIN_ENTRY_BLOOM 0x1u /* use a bloom filter */ +#define WT_CURJOIN_ENTRY_DISJUNCTION 0x2u /* endpoints are or-ed */ +#define WT_CURJOIN_ENTRY_FALSE_POSITIVES 0x4u /* don't filter false pos */ +#define WT_CURJOIN_ENTRY_OWN_BLOOM 0x8u /* this entry owns the bloom */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags; + + WT_CURSOR_JOIN_ENDPOINT *ends; /* reference endpoints */ + size_t ends_allocated; + u_int ends_next; + + WT_JOIN_STATS stats; /* Join statistics */ }; struct __wt_cursor_join { - WT_CURSOR iface; - - WT_TABLE *table; - const char *projection; - WT_CURSOR *main; /* main table with projection */ - WT_CURSOR_JOIN *parent; /* parent of nested group */ - WT_CURSOR_JOIN_ITER *iter; /* chain of iterators */ - WT_CURSOR_JOIN_ENTRY *entries; - size_t entries_allocated; - u_int entries_next; - uint8_t recno_buf[10]; /* holds packed recno */ + WT_CURSOR iface; + + WT_TABLE *table; + const char *projection; + WT_CURSOR *main; /* main table with projection */ + WT_CURSOR_JOIN *parent; /* parent of nested group */ + WT_CURSOR_JOIN_ITER *iter; /* chain of iterators */ + WT_CURSOR_JOIN_ENTRY *entries; + size_t entries_allocated; + u_int entries_next; + uint8_t recno_buf[10]; /* holds packed recno */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CURJOIN_DISJUNCTION 0x1u /* Entries are or-ed */ -#define WT_CURJOIN_ERROR 0x2u /* Error in initialization */ -#define WT_CURJOIN_INITIALIZED 0x4u /* Successful initialization */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags; +#define WT_CURJOIN_DISJUNCTION 0x1u /* Entries are or-ed */ +#define WT_CURJOIN_ERROR 0x2u /* Error in initialization */ +#define WT_CURJOIN_INITIALIZED 0x4u /* Successful initialization */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags; }; struct __wt_cursor_json { - char *key_buf; /* JSON 
formatted string */ - char *value_buf; /* JSON formatted string */ - WT_CONFIG_ITEM key_names; /* Names of key columns */ - WT_CONFIG_ITEM value_names; /* Names of value columns */ + char *key_buf; /* JSON formatted string */ + char *value_buf; /* JSON formatted string */ + WT_CONFIG_ITEM key_names; /* Names of key columns */ + WT_CONFIG_ITEM value_names; /* Names of value columns */ }; struct __wt_cursor_log { - WT_CURSOR iface; - - WT_LSN *cur_lsn; /* LSN of current record */ - WT_LSN *next_lsn; /* LSN of next record */ - WT_ITEM *logrec; /* Copy of record for cursor */ - WT_ITEM *opkey, *opvalue; /* Op key/value copy */ - const uint8_t *stepp, *stepp_end; /* Pointer within record */ - uint8_t *packed_key; /* Packed key for 'raw' interface */ - uint8_t *packed_value; /* Packed value for 'raw' interface */ - uint32_t step_count; /* Intra-record count */ - uint32_t rectype; /* Record type */ - uint64_t txnid; /* Record txnid */ + WT_CURSOR iface; + + WT_LSN *cur_lsn; /* LSN of current record */ + WT_LSN *next_lsn; /* LSN of next record */ + WT_ITEM *logrec; /* Copy of record for cursor */ + WT_ITEM *opkey, *opvalue; /* Op key/value copy */ + const uint8_t *stepp, *stepp_end; /* Pointer within record */ + uint8_t *packed_key; /* Packed key for 'raw' interface */ + uint8_t *packed_value; /* Packed value for 'raw' interface */ + uint32_t step_count; /* Intra-record count */ + uint32_t rectype; /* Record type */ + uint64_t txnid; /* Record txnid */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CURLOG_ARCHIVE_LOCK 0x1u /* Archive lock held */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags; +#define WT_CURLOG_ARCHIVE_LOCK 0x1u /* Archive lock held */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags; }; struct __wt_cursor_metadata { - WT_CURSOR iface; + WT_CURSOR iface; - WT_CURSOR *file_cursor; /* Queries of regular metadata */ - WT_CURSOR *create_cursor; /* Extra cursor for create option */ + WT_CURSOR *file_cursor; /* Queries of 
regular metadata */ + WT_CURSOR *create_cursor; /* Extra cursor for create option */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_MDC_CREATEONLY 0x1u -#define WT_MDC_ONMETADATA 0x2u -#define WT_MDC_POSITIONED 0x4u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags; +#define WT_MDC_CREATEONLY 0x1u +#define WT_MDC_ONMETADATA 0x2u +#define WT_MDC_POSITIONED 0x4u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags; }; struct __wt_join_stats_group { - const char *desc_prefix; /* Prefix appears before description */ - WT_CURSOR_JOIN *join_cursor; - ssize_t join_cursor_entry; /* Position in entries */ - WT_JOIN_STATS join_stats; + const char *desc_prefix; /* Prefix appears before description */ + WT_CURSOR_JOIN *join_cursor; + ssize_t join_cursor_entry; /* Position in entries */ + WT_JOIN_STATS join_stats; }; struct __wt_cursor_stat { - WT_CURSOR iface; - - bool notinitialized; /* Cursor not initialized */ - bool notpositioned; /* Cursor not positioned */ - - int64_t *stats; /* Statistics */ - int stats_base; /* Base statistics value */ - int stats_count; /* Count of statistics values */ - int (*stats_desc)(WT_CURSOR_STAT *, int, const char **); - /* Statistics descriptions */ - int (*next_set)(WT_SESSION_IMPL *, WT_CURSOR_STAT *, bool, - bool); /* Advance to next set */ - - union { /* Copies of the statistics */ - WT_DSRC_STATS dsrc_stats; - WT_CONNECTION_STATS conn_stats; - WT_JOIN_STATS_GROUP join_stats_group; - WT_SESSION_STATS session_stats; - } u; - - const char **cfg; /* Original cursor configuration */ - char *desc_buf; /* Saved description string */ - - int key; /* Current stats key */ - uint64_t v; /* Current stats value */ - WT_ITEM pv; /* Current stats value (string) */ - - /* Options declared in flags.py, shared by WT_CONNECTION::stat_flags */ - uint32_t flags; + WT_CURSOR iface; + + bool notinitialized; /* Cursor not initialized */ + bool notpositioned; /* Cursor not positioned */ + + int64_t *stats; /* Statistics */ + int 
stats_base; /* Base statistics value */ + int stats_count; /* Count of statistics values */ + int (*stats_desc)(WT_CURSOR_STAT *, int, const char **); + /* Statistics descriptions */ + int (*next_set)(WT_SESSION_IMPL *, WT_CURSOR_STAT *, bool, bool); /* Advance to next set */ + + union { /* Copies of the statistics */ + WT_DSRC_STATS dsrc_stats; + WT_CONNECTION_STATS conn_stats; + WT_JOIN_STATS_GROUP join_stats_group; + WT_SESSION_STATS session_stats; + } u; + + const char **cfg; /* Original cursor configuration */ + char *desc_buf; /* Saved description string */ + + int key; /* Current stats key */ + uint64_t v; /* Current stats value */ + WT_ITEM pv; /* Current stats value (string) */ + + /* Options declared in flags.py, shared by WT_CONNECTION::stat_flags */ + uint32_t flags; }; /* * WT_CURSOR_STATS -- * Return a reference to a statistic cursor's stats structures. */ -#define WT_CURSOR_STATS(cursor) \ - (((WT_CURSOR_STAT *)(cursor))->stats) +#define WT_CURSOR_STATS(cursor) (((WT_CURSOR_STAT *)(cursor))->stats) struct __wt_cursor_table { - WT_CURSOR iface; + WT_CURSOR iface; - WT_TABLE *table; - const char *plan; + WT_TABLE *table; + const char *plan; - const char **cfg; /* Saved configuration string */ + const char **cfg; /* Saved configuration string */ - WT_CURSOR **cg_cursors; - WT_ITEM *cg_valcopy; /* - * Copies of column group values, for - * overlapping set_value calls. - */ - WT_CURSOR **idx_cursors; + WT_CURSOR **cg_cursors; + WT_ITEM *cg_valcopy; /* + * Copies of column group values, for + * overlapping set_value calls. 
+ */ + WT_CURSOR **idx_cursors; }; -#define WT_CURSOR_PRIMARY(cursor) \ - (((WT_CURSOR_TABLE *)(cursor))->cg_cursors[0]) +#define WT_CURSOR_PRIMARY(cursor) (((WT_CURSOR_TABLE *)(cursor))->cg_cursors[0]) -#define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r") +#define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r") -#define WT_CURSOR_RAW_OK \ - (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW) +#define WT_CURSOR_RAW_OK (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW) diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 0cb3708a030..730d69cbdc7 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -8,470 +8,450 @@ /* * __cursor_set_recno -- - * The cursor value in the interface has to track the value in the - * underlying cursor, update them in parallel. + * The cursor value in the interface has to track the value in the underlying cursor, update + * them in parallel. */ static inline void __cursor_set_recno(WT_CURSOR_BTREE *cbt, uint64_t v) { - cbt->iface.recno = cbt->recno = v; + cbt->iface.recno = cbt->recno = v; } /* * __cursor_novalue -- - * Release any cached value before an operation that could update the - * transaction context and free data a value is pointing to. + * Release any cached value before an operation that could update the transaction context and + * free data a value is pointing to. */ static inline void __cursor_novalue(WT_CURSOR *cursor) { - F_CLR(cursor, WT_CURSTD_VALUE_INT); + F_CLR(cursor, WT_CURSTD_VALUE_INT); } /* * __cursor_checkkey -- - * Check if a key is set without making a copy. + * Check if a key is set without making a copy. */ static inline int __cursor_checkkey(WT_CURSOR *cursor) { - return (F_ISSET(cursor, WT_CURSTD_KEY_SET) ? - 0 : __wt_cursor_kv_not_set(cursor, true)); + return (F_ISSET(cursor, WT_CURSTD_KEY_SET) ? 
0 : __wt_cursor_kv_not_set(cursor, true)); } /* * __cursor_checkvalue -- - * Check if a value is set without making a copy. + * Check if a value is set without making a copy. */ static inline int __cursor_checkvalue(WT_CURSOR *cursor) { - return (F_ISSET(cursor, WT_CURSTD_VALUE_SET) ? - 0 : __wt_cursor_kv_not_set(cursor, false)); + return (F_ISSET(cursor, WT_CURSTD_VALUE_SET) ? 0 : __wt_cursor_kv_not_set(cursor, false)); } /* * __cursor_localkey -- - * If the key points into the tree, get a local copy. + * If the key points into the tree, get a local copy. */ static inline int __cursor_localkey(WT_CURSOR *cursor) { - if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { - if (!WT_DATA_IN_ITEM(&cursor->key)) - WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, - &cursor->key, cursor->key.data, cursor->key.size)); - F_CLR(cursor, WT_CURSTD_KEY_INT); - F_SET(cursor, WT_CURSTD_KEY_EXT); - } - return (0); + if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + if (!WT_DATA_IN_ITEM(&cursor->key)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, &cursor->key, cursor->key.data, + cursor->key.size)); + F_CLR(cursor, WT_CURSTD_KEY_INT); + F_SET(cursor, WT_CURSTD_KEY_EXT); + } + return (0); } /* * __cursor_localvalue -- - * If the value points into the tree, get a local copy. + * If the value points into the tree, get a local copy. 
*/ static inline int __cursor_localvalue(WT_CURSOR *cursor) { - if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { - if (!WT_DATA_IN_ITEM(&cursor->value)) - WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, - &cursor->value, - cursor->value.data, cursor->value.size)); - F_CLR(cursor, WT_CURSTD_VALUE_INT); - F_SET(cursor, WT_CURSTD_VALUE_EXT); - } - return (0); + if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { + if (!WT_DATA_IN_ITEM(&cursor->value)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, &cursor->value, + cursor->value.data, cursor->value.size)); + F_CLR(cursor, WT_CURSTD_VALUE_INT); + F_SET(cursor, WT_CURSTD_VALUE_EXT); + } + return (0); } /* * __cursor_needkey -- - * - * Check if we have a key set. There's an additional semantic here: if we're - * pointing into the tree, get a local copy of whatever we're referencing in - * the tree, there's an obvious race with the cursor moving and the reference. + * Check if we have a key set. There's an additional semantic here: if we're pointing into the + * tree, get a local copy of whatever we're referencing in the tree, there's an obvious race + * with the cursor moving and the reference. */ static inline int __cursor_needkey(WT_CURSOR *cursor) { - WT_RET(__cursor_localkey(cursor)); - return (__cursor_checkkey(cursor)); + WT_RET(__cursor_localkey(cursor)); + return (__cursor_checkkey(cursor)); } /* * __cursor_needvalue -- - * - * Check if we have a value set. There's an additional semantic here: if we're - * pointing into the tree, get a local copy of whatever we're referencing in - * the tree, there's an obvious race with the cursor moving and the reference. + * Check if we have a value set. There's an additional semantic here: if we're pointing into the + * tree, get a local copy of whatever we're referencing in the tree, there's an obvious race + * with the cursor moving and the reference. 
*/ static inline int __cursor_needvalue(WT_CURSOR *cursor) { - WT_RET(__cursor_localvalue(cursor)); - return (__cursor_checkvalue(cursor)); + WT_RET(__cursor_localvalue(cursor)); + return (__cursor_checkvalue(cursor)); } /* * __cursor_pos_clear -- - * Reset the cursor's location. + * Reset the cursor's location. */ static inline void __cursor_pos_clear(WT_CURSOR_BTREE *cbt) { - /* - * Most of the cursor's location information that needs to be set on - * successful return is always set by a successful return, for example, - * we don't initialize the compare return value because it's always - * set by the row-store search. The other stuff gets cleared here, - * and it's a minimal set of things we need to clear. It would be a - * lot simpler to clear everything, but we call this function a lot. - */ - cbt->recno = WT_RECNO_OOB; - - cbt->ins = NULL; - cbt->ins_head = NULL; - cbt->ins_stack[0] = NULL; - - F_CLR(cbt, WT_CBT_POSITION_MASK); + /* + * Most of the cursor's location information that needs to be set on successful return is always + * set by a successful return, for example, we don't initialize the compare return value because + * it's always set by the row-store search. The other stuff gets cleared here, and it's a + * minimal set of things we need to clear. It would be a lot simpler to clear everything, but we + * call this function a lot. + */ + cbt->recno = WT_RECNO_OOB; + + cbt->ins = NULL; + cbt->ins_head = NULL; + cbt->ins_stack[0] = NULL; + + F_CLR(cbt, WT_CBT_POSITION_MASK); } /* * __cursor_enter -- - * Activate a cursor. + * Activate a cursor. */ static inline int __cursor_enter(WT_SESSION_IMPL *session) { - /* - * If there are no other cursors positioned in the session, check - * whether the cache is full. - */ - if (session->ncursors == 0) - WT_RET(__wt_cache_eviction_check(session, false, false, NULL)); - ++session->ncursors; - return (0); + /* + * If there are no other cursors positioned in the session, check whether the cache is full. 
+ */ + if (session->ncursors == 0) + WT_RET(__wt_cache_eviction_check(session, false, false, NULL)); + ++session->ncursors; + return (0); } /* * __cursor_leave -- - * Deactivate a cursor. + * Deactivate a cursor. */ static inline void __cursor_leave(WT_SESSION_IMPL *session) { - /* - * Decrement the count of active cursors in the session. When that - * goes to zero, there are no active cursors, and we can release any - * snapshot we're holding for read committed isolation. - */ - WT_ASSERT(session, session->ncursors > 0); - if (--session->ncursors == 0) - __wt_txn_read_last(session); + /* + * Decrement the count of active cursors in the session. When that goes to zero, there are no + * active cursors, and we can release any snapshot we're holding for read committed isolation. + */ + WT_ASSERT(session, session->ncursors > 0); + if (--session->ncursors == 0) + __wt_txn_read_last(session); } /* * __cursor_reset -- - * Reset the cursor, it no longer holds any position. + * Reset the cursor, it no longer holds any position. */ static inline int __cursor_reset(WT_CURSOR_BTREE *cbt) { - WT_DECL_RET; - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - - __cursor_pos_clear(cbt); - - /* If the cursor was active, deactivate it. */ - if (F_ISSET(cbt, WT_CBT_ACTIVE)) { - if (!F_ISSET(cbt, WT_CBT_NO_TXN)) - __cursor_leave(session); - F_CLR(cbt, WT_CBT_ACTIVE); - } - - /* If we're not holding a cursor reference, we're done. */ - if (cbt->ref == NULL) - return (0); - - /* - * If we were scanning and saw a lot of deleted records on this page, - * try to evict the page when we release it. - */ - if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) { - __wt_page_evict_soon(session, cbt->ref); - WT_STAT_CONN_INCR(session, cache_eviction_force_delete); - } - cbt->page_deleted_count = 0; - - /* - * Release any page references we're holding. 
This can trigger eviction - * (e.g., forced eviction of big pages), so it's important to do after - * releasing our snapshot above. - * - * Clear the reference regardless, so we don't try the release twice. - */ - ret = __wt_page_release(session, cbt->ref, 0); - cbt->ref = NULL; - - return (ret); + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)cbt->iface.session; + + __cursor_pos_clear(cbt); + + /* If the cursor was active, deactivate it. */ + if (F_ISSET(cbt, WT_CBT_ACTIVE)) { + if (!F_ISSET(cbt, WT_CBT_NO_TXN)) + __cursor_leave(session); + F_CLR(cbt, WT_CBT_ACTIVE); + } + + /* If we're not holding a cursor reference, we're done. */ + if (cbt->ref == NULL) + return (0); + + /* + * If we were scanning and saw a lot of deleted records on this page, try to evict the page when + * we release it. + */ + if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) { + __wt_page_evict_soon(session, cbt->ref); + WT_STAT_CONN_INCR(session, cache_eviction_force_delete); + } + cbt->page_deleted_count = 0; + + /* + * Release any page references we're holding. This can trigger eviction + * (e.g., forced eviction of big pages), so it's important to do after + * releasing our snapshot above. + * + * Clear the reference regardless, so we don't try the release twice. 
+ */ + ret = __wt_page_release(session, cbt->ref, 0); + cbt->ref = NULL; + + return (ret); } /* * __wt_curindex_get_valuev -- - * Internal implementation of WT_CURSOR->get_value for index cursors + * Internal implementation of WT_CURSOR->get_value for index cursors */ static inline int __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap) { - WT_CURSOR_INDEX *cindex; - WT_ITEM *item; - WT_SESSION_IMPL *session; - - cindex = (WT_CURSOR_INDEX *)cursor; - session = (WT_SESSION_IMPL *)cursor->session; - WT_RET(__cursor_checkvalue(cursor)); - - if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) { - WT_RET(__wt_schema_project_merge(session, - cindex->cg_cursors, cindex->value_plan, - cursor->value_format, &cursor->value)); - item = va_arg(ap, WT_ITEM *); - item->data = cursor->value.data; - item->size = cursor->value.size; - } else - WT_RET(__wt_schema_project_out(session, - cindex->cg_cursors, cindex->value_plan, ap)); - return (0); + WT_CURSOR_INDEX *cindex; + WT_ITEM *item; + WT_SESSION_IMPL *session; + + cindex = (WT_CURSOR_INDEX *)cursor; + session = (WT_SESSION_IMPL *)cursor->session; + WT_RET(__cursor_checkvalue(cursor)); + + if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) { + WT_RET(__wt_schema_project_merge( + session, cindex->cg_cursors, cindex->value_plan, cursor->value_format, &cursor->value)); + item = va_arg(ap, WT_ITEM *); + item->data = cursor->value.data; + item->size = cursor->value.size; + } else + WT_RET(__wt_schema_project_out(session, cindex->cg_cursors, cindex->value_plan, ap)); + return (0); } /* * __wt_curtable_get_valuev -- - * Internal implementation of WT_CURSOR->get_value for table cursors. + * Internal implementation of WT_CURSOR->get_value for table cursors. 
*/ static inline int __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap) { - WT_CURSOR *primary; - WT_CURSOR_TABLE *ctable; - WT_ITEM *item; - WT_SESSION_IMPL *session; - - ctable = (WT_CURSOR_TABLE *)cursor; - session = (WT_SESSION_IMPL *)cursor->session; - primary = *ctable->cg_cursors; - WT_RET(__cursor_checkvalue(primary)); - - if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) { - WT_RET(__wt_schema_project_merge(session, - ctable->cg_cursors, ctable->plan, - cursor->value_format, &cursor->value)); - item = va_arg(ap, WT_ITEM *); - item->data = cursor->value.data; - item->size = cursor->value.size; - } else - WT_RET(__wt_schema_project_out(session, - ctable->cg_cursors, ctable->plan, ap)); - return (0); + WT_CURSOR *primary; + WT_CURSOR_TABLE *ctable; + WT_ITEM *item; + WT_SESSION_IMPL *session; + + ctable = (WT_CURSOR_TABLE *)cursor; + session = (WT_SESSION_IMPL *)cursor->session; + primary = *ctable->cg_cursors; + WT_RET(__cursor_checkvalue(primary)); + + if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) { + WT_RET(__wt_schema_project_merge( + session, ctable->cg_cursors, ctable->plan, cursor->value_format, &cursor->value)); + item = va_arg(ap, WT_ITEM *); + item->data = cursor->value.data; + item->size = cursor->value.size; + } else + WT_RET(__wt_schema_project_out(session, ctable->cg_cursors, ctable->plan, ap)); + return (0); } /* * __wt_cursor_dhandle_incr_use -- - * Increment the in-use counter in the cursor's data source. + * Increment the in-use counter in the cursor's data source. */ static inline void __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session) { - WT_DATA_HANDLE *dhandle; + WT_DATA_HANDLE *dhandle; - dhandle = session->dhandle; + dhandle = session->dhandle; - /* If we open a handle with a time of death set, clear it. */ - if (__wt_atomic_addi32(&dhandle->session_inuse, 1) == 1 && - dhandle->timeofdeath != 0) - dhandle->timeofdeath = 0; + /* If we open a handle with a time of death set, clear it. 
*/ + if (__wt_atomic_addi32(&dhandle->session_inuse, 1) == 1 && dhandle->timeofdeath != 0) + dhandle->timeofdeath = 0; } /* * __wt_cursor_dhandle_decr_use -- - * Decrement the in-use counter in the cursor's data source. + * Decrement the in-use counter in the cursor's data source. */ static inline void __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session) { - WT_DATA_HANDLE *dhandle; + WT_DATA_HANDLE *dhandle; - dhandle = session->dhandle; + dhandle = session->dhandle; - /* If we close a handle with a time of death set, clear it. */ - WT_ASSERT(session, dhandle->session_inuse > 0); - if (__wt_atomic_subi32(&dhandle->session_inuse, 1) == 0 && - dhandle->timeofdeath != 0) - dhandle->timeofdeath = 0; + /* If we close a handle with a time of death set, clear it. */ + WT_ASSERT(session, dhandle->session_inuse > 0); + if (__wt_atomic_subi32(&dhandle->session_inuse, 1) == 0 && dhandle->timeofdeath != 0) + dhandle->timeofdeath = 0; } /* * __cursor_kv_return -- - * Return a page referenced key/value pair to the application. + * Return a page referenced key/value pair to the application. */ static inline int -__cursor_kv_return( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +__cursor_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) { - WT_RET(__wt_key_return(session, cbt)); - WT_RET(__wt_value_return(session, cbt, upd)); + WT_RET(__wt_key_return(session, cbt)); + WT_RET(__wt_value_return(session, cbt, upd)); - return (0); + return (0); } /* * __cursor_func_init -- - * Cursor call setup. + * Cursor call setup. 
*/ static inline int __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) { - WT_SESSION_IMPL *session; + WT_SESSION_IMPL *session; - session = (WT_SESSION_IMPL *)cbt->iface.session; + session = (WT_SESSION_IMPL *)cbt->iface.session; - if (reenter) { + if (reenter) { #ifdef HAVE_DIAGNOSTIC - __wt_cursor_key_order_reset(cbt); + __wt_cursor_key_order_reset(cbt); #endif - WT_RET(__cursor_reset(cbt)); - } - - /* - * Any old insert position is now invalid. We rely on this being - * cleared to detect if a new skiplist is installed after a search. - */ - cbt->ins_stack[0] = NULL; - - /* If the transaction is idle, check that the cache isn't full. */ - WT_RET(__wt_txn_idle_cache_check(session)); - - /* Activate the file cursor. */ - if (!F_ISSET(cbt, WT_CBT_ACTIVE)) { - if (!F_ISSET(cbt, WT_CBT_NO_TXN)) - WT_RET(__cursor_enter(session)); - F_SET(cbt, WT_CBT_ACTIVE); - } - - /* - * If this is an ordinary transactional cursor, make sure we are set up - * to read. - */ - if (!F_ISSET(cbt, WT_CBT_NO_TXN)) - __wt_txn_cursor_op(session); - return (0); + WT_RET(__cursor_reset(cbt)); + } + + /* + * Any old insert position is now invalid. We rely on this being cleared to detect if a new + * skiplist is installed after a search. + */ + cbt->ins_stack[0] = NULL; + + /* If the transaction is idle, check that the cache isn't full. */ + WT_RET(__wt_txn_idle_cache_check(session)); + + /* Activate the file cursor. */ + if (!F_ISSET(cbt, WT_CBT_ACTIVE)) { + if (!F_ISSET(cbt, WT_CBT_NO_TXN)) + WT_RET(__cursor_enter(session)); + F_SET(cbt, WT_CBT_ACTIVE); + } + + /* + * If this is an ordinary transactional cursor, make sure we are set up to read. + */ + if (!F_ISSET(cbt, WT_CBT_NO_TXN)) + __wt_txn_cursor_op(session); + return (0); } /* * __cursor_row_slot_return -- - * Return a row-store leaf page slot's K/V pair. + * Return a row-store leaf page slot's K/V pair. 
*/ static inline int __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd) { - WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack; - WT_ITEM *kb, *vb; - WT_PAGE *page; - WT_SESSION_IMPL *session; - void *copy; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - btree = S2BT(session); - page = cbt->ref->page; - - kpack = NULL; - vpack = &_vpack; - - kb = &cbt->iface.key; - vb = &cbt->iface.value; - - /* - * The row-store key can change underfoot; explicitly take a copy. - */ - copy = WT_ROW_KEY_COPY(rip); - - /* - * Get a key: we could just call __wt_row_leaf_key, but as a cursor - * is running through the tree, we may have additional information - * here (we may have the fully-built key that's immediately before - * the prefix-compressed key we want, so it's a faster construction). - * - * First, check for an immediately available key. - */ - if (__wt_row_leaf_key_info( - page, copy, NULL, &cell, &kb->data, &kb->size)) - goto value; - - /* Huffman encoded keys are a slow path in all cases. */ - if (btree->huffman_key != NULL) - goto slow; - - /* - * Unpack the cell and deal with overflow and prefix-compressed keys. - * Inline building simple prefix-compressed keys from a previous key, - * otherwise build from scratch. - * - * Clear the key cell structure. It shouldn't be necessary (as far as I - * can tell, and we don't do it in lots of other places), but disabling - * shared builds (--disable-shared) results in the compiler complaining - * about uninitialized field use. - */ - kpack = &_kpack; - memset(kpack, 0, sizeof(*kpack)); - __wt_cell_unpack(session, page, cell, kpack); - if (kpack->type == WT_CELL_KEY && - cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) { - WT_ASSERT(session, cbt->row_key->size >= kpack->prefix); - - /* - * Grow the buffer as necessary as well as ensure data has been - * copied into local buffer space, then append the suffix to the - * prefix already in the buffer. 
- * - * Don't grow the buffer unnecessarily or copy data we don't - * need, truncate the item's data length to the prefix bytes. - */ - cbt->row_key->size = kpack->prefix; - WT_RET(__wt_buf_grow( - session, cbt->row_key, cbt->row_key->size + kpack->size)); - memcpy((uint8_t *)cbt->row_key->data + cbt->row_key->size, - kpack->data, kpack->size); - cbt->row_key->size += kpack->size; - } else { - /* - * Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we - * already did __wt_row_leaf_key's fast-path checks inline. - */ -slow: WT_RET(__wt_row_leaf_key_work( - session, page, rip, cbt->row_key, false)); - } - kb->data = cbt->row_key->data; - kb->size = cbt->row_key->size; - cbt->rip_saved = rip; + WT_BTREE *btree; + WT_CELL *cell; + WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack; + WT_ITEM *kb, *vb; + WT_PAGE *page; + WT_SESSION_IMPL *session; + void *copy; + + session = (WT_SESSION_IMPL *)cbt->iface.session; + btree = S2BT(session); + page = cbt->ref->page; + + kpack = NULL; + vpack = &_vpack; + + kb = &cbt->iface.key; + vb = &cbt->iface.value; + + /* + * The row-store key can change underfoot; explicitly take a copy. + */ + copy = WT_ROW_KEY_COPY(rip); + + /* + * Get a key: we could just call __wt_row_leaf_key, but as a cursor + * is running through the tree, we may have additional information + * here (we may have the fully-built key that's immediately before + * the prefix-compressed key we want, so it's a faster construction). + * + * First, check for an immediately available key. + */ + if (__wt_row_leaf_key_info(page, copy, NULL, &cell, &kb->data, &kb->size)) + goto value; + + /* Huffman encoded keys are a slow path in all cases. */ + if (btree->huffman_key != NULL) + goto slow; + + /* + * Unpack the cell and deal with overflow and prefix-compressed keys. + * Inline building simple prefix-compressed keys from a previous key, + * otherwise build from scratch. + * + * Clear the key cell structure. 
It shouldn't be necessary (as far as I + * can tell, and we don't do it in lots of other places), but disabling + * shared builds (--disable-shared) results in the compiler complaining + * about uninitialized field use. + */ + kpack = &_kpack; + memset(kpack, 0, sizeof(*kpack)); + __wt_cell_unpack(session, page, cell, kpack); + if (kpack->type == WT_CELL_KEY && cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) { + WT_ASSERT(session, cbt->row_key->size >= kpack->prefix); + + /* + * Grow the buffer as necessary as well as ensure data has been + * copied into local buffer space, then append the suffix to the + * prefix already in the buffer. + * + * Don't grow the buffer unnecessarily or copy data we don't + * need, truncate the item's data length to the prefix bytes. + */ + cbt->row_key->size = kpack->prefix; + WT_RET(__wt_buf_grow(session, cbt->row_key, cbt->row_key->size + kpack->size)); + memcpy((uint8_t *)cbt->row_key->data + cbt->row_key->size, kpack->data, kpack->size); + cbt->row_key->size += kpack->size; + } else { + /* + * Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we already did __wt_row_leaf_key's + * fast-path checks inline. + */ +slow: + WT_RET(__wt_row_leaf_key_work(session, page, rip, cbt->row_key, false)); + } + kb->data = cbt->row_key->data; + kb->size = cbt->row_key->size; + cbt->rip_saved = rip; value: - /* - * If the item was ever modified, use the WT_UPDATE data. Note the - * caller passes us the update: it has already resolved which one - * (if any) is visible. - */ - if (upd != NULL) - return (__wt_value_return(session, cbt, upd)); - - /* Else, simple values have their location encoded in the WT_ROW. */ - if (__wt_row_leaf_value(page, rip, vb)) - return (0); - - /* Else, take the value from the original page cell. */ - __wt_row_leaf_value_cell(session, page, rip, kpack, vpack); - return (__wt_page_cell_data_ref(session, cbt->ref->page, vpack, vb)); + /* + * If the item was ever modified, use the WT_UPDATE data. 
Note the + * caller passes us the update: it has already resolved which one + * (if any) is visible. + */ + if (upd != NULL) + return (__wt_value_return(session, cbt, upd)); + + /* Else, simple values have their location encoded in the WT_ROW. */ + if (__wt_row_leaf_value(page, rip, vb)) + return (0); + + /* Else, take the value from the original page cell. */ + __wt_row_leaf_value_cell(session, page, rip, kpack, vpack); + return (__wt_page_cell_data_ref(session, cbt->ref->page, vpack, vb)); } diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index c6518a715f3..76bc3987024 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -7,118 +7,108 @@ */ /* - * Helpers for calling a function with a data handle in session->dhandle - * then restoring afterwards. + * Helpers for calling a function with a data handle in session->dhandle then restoring afterwards. */ -#define WT_WITH_DHANDLE(s, d, e) do { \ - WT_DATA_HANDLE *__saved_dhandle = (s)->dhandle; \ - (s)->dhandle = (d); \ - e; \ - (s)->dhandle = __saved_dhandle; \ -} while (0) +#define WT_WITH_DHANDLE(s, d, e) \ + do { \ + WT_DATA_HANDLE *__saved_dhandle = (s)->dhandle; \ + (s)->dhandle = (d); \ + e; \ + (s)->dhandle = __saved_dhandle; \ + } while (0) -#define WT_WITH_BTREE(s, b, e) WT_WITH_DHANDLE(s, (b)->dhandle, e) +#define WT_WITH_BTREE(s, b, e) WT_WITH_DHANDLE(s, (b)->dhandle, e) /* Call a function without the caller's data handle, restore afterwards. */ -#define WT_WITHOUT_DHANDLE(s, e) WT_WITH_DHANDLE(s, NULL, e) +#define WT_WITHOUT_DHANDLE(s, e) WT_WITH_DHANDLE(s, NULL, e) /* - * Call a function with the caller's data handle, restore it afterwards in case - * it is overwritten. + * Call a function with the caller's data handle, restore it afterwards in case it is overwritten. 
*/ -#define WT_SAVE_DHANDLE(s, e) WT_WITH_DHANDLE(s, (s)->dhandle, e) +#define WT_SAVE_DHANDLE(s, e) WT_WITH_DHANDLE(s, (s)->dhandle, e) /* Check if a handle is inactive. */ -#define WT_DHANDLE_INACTIVE(dhandle) \ - (F_ISSET(dhandle, WT_DHANDLE_DEAD) || \ - !F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN)) +#define WT_DHANDLE_INACTIVE(dhandle) \ + (F_ISSET(dhandle, WT_DHANDLE_DEAD) || !F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN)) /* Check if a handle could be reopened. */ -#define WT_DHANDLE_CAN_REOPEN(dhandle) \ - (!WT_DHANDLE_INACTIVE(dhandle) && \ - F_ISSET(dhandle, WT_DHANDLE_OPEN) && \ - !F_ISSET(dhandle, WT_DHANDLE_DROPPED)) +#define WT_DHANDLE_CAN_REOPEN(dhandle) \ + (!WT_DHANDLE_INACTIVE(dhandle) && F_ISSET(dhandle, WT_DHANDLE_OPEN) && \ + !F_ISSET(dhandle, WT_DHANDLE_DROPPED)) /* The metadata cursor's data handle. */ -#define WT_SESSION_META_DHANDLE(s) \ - (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) - -#define WT_DHANDLE_ACQUIRE(dhandle) \ - (void)__wt_atomic_add32(&(dhandle)->session_ref, 1) - -#define WT_DHANDLE_RELEASE(dhandle) \ - (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1) - -#define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \ - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\ - if ((dhandle) == NULL) \ - (dhandle) = TAILQ_FIRST(head); \ - else { \ - WT_DHANDLE_RELEASE(dhandle); \ - (dhandle) = TAILQ_NEXT(dhandle, field); \ - } \ - if ((dhandle) != NULL) \ - WT_DHANDLE_ACQUIRE(dhandle); \ -} while (0) +#define WT_SESSION_META_DHANDLE(s) (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) + +#define WT_DHANDLE_ACQUIRE(dhandle) (void)__wt_atomic_add32(&(dhandle)->session_ref, 1) + +#define WT_DHANDLE_RELEASE(dhandle) (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1) + +#define WT_DHANDLE_NEXT(session, dhandle, head, field) \ + do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ + if ((dhandle) == NULL) \ + (dhandle) = TAILQ_FIRST(head); \ + 
else { \ + WT_DHANDLE_RELEASE(dhandle); \ + (dhandle) = TAILQ_NEXT(dhandle, field); \ + } \ + if ((dhandle) != NULL) \ + WT_DHANDLE_ACQUIRE(dhandle); \ + } while (0) /* * WT_DATA_HANDLE -- * A handle for a generic named data source. */ struct __wt_data_handle { - WT_RWLOCK rwlock; /* Lock for shared/exclusive ops */ - TAILQ_ENTRY(__wt_data_handle) q; - TAILQ_ENTRY(__wt_data_handle) hashq; - - const char *name; /* Object name as a URI */ - uint64_t name_hash; /* Hash of name */ - const char *checkpoint; /* Checkpoint name (or NULL) */ - const char **cfg; /* Configuration information */ - - /* - * Sessions holding a connection's data handle will have a non-zero - * reference count; sessions using a connection's data handle will - * have a non-zero in-use count. Instances of cached cursors referencing - * the data handle appear in session_cache_ref. - */ - uint32_t session_ref; /* Sessions referencing this handle */ - int32_t session_inuse; /* Sessions using this handle */ - uint32_t excl_ref; /* Refs of handle by excl_session */ - uint64_t timeofdeath; /* Use count went to 0 */ - WT_SESSION_IMPL *excl_session; /* Session with exclusive use, if any */ - - WT_DATA_SOURCE *dsrc; /* Data source for this handle */ - void *handle; /* Generic handle */ - - enum { - WT_DHANDLE_TYPE_BTREE, - WT_DHANDLE_TYPE_TABLE - } type; - - bool compact_skip; /* If the handle failed to compact */ - - /* - * Data handles can be closed without holding the schema lock; threads - * walk the list of open handles, operating on them (checkpoint is the - * best example). To avoid sources disappearing underneath checkpoint, - * lock the data handle when closing it. 
- */ - WT_SPINLOCK close_lock; /* Lock to close the handle */ - - /* Data-source statistics */ - WT_DSRC_STATS *stats[WT_COUNTER_SLOTS]; - WT_DSRC_STATS *stat_array; - - /* Flags values over 0xff are reserved for WT_BTREE_* */ + WT_RWLOCK rwlock; /* Lock for shared/exclusive ops */ + TAILQ_ENTRY(__wt_data_handle) q; + TAILQ_ENTRY(__wt_data_handle) hashq; + + const char *name; /* Object name as a URI */ + uint64_t name_hash; /* Hash of name */ + const char *checkpoint; /* Checkpoint name (or NULL) */ + const char **cfg; /* Configuration information */ + + /* + * Sessions holding a connection's data handle will have a non-zero reference count; sessions + * using a connection's data handle will have a non-zero in-use count. Instances of cached + * cursors referencing the data handle appear in session_cache_ref. + */ + uint32_t session_ref; /* Sessions referencing this handle */ + int32_t session_inuse; /* Sessions using this handle */ + uint32_t excl_ref; /* Refs of handle by excl_session */ + uint64_t timeofdeath; /* Use count went to 0 */ + WT_SESSION_IMPL *excl_session; /* Session with exclusive use, if any */ + + WT_DATA_SOURCE *dsrc; /* Data source for this handle */ + void *handle; /* Generic handle */ + + enum { WT_DHANDLE_TYPE_BTREE, WT_DHANDLE_TYPE_TABLE } type; + + bool compact_skip; /* If the handle failed to compact */ + + /* + * Data handles can be closed without holding the schema lock; threads walk the list of open + * handles, operating on them (checkpoint is the best example). To avoid sources disappearing + * underneath checkpoint, lock the data handle when closing it. 
+ */ + WT_SPINLOCK close_lock; /* Lock to close the handle */ + + /* Data-source statistics */ + WT_DSRC_STATS *stats[WT_COUNTER_SLOTS]; + WT_DSRC_STATS *stat_array; + +/* Flags values over 0xff are reserved for WT_BTREE_* */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_DHANDLE_DEAD 0x01u /* Dead, awaiting discard */ -#define WT_DHANDLE_DISCARD 0x02u /* Close on release */ -#define WT_DHANDLE_DISCARD_KILL 0x04u /* Mark dead on release */ -#define WT_DHANDLE_DROPPED 0x08u /* Handle is dropped */ -#define WT_DHANDLE_EXCLUSIVE 0x10u /* Exclusive access */ -#define WT_DHANDLE_IS_METADATA 0x20u /* Metadata handle */ -#define WT_DHANDLE_LOCK_ONLY 0x40u /* Handle only used as a lock */ -#define WT_DHANDLE_OPEN 0x80u /* Handle is open */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_DHANDLE_DEAD 0x01u /* Dead, awaiting discard */ +#define WT_DHANDLE_DISCARD 0x02u /* Close on release */ +#define WT_DHANDLE_DISCARD_KILL 0x04u /* Mark dead on release */ +#define WT_DHANDLE_DROPPED 0x08u /* Handle is dropped */ +#define WT_DHANDLE_EXCLUSIVE 0x10u /* Exclusive access */ +#define WT_DHANDLE_IS_METADATA 0x20u /* Metadata handle */ +#define WT_DHANDLE_LOCK_ONLY 0x40u /* Handle only used as a lock */ +#define WT_DHANDLE_OPEN 0x80u /* Handle is open */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/dlh.h b/src/third_party/wiredtiger/src/include/dlh.h index a8fdab98415..cd037ad7656 100644 --- a/src/third_party/wiredtiger/src/include/dlh.h +++ b/src/third_party/wiredtiger/src/include/dlh.h @@ -7,10 +7,10 @@ */ struct __wt_dlh { - TAILQ_ENTRY(__wt_dlh) q; /* List of open libraries. */ + TAILQ_ENTRY(__wt_dlh) q; /* List of open libraries. */ - void *handle; /* Handle returned by dlopen. */ - char *name; + void *handle; /* Handle returned by dlopen. */ + char *name; - int (*terminate)(WT_CONNECTION *); /* Terminate function. 
*/ + int (*terminate)(WT_CONNECTION *); /* Terminate function. */ }; diff --git a/src/third_party/wiredtiger/src/include/error.h b/src/third_party/wiredtiger/src/include/error.h index 620c581c3f7..5493852c855 100644 --- a/src/third_party/wiredtiger/src/include/error.h +++ b/src/third_party/wiredtiger/src/include/error.h @@ -5,147 +5,175 @@ * * See the file LICENSE for redistribution information. */ -#define WT_COMPAT_MSG_PREFIX "Version incompatibility detected: " +#define WT_COMPAT_MSG_PREFIX "Version incompatibility detected: " -#define WT_DEBUG_POINT ((void *)(uintptr_t)0xdeadbeef) -#define WT_DEBUG_BYTE (0xab) +#define WT_DEBUG_POINT ((void *)(uintptr_t)0xdeadbeef) +#define WT_DEBUG_BYTE (0xab) /* In DIAGNOSTIC mode, yield in places where we want to encourage races. */ #ifdef HAVE_DIAGNOSTIC -#define WT_DIAGNOSTIC_YIELD do { \ - __wt_yield(); \ -} while (0) +#define WT_DIAGNOSTIC_YIELD \ + do { \ + __wt_yield(); \ + } while (0) #else -#define WT_DIAGNOSTIC_YIELD +#define WT_DIAGNOSTIC_YIELD #endif -#define __wt_err(session, error, ...) \ - __wt_err_func(session, error, __func__, __LINE__, __VA_ARGS__) -#define __wt_errx(session, ...) \ - __wt_errx_func(session, __func__, __LINE__, __VA_ARGS__) -#define __wt_set_return(session, error) \ - __wt_set_return_func(session, __func__, __LINE__, error) +#define __wt_err(session, error, ...) __wt_err_func(session, error, __func__, __LINE__, __VA_ARGS__) +#define __wt_errx(session, ...) __wt_errx_func(session, __func__, __LINE__, __VA_ARGS__) +#define __wt_set_return(session, error) __wt_set_return_func(session, __func__, __LINE__, error) /* Set "ret" and branch-to-err-label tests. */ -#define WT_ERR(a) do { \ - if ((ret = (a)) != 0) \ - goto err; \ -} while (0) -#define WT_ERR_MSG(session, v, ...) 
do { \ - ret = (v); \ - __wt_err(session, ret, __VA_ARGS__); \ - goto err; \ -} while (0) -#define WT_ERR_TEST(a, v) do { \ - if (a) { \ - ret = (v); \ - goto err; \ - } else \ - ret = 0; \ -} while (0) -#define WT_ERR_ERROR_OK(a, e) \ - WT_ERR_TEST((ret = (a)) != 0 && ret != (e), ret) -#define WT_ERR_BUSY_OK(a) WT_ERR_ERROR_OK(a, EBUSY) -#define WT_ERR_NOTFOUND_OK(a) WT_ERR_ERROR_OK(a, WT_NOTFOUND) +#define WT_ERR(a) \ + do { \ + if ((ret = (a)) != 0) \ + goto err; \ + } while (0) +#define WT_ERR_MSG(session, v, ...) \ + do { \ + ret = (v); \ + __wt_err(session, ret, __VA_ARGS__); \ + goto err; \ + } while (0) +#define WT_ERR_TEST(a, v) \ + do { \ + if (a) { \ + ret = (v); \ + goto err; \ + } else \ + ret = 0; \ + } while (0) +#define WT_ERR_ERROR_OK(a, e) WT_ERR_TEST((ret = (a)) != 0 && ret != (e), ret) +#define WT_ERR_BUSY_OK(a) WT_ERR_ERROR_OK(a, EBUSY) +#define WT_ERR_NOTFOUND_OK(a) WT_ERR_ERROR_OK(a, WT_NOTFOUND) /* Return tests. */ -#define WT_RET(a) do { \ - int __ret; \ - if ((__ret = (a)) != 0) \ - return (__ret); \ -} while (0) -#define WT_RET_TRACK(a) do { \ - int __ret; \ - if ((__ret = (a)) != 0) { \ - WT_TRACK_OP_END(session); \ - return (__ret); \ - } \ -} while (0) -#define WT_RET_MSG(session, v, ...) do { \ - int __ret = (v); \ - __wt_err(session, __ret, __VA_ARGS__); \ - return (__ret); \ -} while (0) -#define WT_RET_TEST(a, v) do { \ - if (a) \ - return (v); \ -} while (0) -#define WT_RET_ERROR_OK(a, e) do { \ - int __ret = (a); \ - WT_RET_TEST(__ret != 0 && __ret != (e), __ret); \ -} while (0) -#define WT_RET_BUSY_OK(a) WT_RET_ERROR_OK(a, EBUSY) -#define WT_RET_NOTFOUND_OK(a) WT_RET_ERROR_OK(a, WT_NOTFOUND) +#define WT_RET(a) \ + do { \ + int __ret; \ + if ((__ret = (a)) != 0) \ + return (__ret); \ + } while (0) +#define WT_RET_TRACK(a) \ + do { \ + int __ret; \ + if ((__ret = (a)) != 0) { \ + WT_TRACK_OP_END(session); \ + return (__ret); \ + } \ + } while (0) +#define WT_RET_MSG(session, v, ...) 
\ + do { \ + int __ret = (v); \ + __wt_err(session, __ret, __VA_ARGS__); \ + return (__ret); \ + } while (0) +#define WT_RET_TEST(a, v) \ + do { \ + if (a) \ + return (v); \ + } while (0) +#define WT_RET_ERROR_OK(a, e) \ + do { \ + int __ret = (a); \ + WT_RET_TEST(__ret != 0 && __ret != (e), __ret); \ + } while (0) +#define WT_RET_BUSY_OK(a) WT_RET_ERROR_OK(a, EBUSY) +#define WT_RET_NOTFOUND_OK(a) WT_RET_ERROR_OK(a, WT_NOTFOUND) /* Set "ret" if not already set. */ -#define WT_TRET(a) do { \ - int __ret; \ - if ((__ret = (a)) != 0 && \ - (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || \ - ret == WT_NOTFOUND || ret == WT_RESTART)) \ - ret = __ret; \ -} while (0) -#define WT_TRET_ERROR_OK(a, e) do { \ - int __ret; \ - if ((__ret = (a)) != 0 && __ret != (e) && \ - (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || \ - ret == WT_NOTFOUND || ret == WT_RESTART)) \ - ret = __ret; \ -} while (0) -#define WT_TRET_BUSY_OK(a) WT_TRET_ERROR_OK(a, EBUSY) -#define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND) +#define WT_TRET(a) \ + do { \ + int __ret; \ + if ((__ret = (a)) != 0 && (__ret == WT_PANIC || ret == 0 || ret == WT_DUPLICATE_KEY || \ + ret == WT_NOTFOUND || ret == WT_RESTART)) \ + ret = __ret; \ + } while (0) +#define WT_TRET_ERROR_OK(a, e) \ + do { \ + int __ret; \ + if ((__ret = (a)) != 0 && __ret != (e) && \ + (__ret == WT_PANIC || ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND || \ + ret == WT_RESTART)) \ + ret = __ret; \ + } while (0) +#define WT_TRET_BUSY_OK(a) WT_TRET_ERROR_OK(a, EBUSY) +#define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND) /* Called on unexpected code path: locate the failure. */ -#define __wt_illegal_value(session, v) \ - __wt_illegal_value_func(session, (uintmax_t)(v), __func__, __LINE__) +#define __wt_illegal_value(session, v) \ + __wt_illegal_value_func(session, (uintmax_t)(v), __func__, __LINE__) -#define WT_PANIC_MSG(session, v, ...) 
do { \ - __wt_err(session, v, __VA_ARGS__); \ - WT_IGNORE_RET(__wt_panic(session)); \ -} while (0) -#define WT_PANIC_ERR(session, v, ...) do { \ - WT_PANIC_MSG(session, v, __VA_ARGS__); \ - /* Return WT_PANIC regardless of earlier return codes. */ \ - WT_ERR(WT_PANIC); \ -} while (0) -#define WT_PANIC_RET(session, v, ...) do { \ - WT_PANIC_MSG(session, v, __VA_ARGS__); \ - /* Return WT_PANIC regardless of earlier return codes. */ \ - return (WT_PANIC); \ -} while (0) +#define WT_PANIC_MSG(session, v, ...) \ + do { \ + __wt_err(session, v, __VA_ARGS__); \ + WT_IGNORE_RET(__wt_panic(session)); \ + } while (0) +#define WT_PANIC_ERR(session, v, ...) \ + do { \ + WT_PANIC_MSG(session, v, __VA_ARGS__); \ + /* Return WT_PANIC regardless of earlier return codes. */ \ + WT_ERR(WT_PANIC); \ + } while (0) +#define WT_PANIC_RET(session, v, ...) \ + do { \ + WT_PANIC_MSG(session, v, __VA_ARGS__); \ + /* Return WT_PANIC regardless of earlier return codes. */ \ + return (WT_PANIC); \ + } while (0) /* - * WT_ASSERT - * Assert an expression, aborting in diagnostic mode. Otherwise, - * "use" the session to keep the compiler quiet and don't evaluate the - * expression. + * WT_ERR_ASSERT, WT_RET_ASSERT, WT_ASSERT + * Assert an expression, aborting in diagnostic mode and otherwise exiting + * the function with an error. WT_ASSERT is deprecated, and should be used only + * where required for performance. */ #ifdef HAVE_DIAGNOSTIC -#define WT_ASSERT(session, exp) do { \ - if (!(exp)) { \ - __wt_errx(session, "%s", #exp); \ - __wt_abort(session); \ - } \ -} while (0) +#define WT_ASSERT(session, exp) \ + do { \ + if (!(exp)) { \ + __wt_errx(session, "%s", #exp); \ + __wt_abort(session); \ + } \ + } while (0) +#define WT_ERR_ASSERT(session, exp, v, ...) \ + do { \ + if (!(exp)) { \ + __wt_err(session, v, __VA_ARGS__); \ + __wt_abort(session); \ + } \ + } while (0) +#define WT_RET_ASSERT(session, exp, v, ...) 
\ + do { \ + if (!(exp)) { \ + __wt_err(session, v, __VA_ARGS__); \ + __wt_abort(session); \ + } \ + } while (0) #else -#define WT_ASSERT(session, exp) \ - WT_UNUSED(session) +#define WT_ASSERT(session, exp) WT_UNUSED(session) +#define WT_ERR_ASSERT(session, exp, v, ...) \ + do { \ + if (!(exp)) \ + WT_ERR_MSG(session, v, __VA_ARGS__); \ + } while (0) +#define WT_RET_ASSERT(session, exp, v, ...) \ + do { \ + if (!(exp)) \ + WT_RET_MSG(session, v, __VA_ARGS__); \ + } while (0) #endif /* * __wt_verbose -- - * Display a verbose message. - * - * Not an inlined function because you can't inline functions taking variadic - * arguments and we don't want to make a function call in production systems - * just to find out a verbose flag isn't set. - * - * The macro must take a format string and at least one additional argument, - * there's no portable way to remove the comma before an empty __VA_ARGS__ - * value. + * Display a verbose message. Not an inlined function because you can't inline functions taking + * variadic arguments and we don't want to make a function call in production systems just to + * find out a verbose flag isn't set. The macro must take a format string and at least one + * additional argument, there's no portable way to remove the comma before an empty __VA_ARGS__ + * value. */ -#define __wt_verbose(session, flag, fmt, ...) do { \ - if (WT_VERBOSE_ISSET(session, flag)) \ - __wt_verbose_worker(session, fmt, __VA_ARGS__); \ -} while (0) +#define __wt_verbose(session, flag, fmt, ...) 
\ + do { \ + if (WT_VERBOSE_ISSET(session, flag)) \ + __wt_verbose_worker(session, fmt, __VA_ARGS__); \ + } while (0) diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 5dbd7115684..2b2b089a18c 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -1,750 +1,1558 @@ -extern WT_DATA_SOURCE * __wt_schema_get_source(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern WT_HAZARD * __wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const char *name) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_HAZARD *__wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref, + WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern WT_THREAD_RET __wt_async_worker(void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern WT_THREAD_RET __wt_cache_pool_server(void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern WT_UPDATE * __wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd, bool update_accounting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_btree_immediately_durable(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_checksum_alt_match(const void *chunk, size_t len, uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_gen_active(WT_SESSION_IMPL *session, int which, uint64_t generation) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_THREAD_RET __wt_cache_pool_server(void *arg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_UPDATE *__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page, + WT_UPDATE *upd, bool update_accounting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_btree_immediately_durable(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_checksum_alt_match(const void *chunk, size_t len, uint32_t v) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_gen_active(WT_SESSION_IMPL *session, int which, uint64_t generation) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_ispo2(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_las_empty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_lsm_chunk_visible_all( WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern char * __wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const WT_CONFIG_ENTRY * __wt_conn_config_match(const char *method) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_buf_set_printable_format(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *format, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_buf_set_size( WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_cell_type_string(uint8_t type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION 
*wt_session, int error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_session_strerror(WT_SESSION *wt_session, int error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_wiredtiger_error(int error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur, WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_flush(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_op_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
-extern int __wt_backup_file_remove(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_alloc( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_checkpoint(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_checkpoint_final(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t **file_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block, char **metadatap, char **checkpoint_listp, WT_ITEM *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t 
*root_addr_sizep, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_checkpoint_resolve( WT_SESSION_IMPL *session, WT_BLOCK *block, bool failed) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_checkpoint_unload( WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_ckpt_init( WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci, bool skip_avail) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_check( WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, WT_EXTLIST *b) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_overlap( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const 
char *filename, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_misplaced( WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list, wt_off_t offset, uint32_t size, bool live, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_off_free( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_read_off_blind(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_verify_start(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io, bool caller_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_close(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_create( WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_finalize(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_inmem_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_intersection(WT_BLOOM *bloom, WT_BLOOM *other) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, WT_CURSOR *owner, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, size_t *compressed_sizep, bool checkpoint, bool checkpoint_io, bool compressed) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_las_empty(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_lsm_chunk_visible_all(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern char *__wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const WT_CONFIG_ENTRY *__wt_conn_config_match(const char *method) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, + WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_buf_set_printable(WT_SESSION_IMPL *session, const void *p, size_t size, + WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_buf_set_printable_format(WT_SESSION_IMPL *session, const void *buffer, + size_t size, const char *format, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_buf_set_size(WT_SESSION_IMPL *session, uint64_t size, bool exact, + WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_cell_type_string(uint8_t type) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_session_strerror(WT_SESSION *wt_session, int error) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern 
const char *__wt_wiredtiger_error(int error) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur, + WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_flush(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, + WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_op_init(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_backup_file_remove(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) + WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, + size_t addr_size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, + const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size, + uint32_t checksum) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_alloc(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, + wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, + uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, + WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, + WT_CKPT *ckptbase, bool data_checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_final(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, + uint8_t **file_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_last(WT_SESSION_IMPL *session, WT_BLOCK *block, char **metadatap, + char **checkpoint_listp, WT_ITEM *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, + const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, + bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block, bool failed) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_unload(WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p, + WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ckpt_init(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, + WT_BLOCK_CKPT *ci, bool skip_avail) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_compact_page_skip( + WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_check(WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, + const char *extname, bool track_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, + WT_EXTLIST *b) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, + wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, + wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, + WT_EXTLIST *additional) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, + size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, + wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_create(WT_SESSION_IMPL *session, const char *filename, + uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename, bool durable) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_named_size(WT_SESSION_IMPL *session, const char *name, + wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, + const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern 
int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, + size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list, + wt_off_t offset, uint32_t size, bool live, const char *func, int line) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_off_free(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, + wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, + wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], + bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, + wt_off_t offset, uint32_t size, uint32_t checksum) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_read_off_blind(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, + uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, + size_t *addr_sizep, bool *eofp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_salvage_start(WT_SESSION_IMPL *session, 
WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, + size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, + size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, + size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_verify_start(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, + const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, + size_t *addr_sizep, bool data_checksum, bool checkpoint_io) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, + wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io, + bool caller_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_close(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_create(WT_SESSION_IMPL *session, const char 
*uri, const char *config, + uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_finalize(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_inmem_get(WT_BLOOM *bloom, WT_ITEM *key) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_intersection(WT_BLOOM *bloom, WT_BLOOM *other) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, + WT_CURSOR *owner, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, + size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bm_preload(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, + size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, + size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, + size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, + size_t *compressed_sizep, bool checkpoint, bool checkpoint_io, bool compressed) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btree_config_encryptor(WT_SESSION_IMPL *session, const char **cfg, WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
-extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bulk_insert_fix( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cache_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cache_eviction_worker( WT_SESSION_IMPL *session, bool busy, bool readonly, double pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_capacity_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_close(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_config_encryptor(WT_SESSION_IMPL *session, const char **cfg, + WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_discard(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_tree_open(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4))) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4))) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, + double pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_capacity_server_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_close_connection_close(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_clsm_close(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname, WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_init_merge(WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, + u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, + const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, + WT_CURSOR_BTREE *cbt, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname, + WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_compact(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern 
int __wt_compact_page_skip( WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_compressor_config( WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_collapse( WT_SESSION_IMPL *session, const char **cfg, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_discard_defaults(WT_SESSION_IMPL *session, const char **cfg, const char *config, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_getones(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
-extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_merge(WT_SESSION_IMPL *session, const char **cfg, const char *cfg_strip, const char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri, const char *config, const char *type, const char *check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_compat_config( WT_SESSION_IMPL *session, const char **cfg, bool reconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_config_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL 
*session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_close( WT_SESSION_IMPL *session, bool final, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool removed, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_dhandle_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_optrack_setup(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_optrack_teardown(WT_SESSION_IMPL *session, bool reconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_remove_data_source(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_compressor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, + WT_COMPRESSOR **compressorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min, + uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, + const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_collapse(WT_SESSION_IMPL *session, const char **cfg, char **config_ret) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_discard_defaults(WT_SESSION_IMPL *session, const char **cfg, + const char *config, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_getones(WT_SESSION_IMPL *session, const char *config, const char *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config, const char *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, + int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_merge(WT_SESSION_IMPL *session, const char **cfg, const char *cfg_strip, + const char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, + WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri, + const char *config, const char *type, 
const char *check) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, + int (*file_func)(WT_SESSION_IMPL *, const char *[]), + int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_compat_config(WT_SESSION_IMPL *session, const char **cfg, bool reconfig) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_config_init(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri, + const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_close_all(WT_SESSION_IMPL *session, const char *uri, bool removed, + bool mark_dead) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, bool final, bool mark_dead) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_optrack_setup(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_optrack_teardown(WT_SESSION_IMPL *session, bool reconfig) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_data_source(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_close(WT_CONNECTION_IMPL *conn) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_init(WT_CONNECTION_IMPL *conn) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_count_birthmarks(WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
-extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern 
int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, + bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], + WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curds_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curfile_insert_check(WT_CURSOR *cursor) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curfile_next_random(WT_CURSOR *cursor) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const 
char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, + WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count, + uint32_t bloom_hash_count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], + WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_cached(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_get_raw_value(WT_CURSOR *cursor, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_get_value(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_key_order_check( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_equals(WT_CURSOR *cursor, 
WT_CURSOR *other, int *equalp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_raw_value(WT_CURSOR *cursor, WT_ITEM *value) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_value(WT_CURSOR *cursor, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], + WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_key_order_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_noop(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_notsup(WT_CURSOR *cursor) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_reopen_notsup(WT_CURSOR *cursor, bool check_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curstat_colgroup_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curstat_index_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curstat_lsm_init( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curstat_table_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curtable_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curtable_get_value(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_addr_print( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_disk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_offset_blind( WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_page( void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_tree( void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_tree_all( void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_debug_tree_shape( WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_reopen_notsup(WT_CURSOR *cursor, bool check_only) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_curstat_colgroup_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], + WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_index_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], + WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, + const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_lsm_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_table_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], + WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curtable_get_key(WT_CURSOR *cursor, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curtable_get_value(WT_CURSOR *cursor, ...) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, + const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_addr_print(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, + uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_offset_blind(WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_page(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_tree(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_tree_all(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_tree_shape(WT_SESSION_IMPL *session, WT_REF *ref, const char 
*ofile) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, + WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, + const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, + WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, + WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_errno(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_evict_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_evict_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_exclusive_handle_operation(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_config_get_string(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *config, const char *key, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t len, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *lenp, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t len, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_transaction_visible( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint64_t transaction_id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_file_zero(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t start_off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_filename_construct(WT_SESSION_IMPL *session, const char *path, const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_fsync_background(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_getopt( const char *progname, int nargc, char * const *nargv, const char *ostr) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state, + uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_create(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_exclusive_handle_operation(WT_SESSION_IMPL *session, const char *uri, + int (*file_func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[], uint32_t open_flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_config_get_string(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + const char *config, const char *key, WT_CONFIG_ITEM *cval) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, + const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, + WT_CONFIG_ARG *cfg_arg, WT_CONFIG_PARSER **config_parserp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_err_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, + ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_map_windows_error(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + uint32_t windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_metadata_remove(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_msg_printf(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, + ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, + void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, + size_t len, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *lenp, + const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + const void *buffer, size_t len, const char *fmt, ...) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_transaction_isolation_level(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_transaction_notify(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + WT_TXN_NOTIFY *notify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_transaction_visible(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + uint64_t transaction_id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, + const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, + WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_file_zero(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t start_off, wt_off_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_filename_construct(WT_SESSION_IMPL *session, const char *path, + const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, + uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_fsync_background(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_getopt(const char *progname, int nargc, char *const *nargv, const char *ostr) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp #ifdef HAVE_DIAGNOSTIC - , const char *func, int line + , + const char *func, int line #endif - ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_hex2byte(const u_char *from, u_char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt, u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_illegal_value_func( WT_SESSION_IMPL *session, uintmax_t v, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_import(WT_SESSION_IMPL *session, const char *uri) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_create(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_insert_block(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hex2byte(const u_char *from, u_char *to) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, + size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, + size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt, + u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_illegal_value_func(WT_SESSION_IMPL *session, uintmax_t v, const char *func, + int line) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_import(WT_SESSION_IMPL *session, const char *uri) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag) + WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, + const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, + const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, + size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, + WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, + const char **tokstart, size_t *toklen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_config(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_create(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_cursor_close(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, + uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern 
int __wt_las_insert_block(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi, + WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint64_t pageid) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_sweep(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_library_init(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_fill(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool force, WT_ITEM *record, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_get_backup_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_allocfile(WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_close(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_extract_lognum(WT_SESSION_IMPL *session, const char *name, uint32_t *id) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_fill(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool force, WT_ITEM *record, + WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_get_backup_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, + uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *format, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_recover_system(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version, uint32_t first_rec, bool downgrade, bool live_chg, uint32_t *lognump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_truncate_files(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logmgr_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_checkpoint_start_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_checkpoint_start_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_modify_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_modify_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_put_unpack( 
WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_prev_lsn_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_prev_lsn_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_logop_row_modify_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_modify_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_txn_timestamp_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts, uint64_t prepare_ts, uint64_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logop_txn_timestamp_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *rectypep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_manager_pop_entry( WT_SESSION_IMPL *session, uint32_t type, WT_LSM_WORK_UNIT **entryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *newconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_bloom_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, uint32_t generation, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_drop( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_get(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LSM_TREE **treep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_rename(WT_SESSION_IMPL *session, const char *olduri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_retire_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_set_chunk_size( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_setup_bloom( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_setup_chunk( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_truncate( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_memdup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_block_metadata( WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_ckptlist_to_meta( WT_SESSION_IMPL *session, WT_CKPT 
*ckptbase, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_sysinfo_set(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_fileop( WT_SESSION_IMPL *session, const char *olduri, const char *newuri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_insert( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_salvage(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_set_base_write_gen(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_turtle_rewrite(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_metadata_update( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_modify_apply(WT_CURSOR *cursor, const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_modify_pack(WT_CURSOR *cursor, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_nhex_to_raw( WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_os_inmemory(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, 
WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *format, ...) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_recover_system(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, + int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, + void *cookie, int firstrecord), + void *cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_set_version(WT_SESSION_IMPL *session, uint16_t version, uint32_t first_rec, + bool downgrade, bool live_chg, uint32_t *lognump) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, + bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_system_record(WT_SESSION_IMPL *session, WT_FH *log_fh, WT_LSN *lsn) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_truncate_files(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool force) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, 
WT_LSN *lsnp, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logmgr_open(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_checkpoint_start_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_modify_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_modify_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_put_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
+extern int __wt_logop_col_put_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_remove_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + uint64_t recno) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_remove_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, uint64_t *recnop) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_truncate_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + uint64_t start, uint64_t stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_truncate_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_prev_lsn_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + uint32_t *optypep, uint32_t *opsizep) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_modify_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_modify_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_modify_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_put_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_put_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_remove_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_remove_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_truncate_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, + WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_truncate_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_txn_timestamp_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, + uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts, + uint64_t prepare_ts, uint64_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_txn_timestamp_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, + uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + uint32_t *rectypep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, 
WT_LSM_TREE *lsm_tree, bool force, + WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_pop_entry(WT_SESSION_IMPL *session, uint32_t type, + WT_LSM_WORK_UNIT **entryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, uint32_t type, uint32_t flags, + WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + u_int start_chunk, u_int nchunks, WT_LSM_CHUNK *chunk) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + const char *newconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_bloom_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, + const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, + uint32_t generation, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, + const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_drop(WT_SESSION_IMPL *session, const char *name, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_get(WT_SESSION_IMPL *session, const char *uri, bool exclusive, + WT_LSM_TREE **treep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_rename(WT_SESSION_IMPL *session, const char *olduri, const char *newuri, + const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_retire_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + u_int start_chunk, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_set_chunk_size(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_setup_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_setup_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_truncate(WT_SESSION_IMPL *session, const char *name, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, + int (*file_func)(WT_SESSION_IMPL *, const char *[]), + int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_work_switch(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_memdup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, + int (*file_func)(WT_SESSION_IMPL *, const char *[]), + int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_block_metadata(WT_SESSION_IMPL *session, const char *config, WT_CKPT *ckpt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, + WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_checkpoint_last_name(WT_SESSION_IMPL *session, const char *fname, + const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update, + WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char 
*fname, WT_CKPT *ckptbase, + WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_sysinfo_set(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_drop(WT_SESSION_IMPL *session, const char *filename) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_fileop(WT_SESSION_IMPL *session, const char *olduri, const char *newuri) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_init(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_on(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_cursor_open(WT_SESSION_IMPL *session, const char *config, + WT_CURSOR **cursorp) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_get_ckptlist(WT_SESSION *session, const char *name, WT_CKPT **ckptbasep) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_insert(WT_SESSION_IMPL *session, const char *key, const char *value) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_salvage(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_set_base_write_gen(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_turtle_rewrite(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_update(WT_SESSION_IMPL *session, const char *key, const char *value) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_modify_apply(WT_CURSOR *cursor, const void *modify) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_modify_pack(WT_CURSOR *cursor, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, + WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_nfilename(WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_nhex_to_raw(WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) + WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, + u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, + const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, + bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, + const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_os_inmemory(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, + WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, + bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, + size_t addr_size, const void *value, size_t value_size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, + size_t *addr_sizep, const void *value, size_t value_size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, + bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags #ifdef HAVE_DIAGNOSTIC - , const char *func, int line + , + const char *func, int line #endif - ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags, bool check_unstable, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv, uint8_t type, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_child_modify(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, bool *hazardp, WT_CHILD_STATE *statep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_col_fix_slvg(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_col_var(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_split_crossing_bnd( WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_split_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page, uint64_t recno, uint64_t max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_IKEY **ikeyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE 
*cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_colcheck(WT_SESSION_IMPL *session, const char *key_format, const char *value_format, WT_CONFIG_ITEM *colconf, u_int *kcolsp, u_int *vcolsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_create( WT_SESSION_IMPL *session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_get_colgroup(WT_SESSION_IMPL *session, const char *uri, bool quiet, WT_TABLE **tablep, WT_COLGROUP **colgroupp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_get_index(WT_SESSION_IMPL *session, const char *uri, bool 
invalidate, bool quiet, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_internal_session( WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_open_indices(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_open_table(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_project_in(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, 
bool key_only, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_range_truncate( WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_session_release( WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_truncate( WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags, + bool check_unstable, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_raw_to_esc_hex(WT_SESSION_IMPL *session, const uint8_t *from, size_t size, + WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_raw_to_hex(WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, + size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, + size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, + size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv, + uint8_t type, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, + uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_child_modify(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, + bool *hazardp, WT_CHILD_STATE *statep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_col_fix_slvg(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, + WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_rec_col_var(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, + WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val, + WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, + WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_split_crossing_bnd(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_split_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page, + uint64_t recno, uint64_t max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, + void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, + uint32_t flags, bool *lookaside_retryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const 
void *key, + size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, + size_t size, WT_IKEY **ikeyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, + const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, + WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_leaf_key_copy(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, + WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, + WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, + const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, + WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_colcheck(WT_SESSION_IMPL *session, const char *key_format, + const char *value_format, WT_CONFIG_ITEM *colconf, u_int *kcolsp, u_int *vcolsp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, + size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_colgroup_source( + WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_get_colgroup(WT_SESSION_IMPL *session, const char *uri, bool quiet, + WT_TABLE **tablep, WT_COLGROUP **colgroupp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_get_index(WT_SESSION_IMPL *session, const char *uri, bool invalidate, + bool quiet, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, + bool ok_incomplete, uint32_t flags, WT_TABLE **tablep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete, + 
uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, + const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, + size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_indices(WT_SESSION_IMPL *session, WT_TABLE *table) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_table(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_in(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, + va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, + const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, + va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, + bool key_only, const char *vformat, WT_ITEM *value) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri, + const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_session_release(WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_truncate(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, + int (*file_func)(WT_SESSION_IMPL *, const char *[]), + int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp #ifdef HAVE_DIAGNOSTIC - , const char *func, int line + , + const char *func, int line #endif - ) - WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_breakpoint(WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_compact_check_timeout(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_copy_values(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_cursor_cache_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_get_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_notsup(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_release_dhandle(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_release_resources(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_set_return_func( WT_SESSION_IMPL *session, const char* func, int line, int err) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int 
__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_stat_connection_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_stat_connection_init( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_stat_dsrc_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_stat_dsrc_init( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_stat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_stat_session_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_check(WT_SESSION_IMPL *session, const char *fmt, size_t len, bool *fixedp, uint32_t *fixed_lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t len, const char *fmt, 
...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, bool value_only, WT_ITEM *plan) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, const char *extra_cols, bool value_only, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *lenp, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_truncate(WT_SESSION_IMPL *session, const char *input_fmt, u_int ncols, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t len, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_sweep_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_sweep_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_table_check(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, bool (*chk_func)(WT_SESSION_IMPL *session), int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context), int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_group_resize( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min, uint32_t new_max, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_tree_walk_custom_skip( WT_SESSION_IMPL *session, WT_REF **refp, int 
(*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_turtle_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_activity_drain(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_get_pinned_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uint32_t flags) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_parse_timestamp_raw(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_printlog(WT_SESSION 
*wt_session, const char *ofile, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_recover(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_set_commit_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t commit_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_set_durable_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t durable_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_set_prepare_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t prepare_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_set_read_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_ts_log(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_unexpected_object_type( WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_value_return( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_value_return_upd(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_dump_handles(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_dump_log(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_dump_sessions(WT_SESSION_IMPL *session, bool show_cursors) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verify_ckpt_load( WT_SESSION_IMPL 
*session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, WT_ADDR *addr, bool empty_page_ok) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern ssize_t __wt_json_strlen(const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, + WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_breakpoint(WT_SESSION *wt_session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_compact(WT_SESSION *wt_session, const char *uri, const char *config) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_compact_check_timeout(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_session_compact_readonly(WT_SESSION *wt_session, const char *uri, + const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_copy_values(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_create(WT_SESSION_IMPL *session, const char *uri, const char *config) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_cursor_cache_sweep(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], + uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_get_dhandle(WT_SESSION_IMPL *session, const char *uri, + const char *checkpoint, const char *cfg[], uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_notsup(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, + WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_release_dhandle(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_release_resources(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_set_return_func(WT_SESSION_IMPL *session, const char *func, int line, int err) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p, + size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_stat_connection_desc(WT_CURSOR_STAT *cst, int slot, const char **p) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_stat_connection_init(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_stat_dsrc_desc(WT_CURSOR_STAT *cst, int slot, const char **p) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_stat_dsrc_init(WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_stat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **p) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_stat_session_desc(WT_CURSOR_STAT *cst, int slot, const char **p) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
+extern int __wt_struct_check(WT_SESSION_IMPL *session, const char *fmt, size_t len, bool *fixedp, + uint32_t *fixed_lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t len, const char *fmt, + ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, + size_t len, bool value_only, WT_ITEM *plan) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, + size_t len, const char *extra_cols, bool value_only, WT_ITEM *format) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, + const WT_ITEM *inbuf, WT_ITEM *outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *lenp, const char *fmt, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_truncate(WT_SESSION_IMPL *session, const char *input_fmt, u_int ncols, + WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t len, + const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_sweep_create(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_sweep_destroy(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_table_check(WT_SESSION_IMPL *session, WT_TABLE *table) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_group_create(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, + const char *name, uint32_t min, uint32_t max, uint32_t flags, + bool (*chk_func)(WT_SESSION_IMPL *session), + int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context), + int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, + uint32_t new_min, uint32_t new_max, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, + uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk_custom_skip(WT_SESSION_IMPL *session, WT_REF 
**refp, + int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_turtle_init(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_activity_drain(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, + WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, const uint8_t **pp, + const uint8_t *end, WT_LSN *ckpt_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_get_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, + 
uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], + bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, + wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_parse_timestamp_raw(WT_SESSION_IMPL *session, const char *name, + wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
+extern int __wt_txn_printlog(WT_SESSION *wt_session, const char *ofile, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp, + const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_recover(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_set_prepare_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t prepare_ts) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t read_ts) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_truncate_log(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, + WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_ts_log(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_unexpected_object_type( + WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, + size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_value_return_upd(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, + bool ignore_visibility) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_dump_handles(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_dump_log(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_dump_sessions(WT_SESSION_IMPL *session, bool show_cursors) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_ckpt_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, + const WT_PAGE_HEADER *dsk, size_t size, WT_ADDR *addr, bool empty_page_ok) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern ssize_t __wt_json_strlen(const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint32_t __wt_log2_int(uint32_t n) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint32_t __wt_nlpo2(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint32_t __wt_nlpo2_round(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint32_t __wt_random(WT_RAND_STATE volatile * rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint32_t __wt_split_page_size(int 
split_pct, uint32_t maxpagesize, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_clock_to_nsec(uint64_t end, uint64_t begin) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_hash_city64(const void *s, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_hash_fnv64(const void *string, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void * __wt_ext_scr_alloc( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size); -extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint32_t __wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_clock_to_nsec(uint64_t end, uint64_t begin) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_hash_city64(const void *s, size_t len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_hash_fnv64(const void *string, size_t len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void *__wt_ext_scr_alloc(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size); +extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_async_stats_update(WT_SESSION_IMPL *session); extern void __wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci); extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); @@ -753,7 +1561,8 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); extern void __wt_block_size_free(WT_SESSION_IMPL *session, WT_SIZE *sz); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); extern void __wt_bloom_hash(WT_BLOOM *bloom, WT_ITEM *key, WT_BLOOM_HASH *bhash); -extern void __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt); extern void __wt_btcur_open(WT_CURSOR_BTREE *cbt); @@ -763,13 +1572,19 @@ extern void __wt_cache_stats_update(WT_SESSION_IMPL *session); extern void __wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, WT_THROTTLE_TYPE type); extern void __wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing); extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize); -extern void __wt_checkpoint_tree_reconcile_update( WT_SESSION_IMPL *session, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn); -extern void __wt_ckpt_verbose(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, const char *ckpt_name, const uint8_t *ckpt_string); -extern void __wt_cond_auto_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)); -extern void __wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); +extern void __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, + wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, + wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn); +extern void __wt_ckpt_verbose(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, + const char *ckpt_name, const uint8_t *ckpt_string); +extern void __wt_cond_auto_wait( + WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)); +extern void __wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, + bool 
(*run_func)(WT_SESSION_IMPL *), bool *signalled); extern void __wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str); -extern void __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len); -extern void __wt_config_subinit( WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item); +extern void __wt_config_initn( + WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len); +extern void __wt_config_subinit(WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item); extern void __wt_conn_config_discard(WT_SESSION_IMPL *session); extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session); extern void __wt_conn_stat_init(WT_SESSION_IMPL *session); @@ -790,10 +1605,17 @@ extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session); extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst); extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...); extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...); -extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep); -extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_err_func(WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_errx_func(WT_SESSION_IMPL *session, const char *func, int line, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 4, 5))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_encrypt_size( + WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep); +extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_err_func( + WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 5, 6))) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_errx_func(WT_SESSION_IMPL *session, const char *func, int line, const char *fmt, + ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 4, 5))) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler); extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref); @@ -801,10 +1623,13 @@ extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v); extern void __wt_evict_server_wake(WT_SESSION_IMPL *session); extern void __wt_ext_scr_free(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *p); -extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp); -extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages); -extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages); +extern void __wt_fill_hex( + const uint8_t *src, size_t src_max, uint8_t *dest, size_t 
dest_max, size_t *lenp); +extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_free_ref(WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages); +extern void __wt_free_ref_index( + WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages); extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd); extern void __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation); extern void __wt_gen_init(WT_SESSION_IMPL *session); @@ -812,73 +1637,82 @@ extern void __wt_gen_next_drain(WT_SESSION_IMPL *session, int which); extern void __wt_hazard_close(WT_SESSION_IMPL *session); extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor); -extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags); +extern void __wt_las_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags); extern void __wt_las_remove_dropped(WT_SESSION_IMPL *session); extern void __wt_las_stats_update(WT_SESSION_IMPL *session); extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn); extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckpt_lsn); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); -extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot); +extern void __wt_log_slot_join( + WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot); extern void __wt_log_written_reset(WT_SESSION_IMPL *session); extern void __wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield); extern void __wt_logrec_free(WT_SESSION_IMPL *session, WT_ITEM **logrecp); extern void __wt_lsm_manager_clear_tree(WT_SESSION_IMPL 
*session, WT_LSM_TREE *lsm_tree); -extern void __wt_lsm_manager_free_work_unit( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry); +extern void __wt_lsm_manager_free_work_unit(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry); extern void __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); extern void __wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); extern void __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); -extern void __wt_lsm_tree_throttle( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only); +extern void __wt_lsm_tree_throttle( + WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only); extern void __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); extern void __wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt); extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep); extern void __wt_meta_track_discard(WT_SESSION_IMPL *session); extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session); -extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_optrack_flush_buffer(WT_SESSION_IMPL *s); -extern void __wt_optrack_record_funcid( WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp); +extern void __wt_optrack_record_funcid( + WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp); extern void __wt_os_stdio(WT_SESSION_IMPL *session); extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page); extern void __wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page); extern void __wt_ovfl_reuse_free(WT_SESSION_IMPL *session, WT_PAGE *page); extern 
void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep); extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); -extern void __wt_random_init(WT_RAND_STATE volatile * rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile * rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_random_init_seed(WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r); extern void __wt_rec_dictionary_reset(WT_RECONCILE *r); extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref); -extern void __wt_root_ref_init(WT_SESSION_IMPL *session, WT_REF *root_ref, WT_PAGE *root, bool is_recno); +extern void __wt_root_ref_init( + WT_SESSION_IMPL *session, WT_REF *root_ref, WT_PAGE *root, bool is_recno); extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_schema_destroy_colgroup(WT_SESSION_IMPL *session, WT_COLGROUP **colgroupp); extern void __wt_scr_discard(WT_SESSION_IMPL *session); -extern void __wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_session_close_cache(WT_SESSION_IMPL *session); extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int which); extern void __wt_session_gen_leave(WT_SESSION_IMPL *session, int which); extern void __wt_stash_discard(WT_SESSION_IMPL *session); extern void 
__wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session); -extern void __wt_stat_connection_aggregate( WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to); +extern void __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to); extern void __wt_stat_connection_clear_all(WT_CONNECTION_STATS **stats); extern void __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats); -extern void __wt_stat_connection_discard( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle); +extern void __wt_stat_connection_discard(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle); extern void __wt_stat_connection_init_single(WT_CONNECTION_STATS *stats); -extern void __wt_stat_dsrc_aggregate( WT_DSRC_STATS **from, WT_DSRC_STATS *to); -extern void __wt_stat_dsrc_aggregate_single( WT_DSRC_STATS *from, WT_DSRC_STATS *to); +extern void __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to); +extern void __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to); extern void __wt_stat_dsrc_clear_all(WT_DSRC_STATS **stats); extern void __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats); -extern void __wt_stat_dsrc_discard( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle); +extern void __wt_stat_dsrc_discard(WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle); extern void __wt_stat_dsrc_init_single(WT_DSRC_STATS *stats); -extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to); +extern void __wt_stat_join_aggregate(WT_JOIN_STATS **from, WT_JOIN_STATS *to); extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats); extern void __wt_stat_join_clear_single(WT_JOIN_STATS *stats); extern void __wt_stat_join_init_single(WT_JOIN_STATS *stats); extern void __wt_stat_session_clear_single(WT_SESSION_STATS *stats); extern void __wt_stat_session_init_single(WT_SESSION_STATS *stats); -extern void __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked); +extern void 
__wt_thread_group_start_one( + WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked); extern void __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group); extern void __wt_timestamp_to_hex_string(wt_timestamp_t ts, char *hex_timestamp); extern void __wt_txn_clear_durable_timestamp(WT_SESSION_IMPL *session); @@ -897,152 +1731,306 @@ extern void __wt_txn_release_resources(WT_SESSION_IMPL *session); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_stats_update(WT_SESSION_IMPL *session); extern void __wt_txn_truncate_end(WT_SESSION_IMPL *session); -extern void __wt_verbose_timestamp( WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg); -extern void __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)); +extern void __wt_verbose_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg); +extern void __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...) 
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)); extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); -static inline WT_CELL * __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline WT_IKEY * __wt_ref_key_instantiated(WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_cache_aggressive(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_cache_full(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_cache_stuck(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_checksum_match(const void *chunk, size_t len, uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_eviction_needed( WT_SESSION_IMPL *session, bool busy, bool readonly, double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline WT_CELL *__wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline WT_IKEY *__wt_ref_key_instantiated(WT_REF 
*ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_cache_aggressive(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_cache_full(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_cache_stuck(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_checksum_match(const void *chunk, size_t len, uint32_t v) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_eviction_needed(WT_SESSION_IMPL *session, bool busy, bool readonly, + double *pct_fullp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_isalnum(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_isalpha(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_isdigit(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_isprint(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_isspace(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_off_page(WT_PAGE *page, const void *p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_page_evict_clean(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_page_is_empty(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_page_is_modified(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_rec_need_split(WT_RECONCILE *r, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state, uint32_t new_state, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_off_page(WT_PAGE *page, const void *p) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_page_evict_clean(WT_PAGE *page) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_page_is_empty(WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_page_is_modified(WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_rec_need_split(WT_RECONCILE *r, size_t len) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state, + uint32_t new_state, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_ref_is_root(WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_row_leaf_key_info(WT_PAGE *page, void *copy, WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_split_descent_race( WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_am_oldest(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL 
*session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_visible( WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_visible_all( WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline double __wt_eviction_dirty_target(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_btree_block_free( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_buf_set( WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_buf_setstr(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *s) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_cache_eviction_check( WT_SESSION_IMPL *session, bool busy, bool readonly, bool *didworkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_cell_pack_value_match(WT_CELL *page_cell, WT_CELL *val_cell, const uint8_t *val_data, bool *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp, size_t *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_extlist_read_pair(const uint8_t **p, wt_off_t *offp, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_extlist_write_pair(uint8_t **p, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fextend(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int 
__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fs_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fs_directory_list_free( WT_SESSION_IMPL *session, char ***dirlistp, u_int count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fs_directory_list_single(WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fs_rename( WT_SESSION_IMPL *session, const char *from, const char *to, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, 
size_t new_ins_size, u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_lex_compare_skip( const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_page_cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_page_parent_modify_set( WT_SESSION_IMPL *session, WT_REF *ref, bool page_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_row_leaf_key_info(WT_PAGE *page, void *copy, WT_IKEY **ikeyp, + WT_CELL **cellp, void *datap, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref, + WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_am_oldest(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool 
__wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id, + wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline double __wt_eviction_dirty_target(WT_CACHE *cache) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_btree_block_free(WT_SESSION_IMPL *session, const uint8_t *addr, + size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_buf_set(WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, + size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_buf_setstr(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *s) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, + bool *didworkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
+static inline int __wt_cell_pack_value_match(WT_CELL *page_cell, WT_CELL *val_cell, + const uint8_t *val_data, bool *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, + WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, + WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, + uint64_t *recnop, u_int skipdepth, bool exclusive) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, + const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, + const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp, size_t *matchp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, int page_type, + WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_extlist_read_pair(const uint8_t **p, wt_off_t *offp, wt_off_t *sizep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_extlist_write_pair(uint8_t **p, wt_off_t off, wt_off_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fextend(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t 
offset) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_file_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fs_directory_list( + WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fs_directory_list_free(WT_SESSION_IMPL *session, char ***dirlistp, + u_int count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fs_directory_list_single( + WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to, + bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_ftruncate(WT_SESSION_IMPL *session, 
WT_FH *fh, wt_off_t offset) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, + WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, + u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_lex_compare_skip(const WT_ITEM *user_item, const WT_ITEM *tree_item, + size_t *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_page_cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, + WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_page_parent_modify_set(WT_SESSION_IMPL *session, WT_REF *ref, bool page_only) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline int __wt_page_swap_func( - WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags + WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags #ifdef HAVE_DIAGNOSTIC - , const char *func, int line + , + const char *func, int line #endif - ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int 
__wt_read( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, WT_REC_KV *val) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_row_leaf_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_snprintf(char *buf, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_snprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 4, 5))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_snprintf_len_set( char *buf, size_t size, size_t *retsizep, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 4, 5))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_struct_packv(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_struct_sizev( WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_struct_unpackv(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_sync_and_rename(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp, const char *from, const char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_activity_check(WT_SESSION_IMPL *session, bool *txn_active) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_autocommit_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int 
__wt_txn_id_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_resolve_prepared_op( WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, int64_t *resolved_update_countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_search_check(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vfprintf( WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vpack_uint(uint8_t **pp, size_t maxlen, 
uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vsnprintf_len_set( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, uint64_t recno, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_pack_int_key(WT_CELL *cell, size_t size) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_cell_total_len(WT_CELL_UNPACK *unpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_strnlen(const char *s, size_t maxlen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline size_t __wt_update_list_memsize(WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + ) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, + void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, + const void *data, size_t size, wt_timestamp_t start_ts, uint64_t start_txn, + wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, + wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, + uint64_t rle, WT_REC_KV *val) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_row_leaf_key(WT_SESSION_IMPL *session, WT_PAGE *page, 
WT_ROW *rip, + WT_ITEM *key, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_snprintf(char *buf, size_t size, const char *fmt, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 3, 4))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_snprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, + ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 4, 5))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_snprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt, + ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 4, 5))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_struct_packv(WT_SESSION_IMPL *session, void *buffer, size_t size, + const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_struct_sizev(WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, + va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_struct_unpackv(WT_SESSION_IMPL *session, const void *buffer, size_t size, + const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_sync_and_rename(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp, + const char *from, const char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_activity_check(WT_SESSION_IMPL 
*session, bool *txn_active) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_autocommit_check(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_id_check(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, + int64_t *resolved_update_countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_search_check(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd, + WT_UPDATE **updp, size_t upd_size, bool exclusive) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vfprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, + va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vsnprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt, + va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, + const void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, + uint64_t recno, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, + uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, 
uint64_t newest_stop_txn, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, + wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, + uint64_t rle, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, + wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, + uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_pack_int_key(WT_CELL *cell, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, + wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, + uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, + wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, + uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_cell_total_len(WT_CELL_UNPACK *unpack) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_strnlen(const char *s, size_t maxlen) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline size_t __wt_update_list_memsize(WT_UPDATE *upd) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline size_t __wt_vsize_int(int64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline size_t __wt_vsize_negint(uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline size_t __wt_vsize_posint(uint64_t x) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -1050,32 +2038,54 @@ static inline size_t __wt_vsize_uint(uint64_t x) WT_GCC_FUNC_DECL_ATTRIBUTE((war static inline u_char __wt_hex(int c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline u_char __wt_tolower(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline u_int __wt_cell_type(WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline u_int __wt_cell_type_raw(WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline u_int __wt_skip_choose_depth(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint32_t __wt_cache_lookaside_score(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_btree_bytes_evictable(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_btree_bytes_inuse(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_btree_dirty_inuse(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_btree_dirty_leaf_inuse(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cache_bytes_image(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cache_bytes_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cache_bytes_other(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cache_bytes_plus_overhead(WT_CACHE *cache, uint64_t sz) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cache_dirty_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cache_dirty_leaf_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static 
inline uint64_t __wt_cache_pages_inuse(WT_CACHE *cache) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cache_read_gen(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK *unpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_clock(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline u_int __wt_cell_type_raw(WT_CELL *cell) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline u_int __wt_skip_choose_depth(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint32_t __wt_cache_lookaside_score(WT_CACHE *cache) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_btree_bytes_evictable(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_btree_bytes_inuse(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_btree_dirty_inuse(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_btree_dirty_leaf_inuse(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_bytes_image(WT_CACHE *cache) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_bytes_inuse(WT_CACHE *cache) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_bytes_other(WT_CACHE *cache) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_bytes_plus_overhead(WT_CACHE *cache, uint64_t sz) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_dirty_inuse(WT_CACHE *cache) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_dirty_leaf_inuse(WT_CACHE *cache) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_pages_inuse(WT_CACHE *cache) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cache_read_gen(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK *unpack) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_clock(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline uint64_t __wt_rdtsc(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline uint64_t __wt_txn_oldest_id(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline uint64_t __wt_txn_oldest_id(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline void __wt_buf_free(WT_SESSION_IMPL *session, WT_ITEM *buf); -static inline void __wt_cache_decr_check_size( WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld); -static inline void __wt_cache_decr_check_uint64( WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld); +static inline void __wt_cache_decr_check_size( + WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld); +static inline void __wt_cache_decr_check_uint64( + WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld); static inline void __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page); static inline void __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page); -static inline void __wt_cache_page_byte_dirty_decr( WT_SESSION_IMPL *session, WT_PAGE *page, size_t size); +static inline void __wt_cache_page_byte_dirty_decr( + 
WT_SESSION_IMPL *session, WT_PAGE *page, size_t size); static inline void __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page); static inline void __wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size); static inline void __wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size); @@ -1084,31 +2094,46 @@ static inline void __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE static inline void __wt_cache_read_gen_bump(WT_SESSION_IMPL *session, WT_PAGE *page); static inline void __wt_cache_read_gen_incr(WT_SESSION_IMPL *session); static inline void __wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page); -static inline void __wt_cache_update_lookaside_score( WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable); -static inline void __wt_cell_type_reset( WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type); -static inline void __wt_cell_unpack(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack); -static inline void __wt_cell_unpack_dsk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack); -static inline void __wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn); -static inline void __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *)); +static inline void __wt_cache_update_lookaside_score( + WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable); +static inline void __wt_cell_type_reset( + WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type); +static inline void __wt_cell_unpack( + WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack); +static inline void __wt_cell_unpack_dsk( + WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack); +static inline void 
__wt_check_addr_validity(WT_SESSION_IMPL *session, + wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, + uint64_t newest_stop_txn); +static inline void __wt_cond_wait( + WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *)); static inline void __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session); static inline void __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session); static inline void __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref); static inline void __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page); static inline void __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page); static inline void __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page); -static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *newest_durable_ts, wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *newest_stop_tsp, uint64_t *newest_stop_txnp); -static inline void __wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t newest_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn); -static inline void __wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno); +static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *newest_durable_ts, + wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *newest_stop_tsp, + uint64_t *newest_stop_txnp); +static inline void __wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t newest_durable_ts, + wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, + uint64_t newest_stop_txn); +static inline void __wt_rec_cell_build_addr( + WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno); static inline void __wt_rec_image_copy(WT_SESSION_IMPL 
*session, WT_RECONCILE *r, WT_REC_KV *kv); -static inline void __wt_rec_incr( WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size); +static inline void __wt_rec_incr( + WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size); static inline void __wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref); -static inline void __wt_ref_info(WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep); +static inline void __wt_ref_info( + WT_SESSION_IMPL *session, WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep); static inline void __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep); static inline void __wt_ref_key_clear(WT_REF *ref); static inline void __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack); static inline void __wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack); static inline void __wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell); -static inline void __wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack); +static inline void __wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, + WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack); static inline void __wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack); static inline void __wt_scr_free(WT_SESSION_IMPL *session, WT_ITEM **bufp); static inline void __wt_spin_backoff(uint64_t *yield_count, uint64_t *sleep_usecs); @@ -1120,8 +2145,10 @@ static inline void __wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *siz static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag); static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session); static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session); -static inline void __wt_txn_op_apply_prepare_state( WT_SESSION_IMPL *session, WT_REF *ref, bool commit); -static inline void 
__wt_txn_op_delete_commit_apply_timestamps( WT_SESSION_IMPL *session, WT_REF *ref); +static inline void __wt_txn_op_apply_prepare_state( + WT_SESSION_IMPL *session, WT_REF *ref, bool commit); +static inline void __wt_txn_op_delete_commit_apply_timestamps( + WT_SESSION_IMPL *session, WT_REF *ref); static inline void __wt_txn_op_set_recno(WT_SESSION_IMPL *session, uint64_t recno); static inline void __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op); static inline void __wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp); diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h index 8186d59bc67..189bc948714 100644 --- a/src/third_party/wiredtiger/src/include/extern_posix.h +++ b/src/third_party/wiredtiger/src/include/extern_posix.h @@ -1,34 +1,61 @@ extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, + void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_os_posix(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_file_extend( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION 
*wt_session, + char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_file_extend(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, + wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, + size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, + size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, + size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, + size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, + WT_THREAD_CALLBACK (*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE( + (visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_vsnprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, + va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern 
uintmax_t __wt_process_id(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, + bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp); -extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_stream_set_line_buffer(FILE *fp) + WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_thread_id(uintmax_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h index 82ee283a213..116fe412dd9 100644 --- a/src/third_party/wiredtiger/src/include/extern_win.h +++ b/src/third_party/wiredtiger/src/include/extern_win.h @@ -1,34 +1,60 @@ +extern BOOL CALLBACK __wt_init_once_callback( + _Inout_ PINIT_ONCE InitOnce, _Inout_opt_ PVOID Parameter, _Out_opt_ PVOID *Context) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern DWORD __wt_getlasterror(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_absolute_path(const char *path) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern const char * __wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, + void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t 
*timep, struct tm *result) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_map_windows_error(DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_localtime(WT_SESSION_IMPL *session, const time_t *timep, struct tm *result) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_map_windows_error(DWORD windows_error) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_os_win(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_win_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, + WT_THREAD_CALLBACK (*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_str(char *buf, size_t buflen) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_to_utf16_string(WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_to_utf8_string(WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_vsnprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, + va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + 
char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, + const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, + wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, + size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, + size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uintmax_t __wt_process_id(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, + bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); extern void __wt_stream_set_line_buffer(FILE *fp); diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h index 7ee64cb663f..052fb35d3a7 100644 --- a/src/third_party/wiredtiger/src/include/gcc.h +++ b/src/third_party/wiredtiger/src/include/gcc.h @@ -6,23 +6,23 @@ * See the file LICENSE for redistribution information. 
*/ -#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ -#define WT_SIZET_FMT "zu" /* size_t format string */ +#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ +#define WT_SIZET_FMT "zu" /* size_t format string */ /* GCC-specific attributes. */ -#define WT_PACKED_STRUCT_BEGIN(name) \ - /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ - struct __attribute__ ((__packed__)) name { -#define WT_PACKED_STRUCT_END \ - }; +#define WT_PACKED_STRUCT_BEGIN(name) \ + /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ + struct __attribute__((__packed__)) name { +#define WT_PACKED_STRUCT_END \ + } \ + ; /* - * Attribute are only permitted on function declarations, not definitions. - * This macro is a marker for function definitions that is rewritten by - * dist/s_prototypes to create extern.h. + * Attribute are only permitted on function declarations, not definitions. This macro is a marker + * for function definitions that is rewritten by dist/s_prototypes to create extern.h. */ -#define WT_GCC_FUNC_ATTRIBUTE(x) -#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) __attribute__(x) +#define WT_GCC_FUNC_ATTRIBUTE(x) +#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) __attribute__(x) /* * Atomic writes: @@ -90,67 +90,56 @@ */ /* - * We've hit optimization bugs with Clang 3.5 in the past when using the atomic - * builtins. See http://llvm.org/bugs/show_bug.cgi?id=21499 for details. + * We've hit optimization bugs with Clang 3.5 in the past when using the atomic builtins. See + * http://llvm.org/bugs/show_bug.cgi?id=21499 for details. 
*/ -#if defined(__clang__) && \ - defined(__clang_major__) && defined(__clang_minor__) && \ - (((__clang_major__ == 3) && (__clang_minor__ <= 5)) || \ - (__clang_major__ < 3)) +#if defined(__clang__) && defined(__clang_major__) && defined(__clang_minor__) && \ + (((__clang_major__ == 3) && (__clang_minor__ <= 5)) || (__clang_major__ < 3)) #error "Clang versions 3.5 and earlier are unsupported by WiredTiger" #endif -#define WT_ATOMIC_CAS(ptr, oldp, new) \ - __atomic_compare_exchange_n( \ - ptr, oldp, new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) -#define WT_ATOMIC_CAS_FUNC(name, vp_arg, old_arg, new_arg) \ -static inline bool \ -__wt_atomic_cas##name(vp_arg, old_arg, new_arg) \ -{ \ - return (WT_ATOMIC_CAS(vp, &old, new)); \ -} +#define WT_ATOMIC_CAS(ptr, oldp, new) \ + __atomic_compare_exchange_n(ptr, oldp, new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define WT_ATOMIC_CAS_FUNC(name, vp_arg, old_arg, new_arg) \ + static inline bool __wt_atomic_cas##name(vp_arg, old_arg, new_arg) \ + { \ + return (WT_ATOMIC_CAS(vp, &old, new)); \ + } WT_ATOMIC_CAS_FUNC(8, uint8_t *vp, uint8_t old, uint8_t new) WT_ATOMIC_CAS_FUNC(16, uint16_t *vp, uint16_t old, uint16_t new) WT_ATOMIC_CAS_FUNC(32, uint32_t *vp, uint32_t old, uint32_t new) -WT_ATOMIC_CAS_FUNC(v32, \ - volatile uint32_t *vp, uint32_t old, volatile uint32_t new) +WT_ATOMIC_CAS_FUNC(v32, volatile uint32_t *vp, uint32_t old, volatile uint32_t new) WT_ATOMIC_CAS_FUNC(i32, int32_t *vp, int32_t old, int32_t new) -WT_ATOMIC_CAS_FUNC(iv32, \ - volatile int32_t *vp, int32_t old, volatile int32_t new) +WT_ATOMIC_CAS_FUNC(iv32, volatile int32_t *vp, int32_t old, volatile int32_t new) WT_ATOMIC_CAS_FUNC(64, uint64_t *vp, uint64_t old, uint64_t new) -WT_ATOMIC_CAS_FUNC(v64, \ - volatile uint64_t *vp, uint64_t old, volatile uint64_t new) +WT_ATOMIC_CAS_FUNC(v64, volatile uint64_t *vp, uint64_t old, volatile uint64_t new) WT_ATOMIC_CAS_FUNC(i64, int64_t *vp, int64_t old, int64_t new) -WT_ATOMIC_CAS_FUNC(iv64, \ - volatile 
int64_t *vp, int64_t old, volatile int64_t new) +WT_ATOMIC_CAS_FUNC(iv64, volatile int64_t *vp, int64_t old, volatile int64_t new) WT_ATOMIC_CAS_FUNC(size, size_t *vp, size_t old, size_t new) /* * __wt_atomic_cas_ptr -- - * Pointer compare and swap. + * Pointer compare and swap. */ static inline bool __wt_atomic_cas_ptr(void *vp, void *old, void *new) { - return (WT_ATOMIC_CAS((void **)vp, &old, new)); + return (WT_ATOMIC_CAS((void **)vp, &old, new)); } -#define WT_ATOMIC_FUNC(name, ret, vp_arg, v_arg) \ -static inline ret \ -__wt_atomic_add##name(vp_arg, v_arg) \ -{ \ - return (__atomic_add_fetch(vp, v, __ATOMIC_SEQ_CST)); \ -} \ -static inline ret \ -__wt_atomic_fetch_add##name(vp_arg, v_arg) \ -{ \ - return (__atomic_fetch_add(vp, v, __ATOMIC_SEQ_CST)); \ -} \ -static inline ret \ -__wt_atomic_sub##name(vp_arg, v_arg) \ -{ \ - return (__atomic_sub_fetch(vp, v, __ATOMIC_SEQ_CST)); \ -} +#define WT_ATOMIC_FUNC(name, ret, vp_arg, v_arg) \ + static inline ret __wt_atomic_add##name(vp_arg, v_arg) \ + { \ + return (__atomic_add_fetch(vp, v, __ATOMIC_SEQ_CST)); \ + } \ + static inline ret __wt_atomic_fetch_add##name(vp_arg, v_arg) \ + { \ + return (__atomic_fetch_add(vp, v, __ATOMIC_SEQ_CST)); \ + } \ + static inline ret __wt_atomic_sub##name(vp_arg, v_arg) \ + { \ + return (__atomic_sub_fetch(vp, v, __ATOMIC_SEQ_CST)); \ + } WT_ATOMIC_FUNC(8, uint8_t, uint8_t *vp, uint8_t v) WT_ATOMIC_FUNC(16, uint16_t, uint16_t *vp, uint16_t v) WT_ATOMIC_FUNC(32, uint32_t, uint32_t *vp, uint32_t v) @@ -164,83 +153,97 @@ WT_ATOMIC_FUNC(iv64, int64_t, volatile int64_t *vp, volatile int64_t v) WT_ATOMIC_FUNC(size, size_t, size_t *vp, size_t v) /* Compile read-write barrier */ -#define WT_BARRIER() __asm__ volatile("" ::: "memory") +#define WT_BARRIER() __asm__ volatile("" ::: "memory") #if defined(x86_64) || defined(__x86_64__) /* Pause instruction to prevent excess processor bus usage */ -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") -#define WT_FULL_BARRIER() do { \ - 
__asm__ volatile ("mfence" ::: "memory"); \ -} while (0) -#define WT_READ_BARRIER() do { \ - __asm__ volatile ("lfence" ::: "memory"); \ -} while (0) -#define WT_WRITE_BARRIER() do { \ - __asm__ volatile ("sfence" ::: "memory"); \ -} while (0) +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") +#define WT_FULL_BARRIER() \ + do { \ + __asm__ volatile("mfence" ::: "memory"); \ + } while (0) +#define WT_READ_BARRIER() \ + do { \ + __asm__ volatile("lfence" ::: "memory"); \ + } while (0) +#define WT_WRITE_BARRIER() \ + do { \ + __asm__ volatile("sfence" ::: "memory"); \ + } while (0) #elif defined(i386) || defined(__i386__) -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") -#define WT_FULL_BARRIER() do { \ - __asm__ volatile ("lock; addl $0, 0(%%esp)" ::: "memory"); \ -} while (0) -#define WT_READ_BARRIER() WT_FULL_BARRIER() -#define WT_WRITE_BARRIER() WT_FULL_BARRIER() +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") +#define WT_FULL_BARRIER() \ + do { \ + __asm__ volatile("lock; addl $0, 0(%%esp)" ::: "memory"); \ + } while (0) +#define WT_READ_BARRIER() WT_FULL_BARRIER() +#define WT_WRITE_BARRIER() WT_FULL_BARRIER() #elif defined(__PPC64__) || defined(PPC64) /* ori 0,0,0 is the PPC64 noop instruction */ -#define WT_PAUSE() __asm__ volatile("ori 0,0,0" ::: "memory") -#define WT_FULL_BARRIER() do { \ - __asm__ volatile ("sync" ::: "memory"); \ -} while (0) +#define WT_PAUSE() __asm__ volatile("ori 0,0,0" ::: "memory") +#define WT_FULL_BARRIER() \ + do { \ + __asm__ volatile("sync" ::: "memory"); \ + } while (0) /* TODO: ISA 2.07 Elemental Memory Barriers would be better, specifically mbll, and mbss, but they are not supported by POWER 8 */ -#define WT_READ_BARRIER() do { \ - __asm__ volatile ("lwsync" ::: "memory"); \ -} while (0) -#define WT_WRITE_BARRIER() do { \ - __asm__ volatile ("lwsync" ::: "memory"); \ -} while (0) +#define WT_READ_BARRIER() \ + do { \ + __asm__ volatile("lwsync" ::: "memory"); \ + } while (0) +#define 
WT_WRITE_BARRIER() \ + do { \ + __asm__ volatile("lwsync" ::: "memory"); \ + } while (0) #elif defined(__aarch64__) -#define WT_PAUSE() __asm__ volatile("yield" ::: "memory") -#define WT_FULL_BARRIER() do { \ - __asm__ volatile ("dsb sy" ::: "memory"); \ -} while (0) -#define WT_READ_BARRIER() do { \ - __asm__ volatile ("dsb ld" ::: "memory"); \ -} while (0) -#define WT_WRITE_BARRIER() do { \ - __asm__ volatile ("dsb st" ::: "memory"); \ -} while (0) +#define WT_PAUSE() __asm__ volatile("yield" ::: "memory") +#define WT_FULL_BARRIER() \ + do { \ + __asm__ volatile("dsb sy" ::: "memory"); \ + } while (0) +#define WT_READ_BARRIER() \ + do { \ + __asm__ volatile("dsb ld" ::: "memory"); \ + } while (0) +#define WT_WRITE_BARRIER() \ + do { \ + __asm__ volatile("dsb st" ::: "memory"); \ + } while (0) #elif defined(__s390x__) -#define WT_PAUSE() __asm__ volatile("lr 0,0" ::: "memory") -#define WT_FULL_BARRIER() do { \ - __asm__ volatile ("bcr 15,0\n" ::: "memory"); \ -} while (0) -#define WT_READ_BARRIER() WT_FULL_BARRIER() -#define WT_WRITE_BARRIER() WT_FULL_BARRIER() +#define WT_PAUSE() __asm__ volatile("lr 0,0" ::: "memory") +#define WT_FULL_BARRIER() \ + do { \ + __asm__ volatile("bcr 15,0\n" ::: "memory"); \ + } while (0) +#define WT_READ_BARRIER() WT_FULL_BARRIER() +#define WT_WRITE_BARRIER() WT_FULL_BARRIER() #elif defined(__sparc__) -#define WT_PAUSE() __asm__ volatile("rd %%ccr, %%g0" ::: "memory") +#define WT_PAUSE() __asm__ volatile("rd %%ccr, %%g0" ::: "memory") -#define WT_FULL_BARRIER() do { \ - __asm__ volatile ("membar #StoreLoad" ::: "memory"); \ -} while (0) +#define WT_FULL_BARRIER() \ + do { \ + __asm__ volatile("membar #StoreLoad" ::: "memory"); \ + } while (0) /* - * On UltraSparc machines, TSO is used, and so there is no need for membar. - * READ_BARRIER = #LoadLoad, and WRITE_BARRIER = #StoreStore are noop. + * On UltraSparc machines, TSO is used, and so there is no need for membar. 
READ_BARRIER = + * #LoadLoad, and WRITE_BARRIER = #StoreStore are noop. */ -#define WT_READ_BARRIER() do { \ - __asm__ volatile ("" ::: "memory"); \ -} while (0) - -#define WT_WRITE_BARRIER() do { \ - __asm__ volatile ("" ::: "memory"); \ -} while (0) +#define WT_READ_BARRIER() \ + do { \ + __asm__ volatile("" ::: "memory"); \ + } while (0) + +#define WT_WRITE_BARRIER() \ + do { \ + __asm__ volatile("" ::: "memory"); \ + } while (0) #else #error "No write barrier implementation for this hardware" diff --git a/src/third_party/wiredtiger/src/include/hardware.h b/src/third_party/wiredtiger/src/include/hardware.h index c4e26569fe8..447d082393e 100644 --- a/src/third_party/wiredtiger/src/include/hardware.h +++ b/src/third_party/wiredtiger/src/include/hardware.h @@ -7,53 +7,54 @@ */ /* - * Publish a value to a shared location. All previous stores must complete - * before the value is made public. + * Publish a value to a shared location. All previous stores must complete before the value is made + * public. */ -#define WT_PUBLISH(v, val) do { \ - WT_WRITE_BARRIER(); \ - (v) = (val); \ -} while (0) +#define WT_PUBLISH(v, val) \ + do { \ + WT_WRITE_BARRIER(); \ + (v) = (val); \ + } while (0) /* - * Read a shared location and guarantee that subsequent reads do not see any - * earlier state. + * Read a shared location and guarantee that subsequent reads do not see any earlier state. */ -#define WT_ORDERED_READ(v, val) do { \ - (v) = (val); \ - WT_READ_BARRIER(); \ -} while (0) +#define WT_ORDERED_READ(v, val) \ + do { \ + (v) = (val); \ + WT_READ_BARRIER(); \ + } while (0) /* * Atomic versions of the flag set/clear macros. 
*/ -#define F_ISSET_ATOMIC(p, mask) ((p)->flags_atomic & (uint8_t)(mask)) +#define F_ISSET_ATOMIC(p, mask) ((p)->flags_atomic & (uint8_t)(mask)) -#define F_SET_ATOMIC(p, mask) do { \ - uint8_t __orig; \ - do { \ - __orig = (p)->flags_atomic; \ - } while (!__wt_atomic_cas8( \ - &(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \ -} while (0) +#define F_SET_ATOMIC(p, mask) \ + do { \ + uint8_t __orig; \ + do { \ + __orig = (p)->flags_atomic; \ + } while (!__wt_atomic_cas8(&(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \ + } while (0) -#define F_CLR_ATOMIC(p, mask) do { \ - uint8_t __orig; \ - do { \ - __orig = (p)->flags_atomic; \ - } while (!__wt_atomic_cas8( \ - &(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \ -} while (0) +#define F_CLR_ATOMIC(p, mask) \ + do { \ + uint8_t __orig; \ + do { \ + __orig = (p)->flags_atomic; \ + } while (!__wt_atomic_cas8(&(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \ + } while (0) /* * Cache line alignment. */ #if defined(__PPC64__) || defined(PPC64) -#define WT_CACHE_LINE_ALIGNMENT 128 +#define WT_CACHE_LINE_ALIGNMENT 128 #elif defined(__s390x__) -#define WT_CACHE_LINE_ALIGNMENT 256 +#define WT_CACHE_LINE_ALIGNMENT 256 #else -#define WT_CACHE_LINE_ALIGNMENT 64 +#define WT_CACHE_LINE_ALIGNMENT 64 #endif /* @@ -69,5 +70,12 @@ * anonymous union here which is supported under C11, earlier versions of * the GNU standard, and MSVC versions as early as 2003. 
*/ -#define WT_CACHE_LINE_PAD_BEGIN union { struct { -#define WT_CACHE_LINE_PAD_END }; char __padding[WT_CACHE_LINE_ALIGNMENT]; }; +#define WT_CACHE_LINE_PAD_BEGIN \ + union { \ + struct { +#define WT_CACHE_LINE_PAD_END \ + } \ + ; \ + char __padding[WT_CACHE_LINE_ALIGNMENT]; \ + } \ + ; diff --git a/src/third_party/wiredtiger/src/include/intpack.i b/src/third_party/wiredtiger/src/include/intpack.i index aef94460262..e22adcd913a 100644 --- a/src/third_party/wiredtiger/src/include/intpack.i +++ b/src/third_party/wiredtiger/src/include/intpack.i @@ -31,353 +31,351 @@ * [11 11xxxx] | free | N/A | N/A */ -#define NEG_MULTI_MARKER (uint8_t)0x10 -#define NEG_2BYTE_MARKER (uint8_t)0x20 -#define NEG_1BYTE_MARKER (uint8_t)0x40 -#define POS_1BYTE_MARKER (uint8_t)0x80 -#define POS_2BYTE_MARKER (uint8_t)0xc0 -#define POS_MULTI_MARKER (uint8_t)0xe0 - -#define NEG_1BYTE_MIN (-(1 << 6)) -#define NEG_2BYTE_MIN (-(1 << 13) + NEG_1BYTE_MIN) -#define POS_1BYTE_MAX ((1 << 6) - 1) -#define POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX) +#define NEG_MULTI_MARKER (uint8_t)0x10 +#define NEG_2BYTE_MARKER (uint8_t)0x20 +#define NEG_1BYTE_MARKER (uint8_t)0x40 +#define POS_1BYTE_MARKER (uint8_t)0x80 +#define POS_2BYTE_MARKER (uint8_t)0xc0 +#define POS_MULTI_MARKER (uint8_t)0xe0 + +#define NEG_1BYTE_MIN (-(1 << 6)) +#define NEG_2BYTE_MIN (-(1 << 13) + NEG_1BYTE_MIN) +#define POS_1BYTE_MAX ((1 << 6) - 1) +#define POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX) /* Extract bits <start> to <end> from a value (counting from LSB == 0). */ -#define GET_BITS(x, start, end) \ - (((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end)) +#define GET_BITS(x, start, end) (((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end)) /* - * Size checks: return ENOMEM if not enough room when writing, EINVAL if the - * length is wrong when reading (presumably the value is corrupted). + * Size checks: return ENOMEM if not enough room when writing, EINVAL if the length is wrong when + * reading (presumably the value is corrupted). 
*/ -#define WT_SIZE_CHECK_PACK(l, maxl) \ - WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM) -#define WT_SIZE_CHECK_UNPACK(l, maxl) \ - WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), EINVAL) +#define WT_SIZE_CHECK_PACK(l, maxl) WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM) +#define WT_SIZE_CHECK_UNPACK(l, maxl) WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), EINVAL) /* Count the leading zero bytes. */ #if defined(__GNUC__) -#define WT_LEADING_ZEROS(x, i) \ - ((i) = ((x) == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3) +#define WT_LEADING_ZEROS(x, i) ((i) = ((x) == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3) #elif defined(_MSC_VER) -#define WT_LEADING_ZEROS(x, i) do { \ - if ((x) == 0) (i) = (int)sizeof(x); \ - else { \ - unsigned long __index; \ - _BitScanReverse64(&__index, x); \ - __index = 63 ^ __index; \ - (i) = (int)(__index >> 3); } \ - } while (0) +#define WT_LEADING_ZEROS(x, i) \ + do { \ + if ((x) == 0) \ + (i) = (int)sizeof(x); \ + else { \ + unsigned long __index; \ + _BitScanReverse64(&__index, x); \ + __index = 63 ^ __index; \ + (i) = (int)(__index >> 3); \ + } \ + } while (0) #else -#define WT_LEADING_ZEROS(x, i) do { \ - uint64_t __x = (x); \ - uint64_t __m = (uint64_t)0xff << 56; \ - for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \ - __m >>= 8; \ -} while (0) +#define WT_LEADING_ZEROS(x, i) \ + do { \ + uint64_t __x = (x); \ + uint64_t __m = (uint64_t)0xff << 56; \ + for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \ + __m >>= 8; \ + } while (0) #endif /* * __wt_vpack_posint -- - * Packs a positive variable-length integer in the specified location. + * Packs a positive variable-length integer in the specified location. 
*/ static inline int __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x) { - uint8_t *p; - int len, lz, shift; + uint8_t *p; + int len, lz, shift; - WT_LEADING_ZEROS(x, lz); - len = (int)sizeof(x) - lz; - WT_SIZE_CHECK_PACK(len + 1, maxlen); - p = *pp; + WT_LEADING_ZEROS(x, lz); + len = (int)sizeof(x) - lz; + WT_SIZE_CHECK_PACK(len + 1, maxlen); + p = *pp; - /* There are four bits we can use in the first byte. */ - *p++ |= (len & 0xf); + /* There are four bits we can use in the first byte. */ + *p++ |= (len & 0xf); - for (shift = (len - 1) << 3; len != 0; --len, shift -= 8) - *p++ = (uint8_t)(x >> shift); + for (shift = (len - 1) << 3; len != 0; --len, shift -= 8) + *p++ = (uint8_t)(x >> shift); - *pp = p; - return (0); + *pp = p; + return (0); } /* * __wt_vpack_negint -- - * Packs a negative variable-length integer in the specified location. + * Packs a negative variable-length integer in the specified location. */ static inline int __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x) { - uint8_t *p; - int len, lz, shift; - - WT_LEADING_ZEROS(~x, lz); - len = (int)sizeof(x) - lz; - WT_SIZE_CHECK_PACK(len + 1, maxlen); - p = *pp; - - /* - * There are four size bits we can use in the first byte. - * For negative numbers, we store the number of leading 0xff bytes - * to maintain ordering (if this is not obvious, it may help to - * remember that -1 is the largest negative number). - */ - *p++ |= (lz & 0xf); - - for (shift = (len - 1) << 3; len != 0; shift -= 8, --len) - *p++ = (uint8_t)(x >> shift); - - *pp = p; - return (0); + uint8_t *p; + int len, lz, shift; + + WT_LEADING_ZEROS(~x, lz); + len = (int)sizeof(x) - lz; + WT_SIZE_CHECK_PACK(len + 1, maxlen); + p = *pp; + + /* + * There are four size bits we can use in the first byte. For negative numbers, we store the + * number of leading 0xff bytes to maintain ordering (if this is not obvious, it may help to + * remember that -1 is the largest negative number). 
+ */ + *p++ |= (lz & 0xf); + + for (shift = (len - 1) << 3; len != 0; shift -= 8, --len) + *p++ = (uint8_t)(x >> shift); + + *pp = p; + return (0); } /* * __wt_vunpack_posint -- - * Reads a variable-length positive integer from the specified location. + * Reads a variable-length positive integer from the specified location. */ static inline int __wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp) { - uint64_t x; - uint8_t len; - const uint8_t *p; + uint64_t x; + uint8_t len; + const uint8_t *p; - /* There are four length bits in the first byte. */ - p = *pp; - len = (*p++ & 0xf); - WT_SIZE_CHECK_UNPACK(len + 1, maxlen); + /* There are four length bits in the first byte. */ + p = *pp; + len = (*p++ & 0xf); + WT_SIZE_CHECK_UNPACK(len + 1, maxlen); - for (x = 0; len != 0; --len) - x = (x << 8) | *p++; + for (x = 0; len != 0; --len) + x = (x << 8) | *p++; - *retp = x; - *pp = p; - return (0); + *retp = x; + *pp = p; + return (0); } /* * __wt_vunpack_negint -- - * Reads a variable-length negative integer from the specified location. + * Reads a variable-length negative integer from the specified location. */ static inline int __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp) { - uint64_t x; - uint8_t len; - const uint8_t *p; + uint64_t x; + uint8_t len; + const uint8_t *p; - /* There are four length bits in the first byte. */ - p = *pp; - len = (int)sizeof(x) - (*p++ & 0xf); - WT_SIZE_CHECK_UNPACK(len + 1, maxlen); + /* There are four length bits in the first byte. 
*/ + p = *pp; + len = (int)sizeof(x) - (*p++ & 0xf); + WT_SIZE_CHECK_UNPACK(len + 1, maxlen); - for (x = UINT64_MAX; len != 0; --len) - x = (x << 8) | *p++; + for (x = UINT64_MAX; len != 0; --len) + x = (x << 8) | *p++; - *retp = x; - *pp = p; - return (0); + *retp = x; + *pp = p; + return (0); } /* * __wt_vpack_uint -- - * Variable-sized packing for unsigned integers + * Variable-sized packing for unsigned integers */ static inline int __wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x) { - uint8_t *p; - - WT_SIZE_CHECK_PACK(1, maxlen); - p = *pp; - if (x <= POS_1BYTE_MAX) - *p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0); - else if (x <= POS_2BYTE_MAX) { - WT_SIZE_CHECK_PACK(2, maxlen); - x -= POS_1BYTE_MAX + 1; - *p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8); - *p++ = GET_BITS(x, 8, 0); - } else if (x == POS_2BYTE_MAX + 1) { - /* - * This is a special case where we could store the value with - * just a single byte, but we append a zero byte so that the - * encoding doesn't get shorter for this one value. - */ - *p++ = POS_MULTI_MARKER | 0x1; - *p++ = 0; - } else { - x -= POS_2BYTE_MAX + 1; - *p = POS_MULTI_MARKER; - return (__wt_vpack_posint(pp, maxlen, x)); - } - - *pp = p; - return (0); + uint8_t *p; + + WT_SIZE_CHECK_PACK(1, maxlen); + p = *pp; + if (x <= POS_1BYTE_MAX) + *p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0); + else if (x <= POS_2BYTE_MAX) { + WT_SIZE_CHECK_PACK(2, maxlen); + x -= POS_1BYTE_MAX + 1; + *p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8); + *p++ = GET_BITS(x, 8, 0); + } else if (x == POS_2BYTE_MAX + 1) { + /* + * This is a special case where we could store the value with just a single byte, but we + * append a zero byte so that the encoding doesn't get shorter for this one value. 
+ */ + *p++ = POS_MULTI_MARKER | 0x1; + *p++ = 0; + } else { + x -= POS_2BYTE_MAX + 1; + *p = POS_MULTI_MARKER; + return (__wt_vpack_posint(pp, maxlen, x)); + } + + *pp = p; + return (0); } /* * __wt_vpack_int -- - * Variable-sized packing for signed integers + * Variable-sized packing for signed integers */ static inline int __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) { - uint8_t *p; - - WT_SIZE_CHECK_PACK(1, maxlen); - p = *pp; - if (x < NEG_2BYTE_MIN) { - *p = NEG_MULTI_MARKER; - return (__wt_vpack_negint(pp, maxlen, (uint64_t)x)); - } - if (x < NEG_1BYTE_MIN) { - WT_SIZE_CHECK_PACK(2, maxlen); - x -= NEG_2BYTE_MIN; - *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8); - *p++ = GET_BITS(x, 8, 0); - } else if (x < 0) { - x -= NEG_1BYTE_MIN; - *p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0); - } else - /* For non-negative values, use the unsigned code above. */ - return (__wt_vpack_uint(pp, maxlen, (uint64_t)x)); - - *pp = p; - return (0); + uint8_t *p; + + WT_SIZE_CHECK_PACK(1, maxlen); + p = *pp; + if (x < NEG_2BYTE_MIN) { + *p = NEG_MULTI_MARKER; + return (__wt_vpack_negint(pp, maxlen, (uint64_t)x)); + } + if (x < NEG_1BYTE_MIN) { + WT_SIZE_CHECK_PACK(2, maxlen); + x -= NEG_2BYTE_MIN; + *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8); + *p++ = GET_BITS(x, 8, 0); + } else if (x < 0) { + x -= NEG_1BYTE_MIN; + *p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0); + } else + /* For non-negative values, use the unsigned code above. 
*/ + return (__wt_vpack_uint(pp, maxlen, (uint64_t)x)); + + *pp = p; + return (0); } /* * __wt_vunpack_uint -- - * Variable-sized unpacking for unsigned integers + * Variable-sized unpacking for unsigned integers */ static inline int __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp) { - const uint8_t *p; - - WT_SIZE_CHECK_UNPACK(1, maxlen); - p = *pp; - switch (*p & 0xf0) { - case POS_1BYTE_MARKER: - case POS_1BYTE_MARKER | 0x10: - case POS_1BYTE_MARKER | 0x20: - case POS_1BYTE_MARKER | 0x30: - *xp = GET_BITS(*p, 6, 0); - p += 1; - break; - case POS_2BYTE_MARKER: - case POS_2BYTE_MARKER | 0x10: - WT_SIZE_CHECK_UNPACK(2, maxlen); - *xp = GET_BITS(*p++, 5, 0) << 8; - *xp |= *p++; - *xp += POS_1BYTE_MAX + 1; - break; - case POS_MULTI_MARKER: - WT_RET(__wt_vunpack_posint(pp, maxlen, xp)); - *xp += POS_2BYTE_MAX + 1; - return (0); - default: - return (EINVAL); - } - - *pp = p; - return (0); + const uint8_t *p; + + WT_SIZE_CHECK_UNPACK(1, maxlen); + p = *pp; + switch (*p & 0xf0) { + case POS_1BYTE_MARKER: + case POS_1BYTE_MARKER | 0x10: + case POS_1BYTE_MARKER | 0x20: + case POS_1BYTE_MARKER | 0x30: + *xp = GET_BITS(*p, 6, 0); + p += 1; + break; + case POS_2BYTE_MARKER: + case POS_2BYTE_MARKER | 0x10: + WT_SIZE_CHECK_UNPACK(2, maxlen); + *xp = GET_BITS(*p++, 5, 0) << 8; + *xp |= *p++; + *xp += POS_1BYTE_MAX + 1; + break; + case POS_MULTI_MARKER: + WT_RET(__wt_vunpack_posint(pp, maxlen, xp)); + *xp += POS_2BYTE_MAX + 1; + return (0); + default: + return (EINVAL); + } + + *pp = p; + return (0); } /* * __wt_vunpack_int -- - * Variable-sized packing for signed integers + * Variable-sized packing for signed integers */ static inline int __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp) { - const uint8_t *p; - - WT_SIZE_CHECK_UNPACK(1, maxlen); - p = *pp; - switch (*p & 0xf0) { - case NEG_MULTI_MARKER: - WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp)); - return (0); - case NEG_2BYTE_MARKER: - case NEG_2BYTE_MARKER | 0x10: - 
WT_SIZE_CHECK_UNPACK(2, maxlen); - *xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8); - *xp |= *p++; - *xp += NEG_2BYTE_MIN; - break; - case NEG_1BYTE_MARKER: - case NEG_1BYTE_MARKER | 0x10: - case NEG_1BYTE_MARKER | 0x20: - case NEG_1BYTE_MARKER | 0x30: - *xp = NEG_1BYTE_MIN + (int64_t)GET_BITS(*p, 6, 0); - p += 1; - break; - default: - /* Identical to the unsigned case. */ - return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp)); - } - - *pp = p; - return (0); + const uint8_t *p; + + WT_SIZE_CHECK_UNPACK(1, maxlen); + p = *pp; + switch (*p & 0xf0) { + case NEG_MULTI_MARKER: + WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp)); + return (0); + case NEG_2BYTE_MARKER: + case NEG_2BYTE_MARKER | 0x10: + WT_SIZE_CHECK_UNPACK(2, maxlen); + *xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8); + *xp |= *p++; + *xp += NEG_2BYTE_MIN; + break; + case NEG_1BYTE_MARKER: + case NEG_1BYTE_MARKER | 0x10: + case NEG_1BYTE_MARKER | 0x20: + case NEG_1BYTE_MARKER | 0x30: + *xp = NEG_1BYTE_MIN + (int64_t)GET_BITS(*p, 6, 0); + p += 1; + break; + default: + /* Identical to the unsigned case. */ + return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp)); + } + + *pp = p; + return (0); } /* * __wt_vsize_posint -- - * Return the packed size of a positive variable-length integer. + * Return the packed size of a positive variable-length integer. */ static inline size_t __wt_vsize_posint(uint64_t x) { - int lz; + int lz; - WT_LEADING_ZEROS(x, lz); - return ((size_t)(WT_INTPACK64_MAXSIZE - lz)); + WT_LEADING_ZEROS(x, lz); + return ((size_t)(WT_INTPACK64_MAXSIZE - lz)); } /* * __wt_vsize_negint -- - * Return the packed size of a negative variable-length integer. + * Return the packed size of a negative variable-length integer. 
*/ static inline size_t __wt_vsize_negint(uint64_t x) { - int lz; + int lz; - WT_LEADING_ZEROS(~x, lz); - return (size_t)(WT_INTPACK64_MAXSIZE - lz); + WT_LEADING_ZEROS(~x, lz); + return (size_t)(WT_INTPACK64_MAXSIZE - lz); } /* * __wt_vsize_uint -- - * Return the packed size of an unsigned integer. + * Return the packed size of an unsigned integer. */ static inline size_t __wt_vsize_uint(uint64_t x) { - if (x <= POS_1BYTE_MAX) - return (1); - if (x <= POS_2BYTE_MAX + 1) - return (2); - x -= POS_2BYTE_MAX + 1; - return (__wt_vsize_posint(x)); + if (x <= POS_1BYTE_MAX) + return (1); + if (x <= POS_2BYTE_MAX + 1) + return (2); + x -= POS_2BYTE_MAX + 1; + return (__wt_vsize_posint(x)); } /* * __wt_vsize_int -- - * Return the packed size of a signed integer. + * Return the packed size of a signed integer. */ static inline size_t __wt_vsize_int(int64_t x) { - if (x < NEG_2BYTE_MIN) - return (__wt_vsize_negint((uint64_t)x)); - if (x < NEG_1BYTE_MIN) - return (2); - if (x < 0) - return (1); - /* For non-negative values, use the unsigned code above. */ - return (__wt_vsize_uint((uint64_t)x)); + if (x < NEG_2BYTE_MIN) + return (__wt_vsize_negint((uint64_t)x)); + if (x < NEG_1BYTE_MIN) + return (2); + if (x < 0) + return (1); + /* For non-negative values, use the unsigned code above. */ + return (__wt_vsize_uint((uint64_t)x)); } diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h index 5d7cee531c2..b832a5af485 100644 --- a/src/third_party/wiredtiger/src/include/lint.h +++ b/src/third_party/wiredtiger/src/include/lint.h @@ -6,49 +6,45 @@ * See the file LICENSE for redistribution information. */ -#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ -#define WT_SIZET_FMT "zu" /* size_t format string */ +#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ +#define WT_SIZET_FMT "zu" /* size_t format string */ /* Lint-specific attributes. 
*/ -#define WT_PACKED_STRUCT_BEGIN(name) \ - struct name { -#define WT_PACKED_STRUCT_END \ - }; +#define WT_PACKED_STRUCT_BEGIN(name) struct name { +#define WT_PACKED_STRUCT_END \ + } \ + ; -#define WT_GCC_FUNC_ATTRIBUTE(x) -#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) +#define WT_GCC_FUNC_ATTRIBUTE(x) +#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) -#define WT_ATOMIC_FUNC(name, ret, type) \ -static inline ret \ -__wt_atomic_add##name(type *vp, type v) \ -{ \ - *vp += v; \ - return (*vp); \ -} \ -static inline ret \ -__wt_atomic_fetch_add##name(type *vp, type v) \ -{ \ - type orig; \ - \ - orig = *vp; \ - *vp += v; \ - return (orig); \ -} \ -static inline ret \ -__wt_atomic_sub##name(type *vp, type v) \ -{ \ - *vp -= v; \ - return (*vp); \ -} \ -static inline bool \ -__wt_atomic_cas##name(type *vp, type orig, type new) \ -{ \ - if (*vp == orig) { \ - *vp = new; \ - return (true); \ - } \ - return (false); \ -} +#define WT_ATOMIC_FUNC(name, ret, type) \ + static inline ret __wt_atomic_add##name(type *vp, type v) \ + { \ + *vp += v; \ + return (*vp); \ + } \ + static inline ret __wt_atomic_fetch_add##name(type *vp, type v) \ + { \ + type orig; \ + \ + orig = *vp; \ + *vp += v; \ + return (orig); \ + } \ + static inline ret __wt_atomic_sub##name(type *vp, type v) \ + { \ + *vp -= v; \ + return (*vp); \ + } \ + static inline bool __wt_atomic_cas##name(type *vp, type orig, type new) \ + { \ + if (*vp == orig) { \ + *vp = new; \ + return (true); \ + } \ + return (false); \ + } WT_ATOMIC_FUNC(8, uint8_t, uint8_t) WT_ATOMIC_FUNC(16, uint16_t, uint16_t) @@ -64,19 +60,59 @@ WT_ATOMIC_FUNC(size, size_t, size_t) /* * __wt_atomic_cas_ptr -- - * Pointer compare and swap. + * Pointer compare and swap. 
*/ static inline bool -__wt_atomic_cas_ptr(void *vp, void *orig, void *new) { - if (*(void **)vp == orig) { - *(void **)vp = new; - return (true); - } - return (false); +__wt_atomic_cas_ptr(void *vp, void *orig, void *new) +{ + if (*(void **)vp == orig) { + *(void **)vp = new; + return (true); + } + return (false); +} + +/* + * WT_BARRIER -- + * No-op implementation of WT_BARRIER. + */ +static inline void +WT_BARRIER(void) +{ +} + +/* + * WT_FULL_BARRIER -- + * No-op implementation of WT_FULL_BARRIER. + */ +static inline void +WT_FULL_BARRIER(void) +{ +} + +/* + * WT_PAUSE -- + * No-op implementation of WT_PAUSE. + */ +static inline void +WT_PAUSE(void) +{ +} + +/* + * WT_READ_BARRIER -- + * No-op implementation of WT_READ_BARRIER. + */ +static inline void +WT_READ_BARRIER(void) +{ } -static inline void WT_BARRIER(void) {} -static inline void WT_FULL_BARRIER(void) {} -static inline void WT_PAUSE(void) {} -static inline void WT_READ_BARRIER(void) {} -static inline void WT_WRITE_BARRIER(void) {} +/* + * WT_WRITE_BARRIER -- + * No-op implementation of WT_WRITE_BARRIER. 
+ */ +static inline void +WT_WRITE_BARRIER(void) +{ +} diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index a41d0f66798..0518d8dd0f9 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -7,111 +7,104 @@ */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LOGSCAN_FIRST 0x01u -#define WT_LOGSCAN_FROM_CKP 0x02u -#define WT_LOGSCAN_ONE 0x04u -#define WT_LOGSCAN_RECOVER 0x08u -#define WT_LOGSCAN_RECOVER_METADATA 0x10u +#define WT_LOGSCAN_FIRST 0x01u +#define WT_LOGSCAN_FROM_CKP 0x02u +#define WT_LOGSCAN_ONE 0x04u +#define WT_LOGSCAN_RECOVER 0x08u +#define WT_LOGSCAN_RECOVER_METADATA 0x10u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LOG_BACKGROUND 0x01u -#define WT_LOG_DSYNC 0x02u -#define WT_LOG_FLUSH 0x04u -#define WT_LOG_FSYNC 0x08u -#define WT_LOG_SYNC_ENABLED 0x10u +#define WT_LOG_BACKGROUND 0x01u +#define WT_LOG_DSYNC 0x02u +#define WT_LOG_FLUSH 0x04u +#define WT_LOG_FSYNC 0x08u +#define WT_LOG_SYNC_ENABLED 0x10u /* AUTOMATIC FLAG VALUE GENERATION STOP */ -#define WT_LOGOP_IGNORE 0x80000000 -#define WT_LOGOP_IS_IGNORED(val) ((val) & WT_LOGOP_IGNORE) +#define WT_LOGOP_IGNORE 0x80000000 +#define WT_LOGOP_IS_IGNORED(val) ((val)&WT_LOGOP_IGNORE) /* * WT_LSN -- * A log sequence number, representing a position in the transaction log. 
*/ union __wt_lsn { - struct { -#ifdef WORDS_BIGENDIAN - uint32_t file; - uint32_t offset; + struct { +#ifdef WORDS_BIGENDIAN + uint32_t file; + uint32_t offset; #else - uint32_t offset; - uint32_t file; + uint32_t offset; + uint32_t file; #endif - } l; - uint64_t file_offset; + } l; + uint64_t file_offset; }; -#define WT_LOG_FILENAME "WiredTigerLog" /* Log file name */ -#define WT_LOG_PREPNAME "WiredTigerPreplog" /* Log pre-allocated name */ -#define WT_LOG_TMPNAME "WiredTigerTmplog" /* Log temporary name */ +#define WT_LOG_FILENAME "WiredTigerLog" /* Log file name */ +#define WT_LOG_PREPNAME "WiredTigerPreplog" /* Log pre-allocated name */ +#define WT_LOG_TMPNAME "WiredTigerTmplog" /* Log temporary name */ /* Logging subsystem declarations. */ -#define WT_LOG_ALIGN 128 +#define WT_LOG_ALIGN 128 /* * Atomically set the two components of the LSN. */ -#define WT_SET_LSN(l, f, o) (l)->file_offset = (((uint64_t)(f) << 32) + (o)) +#define WT_SET_LSN(l, f, o) (l)->file_offset = (((uint64_t)(f) << 32) + (o)) -#define WT_INIT_LSN(l) WT_SET_LSN((l), 1, 0) +#define WT_INIT_LSN(l) WT_SET_LSN((l), 1, 0) -#define WT_MAX_LSN(l) WT_SET_LSN((l), UINT32_MAX, INT32_MAX) +#define WT_MAX_LSN(l) WT_SET_LSN((l), UINT32_MAX, INT32_MAX) -#define WT_ZERO_LSN(l) WT_SET_LSN((l), 0, 0) +#define WT_ZERO_LSN(l) WT_SET_LSN((l), 0, 0) /* - * Test for initial LSN. We only need to shift the 1 for comparison. + * Test for initial LSN. We only need to shift the 1 for comparison. */ -#define WT_IS_INIT_LSN(l) ((l)->file_offset == ((uint64_t)1 << 32)) +#define WT_IS_INIT_LSN(l) ((l)->file_offset == ((uint64_t)1 << 32)) /* - * Original tested INT32_MAX. But if we read one from an older - * release we may see UINT32_MAX. + * Original tested INT32_MAX. But if we read one from an older release we may see UINT32_MAX. 
*/ -#define WT_IS_MAX_LSN(lsn) \ - ((lsn)->l.file == UINT32_MAX && \ - ((lsn)->l.offset == INT32_MAX || (lsn)->l.offset == UINT32_MAX)) +#define WT_IS_MAX_LSN(lsn) \ + ((lsn)->l.file == UINT32_MAX && ((lsn)->l.offset == INT32_MAX || (lsn)->l.offset == UINT32_MAX)) /* * Test for zero LSN. */ -#define WT_IS_ZERO_LSN(l) ((l)->file_offset == 0) +#define WT_IS_ZERO_LSN(l) ((l)->file_offset == 0) /* * Macro to print an LSN. */ -#define WT_LSN_MSG(lsn, msg) \ - __wt_msg(session, "%s LSN: [%" PRIu32 "][%" PRIu32 "]", \ - (msg), (lsn)->l.file, (lsn)->l.offset) +#define WT_LSN_MSG(lsn, msg) \ + __wt_msg(session, "%s LSN: [%" PRIu32 "][%" PRIu32 "]", (msg), (lsn)->l.file, (lsn)->l.offset) /* - * Both of the macros below need to change if the content of __wt_lsn - * ever changes. The value is the following: - * txnid, record type, operation type, file id, operation key, operation value + * Both of the macros below need to change if the content of __wt_lsn ever changes. The value is the + * following: txnid, record type, operation type, file id, operation key, operation value */ -#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(III) -#define WT_LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu) +#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(III) +#define WT_LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu) /* * Size range for the log files. */ -#define WT_LOG_FILE_MAX ((int64_t)2 * WT_GIGABYTE) -#define WT_LOG_FILE_MIN (100 * WT_KILOBYTE) +#define WT_LOG_FILE_MAX ((int64_t)2 * WT_GIGABYTE) +#define WT_LOG_FILE_MIN (100 * WT_KILOBYTE) -#define WT_LOG_SKIP_HEADER(data) \ - ((const uint8_t *)(data) + offsetof(WT_LOG_RECORD, record)) -#define WT_LOG_REC_SIZE(size) \ - ((size) - offsetof(WT_LOG_RECORD, record)) +#define WT_LOG_SKIP_HEADER(data) ((const uint8_t *)(data) + offsetof(WT_LOG_RECORD, record)) +#define WT_LOG_REC_SIZE(size) ((size)-offsetof(WT_LOG_RECORD, record)) /* - * We allocate the buffer size, but trigger a slot switch when we cross - * the maximum size of half the buffer. 
If a record is more than the buffer - * maximum then we trigger a slot switch and write that record unbuffered. - * We use a larger buffer to provide overflow space so that we can switch - * once we cross the threshold. + * We allocate the buffer size, but trigger a slot switch when we cross the maximum size of half the + * buffer. If a record is more than the buffer maximum then we trigger a slot switch and write that + * record unbuffered. We use a larger buffer to provide overflow space so that we can switch once we + * cross the threshold. */ -#define WT_LOG_SLOT_BUF_SIZE (256 * 1024) /* Must be power of 2 */ -#define WT_LOG_SLOT_BUF_MAX ((uint32_t)log->slot_buf_size / 2) -#define WT_LOG_SLOT_UNBUFFERED (WT_LOG_SLOT_BUF_SIZE << 1) +#define WT_LOG_SLOT_BUF_SIZE (256 * 1024) /* Must be power of 2 */ +#define WT_LOG_SLOT_BUF_MAX ((uint32_t)log->slot_buf_size / 2) +#define WT_LOG_SLOT_UNBUFFERED (WT_LOG_SLOT_BUF_SIZE << 1) /* * Possible values for the consolidation array slot states: @@ -129,241 +122,223 @@ union __wt_lsn { * the maximum size less than 32 bits for both joined and released. */ /* - * XXX - * The log slot bits are signed and should be rewritten as unsigned. For now, - * give the logging subsystem its own flags macro. + * XXX The log slot bits are signed and should be rewritten as unsigned. For now, give the logging + * subsystem its own flags macro. */ -#define FLD_LOG_SLOT_ISSET(field, mask) (((field) & (uint64_t)(mask)) != 0) +#define FLD_LOG_SLOT_ISSET(field, mask) (((field) & (uint64_t)(mask)) != 0) /* - * The high bit is reserved for the special states. If the high bit is - * set (WT_LOG_SLOT_RESERVED) then we are guaranteed to be in a special state. + * The high bit is reserved for the special states. If the high bit is set (WT_LOG_SLOT_RESERVED) + * then we are guaranteed to be in a special state. 
*/ -#define WT_LOG_SLOT_FREE (-1) /* Not in use */ -#define WT_LOG_SLOT_WRITTEN (-2) /* Slot data written, not processed */ +#define WT_LOG_SLOT_FREE (-1) /* Not in use */ +#define WT_LOG_SLOT_WRITTEN (-2) /* Slot data written, not processed */ /* - * If new slot states are added, adjust WT_LOG_SLOT_BITS and - * WT_LOG_SLOT_MASK_OFF accordingly for how much of the top 32 - * bits we are using. More slot states here will reduce the maximum - * size that a slot can hold unbuffered by half. If a record is - * larger than the maximum we can account for in the slot state we fall - * back to direct writes. + * If new slot states are added, adjust WT_LOG_SLOT_BITS and WT_LOG_SLOT_MASK_OFF accordingly for + * how much of the top 32 bits we are using. More slot states here will reduce the maximum size that + * a slot can hold unbuffered by half. If a record is larger than the maximum we can account for in + * the slot state we fall back to direct writes. */ -#define WT_LOG_SLOT_BITS 2 -#define WT_LOG_SLOT_MAXBITS (32 - WT_LOG_SLOT_BITS) -#define WT_LOG_SLOT_CLOSE 0x4000000000000000LL /* Force slot close */ -#define WT_LOG_SLOT_RESERVED 0x8000000000000000LL /* Reserved states */ +#define WT_LOG_SLOT_BITS 2 +#define WT_LOG_SLOT_MAXBITS (32 - WT_LOG_SLOT_BITS) +#define WT_LOG_SLOT_CLOSE 0x4000000000000000LL /* Force slot close */ +#define WT_LOG_SLOT_RESERVED 0x8000000000000000LL /* Reserved states */ /* - * Check if the unbuffered flag is set in the joined portion of - * the slot state. + * Check if the unbuffered flag is set in the joined portion of the slot state. 
*/ -#define WT_LOG_SLOT_UNBUFFERED_ISSET(state) \ - ((state) & ((int64_t)WT_LOG_SLOT_UNBUFFERED << 32)) +#define WT_LOG_SLOT_UNBUFFERED_ISSET(state) ((state) & ((int64_t)WT_LOG_SLOT_UNBUFFERED << 32)) -#define WT_LOG_SLOT_MASK_OFF 0x3fffffffffffffffLL -#define WT_LOG_SLOT_MASK_ON ~(WT_LOG_SLOT_MASK_OFF) -#define WT_LOG_SLOT_JOIN_MASK (WT_LOG_SLOT_MASK_OFF >> 32) +#define WT_LOG_SLOT_MASK_OFF 0x3fffffffffffffffLL +#define WT_LOG_SLOT_MASK_ON ~(WT_LOG_SLOT_MASK_OFF) +#define WT_LOG_SLOT_JOIN_MASK (WT_LOG_SLOT_MASK_OFF >> 32) /* * These macros manipulate the slot state and its component parts. */ -#define WT_LOG_SLOT_FLAGS(state) ((state) & WT_LOG_SLOT_MASK_ON) -#define WT_LOG_SLOT_JOINED(state) (((state) & WT_LOG_SLOT_MASK_OFF) >> 32) -#define WT_LOG_SLOT_JOINED_BUFFERED(state) \ - (WT_LOG_SLOT_JOINED(state) & \ - (WT_LOG_SLOT_UNBUFFERED - 1)) -#define WT_LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s)) -#define WT_LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state)) -#define WT_LOG_SLOT_RELEASED_BUFFERED(state) \ - ((int64_t)((int32_t)WT_LOG_SLOT_RELEASED(state) & \ - (WT_LOG_SLOT_UNBUFFERED - 1))) +#define WT_LOG_SLOT_FLAGS(state) ((state)&WT_LOG_SLOT_MASK_ON) +#define WT_LOG_SLOT_JOINED(state) (((state)&WT_LOG_SLOT_MASK_OFF) >> 32) +#define WT_LOG_SLOT_JOINED_BUFFERED(state) \ + (WT_LOG_SLOT_JOINED(state) & (WT_LOG_SLOT_UNBUFFERED - 1)) +#define WT_LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s)) +#define WT_LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state)) +#define WT_LOG_SLOT_RELEASED_BUFFERED(state) \ + ((int64_t)((int32_t)WT_LOG_SLOT_RELEASED(state) & (WT_LOG_SLOT_UNBUFFERED - 1))) /* Slot is in use */ -#define WT_LOG_SLOT_ACTIVE(state) \ - (WT_LOG_SLOT_JOINED(state) != WT_LOG_SLOT_JOIN_MASK) +#define WT_LOG_SLOT_ACTIVE(state) (WT_LOG_SLOT_JOINED(state) != WT_LOG_SLOT_JOIN_MASK) /* Slot is in use, but closed to new joins */ -#define WT_LOG_SLOT_CLOSED(state) \ - (WT_LOG_SLOT_ACTIVE(state) && \ - (FLD_LOG_SLOT_ISSET((uint64_t)(state), 
WT_LOG_SLOT_CLOSE) && \ - !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_RESERVED))) +#define WT_LOG_SLOT_CLOSED(state) \ + (WT_LOG_SLOT_ACTIVE(state) && (FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \ + !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_RESERVED))) /* Slot is in use, all data copied into buffer */ -#define WT_LOG_SLOT_INPROGRESS(state) \ - (WT_LOG_SLOT_RELEASED(state) != WT_LOG_SLOT_JOINED(state)) -#define WT_LOG_SLOT_DONE(state) \ - (WT_LOG_SLOT_CLOSED(state) && \ - !WT_LOG_SLOT_INPROGRESS(state)) +#define WT_LOG_SLOT_INPROGRESS(state) (WT_LOG_SLOT_RELEASED(state) != WT_LOG_SLOT_JOINED(state)) +#define WT_LOG_SLOT_DONE(state) (WT_LOG_SLOT_CLOSED(state) && !WT_LOG_SLOT_INPROGRESS(state)) /* Slot is in use, more threads may join this slot */ -#define WT_LOG_SLOT_OPEN(state) \ - (WT_LOG_SLOT_ACTIVE(state) && \ - !WT_LOG_SLOT_UNBUFFERED_ISSET(state) && \ - !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \ - WT_LOG_SLOT_JOINED(state) < WT_LOG_SLOT_BUF_MAX) +#define WT_LOG_SLOT_OPEN(state) \ + (WT_LOG_SLOT_ACTIVE(state) && !WT_LOG_SLOT_UNBUFFERED_ISSET(state) && \ + !FLD_LOG_SLOT_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \ + WT_LOG_SLOT_JOINED(state) < WT_LOG_SLOT_BUF_MAX) struct __wt_logslot { - WT_CACHE_LINE_PAD_BEGIN - volatile int64_t slot_state; /* Slot state */ - int64_t slot_unbuffered; /* Unbuffered data in this slot */ - int slot_error; /* Error value */ - wt_off_t slot_start_offset; /* Starting file offset */ - wt_off_t slot_last_offset; /* Last record offset */ - WT_LSN slot_release_lsn; /* Slot release LSN */ - WT_LSN slot_start_lsn; /* Slot starting LSN */ - WT_LSN slot_end_lsn; /* Slot ending LSN */ - WT_FH *slot_fh; /* File handle for this group */ - WT_ITEM slot_buf; /* Buffer for grouped writes */ + WT_CACHE_LINE_PAD_BEGIN + volatile int64_t slot_state; /* Slot state */ + int64_t slot_unbuffered; /* Unbuffered data in this slot */ + int slot_error; /* Error value */ + wt_off_t slot_start_offset; /* 
Starting file offset */ + wt_off_t slot_last_offset; /* Last record offset */ + WT_LSN slot_release_lsn; /* Slot release LSN */ + WT_LSN slot_start_lsn; /* Slot starting LSN */ + WT_LSN slot_end_lsn; /* Slot ending LSN */ + WT_FH *slot_fh; /* File handle for this group */ + WT_ITEM slot_buf; /* Buffer for grouped writes */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_SLOT_CLOSEFH 0x01u /* Close old fh on release */ -#define WT_SLOT_FLUSH 0x02u /* Wait for write */ -#define WT_SLOT_SYNC 0x04u /* Needs sync on release */ -#define WT_SLOT_SYNC_DIR 0x08u /* Directory sync on release */ -#define WT_SLOT_SYNC_DIRTY 0x10u /* Sync system buffers on release */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; - WT_CACHE_LINE_PAD_END +#define WT_SLOT_CLOSEFH 0x01u /* Close old fh on release */ +#define WT_SLOT_FLUSH 0x02u /* Wait for write */ +#define WT_SLOT_SYNC 0x04u /* Needs sync on release */ +#define WT_SLOT_SYNC_DIR 0x08u /* Directory sync on release */ +#define WT_SLOT_SYNC_DIRTY 0x10u /* Sync system buffers on release */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; + WT_CACHE_LINE_PAD_END }; -#define WT_SLOT_INIT_FLAGS 0 +#define WT_SLOT_INIT_FLAGS 0 -#define WT_SLOT_SYNC_FLAGS \ - (WT_SLOT_SYNC | \ - WT_SLOT_SYNC_DIR | \ - WT_SLOT_SYNC_DIRTY) +#define WT_SLOT_SYNC_FLAGS (WT_SLOT_SYNC | WT_SLOT_SYNC_DIR | WT_SLOT_SYNC_DIRTY) -#define WT_WITH_SLOT_LOCK(session, log, op) do { \ - WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \ - WT_WITH_LOCK_WAIT(session, \ - &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ -} while (0) +#define WT_WITH_SLOT_LOCK(session, log, op) \ + do { \ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \ + WT_WITH_LOCK_WAIT(session, &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ + } while (0) struct __wt_myslot { - WT_LOGSLOT *slot; /* Slot I'm using */ - wt_off_t end_offset; /* My end offset in buffer */ - wt_off_t offset; /* Slot buffer offset */ + WT_LOGSLOT 
*slot; /* Slot I'm using */ + wt_off_t end_offset; /* My end offset in buffer */ + wt_off_t offset; /* Slot buffer offset */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_MYSLOT_CLOSE 0x1u /* This thread is closing the slot */ -#define WT_MYSLOT_NEEDS_RELEASE 0x2u /* This thread is releasing the slot */ -#define WT_MYSLOT_UNBUFFERED 0x4u /* Write directly */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_MYSLOT_CLOSE 0x1u /* This thread is closing the slot */ +#define WT_MYSLOT_NEEDS_RELEASE 0x2u /* This thread is releasing the slot */ +#define WT_MYSLOT_UNBUFFERED 0x4u /* Write directly */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; -#define WT_LOG_END_HEADER log->allocsize +#define WT_LOG_END_HEADER log->allocsize struct __wt_log { - uint32_t allocsize; /* Allocation alignment size */ - uint32_t first_record; /* Offset of first record in file */ - wt_off_t log_written; /* Amount of log written this period */ - /* - * Log file information - */ - uint32_t fileid; /* Current log file number */ - uint32_t prep_fileid; /* Pre-allocated file number */ - uint32_t tmp_fileid; /* Temporary file number */ - uint32_t prep_missed; /* Pre-allocated file misses */ - WT_FH *log_fh; /* Logging file handle */ - WT_FH *log_dir_fh; /* Log directory file handle */ - WT_FH *log_close_fh; /* Logging file handle to close */ - WT_LSN log_close_lsn; /* LSN needed to close */ - - uint16_t log_version; /* Version of log file */ - - /* - * System LSNs - */ - WT_LSN alloc_lsn; /* Next LSN for allocation */ - WT_LSN bg_sync_lsn; /* Latest background sync LSN */ - WT_LSN ckpt_lsn; /* Last checkpoint LSN */ - WT_LSN dirty_lsn; /* LSN of last non-synced write */ - WT_LSN first_lsn; /* First LSN */ - WT_LSN sync_dir_lsn; /* LSN of the last directory sync */ - WT_LSN sync_lsn; /* LSN of the last sync */ - WT_LSN trunc_lsn; /* End LSN for recovery truncation */ - WT_LSN write_lsn; /* End of last LSN written */ - WT_LSN write_start_lsn;/* 
Beginning of last LSN written */ - - /* - * Synchronization resources - */ - WT_SPINLOCK log_lock; /* Locked: Logging fields */ - WT_SPINLOCK log_fs_lock; /* Locked: tmp, prep and log files */ - WT_SPINLOCK log_slot_lock; /* Locked: Consolidation array */ - WT_SPINLOCK log_sync_lock; /* Locked: Single-thread fsync */ - WT_SPINLOCK log_writelsn_lock; /* Locked: write LSN */ - - WT_RWLOCK log_archive_lock;/* Archive and log cursors */ - - /* Notify any waiting threads when sync_lsn is updated. */ - WT_CONDVAR *log_sync_cond; - /* Notify any waiting threads when write_lsn is updated. */ - WT_CONDVAR *log_write_cond; - - /* - * Consolidation array information - * Our testing shows that the more consolidation we generate the - * better the performance we see which equates to an active slot - * slot count of one. - * - * Note: this can't be an array, we impose cache-line alignment and - * gcc doesn't support that for arrays. - */ -#define WT_SLOT_POOL 128 - WT_LOGSLOT *active_slot; /* Active slot */ - WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */ - int32_t pool_index; /* Index into slot pool */ - size_t slot_buf_size; /* Buffer size for slots */ + uint32_t allocsize; /* Allocation alignment size */ + uint32_t first_record; /* Offset of first record in file */ + wt_off_t log_written; /* Amount of log written this period */ + /* + * Log file information + */ + uint32_t fileid; /* Current log file number */ + uint32_t prep_fileid; /* Pre-allocated file number */ + uint32_t tmp_fileid; /* Temporary file number */ + uint32_t prep_missed; /* Pre-allocated file misses */ + WT_FH *log_fh; /* Logging file handle */ + WT_FH *log_dir_fh; /* Log directory file handle */ + WT_FH *log_close_fh; /* Logging file handle to close */ + WT_LSN log_close_lsn; /* LSN needed to close */ + + uint16_t log_version; /* Version of log file */ + + /* + * System LSNs + */ + WT_LSN alloc_lsn; /* Next LSN for allocation */ + WT_LSN bg_sync_lsn; /* Latest background sync LSN */ + WT_LSN 
ckpt_lsn; /* Last checkpoint LSN */ + WT_LSN dirty_lsn; /* LSN of last non-synced write */ + WT_LSN first_lsn; /* First LSN */ + WT_LSN sync_dir_lsn; /* LSN of the last directory sync */ + WT_LSN sync_lsn; /* LSN of the last sync */ + WT_LSN trunc_lsn; /* End LSN for recovery truncation */ + WT_LSN write_lsn; /* End of last LSN written */ + WT_LSN write_start_lsn; /* Beginning of last LSN written */ + + /* + * Synchronization resources + */ + WT_SPINLOCK log_lock; /* Locked: Logging fields */ + WT_SPINLOCK log_fs_lock; /* Locked: tmp, prep and log files */ + WT_SPINLOCK log_slot_lock; /* Locked: Consolidation array */ + WT_SPINLOCK log_sync_lock; /* Locked: Single-thread fsync */ + WT_SPINLOCK log_writelsn_lock; /* Locked: write LSN */ + + WT_RWLOCK log_archive_lock; /* Archive and log cursors */ + + /* Notify any waiting threads when sync_lsn is updated. */ + WT_CONDVAR *log_sync_cond; + /* Notify any waiting threads when write_lsn is updated. */ + WT_CONDVAR *log_write_cond; + +/* + * Consolidation array information + * Our testing shows that the more consolidation we generate the + * better the performance we see which equates to an active slot + * slot count of one. + * + * Note: this can't be an array, we impose cache-line alignment and + * gcc doesn't support that for arrays. 
+ */ +#define WT_SLOT_POOL 128 + WT_LOGSLOT *active_slot; /* Active slot */ + WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */ + int32_t pool_index; /* Index into slot pool */ + size_t slot_buf_size; /* Buffer size for slots */ #ifdef HAVE_DIAGNOSTIC - uint64_t write_calls; /* Calls to log_write */ + uint64_t write_calls; /* Calls to log_write */ #endif /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LOG_FORCE_NEWFILE 0x1u /* Force switch to new log file */ -#define WT_LOG_OPENED 0x2u /* Log subsystem successfully open */ -#define WT_LOG_TRUNCATE_NOTSUP 0x4u /* File system truncate not supported */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_LOG_FORCE_NEWFILE 0x1u /* Force switch to new log file */ +#define WT_LOG_OPENED 0x2u /* Log subsystem successfully open */ +#define WT_LOG_TRUNCATE_NOTSUP 0x4u /* File system truncate not supported */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; struct __wt_log_record { - uint32_t len; /* 00-03: Record length including hdr */ - uint32_t checksum; /* 04-07: Checksum of the record */ - - /* - * No automatic generation: flag values cannot change, they're written - * to disk. - * - * Unused bits in the flags, as well as the 'unused' padding, - * are expected to be zeroed; we check that to help detect file - * corruption. - */ -#define WT_LOG_RECORD_COMPRESSED 0x01u /* Compressed except hdr */ -#define WT_LOG_RECORD_ENCRYPTED 0x02u /* Encrypted except hdr */ -#define WT_LOG_RECORD_ALL_FLAGS \ - (WT_LOG_RECORD_COMPRESSED | WT_LOG_RECORD_ENCRYPTED) - uint16_t flags; /* 08-09: Flags */ - uint8_t unused[2]; /* 10-11: Padding */ - uint32_t mem_len; /* 12-15: Uncompressed len if needed */ - uint8_t record[0]; /* Beginning of actual data */ + uint32_t len; /* 00-03: Record length including hdr */ + uint32_t checksum; /* 04-07: Checksum of the record */ + +/* + * No automatic generation: flag values cannot change, they're written + * to disk. 
+ * + * Unused bits in the flags, as well as the 'unused' padding, + * are expected to be zeroed; we check that to help detect file + * corruption. + */ +#define WT_LOG_RECORD_COMPRESSED 0x01u /* Compressed except hdr */ +#define WT_LOG_RECORD_ENCRYPTED 0x02u /* Encrypted except hdr */ +#define WT_LOG_RECORD_ALL_FLAGS (WT_LOG_RECORD_COMPRESSED | WT_LOG_RECORD_ENCRYPTED) + uint16_t flags; /* 08-09: Flags */ + uint8_t unused[2]; /* 10-11: Padding */ + uint32_t mem_len; /* 12-15: Uncompressed len if needed */ + uint8_t record[0]; /* Beginning of actual data */ }; /* * __wt_log_record_byteswap -- - * Handle big- and little-endian transformation of the log record - * header block. + * Handle big- and little-endian transformation of the log record header block. */ static inline void __wt_log_record_byteswap(WT_LOG_RECORD *record) { -#ifdef WORDS_BIGENDIAN - record->len = __wt_bswap32(record->len); - record->checksum = __wt_bswap32(record->checksum); - record->flags = __wt_bswap16(record->flags); - record->mem_len = __wt_bswap32(record->mem_len); +#ifdef WORDS_BIGENDIAN + record->len = __wt_bswap32(record->len); + record->checksum = __wt_bswap32(record->checksum); + record->flags = __wt_bswap16(record->flags); + record->mem_len = __wt_bswap32(record->mem_len); #else - WT_UNUSED(record); + WT_UNUSED(record); #endif } @@ -372,57 +347,57 @@ __wt_log_record_byteswap(WT_LOG_RECORD *record) * The log file's description. */ struct __wt_log_desc { -#define WT_LOG_MAGIC 0x101064u - uint32_t log_magic; /* 00-03: Magic number */ -/* - * NOTE: We bumped the log version from 2 to 3 to make it convenient for - * MongoDB to detect users accidentally running old binaries on a newer - * release. There are no actual log file format changes with version 2 and 3. 
- */ -#define WT_LOG_VERSION 3 - uint16_t version; /* 04-05: Log version */ - uint16_t unused; /* 06-07: Unused */ - uint64_t log_size; /* 08-15: Log file size */ +#define WT_LOG_MAGIC 0x101064u + uint32_t log_magic; /* 00-03: Magic number */ + /* + * NOTE: We bumped the log version from 2 to 3 to make it convenient for + * MongoDB to detect users accidentally running old binaries on a newer + * release. There are no actual log file format changes with version 2 and + * 3. + */ +#define WT_LOG_VERSION 3 + uint16_t version; /* 04-05: Log version */ + uint16_t unused; /* 06-07: Unused */ + uint64_t log_size; /* 08-15: Log file size */ }; /* * This is the log version that introduced the system record. */ -#define WT_LOG_VERSION_SYSTEM 2 +#define WT_LOG_VERSION_SYSTEM 2 /* * WiredTiger release version where log format version changed. */ -#define WT_LOG_V2_MAJOR 3 -#define WT_LOG_V2_MINOR 0 -#define WT_LOG_V3_MAJOR 3 -#define WT_LOG_V3_MINOR 1 +#define WT_LOG_V2_MAJOR 3 +#define WT_LOG_V2_MINOR 0 +#define WT_LOG_V3_MAJOR 3 +#define WT_LOG_V3_MINOR 1 /* * __wt_log_desc_byteswap -- - * Handle big- and little-endian transformation of the log file - * description block. + * Handle big- and little-endian transformation of the log file description block. */ static inline void __wt_log_desc_byteswap(WT_LOG_DESC *desc) { -#ifdef WORDS_BIGENDIAN - desc->log_magic = __wt_bswap32(desc->log_magic); - desc->version = __wt_bswap16(desc->version); - desc->unused = __wt_bswap16(desc->unused); - desc->log_size = __wt_bswap64(desc->log_size); +#ifdef WORDS_BIGENDIAN + desc->log_magic = __wt_bswap32(desc->log_magic); + desc->version = __wt_bswap16(desc->version); + desc->unused = __wt_bswap16(desc->unused); + desc->log_size = __wt_bswap64(desc->log_size); #else - WT_UNUSED(desc); + WT_UNUSED(desc); #endif } /* Cookie passed through the transaction printlog routines. 
*/ struct __wt_txn_printlog_args { - WT_FSTREAM *fs; + WT_FSTREAM *fs; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_PRINTLOG_HEX 0x1u /* Add hex output */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_TXN_PRINTLOG_HEX 0x1u /* Add hex output */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; /* @@ -430,8 +405,8 @@ struct __wt_txn_printlog_args { * A descriptor for a log record type. */ struct __wt_log_rec_desc { - const char *fmt; - int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end); + const char *fmt; + int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end); }; /* @@ -439,6 +414,6 @@ struct __wt_log_rec_desc { * A descriptor for a log operation type. */ struct __wt_log_op_desc { - const char *fmt; - int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end); + const char *fmt; + int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end); }; diff --git a/src/third_party/wiredtiger/src/include/log.i b/src/third_party/wiredtiger/src/include/log.i index 6c8be84a98c..2cc0fd172e8 100644 --- a/src/third_party/wiredtiger/src/include/log.i +++ b/src/third_party/wiredtiger/src/include/log.i @@ -8,20 +8,19 @@ /* * __wt_log_cmp -- - * Compare 2 LSNs, return -1 if lsn1 < lsn2, 0if lsn1 == lsn2 - * and 1 if lsn1 > lsn2. + * Compare 2 LSNs, return -1 if lsn1 < lsn2, 0if lsn1 == lsn2 and 1 if lsn1 > lsn2. */ static inline int __wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2) { - uint64_t l1, l2; + uint64_t l1, l2; - /* - * Read LSNs into local variables so that we only read each field - * once and all comparisons are on the same values. - */ - l1 = ((volatile WT_LSN *)lsn1)->file_offset; - l2 = ((volatile WT_LSN *)lsn2)->file_offset; + /* + * Read LSNs into local variables so that we only read each field once and all comparisons are + * on the same values. + */ + l1 = ((volatile WT_LSN *)lsn1)->file_offset; + l2 = ((volatile WT_LSN *)lsn2)->file_offset; - return (l1 < l2 ? 
-1 : (l1 > l2 ? 1 : 0)); + return (l1 < l2 ? -1 : (l1 > l2 ? 1 : 0)); } diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h index cc0aa54417f..9533662cf92 100644 --- a/src/third_party/wiredtiger/src/include/lsm.h +++ b/src/third_party/wiredtiger/src/include/lsm.h @@ -11,9 +11,9 @@ * State for an LSM worker thread. */ struct __wt_lsm_worker_cookie { - WT_LSM_CHUNK **chunk_array; - size_t chunk_alloc; - u_int nchunks; + WT_LSM_CHUNK **chunk_array; + size_t chunk_alloc; + u_int nchunks; }; /* @@ -21,16 +21,16 @@ struct __wt_lsm_worker_cookie { * State for an LSM worker thread. */ struct __wt_lsm_worker_args { - WT_SESSION_IMPL *session; /* Session */ - WT_CONDVAR *work_cond; /* Owned by the manager */ + WT_SESSION_IMPL *session; /* Session */ + WT_CONDVAR *work_cond; /* Owned by the manager */ - wt_thread_t tid; /* Thread id */ - bool tid_set; /* Thread id set */ + wt_thread_t tid; /* Thread id */ + bool tid_set; /* Thread id set */ - u_int id; /* My manager slot id */ - uint32_t type; /* Types of operations handled */ + u_int id; /* My manager slot id */ + uint32_t type; /* Types of operations handled */ - volatile bool running; /* Worker is running */ + volatile bool running; /* Worker is running */ }; /* @@ -38,10 +38,10 @@ struct __wt_lsm_worker_args { * Iterator struct containing all the LSM cursor access points for a chunk. */ struct __wt_lsm_cursor_chunk { - WT_BLOOM *bloom; /* Bloom filter handle for each chunk.*/ - WT_CURSOR *cursor; /* Cursor handle for each chunk. */ - uint64_t count; /* Number of items in chunk */ - uint64_t switch_txn; /* Switch txn for each chunk */ + WT_BLOOM *bloom; /* Bloom filter handle for each chunk.*/ + WT_CURSOR *cursor; /* Cursor handle for each chunk. */ + uint64_t count; /* Number of items in chunk */ + uint64_t switch_txn; /* Switch txn for each chunk */ }; /* @@ -49,35 +49,35 @@ struct __wt_lsm_cursor_chunk { * An LSM cursor. 
*/ struct __wt_cursor_lsm { - WT_CURSOR iface; + WT_CURSOR iface; - WT_LSM_TREE *lsm_tree; - uint64_t dsk_gen; + WT_LSM_TREE *lsm_tree; + uint64_t dsk_gen; - u_int nchunks; /* Number of chunks in the cursor */ - u_int nupdates; /* Updates needed (including - snapshot isolation checks). */ - WT_CURSOR *current; /* The current cursor for iteration */ - WT_LSM_CHUNK *primary_chunk; /* The current primary chunk */ + u_int nchunks; /* Number of chunks in the cursor */ + u_int nupdates; /* Updates needed (including + snapshot isolation checks). */ + WT_CURSOR *current; /* The current cursor for iteration */ + WT_LSM_CHUNK *primary_chunk; /* The current primary chunk */ - WT_LSM_CURSOR_CHUNK **chunks; /* Array of LSM cursor units */ - size_t chunks_alloc; /* Current size iterators array */ - size_t chunks_count; /* Current number of iterators */ + WT_LSM_CURSOR_CHUNK **chunks; /* Array of LSM cursor units */ + size_t chunks_alloc; /* Current size iterators array */ + size_t chunks_count; /* Current number of iterators */ - u_int update_count; /* Updates performed. */ + u_int update_count; /* Updates performed. 
*/ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CLSM_ACTIVE 0x001u /* Incremented the session count */ -#define WT_CLSM_BULK 0x002u /* Open for snapshot isolation */ -#define WT_CLSM_ITERATE_NEXT 0x004u /* Forward iteration */ -#define WT_CLSM_ITERATE_PREV 0x008u /* Backward iteration */ -#define WT_CLSM_MERGE 0x010u /* Merge cursor, don't update */ -#define WT_CLSM_MINOR_MERGE 0x020u /* Minor merge, include tombstones */ -#define WT_CLSM_MULTIPLE 0x040u /* Multiple cursors have values */ -#define WT_CLSM_OPEN_READ 0x080u /* Open for reads */ -#define WT_CLSM_OPEN_SNAPSHOT 0x100u /* Open for snapshot isolation */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_CLSM_ACTIVE 0x001u /* Incremented the session count */ +#define WT_CLSM_BULK 0x002u /* Open for snapshot isolation */ +#define WT_CLSM_ITERATE_NEXT 0x004u /* Forward iteration */ +#define WT_CLSM_ITERATE_PREV 0x008u /* Backward iteration */ +#define WT_CLSM_MERGE 0x010u /* Merge cursor, don't update */ +#define WT_CLSM_MINOR_MERGE 0x020u /* Minor merge, include tombstones */ +#define WT_CLSM_MULTIPLE 0x040u /* Multiple cursors have values */ +#define WT_CLSM_OPEN_READ 0x080u /* Open for reads */ +#define WT_CLSM_OPEN_SNAPSHOT 0x100u /* Open for snapshot isolation */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; /* @@ -85,76 +85,75 @@ struct __wt_cursor_lsm { * A single chunk (file) in an LSM tree. */ struct __wt_lsm_chunk { - const char *uri; /* Data source for this chunk */ - const char *bloom_uri; /* URI of Bloom filter, if any */ - struct timespec create_time; /* Creation time (for rate limiting) */ - uint64_t count; /* Approximate count of records */ - uint64_t size; /* Final chunk size */ - - uint64_t switch_txn; /* - * Largest transaction that can write - * to this chunk, set by a worker - * thread when the chunk is switched - * out, or by compact to get the most - * recent chunk flushed. 
- */ - wt_timestamp_t switch_timestamp;/* - * The timestamp used to decide when - * updates need to detect conflicts. - */ - WT_SPINLOCK timestamp_spinlock; - - uint32_t id; /* ID used to generate URIs */ - uint32_t generation; /* Merge generation */ - uint32_t refcnt; /* Number of worker thread references */ - uint32_t bloom_busy; /* Currently creating bloom filter */ - uint32_t evict_enabled; /* Eviction allowed on the chunk */ - - int8_t empty; /* 1/0: checkpoint missing */ - int8_t evicted; /* 1/0: in-memory chunk was evicted */ - uint8_t flushing; /* 1/0: chunk flush in progress */ + const char *uri; /* Data source for this chunk */ + const char *bloom_uri; /* URI of Bloom filter, if any */ + struct timespec create_time; /* Creation time (for rate limiting) */ + uint64_t count; /* Approximate count of records */ + uint64_t size; /* Final chunk size */ + + uint64_t switch_txn; /* + * Largest transaction that can write + * to this chunk, set by a worker + * thread when the chunk is switched + * out, or by compact to get the most + * recent chunk flushed. + */ + wt_timestamp_t switch_timestamp; /* + * The timestamp used to decide when + * updates need to detect conflicts. 
+ */ + WT_SPINLOCK timestamp_spinlock; + + uint32_t id; /* ID used to generate URIs */ + uint32_t generation; /* Merge generation */ + uint32_t refcnt; /* Number of worker thread references */ + uint32_t bloom_busy; /* Currently creating bloom filter */ + uint32_t evict_enabled; /* Eviction allowed on the chunk */ + + int8_t empty; /* 1/0: checkpoint missing */ + int8_t evicted; /* 1/0: in-memory chunk was evicted */ + uint8_t flushing; /* 1/0: chunk flush in progress */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LSM_CHUNK_BLOOM 0x01u -#define WT_LSM_CHUNK_HAS_TIMESTAMP 0x02u -#define WT_LSM_CHUNK_MERGING 0x04u -#define WT_LSM_CHUNK_ONDISK 0x08u -#define WT_LSM_CHUNK_STABLE 0x10u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_LSM_CHUNK_BLOOM 0x01u +#define WT_LSM_CHUNK_HAS_TIMESTAMP 0x02u +#define WT_LSM_CHUNK_MERGING 0x04u +#define WT_LSM_CHUNK_ONDISK 0x08u +#define WT_LSM_CHUNK_STABLE 0x10u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; /* - * Different types of work units. Used by LSM worker threads to choose which - * type of work they will execute, and by work units to define which action - * is required. + * Different types of work units. Used by LSM worker threads to choose which type of work they will + * execute, and by work units to define which action is required. 
*/ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */ -#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */ -#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Create a bloom filter */ -#define WT_LSM_WORK_FLUSH 0x08u /* Flush a chunk to disk */ -#define WT_LSM_WORK_MERGE 0x10u /* Look for a tree merge */ -#define WT_LSM_WORK_SWITCH 0x20u /* Switch the in-memory chunk */ +#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */ +#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */ +#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Create a bloom filter */ +#define WT_LSM_WORK_FLUSH 0x08u /* Flush a chunk to disk */ +#define WT_LSM_WORK_MERGE 0x10u /* Look for a tree merge */ +#define WT_LSM_WORK_SWITCH 0x20u /* Switch the in-memory chunk */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* Work units that are serviced by general worker threads. */ -#define WT_LSM_WORK_GENERAL_OPS \ - (WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_ENABLE_EVICT |\ - WT_LSM_WORK_FLUSH | WT_LSM_WORK_SWITCH) +#define WT_LSM_WORK_GENERAL_OPS \ + (WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_ENABLE_EVICT | WT_LSM_WORK_FLUSH | \ + WT_LSM_WORK_SWITCH) /* * WT_LSM_WORK_UNIT -- * A definition of maintenance that an LSM tree needs done. */ struct __wt_lsm_work_unit { - TAILQ_ENTRY(__wt_lsm_work_unit) q; /* Worker unit queue */ - uint32_t type; /* Type of operation */ + TAILQ_ENTRY(__wt_lsm_work_unit) q; /* Worker unit queue */ + uint32_t type; /* Type of operation */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LSM_WORK_FORCE 0x1u /* Force operation */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; /* Flags for operation */ - WT_LSM_TREE *lsm_tree; +#define WT_LSM_WORK_FORCE 0x1u /* Force operation */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; /* Flags for operation */ + WT_LSM_TREE *lsm_tree; }; /* @@ -163,154 +162,154 @@ struct __wt_lsm_work_unit { * database. 
*/ struct __wt_lsm_manager { - /* - * Queues of work units for LSM worker threads. We maintain three - * queues, to allow us to keep each queue FIFO, rather than needing - * to manage the order of work by shuffling the queue order. - * One queue for switches - since switches should never wait for other - * work to be done. - * One queue for application requested work. For example flushing - * and creating bloom filters. - * One queue that is for longer running operations such as merges. - */ - TAILQ_HEAD(__wt_lsm_work_switch_qh, __wt_lsm_work_unit) switchqh; - TAILQ_HEAD(__wt_lsm_work_app_qh, __wt_lsm_work_unit) appqh; - TAILQ_HEAD(__wt_lsm_work_manager_qh, __wt_lsm_work_unit) managerqh; - WT_SPINLOCK switch_lock; /* Lock for switch queue */ - WT_SPINLOCK app_lock; /* Lock for application queue */ - WT_SPINLOCK manager_lock; /* Lock for manager queue */ - WT_CONDVAR *work_cond; /* Used to notify worker of activity */ - uint32_t lsm_workers; /* Current number of LSM workers */ - uint32_t lsm_workers_max; -#define WT_LSM_MAX_WORKERS 20 -#define WT_LSM_MIN_WORKERS 3 - WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS]; + /* + * Queues of work units for LSM worker threads. We maintain three + * queues, to allow us to keep each queue FIFO, rather than needing + * to manage the order of work by shuffling the queue order. + * One queue for switches - since switches should never wait for other + * work to be done. + * One queue for application requested work. For example flushing + * and creating bloom filters. + * One queue that is for longer running operations such as merges. 
+ */ + TAILQ_HEAD(__wt_lsm_work_switch_qh, __wt_lsm_work_unit) switchqh; + TAILQ_HEAD(__wt_lsm_work_app_qh, __wt_lsm_work_unit) appqh; + TAILQ_HEAD(__wt_lsm_work_manager_qh, __wt_lsm_work_unit) managerqh; + WT_SPINLOCK switch_lock; /* Lock for switch queue */ + WT_SPINLOCK app_lock; /* Lock for application queue */ + WT_SPINLOCK manager_lock; /* Lock for manager queue */ + WT_CONDVAR *work_cond; /* Used to notify worker of activity */ + uint32_t lsm_workers; /* Current number of LSM workers */ + uint32_t lsm_workers_max; +#define WT_LSM_MAX_WORKERS 20 +#define WT_LSM_MIN_WORKERS 3 + WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS]; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LSM_MANAGER_SHUTDOWN 0x1u /* Manager has shut down */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_LSM_MANAGER_SHUTDOWN 0x1u /* Manager has shut down */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; /* - * The value aggressive needs to get to before it influences how merges - * are chosen. The default value translates to enough level 0 chunks being - * generated to create a second level merge. + * The value aggressive needs to get to before it influences how merges are chosen. The default + * value translates to enough level 0 chunks being generated to create a second level merge. */ -#define WT_LSM_AGGRESSIVE_THRESHOLD 2 +#define WT_LSM_AGGRESSIVE_THRESHOLD 2 /* - * The minimum size for opening a tree: three chunks, plus one page for each - * participant in up to three concurrent merges. + * The minimum size for opening a tree: three chunks, plus one page for each participant in up to + * three concurrent merges. */ -#define WT_LSM_TREE_MINIMUM_SIZE(chunk_size, merge_max, maxleafpage) \ - (3 * (chunk_size) + 3 * ((merge_max) * (maxleafpage))) +#define WT_LSM_TREE_MINIMUM_SIZE(chunk_size, merge_max, maxleafpage) \ + (3 * (chunk_size) + 3 * ((merge_max) * (maxleafpage))) /* * WT_LSM_TREE -- * An LSM tree. 
*/ struct __wt_lsm_tree { - const char *name, *config, *filename; - const char *key_format, *value_format; - const char *bloom_config, *file_config; - - uint32_t custom_generation; /* Level at which a custom data source - should be used for merges. */ - const char *custom_prefix; /* Prefix for custom data source */ - const char *custom_suffix; /* Suffix for custom data source */ - - WT_COLLATOR *collator; - const char *collator_name; - int collator_owned; - - uint32_t refcnt; /* Number of users of the tree */ - WT_SESSION_IMPL *excl_session; /* Session has exclusive lock */ - -#define LSM_TREE_MAX_QUEUE 100 - uint32_t queue_ref; - WT_RWLOCK rwlock; - TAILQ_ENTRY(__wt_lsm_tree) q; - - uint64_t dsk_gen; - - uint64_t ckpt_throttle; /* Rate limiting due to checkpoints */ - uint64_t merge_throttle; /* Rate limiting due to merges */ - uint64_t chunk_fill_ms; /* Estimate of time to fill a chunk */ - struct timespec last_flush_time;/* Time last flush finished */ - uint64_t chunks_flushed; /* Count of chunks flushed since open */ - struct timespec merge_aggressive_time;/* Time for merge aggression */ - uint64_t merge_progressing; /* Bumped when merges are active */ - uint32_t merge_syncing; /* Bumped when merges are syncing */ - struct timespec last_active; /* Time last work unit added */ - uint64_t mgr_work_count; /* Manager work count */ - uint64_t work_count; /* Work units added */ - - /* Configuration parameters */ - uint32_t bloom_bit_count; - uint32_t bloom_hash_count; - uint32_t chunk_count_limit; /* Limit number of chunks */ - uint64_t chunk_size; - uint64_t chunk_max; /* Maximum chunk a merge creates */ - u_int merge_min, merge_max; + const char *name, *config, *filename; + const char *key_format, *value_format; + const char *bloom_config, *file_config; + + uint32_t custom_generation; /* Level at which a custom data source + should be used for merges. 
*/ + const char *custom_prefix; /* Prefix for custom data source */ + const char *custom_suffix; /* Suffix for custom data source */ + + WT_COLLATOR *collator; + const char *collator_name; + int collator_owned; + + uint32_t refcnt; /* Number of users of the tree */ + WT_SESSION_IMPL *excl_session; /* Session has exclusive lock */ + +#define LSM_TREE_MAX_QUEUE 100 + uint32_t queue_ref; + WT_RWLOCK rwlock; + TAILQ_ENTRY(__wt_lsm_tree) q; + + uint64_t dsk_gen; + + uint64_t ckpt_throttle; /* Rate limiting due to checkpoints */ + uint64_t merge_throttle; /* Rate limiting due to merges */ + uint64_t chunk_fill_ms; /* Estimate of time to fill a chunk */ + struct timespec last_flush_time; /* Time last flush finished */ + uint64_t chunks_flushed; /* Count of chunks flushed since open */ + struct timespec merge_aggressive_time; /* Time for merge aggression */ + uint64_t merge_progressing; /* Bumped when merges are active */ + uint32_t merge_syncing; /* Bumped when merges are syncing */ + struct timespec last_active; /* Time last work unit added */ + uint64_t mgr_work_count; /* Manager work count */ + uint64_t work_count; /* Work units added */ + + /* Configuration parameters */ + uint32_t bloom_bit_count; + uint32_t bloom_hash_count; + uint32_t chunk_count_limit; /* Limit number of chunks */ + uint64_t chunk_size; + uint64_t chunk_max; /* Maximum chunk a merge creates */ + u_int merge_min, merge_max; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LSM_BLOOM_MERGED 0x1u -#define WT_LSM_BLOOM_OFF 0x2u -#define WT_LSM_BLOOM_OLDEST 0x4u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t bloom; /* Bloom creation policy */ - - WT_LSM_CHUNK **chunk; /* Array of active LSM chunks */ - size_t chunk_alloc; /* Space allocated for chunks */ - uint32_t nchunks; /* Number of active chunks */ - uint32_t last; /* Last allocated ID */ - bool modified; /* Have there been updates? 
*/ - - WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */ - size_t old_alloc; /* Space allocated for old chunks */ - u_int nold_chunks; /* Number of old chunks */ - uint32_t freeing_old_chunks; /* Whether chunks are being freed */ - uint32_t merge_aggressiveness; /* Increase amount of work per merge */ - - /* - * We maintain a set of statistics outside of the normal statistics - * area, copying them into place when a statistics cursor is created. - */ -#define WT_LSM_TREE_STAT_INCR(session, fld) do { \ - if (WT_STAT_ENABLED(session)) \ - ++(fld); \ -} while (0) -#define WT_LSM_TREE_STAT_INCRV(session, fld, v) do { \ - if (WT_STAT_ENABLED(session)) \ - (fld) += (int64_t)(v); \ -} while (0) - int64_t bloom_false_positive; - int64_t bloom_hit; - int64_t bloom_miss; - int64_t lsm_checkpoint_throttle; - int64_t lsm_lookup_no_bloom; - int64_t lsm_merge_throttle; - - /* - * Following fields used to be flags but are susceptible to races. - * Don't merge them with flags. - */ - bool active; /* The tree is open for business */ - bool aggressive_timer_enabled; /* Timer for merge aggression enabled */ - bool need_switch; /* New chunk needs creating */ - - /* - * flags here are not protected for concurrent access, don't put - * anything here that is susceptible to races. - */ +#define WT_LSM_BLOOM_MERGED 0x1u +#define WT_LSM_BLOOM_OFF 0x2u +#define WT_LSM_BLOOM_OLDEST 0x4u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t bloom; /* Bloom creation policy */ + + WT_LSM_CHUNK **chunk; /* Array of active LSM chunks */ + size_t chunk_alloc; /* Space allocated for chunks */ + uint32_t nchunks; /* Number of active chunks */ + uint32_t last; /* Last allocated ID */ + bool modified; /* Have there been updates? 
*/ + + WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */ + size_t old_alloc; /* Space allocated for old chunks */ + u_int nold_chunks; /* Number of old chunks */ + uint32_t freeing_old_chunks; /* Whether chunks are being freed */ + uint32_t merge_aggressiveness; /* Increase amount of work per merge */ + +/* + * We maintain a set of statistics outside of the normal statistics area, copying them into place + * when a statistics cursor is created. + */ +#define WT_LSM_TREE_STAT_INCR(session, fld) \ + do { \ + if (WT_STAT_ENABLED(session)) \ + ++(fld); \ + } while (0) +#define WT_LSM_TREE_STAT_INCRV(session, fld, v) \ + do { \ + if (WT_STAT_ENABLED(session)) \ + (fld) += (int64_t)(v); \ + } while (0) + int64_t bloom_false_positive; + int64_t bloom_hit; + int64_t bloom_miss; + int64_t lsm_checkpoint_throttle; + int64_t lsm_lookup_no_bloom; + int64_t lsm_merge_throttle; + + /* + * Following fields used to be flags but are susceptible to races. Don't merge them with flags. + */ + bool active; /* The tree is open for business */ + bool aggressive_timer_enabled; /* Timer for merge aggression enabled */ + bool need_switch; /* New chunk needs creating */ + +/* + * flags here are not protected for concurrent access, don't put anything here that is susceptible + * to races. 
+ */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_LSM_TREE_COMPACTING 0x1u /* Tree being compacted */ -#define WT_LSM_TREE_MERGES 0x2u /* Tree should run merges */ -#define WT_LSM_TREE_OPEN 0x4u /* The tree is open */ -#define WT_LSM_TREE_THROTTLE 0x8u /* Throttle updates */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_LSM_TREE_COMPACTING 0x1u /* Tree being compacted */ +#define WT_LSM_TREE_MERGES 0x2u /* Tree should run merges */ +#define WT_LSM_TREE_OPEN 0x4u /* The tree is open */ +#define WT_LSM_TREE_THROTTLE 0x8u /* Throttle updates */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; /* @@ -318,7 +317,7 @@ struct __wt_lsm_tree { * Implementation of the WT_DATA_SOURCE interface for LSM. */ struct __wt_lsm_data_source { - WT_DATA_SOURCE iface; + WT_DATA_SOURCE iface; - WT_RWLOCK *rwlock; + WT_RWLOCK *rwlock; }; diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h index ac9e0be3c20..574c9400f8f 100644 --- a/src/third_party/wiredtiger/src/include/meta.h +++ b/src/third_party/wiredtiger/src/include/meta.h @@ -6,107 +6,101 @@ * See the file LICENSE for redistribution information. 
*/ -#define WT_WIREDTIGER "WiredTiger" /* Version file */ -#define WT_SINGLETHREAD "WiredTiger.lock" /* Locking file */ +#define WT_WIREDTIGER "WiredTiger" /* Version file */ +#define WT_SINGLETHREAD "WiredTiger.lock" /* Locking file */ -#define WT_BASECONFIG "WiredTiger.basecfg" /* Base configuration */ -#define WT_BASECONFIG_SET "WiredTiger.basecfg.set"/* Base config temp */ +#define WT_BASECONFIG "WiredTiger.basecfg" /* Base configuration */ +#define WT_BASECONFIG_SET "WiredTiger.basecfg.set" /* Base config temp */ -#define WT_USERCONFIG "WiredTiger.config" /* User configuration */ +#define WT_USERCONFIG "WiredTiger.config" /* User configuration */ -#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */ -#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */ -#define WT_INCREMENTAL_BACKUP "WiredTiger.ibackup" /* Incremental backup */ -#define WT_INCREMENTAL_SRC "WiredTiger.isrc" /* Incremental source */ +#define WT_BACKUP_TMP "WiredTiger.backup.tmp" /* Backup tmp file */ +#define WT_METADATA_BACKUP "WiredTiger.backup" /* Hot backup file */ +#define WT_INCREMENTAL_BACKUP "WiredTiger.ibackup" /* Incremental backup */ +#define WT_INCREMENTAL_SRC "WiredTiger.isrc" /* Incremental source */ -#define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */ -#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */ +#define WT_METADATA_TURTLE "WiredTiger.turtle" /* Metadata metadata */ +#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */ -#define WT_METADATA_URI "metadata:" /* Metadata alias */ -#define WT_METAFILE "WiredTiger.wt" /* Metadata table */ -#define WT_METAFILE_SLVG "WiredTiger.wt.orig" /* Metadata copy */ -#define WT_METAFILE_URI "file:WiredTiger.wt" /* Metadata table URI */ +#define WT_METADATA_URI "metadata:" /* Metadata alias */ +#define WT_METAFILE "WiredTiger.wt" /* Metadata table */ +#define WT_METAFILE_SLVG "WiredTiger.wt.orig" /* Metadata copy */ +#define WT_METAFILE_URI 
"file:WiredTiger.wt" /* Metadata table URI */ -#define WT_LAS_FILE "WiredTigerLAS.wt" /* Lookaside table */ -#define WT_LAS_URI "file:WiredTigerLAS.wt" /* Lookaside table URI*/ +#define WT_LAS_FILE "WiredTigerLAS.wt" /* Lookaside table */ +#define WT_LAS_URI "file:WiredTigerLAS.wt" /* Lookaside table URI*/ -#define WT_SYSTEM_PREFIX "system:" /* System URI prefix */ -#define WT_SYSTEM_CKPT_URI "system:checkpoint" /* Checkpoint URI */ +#define WT_SYSTEM_PREFIX "system:" /* System URI prefix */ +#define WT_SYSTEM_CKPT_URI "system:checkpoint" /* Checkpoint URI */ /* - * Optimize comparisons against the metafile URI, flag handles that reference - * the metadata file. + * Optimize comparisons against the metafile URI, flag handles that reference the metadata file. */ -#define WT_IS_METADATA(dh) F_ISSET((dh), WT_DHANDLE_IS_METADATA) -#define WT_METAFILE_ID 0 /* Metadata file ID */ +#define WT_IS_METADATA(dh) F_ISSET((dh), WT_DHANDLE_IS_METADATA) +#define WT_METAFILE_ID 0 /* Metadata file ID */ -#define WT_METADATA_COMPAT "Compatibility version" -#define WT_METADATA_VERSION "WiredTiger version" /* Version keys */ -#define WT_METADATA_VERSION_STR "WiredTiger version string" +#define WT_METADATA_COMPAT "Compatibility version" +#define WT_METADATA_VERSION "WiredTiger version" /* Version keys */ +#define WT_METADATA_VERSION_STR "WiredTiger version string" /* * WT_WITH_TURTLE_LOCK -- * Acquire the turtle file lock, perform an operation, drop the lock. 
*/ -#define WT_WITH_TURTLE_LOCK(session, op) do { \ - WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_TURTLE));\ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->turtle_lock, WT_SESSION_LOCKED_TURTLE, op); \ -} while (0) +#define WT_WITH_TURTLE_LOCK(session, op) \ + do { \ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_TURTLE)); \ + WT_WITH_LOCK_WAIT(session, &S2C(session)->turtle_lock, WT_SESSION_LOCKED_TURTLE, op); \ + } while (0) /* * WT_CKPT -- * Encapsulation of checkpoint information, shared by the metadata, the * btree engine, and the block manager. */ -#define WT_CHECKPOINT "WiredTigerCheckpoint" -#define WT_CKPT_FOREACH(ckptbase, ckpt) \ - for ((ckpt) = (ckptbase); (ckpt)->name != NULL; ++(ckpt)) +#define WT_CHECKPOINT "WiredTigerCheckpoint" +#define WT_CKPT_FOREACH(ckptbase, ckpt) for ((ckpt) = (ckptbase); (ckpt)->name != NULL; ++(ckpt)) struct __wt_ckpt { - char *name; /* Name or NULL */ + char *name; /* Name or NULL */ - /* - * Each internal checkpoint name is appended with a generation - * to make it a unique name. We're solving two problems: when - * two checkpoints are taken quickly, the timer may not be - * unique and/or we can even see time travel on the second - * checkpoint if we snapshot the time in-between nanoseconds - * rolling over. Second, if we reset the generational counter - * when new checkpoints arrive, we could logically re-create - * specific checkpoints, racing with cursors open on those - * checkpoints. I can't think of any way to return incorrect - * results by racing with those cursors, but it's simpler not - * to worry about it. - */ - int64_t order; /* Checkpoint order */ + /* + * Each internal checkpoint name is appended with a generation to make it a unique name. We're + * solving two problems: when two checkpoints are taken quickly, the timer may not be unique + * and/or we can even see time travel on the second checkpoint if we snapshot the time + * in-between nanoseconds rolling over. 
Second, if we reset the generational counter when new + * checkpoints arrive, we could logically re-create specific checkpoints, racing with cursors + * open on those checkpoints. I can't think of any way to return incorrect results by racing + * with those cursors, but it's simpler not to worry about it. + */ + int64_t order; /* Checkpoint order */ - uint64_t sec; /* Wall clock time */ + uint64_t sec; /* Wall clock time */ - uint64_t size; /* Checkpoint size */ + uint64_t size; /* Checkpoint size */ - uint64_t write_gen; /* Write generation */ + uint64_t write_gen; /* Write generation */ - char *block_metadata; /* Block-stored metadata */ - char *block_checkpoint; /* Block-stored checkpoint */ + char *block_metadata; /* Block-stored metadata */ + char *block_checkpoint; /* Block-stored checkpoint */ - /* Validity window */ - wt_timestamp_t newest_durable_ts; - wt_timestamp_t oldest_start_ts; - uint64_t oldest_start_txn; - wt_timestamp_t newest_stop_ts; - uint64_t newest_stop_txn; + /* Validity window */ + wt_timestamp_t newest_durable_ts; + wt_timestamp_t oldest_start_ts; + uint64_t oldest_start_txn; + wt_timestamp_t newest_stop_ts; + uint64_t newest_stop_txn; - WT_ITEM addr; /* Checkpoint cookie string */ - WT_ITEM raw; /* Checkpoint cookie raw */ + WT_ITEM addr; /* Checkpoint cookie string */ + WT_ITEM raw; /* Checkpoint cookie raw */ - void *bpriv; /* Block manager private */ + void *bpriv; /* Block manager private */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CKPT_ADD 0x1u /* Checkpoint to be added */ -#define WT_CKPT_DELETE 0x2u /* Checkpoint to be deleted */ -#define WT_CKPT_FAKE 0x4u /* Checkpoint is a fake */ -#define WT_CKPT_UPDATE 0x8u /* Checkpoint requires update */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_CKPT_ADD 0x1u /* Checkpoint to be added */ +#define WT_CKPT_DELETE 0x2u /* Checkpoint to be deleted */ +#define WT_CKPT_FAKE 0x4u /* Checkpoint is a fake */ +#define WT_CKPT_UPDATE 0x8u /* Checkpoint 
requires update */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index 18a88649ae6..046d724d1f7 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -7,142 +7,141 @@ */ /* - * Quiet compiler warnings about unused function parameters and variables, - * and unused function return values. + * Quiet compiler warnings about unused function parameters and variables, and unused function + * return values. */ -#define WT_UNUSED(var) (void)(var) -#define WT_NOT_READ(v, val) do { \ - (v) = (val); \ - (void)(v); \ -} while (0); -#define WT_IGNORE_RET(call) do { \ - uintmax_t __ignored_ret; \ - __ignored_ret = (uintmax_t)(call); \ - WT_UNUSED(__ignored_ret); \ -} while (0) -#define WT_IGNORE_RET_BOOL(call) do { \ - bool __ignored_ret; \ - __ignored_ret = (call); \ - WT_UNUSED(__ignored_ret); \ -} while (0) -#define WT_IGNORE_RET_PTR(call) do { \ - const void *__ignored_ret; \ - __ignored_ret = (call); \ - WT_UNUSED(__ignored_ret); \ -} while (0) - -#define WT_DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" +#define WT_UNUSED(var) (void)(var) +#define WT_NOT_READ(v, val) \ + do { \ + (v) = (val); \ + (void)(v); \ + } while (0); +#define WT_IGNORE_RET(call) \ + do { \ + uintmax_t __ignored_ret; \ + __ignored_ret = (uintmax_t)(call); \ + WT_UNUSED(__ignored_ret); \ + } while (0) +#define WT_IGNORE_RET_BOOL(call) \ + do { \ + bool __ignored_ret; \ + __ignored_ret = (call); \ + WT_UNUSED(__ignored_ret); \ + } while (0) +#define WT_IGNORE_RET_PTR(call) \ + do { \ + const void *__ignored_ret; \ + __ignored_ret = (call); \ + WT_UNUSED(__ignored_ret); \ + } while (0) + +#define WT_DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" /* Basic constants. 
*/ -#define WT_THOUSAND (1000) -#define WT_MILLION (1000000) -#define WT_BILLION (1000000000) +#define WT_THOUSAND (1000) +#define WT_MILLION (1000000) +#define WT_BILLION (1000000000) -#define WT_MINUTE (60) +#define WT_MINUTE (60) -#define WT_PROGRESS_MSG_PERIOD (20) +#define WT_PROGRESS_MSG_PERIOD (20) -#define WT_KILOBYTE (1024) -#define WT_MEGABYTE (1048576) -#define WT_GIGABYTE (1073741824) -#define WT_TERABYTE ((uint64_t)1099511627776) -#define WT_PETABYTE ((uint64_t)1125899906842624) -#define WT_EXABYTE ((uint64_t)1152921504606846976) +#define WT_KILOBYTE (1024) +#define WT_MEGABYTE (1048576) +#define WT_GIGABYTE (1073741824) +#define WT_TERABYTE ((uint64_t)1099511627776) +#define WT_PETABYTE ((uint64_t)1125899906842624) +#define WT_EXABYTE ((uint64_t)1152921504606846976) /* - * Sizes that cannot be larger than 2**32 are stored in uint32_t fields in - * common structures to save space. To minimize conversions from size_t to - * uint32_t through the code, we use the following macros. + * Sizes that cannot be larger than 2**32 are stored in uint32_t fields in common structures to save + * space. To minimize conversions from size_t to uint32_t through the code, we use the following + * macros. 
*/ -#define WT_STORE_SIZE(s) ((uint32_t)(s)) -#define WT_PTRDIFF(end, begin) \ - ((size_t)((const uint8_t *)(end) - (const uint8_t *)(begin))) -#define WT_PTRDIFF32(end, begin) \ - WT_STORE_SIZE(WT_PTRDIFF((end), (begin))) -#define WT_BLOCK_FITS(p, len, begin, maxlen) \ - ((const uint8_t *)(p) >= (const uint8_t *)(begin) && \ - ((const uint8_t *)(p) + (len) <= (const uint8_t *)(begin) + (maxlen))) -#define WT_PTR_IN_RANGE(p, begin, maxlen) \ - WT_BLOCK_FITS((p), 1, (begin), (maxlen)) +#define WT_STORE_SIZE(s) ((uint32_t)(s)) +#define WT_PTRDIFF(end, begin) ((size_t)((const uint8_t *)(end) - (const uint8_t *)(begin))) +#define WT_PTRDIFF32(end, begin) WT_STORE_SIZE(WT_PTRDIFF((end), (begin))) +#define WT_BLOCK_FITS(p, len, begin, maxlen) \ + ((const uint8_t *)(p) >= (const uint8_t *)(begin) && \ + ((const uint8_t *)(p) + (len) <= (const uint8_t *)(begin) + (maxlen))) +#define WT_PTR_IN_RANGE(p, begin, maxlen) WT_BLOCK_FITS((p), 1, (begin), (maxlen)) /* - * Align an unsigned value of any type to a specified power-of-2, including the - * offset result of a pointer subtraction; do the calculation using the largest - * unsigned integer type available. + * Align an unsigned value of any type to a specified power-of-2, including the offset result of a + * pointer subtraction; do the calculation using the largest unsigned integer type available. */ -#define WT_ALIGN(n, v) \ - ((((uintmax_t)(n)) + ((v) - 1)) & ~(((uintmax_t)(v)) - 1)) +#define WT_ALIGN(n, v) ((((uintmax_t)(n)) + ((v)-1)) & ~(((uintmax_t)(v)) - 1)) -#define WT_ALIGN_NEAREST(n, v) \ - ((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1)) +#define WT_ALIGN_NEAREST(n, v) ((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1)) /* Min, max. */ -#define WT_MIN(a, b) ((a) < (b) ? (a) : (b)) -#define WT_MAX(a, b) ((a) < (b) ? (b) : (a)) +#define WT_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define WT_MAX(a, b) ((a) < (b) ? (b) : (a)) /* Elements in an array. 
*/ -#define WT_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) +#define WT_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) /* 10 level skip lists, 1/4 have a link to the next element. */ -#define WT_SKIP_MAXDEPTH 10 -#define WT_SKIP_PROBABILITY (UINT32_MAX >> 2) +#define WT_SKIP_MAXDEPTH 10 +#define WT_SKIP_PROBABILITY (UINT32_MAX >> 2) /* - * Encryption needs to know its original length before either the - * block or logging subsystems pad. Constant value. + * Encryption needs to know its original length before either the block or logging subsystems pad. + * Constant value. */ -#define WT_ENCRYPT_LEN_SIZE sizeof(uint32_t) +#define WT_ENCRYPT_LEN_SIZE sizeof(uint32_t) /* - * Default hash table size; we don't need a prime number of buckets - * because we always use a good hash function. + * Default hash table size; we don't need a prime number of buckets because we always use a good + * hash function. */ -#define WT_HASH_ARRAY_SIZE 512 +#define WT_HASH_ARRAY_SIZE 512 /* * __wt_calloc_def, __wt_calloc_one -- - * Most calloc calls don't need separate count or sizeof arguments. + * Most calloc calls don't need separate count or sizeof arguments. */ -#define __wt_calloc_def(session, number, addr) \ - __wt_calloc(session, (size_t)(number), sizeof(**(addr)), addr) -#define __wt_calloc_one(session, addr) \ - __wt_calloc(session, (size_t)1, sizeof(**(addr)), addr) +#define __wt_calloc_def(session, number, addr) \ + __wt_calloc(session, (size_t)(number), sizeof(**(addr)), addr) +#define __wt_calloc_one(session, addr) __wt_calloc(session, (size_t)1, sizeof(**(addr)), addr) /* * __wt_realloc_def -- - * Common case allocate-and-grow function. - * Starts by allocating the requested number of items (at least 10), then - * doubles each time the list needs to grow. + * Common case allocate-and-grow function. Starts by allocating the requested number of items + * (at least 10), then doubles each time the list needs to grow. 
*/ -#define __wt_realloc_def(session, sizep, number, addr) \ - (((number) * sizeof(**(addr)) <= *(sizep)) ? 0 : \ - __wt_realloc(session, sizep, WT_MAX(*(sizep) * 2, \ - WT_MAX(10, (number)) * sizeof(**(addr))), addr)) +#define __wt_realloc_def(session, sizep, number, addr) \ + (((number) * sizeof(**(addr)) <= *(sizep)) ? \ + 0 : \ + __wt_realloc( \ + session, sizep, WT_MAX(*(sizep)*2, WT_MAX(10, (number)) * sizeof(**(addr))), addr)) /* - * Our internal free function clears the underlying address atomically so there - * is a smaller chance of racing threads seeing intermediate results while a - * structure is being free'd. (That would be a bug, of course, but I'd rather - * not drop core, just the same.) That's a non-standard "free" API, and the - * resulting bug is a mother to find -- make sure we get it right, don't make - * the caller remember to put the & operator on the pointer. + * Our internal free function clears the underlying address atomically so there is a smaller chance + * of racing threads seeing intermediate results while a structure is being free'd. (That would be a + * bug, of course, but I'd rather not drop core, just the same.) That's a non-standard "free" API, + * and the resulting bug is a mother to find -- make sure we get it right, don't make the caller + * remember to put the & operator on the pointer. 
*/ -#define __wt_free(session, p) do { \ - void *__p = &(p); \ - if (*(void **)__p != NULL) \ - __wt_free_int(session, __p); \ -} while (0) +#define __wt_free(session, p) \ + do { \ + void *__p = &(p); \ + if (*(void **)__p != NULL) \ + __wt_free_int(session, __p); \ + } while (0) #ifdef HAVE_DIAGNOSTIC -#define __wt_overwrite_and_free(session, p) do { \ - memset(p, WT_DEBUG_BYTE, sizeof(*(p))); \ - __wt_free(session, p); \ -} while (0) -#define __wt_overwrite_and_free_len(session, p, len) do { \ - memset(p, WT_DEBUG_BYTE, len); \ - __wt_free(session, p); \ -} while (0) +#define __wt_overwrite_and_free(session, p) \ + do { \ + memset(p, WT_DEBUG_BYTE, sizeof(*(p))); \ + __wt_free(session, p); \ + } while (0) +#define __wt_overwrite_and_free_len(session, p, len) \ + do { \ + memset(p, WT_DEBUG_BYTE, len); \ + __wt_free(session, p); \ + } while (0) #else -#define __wt_overwrite_and_free(session, p) __wt_free(session, p) -#define __wt_overwrite_and_free_len(session, p, len) __wt_free(session, p) +#define __wt_overwrite_and_free(session, p) __wt_free(session, p) +#define __wt_overwrite_and_free_len(session, p, len) __wt_free(session, p) #endif /* @@ -156,20 +155,20 @@ * hex constant might be a negative integer), and to ensure the hex constant is * the correct size before applying the bitwise not operator. 
*/ -#define FLD_CLR(field, mask) ((void)((field) &= ~(mask))) -#define FLD_MASK(field, mask) ((field) & (mask)) -#define FLD_ISSET(field, mask) (FLD_MASK(field, mask) != 0) -#define FLD_SET(field, mask) ((void)((field) |= (mask))) +#define FLD_CLR(field, mask) ((void)((field) &= ~(mask))) +#define FLD_MASK(field, mask) ((field) & (mask)) +#define FLD_ISSET(field, mask) (FLD_MASK(field, mask) != 0) +#define FLD_SET(field, mask) ((void)((field) |= (mask))) -#define F_CLR(p, mask) FLD_CLR((p)->flags, mask) -#define F_ISSET(p, mask) FLD_ISSET((p)->flags, mask) -#define F_MASK(p, mask) FLD_MASK((p)->flags, mask) -#define F_SET(p, mask) FLD_SET((p)->flags, mask) +#define F_CLR(p, mask) FLD_CLR((p)->flags, mask) +#define F_ISSET(p, mask) FLD_ISSET((p)->flags, mask) +#define F_MASK(p, mask) FLD_MASK((p)->flags, mask) +#define F_SET(p, mask) FLD_SET((p)->flags, mask) -#define LF_CLR(mask) FLD_CLR(flags, mask) -#define LF_ISSET(mask) FLD_ISSET(flags, mask) -#define LF_MASK(mask) FLD_MASK(flags, mask) -#define LF_SET(mask) FLD_SET(flags, mask) +#define LF_CLR(mask) FLD_CLR(flags, mask) +#define LF_ISSET(mask) FLD_ISSET(flags, mask) +#define LF_MASK(mask) FLD_MASK(flags, mask) +#define LF_SET(mask) FLD_SET(flags, mask) /* * Insertion sort, for sorting small sets of values. @@ -177,140 +176,131 @@ * The "compare_lt" argument is a function or macro that returns true when * its first argument is less than its second argument. 
*/ -#define WT_INSERTION_SORT(arrayp, n, value_type, compare_lt) do { \ - value_type __v; \ - int __i, __j, __n = (int)(n); \ - if (__n == 2) { \ - __v = (arrayp)[1]; \ - if (compare_lt(__v, (arrayp)[0])) { \ - (arrayp)[1] = (arrayp)[0]; \ - (arrayp)[0] = __v; \ - } \ - } \ - if (__n > 2) { \ - for (__i = 1; __i < __n; ++__i) { \ - __v = (arrayp)[__i]; \ - for (__j = __i - 1; __j >= 0 && \ - compare_lt(__v, (arrayp)[__j]); --__j) \ - (arrayp)[__j + 1] = (arrayp)[__j]; \ - (arrayp)[__j + 1] = __v; \ - } \ - } \ -} while (0) +#define WT_INSERTION_SORT(arrayp, n, value_type, compare_lt) \ + do { \ + value_type __v; \ + int __i, __j, __n = (int)(n); \ + if (__n == 2) { \ + __v = (arrayp)[1]; \ + if (compare_lt(__v, (arrayp)[0])) { \ + (arrayp)[1] = (arrayp)[0]; \ + (arrayp)[0] = __v; \ + } \ + } \ + if (__n > 2) { \ + for (__i = 1; __i < __n; ++__i) { \ + __v = (arrayp)[__i]; \ + for (__j = __i - 1; __j >= 0 && compare_lt(__v, (arrayp)[__j]); --__j) \ + (arrayp)[__j + 1] = (arrayp)[__j]; \ + (arrayp)[__j + 1] = __v; \ + } \ + } \ + } while (0) /* - * Some C compiler address sanitizers complain if qsort is passed a NULL base - * reference, even if there are no elements to compare (note zero elements is - * allowed by the IEEE Std 1003.1-2017 standard). Avoid the complaint. + * Some C compiler address sanitizers complain if qsort is passed a NULL base reference, even if + * there are no elements to compare (note zero elements is allowed by the IEEE Std 1003.1-2017 + * standard). Avoid the complaint. */ -#define __wt_qsort(base, nmemb, size, compar) \ - if ((nmemb) != 0) \ - qsort(base, nmemb, size, compar) +#define __wt_qsort(base, nmemb, size, compar) \ + if ((nmemb) != 0) \ + qsort(base, nmemb, size, compar) /* * Binary search for an integer key. 
*/ -#define WT_BINARY_SEARCH(key, arrayp, n, found) do { \ - uint32_t __base, __indx, __limit; \ - (found) = false; \ - for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \ - __indx = __base + (__limit >> 1); \ - if ((arrayp)[__indx] < (key)) { \ - __base = __indx + 1; \ - --__limit; \ - } else if ((arrayp)[__indx] == (key)) { \ - (found) = true; \ - break; \ - } \ - } \ -} while (0) +#define WT_BINARY_SEARCH(key, arrayp, n, found) \ + do { \ + uint32_t __base, __indx, __limit; \ + (found) = false; \ + for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \ + __indx = __base + (__limit >> 1); \ + if ((arrayp)[__indx] < (key)) { \ + __base = __indx + 1; \ + --__limit; \ + } else if ((arrayp)[__indx] == (key)) { \ + (found) = true; \ + break; \ + } \ + } \ + } while (0) /* Verbose messages. */ -#define WT_VERBOSE_ISSET(session, f) \ - (FLD_ISSET(S2C(session)->verbose, f)) +#define WT_VERBOSE_ISSET(session, f) (FLD_ISSET(S2C(session)->verbose, f)) -#define WT_CLEAR(s) \ - memset(&(s), 0, sizeof(s)) +#define WT_CLEAR(s) memset(&(s), 0, sizeof(s)) /* Check if a string matches a prefix. */ -#define WT_PREFIX_MATCH(str, pfx) \ - (((const char *)(str))[0] == ((const char *)(pfx))[0] && \ - strncmp(str, pfx, strlen(pfx)) == 0) +#define WT_PREFIX_MATCH(str, pfx) \ + (((const char *)(str))[0] == ((const char *)(pfx))[0] && strncmp(str, pfx, strlen(pfx)) == 0) /* Check if a string matches a prefix, and move past it. */ -#define WT_PREFIX_SKIP(str, pfx) \ - (WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0) +#define WT_PREFIX_SKIP(str, pfx) (WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0) /* Assert that a string matches a prefix, and move past it. 
*/ -#define WT_PREFIX_SKIP_REQUIRED(session, str, pfx) do { \ - WT_ASSERT(session, WT_PREFIX_MATCH(str, pfx)); \ - (str) += strlen(pfx); \ -} while (0) +#define WT_PREFIX_SKIP_REQUIRED(session, str, pfx) \ + do { \ + WT_ASSERT(session, WT_PREFIX_MATCH(str, pfx)); \ + (str) += strlen(pfx); \ + } while (0) /* - * Check if a variable string equals a constant string. Inline the common case - * for WiredTiger of a single byte string. This is required because not all - * compilers optimize this case in strcmp (e.g., clang). While this macro works - * in the case of comparing two pointers (a sizeof operator on a pointer won't - * equal 2 and the extra code will be discarded at compile time), that's not its + * Check if a variable string equals a constant string. Inline the common case for WiredTiger of a + * single byte string. This is required because not all compilers optimize this case in strcmp + * (e.g., clang). While this macro works in the case of comparing two pointers (a sizeof operator on + * a pointer won't equal 2 and the extra code will be discarded at compile time), that's not its * purpose. */ -#define WT_STREQ(s, cs) \ - (sizeof(cs) == 2 ? (s)[0] == (cs)[0] && (s)[1] == '\0' : \ - strcmp(s, cs) == 0) +#define WT_STREQ(s, cs) (sizeof(cs) == 2 ? (s)[0] == (cs)[0] && (s)[1] == '\0' : strcmp(s, cs) == 0) /* Check if a string matches a byte string of len bytes. */ -#define WT_STRING_MATCH(str, bytes, len) \ - (((const char *)(str))[0] == ((const char *)(bytes))[0] && \ - strncmp(str, bytes, len) == 0 && (str)[len] == '\0') +#define WT_STRING_MATCH(str, bytes, len) \ + (((const char *)(str))[0] == ((const char *)(bytes))[0] && strncmp(str, bytes, len) == 0 && \ + (str)[len] == '\0') /* - * Macro that produces a string literal that isn't wrapped in quotes, to avoid - * tripping up spell checkers. + * Macro that produces a string literal that isn't wrapped in quotes, to avoid tripping up spell + * checkers. 
*/ -#define WT_UNCHECKED_STRING(str) #str +#define WT_UNCHECKED_STRING(str) #str /* Function return value and scratch buffer declaration and initialization. */ -#define WT_DECL_ITEM(i) WT_ITEM *i = NULL -#define WT_DECL_RET int ret = 0 +#define WT_DECL_ITEM(i) WT_ITEM *i = NULL +#define WT_DECL_RET int ret = 0 /* If a WT_ITEM data field points somewhere in its allocated memory. */ -#define WT_DATA_IN_ITEM(i) \ - ((i)->mem != NULL && (i)->data >= (i)->mem && \ - WT_PTRDIFF((i)->data, (i)->mem) < (i)->memsize) +#define WT_DATA_IN_ITEM(i) \ + ((i)->mem != NULL && (i)->data >= (i)->mem && WT_PTRDIFF((i)->data, (i)->mem) < (i)->memsize) /* Copy the data and size fields of an item. */ -#define WT_ITEM_SET(dst, src) do { \ - (dst).data = (src).data; \ - (dst).size = (src).size; \ -} while (0) +#define WT_ITEM_SET(dst, src) \ + do { \ + (dst).data = (src).data; \ + (dst).size = (src).size; \ + } while (0) /* - * In diagnostic mode we track the locations from which hazard pointers and - * scratch buffers were acquired. + * In diagnostic mode we track the locations from which hazard pointers and scratch buffers were + * acquired. 
*/ #ifdef HAVE_DIAGNOSTIC -#define __wt_scr_alloc(session, size, scratchp) \ - __wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__) -#define __wt_page_in(session, ref, flags) \ - __wt_page_in_func(session, ref, flags, __func__, __LINE__) -#define __wt_page_swap(session, held, want, flags) \ - __wt_page_swap_func(session, held, want, flags, __func__, __LINE__) +#define __wt_scr_alloc(session, size, scratchp) \ + __wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__) +#define __wt_page_in(session, ref, flags) __wt_page_in_func(session, ref, flags, __func__, __LINE__) +#define __wt_page_swap(session, held, want, flags) \ + __wt_page_swap_func(session, held, want, flags, __func__, __LINE__) #else -#define __wt_scr_alloc(session, size, scratchp) \ - __wt_scr_alloc_func(session, size, scratchp) -#define __wt_page_in(session, ref, flags) \ - __wt_page_in_func(session, ref, flags) -#define __wt_page_swap(session, held, want, flags) \ - __wt_page_swap_func(session, held, want, flags) +#define __wt_scr_alloc(session, size, scratchp) __wt_scr_alloc_func(session, size, scratchp) +#define __wt_page_in(session, ref, flags) __wt_page_in_func(session, ref, flags) +#define __wt_page_swap(session, held, want, flags) __wt_page_swap_func(session, held, want, flags) #endif /* Random number generator state. */ union __wt_rand_state { - uint64_t v; - struct { - uint32_t w, z; - } x; + uint64_t v; + struct { + uint32_t w, z; + } x; }; /* @@ -322,15 +312,14 @@ union __wt_rand_state { * this macro works even when the next element gets removed along with the * current one. */ -#define WT_TAILQ_SAFE_REMOVE_BEGIN(var, head, field, tvar) \ - for ((tvar) = NULL; ((var) = TAILQ_FIRST(head)) != NULL; \ - (tvar) = (var)) { \ - if ((tvar) == (var)) { \ - /* Leak the structure. 
*/ \ - TAILQ_REMOVE(head, (var), field); \ - continue; \ - } -#define WT_TAILQ_SAFE_REMOVE_END } +#define WT_TAILQ_SAFE_REMOVE_BEGIN(var, head, field, tvar) \ + for ((tvar) = NULL; ((var) = TAILQ_FIRST(head)) != NULL; (tvar) = (var)) { \ + if ((tvar) == (var)) { \ + /* Leak the structure. */ \ + TAILQ_REMOVE(head, (var), field); \ + continue; \ + } +#define WT_TAILQ_SAFE_REMOVE_END } /* * WT_VA_ARGS_BUF_FORMAT -- @@ -338,43 +327,42 @@ union __wt_rand_state { * macro because we need to repeatedly call va_start/va_end and there's no * way to do that inside a function call. */ -#define WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, concatenate) do { \ - size_t __len, __space; \ - va_list __ap; \ - int __ret_xx; /* __ret already used by WT_RET */ \ - char *__p; \ - \ - /* \ - * This macro is used to both initialize and concatenate into a \ - * buffer. If not concatenating, clear the size so we don't use \ - * any existing contents. \ - */ \ - if (!(concatenate)) \ - (buf)->size = 0; \ - for (;;) { \ - WT_ASSERT(session, (buf)->memsize >= (buf)->size); \ - __p = (char *)((uint8_t *)(buf)->mem + (buf)->size); \ - __space = (buf)->memsize - (buf)->size; \ - \ - /* Format into the buffer. */ \ - va_start(__ap, fmt); \ - __ret_xx = __wt_vsnprintf_len_set( \ - __p, __space, &__len, fmt, __ap); \ - va_end(__ap); \ - WT_RET(__ret_xx); \ - \ - /* Check if there was enough space. */ \ - if (__len < __space) { \ - (buf)->data = (buf)->mem; \ - (buf)->size += __len; \ - break; \ - } \ - \ - /* \ - * If not, double the size of the buffer: we're dealing \ - * with strings, we don't expect the size to get huge. \ - */ \ - WT_RET(__wt_buf_extend( \ - session, buf, (buf)->size + __len + 1)); \ - } \ -} while (0) +#define WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, concatenate) \ + do { \ + size_t __len, __space; \ + va_list __ap; \ + int __ret_xx; /* __ret already used by WT_RET */ \ + char *__p; \ + \ + /* \ + * This macro is used to both initialize and concatenate into a \ + * buffer. 
If not concatenating, clear the size so we don't use \ + * any existing contents. \ + */ \ + if (!(concatenate)) \ + (buf)->size = 0; \ + for (;;) { \ + WT_ASSERT(session, (buf)->memsize >= (buf)->size); \ + __p = (char *)((uint8_t *)(buf)->mem + (buf)->size); \ + __space = (buf)->memsize - (buf)->size; \ + \ + /* Format into the buffer. */ \ + va_start(__ap, fmt); \ + __ret_xx = __wt_vsnprintf_len_set(__p, __space, &__len, fmt, __ap); \ + va_end(__ap); \ + WT_RET(__ret_xx); \ + \ + /* Check if there was enough space. */ \ + if (__len < __space) { \ + (buf)->data = (buf)->mem; \ + (buf)->size += __len; \ + break; \ + } \ + \ + /* \ + * If not, double the size of the buffer: we're dealing \ + * with strings, we don't expect the size to get huge. \ + */ \ + WT_RET(__wt_buf_extend(session, buf, (buf)->size + __len + 1)); \ + } \ + } while (0) diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i index bd3ef02da54..7b908ac3871 100644 --- a/src/third_party/wiredtiger/src/include/misc.i +++ b/src/third_party/wiredtiger/src/include/misc.i @@ -8,288 +8,276 @@ /* * __wt_cond_wait -- - * Wait on a mutex, optionally timing out. + * Wait on a mutex, optionally timing out. */ static inline void -__wt_cond_wait(WT_SESSION_IMPL *session, - WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *)) +__wt_cond_wait( + WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *)) { - bool notused; + bool notused; - __wt_cond_wait_signal(session, cond, usecs, run_func, ¬used); + __wt_cond_wait_signal(session, cond, usecs, run_func, ¬used); } /* * __wt_hex -- - * Convert a byte to a hex character. + * Convert a byte to a hex character. */ static inline u_char __wt_hex(int c) { - return ((u_char)"0123456789abcdef"[c]); + return ((u_char) "0123456789abcdef"[c]); } /* * __wt_rdtsc -- - * Get a timestamp from CPU registers. + * Get a timestamp from CPU registers. 
*/ static inline uint64_t -__wt_rdtsc(void) { -#if defined (__i386) - { - uint64_t x; - - __asm__ volatile ("rdtsc" : "=A" (x)); - return (x); - } -#elif defined (__amd64) - { - uint64_t a, d; - - __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d)); - return ((d << 32) | a); - } +__wt_rdtsc(void) +{ +#if defined(__i386) + { + uint64_t x; + + __asm__ volatile("rdtsc" : "=A"(x)); + return (x); + } +#elif defined(__amd64) + { + uint64_t a, d; + + __asm__ volatile("rdtsc" : "=a"(a), "=d"(d)); + return ((d << 32) | a); + } #else - return (0); + return (0); #endif } /* * __wt_clock -- - * Obtain a timestamp via either a CPU register or via a system call on - * platforms where obtaining it directly from the hardware register is - * not supported. + * Obtain a timestamp via either a CPU register or via a system call on platforms where + * obtaining it directly from the hardware register is not supported. */ static inline uint64_t __wt_clock(WT_SESSION_IMPL *session) { - struct timespec tsp; + struct timespec tsp; - if (__wt_process.use_epochtime) { - __wt_epoch(session, &tsp); - return ((uint64_t)(tsp.tv_sec * WT_BILLION + tsp.tv_nsec)); - } - return (__wt_rdtsc()); + if (__wt_process.use_epochtime) { + __wt_epoch(session, &tsp); + return ((uint64_t)(tsp.tv_sec * WT_BILLION + tsp.tv_nsec)); + } + return (__wt_rdtsc()); } /* * __wt_strdup -- - * ANSI strdup function. + * ANSI strdup function. */ static inline int __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp) { - return (__wt_strndup( - session, str, (str == NULL) ? 0 : strlen(str), retp)); + return (__wt_strndup(session, str, (str == NULL) ? 
0 : strlen(str), retp)); } /* * __wt_strnlen -- - * Determine the length of a fixed-size string + * Determine the length of a fixed-size string */ static inline size_t __wt_strnlen(const char *s, size_t maxlen) { - size_t i; + size_t i; - for (i = 0; i < maxlen && *s != '\0'; i++, s++) - ; - return (i); + for (i = 0; i < maxlen && *s != '\0'; i++, s++) + ; + return (i); } /* * __wt_snprintf -- - * snprintf convenience function, ignoring the returned size. + * snprintf convenience function, ignoring the returned size. */ static inline int __wt_snprintf(char *buf, size_t size, const char *fmt, ...) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) + WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4))) { - WT_DECL_RET; - size_t len; - va_list ap; + WT_DECL_RET; + size_t len; + va_list ap; - len = 0; + len = 0; - va_start(ap, fmt); - ret = __wt_vsnprintf_len_incr(buf, size, &len, fmt, ap); - va_end(ap); - WT_RET(ret); + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, &len, fmt, ap); + va_end(ap); + WT_RET(ret); - /* It's an error if the buffer couldn't hold everything. */ - return (len >= size ? ERANGE : 0); + /* It's an error if the buffer couldn't hold everything. */ + return (len >= size ? ERANGE : 0); } /* * __wt_vsnprintf -- - * vsnprintf convenience function, ignoring the returned size. + * vsnprintf convenience function, ignoring the returned size. */ static inline int __wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap) { - size_t len; + size_t len; - len = 0; + len = 0; - WT_RET(__wt_vsnprintf_len_incr(buf, size, &len, fmt, ap)); + WT_RET(__wt_vsnprintf_len_incr(buf, size, &len, fmt, ap)); - /* It's an error if the buffer couldn't hold everything. */ - return (len >= size ? ERANGE : 0); + /* It's an error if the buffer couldn't hold everything. */ + return (len >= size ? ERANGE : 0); } /* * __wt_snprintf_len_set -- - * snprintf convenience function, setting the returned size. + * snprintf convenience function, setting the returned size. 
*/ static inline int -__wt_snprintf_len_set( - char *buf, size_t size, size_t *retsizep, const char *fmt, ...) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +__wt_snprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format(printf, 4, 5))) { - WT_DECL_RET; - va_list ap; + WT_DECL_RET; + va_list ap; - *retsizep = 0; + *retsizep = 0; - va_start(ap, fmt); - ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); - va_end(ap); - return (ret); + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); } /* * __wt_vsnprintf_len_set -- - * vsnprintf convenience function, setting the returned size. + * vsnprintf convenience function, setting the returned size. */ static inline int -__wt_vsnprintf_len_set( - char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) +__wt_vsnprintf_len_set(char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) { - *retsizep = 0; + *retsizep = 0; - return (__wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap)); + return (__wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap)); } /* * __wt_snprintf_len_incr -- - * snprintf convenience function, incrementing the returned size. + * snprintf convenience function, incrementing the returned size. */ static inline int -__wt_snprintf_len_incr( - char *buf, size_t size, size_t *retsizep, const char *fmt, ...) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +__wt_snprintf_len_incr(char *buf, size_t size, size_t *retsizep, const char *fmt, ...) 
+ WT_GCC_FUNC_ATTRIBUTE((format(printf, 4, 5))) { - WT_DECL_RET; - va_list ap; + WT_DECL_RET; + va_list ap; - va_start(ap, fmt); - ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); - va_end(ap); - return (ret); + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); } /* * __wt_txn_context_prepare_check -- - * Return an error if the current transaction is in the prepare state. + * Return an error if the current transaction is in the prepare state. */ static inline int __wt_txn_context_prepare_check(WT_SESSION_IMPL *session) { - if (F_ISSET(&session->txn, WT_TXN_PREPARE)) - WT_RET_MSG(session, EINVAL, - "%s: not permitted in a prepared transaction", - session->name); - return (0); + if (F_ISSET(&session->txn, WT_TXN_PREPARE)) + WT_RET_MSG(session, EINVAL, "%s: not permitted in a prepared transaction", session->name); + return (0); } /* * __wt_txn_context_check -- - * Complain if a transaction is/isn't running. + * Complain if a transaction is/isn't running. */ static inline int __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) { - if (requires_txn && !F_ISSET(&session->txn, WT_TXN_RUNNING)) - WT_RET_MSG(session, EINVAL, - "%s: only permitted in a running transaction", - session->name); - if (!requires_txn && F_ISSET(&session->txn, WT_TXN_RUNNING)) - WT_RET_MSG(session, EINVAL, - "%s: not permitted in a running transaction", - session->name); - return (0); + if (requires_txn && !F_ISSET(&session->txn, WT_TXN_RUNNING)) + WT_RET_MSG(session, EINVAL, "%s: only permitted in a running transaction", session->name); + if (!requires_txn && F_ISSET(&session->txn, WT_TXN_RUNNING)) + WT_RET_MSG(session, EINVAL, "%s: not permitted in a running transaction", session->name); + return (0); } /* * __wt_spin_backoff -- - * Back off while spinning for a resource. This is used to avoid busy - * waiting loops that can consume enough CPU to block real work being - * done. 
The algorithm spins a few times, then yields for a while, then - * falls back to sleeping. + * Back off while spinning for a resource. This is used to avoid busy waiting loops that can + * consume enough CPU to block real work being done. The algorithm spins a few times, then + * yields for a while, then falls back to sleeping. */ static inline void __wt_spin_backoff(uint64_t *yield_count, uint64_t *sleep_usecs) { - if ((*yield_count) < 10) { - (*yield_count)++; - return; - } - - if ((*yield_count) < WT_THOUSAND) { - (*yield_count)++; - __wt_yield(); - return; - } - - (*sleep_usecs) = WT_MIN((*sleep_usecs) + 100, WT_THOUSAND); - __wt_sleep(0, (*sleep_usecs)); + if ((*yield_count) < 10) { + (*yield_count)++; + return; + } + + if ((*yield_count) < WT_THOUSAND) { + (*yield_count)++; + __wt_yield(); + return; + } + + (*sleep_usecs) = WT_MIN((*sleep_usecs) + 100, WT_THOUSAND); + __wt_sleep(0, (*sleep_usecs)); } - /* Maximum stress delay is 1/10 of a second. */ -#define WT_TIMING_STRESS_MAX_DELAY (100000) +/* Maximum stress delay is 1/10 of a second. */ +#define WT_TIMING_STRESS_MAX_DELAY (100000) /* * __wt_timing_stress -- - * Optionally add delay to stress code paths. + * Optionally add delay to stress code paths. */ static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag) { - double pct; - uint64_t i, max; - - /* Optionally only sleep when a specified configuration flag is set. */ - if (flag != 0 && !FLD_ISSET(S2C(session)->timing_stress_flags, flag)) - return; - - /* - * If there is a lot of cache pressure, don't let the sleep time - * get too large. If the cache is totally full, return. - */ - pct = 0.0; - if (__wt_eviction_needed(session, false, false, &pct)) - max = 5; - else - max = 9; - if (pct > 100.0) - return; - - /* - * We need a fast way to choose a sleep time. We want to sleep a short - * period most of the time, but occasionally wait longer. 
Divide the - * maximum period of time into 10 buckets (where bucket 0 doesn't sleep - * at all), and roll dice, advancing to the next bucket 50% of the time. - * That means we'll hit the maximum roughly every 1K calls. - */ - for (i = 0;;) - if (__wt_random(&session->rnd) & 0x1 || ++i > max) - break; - - if (i == 0) - __wt_yield(); - else - /* The default maximum delay is 1/10th of a second. */ - __wt_sleep(0, i * (WT_TIMING_STRESS_MAX_DELAY / 10)); + double pct; + uint64_t i, max; + + /* Optionally only sleep when a specified configuration flag is set. */ + if (flag != 0 && !FLD_ISSET(S2C(session)->timing_stress_flags, flag)) + return; + + /* + * If there is a lot of cache pressure, don't let the sleep time get too large. If the cache is + * totally full, return. + */ + pct = 0.0; + if (__wt_eviction_needed(session, false, false, &pct)) + max = 5; + else + max = 9; + if (pct > 100.0) + return; + + /* + * We need a fast way to choose a sleep time. We want to sleep a short period most of the time, + * but occasionally wait longer. Divide the maximum period of time into 10 buckets (where bucket + * 0 doesn't sleep at all), and roll dice, advancing to the next bucket 50% of the time. That + * means we'll hit the maximum roughly every 1K calls. + */ + for (i = 0;;) + if (__wt_random(&session->rnd) & 0x1 || ++i > max) + break; + + if (i == 0) + __wt_yield(); + else + /* The default maximum delay is 1/10th of a second. */ + __wt_sleep(0, i * (WT_TIMING_STRESS_MAX_DELAY / 10)); } /* @@ -304,24 +292,23 @@ __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag) #if defined(_M_AMD64) && !defined(HAVE_NO_CRC32_HARDWARE) /* * __wt_checksum_match -- - * Return if a checksum matches either the primary or alternate values. + * Return if a checksum matches either the primary or alternate values. 
*/ static inline bool __wt_checksum_match(const void *chunk, size_t len, uint32_t v) { - return (__wt_checksum(chunk, len) == v || - __wt_checksum_alt_match(chunk, len, v)); + return (__wt_checksum(chunk, len) == v || __wt_checksum_alt_match(chunk, len, v)); } #else /* * __wt_checksum_match -- - * Return if a checksum matches. + * Return if a checksum matches. */ static inline bool __wt_checksum_match(const void *chunk, size_t len, uint32_t v) { - return (__wt_checksum(chunk, len) == v); + return (__wt_checksum(chunk, len) == v); } #endif diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h index f4d8dc942f6..232683b2af2 100644 --- a/src/third_party/wiredtiger/src/include/msvc.h +++ b/src/third_party/wiredtiger/src/include/msvc.h @@ -11,50 +11,44 @@ #error "Only x64 is supported with MSVC" #endif -#define inline __inline +#define inline __inline /* MSVC Doesn't provide __func__, it has __FUNCTION__ */ #ifdef _MSC_VER -#define __func__ __FUNCTION__ +#define __func__ __FUNCTION__ #endif -#define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */ -#define WT_SIZET_FMT "Iu" /* size_t format string */ +#define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */ +#define WT_SIZET_FMT "Iu" /* size_t format string */ /* MSVC-specific attributes. 
*/ -#define WT_PACKED_STRUCT_BEGIN(name) \ - __pragma(pack(push,1)) \ - struct name { +#define WT_PACKED_STRUCT_BEGIN(name) __pragma(pack(push, 1)) struct name { -#define WT_PACKED_STRUCT_END \ - }; \ - __pragma(pack(pop)) +#define WT_PACKED_STRUCT_END \ + } \ + ; \ + __pragma(pack(pop)) -#define WT_GCC_FUNC_ATTRIBUTE(x) -#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) +#define WT_GCC_FUNC_ATTRIBUTE(x) +#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) -#define WT_ATOMIC_FUNC(name, ret, type, s, t) \ -static inline ret \ -__wt_atomic_add##name(type *vp, type v) \ -{ \ - return (_InterlockedExchangeAdd ## s((t *)(vp), (t)(v)) + (v)); \ -} \ -static inline ret \ -__wt_atomic_fetch_add##name(type *vp, type v) \ -{ \ - return (_InterlockedExchangeAdd ## s((t *)(vp), (t)(v))); \ -} \ -static inline ret \ -__wt_atomic_sub##name(type *vp, type v) \ -{ \ - return (_InterlockedExchangeAdd ## s((t *)(vp), - (t)v) - (v)); \ -} \ -static inline bool \ -__wt_atomic_cas##name(type *vp, type old, type new) \ -{ \ - return (_InterlockedCompareExchange ## s \ - ((t *)(vp), (t)(new), (t)(old)) == (t)(old)); \ -} +#define WT_ATOMIC_FUNC(name, ret, type, s, t) \ + static inline ret __wt_atomic_add##name(type *vp, type v) \ + { \ + return (_InterlockedExchangeAdd##s((t *)(vp), (t)(v)) + (v)); \ + } \ + static inline ret __wt_atomic_fetch_add##name(type *vp, type v) \ + { \ + return (_InterlockedExchangeAdd##s((t *)(vp), (t)(v))); \ + } \ + static inline ret __wt_atomic_sub##name(type *vp, type v) \ + { \ + return (_InterlockedExchangeAdd##s((t *)(vp), -(t)v) - (v)); \ + } \ + static inline bool __wt_atomic_cas##name(type *vp, type old, type new) \ + { \ + return (_InterlockedCompareExchange##s((t *)(vp), (t)(new), (t)(old)) == (t)(old)); \ + } WT_ATOMIC_FUNC(8, uint8_t, uint8_t, 8, char) WT_ATOMIC_FUNC(16, uint16_t, uint16_t, 16, short) @@ -70,17 +64,60 @@ WT_ATOMIC_FUNC(size, size_t, size_t, 64, __int64) /* * __wt_atomic_cas_ptr -- - * Pointer compare and swap. + * Pointer compare and swap. 
*/ static inline bool __wt_atomic_cas_ptr(void *vp, void *old, void *new) { - return (_InterlockedCompareExchange64( - vp, (int64_t)new, (int64_t)old) == ((int64_t)old)); + return (_InterlockedCompareExchange64(vp, (int64_t) new, (int64_t)old) == ((int64_t)old)); } -static inline void WT_BARRIER(void) { _ReadWriteBarrier(); } -static inline void WT_FULL_BARRIER(void) { _mm_mfence(); } -static inline void WT_PAUSE(void) { _mm_pause(); } -static inline void WT_READ_BARRIER(void) { _mm_lfence(); } -static inline void WT_WRITE_BARRIER(void) { _mm_sfence(); } +/* + * WT_BARRIER -- + * MSVC implementation of WT_BARRIER. + */ +static inline void +WT_BARRIER(void) +{ + _ReadWriteBarrier(); +} + +/* + * WT_FULL_BARRIER -- + * MSVC implementation of WT_FULL_BARRIER. + */ +static inline void +WT_FULL_BARRIER(void) +{ + _mm_mfence(); +} + +/* + * WT_PAUSE -- + * MSVC implementation of WT_PAUSE. + */ +static inline void +WT_PAUSE(void) +{ + _mm_pause(); +} + +/* + * WT_READ_BARRIER -- + * MSVC implementation of WT_READ_BARRIER. + */ +static inline void +WT_READ_BARRIER(void) +{ + _mm_lfence(); +} + +/* + * WT_WRITE_BARRIER -- + * MSVC implementation of WT_WRITE_BARRIER. + */ +static inline void +WT_WRITE_BARRIER(void) +{ + _mm_sfence(); +} diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h index c7382fc94dc..63283c92633 100644 --- a/src/third_party/wiredtiger/src/include/mutex.h +++ b/src/third_party/wiredtiger/src/include/mutex.h @@ -13,20 +13,19 @@ * locking operations that are expected to block. */ struct __wt_condvar { - const char *name; /* Mutex name for debugging */ + const char *name; /* Mutex name for debugging */ - wt_mutex_t mtx; /* Mutex */ - wt_cond_t cond; /* Condition variable */ + wt_mutex_t mtx; /* Mutex */ + wt_cond_t cond; /* Condition variable */ - int waiters; /* Numbers of waiters, or - -1 if signalled with no waiters. 
*/ - /* - * The following fields are used for automatically adjusting condition - * variable wait times. - */ - uint64_t min_wait; /* Minimum wait duration */ - uint64_t max_wait; /* Maximum wait duration */ - uint64_t prev_wait; /* Wait duration used last time */ + int waiters; /* Numbers of waiters, or + -1 if signalled with no waiters. */ + /* + * The following fields are used for automatically adjusting condition variable wait times. + */ + uint64_t min_wait; /* Minimum wait duration */ + uint64_t max_wait; /* Maximum wait duration */ + uint64_t prev_wait; /* Wait duration used last time */ }; /* @@ -37,26 +36,26 @@ struct __wt_condvar { * Don't modify this structure without understanding the read/write locking * functions. */ -struct __wt_rwlock { /* Read/write lock */ - volatile union { - uint64_t v; /* Full 64-bit value */ - struct { - uint8_t current; /* Current ticket */ - uint8_t next; /* Next available ticket */ - uint8_t reader; /* Read queue ticket */ - uint8_t readers_queued; /* Count of queued readers */ - uint32_t readers_active;/* Count of active readers */ - } s; - } u; +struct __wt_rwlock { /* Read/write lock */ + volatile union { + uint64_t v; /* Full 64-bit value */ + struct { + uint8_t current; /* Current ticket */ + uint8_t next; /* Next available ticket */ + uint8_t reader; /* Read queue ticket */ + uint8_t readers_queued; /* Count of queued readers */ + uint32_t readers_active; /* Count of active readers */ + } s; + } u; - int16_t stat_read_count_off; /* read acquisitions offset */ - int16_t stat_write_count_off; /* write acquisitions offset */ - int16_t stat_app_usecs_off; /* waiting application threads offset */ - int16_t stat_int_usecs_off; /* waiting server threads offset */ - int16_t stat_session_usecs_off; /* waiting session offset */ + int16_t stat_read_count_off; /* read acquisitions offset */ + int16_t stat_write_count_off; /* write acquisitions offset */ + int16_t stat_app_usecs_off; /* waiting application threads offset */ + 
int16_t stat_int_usecs_off; /* waiting server threads offset */ + int16_t stat_session_usecs_off; /* waiting session offset */ - WT_CONDVAR *cond_readers; /* Blocking readers */ - WT_CONDVAR *cond_writers; /* Blocking writers */ + WT_CONDVAR *cond_readers; /* Blocking readers */ + WT_CONDVAR *cond_writers; /* Blocking writers */ }; /* @@ -66,24 +65,25 @@ struct __wt_rwlock { /* Read/write lock */ * Implemented as a macro so we can pass in a statistics field and convert * it into a statistics structure array offset. */ -#define WT_RWLOCK_INIT_TRACKED(session, l, name) do { \ - WT_RET(__wt_rwlock_init(session, l)); \ - (l)->stat_read_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ - S2C(session)->stats, lock_##name##_read_count); \ - (l)->stat_write_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ - S2C(session)->stats, lock_##name##_write_count); \ - (l)->stat_app_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ - S2C(session)->stats, lock_##name##_wait_application); \ - (l)->stat_int_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ - S2C(session)->stats, lock_##name##_wait_internal); \ -} while (0) +#define WT_RWLOCK_INIT_TRACKED(session, l, name) \ + do { \ + WT_RET(__wt_rwlock_init(session, l)); \ + (l)->stat_read_count_off = \ + (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_read_count); \ + (l)->stat_write_count_off = \ + (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_write_count); \ + (l)->stat_app_usecs_off = \ + (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_application); \ + (l)->stat_int_usecs_off = \ + (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_internal); \ + } while (0) -#define WT_RWLOCK_INIT_SESSION_TRACKED(session, l, name) do { \ - WT_RWLOCK_INIT_TRACKED(session, l, name); \ - (l)->stat_session_usecs_off = \ - (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET( \ - &(session)->stats, lock_##name##_wait); \ -} while (0) +#define 
WT_RWLOCK_INIT_SESSION_TRACKED(session, l, name) \ + do { \ + WT_RWLOCK_INIT_TRACKED(session, l, name); \ + (l)->stat_session_usecs_off = \ + (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET(&(session)->stats, lock_##name##_wait); \ + } while (0) /* * Spin locks: @@ -92,39 +92,37 @@ struct __wt_rwlock { /* Read/write lock */ * while holding the spin lock are expected to complete in a small number of * instructions). */ -#define SPINLOCK_GCC 0 -#define SPINLOCK_MSVC 1 -#define SPINLOCK_PTHREAD_MUTEX 2 -#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3 +#define SPINLOCK_GCC 0 +#define SPINLOCK_MSVC 1 +#define SPINLOCK_PTHREAD_MUTEX 2 +#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3 struct __wt_spinlock { #if SPINLOCK_TYPE == SPINLOCK_GCC - WT_CACHE_LINE_PAD_BEGIN - volatile int lock; -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || \ - SPINLOCK_TYPE == SPINLOCK_MSVC - wt_mutex_t lock; + WT_CACHE_LINE_PAD_BEGIN + volatile int lock; +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || SPINLOCK_TYPE == SPINLOCK_MSVC + wt_mutex_t lock; #else #error Unknown spinlock type #endif - const char *name; /* Mutex name */ + const char *name; /* Mutex name */ - /* - * We track acquisitions and time spent waiting for some locks. For - * performance reasons and to make it possible to write generic code - * that tracks statistics for different locks, we store the offset - * of the statistics fields to be updated during lock acquisition. - */ - int16_t stat_count_off; /* acquisitions offset */ - int16_t stat_app_usecs_off; /* waiting application threads offset */ - int16_t stat_int_usecs_off; /* waiting server threads offset */ - int16_t stat_session_usecs_off; /* waiting session offset */ + /* + * We track acquisitions and time spent waiting for some locks. 
For performance reasons and to + * make it possible to write generic code that tracks statistics for different locks, we store + * the offset of the statistics fields to be updated during lock acquisition. + */ + int16_t stat_count_off; /* acquisitions offset */ + int16_t stat_app_usecs_off; /* waiting application threads offset */ + int16_t stat_int_usecs_off; /* waiting server threads offset */ + int16_t stat_session_usecs_off; /* waiting session offset */ - int8_t initialized; /* Lock initialized, for cleanup */ + int8_t initialized; /* Lock initialized, for cleanup */ #if SPINLOCK_TYPE == SPINLOCK_GCC - WT_CACHE_LINE_PAD_END + WT_CACHE_LINE_PAD_END #endif }; diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i index 660ee22ed96..d9a93902fcd 100644 --- a/src/third_party/wiredtiger/src/include/mutex.i +++ b/src/third_party/wiredtiger/src/include/mutex.i @@ -16,251 +16,248 @@ /* * __spin_init_internal -- - * Initialize the WT portion of a spinlock. + * Initialize the WT portion of a spinlock. */ static inline void __spin_init_internal(WT_SPINLOCK *t, const char *name) { - t->name = name; - t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; - t->initialized = 1; + t->name = name; + t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; + t->initialized = 1; } #if SPINLOCK_TYPE == SPINLOCK_GCC /* Default to spinning 1000 times before yielding. */ #ifndef WT_SPIN_COUNT -#define WT_SPIN_COUNT WT_THOUSAND +#define WT_SPIN_COUNT WT_THOUSAND #endif /* * __wt_spin_init -- - * Initialize a spinlock. + * Initialize a spinlock. */ static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) { - WT_UNUSED(session); + WT_UNUSED(session); - t->lock = 0; - __spin_init_internal(t, name); - return (0); + t->lock = 0; + __spin_init_internal(t, name); + return (0); } /* * __wt_spin_destroy -- - * Destroy a spinlock. + * Destroy a spinlock. 
*/ static inline void __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - t->lock = 0; + t->lock = 0; } /* * __wt_spin_trylock -- - * Try to lock a spinlock or fail immediately if it is busy. + * Try to lock a spinlock or fail immediately if it is busy. */ static inline int __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - return (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE) ? 0 : EBUSY); + return (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE) ? 0 : EBUSY); } /* * __wt_spin_lock -- - * Spin until the lock is acquired. + * Spin until the lock is acquired. */ static inline void __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - int i; + int i; - WT_UNUSED(session); + WT_UNUSED(session); - while (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE)) { - for (i = 0; t->lock && i < WT_SPIN_COUNT; i++) - WT_PAUSE(); - if (t->lock) - __wt_yield(); - } + while (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE)) { + for (i = 0; t->lock && i < WT_SPIN_COUNT; i++) + WT_PAUSE(); + if (t->lock) + __wt_yield(); + } } /* * __wt_spin_unlock -- - * Release the spinlock. + * Release the spinlock. */ static inline void __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - __atomic_clear(&t->lock, __ATOMIC_RELEASE); + __atomic_clear(&t->lock, __ATOMIC_RELEASE); } -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_init -- - * Initialize a spinlock. + * Initialize a spinlock. 
*/ static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) { #if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE - WT_DECL_RET; - pthread_mutexattr_t attr; - - WT_RET(pthread_mutexattr_init(&attr)); - ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); - if (ret == 0) - ret = pthread_mutex_init(&t->lock, &attr); - WT_TRET(pthread_mutexattr_destroy(&attr)); - WT_RET(ret); + WT_DECL_RET; + pthread_mutexattr_t attr; + + WT_RET(pthread_mutexattr_init(&attr)); + ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); + if (ret == 0) + ret = pthread_mutex_init(&t->lock, &attr); + WT_TRET(pthread_mutexattr_destroy(&attr)); + WT_RET(ret); #else - WT_RET(pthread_mutex_init(&t->lock, NULL)); + WT_RET(pthread_mutex_init(&t->lock, NULL)); #endif - __spin_init_internal(t, name); + __spin_init_internal(t, name); - WT_UNUSED(session); - return (0); + WT_UNUSED(session); + return (0); } /* * __wt_spin_destroy -- - * Destroy a spinlock. + * Destroy a spinlock. */ static inline void __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - if (t->initialized) { - (void)pthread_mutex_destroy(&t->lock); - t->initialized = 0; - } + if (t->initialized) { + (void)pthread_mutex_destroy(&t->lock); + t->initialized = 0; + } } -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_trylock -- - * Try to lock a spinlock or fail immediately if it is busy. + * Try to lock a spinlock or fail immediately if it is busy. */ static inline int __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - return (pthread_mutex_trylock(&t->lock)); + return (pthread_mutex_trylock(&t->lock)); } /* * __wt_spin_lock -- - * Spin until the lock is acquired. + * Spin until the lock is acquired. 
*/ static inline void __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_DECL_RET; + WT_DECL_RET; - if ((ret = pthread_mutex_lock(&t->lock)) != 0) - WT_PANIC_MSG(session, ret, "pthread_mutex_lock: %s", t->name); + if ((ret = pthread_mutex_lock(&t->lock)) != 0) + WT_PANIC_MSG(session, ret, "pthread_mutex_lock: %s", t->name); } #endif /* * __wt_spin_unlock -- - * Release the spinlock. + * Release the spinlock. */ static inline void __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_DECL_RET; + WT_DECL_RET; - if ((ret = pthread_mutex_unlock(&t->lock)) != 0) - WT_PANIC_MSG(session, ret, "pthread_mutex_unlock: %s", t->name); + if ((ret = pthread_mutex_unlock(&t->lock)) != 0) + WT_PANIC_MSG(session, ret, "pthread_mutex_unlock: %s", t->name); } #elif SPINLOCK_TYPE == SPINLOCK_MSVC /* * __wt_spin_init -- - * Initialize a spinlock. + * Initialize a spinlock. */ static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) { - DWORD windows_error; - - if (InitializeCriticalSectionAndSpinCount(&t->lock, 4000) == 0) { - windows_error = __wt_getlasterror(); - __wt_errx(session, - "%s: InitializeCriticalSectionAndSpinCount: %s", - name, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); - } - - __spin_init_internal(t, name); - return (0); + DWORD windows_error; + + if (InitializeCriticalSectionAndSpinCount(&t->lock, 4000) == 0) { + windows_error = __wt_getlasterror(); + __wt_errx(session, "%s: InitializeCriticalSectionAndSpinCount: %s", name, + __wt_formatmessage(session, windows_error)); + return (__wt_map_windows_error(windows_error)); + } + + __spin_init_internal(t, name); + return (0); } /* * __wt_spin_destroy -- - * Destroy a spinlock. + * Destroy a spinlock. 
*/ static inline void __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - if (t->initialized) { - DeleteCriticalSection(&t->lock); - t->initialized = 0; - } + if (t->initialized) { + DeleteCriticalSection(&t->lock); + t->initialized = 0; + } } /* * __wt_spin_trylock -- - * Try to lock a spinlock or fail immediately if it is busy. + * Try to lock a spinlock or fail immediately if it is busy. */ static inline int __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - BOOL b = TryEnterCriticalSection(&t->lock); - return (b == 0 ? EBUSY : 0); + BOOL b = TryEnterCriticalSection(&t->lock); + return (b == 0 ? EBUSY : 0); } /* * __wt_spin_lock -- - * Spin until the lock is acquired. + * Spin until the lock is acquired. */ static inline void __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - EnterCriticalSection(&t->lock); + EnterCriticalSection(&t->lock); } /* * __wt_spin_unlock -- - * Release the spinlock. + * Release the spinlock. */ static inline void __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - WT_UNUSED(session); + WT_UNUSED(session); - LeaveCriticalSection(&t->lock); + LeaveCriticalSection(&t->lock); } #else @@ -276,68 +273,66 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) * Implemented as a macro so we can pass in a statistics field and convert * it into a statistics structure array offset. 
*/ -#define WT_SPIN_INIT_TRACKED(session, t, name) do { \ - WT_RET(__wt_spin_init(session, t, #name)); \ - (t)->stat_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ - S2C(session)->stats, lock_##name##_count); \ - (t)->stat_app_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ - S2C(session)->stats, lock_##name##_wait_application); \ - (t)->stat_int_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ - S2C(session)->stats, lock_##name##_wait_internal); \ -} while (0) - -#define WT_SPIN_INIT_SESSION_TRACKED(session, t, name) do { \ - WT_SPIN_INIT_TRACKED(session, t, name); \ - (t)->stat_session_usecs_off = \ - (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET( \ - &(session)->stats, lock_##name##_wait); \ -} while (0) +#define WT_SPIN_INIT_TRACKED(session, t, name) \ + do { \ + WT_RET(__wt_spin_init(session, t, #name)); \ + (t)->stat_count_off = \ + (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_count); \ + (t)->stat_app_usecs_off = \ + (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_application); \ + (t)->stat_int_usecs_off = \ + (int16_t)WT_STATS_FIELD_TO_OFFSET(S2C(session)->stats, lock_##name##_wait_internal); \ + } while (0) + +#define WT_SPIN_INIT_SESSION_TRACKED(session, t, name) \ + do { \ + WT_SPIN_INIT_TRACKED(session, t, name); \ + (t)->stat_session_usecs_off = \ + (int16_t)WT_SESSION_STATS_FIELD_TO_OFFSET(&(session)->stats, lock_##name##_wait); \ + } while (0) /* * __wt_spin_lock_track -- - * Spinlock acquisition, with tracking. + * Spinlock acquisition, with tracking. 
*/ static inline void __wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - uint64_t time_diff, time_start, time_stop; - int64_t *session_stats, **stats; - - if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) { - time_start = __wt_clock(session); - __wt_spin_lock(session, t); - time_stop = __wt_clock(session); - time_diff = WT_CLOCKDIFF_US(time_stop, time_start); - stats = (int64_t **)S2C(session)->stats; - session_stats = (int64_t *)&(session->stats); - stats[session->stat_bucket][t->stat_count_off]++; - if (F_ISSET(session, WT_SESSION_INTERNAL)) - stats[session->stat_bucket][t->stat_int_usecs_off] += - (int64_t)time_diff; - else { - stats[session->stat_bucket][t->stat_app_usecs_off] += - (int64_t)time_diff; - } - session_stats[t->stat_session_usecs_off] += (int64_t)time_diff; - } else - __wt_spin_lock(session, t); + uint64_t time_diff, time_start, time_stop; + int64_t *session_stats, **stats; + + if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) { + time_start = __wt_clock(session); + __wt_spin_lock(session, t); + time_stop = __wt_clock(session); + time_diff = WT_CLOCKDIFF_US(time_stop, time_start); + stats = (int64_t **)S2C(session)->stats; + session_stats = (int64_t *)&(session->stats); + stats[session->stat_bucket][t->stat_count_off]++; + if (F_ISSET(session, WT_SESSION_INTERNAL)) + stats[session->stat_bucket][t->stat_int_usecs_off] += (int64_t)time_diff; + else { + stats[session->stat_bucket][t->stat_app_usecs_off] += (int64_t)time_diff; + } + session_stats[t->stat_session_usecs_off] += (int64_t)time_diff; + } else + __wt_spin_lock(session, t); } /* * __wt_spin_trylock_track -- - * Try to lock a spinlock or fail immediately if it is busy. - * Track if successful. + * Try to lock a spinlock or fail immediately if it is busy. Track if successful. 
*/ static inline int __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - int64_t **stats; - - if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) { - WT_RET(__wt_spin_trylock(session, t)); - stats = (int64_t **)S2C(session)->stats; - stats[session->stat_bucket][t->stat_count_off]++; - return (0); - } - return (__wt_spin_trylock(session, t)); + int64_t **stats; + + if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) { + WT_RET(__wt_spin_trylock(session, t)); + stats = (int64_t **)S2C(session)->stats; + stats[session->stat_bucket][t->stat_count_off]++; + return (0); + } + return (__wt_spin_trylock(session, t)); } diff --git a/src/third_party/wiredtiger/src/include/optrack.h b/src/third_party/wiredtiger/src/include/optrack.h index e5b97e1b5d7..a5be938140a 100644 --- a/src/third_party/wiredtiger/src/include/optrack.h +++ b/src/third_party/wiredtiger/src/include/optrack.h @@ -6,9 +6,9 @@ * See the file LICENSE for redistribution information. */ -#define WT_OPTRACK_MAXRECS (16384) -#define WT_OPTRACK_BUFSIZE (WT_OPTRACK_MAXRECS * sizeof(WT_OPTRACK_RECORD)) -#define WT_OPTRACK_VERSION 3 +#define WT_OPTRACK_MAXRECS (16384) +#define WT_OPTRACK_BUFSIZE (WT_OPTRACK_MAXRECS * sizeof(WT_OPTRACK_RECORD)) +#define WT_OPTRACK_VERSION 3 /* * WT_OPTRACK_HEADER -- @@ -16,11 +16,11 @@ * identifier is a boolean: 1 if the session is internal, 0 otherwise. */ struct __wt_optrack_header { - uint32_t optrack_version; - uint32_t optrack_session_internal; - uint32_t optrack_tsc_nsec_ratio; - uint32_t padding; - uint64_t optrack_seconds_epoch; + uint32_t optrack_version; + uint32_t optrack_session_internal; + uint32_t optrack_tsc_nsec_ratio; + uint32_t padding; + uint64_t optrack_seconds_epoch; }; /* @@ -44,46 +44,42 @@ struct __wt_optrack_header { * from it. 
*/ struct __wt_optrack_record { - uint64_t op_timestamp; /* timestamp */ - uint16_t op_id; /* function ID */ - uint16_t op_type; /* start/stop */ - uint8_t padding[4]; + uint64_t op_timestamp; /* timestamp */ + uint16_t op_id; /* function ID */ + uint16_t op_type; /* start/stop */ + uint8_t padding[4]; }; -#define WT_TRACK_OP(s, optype) do { \ - WT_OPTRACK_RECORD *__tr; \ - __tr = &((s)->optrack_buf[ \ - (s)->optrackbuf_ptr % WT_OPTRACK_MAXRECS]); \ - __tr->op_timestamp = __wt_clock(s); \ - __tr->op_id = __func_id; \ - __tr->op_type = optype; \ - \ - if (++(s)->optrackbuf_ptr == WT_OPTRACK_MAXRECS) { \ - __wt_optrack_flush_buffer(s); \ - (s)->optrackbuf_ptr = 0; \ - } \ -} while (0) +#define WT_TRACK_OP(s, optype) \ + do { \ + WT_OPTRACK_RECORD *__tr; \ + __tr = &((s)->optrack_buf[(s)->optrackbuf_ptr % WT_OPTRACK_MAXRECS]); \ + __tr->op_timestamp = __wt_clock(s); \ + __tr->op_id = __func_id; \ + __tr->op_type = optype; \ + \ + if (++(s)->optrackbuf_ptr == WT_OPTRACK_MAXRECS) { \ + __wt_optrack_flush_buffer(s); \ + (s)->optrackbuf_ptr = 0; \ + } \ + } while (0) /* - * We do not synchronize access to optrack buffer pointer under the assumption - * that there is no more than one thread using a given session. This assumption - * does not always hold. When it does not, we might have a race. In this case, - * we may lose a few log records. We prefer to risk losing a few log records - * occasionally in order not to synchronize this code, which is intended to be - * very lightweight. - * Exclude the default session (ID 0) because it can be used by multiple - * threads and it is also used in error paths during failed open calls. + * We do not synchronize access to optrack buffer pointer under the assumption that there is no more + * than one thread using a given session. This assumption does not always hold. When it does not, we + * might have a race. In this case, we may lose a few log records. 
We prefer to risk losing a few + * log records occasionally in order not to synchronize this code, which is intended to be very + * lightweight. Exclude the default session (ID 0) because it can be used by multiple threads and it + * is also used in error paths during failed open calls. */ -#define WT_TRACK_OP_DECL \ - static uint16_t __func_id = 0 -#define WT_TRACK_OP_INIT(s) \ - if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) { \ - if (__func_id == 0) \ - __wt_optrack_record_funcid( \ - s, __func__, &__func_id); \ - WT_TRACK_OP(s, 0); \ - } +#define WT_TRACK_OP_DECL static uint16_t __func_id = 0 +#define WT_TRACK_OP_INIT(s) \ + if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) { \ + if (__func_id == 0) \ + __wt_optrack_record_funcid(s, __func__, &__func_id); \ + WT_TRACK_OP(s, 0); \ + } -#define WT_TRACK_OP_END(s) \ - if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) \ - WT_TRACK_OP(s, 1); +#define WT_TRACK_OP_END(s) \ + if (F_ISSET(S2C(s), WT_CONN_OPTRACK) && (s)->id != 0) \ + WT_TRACK_OP(s, 1); diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h index 37e0799ef16..919edeec586 100644 --- a/src/third_party/wiredtiger/src/include/os.h +++ b/src/third_party/wiredtiger/src/include/os.h @@ -6,179 +6,174 @@ * See the file LICENSE for redistribution information. */ -#define WT_SYSCALL(call, ret) do { \ - /* \ - * A call returning 0 indicates success; any call where \ - * 0 is not the only successful return must provide an \ - * expression evaluating to 0 in all successful cases. \ - * \ - * XXX \ - * Casting the call's return to int is because CentOS 7.3.1611 \ - * complains about syscall returning a long and the loss of \ - * integer precision in the assignment to ret. The cast should \ - * be a no-op everywhere. 
\ - */ \ - if (((ret) = (int)(call)) == 0) \ - break; \ - /* \ - * The call's error was either returned by the call or \ - * is in errno, and there are cases where it depends on \ - * the software release as to which it is (for example, \ - * posix_fadvise on FreeBSD and OS X). Failing calls \ - * must either return a non-zero error value, or -1 if \ - * the error value is in errno. (The WiredTiger errno \ - * function returns WT_ERROR if errno is 0, which isn't \ - * ideal but won't discard the failure.) \ - */ \ - if ((ret) == -1) \ - (ret) = __wt_errno(); \ -} while (0) - -#define WT_RETRY_MAX 10 - -#define WT_SYSCALL_RETRY(call, ret) do { \ - int __retry; \ - for (__retry = 0; __retry < WT_RETRY_MAX; ++__retry) { \ - WT_SYSCALL(call, ret); \ - switch (ret) { \ - case EAGAIN: \ - case EBUSY: \ - case EINTR: \ - case EIO: \ - case EMFILE: \ - case ENFILE: \ - case ENOSPC: \ - __wt_sleep(0L, 50000L); \ - continue; \ - default: \ - break; \ - } \ - break; \ - } \ -} while (0) - -#define WT_TIMEDIFF_NS(end, begin) \ - (WT_BILLION * (uint64_t)((end).tv_sec - (begin).tv_sec) + \ - (uint64_t)(end).tv_nsec - (uint64_t)(begin).tv_nsec) -#define WT_TIMEDIFF_US(end, begin) \ - (WT_TIMEDIFF_NS((end), (begin)) / WT_THOUSAND) -#define WT_TIMEDIFF_MS(end, begin) \ - (WT_TIMEDIFF_NS((end), (begin)) / WT_MILLION) -#define WT_TIMEDIFF_SEC(end, begin) \ - (WT_TIMEDIFF_NS((end), (begin)) / WT_BILLION) - -#define WT_CLOCKDIFF_NS(end, begin) \ - (__wt_clock_to_nsec(end, begin)) -#define WT_CLOCKDIFF_US(end, begin) \ - (WT_CLOCKDIFF_NS(end, begin) / WT_THOUSAND) -#define WT_CLOCKDIFF_MS(end, begin) \ - (WT_CLOCKDIFF_NS(end, begin) / WT_MILLION) -#define WT_CLOCKDIFF_SEC(end, begin) \ - (WT_CLOCKDIFF_NS(end, begin) / WT_BILLION) - -#define WT_TIMECMP(t1, t2) \ - ((t1).tv_sec < (t2).tv_sec ? -1 : \ - (t1).tv_sec == (t2).tv_sec ? \ - (t1).tv_nsec < (t2).tv_nsec ? -1 : \ - (t1).tv_nsec == (t2).tv_nsec ? 
0 : 1 : 1) +#define WT_SYSCALL(call, ret) \ + do { \ + /* \ + * A call returning 0 indicates success; any call where \ + * 0 is not the only successful return must provide an \ + * expression evaluating to 0 in all successful cases. \ + * \ + * XXX \ + * Casting the call's return to int is because CentOS 7.3.1611 \ + * complains about syscall returning a long and the loss of \ + * integer precision in the assignment to ret. The cast should \ + * be a no-op everywhere. \ + */ \ + if (((ret) = (int)(call)) == 0) \ + break; \ + /* \ + * The call's error was either returned by the call or \ + * is in errno, and there are cases where it depends on \ + * the software release as to which it is (for example, \ + * posix_fadvise on FreeBSD and OS X). Failing calls \ + * must either return a non-zero error value, or -1 if \ + * the error value is in errno. (The WiredTiger errno \ + * function returns WT_ERROR if errno is 0, which isn't \ + * ideal but won't discard the failure.) \ + */ \ + if ((ret) == -1) \ + (ret) = __wt_errno(); \ + } while (0) + +#define WT_RETRY_MAX 10 + +#define WT_SYSCALL_RETRY(call, ret) \ + do { \ + int __retry; \ + for (__retry = 0; __retry < WT_RETRY_MAX; ++__retry) { \ + WT_SYSCALL(call, ret); \ + switch (ret) { \ + case EAGAIN: \ + case EBUSY: \ + case EINTR: \ + case EIO: \ + case EMFILE: \ + case ENFILE: \ + case ENOSPC: \ + __wt_sleep(0L, 50000L); \ + continue; \ + default: \ + break; \ + } \ + break; \ + } \ + } while (0) + +#define WT_TIMEDIFF_NS(end, begin) \ + (WT_BILLION * (uint64_t)((end).tv_sec - (begin).tv_sec) + (uint64_t)(end).tv_nsec - \ + (uint64_t)(begin).tv_nsec) +#define WT_TIMEDIFF_US(end, begin) (WT_TIMEDIFF_NS((end), (begin)) / WT_THOUSAND) +#define WT_TIMEDIFF_MS(end, begin) (WT_TIMEDIFF_NS((end), (begin)) / WT_MILLION) +#define WT_TIMEDIFF_SEC(end, begin) (WT_TIMEDIFF_NS((end), (begin)) / WT_BILLION) + +#define WT_CLOCKDIFF_NS(end, begin) (__wt_clock_to_nsec(end, begin)) +#define WT_CLOCKDIFF_US(end, begin) 
(WT_CLOCKDIFF_NS(end, begin) / WT_THOUSAND) +#define WT_CLOCKDIFF_MS(end, begin) (WT_CLOCKDIFF_NS(end, begin) / WT_MILLION) +#define WT_CLOCKDIFF_SEC(end, begin) (WT_CLOCKDIFF_NS(end, begin) / WT_BILLION) + +#define WT_TIMECMP(t1, t2) \ + ((t1).tv_sec < (t2).tv_sec ? -1 : (t1).tv_sec == (t2).tv_sec ? \ + (t1).tv_nsec < (t2).tv_nsec ? -1 : (t1).tv_nsec == (t2).tv_nsec ? \ + 0 : \ + 1 : \ + 1) /* - * Macros to ensure a file handle is inserted or removed from both the main and - * the hashed queue, used by connection-level and in-memory data structures. + * Macros to ensure a file handle is inserted or removed from both the main and the hashed queue, + * used by connection-level and in-memory data structures. */ -#define WT_FILE_HANDLE_INSERT(h, fh, bucket) do { \ - TAILQ_INSERT_HEAD(&(h)->fhqh, fh, q); \ - TAILQ_INSERT_HEAD(&(h)->fhhash[bucket], fh, hashq); \ -} while (0) - -#define WT_FILE_HANDLE_REMOVE(h, fh, bucket) do { \ - TAILQ_REMOVE(&(h)->fhqh, fh, q); \ - TAILQ_REMOVE(&(h)->fhhash[bucket], fh, hashq); \ -} while (0) +#define WT_FILE_HANDLE_INSERT(h, fh, bucket) \ + do { \ + TAILQ_INSERT_HEAD(&(h)->fhqh, fh, q); \ + TAILQ_INSERT_HEAD(&(h)->fhhash[bucket], fh, hashq); \ + } while (0) + +#define WT_FILE_HANDLE_REMOVE(h, fh, bucket) \ + do { \ + TAILQ_REMOVE(&(h)->fhqh, fh, q); \ + TAILQ_REMOVE(&(h)->fhhash[bucket], fh, hashq); \ + } while (0) struct __wt_fh { - /* - * There is a file name field in both the WT_FH and WT_FILE_HANDLE - * structures, which isn't ideal. There would be compromises to keeping - * a single copy: If it were in WT_FH, file systems could not access - * the name field, if it were just in the WT_FILE_HANDLE internal - * WiredTiger code would need to maintain a string inside a structure - * that is owned by the user (since we care about the content of the - * file name). Keeping two copies seems most reasonable. 
- */ - const char *name; /* File name */ - - uint64_t name_hash; /* hash of name */ - uint64_t last_sync; /* time of background fsync */ - volatile uint64_t written; /* written since fsync */ - TAILQ_ENTRY(__wt_fh) q; /* internal queue */ - TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */ - u_int ref; /* reference count */ - WT_FS_OPEN_FILE_TYPE file_type; /* file type */ - - WT_FILE_HANDLE *handle; + /* + * There is a file name field in both the WT_FH and WT_FILE_HANDLE structures, which isn't + * ideal. There would be compromises to keeping a single copy: If it were in WT_FH, file systems + * could not access the name field, if it were just in the WT_FILE_HANDLE internal WiredTiger + * code would need to maintain a string inside a structure that is owned by the user (since we + * care about the content of the file name). Keeping two copies seems most reasonable. + */ + const char *name; /* File name */ + + uint64_t name_hash; /* hash of name */ + uint64_t last_sync; /* time of background fsync */ + volatile uint64_t written; /* written since fsync */ + TAILQ_ENTRY(__wt_fh) q; /* internal queue */ + TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */ + u_int ref; /* reference count */ + WT_FS_OPEN_FILE_TYPE file_type; /* file type */ + + WT_FILE_HANDLE *handle; }; #ifdef _WIN32 struct __wt_file_handle_win { - WT_FILE_HANDLE iface; - - /* - * Windows specific file handle fields - */ - HANDLE filehandle; /* Windows file handle */ - HANDLE filehandle_secondary; /* Windows file handle - for file size changes */ - bool direct_io; /* O_DIRECT configured */ + WT_FILE_HANDLE iface; + + /* + * Windows specific file handle fields + */ + HANDLE filehandle; /* Windows file handle */ + HANDLE filehandle_secondary; /* Windows file handle + for file size changes */ + bool direct_io; /* O_DIRECT configured */ }; #else struct __wt_file_handle_posix { - WT_FILE_HANDLE iface; + WT_FILE_HANDLE iface; - /* - * POSIX specific file handle fields - */ - int fd; /* POSIX file 
handle */ + /* + * POSIX specific file handle fields + */ + int fd; /* POSIX file handle */ - bool direct_io; /* O_DIRECT configured */ + bool direct_io; /* O_DIRECT configured */ }; #endif struct __wt_file_handle_inmem { - WT_FILE_HANDLE iface; + WT_FILE_HANDLE iface; - /* - * In memory specific file handle fields - */ - uint64_t name_hash; /* hash of name */ - TAILQ_ENTRY(__wt_file_handle_inmem) q; /* internal queue, hash queue */ - TAILQ_ENTRY(__wt_file_handle_inmem) hashq; + /* + * In memory specific file handle fields + */ + uint64_t name_hash; /* hash of name */ + TAILQ_ENTRY(__wt_file_handle_inmem) q; /* internal queue, hash queue */ + TAILQ_ENTRY(__wt_file_handle_inmem) hashq; - WT_ITEM buf; /* Data */ - u_int ref; /* Reference count */ + WT_ITEM buf; /* Data */ + u_int ref; /* Reference count */ }; struct __wt_fstream { - const char *name; /* Stream name */ + const char *name; /* Stream name */ - FILE *fp; /* stdio FILE stream */ - WT_FH *fh; /* WT file handle */ - wt_off_t off; /* Read/write offset */ - wt_off_t size; /* File size */ - WT_ITEM buf; /* Data */ + FILE *fp; /* stdio FILE stream */ + WT_FH *fh; /* WT file handle */ + wt_off_t off; /* Read/write offset */ + wt_off_t size; /* File size */ + WT_ITEM buf; /* Data */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_STREAM_APPEND 0x1u /* Open a stream for append */ -#define WT_STREAM_READ 0x2u /* Open a stream for read */ -#define WT_STREAM_WRITE 0x4u /* Open a stream for write */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; - - int (*close)(WT_SESSION_IMPL *, WT_FSTREAM *); - int (*fstr_flush)(WT_SESSION_IMPL *, WT_FSTREAM *); - int (*fstr_getline)(WT_SESSION_IMPL *, WT_FSTREAM *, WT_ITEM *); - int (*fstr_printf)( - WT_SESSION_IMPL *, WT_FSTREAM *, const char *, va_list); +#define WT_STREAM_APPEND 0x1u /* Open a stream for append */ +#define WT_STREAM_READ 0x2u /* Open a stream for read */ +#define WT_STREAM_WRITE 0x4u /* Open a stream for write */ + /* AUTOMATIC FLAG VALUE 
GENERATION STOP */ + uint32_t flags; + + int (*close)(WT_SESSION_IMPL *, WT_FSTREAM *); + int (*fstr_flush)(WT_SESSION_IMPL *, WT_FSTREAM *); + int (*fstr_getline)(WT_SESSION_IMPL *, WT_FSTREAM *, WT_ITEM *); + int (*fstr_printf)(WT_SESSION_IMPL *, WT_FSTREAM *, const char *, va_list); }; diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i index a0573ee3cba..d7f5dc1ff51 100644 --- a/src/third_party/wiredtiger/src/include/os_fhandle.i +++ b/src/third_party/wiredtiger/src/include/os_fhandle.i @@ -7,200 +7,181 @@ */ /* - * Define functions that increment histogram statistics for filesystem - * operations latency. + * Define functions that increment histogram statistics for filesystem operations latency. */ WT_STAT_MSECS_HIST_INCR_FUNC(fsread, perf_hist_fsread_latency, 10) WT_STAT_MSECS_HIST_INCR_FUNC(fswrite, perf_hist_fswrite_latency, 10) /* * __wt_fsync -- - * POSIX fsync. + * POSIX fsync. */ static inline int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) { - WT_DECL_RET; - WT_FILE_HANDLE *handle; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - __wt_verbose( - session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->handle->name); - - handle = fh->handle; - /* - * There is no way to check when the non-blocking sync-file-range is - * complete, but we track the time taken in the call for completeness. - */ - WT_STAT_CONN_INCR_ATOMIC(session, thread_fsync_active); - WT_STAT_CONN_INCR(session, fsync_io); - if (block) - ret = (handle->fh_sync == NULL ? 0 : - handle->fh_sync(handle, (WT_SESSION *)session)); - else - ret = (handle->fh_sync_nowait == NULL ? 
0 : - handle->fh_sync_nowait(handle, (WT_SESSION *)session)); - WT_STAT_CONN_DECR_ATOMIC(session, thread_fsync_active); - return (ret); + WT_DECL_RET; + WT_FILE_HANDLE *handle; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->handle->name); + + handle = fh->handle; + /* + * There is no way to check when the non-blocking sync-file-range is complete, but we track the + * time taken in the call for completeness. + */ + WT_STAT_CONN_INCR_ATOMIC(session, thread_fsync_active); + WT_STAT_CONN_INCR(session, fsync_io); + if (block) + ret = (handle->fh_sync == NULL ? 0 : handle->fh_sync(handle, (WT_SESSION *)session)); + else + ret = (handle->fh_sync_nowait == NULL ? 0 : handle->fh_sync_nowait( + handle, (WT_SESSION *)session)); + WT_STAT_CONN_DECR_ATOMIC(session, thread_fsync_active); + return (ret); } /* * __wt_fextend -- - * Extend a file. + * Extend a file. */ static inline int __wt_fextend(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) { - WT_FILE_HANDLE *handle; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); - - __wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-extend: to %" PRIuMAX, - fh->handle->name, (uintmax_t)offset); - - /* - * Our caller is responsible for handling any locking issues, all we - * have to do is find a function to call. 
- */ - handle = fh->handle; - if (handle->fh_extend_nolock != NULL) - return (handle->fh_extend_nolock( - handle, (WT_SESSION *)session, offset)); - if (handle->fh_extend != NULL) - return (handle->fh_extend( - handle, (WT_SESSION *)session, offset)); - return (__wt_set_return(session, ENOTSUP)); + WT_FILE_HANDLE *handle; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-extend: to %" PRIuMAX, fh->handle->name, + (uintmax_t)offset); + + /* + * Our caller is responsible for handling any locking issues, all we have to do is find a + * function to call. + */ + handle = fh->handle; + if (handle->fh_extend_nolock != NULL) + return (handle->fh_extend_nolock(handle, (WT_SESSION *)session, offset)); + if (handle->fh_extend != NULL) + return (handle->fh_extend(handle, (WT_SESSION *)session, offset)); + return (__wt_set_return(session, ENOTSUP)); } /* * __wt_file_lock -- - * Lock/unlock a file. + * Lock/unlock a file. */ static inline int -__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) +__wt_file_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) { - WT_FILE_HANDLE *handle; + WT_FILE_HANDLE *handle; - __wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-lock: %s", fh->handle->name, lock ? "lock" : "unlock"); + __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-lock: %s", fh->handle->name, + lock ? "lock" : "unlock"); - handle = fh->handle; - return (handle->fh_lock == NULL ? 0 : - handle->fh_lock(handle, (WT_SESSION*)session, lock)); + handle = fh->handle; + return (handle->fh_lock == NULL ? 0 : handle->fh_lock(handle, (WT_SESSION *)session, lock)); } /* * __wt_read -- - * POSIX pread. + * POSIX pread. 
*/ static inline int -__wt_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +__wt_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) { - WT_DECL_RET; - uint64_t time_start, time_stop; + WT_DECL_RET; + uint64_t time_start, time_stop; - __wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, - fh->handle->name, len, (uintmax_t)offset); + __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->handle->name, len, (uintmax_t)offset); - WT_STAT_CONN_INCR_ATOMIC(session, thread_read_active); - WT_STAT_CONN_INCR(session, read_io); - time_start = __wt_clock(session); + WT_STAT_CONN_INCR_ATOMIC(session, thread_read_active); + WT_STAT_CONN_INCR(session, read_io); + time_start = __wt_clock(session); - ret = fh->handle->fh_read( - fh->handle, (WT_SESSION *)session, offset, len, buf); + ret = fh->handle->fh_read(fh->handle, (WT_SESSION *)session, offset, len, buf); - /* Flag any failed read: if we're in startup, it may be fatal. */ - if (ret != 0) - F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); + /* Flag any failed read: if we're in startup, it may be fatal. */ + if (ret != 0) + F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); - time_stop = __wt_clock(session); - __wt_stat_msecs_hist_incr_fsread(session, - WT_CLOCKDIFF_MS(time_stop, time_start)); - WT_STAT_CONN_DECR_ATOMIC(session, thread_read_active); - return (ret); + time_stop = __wt_clock(session); + __wt_stat_msecs_hist_incr_fsread(session, WT_CLOCKDIFF_MS(time_stop, time_start)); + WT_STAT_CONN_DECR_ATOMIC(session, thread_read_active); + return (ret); } /* * __wt_filesize -- - * Get the size of a file in bytes, by file handle. + * Get the size of a file in bytes, by file handle. 
*/ static inline int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) { - __wt_verbose( - session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->handle->name); + __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->handle->name); - return (fh->handle->fh_size(fh->handle, (WT_SESSION *)session, sizep)); + return (fh->handle->fh_size(fh->handle, (WT_SESSION *)session, sizep)); } /* * __wt_ftruncate -- - * Truncate a file. + * Truncate a file. */ static inline int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset) { - WT_FILE_HANDLE *handle; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - __wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-truncate: to %" PRIuMAX, - fh->handle->name, (uintmax_t)offset); - - /* - * Our caller is responsible for handling any locking issues, all we - * have to do is find a function to call. - */ - handle = fh->handle; - if (handle->fh_truncate != NULL) - return (handle->fh_truncate( - handle, (WT_SESSION *)session, offset)); - return (__wt_set_return(session, ENOTSUP)); + WT_FILE_HANDLE *handle; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-truncate: to %" PRIuMAX, fh->handle->name, + (uintmax_t)offset); + + /* + * Our caller is responsible for handling any locking issues, all we have to do is find a + * function to call. + */ + handle = fh->handle; + if (handle->fh_truncate != NULL) + return (handle->fh_truncate(handle, (WT_SESSION *)session, offset)); + return (__wt_set_return(session, ENOTSUP)); } /* * __wt_write -- - * POSIX pwrite. + * POSIX pwrite. 
*/ static inline int -__wt_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +__wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf) { - WT_DECL_RET; - uint64_t time_start, time_stop; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, - WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); - - __wt_verbose(session, WT_VERB_HANDLEOPS, - "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, - fh->handle->name, len, (uintmax_t)offset); - - /* - * Do a final panic check before I/O, so we stop writing as quickly as - * possible if there's an unanticipated error. We aren't handling the - * error correctly by definition, and writing won't make things better. - */ - WT_RET(WT_SESSION_CHECK_PANIC(session)); - - WT_STAT_CONN_INCR(session, write_io); - WT_STAT_CONN_INCR_ATOMIC(session, thread_write_active); - time_start = __wt_clock(session); - - ret = fh->handle->fh_write( - fh->handle, (WT_SESSION *)session, offset, len, buf); - - time_stop = __wt_clock(session); - __wt_stat_msecs_hist_incr_fswrite(session, - WT_CLOCKDIFF_MS(time_stop, time_start)); - (void)__wt_atomic_addv64(&fh->written, len); - WT_STAT_CONN_DECR_ATOMIC(session, thread_write_active); - return (ret); + WT_DECL_RET; + uint64_t time_start, time_stop; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || + WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); + + __wt_verbose(session, WT_VERB_HANDLEOPS, "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->handle->name, len, (uintmax_t)offset); + + /* + * Do a final panic check before I/O, so we stop writing as quickly as possible if there's an + * unanticipated error. We aren't handling the error correctly by definition, and writing won't + * make things better. 
+ */ + WT_RET(WT_SESSION_CHECK_PANIC(session)); + + WT_STAT_CONN_INCR(session, write_io); + WT_STAT_CONN_INCR_ATOMIC(session, thread_write_active); + time_start = __wt_clock(session); + + ret = fh->handle->fh_write(fh->handle, (WT_SESSION *)session, offset, len, buf); + + time_stop = __wt_clock(session); + __wt_stat_msecs_hist_incr_fswrite(session, WT_CLOCKDIFF_MS(time_stop, time_start)); + (void)__wt_atomic_addv64(&fh->written, len); + WT_STAT_CONN_DECR_ATOMIC(session, thread_write_active); + return (ret); } diff --git a/src/third_party/wiredtiger/src/include/os_fs.i b/src/third_party/wiredtiger/src/include/os_fs.i index b79c598594b..8f1c80d6177 100644 --- a/src/third_party/wiredtiger/src/include/os_fs.i +++ b/src/third_party/wiredtiger/src/include/os_fs.i @@ -8,215 +8,205 @@ /* * __wt_fs_directory_list -- - * Return a list of files from a directory. + * Return a list of files from a directory. */ static inline int -__wt_fs_directory_list(WT_SESSION_IMPL *session, - const char *dir, const char *prefix, char ***dirlistp, u_int *countp) +__wt_fs_directory_list( + WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) { - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *path; + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; - *dirlistp = NULL; - *countp = 0; + *dirlistp = NULL; + *countp = 0; - __wt_verbose(session, WT_VERB_FILEOPS, - "%s: directory-list: prefix %s", - dir, prefix == NULL ? "all" : prefix); + __wt_verbose(session, WT_VERB_FILEOPS, "%s: directory-list: prefix %s", dir, + prefix == NULL ? 
"all" : prefix); - WT_RET(__wt_filename(session, dir, &path)); + WT_RET(__wt_filename(session, dir, &path)); - file_system = S2C(session)->file_system; - wt_session = (WT_SESSION *)session; - ret = file_system->fs_directory_list( - file_system, wt_session, path, prefix, dirlistp, countp); + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_directory_list(file_system, wt_session, path, prefix, dirlistp, countp); - __wt_free(session, path); - return (ret); + __wt_free(session, path); + return (ret); } /* * __wt_fs_directory_list_single -- - * Return a single matching file from a directory. + * Return a single matching file from a directory. */ static inline int -__wt_fs_directory_list_single(WT_SESSION_IMPL *session, - const char *dir, const char *prefix, char ***dirlistp, u_int *countp) +__wt_fs_directory_list_single( + WT_SESSION_IMPL *session, const char *dir, const char *prefix, char ***dirlistp, u_int *countp) { - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *path; + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; - *dirlistp = NULL; - *countp = 0; + *dirlistp = NULL; + *countp = 0; - __wt_verbose(session, WT_VERB_FILEOPS, - "%s: directory-list-single: prefix %s", - dir, prefix == NULL ? "all" : prefix); + __wt_verbose(session, WT_VERB_FILEOPS, "%s: directory-list-single: prefix %s", dir, + prefix == NULL ? 
"all" : prefix); - WT_RET(__wt_filename(session, dir, &path)); + WT_RET(__wt_filename(session, dir, &path)); - file_system = S2C(session)->file_system; - wt_session = (WT_SESSION *)session; - ret = file_system->fs_directory_list_single( - file_system, wt_session, path, prefix, dirlistp, countp); + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_directory_list_single( + file_system, wt_session, path, prefix, dirlistp, countp); - __wt_free(session, path); - return (ret); + __wt_free(session, path); + return (ret); } /* * __wt_fs_directory_list_free -- - * Free memory allocated by __wt_fs_directory_list. + * Free memory allocated by __wt_fs_directory_list. */ static inline int -__wt_fs_directory_list_free( - WT_SESSION_IMPL *session, char ***dirlistp, u_int count) +__wt_fs_directory_list_free(WT_SESSION_IMPL *session, char ***dirlistp, u_int count) { - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - - if (*dirlistp != NULL) { - file_system = S2C(session)->file_system; - wt_session = (WT_SESSION *)session; - ret = file_system->fs_directory_list_free( - file_system, wt_session, *dirlistp, count); - } - - *dirlistp = NULL; - return (ret); + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + + if (*dirlistp != NULL) { + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_directory_list_free(file_system, wt_session, *dirlistp, count); + } + + *dirlistp = NULL; + return (ret); } /* * __wt_fs_exist -- - * Return if the file exists. + * Return if the file exists. 
*/ static inline int __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) { - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *path; + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; - __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name); + __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name); - WT_RET(__wt_filename(session, name, &path)); + WT_RET(__wt_filename(session, name, &path)); - file_system = S2C(session)->file_system; - wt_session = (WT_SESSION *)session; - ret = file_system->fs_exist(file_system, wt_session, path, existp); + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_exist(file_system, wt_session, path, existp); - __wt_free(session, path); - return (ret); + __wt_free(session, path); + return (ret); } /* * __wt_fs_remove -- - * Remove the file. + * Remove the file. */ static inline int __wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable) { - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *path; + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name); + __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name); #ifdef HAVE_DIAGNOSTIC - /* - * It is a layering violation to retrieve a WT_FH here, but it is a - * useful diagnostic to ensure WiredTiger doesn't have the handle open. - */ - if (__wt_handle_is_open(session, name)) - WT_RET_MSG(session, EINVAL, - "%s: file-remove: file has open handles", name); + /* + * It is a layering violation to retrieve a WT_FH here, but it is a useful diagnostic to ensure + * WiredTiger doesn't have the handle open. 
+ */ + if (__wt_handle_is_open(session, name)) + WT_RET_MSG(session, EINVAL, "%s: file-remove: file has open handles", name); #endif - WT_RET(__wt_filename(session, name, &path)); + WT_RET(__wt_filename(session, name, &path)); - file_system = S2C(session)->file_system; - wt_session = (WT_SESSION *)session; - ret = file_system->fs_remove( - file_system, wt_session, path, durable ? WT_FS_DURABLE : 0); + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_remove(file_system, wt_session, path, durable ? WT_FS_DURABLE : 0); - __wt_free(session, path); - return (ret); + __wt_free(session, path); + return (ret); } /* * __wt_fs_rename -- - * Rename the file. + * Rename the file. */ static inline int -__wt_fs_rename( - WT_SESSION_IMPL *session, const char *from, const char *to, bool durable) +__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to, bool durable) { - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *from_path, *to_path; + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *from_path, *to_path; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - __wt_verbose( - session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to); + __wt_verbose(session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to); #ifdef HAVE_DIAGNOSTIC - /* - * It is a layering violation to retrieve a WT_FH here, but it is a - * useful diagnostic to ensure WiredTiger doesn't have the handle open. - */ - if (__wt_handle_is_open(session, from)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", from); - if (__wt_handle_is_open(session, to)) - WT_RET_MSG(session, EINVAL, - "%s: file-rename: file has open handles", to); + /* + * It is a layering violation to retrieve a WT_FH here, but it is a useful diagnostic to ensure + * WiredTiger doesn't have the handle open. 
+ */ + if (__wt_handle_is_open(session, from)) + WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", from); + if (__wt_handle_is_open(session, to)) + WT_RET_MSG(session, EINVAL, "%s: file-rename: file has open handles", to); #endif - from_path = to_path = NULL; - WT_ERR(__wt_filename(session, from, &from_path)); - WT_ERR(__wt_filename(session, to, &to_path)); + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + WT_ERR(__wt_filename(session, to, &to_path)); - file_system = S2C(session)->file_system; - wt_session = (WT_SESSION *)session; - ret = file_system->fs_rename(file_system, - wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0); + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_rename( + file_system, wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0); -err: __wt_free(session, from_path); - __wt_free(session, to_path); - return (ret); +err: + __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); } /* * __wt_fs_size -- - * Return the size of a file in bytes, by file name. + * Return the size of a file in bytes, by file name. 
*/ static inline int __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) { - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *path; + WT_DECL_RET; + WT_FILE_SYSTEM *file_system; + WT_SESSION *wt_session; + char *path; - __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name); + __wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name); - WT_RET(__wt_filename(session, name, &path)); + WT_RET(__wt_filename(session, name, &path)); - file_system = S2C(session)->file_system; - wt_session = (WT_SESSION *)session; - ret = file_system->fs_size(file_system, wt_session, path, sizep); + file_system = S2C(session)->file_system; + wt_session = (WT_SESSION *)session; + ret = file_system->fs_size(file_system, wt_session, path, sizep); - __wt_free(session, path); - return (ret); + __wt_free(session, path); + return (ret); } diff --git a/src/third_party/wiredtiger/src/include/os_fstream.i b/src/third_party/wiredtiger/src/include/os_fstream.i index b2052054f93..c7b735e2da2 100644 --- a/src/third_party/wiredtiger/src/include/os_fstream.i +++ b/src/third_party/wiredtiger/src/include/os_fstream.i @@ -8,87 +8,85 @@ /* * __wt_getline -- - * Get a line from a stream. + * Get a line from a stream. */ static inline int __wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_ITEM *buf) { - return (fstr->fstr_getline(session, fstr, buf)); + return (fstr->fstr_getline(session, fstr, buf)); } /* * __wt_fclose -- - * Close a stream. + * Close a stream. */ static inline int __wt_fclose(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp) { - WT_FSTREAM *fstr; + WT_FSTREAM *fstr; - if ((fstr = *fstrp) == NULL) - return (0); - *fstrp = NULL; - return (fstr->close(session, fstr)); + if ((fstr = *fstrp) == NULL) + return (0); + *fstrp = NULL; + return (fstr->close(session, fstr)); } /* * __wt_fflush -- - * Flush a stream. + * Flush a stream. 
*/ static inline int __wt_fflush(WT_SESSION_IMPL *session, WT_FSTREAM *fstr) { - return (fstr->fstr_flush(session, fstr)); + return (fstr->fstr_flush(session, fstr)); } /* * __wt_vfprintf -- - * ANSI C vfprintf. + * ANSI C vfprintf. */ static inline int -__wt_vfprintf( - WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap) +__wt_vfprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, va_list ap) { - return (fstr->fstr_printf(session, fstr, fmt, ap)); + return (fstr->fstr_printf(session, fstr, fmt, ap)); } /* * __wt_fprintf -- - * ANSI C fprintf. + * ANSI C fprintf. */ static inline int __wt_fprintf(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, const char *fmt, ...) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) + WT_GCC_FUNC_ATTRIBUTE((format(printf, 3, 4))) { - WT_DECL_RET; - va_list ap; + WT_DECL_RET; + va_list ap; - va_start(ap, fmt); - ret = __wt_vfprintf(session, fstr, fmt, ap); - va_end(ap); + va_start(ap, fmt); + ret = __wt_vfprintf(session, fstr, fmt, ap); + va_end(ap); - return (ret); + return (ret); } /* * __wt_sync_and_rename -- - * Flush and close a stream, then swap it into place. + * Flush and close a stream, then swap it into place. */ static inline int -__wt_sync_and_rename(WT_SESSION_IMPL *session, - WT_FSTREAM **fstrp, const char *from, const char *to) +__wt_sync_and_rename(WT_SESSION_IMPL *session, WT_FSTREAM **fstrp, const char *from, const char *to) { - WT_DECL_RET; - WT_FSTREAM *fstr; + WT_DECL_RET; + WT_FSTREAM *fstr; - fstr = *fstrp; - *fstrp = NULL; + fstr = *fstrp; + *fstrp = NULL; - /* Flush to disk and close the handle. */ - WT_TRET(__wt_fflush(session, fstr)); - WT_TRET(__wt_fsync(session, fstr->fh, true)); - WT_TRET(__wt_fclose(session, &fstr)); - WT_RET(ret); + /* Flush to disk and close the handle. 
*/ + WT_TRET(__wt_fflush(session, fstr)); + WT_TRET(__wt_fsync(session, fstr->fh, true)); + WT_TRET(__wt_fclose(session, &fstr)); + WT_RET(ret); - return (__wt_fs_rename(session, from, to, true)); + return (__wt_fs_rename(session, from, to, true)); } diff --git a/src/third_party/wiredtiger/src/include/os_windows.h b/src/third_party/wiredtiger/src/include/os_windows.h index 84619f218a2..a184ff3cfbc 100644 --- a/src/third_party/wiredtiger/src/include/os_windows.h +++ b/src/third_party/wiredtiger/src/include/os_windows.h @@ -7,52 +7,49 @@ */ /* - * Define WT threading and concurrency primitives - * Assumes Windows 7+/2008 R2+ + * Define WT threading and concurrency primitives Assumes Windows 7+/2008 R2+ */ -typedef CONDITION_VARIABLE wt_cond_t; -typedef CRITICAL_SECTION wt_mutex_t; +typedef CONDITION_VARIABLE wt_cond_t; +typedef CRITICAL_SECTION wt_mutex_t; typedef struct { - bool created; - HANDLE id; + bool created; + HANDLE id; } wt_thread_t; /* * Thread callbacks need to match the return signature of _beginthreadex. 
*/ -#define WT_THREAD_CALLBACK(x) unsigned (__stdcall x) -#define WT_THREAD_RET unsigned __stdcall -#define WT_THREAD_RET_VALUE 0 +#define WT_THREAD_CALLBACK(x) unsigned(__stdcall x) +#define WT_THREAD_RET unsigned __stdcall +#define WT_THREAD_RET_VALUE 0 /* * WT declaration for calling convention type */ -#define WT_CDECL __cdecl +#define WT_CDECL __cdecl #if _MSC_VER < 1900 /* Timespec is a POSIX structure not defined in Windows */ struct timespec { - time_t tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ }; #endif /* - * Windows Portability stuff - * These are POSIX types which Windows lacks - * Eventually WiredTiger will migrate away from these types + * Windows Portability stuff These are POSIX types which Windows lacks Eventually WiredTiger will + * migrate away from these types */ -typedef unsigned int u_int; -typedef unsigned char u_char; -typedef unsigned long u_long; +typedef unsigned int u_int; +typedef unsigned char u_char; +typedef unsigned long u_long; /* - * Windows does have ssize_t - * Python headers declare also though so we need to guard it + * Windows does have ssize_t Python headers declare also though so we need to guard it */ #ifndef HAVE_SSIZE_T typedef int ssize_t; #endif /* Windows does not provide fsync */ -#define fsync _commit +#define fsync _commit diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i index 0f47569a4ae..1335334f142 100644 --- a/src/third_party/wiredtiger/src/include/packing.i +++ b/src/third_party/wiredtiger/src/include/packing.i @@ -14,738 +14,730 @@ * because the compiler promotes shorter types to int or unsigned int. 
*/ typedef struct { - union { - int64_t i; - uint64_t u; - const char *s; - WT_ITEM item; - } u; - uint32_t size; - int8_t havesize; - char type; + union { + int64_t i; + uint64_t u; + const char *s; + WT_ITEM item; + } u; + uint32_t size; + int8_t havesize; + char type; } WT_PACK_VALUE; /* Default to size = 1 if there is no size prefix. */ -#define WT_PACK_VALUE_INIT { { 0 }, 1, 0, 0 } -#define WT_DECL_PACK_VALUE(pv) WT_PACK_VALUE pv = WT_PACK_VALUE_INIT +#define WT_PACK_VALUE_INIT \ + { \ + {0}, 1, 0, 0 \ + } +#define WT_DECL_PACK_VALUE(pv) WT_PACK_VALUE pv = WT_PACK_VALUE_INIT typedef struct { - WT_SESSION_IMPL *session; - const char *cur, *end, *orig; - unsigned long repeats; - WT_PACK_VALUE lastv; + WT_SESSION_IMPL *session; + const char *cur, *end, *orig; + unsigned long repeats; + WT_PACK_VALUE lastv; } WT_PACK; -#define WT_PACK_INIT { NULL, NULL, NULL, NULL, 0, WT_PACK_VALUE_INIT } -#define WT_DECL_PACK(pack) WT_PACK pack = WT_PACK_INIT +#define WT_PACK_INIT \ + { \ + NULL, NULL, NULL, NULL, 0, WT_PACK_VALUE_INIT \ + } +#define WT_DECL_PACK(pack) WT_PACK pack = WT_PACK_INIT typedef struct { - WT_CONFIG config; - char buf[20]; - int count; - bool iskey; - int genname; + WT_CONFIG config; + char buf[20]; + int count; + bool iskey; + int genname; } WT_PACK_NAME; /* * __pack_initn -- - * Initialize a pack iterator with the specified string and length. + * Initialize a pack iterator with the specified string and length. 
*/ static inline int -__pack_initn( - WT_SESSION_IMPL *session, WT_PACK *pack, const char *fmt, size_t len) +__pack_initn(WT_SESSION_IMPL *session, WT_PACK *pack, const char *fmt, size_t len) { - if (*fmt == '@' || *fmt == '<' || *fmt == '>') - return (EINVAL); - if (*fmt == '.') - ++fmt; - - pack->session = session; - pack->cur = pack->orig = fmt; - pack->end = fmt + len; - pack->repeats = 0; - return (0); + if (*fmt == '@' || *fmt == '<' || *fmt == '>') + return (EINVAL); + if (*fmt == '.') + ++fmt; + + pack->session = session; + pack->cur = pack->orig = fmt; + pack->end = fmt + len; + pack->repeats = 0; + return (0); } /* * __pack_init -- - * Initialize a pack iterator with the specified string. + * Initialize a pack iterator with the specified string. */ static inline int __pack_init(WT_SESSION_IMPL *session, WT_PACK *pack, const char *fmt) { - return (__pack_initn(session, pack, fmt, strlen(fmt))); + return (__pack_initn(session, pack, fmt, strlen(fmt))); } /* * __pack_name_init -- - * Initialize the name of a pack iterator. + * Initialize the name of a pack iterator. */ static inline void -__pack_name_init(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *names, - bool iskey, WT_PACK_NAME *pn) +__pack_name_init(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *names, bool iskey, WT_PACK_NAME *pn) { - WT_CLEAR(*pn); - pn->iskey = iskey; + WT_CLEAR(*pn); + pn->iskey = iskey; - if (names->str != NULL) - __wt_config_subinit(session, &pn->config, names); - else - pn->genname = 1; + if (names->str != NULL) + __wt_config_subinit(session, &pn->config, names); + else + pn->genname = 1; } /* * __pack_name_next -- - * Get the next field type from a pack iterator. + * Get the next field type from a pack iterator. */ static inline int __pack_name_next(WT_PACK_NAME *pn, WT_CONFIG_ITEM *name) { - WT_CONFIG_ITEM ignore; - - if (pn->genname) { - WT_RET(__wt_snprintf(pn->buf, sizeof(pn->buf), - (pn->iskey ? 
"key%d" : "value%d"), pn->count)); - WT_CLEAR(*name); - name->str = pn->buf; - name->len = strlen(pn->buf); - name->type = WT_CONFIG_ITEM_STRING; - pn->count++; - } - else - WT_RET(__wt_config_next(&pn->config, name, &ignore)); - - return (0); + WT_CONFIG_ITEM ignore; + + if (pn->genname) { + WT_RET( + __wt_snprintf(pn->buf, sizeof(pn->buf), (pn->iskey ? "key%d" : "value%d"), pn->count)); + WT_CLEAR(*name); + name->str = pn->buf; + name->len = strlen(pn->buf); + name->type = WT_CONFIG_ITEM_STRING; + pn->count++; + } else + WT_RET(__wt_config_next(&pn->config, name, &ignore)); + + return (0); } /* * __pack_next -- - * Next pack iterator. + * Next pack iterator. */ static inline int __pack_next(WT_PACK *pack, WT_PACK_VALUE *pv) { - char *endsize; - - if (pack->repeats > 0) { - *pv = pack->lastv; - --pack->repeats; - return (0); - } - -next: if (pack->cur == pack->end) - return (WT_NOTFOUND); - - if (__wt_isdigit((u_char)*pack->cur)) { - pv->havesize = 1; - pv->size = WT_STORE_SIZE(strtoul(pack->cur, &endsize, 10)); - pack->cur = endsize; - } else { - pv->havesize = 0; - pv->size = 1; - } - - pv->type = *pack->cur++; - pack->repeats = 0; - - switch (pv->type) { - case 'S': - return (0); - case 's': - if (pv->size < 1) - WT_RET_MSG(pack->session, EINVAL, - "Fixed length strings must be at least 1 byte " - "in format '%.*s'", - (int)(pack->end - pack->orig), pack->orig); - return (0); - case 'x': - return (0); - case 't': - if (pv->size < 1 || pv->size > 8) - WT_RET_MSG(pack->session, EINVAL, - "Bitfield sizes must be between 1 and 8 bits " - "in format '%.*s'", - (int)(pack->end - pack->orig), pack->orig); - return (0); - case 'u': - /* Special case for items with a size prefix. */ - pv->type = (!pv->havesize && *pack->cur != '\0') ? 'U' : 'u'; - return (0); - case 'U': - /* - * Don't change the type. 'U' is used internally, so this type - * was already changed to explicitly include the size. 
- */ - return (0); - case 'b': - case 'h': - case 'i': - case 'B': - case 'H': - case 'I': - case 'l': - case 'L': - case 'q': - case 'Q': - case 'r': - case 'R': - /* Integral types repeat <size> times. */ - if (pv->size == 0) - goto next; - pv->havesize = 0; - pack->repeats = pv->size - 1; - pack->lastv = *pv; - return (0); - default: - WT_RET_MSG(pack->session, EINVAL, - "Invalid type '%c' found in format '%.*s'", - pv->type, (int)(pack->end - pack->orig), pack->orig); - } - + char *endsize; + + if (pack->repeats > 0) { + *pv = pack->lastv; + --pack->repeats; + return (0); + } + +next: + if (pack->cur == pack->end) + return (WT_NOTFOUND); + + if (__wt_isdigit((u_char)*pack->cur)) { + pv->havesize = 1; + pv->size = WT_STORE_SIZE(strtoul(pack->cur, &endsize, 10)); + pack->cur = endsize; + } else { + pv->havesize = 0; + pv->size = 1; + } + + pv->type = *pack->cur++; + pack->repeats = 0; + + switch (pv->type) { + case 'S': + return (0); + case 's': + if (pv->size < 1) + WT_RET_MSG(pack->session, EINVAL, + "Fixed length strings must be at least 1 byte " + "in format '%.*s'", + (int)(pack->end - pack->orig), pack->orig); + return (0); + case 'x': + return (0); + case 't': + if (pv->size < 1 || pv->size > 8) + WT_RET_MSG(pack->session, EINVAL, + "Bitfield sizes must be between 1 and 8 bits " + "in format '%.*s'", + (int)(pack->end - pack->orig), pack->orig); + return (0); + case 'u': + /* Special case for items with a size prefix. */ + pv->type = (!pv->havesize && *pack->cur != '\0') ? 'U' : 'u'; + return (0); + case 'U': + /* + * Don't change the type. 'U' is used internally, so this type was already changed to + * explicitly include the size. + */ + return (0); + case 'b': + case 'h': + case 'i': + case 'B': + case 'H': + case 'I': + case 'l': + case 'L': + case 'q': + case 'Q': + case 'r': + case 'R': + /* Integral types repeat <size> times. 
*/ + if (pv->size == 0) + goto next; + pv->havesize = 0; + pack->repeats = pv->size - 1; + pack->lastv = *pv; + return (0); + default: + WT_RET_MSG(pack->session, EINVAL, "Invalid type '%c' found in format '%.*s'", pv->type, + (int)(pack->end - pack->orig), pack->orig); + } } -#define WT_PACK_GET(session, pv, ap) do { \ - WT_ITEM *__item; \ - switch ((pv).type) { \ - case 'x': \ - break; \ - case 's': \ - case 'S': \ - (pv).u.s = va_arg(ap, const char *); \ - break; \ - case 'U': \ - case 'u': \ - __item = va_arg(ap, WT_ITEM *); \ - (pv).u.item.data = __item->data; \ - (pv).u.item.size = __item->size; \ - break; \ - case 'b': \ - case 'h': \ - case 'i': \ - (pv).u.i = va_arg(ap, int); \ - break; \ - case 'B': \ - case 'H': \ - case 'I': \ - case 't': \ - (pv).u.u = va_arg(ap, unsigned int); \ - break; \ - case 'l': \ - (pv).u.i = va_arg(ap, long); \ - break; \ - case 'L': \ - (pv).u.u = va_arg(ap, unsigned long); \ - break; \ - case 'q': \ - (pv).u.i = va_arg(ap, int64_t); \ - break; \ - case 'Q': \ - case 'r': \ - case 'R': \ - (pv).u.u = va_arg(ap, uint64_t); \ - break; \ - default: \ - /* User format strings have already been validated. 
*/ \ - return (__wt_illegal_value(session, (pv).type)); \ - } \ -} while (0) +#define WT_PACK_GET(session, pv, ap) \ + do { \ + WT_ITEM *__item; \ + switch ((pv).type) { \ + case 'x': \ + break; \ + case 's': \ + case 'S': \ + (pv).u.s = va_arg(ap, const char *); \ + break; \ + case 'U': \ + case 'u': \ + __item = va_arg(ap, WT_ITEM *); \ + (pv).u.item.data = __item->data; \ + (pv).u.item.size = __item->size; \ + break; \ + case 'b': \ + case 'h': \ + case 'i': \ + (pv).u.i = va_arg(ap, int); \ + break; \ + case 'B': \ + case 'H': \ + case 'I': \ + case 't': \ + (pv).u.u = va_arg(ap, unsigned int); \ + break; \ + case 'l': \ + (pv).u.i = va_arg(ap, long); \ + break; \ + case 'L': \ + (pv).u.u = va_arg(ap, unsigned long); \ + break; \ + case 'q': \ + (pv).u.i = va_arg(ap, int64_t); \ + break; \ + case 'Q': \ + case 'r': \ + case 'R': \ + (pv).u.u = va_arg(ap, uint64_t); \ + break; \ + default: \ + /* User format strings have already been validated. */ \ + return (__wt_illegal_value(session, (pv).type)); \ + } \ + } while (0) /* * __pack_size -- - * Get the size of a packed value. + * Get the size of a packed value. */ static inline int __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, size_t *vp) { - size_t s, pad; - - switch (pv->type) { - case 'x': - *vp = pv->size; - return (0); - case 'j': - case 'J': - case 'K': - /* These formats are only used internally. */ - if (pv->type == 'j' || pv->havesize) - s = pv->size; - else { - ssize_t len; - - /* The string was previously validated. */ - len = __wt_json_strlen(pv->u.item.data, - pv->u.item.size); - WT_ASSERT(session, len >= 0); - s = (size_t)len + (pv->type == 'K' ? 
0 : 1); - } - *vp = s; - return (0); - case 's': - case 'S': - if (pv->type == 's' || pv->havesize) { - s = pv->size; - WT_ASSERT(session, s != 0); - } else - s = strlen(pv->u.s) + 1; - *vp = s; - return (0); - case 'U': - case 'u': - s = pv->u.item.size; - pad = 0; - if (pv->havesize && pv->size < s) - s = pv->size; - else if (pv->havesize) - pad = pv->size - s; - if (pv->type == 'U') - s += __wt_vsize_uint(s + pad); - *vp = s + pad; - return (0); - case 'b': - case 'B': - case 't': - *vp = 1; - return (0); - case 'h': - case 'i': - case 'l': - case 'q': - *vp = __wt_vsize_int(pv->u.i); - return (0); - case 'H': - case 'I': - case 'L': - case 'Q': - case 'r': - *vp = __wt_vsize_uint(pv->u.u); - return (0); - case 'R': - *vp = sizeof(uint64_t); - return (0); - } - - WT_RET_MSG( - session, EINVAL, "unknown pack-value type: %c", (int)pv->type); + size_t s, pad; + + switch (pv->type) { + case 'x': + *vp = pv->size; + return (0); + case 'j': + case 'J': + case 'K': + /* These formats are only used internally. */ + if (pv->type == 'j' || pv->havesize) + s = pv->size; + else { + ssize_t len; + + /* The string was previously validated. */ + len = __wt_json_strlen(pv->u.item.data, pv->u.item.size); + WT_ASSERT(session, len >= 0); + s = (size_t)len + (pv->type == 'K' ? 
0 : 1); + } + *vp = s; + return (0); + case 's': + case 'S': + if (pv->type == 's' || pv->havesize) { + s = pv->size; + WT_ASSERT(session, s != 0); + } else + s = strlen(pv->u.s) + 1; + *vp = s; + return (0); + case 'U': + case 'u': + s = pv->u.item.size; + pad = 0; + if (pv->havesize && pv->size < s) + s = pv->size; + else if (pv->havesize) + pad = pv->size - s; + if (pv->type == 'U') + s += __wt_vsize_uint(s + pad); + *vp = s + pad; + return (0); + case 'b': + case 'B': + case 't': + *vp = 1; + return (0); + case 'h': + case 'i': + case 'l': + case 'q': + *vp = __wt_vsize_int(pv->u.i); + return (0); + case 'H': + case 'I': + case 'L': + case 'Q': + case 'r': + *vp = __wt_vsize_uint(pv->u.u); + return (0); + case 'R': + *vp = sizeof(uint64_t); + return (0); + } + + WT_RET_MSG(session, EINVAL, "unknown pack-value type: %c", (int)pv->type); } /* * __pack_write -- - * Pack a value into a buffer. + * Pack a value into a buffer. */ static inline int -__pack_write( - WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, uint8_t **pp, size_t maxlen) +__pack_write(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, uint8_t **pp, size_t maxlen) { - size_t s, pad; - uint8_t *oldp; - - switch (pv->type) { - case 'x': - WT_SIZE_CHECK_PACK(pv->size, maxlen); - memset(*pp, 0, pv->size); - *pp += pv->size; - break; - case 's': - WT_SIZE_CHECK_PACK(pv->size, maxlen); - memcpy(*pp, pv->u.s, pv->size); - *pp += pv->size; - break; - case 'S': - /* - * When preceded by a size, that indicates the maximum number - * of bytes the string can store, this does not include the - * terminating NUL character. In a string with characters - * less than the specified size, the remaining bytes are - * NULL padded. - */ - if (pv->havesize) { - s = __wt_strnlen(pv->u.s, pv->size); - pad = (s < pv->size) ? 
pv->size - s : 0; - } else { - s = strlen(pv->u.s); - pad = 1; - } - WT_SIZE_CHECK_PACK(s + pad, maxlen); - if (s > 0) - memcpy(*pp, pv->u.s, s); - *pp += s; - if (pad > 0) { - memset(*pp, 0, pad); - *pp += pad; - } - break; - case 'j': - case 'J': - case 'K': - /* These formats are only used internally. */ - s = pv->u.item.size; - if ((pv->type == 'j' || pv->havesize) && pv->size < s) { - s = pv->size; - pad = 0; - } else if (pv->havesize) - pad = pv->size - s; - else if (pv->type == 'K') - pad = 0; - else - pad = 1; - if (s > 0) { - oldp = *pp; - WT_RET(__wt_json_strncpy((WT_SESSION *)session, - (char **)pp, maxlen, pv->u.item.data, s)); - maxlen -= (size_t)(*pp - oldp); - } - if (pad > 0) { - WT_SIZE_CHECK_PACK(pad, maxlen); - memset(*pp, 0, pad); - *pp += pad; - } - break; - case 'U': - case 'u': - s = pv->u.item.size; - pad = 0; - if (pv->havesize && pv->size < s) - s = pv->size; - else if (pv->havesize) - pad = pv->size - s; - if (pv->type == 'U') { - oldp = *pp; - /* - * Check that there is at least one byte available: the - * low-level routines treat zero length as unchecked. - */ - WT_SIZE_CHECK_PACK(1, maxlen); - WT_RET(__wt_vpack_uint(pp, maxlen, s + pad)); - maxlen -= (size_t)(*pp - oldp); - } - WT_SIZE_CHECK_PACK(s + pad, maxlen); - if (s > 0) - memcpy(*pp, pv->u.item.data, s); - *pp += s; - if (pad > 0) { - memset(*pp, 0, pad); - *pp += pad; - } - break; - case 'b': - /* Translate to maintain ordering with the sign bit. */ - WT_SIZE_CHECK_PACK(1, maxlen); - **pp = (uint8_t)(pv->u.i + 0x80); - *pp += 1; - break; - case 'B': - case 't': - WT_SIZE_CHECK_PACK(1, maxlen); - **pp = (uint8_t)pv->u.u; - *pp += 1; - break; - case 'h': - case 'i': - case 'l': - case 'q': - /* - * Check that there is at least one byte available: the - * low-level routines treat zero length as unchecked. 
- */ - WT_SIZE_CHECK_PACK(1, maxlen); - WT_RET(__wt_vpack_int(pp, maxlen, pv->u.i)); - break; - case 'H': - case 'I': - case 'L': - case 'Q': - case 'r': - /* - * Check that there is at least one byte available: the - * low-level routines treat zero length as unchecked. - */ - WT_SIZE_CHECK_PACK(1, maxlen); - WT_RET(__wt_vpack_uint(pp, maxlen, pv->u.u)); - break; - case 'R': - WT_SIZE_CHECK_PACK(sizeof(uint64_t), maxlen); - *(uint64_t *)*pp = pv->u.u; - *pp += sizeof(uint64_t); - break; - default: - WT_RET_MSG(session, EINVAL, - "unknown pack-value type: %c", (int)pv->type); - } - - return (0); + size_t s, pad; + uint8_t *oldp; + + switch (pv->type) { + case 'x': + WT_SIZE_CHECK_PACK(pv->size, maxlen); + memset(*pp, 0, pv->size); + *pp += pv->size; + break; + case 's': + WT_SIZE_CHECK_PACK(pv->size, maxlen); + memcpy(*pp, pv->u.s, pv->size); + *pp += pv->size; + break; + case 'S': + /* + * When preceded by a size, that indicates the maximum number of bytes the string can store, + * this does not include the terminating NUL character. In a string with characters less + * than the specified size, the remaining bytes are NULL padded. + */ + if (pv->havesize) { + s = __wt_strnlen(pv->u.s, pv->size); + pad = (s < pv->size) ? pv->size - s : 0; + } else { + s = strlen(pv->u.s); + pad = 1; + } + WT_SIZE_CHECK_PACK(s + pad, maxlen); + if (s > 0) + memcpy(*pp, pv->u.s, s); + *pp += s; + if (pad > 0) { + memset(*pp, 0, pad); + *pp += pad; + } + break; + case 'j': + case 'J': + case 'K': + /* These formats are only used internally. 
*/ + s = pv->u.item.size; + if ((pv->type == 'j' || pv->havesize) && pv->size < s) { + s = pv->size; + pad = 0; + } else if (pv->havesize) + pad = pv->size - s; + else if (pv->type == 'K') + pad = 0; + else + pad = 1; + if (s > 0) { + oldp = *pp; + WT_RET( + __wt_json_strncpy((WT_SESSION *)session, (char **)pp, maxlen, pv->u.item.data, s)); + maxlen -= (size_t)(*pp - oldp); + } + if (pad > 0) { + WT_SIZE_CHECK_PACK(pad, maxlen); + memset(*pp, 0, pad); + *pp += pad; + } + break; + case 'U': + case 'u': + s = pv->u.item.size; + pad = 0; + if (pv->havesize && pv->size < s) + s = pv->size; + else if (pv->havesize) + pad = pv->size - s; + if (pv->type == 'U') { + oldp = *pp; + /* + * Check that there is at least one byte available: the low-level routines treat zero + * length as unchecked. + */ + WT_SIZE_CHECK_PACK(1, maxlen); + WT_RET(__wt_vpack_uint(pp, maxlen, s + pad)); + maxlen -= (size_t)(*pp - oldp); + } + WT_SIZE_CHECK_PACK(s + pad, maxlen); + if (s > 0) + memcpy(*pp, pv->u.item.data, s); + *pp += s; + if (pad > 0) { + memset(*pp, 0, pad); + *pp += pad; + } + break; + case 'b': + /* Translate to maintain ordering with the sign bit. */ + WT_SIZE_CHECK_PACK(1, maxlen); + **pp = (uint8_t)(pv->u.i + 0x80); + *pp += 1; + break; + case 'B': + case 't': + WT_SIZE_CHECK_PACK(1, maxlen); + **pp = (uint8_t)pv->u.u; + *pp += 1; + break; + case 'h': + case 'i': + case 'l': + case 'q': + /* + * Check that there is at least one byte available: the low-level routines treat zero length + * as unchecked. + */ + WT_SIZE_CHECK_PACK(1, maxlen); + WT_RET(__wt_vpack_int(pp, maxlen, pv->u.i)); + break; + case 'H': + case 'I': + case 'L': + case 'Q': + case 'r': + /* + * Check that there is at least one byte available: the low-level routines treat zero length + * as unchecked. 
+ */ + WT_SIZE_CHECK_PACK(1, maxlen); + WT_RET(__wt_vpack_uint(pp, maxlen, pv->u.u)); + break; + case 'R': + WT_SIZE_CHECK_PACK(sizeof(uint64_t), maxlen); + *(uint64_t *)*pp = pv->u.u; + *pp += sizeof(uint64_t); + break; + default: + WT_RET_MSG(session, EINVAL, "unknown pack-value type: %c", (int)pv->type); + } + + return (0); } /* * __unpack_read -- - * Read a packed value from a buffer. + * Read a packed value from a buffer. */ static inline int -__unpack_read(WT_SESSION_IMPL *session, - WT_PACK_VALUE *pv, const uint8_t **pp, size_t maxlen) +__unpack_read(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv, const uint8_t **pp, size_t maxlen) { - size_t s; - - switch (pv->type) { - case 'x': - WT_SIZE_CHECK_UNPACK(pv->size, maxlen); - *pp += pv->size; - break; - case 's': - case 'S': - if (pv->type == 's' || pv->havesize) { - s = pv->size; - WT_ASSERT(session, s != 0); - } else - s = strlen((const char *)*pp) + 1; - if (s > 0) - pv->u.s = (const char *)*pp; - WT_SIZE_CHECK_UNPACK(s, maxlen); - *pp += s; - break; - case 'U': - /* - * Check that there is at least one byte available: the - * low-level routines treat zero length as unchecked. - */ - WT_SIZE_CHECK_UNPACK(1, maxlen); - WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u)); - /* FALLTHROUGH */ - case 'u': - if (pv->havesize) - s = pv->size; - else if (pv->type == 'U') - s = (size_t)pv->u.u; - else - s = maxlen; - WT_SIZE_CHECK_UNPACK(s, maxlen); - pv->u.item.data = *pp; - pv->u.item.size = s; - *pp += s; - break; - case 'b': - /* Translate to maintain ordering with the sign bit. */ - WT_SIZE_CHECK_UNPACK(1, maxlen); - pv->u.i = (int8_t)(*(*pp)++ - 0x80); - break; - case 'B': - case 't': - WT_SIZE_CHECK_UNPACK(1, maxlen); - pv->u.u = *(*pp)++; - break; - case 'h': - case 'i': - case 'l': - case 'q': - /* - * Check that there is at least one byte available: the - * low-level routines treat zero length as unchecked. 
- */ - WT_SIZE_CHECK_UNPACK(1, maxlen); - WT_RET(__wt_vunpack_int(pp, maxlen, &pv->u.i)); - break; - case 'H': - case 'I': - case 'L': - case 'Q': - case 'r': - /* - * Check that there is at least one byte available: the - * low-level routines treat zero length as unchecked. - */ - WT_SIZE_CHECK_UNPACK(1, maxlen); - WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u)); - break; - case 'R': - WT_SIZE_CHECK_UNPACK(sizeof(uint64_t), maxlen); - pv->u.u = *(const uint64_t *)*pp; - *pp += sizeof(uint64_t); - break; - default: - WT_RET_MSG(session, EINVAL, - "unknown pack-value type: %c", (int)pv->type); - } - - return (0); + size_t s; + + switch (pv->type) { + case 'x': + WT_SIZE_CHECK_UNPACK(pv->size, maxlen); + *pp += pv->size; + break; + case 's': + case 'S': + if (pv->type == 's' || pv->havesize) { + s = pv->size; + WT_ASSERT(session, s != 0); + } else + s = strlen((const char *)*pp) + 1; + if (s > 0) + pv->u.s = (const char *)*pp; + WT_SIZE_CHECK_UNPACK(s, maxlen); + *pp += s; + break; + case 'U': + /* + * Check that there is at least one byte available: the low-level routines treat zero length + * as unchecked. + */ + WT_SIZE_CHECK_UNPACK(1, maxlen); + WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u)); + /* FALLTHROUGH */ + case 'u': + if (pv->havesize) + s = pv->size; + else if (pv->type == 'U') + s = (size_t)pv->u.u; + else + s = maxlen; + WT_SIZE_CHECK_UNPACK(s, maxlen); + pv->u.item.data = *pp; + pv->u.item.size = s; + *pp += s; + break; + case 'b': + /* Translate to maintain ordering with the sign bit. */ + WT_SIZE_CHECK_UNPACK(1, maxlen); + pv->u.i = (int8_t)(*(*pp)++ - 0x80); + break; + case 'B': + case 't': + WT_SIZE_CHECK_UNPACK(1, maxlen); + pv->u.u = *(*pp)++; + break; + case 'h': + case 'i': + case 'l': + case 'q': + /* + * Check that there is at least one byte available: the low-level routines treat zero length + * as unchecked. 
+ */ + WT_SIZE_CHECK_UNPACK(1, maxlen); + WT_RET(__wt_vunpack_int(pp, maxlen, &pv->u.i)); + break; + case 'H': + case 'I': + case 'L': + case 'Q': + case 'r': + /* + * Check that there is at least one byte available: the low-level routines treat zero length + * as unchecked. + */ + WT_SIZE_CHECK_UNPACK(1, maxlen); + WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u)); + break; + case 'R': + WT_SIZE_CHECK_UNPACK(sizeof(uint64_t), maxlen); + pv->u.u = *(const uint64_t *)*pp; + *pp += sizeof(uint64_t); + break; + default: + WT_RET_MSG(session, EINVAL, "unknown pack-value type: %c", (int)pv->type); + } + + return (0); } -#define WT_UNPACK_PUT(session, pv, ap) do { \ - WT_ITEM *__item; \ - switch ((pv).type) { \ - case 'x': \ - break; \ - case 's': \ - case 'S': \ - *va_arg(ap, const char **) = (pv).u.s; \ - break; \ - case 'U': \ - case 'u': \ - __item = va_arg(ap, WT_ITEM *); \ - __item->data = (pv).u.item.data; \ - __item->size = (pv).u.item.size; \ - break; \ - case 'b': \ - *va_arg(ap, int8_t *) = (int8_t)(pv).u.i; \ - break; \ - case 'h': \ - *va_arg(ap, int16_t *) = (short)(pv).u.i; \ - break; \ - case 'i': \ - case 'l': \ - *va_arg(ap, int32_t *) = (int32_t)(pv).u.i; \ - break; \ - case 'q': \ - *va_arg(ap, int64_t *) = (pv).u.i; \ - break; \ - case 'B': \ - case 't': \ - *va_arg(ap, uint8_t *) = (uint8_t)(pv).u.u; \ - break; \ - case 'H': \ - *va_arg(ap, uint16_t *) = (uint16_t)(pv).u.u; \ - break; \ - case 'I': \ - case 'L': \ - *va_arg(ap, uint32_t *) = (uint32_t)(pv).u.u; \ - break; \ - case 'Q': \ - case 'r': \ - case 'R': \ - *va_arg(ap, uint64_t *) = (pv).u.u; \ - break; \ - default: \ - /* User format strings have already been validated. 
*/ \ - return (__wt_illegal_value(session, (pv).type)); \ - } \ -} while (0) +#define WT_UNPACK_PUT(session, pv, ap) \ + do { \ + WT_ITEM *__item; \ + switch ((pv).type) { \ + case 'x': \ + break; \ + case 's': \ + case 'S': \ + *va_arg(ap, const char **) = (pv).u.s; \ + break; \ + case 'U': \ + case 'u': \ + __item = va_arg(ap, WT_ITEM *); \ + __item->data = (pv).u.item.data; \ + __item->size = (pv).u.item.size; \ + break; \ + case 'b': \ + *va_arg(ap, int8_t *) = (int8_t)(pv).u.i; \ + break; \ + case 'h': \ + *va_arg(ap, int16_t *) = (short)(pv).u.i; \ + break; \ + case 'i': \ + case 'l': \ + *va_arg(ap, int32_t *) = (int32_t)(pv).u.i; \ + break; \ + case 'q': \ + *va_arg(ap, int64_t *) = (pv).u.i; \ + break; \ + case 'B': \ + case 't': \ + *va_arg(ap, uint8_t *) = (uint8_t)(pv).u.u; \ + break; \ + case 'H': \ + *va_arg(ap, uint16_t *) = (uint16_t)(pv).u.u; \ + break; \ + case 'I': \ + case 'L': \ + *va_arg(ap, uint32_t *) = (uint32_t)(pv).u.u; \ + break; \ + case 'Q': \ + case 'r': \ + case 'R': \ + *va_arg(ap, uint64_t *) = (pv).u.u; \ + break; \ + default: \ + /* User format strings have already been validated. */ \ + return (__wt_illegal_value(session, (pv).type)); \ + } \ + } while (0) /* * __wt_struct_packv -- - * Pack a byte string (va_list version). + * Pack a byte string (va_list version). 
*/ static inline int -__wt_struct_packv(WT_SESSION_IMPL *session, - void *buffer, size_t size, const char *fmt, va_list ap) +__wt_struct_packv(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, va_list ap) { - WT_DECL_PACK_VALUE(pv); - WT_DECL_RET; - WT_PACK pack; - uint8_t *p, *end; - - p = buffer; - end = p + size; - - if (fmt[0] != '\0' && fmt[1] == '\0') { - pv.type = fmt[0]; - WT_PACK_GET(session, pv, ap); - return (__pack_write(session, &pv, &p, size)); - } - - WT_RET(__pack_init(session, &pack, fmt)); - while ((ret = __pack_next(&pack, &pv)) == 0) { - WT_PACK_GET(session, pv, ap); - WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p))); - } - WT_RET_NOTFOUND_OK(ret); - - /* Be paranoid - __pack_write should never overflow. */ - WT_ASSERT(session, p <= end); - - return (0); + WT_DECL_PACK_VALUE(pv); + WT_DECL_RET; + WT_PACK pack; + uint8_t *p, *end; + + p = buffer; + end = p + size; + + if (fmt[0] != '\0' && fmt[1] == '\0') { + pv.type = fmt[0]; + WT_PACK_GET(session, pv, ap); + return (__pack_write(session, &pv, &p, size)); + } + + WT_RET(__pack_init(session, &pack, fmt)); + while ((ret = __pack_next(&pack, &pv)) == 0) { + WT_PACK_GET(session, pv, ap); + WT_RET(__pack_write(session, &pv, &p, (size_t)(end - p))); + } + WT_RET_NOTFOUND_OK(ret); + + /* Be paranoid - __pack_write should never overflow. */ + WT_ASSERT(session, p <= end); + + return (0); } /* * __wt_struct_sizev -- - * Calculate the size of a packed byte string (va_list version). + * Calculate the size of a packed byte string (va_list version). 
*/ static inline int -__wt_struct_sizev( - WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, va_list ap) +__wt_struct_sizev(WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, va_list ap) { - WT_DECL_PACK_VALUE(pv); - WT_DECL_RET; - WT_PACK pack; - size_t v; - - *sizep = 0; - - if (fmt[0] != '\0' && fmt[1] == '\0') { - pv.type = fmt[0]; - WT_PACK_GET(session, pv, ap); - return (__pack_size(session, &pv, sizep)); - } - - WT_RET(__pack_init(session, &pack, fmt)); - while ((ret = __pack_next(&pack, &pv)) == 0) { - WT_PACK_GET(session, pv, ap); - WT_RET(__pack_size(session, &pv, &v)); - *sizep += v; - } - WT_RET_NOTFOUND_OK(ret); - - return (0); + WT_DECL_PACK_VALUE(pv); + WT_DECL_RET; + WT_PACK pack; + size_t v; + + *sizep = 0; + + if (fmt[0] != '\0' && fmt[1] == '\0') { + pv.type = fmt[0]; + WT_PACK_GET(session, pv, ap); + return (__pack_size(session, &pv, sizep)); + } + + WT_RET(__pack_init(session, &pack, fmt)); + while ((ret = __pack_next(&pack, &pv)) == 0) { + WT_PACK_GET(session, pv, ap); + WT_RET(__pack_size(session, &pv, &v)); + *sizep += v; + } + WT_RET_NOTFOUND_OK(ret); + + return (0); } /* * __wt_struct_unpackv -- - * Unpack a byte string (va_list version). + * Unpack a byte string (va_list version). 
*/ static inline int -__wt_struct_unpackv(WT_SESSION_IMPL *session, - const void *buffer, size_t size, const char *fmt, va_list ap) +__wt_struct_unpackv( + WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, va_list ap) { - WT_DECL_PACK_VALUE(pv); - WT_DECL_RET; - WT_PACK pack; - const uint8_t *p, *end; - - p = buffer; - end = p + size; - - if (fmt[0] != '\0' && fmt[1] == '\0') { - pv.type = fmt[0]; - WT_RET(__unpack_read(session, &pv, &p, size)); - WT_UNPACK_PUT(session, pv, ap); - return (0); - } - - WT_RET(__pack_init(session, &pack, fmt)); - while ((ret = __pack_next(&pack, &pv)) == 0) { - WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); - WT_UNPACK_PUT(session, pv, ap); - } - WT_RET_NOTFOUND_OK(ret); - - /* Be paranoid - __pack_write should never overflow. */ - WT_ASSERT(session, p <= end); - - return (0); + WT_DECL_PACK_VALUE(pv); + WT_DECL_RET; + WT_PACK pack; + const uint8_t *p, *end; + + p = buffer; + end = p + size; + + if (fmt[0] != '\0' && fmt[1] == '\0') { + pv.type = fmt[0]; + WT_RET(__unpack_read(session, &pv, &p, size)); + WT_UNPACK_PUT(session, pv, ap); + return (0); + } + + WT_RET(__pack_init(session, &pack, fmt)); + while ((ret = __pack_next(&pack, &pv)) == 0) { + WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); + WT_UNPACK_PUT(session, pv, ap); + } + WT_RET_NOTFOUND_OK(ret); + + /* Be paranoid - __pack_write should never overflow. */ + WT_ASSERT(session, p <= end); + + return (0); } /* * __wt_struct_size_adjust -- - * Adjust the size field for a packed structure. - * - * Sometimes we want to include the size as a field in a packed structure. - * This is done by calling __wt_struct_size with the expected format and - * a size of zero. Then we want to pack the structure using the final - * size. This function adjusts the size appropriately (taking into - * account the size of the final size or the size field itself). + * Adjust the size field for a packed structure. 
Sometimes we want to include the size as a + * field in a packed structure. This is done by calling __wt_struct_size with the expected + * format and a size of zero. Then we want to pack the structure using the final size. This + * function adjusts the size appropriately (taking into account the size of the final size or + * the size field itself). */ static inline void __wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *sizep) { - size_t curr_size, field_size, prev_field_size; + size_t curr_size, field_size, prev_field_size; - curr_size = *sizep; - prev_field_size = 1; + curr_size = *sizep; + prev_field_size = 1; - while ((field_size = __wt_vsize_uint(curr_size)) != prev_field_size) { - curr_size += field_size - prev_field_size; - prev_field_size = field_size; - } + while ((field_size = __wt_vsize_uint(curr_size)) != prev_field_size) { + curr_size += field_size - prev_field_size; + prev_field_size = field_size; + } - /* Make sure the field size we calculated matches the adjusted size. */ - WT_ASSERT(session, field_size == __wt_vsize_uint(curr_size)); + /* Make sure the field size we calculated matches the adjusted size. */ + WT_ASSERT(session, field_size == __wt_vsize_uint(curr_size)); - *sizep = curr_size; + *sizep = curr_size; } diff --git a/src/third_party/wiredtiger/src/include/posix.h b/src/third_party/wiredtiger/src/include/posix.h index 4ad8db1f43a..b0c6e831148 100644 --- a/src/third_party/wiredtiger/src/include/posix.h +++ b/src/third_party/wiredtiger/src/include/posix.h @@ -7,38 +7,38 @@ */ /* Some systems don't configure 64-bit MIN/MAX by default. 
*/ -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xffffffffffffffffULL +#ifndef ULLONG_MAX +#define ULLONG_MAX 0xffffffffffffffffULL #endif -#ifndef LLONG_MAX -#define LLONG_MAX 0x7fffffffffffffffLL +#ifndef LLONG_MAX +#define LLONG_MAX 0x7fffffffffffffffLL #endif -#ifndef LLONG_MIN -#define LLONG_MIN (-0x7fffffffffffffffLL - 1) +#ifndef LLONG_MIN +#define LLONG_MIN (-0x7fffffffffffffffLL - 1) #endif /* Define O_BINARY for Posix systems */ -#define O_BINARY 0 +#define O_BINARY 0 /* * Define WT threading and concurrency primitives */ -typedef pthread_cond_t wt_cond_t; -typedef pthread_mutex_t wt_mutex_t; +typedef pthread_cond_t wt_cond_t; +typedef pthread_mutex_t wt_mutex_t; typedef struct { - bool created; - pthread_t id; + bool created; + pthread_t id; } wt_thread_t; /* * Thread callbacks need to match the platform specific callback types */ /* NOLINTNEXTLINE(misc-macro-parentheses) */ -#define WT_THREAD_CALLBACK(x) void* (x) -#define WT_THREAD_RET void* -#define WT_THREAD_RET_VALUE NULL +#define WT_THREAD_CALLBACK(x) void *(x) +#define WT_THREAD_RET void * +#define WT_THREAD_RET_VALUE NULL /* * WT declaration for calling convention type */ -#define WT_CDECL +#define WT_CDECL diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h index c3c46ec11c5..22f63ae4ff4 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.h +++ b/src/third_party/wiredtiger/src/include/reconcile.h @@ -15,268 +15,259 @@ * Information tracking a single page reconciliation. */ struct __wt_reconcile { - WT_REF *ref; /* Page being reconciled */ - WT_PAGE *page; - uint32_t flags; /* Caller's configuration */ - - /* - * Track start/stop checkpoint generations to decide if lookaside table - * records are correct. - */ - uint64_t orig_btree_checkpoint_gen; - uint64_t orig_txn_checkpoint_gen; - - /* - * Track the oldest running transaction and whether to skew lookaside - * to the newest update. 
- */ - bool las_skew_newest; - uint64_t last_running; - - /* Track the page's min/maximum transactions. */ - uint64_t max_txn; - wt_timestamp_t max_timestamp; - - /* Lookaside boundary tracking. */ - uint64_t unstable_txn; - wt_timestamp_t unstable_durable_timestamp; - wt_timestamp_t unstable_timestamp; - - u_int updates_seen; /* Count of updates seen. */ - u_int updates_unstable; /* Count of updates not visible_all. */ - - bool update_uncommitted; /* An update was uncommitted. */ - bool update_used; /* An update could be used. */ - - /* All the updates are with prepare in-progress state. */ - bool all_upd_prepare_in_prog; - - /* - * When we can't mark the page clean (for example, checkpoint found some - * uncommitted updates), there's a leave-dirty flag. - */ - bool leave_dirty; - - /* - * Track if reconciliation has seen any overflow items. If a leaf page - * with no overflow items is written, the parent page's address cell is - * set to the leaf-no-overflow type. This means we can delete the leaf - * page without reading it because we don't have to discard any overflow - * items it might reference. - * - * The test test is per-page reconciliation, that is, once we see an - * overflow item on the page, all subsequent leaf pages written for the - * page will not be leaf-no-overflow type, regardless of whether or not - * they contain overflow items. In other words, leaf-no-overflow is not - * guaranteed to be set on every page that doesn't contain an overflow - * item, only that if it is set, the page contains no overflow items. - * XXX - * This was originally done because raw compression couldn't do better, - * now that raw compression has been removed, we should do better. - */ - bool ovfl_items; - - /* - * Track if reconciliation of a row-store leaf page has seen empty (zero - * length) values. 
We don't write out anything for empty values, so if - * there are empty values on a page, we have to make two passes over the - * page when it's read to figure out how many keys it has, expensive in - * the common case of no empty values and (entries / 2) keys. Likewise, - * a page with only empty values is another common data set, and keys on - * that page will be equal to the number of entries. In both cases, set - * a flag in the page's on-disk header. - * - * The test is per-page reconciliation as described above for the - * overflow-item test. - */ - bool all_empty_value, any_empty_value; - - /* - * Reconciliation gets tricky if we have to split a page, which happens - * when the disk image we create exceeds the page type's maximum disk - * image size. - * - * First, the target size of the page we're building. - */ - uint32_t page_size; /* Page size */ - - /* - * Second, the split size: if we're doing the page layout, split to a - * smaller-than-maximum page size when a split is required so we don't - * repeatedly split a packed page. - */ - uint32_t split_size; /* Split page size */ - uint32_t min_split_size; /* Minimum split page size */ - - /* - * We maintain two split chunks in the memory during reconciliation to - * be written out as pages. As we get to the end of the data, if the - * last one turns out to be smaller than the minimum split size, we go - * back into the penultimate chunk and split at this minimum split size - * boundary. This moves some data from the penultimate chunk to the last - * chunk, hence increasing the size of the last page written without - * decreasing the penultimate page size beyond the minimum split size. - * For this reason, we maintain an expected split percentage boundary - * and a minimum split percentage boundary. - * - * Chunks are referenced by current and previous pointers. In case of a - * split, previous references the first chunk and current switches to - * the second chunk. 
If reconciliation generates more split chunks, the - * the previous chunk is written to the disk and current and previous - * swap. - */ - struct __wt_rec_chunk { - /* - * The recno and entries fields are the starting record number - * of the split chunk (for column-store splits), and the number - * of entries in the split chunk. - * - * The key for a row-store page; no column-store key is needed - * because the page's recno, stored in the recno field, is the - * column-store key. - */ - uint32_t entries; - uint64_t recno; - WT_ITEM key; - wt_timestamp_t newest_durable_ts; - wt_timestamp_t oldest_start_ts; - uint64_t oldest_start_txn; - wt_timestamp_t newest_stop_ts; - uint64_t newest_stop_txn; - - /* Saved minimum split-size boundary information. */ - uint32_t min_entries; - uint64_t min_recno; - WT_ITEM min_key; - wt_timestamp_t min_newest_durable_ts; - wt_timestamp_t min_oldest_start_ts; - uint64_t min_oldest_start_txn; - wt_timestamp_t min_newest_stop_ts; - uint64_t min_newest_stop_txn; - - size_t min_offset; /* byte offset */ - - WT_ITEM image; /* disk-image */ - } chunkA, chunkB, *cur_ptr, *prev_ptr; - - /* - * We track current information about the current record number, the - * number of entries copied into the disk image buffer, where we are - * in the buffer, how much memory remains, and the current min/max of - * the timestamps. Those values are packaged here rather than passing - * pointers to stack locations around the code. - */ - uint64_t recno; /* Current record number */ - uint32_t entries; /* Current number of entries */ - uint8_t *first_free; /* Current first free byte */ - size_t space_avail; /* Remaining space in this chunk */ - /* Remaining space in this chunk to put a minimum size boundary */ - size_t min_space_avail; - - /* - * Saved update list, supporting the WT_REC_UPDATE_RESTORE and - * WT_REC_LOOKASIDE configurations. 
While reviewing updates for each - * page, we save WT_UPDATE lists here, and then move them to per-block - * areas as the blocks are defined. - */ - WT_SAVE_UPD *supd; /* Saved updates */ - uint32_t supd_next; - size_t supd_allocated; - size_t supd_memsize; /* Size of saved update structures */ - - /* List of pages we've written so far. */ - WT_MULTI *multi; - uint32_t multi_next; - size_t multi_allocated; - - /* - * Root pages are written when wrapping up the reconciliation, remember - * the image we're going to write. - */ - WT_ITEM *wrapup_checkpoint; - bool wrapup_checkpoint_compressed; - - /* - * We don't need to keep the 0th key around on internal pages, the - * search code ignores them as nothing can sort less by definition. - * There's some trickiness here, see the code for comments on how - * these fields work. - */ - bool cell_zero; /* Row-store internal page 0th key */ - - /* - * We calculate checksums to find previously written identical blocks, - * but once a match fails during an eviction, there's no point trying - * again. - */ - bool evict_matching_checksum_failed; - - /* - * WT_REC_DICTIONARY -- - * We optionally build a dictionary of values for leaf pages. Where - * two value cells are identical, only write the value once, the second - * and subsequent copies point to the original cell. The dictionary is - * fixed size, but organized in a skip-list to make searches faster. - */ - struct __wt_rec_dictionary { - uint64_t hash; /* Hash value */ - uint32_t offset; /* Matching cell */ - - u_int depth; /* Skiplist */ - WT_REC_DICTIONARY *next[0]; - } **dictionary; /* Dictionary */ - u_int dictionary_next, dictionary_slots; /* Next, max entries */ - /* Skiplist head. */ - WT_REC_DICTIONARY *dictionary_head[WT_SKIP_MAXDEPTH]; - - /* - * WT_REC_KV-- - * An on-page key/value item we're building. 
- */ - struct __wt_rec_kv { - WT_ITEM buf; /* Data */ - WT_CELL cell; /* Cell and cell's length */ - size_t cell_len; - size_t len; /* Total length of cell + data */ - } k, v; /* Key/Value being built */ - - WT_ITEM *cur, _cur; /* Key/Value being built */ - WT_ITEM *last, _last; /* Last key/value built */ - - bool key_pfx_compress; /* If can prefix-compress next key */ - bool key_pfx_compress_conf; /* If prefix compression configured */ - bool key_sfx_compress; /* If can suffix-compress next key */ - bool key_sfx_compress_conf; /* If suffix compression configured */ - - bool is_bulk_load; /* If it's a bulk load */ - - WT_SALVAGE_COOKIE *salvage; /* If it's a salvage operation */ - - bool cache_write_lookaside; /* Used the lookaside table */ - bool cache_write_restore; /* Used update/restoration */ - - uint32_t tested_ref_state; /* Debugging information */ - - /* - * XXX - * In the case of a modified update, we may need a copy of the current - * value as a set of bytes. We call back into the btree code using a - * fake cursor to do that work. This a layering violation and fragile, - * we need a better solution. - */ - WT_CURSOR_BTREE update_modify_cbt; + WT_REF *ref; /* Page being reconciled */ + WT_PAGE *page; + uint32_t flags; /* Caller's configuration */ + + /* + * Track start/stop checkpoint generations to decide if lookaside table records are correct. + */ + uint64_t orig_btree_checkpoint_gen; + uint64_t orig_txn_checkpoint_gen; + + /* + * Track the oldest running transaction and whether to skew lookaside to the newest update. + */ + bool las_skew_newest; + uint64_t last_running; + + /* Track the page's min/maximum transactions. */ + uint64_t max_txn; + wt_timestamp_t max_timestamp; + + /* Lookaside boundary tracking. */ + uint64_t unstable_txn; + wt_timestamp_t unstable_durable_timestamp; + wt_timestamp_t unstable_timestamp; + + u_int updates_seen; /* Count of updates seen. */ + u_int updates_unstable; /* Count of updates not visible_all. 
*/ + + bool update_uncommitted; /* An update was uncommitted. */ + bool update_used; /* An update could be used. */ + + /* All the updates are with prepare in-progress state. */ + bool all_upd_prepare_in_prog; + + /* + * When we can't mark the page clean (for example, checkpoint found some uncommitted updates), + * there's a leave-dirty flag. + */ + bool leave_dirty; + + /* + * Track if reconciliation has seen any overflow items. If a leaf page + * with no overflow items is written, the parent page's address cell is + * set to the leaf-no-overflow type. This means we can delete the leaf + * page without reading it because we don't have to discard any overflow + * items it might reference. + * + * The test test is per-page reconciliation, that is, once we see an + * overflow item on the page, all subsequent leaf pages written for the + * page will not be leaf-no-overflow type, regardless of whether or not + * they contain overflow items. In other words, leaf-no-overflow is not + * guaranteed to be set on every page that doesn't contain an overflow + * item, only that if it is set, the page contains no overflow items. + * XXX + * This was originally done because raw compression couldn't do better, + * now that raw compression has been removed, we should do better. + */ + bool ovfl_items; + + /* + * Track if reconciliation of a row-store leaf page has seen empty (zero + * length) values. We don't write out anything for empty values, so if + * there are empty values on a page, we have to make two passes over the + * page when it's read to figure out how many keys it has, expensive in + * the common case of no empty values and (entries / 2) keys. Likewise, + * a page with only empty values is another common data set, and keys on + * that page will be equal to the number of entries. In both cases, set + * a flag in the page's on-disk header. + * + * The test is per-page reconciliation as described above for the + * overflow-item test. 
+ */ + bool all_empty_value, any_empty_value; + + /* + * Reconciliation gets tricky if we have to split a page, which happens + * when the disk image we create exceeds the page type's maximum disk + * image size. + * + * First, the target size of the page we're building. + */ + uint32_t page_size; /* Page size */ + + /* + * Second, the split size: if we're doing the page layout, split to a smaller-than-maximum page + * size when a split is required so we don't repeatedly split a packed page. + */ + uint32_t split_size; /* Split page size */ + uint32_t min_split_size; /* Minimum split page size */ + + /* + * We maintain two split chunks in the memory during reconciliation to + * be written out as pages. As we get to the end of the data, if the + * last one turns out to be smaller than the minimum split size, we go + * back into the penultimate chunk and split at this minimum split size + * boundary. This moves some data from the penultimate chunk to the last + * chunk, hence increasing the size of the last page written without + * decreasing the penultimate page size beyond the minimum split size. + * For this reason, we maintain an expected split percentage boundary + * and a minimum split percentage boundary. + * + * Chunks are referenced by current and previous pointers. In case of a + * split, previous references the first chunk and current switches to + * the second chunk. If reconciliation generates more split chunks, the + * the previous chunk is written to the disk and current and previous + * swap. + */ + struct __wt_rec_chunk { + /* + * The recno and entries fields are the starting record number + * of the split chunk (for column-store splits), and the number + * of entries in the split chunk. + * + * The key for a row-store page; no column-store key is needed + * because the page's recno, stored in the recno field, is the + * column-store key. 
+ */ + uint32_t entries; + uint64_t recno; + WT_ITEM key; + wt_timestamp_t newest_durable_ts; + wt_timestamp_t oldest_start_ts; + uint64_t oldest_start_txn; + wt_timestamp_t newest_stop_ts; + uint64_t newest_stop_txn; + + /* Saved minimum split-size boundary information. */ + uint32_t min_entries; + uint64_t min_recno; + WT_ITEM min_key; + wt_timestamp_t min_newest_durable_ts; + wt_timestamp_t min_oldest_start_ts; + uint64_t min_oldest_start_txn; + wt_timestamp_t min_newest_stop_ts; + uint64_t min_newest_stop_txn; + + size_t min_offset; /* byte offset */ + + WT_ITEM image; /* disk-image */ + } chunkA, chunkB, *cur_ptr, *prev_ptr; + + /* + * We track current information about the current record number, the number of entries copied + * into the disk image buffer, where we are in the buffer, how much memory remains, and the + * current min/max of the timestamps. Those values are packaged here rather than passing + * pointers to stack locations around the code. + */ + uint64_t recno; /* Current record number */ + uint32_t entries; /* Current number of entries */ + uint8_t *first_free; /* Current first free byte */ + size_t space_avail; /* Remaining space in this chunk */ + /* Remaining space in this chunk to put a minimum size boundary */ + size_t min_space_avail; + + /* + * Saved update list, supporting the WT_REC_UPDATE_RESTORE and WT_REC_LOOKASIDE configurations. + * While reviewing updates for each page, we save WT_UPDATE lists here, and then move them to + * per-block areas as the blocks are defined. + */ + WT_SAVE_UPD *supd; /* Saved updates */ + uint32_t supd_next; + size_t supd_allocated; + size_t supd_memsize; /* Size of saved update structures */ + + /* List of pages we've written so far. */ + WT_MULTI *multi; + uint32_t multi_next; + size_t multi_allocated; + + /* + * Root pages are written when wrapping up the reconciliation, remember the image we're going to + * write. 
+ */ + WT_ITEM *wrapup_checkpoint; + bool wrapup_checkpoint_compressed; + + /* + * We don't need to keep the 0th key around on internal pages, the search code ignores them as + * nothing can sort less by definition. There's some trickiness here, see the code for comments + * on how these fields work. + */ + bool cell_zero; /* Row-store internal page 0th key */ + + /* + * We calculate checksums to find previously written identical blocks, but once a match fails + * during an eviction, there's no point trying again. + */ + bool evict_matching_checksum_failed; + + /* + * WT_REC_DICTIONARY -- + * We optionally build a dictionary of values for leaf pages. Where + * two value cells are identical, only write the value once, the second + * and subsequent copies point to the original cell. The dictionary is + * fixed size, but organized in a skip-list to make searches faster. + */ + struct __wt_rec_dictionary { + uint64_t hash; /* Hash value */ + uint32_t offset; /* Matching cell */ + + u_int depth; /* Skiplist */ + WT_REC_DICTIONARY *next[0]; + } * *dictionary; /* Dictionary */ + u_int dictionary_next, dictionary_slots; /* Next, max entries */ + /* Skiplist head. */ + WT_REC_DICTIONARY *dictionary_head[WT_SKIP_MAXDEPTH]; + + /* + * WT_REC_KV-- + * An on-page key/value item we're building. 
+ */ + struct __wt_rec_kv { + WT_ITEM buf; /* Data */ + WT_CELL cell; /* Cell and cell's length */ + size_t cell_len; + size_t len; /* Total length of cell + data */ + } k, v; /* Key/Value being built */ + + WT_ITEM *cur, _cur; /* Key/Value being built */ + WT_ITEM *last, _last; /* Last key/value built */ + + bool key_pfx_compress; /* If can prefix-compress next key */ + bool key_pfx_compress_conf; /* If prefix compression configured */ + bool key_sfx_compress; /* If can suffix-compress next key */ + bool key_sfx_compress_conf; /* If suffix compression configured */ + + bool is_bulk_load; /* If it's a bulk load */ + + WT_SALVAGE_COOKIE *salvage; /* If it's a salvage operation */ + + bool cache_write_lookaside; /* Used the lookaside table */ + bool cache_write_restore; /* Used update/restoration */ + + uint32_t tested_ref_state; /* Debugging information */ + + /* + * XXX In the case of a modified update, we may need a copy of the current value as a set of + * bytes. We call back into the btree code using a fake cursor to do that work. This a layering + * violation and fragile, we need a better solution. + */ + WT_CURSOR_BTREE update_modify_cbt; }; typedef struct { - WT_UPDATE *upd; /* Update to write (or NULL) */ + WT_UPDATE *upd; /* Update to write (or NULL) */ - wt_timestamp_t durable_ts; /* Transaction IDs, timestamps */ - wt_timestamp_t start_ts; - uint64_t start_txn; - wt_timestamp_t stop_ts; - uint64_t stop_txn; + wt_timestamp_t durable_ts; /* Transaction IDs, timestamps */ + wt_timestamp_t start_ts; + uint64_t start_txn; + wt_timestamp_t stop_ts; + uint64_t stop_txn; - bool upd_saved; /* Updates saved to list */ + bool upd_saved; /* Updates saved to list */ } WT_UPDATE_SELECT; @@ -285,29 +276,28 @@ typedef struct { * Macros to clean up during internal-page reconciliation, releasing the * hazard pointer we're holding on child pages. 
*/ -#define WT_CHILD_RELEASE(session, hazard, ref) do { \ - if (hazard) { \ - (hazard) = false; \ - WT_TRET( \ - __wt_page_release(session, ref, WT_READ_NO_EVICT)); \ - } \ -} while (0) -#define WT_CHILD_RELEASE_ERR(session, hazard, ref) do { \ - WT_CHILD_RELEASE(session, hazard, ref); \ - WT_ERR(ret); \ -} while (0) +#define WT_CHILD_RELEASE(session, hazard, ref) \ + do { \ + if (hazard) { \ + (hazard) = false; \ + WT_TRET(__wt_page_release(session, ref, WT_READ_NO_EVICT)); \ + } \ + } while (0) +#define WT_CHILD_RELEASE_ERR(session, hazard, ref) \ + do { \ + WT_CHILD_RELEASE(session, hazard, ref); \ + WT_ERR(ret); \ + } while (0) typedef enum { - WT_CHILD_IGNORE, /* Ignored child */ - WT_CHILD_MODIFIED, /* Modified child */ - WT_CHILD_ORIGINAL, /* Original child */ - WT_CHILD_PROXY /* Deleted child: proxy */ + WT_CHILD_IGNORE, /* Ignored child */ + WT_CHILD_MODIFIED, /* Modified child */ + WT_CHILD_ORIGINAL, /* Original child */ + WT_CHILD_PROXY /* Deleted child: proxy */ } WT_CHILD_STATE; /* * Macros from fixed-length entries to/from bytes. */ -#define WT_FIX_BYTES_TO_ENTRIES(btree, bytes) \ - ((uint32_t)((((bytes) * 8) / (btree)->bitcnt))) -#define WT_FIX_ENTRIES_TO_BYTES(btree, entries) \ - ((uint32_t)WT_ALIGN((entries) * (btree)->bitcnt, 8)) +#define WT_FIX_BYTES_TO_ENTRIES(btree, bytes) ((uint32_t)((((bytes)*8) / (btree)->bitcnt))) +#define WT_FIX_ENTRIES_TO_BYTES(btree, entries) ((uint32_t)WT_ALIGN((entries) * (btree)->bitcnt, 8)) diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i index ab44ce31d36..eabf9e58c4f 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.i +++ b/src/third_party/wiredtiger/src/include/reconcile.i @@ -6,316 +6,295 @@ * See the file LICENSE for redistribution information. 
*/ -#define WT_CROSSING_MIN_BND(r, next_len) \ - ((r)->cur_ptr->min_offset == 0 && \ - (next_len) > (r)->min_space_avail) -#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail) -#define WT_CHECK_CROSSING_BND(r, next_len) \ - (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) +#define WT_CROSSING_MIN_BND(r, next_len) \ + ((r)->cur_ptr->min_offset == 0 && (next_len) > (r)->min_space_avail) +#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail) +#define WT_CHECK_CROSSING_BND(r, next_len) \ + (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) /* * __wt_rec_need_split -- - * Check whether adding some bytes to the page requires a split. + * Check whether adding some bytes to the page requires a split. */ static inline bool __wt_rec_need_split(WT_RECONCILE *r, size_t len) { - /* - * In the case of a row-store leaf page, trigger a split if a threshold - * number of saved updates is reached. This allows pages to split for - * update/restore and lookaside eviction when there is no visible data - * causing the disk image to grow. - * - * In the case of small pages or large keys, we might try to split when - * a page has no updates or entries, which isn't possible. To consider - * update/restore or lookaside information, require either page entries - * or updates that will be attached to the image. The limit is one of - * either, but it doesn't make sense to create pages or images with few - * entries or updates, even where page sizes are small (especially as - * updates that will eventually become overflow items can throw off our - * calculations). Bound the combination at something reasonable. - */ - if (r->page->type == WT_PAGE_ROW_LEAF && r->entries + r->supd_next > 10) - len += r->supd_memsize; + /* + * In the case of a row-store leaf page, trigger a split if a threshold + * number of saved updates is reached. 
This allows pages to split for + * update/restore and lookaside eviction when there is no visible data + * causing the disk image to grow. + * + * In the case of small pages or large keys, we might try to split when + * a page has no updates or entries, which isn't possible. To consider + * update/restore or lookaside information, require either page entries + * or updates that will be attached to the image. The limit is one of + * either, but it doesn't make sense to create pages or images with few + * entries or updates, even where page sizes are small (especially as + * updates that will eventually become overflow items can throw off our + * calculations). Bound the combination at something reasonable. + */ + if (r->page->type == WT_PAGE_ROW_LEAF && r->entries + r->supd_next > 10) + len += r->supd_memsize; - /* Check for the disk image crossing a boundary. */ - return (WT_CHECK_CROSSING_BND(r, len)); + /* Check for the disk image crossing a boundary. */ + return (WT_CHECK_CROSSING_BND(r, len)); } /* * __wt_rec_addr_ts_init -- - * Initialize an address timestamp triplet. + * Initialize an address timestamp triplet. */ static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *newest_durable_ts, - wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, - wt_timestamp_t *newest_stop_tsp, uint64_t *newest_stop_txnp) + wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *newest_stop_tsp, + uint64_t *newest_stop_txnp) { - /* - * If the page format supports address timestamps (and not fixed-length - * column-store, where we don't maintain timestamps at all), set the - * oldest/newest timestamps to values at the end of their expected range - * so they're corrected as we process key/value items. Otherwise, set - * the oldest/newest timestamps to simple durability. 
- */ - *newest_durable_ts = WT_TS_NONE; - *oldest_start_tsp = WT_TS_MAX; - *oldest_start_txnp = WT_TXN_MAX; - *newest_stop_tsp = WT_TS_NONE; - *newest_stop_txnp = WT_TXN_NONE; - if (!__wt_process.page_version_ts || r->page->type == WT_PAGE_COL_FIX) { - *newest_durable_ts = WT_TS_NONE; - *oldest_start_tsp = WT_TS_NONE; - *oldest_start_txnp = WT_TXN_NONE; - *newest_stop_tsp = WT_TS_MAX; - *newest_stop_txnp = WT_TXN_MAX; - } + /* + * If the page format supports address timestamps (and not fixed-length column-store, where we + * don't maintain timestamps at all), set the oldest/newest timestamps to values at the end of + * their expected range so they're corrected as we process key/value items. Otherwise, set the + * oldest/newest timestamps to simple durability. + */ + *newest_durable_ts = WT_TS_NONE; + *oldest_start_tsp = WT_TS_MAX; + *oldest_start_txnp = WT_TXN_MAX; + *newest_stop_tsp = WT_TS_NONE; + *newest_stop_txnp = WT_TXN_NONE; + if (!__wt_process.page_version_ts || r->page->type == WT_PAGE_COL_FIX) { + *newest_durable_ts = WT_TS_NONE; + *oldest_start_tsp = WT_TS_NONE; + *oldest_start_txnp = WT_TXN_NONE; + *newest_stop_tsp = WT_TS_MAX; + *newest_stop_txnp = WT_TXN_MAX; + } } /* * __wt_rec_addr_ts_update -- - * Update the chunk's timestamp information. + * Update the chunk's timestamp information. 
*/ static inline void __wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t newest_durable_ts, - wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, - wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn) + wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t newest_stop_ts, + uint64_t newest_stop_txn) { - r->cur_ptr->newest_durable_ts = - WT_MAX(newest_durable_ts, r->cur_ptr->newest_durable_ts); - r->cur_ptr->oldest_start_ts = - WT_MIN(oldest_start_ts, r->cur_ptr->oldest_start_ts); - r->cur_ptr->oldest_start_txn = - WT_MIN(oldest_start_txn, r->cur_ptr->oldest_start_txn); - r->cur_ptr->newest_stop_ts = - WT_MAX(newest_stop_ts, r->cur_ptr->newest_stop_ts); - r->cur_ptr->newest_stop_txn = - WT_MAX(newest_stop_txn, r->cur_ptr->newest_stop_txn); + r->cur_ptr->newest_durable_ts = WT_MAX(newest_durable_ts, r->cur_ptr->newest_durable_ts); + r->cur_ptr->oldest_start_ts = WT_MIN(oldest_start_ts, r->cur_ptr->oldest_start_ts); + r->cur_ptr->oldest_start_txn = WT_MIN(oldest_start_txn, r->cur_ptr->oldest_start_txn); + r->cur_ptr->newest_stop_ts = WT_MAX(newest_stop_ts, r->cur_ptr->newest_stop_ts); + r->cur_ptr->newest_stop_txn = WT_MAX(newest_stop_txn, r->cur_ptr->newest_stop_txn); } /* * __wt_rec_incr -- - * Update the memory tracking structure for a set of new entries. + * Update the memory tracking structure for a set of new entries. */ static inline void -__wt_rec_incr( - WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) +__wt_rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) { - /* - * The buffer code is fragile and prone to off-by-one errors -- check - * for overflow in diagnostic mode. - */ - WT_ASSERT(session, r->space_avail >= size); - WT_ASSERT(session, WT_BLOCK_FITS(r->first_free, size, - r->cur_ptr->image.mem, r->cur_ptr->image.memsize)); + /* + * The buffer code is fragile and prone to off-by-one errors -- check for overflow in diagnostic + * mode. 
+ */ + WT_ASSERT(session, r->space_avail >= size); + WT_ASSERT(session, + WT_BLOCK_FITS(r->first_free, size, r->cur_ptr->image.mem, r->cur_ptr->image.memsize)); - r->entries += v; - r->space_avail -= size; - r->first_free += size; + r->entries += v; + r->space_avail -= size; + r->first_free += size; - /* - * If offset for the minimum split size boundary is not set, we have not - * yet reached the minimum boundary, reduce the space available for it. - */ - if (r->cur_ptr->min_offset == 0) { - if (r->min_space_avail >= size) - r->min_space_avail -= size; - else - r->min_space_avail = 0; - } + /* + * If offset for the minimum split size boundary is not set, we have not yet reached the minimum + * boundary, reduce the space available for it. + */ + if (r->cur_ptr->min_offset == 0) { + if (r->min_space_avail >= size) + r->min_space_avail -= size; + else + r->min_space_avail = 0; + } } /* * __wt_rec_image_copy -- - * Copy a key/value cell and buffer pair into the new image. + * Copy a key/value cell and buffer pair into the new image. */ static inline void __wt_rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv) { - size_t len; - uint8_t *p, *t; + size_t len; + uint8_t *p, *t; - /* - * If there's only one chunk of data to copy (because the cell and data - * are being copied from the original disk page), the cell length won't - * be set, the WT_ITEM data/length will reference the data to be copied. - * - * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do - * the copy in-line. - */ - for (p = r->first_free, - t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len) - *p++ = *t++; + /* + * If there's only one chunk of data to copy (because the cell and data + * are being copied from the original disk page), the cell length won't + * be set, the WT_ITEM data/length will reference the data to be copied. + * + * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do + * the copy in-line. 
+ */ + for (p = r->first_free, t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len) + *p++ = *t++; - /* The data can be quite large -- call memcpy. */ - if (kv->buf.size != 0) - memcpy(p, kv->buf.data, kv->buf.size); + /* The data can be quite large -- call memcpy. */ + if (kv->buf.size != 0) + memcpy(p, kv->buf.data, kv->buf.size); - WT_ASSERT(session, kv->len == kv->cell_len + kv->buf.size); - __wt_rec_incr(session, r, 1, kv->len); + WT_ASSERT(session, kv->len == kv->cell_len + kv->buf.size); + __wt_rec_incr(session, r, 1, kv->len); } /* * __wt_rec_cell_build_addr -- - * Process an address reference and return a cell structure to be stored - * on the page. + * Process an address reference and return a cell structure to be stored on the page. */ static inline void -__wt_rec_cell_build_addr(WT_SESSION_IMPL *session, - WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno) +__wt_rec_cell_build_addr( + WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *addr, bool proxy_cell, uint64_t recno) { - WT_REC_KV *val; - u_int cell_type; + WT_REC_KV *val; + u_int cell_type; - val = &r->v; + val = &r->v; - /* - * Our caller optionally specifies a cell type (deleted proxy cells), - * otherwise go with what we know. - */ - if (proxy_cell) - cell_type = WT_CELL_ADDR_DEL; - else { - switch (addr->type) { - case WT_ADDR_INT: - cell_type = WT_CELL_ADDR_INT; - break; - case WT_ADDR_LEAF: - cell_type = WT_CELL_ADDR_LEAF; - break; - case WT_ADDR_LEAF_NO: - default: - cell_type = WT_CELL_ADDR_LEAF_NO; - break; - } - WT_ASSERT(session, addr->size != 0); - } + /* + * Our caller optionally specifies a cell type (deleted proxy cells), otherwise go with what we + * know. 
+ */ + if (proxy_cell) + cell_type = WT_CELL_ADDR_DEL; + else { + switch (addr->type) { + case WT_ADDR_INT: + cell_type = WT_CELL_ADDR_INT; + break; + case WT_ADDR_LEAF: + cell_type = WT_CELL_ADDR_LEAF; + break; + case WT_ADDR_LEAF_NO: + default: + cell_type = WT_CELL_ADDR_LEAF_NO; + break; + } + WT_ASSERT(session, addr->size != 0); + } - /* - * We don't check the address size because we can't store an address on - * an overflow page: if the address won't fit, the overflow page's - * address won't fit either. This possibility must be handled by Btree - * configuration, we have to disallow internal page sizes that are too - * small with respect to the largest address cookie the underlying block - * manager might return. - */ + /* + * We don't check the address size because we can't store an address on an overflow page: if the + * address won't fit, the overflow page's address won't fit either. This possibility must be + * handled by Btree configuration, we have to disallow internal page sizes that are too small + * with respect to the largest address cookie the underlying block manager might return. + */ - /* - * We don't copy the data into the buffer, it's not necessary; just - * re-point the buffer's data/length fields. - */ - val->buf.data = addr->addr; - val->buf.size = addr->size; - val->cell_len = __wt_cell_pack_addr( - session, &val->cell, cell_type, recno, addr->newest_durable_ts, - addr->oldest_start_ts, addr->oldest_start_txn, - addr->newest_stop_ts, addr->newest_stop_txn, val->buf.size); - val->len = val->cell_len + val->buf.size; + /* + * We don't copy the data into the buffer, it's not necessary; just re-point the buffer's + * data/length fields. 
+ */ + val->buf.data = addr->addr; + val->buf.size = addr->size; + val->cell_len = __wt_cell_pack_addr(session, &val->cell, cell_type, recno, + addr->newest_durable_ts, addr->oldest_start_ts, addr->oldest_start_txn, addr->newest_stop_ts, + addr->newest_stop_txn, val->buf.size); + val->len = val->cell_len + val->buf.size; } /* * __wt_rec_cell_build_val -- - * Process a data item and return a WT_CELL structure and byte string to - * be stored on the page. + * Process a data item and return a WT_CELL structure and byte string to be stored on the page. */ static inline int -__wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, - const void *data, size_t size, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) +__wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size, + wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, + uint64_t rle) { - WT_BTREE *btree; - WT_REC_KV *val; + WT_BTREE *btree; + WT_REC_KV *val; - btree = S2BT(session); + btree = S2BT(session); - val = &r->v; + val = &r->v; - /* - * We don't copy the data into the buffer, it's not necessary; just - * re-point the buffer's data/length fields. - */ - val->buf.data = data; - val->buf.size = size; + /* + * We don't copy the data into the buffer, it's not necessary; just re-point the buffer's + * data/length fields. + */ + val->buf.data = data; + val->buf.size = size; - /* Handle zero-length cells quickly. */ - if (size != 0) { - /* Optionally compress the data using the Huffman engine. */ - if (btree->huffman_value != NULL) - WT_RET(__wt_huffman_encode( - session, btree->huffman_value, - val->buf.data, (uint32_t)val->buf.size, &val->buf)); + /* Handle zero-length cells quickly. */ + if (size != 0) { + /* Optionally compress the data using the Huffman engine. 
*/ + if (btree->huffman_value != NULL) + WT_RET(__wt_huffman_encode( + session, btree->huffman_value, val->buf.data, (uint32_t)val->buf.size, &val->buf)); - /* Create an overflow object if the data won't fit. */ - if (val->buf.size > btree->maxleafvalue) { - WT_STAT_DATA_INCR(session, rec_overflow_value); + /* Create an overflow object if the data won't fit. */ + if (val->buf.size > btree->maxleafvalue) { + WT_STAT_DATA_INCR(session, rec_overflow_value); - return (__wt_rec_cell_build_ovfl(session, r, val, - WT_CELL_VALUE_OVFL, - start_ts, start_txn, stop_ts, stop_txn, rle)); - } - } - val->cell_len = __wt_cell_pack_value(session, &val->cell, - start_ts, start_txn, stop_ts, stop_txn, rle, val->buf.size); - val->len = val->cell_len + val->buf.size; + return (__wt_rec_cell_build_ovfl( + session, r, val, WT_CELL_VALUE_OVFL, start_ts, start_txn, stop_ts, stop_txn, rle)); + } + } + val->cell_len = __wt_cell_pack_value( + session, &val->cell, start_ts, start_txn, stop_ts, stop_txn, rle, val->buf.size); + val->len = val->cell_len + val->buf.size; - return (0); + return (0); } /* * __wt_rec_dict_replace -- - * Check for a dictionary match. + * Check for a dictionary match. */ static inline int -__wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, - wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, WT_REC_KV *val) +__wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t start_ts, + uint64_t start_txn, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle, WT_REC_KV *val) { - WT_REC_DICTIONARY *dp; - uint64_t offset; + WT_REC_DICTIONARY *dp; + uint64_t offset; - /* - * We optionally create a dictionary of values and only write a unique - * value once per page, using a special "copy" cell for all subsequent - * copies of the value. We have to do the cell build and resolution at - * this low level because we need physical cell offsets for the page. 
- * - * Sanity check: short-data cells can be smaller than dictionary-copy - * cells. If the data is already small, don't bother doing the work. - * This isn't just work avoidance: on-page cells can't grow as a result - * of writing a dictionary-copy cell, the reconciliation functions do a - * split-boundary test based on the size required by the value's cell; - * if we grow the cell after that test we'll potentially write off the - * end of the buffer's memory. - */ - if (val->buf.size <= WT_INTPACK32_MAXSIZE) - return (0); - WT_RET(__wt_rec_dictionary_lookup(session, r, val, &dp)); - if (dp == NULL) - return (0); + /* + * We optionally create a dictionary of values and only write a unique + * value once per page, using a special "copy" cell for all subsequent + * copies of the value. We have to do the cell build and resolution at + * this low level because we need physical cell offsets for the page. + * + * Sanity check: short-data cells can be smaller than dictionary-copy + * cells. If the data is already small, don't bother doing the work. + * This isn't just work avoidance: on-page cells can't grow as a result + * of writing a dictionary-copy cell, the reconciliation functions do a + * split-boundary test based on the size required by the value's cell; + * if we grow the cell after that test we'll potentially write off the + * end of the buffer's memory. + */ + if (val->buf.size <= WT_INTPACK32_MAXSIZE) + return (0); + WT_RET(__wt_rec_dictionary_lookup(session, r, val, &dp)); + if (dp == NULL) + return (0); - /* - * If the dictionary offset isn't set, we're creating a new entry in the - * dictionary, set its location. - * - * If the dictionary offset is set, we have a matching value. Create a - * copy cell instead. - */ - if (dp->offset == 0) - dp->offset = WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem); - else { - /* - * The offset is the byte offset from this cell to the previous, - * matching cell, NOT the byte offset from the beginning of the - * page. 
- */ - offset = (uint64_t)WT_PTRDIFF(r->first_free, - (uint8_t *)r->cur_ptr->image.mem + dp->offset); - val->len = val->cell_len = __wt_cell_pack_copy(session, - &val->cell, - start_ts, start_txn, stop_ts, stop_txn, rle, offset); - val->buf.data = NULL; - val->buf.size = 0; - } - return (0); + /* + * If the dictionary offset isn't set, we're creating a new entry in the + * dictionary, set its location. + * + * If the dictionary offset is set, we have a matching value. Create a + * copy cell instead. + */ + if (dp->offset == 0) + dp->offset = WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem); + else { + /* + * The offset is the byte offset from this cell to the previous, matching cell, NOT the byte + * offset from the beginning of the page. + */ + offset = (uint64_t)WT_PTRDIFF(r->first_free, (uint8_t *)r->cur_ptr->image.mem + dp->offset); + val->len = val->cell_len = __wt_cell_pack_copy( + session, &val->cell, start_ts, start_txn, stop_ts, stop_txn, rle, offset); + val->buf.data = NULL; + val->buf.size = 0; + } + return (0); } diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h index cd217fe9c51..a945c895182 100644 --- a/src/third_party/wiredtiger/src/include/schema.h +++ b/src/third_party/wiredtiger/src/include/schema.h @@ -7,44 +7,44 @@ */ /* Character constants for projection plans */ -#define WT_PROJ_KEY 'k' /* Go to key in cursor <arg> */ -#define WT_PROJ_NEXT 'n' /* Process the next item (<arg> repeats) */ -#define WT_PROJ_REUSE 'r' /* Reuse the previous item (<arg> repeats) */ -#define WT_PROJ_SKIP 's' /* Skip a column in the cursor (<arg> repeats) */ -#define WT_PROJ_VALUE 'v' /* Go to the value in cursor <arg> */ +#define WT_PROJ_KEY 'k' /* Go to key in cursor <arg> */ +#define WT_PROJ_NEXT 'n' /* Process the next item (<arg> repeats) */ +#define WT_PROJ_REUSE 'r' /* Reuse the previous item (<arg> repeats) */ +#define WT_PROJ_SKIP 's' /* Skip a column in the cursor (<arg> repeats) */ +#define 
WT_PROJ_VALUE 'v' /* Go to the value in cursor <arg> */ struct __wt_colgroup { - const char *name; /* Logical name */ - const char *source; /* Underlying data source */ - const char *config; /* Configuration string */ + const char *name; /* Logical name */ + const char *source; /* Underlying data source */ + const char *config; /* Configuration string */ - WT_CONFIG_ITEM colconf; /* List of columns from config */ + WT_CONFIG_ITEM colconf; /* List of columns from config */ }; struct __wt_index { - const char *name; /* Logical name */ - const char *source; /* Underlying data source */ - const char *config; /* Configuration string */ + const char *name; /* Logical name */ + const char *source; /* Underlying data source */ + const char *config; /* Configuration string */ - WT_CONFIG_ITEM colconf; /* List of columns from config */ + WT_CONFIG_ITEM colconf; /* List of columns from config */ - WT_COLLATOR *collator; /* Custom collator */ - int collator_owned; /* Collator is owned by this index */ + WT_COLLATOR *collator; /* Custom collator */ + int collator_owned; /* Collator is owned by this index */ - WT_EXTRACTOR *extractor; /* Custom key extractor */ - int extractor_owned; /* Extractor is owned by this index */ + WT_EXTRACTOR *extractor; /* Custom key extractor */ + int extractor_owned; /* Extractor is owned by this index */ - const char *key_format; /* Key format */ - const char *key_plan; /* Key projection plan */ - const char *value_plan; /* Value projection plan */ + const char *key_format; /* Key format */ + const char *key_plan; /* Key projection plan */ + const char *value_plan; /* Value projection plan */ - const char *idxkey_format; /* Index key format (hides primary) */ - const char *exkey_format; /* Key format for custom extractors */ + const char *idxkey_format; /* Index key format (hides primary) */ + const char *exkey_format; /* Key format for custom extractors */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_INDEX_IMMUTABLE 0x1u -/* AUTOMATIC 
FLAG VALUE GENERATION STOP */ - uint32_t flags; /* Index configuration flags */ +#define WT_INDEX_IMMUTABLE 0x1u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; /* Index configuration flags */ }; /* @@ -55,81 +55,77 @@ struct __wt_index { * in an index key that can be used to reconstruct the primary key. */ struct __wt_table { - WT_DATA_HANDLE iface; + WT_DATA_HANDLE iface; - const char *plan; - const char *key_format, *value_format; + const char *plan; + const char *key_format, *value_format; - WT_CONFIG_ITEM cgconf, colconf; + WT_CONFIG_ITEM cgconf, colconf; - WT_COLGROUP **cgroups; - WT_INDEX **indices; - size_t idx_alloc; + WT_COLGROUP **cgroups; + WT_INDEX **indices; + size_t idx_alloc; - bool cg_complete, idx_complete, is_simple; - u_int ncolgroups, nindices, nkey_columns; + bool cg_complete, idx_complete, is_simple; + u_int ncolgroups, nindices, nkey_columns; }; /* - * Tables without explicit column groups have a single default column group - * containing all of the columns. + * Tables without explicit column groups have a single default column group containing all of the + * columns. */ -#define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1) +#define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1) /* Helpers for the locked state of the handle list and table locks. 
*/ -#define WT_SESSION_LOCKED_HANDLE_LIST \ - (WT_SESSION_LOCKED_HANDLE_LIST_READ | \ - WT_SESSION_LOCKED_HANDLE_LIST_WRITE) -#define WT_SESSION_LOCKED_TABLE \ - (WT_SESSION_LOCKED_TABLE_READ | \ - WT_SESSION_LOCKED_TABLE_WRITE) -#define WT_SESSION_LOCKED_HOTBACKUP \ - (WT_SESSION_LOCKED_HOTBACKUP_READ | \ - WT_SESSION_LOCKED_HOTBACKUP_WRITE) +#define WT_SESSION_LOCKED_HANDLE_LIST \ + (WT_SESSION_LOCKED_HANDLE_LIST_READ | WT_SESSION_LOCKED_HANDLE_LIST_WRITE) +#define WT_SESSION_LOCKED_TABLE (WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_TABLE_WRITE) +#define WT_SESSION_LOCKED_HOTBACKUP \ + (WT_SESSION_LOCKED_HOTBACKUP_READ | WT_SESSION_LOCKED_HOTBACKUP_WRITE) /* * WT_WITH_LOCK_WAIT -- * Wait for a lock, perform an operation, drop the lock. */ -#define WT_WITH_LOCK_WAIT(session, lock, flag, op) do { \ - if (F_ISSET(session, (flag))) { \ - op; \ - } else { \ - __wt_spin_lock_track(session, lock); \ - F_SET(session, (flag)); \ - op; \ - F_CLR(session, (flag)); \ - __wt_spin_unlock(session, lock); \ - } \ -} while (0) +#define WT_WITH_LOCK_WAIT(session, lock, flag, op) \ + do { \ + if (F_ISSET(session, (flag))) { \ + op; \ + } else { \ + __wt_spin_lock_track(session, lock); \ + F_SET(session, (flag)); \ + op; \ + F_CLR(session, (flag)); \ + __wt_spin_unlock(session, lock); \ + } \ + } while (0) /* * WT_WITH_LOCK_NOWAIT -- * Acquire a lock if available, perform an operation, drop the lock. 
*/ -#define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) do { \ - (ret) = 0; \ - if (F_ISSET(session, (flag))) { \ - op; \ - } else if (((ret) = \ - __wt_spin_trylock_track(session, lock)) == 0) { \ - F_SET(session, (flag)); \ - op; \ - F_CLR(session, (flag)); \ - __wt_spin_unlock(session, lock); \ - } \ -} while (0) +#define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) \ + do { \ + (ret) = 0; \ + if (F_ISSET(session, (flag))) { \ + op; \ + } else if (((ret) = __wt_spin_trylock_track(session, lock)) == 0) { \ + F_SET(session, (flag)); \ + op; \ + F_CLR(session, (flag)); \ + __wt_spin_unlock(session, lock); \ + } \ + } while (0) /* * WT_WITH_CHECKPOINT_LOCK, WT_WITH_CHECKPOINT_LOCK_NOWAIT -- * Acquire the checkpoint lock, perform an operation, drop the lock. */ -#define WT_WITH_CHECKPOINT_LOCK(session, op) \ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op) -#define WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret, op) \ - WT_WITH_LOCK_NOWAIT(session, ret, \ - &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op) +#define WT_WITH_CHECKPOINT_LOCK(session, op) \ + WT_WITH_LOCK_WAIT(session, &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op) +#define WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret, op) \ + WT_WITH_LOCK_NOWAIT( \ + session, ret, &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op) /* * WT_WITH_HANDLE_LIST_READ_LOCK -- @@ -141,17 +137,18 @@ struct __wt_table { * discard handles, and we only expect it to be held across short * operations. 
*/ -#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) do { \ - if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \ - op; \ - } else { \ - __wt_readlock(session, &S2C(session)->dhandle_lock); \ - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ - op; \ - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ - __wt_readunlock(session, &S2C(session)->dhandle_lock); \ - } \ -} while (0) +#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) \ + do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \ + op; \ + } else { \ + __wt_readlock(session, &S2C(session)->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + __wt_readunlock(session, &S2C(session)->dhandle_lock); \ + } \ + } while (0) /* * WT_WITH_HANDLE_LIST_WRITE_LOCK -- @@ -159,27 +156,26 @@ struct __wt_table { * operation, drop the lock. The handle list lock is a read-write lock so * the implementation is different to the other lock macros. 
*/ -#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) do { \ - if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \ - op; \ - } else { \ - WT_ASSERT(session, \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ));\ - __wt_writelock(session, &S2C(session)->dhandle_lock); \ - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ - op; \ - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ - __wt_writeunlock(session, &S2C(session)->dhandle_lock); \ - } \ -} while (0) +#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) \ + do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \ + op; \ + } else { \ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ)); \ + __wt_writelock(session, &S2C(session)->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + __wt_writeunlock(session, &S2C(session)->dhandle_lock); \ + } \ + } while (0) /* * WT_WITH_METADATA_LOCK -- * Acquire the metadata lock, perform an operation, drop the lock. */ -#define WT_WITH_METADATA_LOCK(session, op) \ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->metadata_lock, WT_SESSION_LOCKED_METADATA, op) +#define WT_WITH_METADATA_LOCK(session, op) \ + WT_WITH_LOCK_WAIT(session, &S2C(session)->metadata_lock, WT_SESSION_LOCKED_METADATA, op) /* * WT_WITH_SCHEMA_LOCK, WT_WITH_SCHEMA_LOCK_NOWAIT -- @@ -187,22 +183,21 @@ struct __wt_table { * Check that we are not already holding some other lock: the schema lock * must be taken first. 
*/ -#define WT_WITH_SCHEMA_LOCK(session, op) do { \ - WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | \ - WT_SESSION_NO_SCHEMA_LOCK | WT_SESSION_LOCKED_TABLE)); \ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \ -} while (0) -#define WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, op) do { \ - WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | \ - WT_SESSION_NO_SCHEMA_LOCK | WT_SESSION_LOCKED_TABLE)); \ - WT_WITH_LOCK_NOWAIT(session, ret, \ - &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \ -} while (0) +#define WT_WITH_SCHEMA_LOCK(session, op) \ + do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \ + !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \ + WT_SESSION_LOCKED_TABLE)); \ + WT_WITH_LOCK_WAIT(session, &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \ + } while (0) +#define WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, op) \ + do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \ + !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \ + WT_SESSION_LOCKED_TABLE)); \ + WT_WITH_LOCK_NOWAIT( \ + session, ret, &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \ + } while (0) /* * WT_WITH_TABLE_READ_LOCK, WT_WITH_TABLE_WRITE_LOCK, @@ -215,49 +210,47 @@ struct __wt_table { * to discard handles, and we only expect it to be held across short * operations. 
*/ -#define WT_WITH_TABLE_READ_LOCK(session, op) do { \ - if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \ - op; \ - } else { \ - WT_ASSERT(session, \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ - __wt_readlock(session, &S2C(session)->table_lock); \ - F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ - op; \ - F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ - __wt_readunlock(session, &S2C(session)->table_lock); \ - } \ -} while (0) +#define WT_WITH_TABLE_READ_LOCK(session, op) \ + do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \ + op; \ + } else { \ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ + __wt_readlock(session, &S2C(session)->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ + __wt_readunlock(session, &S2C(session)->table_lock); \ + } \ + } while (0) -#define WT_WITH_TABLE_WRITE_LOCK(session, op) do { \ - if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ - op; \ - } else { \ - WT_ASSERT(session, \ - !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \ - WT_SESSION_LOCKED_HANDLE_LIST)); \ - __wt_writelock(session, &S2C(session)->table_lock); \ - F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ - op; \ - F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ - __wt_writeunlock(session, &S2C(session)->table_lock); \ - } \ -} while (0) -#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) do { \ - WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \ - !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \ - WT_SESSION_LOCKED_HANDLE_LIST)); \ - if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ - op; \ - } else if (((ret) = __wt_try_writelock(session, \ - &S2C(session)->table_lock)) == 0) { \ - F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ - op; \ - F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ - __wt_writeunlock(session, &S2C(session)->table_lock); \ - } \ -} while (0) +#define 
WT_WITH_TABLE_WRITE_LOCK(session, op) \ + do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ + op; \ + } else { \ + WT_ASSERT(session, \ + !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \ + __wt_writelock(session, &S2C(session)->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &S2C(session)->table_lock); \ + } \ + } while (0) +#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) \ + do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \ + !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ + op; \ + } else if (((ret) = __wt_try_writelock(session, &S2C(session)->table_lock)) == 0) { \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &S2C(session)->table_lock); \ + } \ + } while (0) /* * WT_WITH_HOTBACKUP_READ_LOCK -- @@ -265,48 +258,48 @@ struct __wt_table { * there is no hot backup in progress. The skipp parameter can be used to * check whether the operation got skipped or not. 
*/ -#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) do { \ - WT_CONNECTION_IMPL *__conn = S2C(session); \ - if ((skipp) != (bool *)NULL) \ - *(bool *)(skipp) = true; \ - if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \ - if (!__conn->hot_backup) { \ - if ((skipp) != (bool *)NULL) \ - *(bool *)(skipp) = false; \ - op; \ - } \ - } else { \ - __wt_readlock(session, &__conn->hot_backup_lock); \ - F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ - if (!__conn->hot_backup) { \ - if ((skipp) != (bool *)NULL) \ - *(bool *)(skipp) = false; \ - op; \ - } \ - F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ - __wt_readunlock(session, &__conn->hot_backup_lock); \ - } \ -} while (0) +#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) \ + do { \ + WT_CONNECTION_IMPL *__conn = S2C(session); \ + if ((skipp) != (bool *)NULL) \ + *(bool *)(skipp) = true; \ + if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \ + if (!__conn->hot_backup) { \ + if ((skipp) != (bool *)NULL) \ + *(bool *)(skipp) = false; \ + op; \ + } \ + } else { \ + __wt_readlock(session, &__conn->hot_backup_lock); \ + F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ + if (!__conn->hot_backup) { \ + if ((skipp) != (bool *)NULL) \ + *(bool *)(skipp) = false; \ + op; \ + } \ + F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ + __wt_readunlock(session, &__conn->hot_backup_lock); \ + } \ + } while (0) /* * WT_WITH_HOTBACKUP_WRITE_LOCK -- * Acquire the hot backup write lock and perform an operation. 
*/ -#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) do { \ - WT_CONNECTION_IMPL *__conn = S2C(session); \ - if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \ - op; \ - } else { \ - WT_ASSERT(session, \ - !F_ISSET( \ - session, WT_SESSION_LOCKED_HOTBACKUP_READ)); \ - __wt_writelock(session, &__conn->hot_backup_lock); \ - F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \ - op; \ - F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \ - __wt_writeunlock(session, &__conn->hot_backup_lock); \ - } \ -} while (0) +#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) \ + do { \ + WT_CONNECTION_IMPL *__conn = S2C(session); \ + if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \ + op; \ + } else { \ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_READ)); \ + __wt_writelock(session, &__conn->hot_backup_lock); \ + F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \ + __wt_writeunlock(session, &__conn->hot_backup_lock); \ + } \ + } while (0) /* * WT_WITH_HOTBACKUP_READ_LOCK_UNCOND -- @@ -316,87 +309,83 @@ struct __wt_table { * WT_WITH_HOTBACKUP_READ_LOCK which checks that there is no hot backup in * progress. 
*/ -#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) do { \ - WT_CONNECTION_IMPL *__conn = S2C(session); \ - if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \ - op; \ - } else { \ - __wt_readlock(session, &__conn->hot_backup_lock); \ - F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ - op; \ - F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ - __wt_readunlock(session, &__conn->hot_backup_lock); \ - } \ -} while (0) +#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) \ + do { \ + WT_CONNECTION_IMPL *__conn = S2C(session); \ + if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \ + op; \ + } else { \ + __wt_readlock(session, &__conn->hot_backup_lock); \ + F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \ + __wt_readunlock(session, &__conn->hot_backup_lock); \ + } \ + } while (0) /* * WT_WITHOUT_LOCKS -- * Drop the handle, table and/or schema locks, perform an operation, * re-acquire the lock(s). */ -#define WT_WITHOUT_LOCKS(session, op) do { \ - WT_CONNECTION_IMPL *__conn = S2C(session); \ - bool __checkpoint_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \ - bool __handle_read_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ - bool __handle_write_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ - bool __table_read_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \ - bool __table_write_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ - bool __schema_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \ - if (__handle_read_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ - __wt_readunlock(session, &__conn->dhandle_lock); \ - } \ - if (__handle_write_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ - __wt_writeunlock(session, &__conn->dhandle_lock); \ - } \ - if (__table_read_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ - 
__wt_readunlock(session, &__conn->table_lock); \ - } \ - if (__table_write_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ - __wt_writeunlock(session, &__conn->table_lock); \ - } \ - if (__schema_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \ - __wt_spin_unlock(session, &__conn->schema_lock); \ - } \ - if (__checkpoint_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \ - __wt_spin_unlock(session, &__conn->checkpoint_lock); \ - } \ - __wt_yield(); \ - op; \ - __wt_yield(); \ - if (__checkpoint_locked) { \ - __wt_spin_lock(session, &__conn->checkpoint_lock); \ - F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \ - } \ - if (__schema_locked) { \ - __wt_spin_lock(session, &__conn->schema_lock); \ - F_SET(session, WT_SESSION_LOCKED_SCHEMA); \ - } \ - if (__table_read_locked) { \ - __wt_readlock(session, &__conn->table_lock); \ - F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ - } \ - if (__table_write_locked) { \ - __wt_writelock(session, &__conn->table_lock); \ - F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ - } \ - if (__handle_read_locked) { \ - __wt_readlock(session, &__conn->dhandle_lock); \ - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ - } \ - if (__handle_write_locked) { \ - __wt_writelock(session, &__conn->dhandle_lock); \ - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ - } \ -} while (0) +#define WT_WITHOUT_LOCKS(session, op) \ + do { \ + WT_CONNECTION_IMPL *__conn = S2C(session); \ + bool __checkpoint_locked = F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \ + bool __handle_read_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + bool __handle_write_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + bool __table_read_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \ + bool __table_write_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + bool __schema_locked = F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \ + if (__handle_read_locked) { \ + F_CLR(session, 
WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + __wt_readunlock(session, &__conn->dhandle_lock); \ + } \ + if (__handle_write_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + __wt_writeunlock(session, &__conn->dhandle_lock); \ + } \ + if (__table_read_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ + __wt_readunlock(session, &__conn->table_lock); \ + } \ + if (__table_write_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &__conn->table_lock); \ + } \ + if (__schema_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \ + __wt_spin_unlock(session, &__conn->schema_lock); \ + } \ + if (__checkpoint_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \ + __wt_spin_unlock(session, &__conn->checkpoint_lock); \ + } \ + __wt_yield(); \ + op; \ + __wt_yield(); \ + if (__checkpoint_locked) { \ + __wt_spin_lock(session, &__conn->checkpoint_lock); \ + F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \ + } \ + if (__schema_locked) { \ + __wt_spin_lock(session, &__conn->schema_lock); \ + F_SET(session, WT_SESSION_LOCKED_SCHEMA); \ + } \ + if (__table_read_locked) { \ + __wt_readlock(session, &__conn->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ + } \ + if (__table_write_locked) { \ + __wt_writelock(session, &__conn->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + } \ + if (__handle_read_locked) { \ + __wt_readlock(session, &__conn->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + } \ + if (__handle_write_locked) { \ + __wt_writelock(session, &__conn->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + } \ + } while (0) diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i index 701f73df84f..4f8d6ac6611 100644 --- a/src/third_party/wiredtiger/src/include/serial.i +++ b/src/third_party/wiredtiger/src/include/serial.i @@ -8,302 +8,289 @@ /* * 
__insert_simple_func -- - * Worker function to add a WT_INSERT entry to the middle of a skiplist. + * Worker function to add a WT_INSERT entry to the middle of a skiplist. */ static inline int -__insert_simple_func(WT_SESSION_IMPL *session, - WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth) +__insert_simple_func( + WT_SESSION_IMPL *session, WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth) { - u_int i; - - WT_UNUSED(session); - - /* - * Update the skiplist elements referencing the new WT_INSERT item. - * If we fail connecting one of the upper levels in the skiplist, - * return success: the levels we updated are correct and sufficient. - * Even though we don't get the benefit of the memory we allocated, - * we can't roll back. - * - * All structure setup must be flushed before the structure is entered - * into the list. We need a write barrier here, our callers depend on - * it. Don't pass complex arguments to the macro, some implementations - * read the old value multiple times. - */ - for (i = 0; i < skipdepth; i++) { - WT_INSERT *old_ins = *ins_stack[i]; - if (old_ins != new_ins->next[i] || - !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins)) - return (i == 0 ? WT_RESTART : 0); - } - - return (0); + u_int i; + + WT_UNUSED(session); + + /* + * Update the skiplist elements referencing the new WT_INSERT item. + * If we fail connecting one of the upper levels in the skiplist, + * return success: the levels we updated are correct and sufficient. + * Even though we don't get the benefit of the memory we allocated, + * we can't roll back. + * + * All structure setup must be flushed before the structure is entered + * into the list. We need a write barrier here, our callers depend on + * it. Don't pass complex arguments to the macro, some implementations + * read the old value multiple times. 
+ */ + for (i = 0; i < skipdepth; i++) { + WT_INSERT *old_ins = *ins_stack[i]; + if (old_ins != new_ins->next[i] || !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins)) + return (i == 0 ? WT_RESTART : 0); + } + + return (0); } /* * __insert_serial_func -- - * Worker function to add a WT_INSERT entry to a skiplist. + * Worker function to add a WT_INSERT entry to a skiplist. */ static inline int -__insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, - WT_INSERT ***ins_stack, WT_INSERT *new_ins, u_int skipdepth) +__insert_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, + WT_INSERT *new_ins, u_int skipdepth) { - u_int i; - - /* The cursor should be positioned. */ - WT_ASSERT(session, ins_stack[0] != NULL); - - /* - * Update the skiplist elements referencing the new WT_INSERT item. - * - * Confirm we are still in the expected position, and no item has been - * added where our insert belongs. If we fail connecting one of the - * upper levels in the skiplist, return success: the levels we updated - * are correct and sufficient. Even though we don't get the benefit of - * the memory we allocated, we can't roll back. - * - * All structure setup must be flushed before the structure is entered - * into the list. We need a write barrier here, our callers depend on - * it. Don't pass complex arguments to the macro, some implementations - * read the old value multiple times. - */ - for (i = 0; i < skipdepth; i++) { - WT_INSERT *old_ins = *ins_stack[i]; - if (old_ins != new_ins->next[i] || - !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins)) - return (i == 0 ? WT_RESTART : 0); - if (ins_head->tail[i] == NULL || - ins_stack[i] == &ins_head->tail[i]->next[i]) - ins_head->tail[i] = new_ins; - } - - return (0); + u_int i; + + /* The cursor should be positioned. */ + WT_ASSERT(session, ins_stack[0] != NULL); + + /* + * Update the skiplist elements referencing the new WT_INSERT item. 
+ * + * Confirm we are still in the expected position, and no item has been + * added where our insert belongs. If we fail connecting one of the + * upper levels in the skiplist, return success: the levels we updated + * are correct and sufficient. Even though we don't get the benefit of + * the memory we allocated, we can't roll back. + * + * All structure setup must be flushed before the structure is entered + * into the list. We need a write barrier here, our callers depend on + * it. Don't pass complex arguments to the macro, some implementations + * read the old value multiple times. + */ + for (i = 0; i < skipdepth; i++) { + WT_INSERT *old_ins = *ins_stack[i]; + if (old_ins != new_ins->next[i] || !__wt_atomic_cas_ptr(ins_stack[i], old_ins, new_ins)) + return (i == 0 ? WT_RESTART : 0); + if (ins_head->tail[i] == NULL || ins_stack[i] == &ins_head->tail[i]->next[i]) + ins_head->tail[i] = new_ins; + } + + return (0); } /* * __col_append_serial_func -- - * Worker function to allocate a record number as necessary, then add a - * WT_INSERT entry to a skiplist. + * Worker function to allocate a record number as necessary, then add a WT_INSERT entry to a + * skiplist. */ static inline int -__col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, - WT_INSERT ***ins_stack, WT_INSERT *new_ins, uint64_t *recnop, - u_int skipdepth) +__col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, + WT_INSERT *new_ins, uint64_t *recnop, u_int skipdepth) { - WT_BTREE *btree; - uint64_t recno; - u_int i; - - btree = S2BT(session); - - /* - * If the application didn't specify a record number, allocate a new one - * and set up for an append. 
- */ - if ((recno = WT_INSERT_RECNO(new_ins)) == WT_RECNO_OOB) { - recno = WT_INSERT_RECNO(new_ins) = btree->last_recno + 1; - WT_ASSERT(session, WT_SKIP_LAST(ins_head) == NULL || - recno > WT_INSERT_RECNO(WT_SKIP_LAST(ins_head))); - for (i = 0; i < skipdepth; i++) - ins_stack[i] = ins_head->tail[i] == NULL ? - &ins_head->head[i] : &ins_head->tail[i]->next[i]; - } - - /* Confirm position and insert the new WT_INSERT item. */ - WT_RET(__insert_serial_func( - session, ins_head, ins_stack, new_ins, skipdepth)); - - /* - * Set the calling cursor's record number. - * If we extended the file, update the last record number. - */ - *recnop = recno; - if (recno > btree->last_recno) - btree->last_recno = recno; - - return (0); + WT_BTREE *btree; + uint64_t recno; + u_int i; + + btree = S2BT(session); + + /* + * If the application didn't specify a record number, allocate a new one and set up for an + * append. + */ + if ((recno = WT_INSERT_RECNO(new_ins)) == WT_RECNO_OOB) { + recno = WT_INSERT_RECNO(new_ins) = btree->last_recno + 1; + WT_ASSERT(session, + WT_SKIP_LAST(ins_head) == NULL || recno > WT_INSERT_RECNO(WT_SKIP_LAST(ins_head))); + for (i = 0; i < skipdepth; i++) + ins_stack[i] = + ins_head->tail[i] == NULL ? &ins_head->head[i] : &ins_head->tail[i]->next[i]; + } + + /* Confirm position and insert the new WT_INSERT item. */ + WT_RET(__insert_serial_func(session, ins_head, ins_stack, new_ins, skipdepth)); + + /* + * Set the calling cursor's record number. If we extended the file, update the last record + * number. + */ + *recnop = recno; + if (recno > btree->last_recno) + btree->last_recno = recno; + + return (0); } /* * __wt_col_append_serial -- - * Append a new column-store entry. + * Append a new column-store entry. 
*/ static inline int -__wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, - WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, - size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive) +__wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, + WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, uint64_t *recnop, + u_int skipdepth, bool exclusive) { - WT_DECL_RET; - WT_INSERT *new_ins; - - /* Clear references to memory we now own and must free on error. */ - new_ins = *new_insp; - *new_insp = NULL; - - /* - * Acquire the page's spinlock unless we already have exclusive access. - * Then call the worker function. - */ - if (!exclusive) - WT_PAGE_LOCK(session, page); - ret = __col_append_serial_func( - session, ins_head, ins_stack, new_ins, recnop, skipdepth); - if (!exclusive) - WT_PAGE_UNLOCK(session, page); - - if (ret != 0) { - /* Free unused memory on error. */ - __wt_free(session, new_ins); - return (ret); - } - - /* - * Increment in-memory footprint after releasing the mutex: that's safe - * because the structures we added cannot be discarded while visible to - * any running transaction, and we're a running transaction, which means - * there can be no corresponding delete until we complete. - */ - __wt_cache_page_inmem_incr(session, page, new_ins_size); - - /* Mark the page dirty after updating the footprint. */ - __wt_page_modify_set(session, page); - - return (0); + WT_DECL_RET; + WT_INSERT *new_ins; + + /* Clear references to memory we now own and must free on error. */ + new_ins = *new_insp; + *new_insp = NULL; + + /* + * Acquire the page's spinlock unless we already have exclusive access. Then call the worker + * function. 
+ */ + if (!exclusive) + WT_PAGE_LOCK(session, page); + ret = __col_append_serial_func(session, ins_head, ins_stack, new_ins, recnop, skipdepth); + if (!exclusive) + WT_PAGE_UNLOCK(session, page); + + if (ret != 0) { + /* Free unused memory on error. */ + __wt_free(session, new_ins); + return (ret); + } + + /* + * Increment in-memory footprint after releasing the mutex: that's safe because the structures + * we added cannot be discarded while visible to any running transaction, and we're a running + * transaction, which means there can be no corresponding delete until we complete. + */ + __wt_cache_page_inmem_incr(session, page, new_ins_size); + + /* Mark the page dirty after updating the footprint. */ + __wt_page_modify_set(session, page); + + return (0); } /* * __wt_insert_serial -- - * Insert a row or column-store entry. + * Insert a row or column-store entry. */ static inline int -__wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, - WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, - size_t new_ins_size, u_int skipdepth, bool exclusive) +__wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, + WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size, u_int skipdepth, + bool exclusive) { - WT_DECL_RET; - WT_INSERT *new_ins; - u_int i; - bool simple; - - /* Clear references to memory we now own and must free on error. */ - new_ins = *new_insp; - *new_insp = NULL; - - simple = true; - for (i = 0; i < skipdepth; i++) - if (new_ins->next[i] == NULL) - simple = false; - - if (simple) - ret = __insert_simple_func( - session, ins_stack, new_ins, skipdepth); - else { - if (!exclusive) - WT_PAGE_LOCK(session, page); - ret = __insert_serial_func( - session, ins_head, ins_stack, new_ins, skipdepth); - if (!exclusive) - WT_PAGE_UNLOCK(session, page); - } - - if (ret != 0) { - /* Free unused memory on error. 
*/ - __wt_free(session, new_ins); - return (ret); - } - - /* - * Increment in-memory footprint after releasing the mutex: that's safe - * because the structures we added cannot be discarded while visible to - * any running transaction, and we're a running transaction, which means - * there can be no corresponding delete until we complete. - */ - __wt_cache_page_inmem_incr(session, page, new_ins_size); - - /* Mark the page dirty after updating the footprint. */ - __wt_page_modify_set(session, page); - - return (0); + WT_DECL_RET; + WT_INSERT *new_ins; + u_int i; + bool simple; + + /* Clear references to memory we now own and must free on error. */ + new_ins = *new_insp; + *new_insp = NULL; + + simple = true; + for (i = 0; i < skipdepth; i++) + if (new_ins->next[i] == NULL) + simple = false; + + if (simple) + ret = __insert_simple_func(session, ins_stack, new_ins, skipdepth); + else { + if (!exclusive) + WT_PAGE_LOCK(session, page); + ret = __insert_serial_func(session, ins_head, ins_stack, new_ins, skipdepth); + if (!exclusive) + WT_PAGE_UNLOCK(session, page); + } + + if (ret != 0) { + /* Free unused memory on error. */ + __wt_free(session, new_ins); + return (ret); + } + + /* + * Increment in-memory footprint after releasing the mutex: that's safe because the structures + * we added cannot be discarded while visible to any running transaction, and we're a running + * transaction, which means there can be no corresponding delete until we complete. + */ + __wt_cache_page_inmem_incr(session, page, new_ins_size); + + /* Mark the page dirty after updating the footprint. */ + __wt_page_modify_set(session, page); + + return (0); } /* * __wt_update_serial -- - * Update a row or column-store entry. + * Update a row or column-store entry. 
*/ static inline int -__wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, - WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, bool exclusive) +__wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE **srch_upd, WT_UPDATE **updp, + size_t upd_size, bool exclusive) { - WT_DECL_RET; - WT_UPDATE *obsolete, *upd; - wt_timestamp_t obsolete_timestamp; - uint64_t txn; - - /* Clear references to memory we now own and must free on error. */ - upd = *updp; - *updp = NULL; - - /* - * All structure setup must be flushed before the structure is entered - * into the list. We need a write barrier here, our callers depend on - * it. - * - * Swap the update into place. If that fails, a new update was added - * after our search, we raced. Check if our update is still permitted. - */ - while (!__wt_atomic_cas_ptr(srch_upd, upd->next, upd)) { - if ((ret = __wt_txn_update_check( - session, upd->next = *srch_upd)) != 0) { - /* Free unused memory on error. */ - __wt_free(session, upd); - return (ret); - } - } - - /* - * Increment in-memory footprint after swapping the update into place. - * Safe because the structures we added cannot be discarded while - * visible to any running transaction, and we're a running transaction, - * which means there can be no corresponding delete until we complete. - */ - __wt_cache_page_inmem_incr(session, page, upd_size); - - /* Mark the page dirty after updating the footprint. */ - __wt_page_modify_set(session, page); - - /* If there are no subsequent WT_UPDATE structures we are done here. */ - if (upd->next == NULL || exclusive) - return (0); - - /* - * We would like to call __wt_txn_update_oldest only in the event that - * there are further updates to this page, the check against WT_TXN_NONE - * is used as an indicator of there being further updates on this page. 
- */ - if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) { - obsolete_timestamp = page->modify->obsolete_check_timestamp; - if (!__wt_txn_visible_all(session, txn, obsolete_timestamp)) { - /* Try to move the oldest ID forward and re-check. */ - WT_RET(__wt_txn_update_oldest(session, 0)); - - if (!__wt_txn_visible_all( - session, txn, obsolete_timestamp)) - return (0); - } - - page->modify->obsolete_check_txn = WT_TXN_NONE; - } - - /* If we can't lock it, don't scan, that's okay. */ - if (WT_PAGE_TRYLOCK(session, page) != 0) - return (0); - - obsolete = __wt_update_obsolete_check(session, page, upd->next, true); - - WT_PAGE_UNLOCK(session, page); - - if (obsolete != NULL) - __wt_free_update_list(session, obsolete); - - return (0); + WT_DECL_RET; + WT_UPDATE *obsolete, *upd; + wt_timestamp_t obsolete_timestamp; + uint64_t txn; + + /* Clear references to memory we now own and must free on error. */ + upd = *updp; + *updp = NULL; + + /* + * All structure setup must be flushed before the structure is entered + * into the list. We need a write barrier here, our callers depend on + * it. + * + * Swap the update into place. If that fails, a new update was added + * after our search, we raced. Check if our update is still permitted. + */ + while (!__wt_atomic_cas_ptr(srch_upd, upd->next, upd)) { + if ((ret = __wt_txn_update_check(session, upd->next = *srch_upd)) != 0) { + /* Free unused memory on error. */ + __wt_free(session, upd); + return (ret); + } + } + + /* + * Increment in-memory footprint after swapping the update into place. Safe because the + * structures we added cannot be discarded while visible to any running transaction, and we're a + * running transaction, which means there can be no corresponding delete until we complete. + */ + __wt_cache_page_inmem_incr(session, page, upd_size); + + /* Mark the page dirty after updating the footprint. 
*/ + __wt_page_modify_set(session, page); + + /* If there are no subsequent WT_UPDATE structures we are done here. */ + if (upd->next == NULL || exclusive) + return (0); + + /* + * We would like to call __wt_txn_update_oldest only in the event that there are further updates + * to this page, the check against WT_TXN_NONE is used as an indicator of there being further + * updates on this page. + */ + if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) { + obsolete_timestamp = page->modify->obsolete_check_timestamp; + if (!__wt_txn_visible_all(session, txn, obsolete_timestamp)) { + /* Try to move the oldest ID forward and re-check. */ + WT_RET(__wt_txn_update_oldest(session, 0)); + + if (!__wt_txn_visible_all(session, txn, obsolete_timestamp)) + return (0); + } + + page->modify->obsolete_check_txn = WT_TXN_NONE; + } + + /* If we can't lock it, don't scan, that's okay. */ + if (WT_PAGE_TRYLOCK(session, page) != 0) + return (0); + + obsolete = __wt_update_obsolete_check(session, page, upd->next, true); + + WT_PAGE_UNLOCK(session, page); + + if (obsolete != NULL) + __wt_free_update_list(session, obsolete); + + return (0); } diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index c7ae31b4e54..20428dadf1b 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -12,10 +12,10 @@ * cursors. */ struct __wt_data_handle_cache { - WT_DATA_HANDLE *dhandle; + WT_DATA_HANDLE *dhandle; - TAILQ_ENTRY(__wt_data_handle_cache) q; - TAILQ_ENTRY(__wt_data_handle_cache) hashq; + TAILQ_ENTRY(__wt_data_handle_cache) q; + TAILQ_ENTRY(__wt_data_handle_cache) hashq; }; /* @@ -23,255 +23,247 @@ struct __wt_data_handle_cache { * A hazard pointer. 
*/ struct __wt_hazard { - WT_REF *ref; /* Page reference */ + WT_REF *ref; /* Page reference */ #ifdef HAVE_DIAGNOSTIC - const char *func; /* Function/line hazard acquired */ - int line; + const char *func; /* Function/line hazard acquired */ + int line; #endif }; /* Get the connection implementation for a session */ -#define S2C(session) ((WT_CONNECTION_IMPL *)(session)->iface.connection) +#define S2C(session) ((WT_CONNECTION_IMPL *)(session)->iface.connection) /* Get the btree for a session */ -#define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle) -#define S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session)) +#define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle) +#define S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session)) -typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST; +typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST; /* Number of cursors cached to trigger cursor sweep. */ -#define WT_SESSION_CURSOR_SWEEP_COUNTDOWN 40 +#define WT_SESSION_CURSOR_SWEEP_COUNTDOWN 40 /* Minimum number of buckets to visit during cursor sweep. */ -#define WT_SESSION_CURSOR_SWEEP_MIN 5 +#define WT_SESSION_CURSOR_SWEEP_MIN 5 /* Maximum number of buckets to visit during cursor sweep. */ -#define WT_SESSION_CURSOR_SWEEP_MAX 32 +#define WT_SESSION_CURSOR_SWEEP_MAX 32 /* * WT_SESSION_IMPL -- * Implementation of WT_SESSION. 
*/ struct __wt_session_impl { - WT_SESSION iface; + WT_SESSION iface; - void *lang_private; /* Language specific private storage */ + void *lang_private; /* Language specific private storage */ - u_int active; /* Non-zero if the session is in-use */ + u_int active; /* Non-zero if the session is in-use */ - const char *name; /* Name */ - const char *lastop; /* Last operation */ - uint32_t id; /* UID, offset in session array */ + const char *name; /* Name */ + const char *lastop; /* Last operation */ + uint32_t id; /* UID, offset in session array */ - WT_EVENT_HANDLER *event_handler;/* Application's event handlers */ + WT_EVENT_HANDLER *event_handler; /* Application's event handlers */ - WT_DATA_HANDLE *dhandle; /* Current data handle */ + WT_DATA_HANDLE *dhandle; /* Current data handle */ - /* - * Each session keeps a cache of data handles. The set of handles can - * grow quite large so we maintain both a simple list and a hash table - * of lists. The hash table key is based on a hash of the data handle's - * URI. The hash table list is kept in allocated memory that lives - * across session close - so it is declared further down. - */ - /* Session handle reference list */ - TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; - uint64_t last_sweep; /* Last sweep for dead handles */ - struct timespec last_epoch; /* Last epoch time returned */ + /* + * Each session keeps a cache of data handles. The set of handles can grow quite large so we + * maintain both a simple list and a hash table of lists. The hash table key is based on a hash + * of the data handle's URI. The hash table list is kept in allocated memory that lives across + * session close - so it is declared further down. 
+ */ + /* Session handle reference list */ + TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; + uint64_t last_sweep; /* Last sweep for dead handles */ + struct timespec last_epoch; /* Last epoch time returned */ - WT_CURSOR_LIST cursors; /* Cursors closed with the session */ - uint32_t cursor_sweep_position; /* Position in cursor_cache for sweep */ - uint32_t cursor_sweep_countdown;/* Countdown to cursor sweep */ - uint64_t last_cursor_sweep; /* Last sweep for dead cursors */ + WT_CURSOR_LIST cursors; /* Cursors closed with the session */ + uint32_t cursor_sweep_position; /* Position in cursor_cache for sweep */ + uint32_t cursor_sweep_countdown; /* Countdown to cursor sweep */ + uint64_t last_cursor_sweep; /* Last sweep for dead cursors */ - WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */ + WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */ - WT_COMPACT_STATE *compact; /* Compaction information */ - enum { WT_COMPACT_NONE=0, - WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state; + WT_COMPACT_STATE *compact; /* Compaction information */ + enum { WT_COMPACT_NONE = 0, WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state; - WT_CURSOR *las_cursor; /* Lookaside table cursor */ + WT_CURSOR *las_cursor; /* Lookaside table cursor */ - WT_CURSOR *meta_cursor; /* Metadata file */ - void *meta_track; /* Metadata operation tracking */ - void *meta_track_next; /* Current position */ - void *meta_track_sub; /* Child transaction / save point */ - size_t meta_track_alloc; /* Currently allocated */ - int meta_track_nest; /* Nesting level of meta transaction */ -#define WT_META_TRACKING(session) ((session)->meta_track_next != NULL) + WT_CURSOR *meta_cursor; /* Metadata file */ + void *meta_track; /* Metadata operation tracking */ + void *meta_track_next; /* Current position */ + void *meta_track_sub; /* Child transaction / save point */ + size_t meta_track_alloc; /* Currently allocated */ + int meta_track_nest; /* Nesting level of meta transaction */ +#define 
 WT_META_TRACKING(session) ((session)->meta_track_next != NULL) - /* Current rwlock for callback. */ - WT_RWLOCK *current_rwlock; - uint8_t current_rwticket; + /* Current rwlock for callback. */ + WT_RWLOCK *current_rwlock; + uint8_t current_rwticket; - WT_ITEM **scratch; /* Temporary memory for any function */ - u_int scratch_alloc; /* Currently allocated */ - size_t scratch_cached; /* Scratch bytes cached */ + WT_ITEM **scratch; /* Temporary memory for any function */ + u_int scratch_alloc; /* Currently allocated */ + size_t scratch_cached; /* Scratch bytes cached */ #ifdef HAVE_DIAGNOSTIC - /* - * Variables used to look for violations of the contract that a - * session is only used by a single session at once. - */ - volatile uintmax_t api_tid; - volatile uint32_t api_enter_refcnt; - /* - * It's hard to figure out from where a buffer was allocated after it's - * leaked, so in diagnostic mode we track them; DIAGNOSTIC can't simply - * add additional fields to WT_ITEM structures because they are visible - * to applications, create a parallel structure instead. - */ - struct __wt_scratch_track { - const char *func; /* Allocating function, line */ - int line; - } *scratch_track; + /* + * Variables used to look for violations of the contract that a session is only used by a single + * thread at once. + */ + volatile uintmax_t api_tid; + volatile uint32_t api_enter_refcnt; + /* + * It's hard to figure out from where a buffer was allocated after it's leaked, so in diagnostic + * mode we track them; DIAGNOSTIC can't simply add additional fields to WT_ITEM structures + * because they are visible to applications, create a parallel structure instead. 
+ */ + struct __wt_scratch_track { + const char *func; /* Allocating function, line */ + int line; + } * scratch_track; #endif - WT_ITEM err; /* Error buffer */ + WT_ITEM err; /* Error buffer */ - WT_TXN_ISOLATION isolation; - WT_TXN txn; /* Transaction state */ -#define WT_SESSION_BG_SYNC_MSEC 1200000 - WT_LSN bg_sync_lsn; /* Background sync operation LSN. */ - u_int ncursors; /* Count of active file cursors. */ + WT_TXN_ISOLATION isolation; + WT_TXN txn; /* Transaction state */ +#define WT_SESSION_BG_SYNC_MSEC 1200000 + WT_LSN bg_sync_lsn; /* Background sync operation LSN. */ + u_int ncursors; /* Count of active file cursors. */ - void *block_manager; /* Block-manager support */ - int (*block_manager_cleanup)(WT_SESSION_IMPL *); + void *block_manager; /* Block-manager support */ + int (*block_manager_cleanup)(WT_SESSION_IMPL *); - /* Checkpoint handles */ - WT_DATA_HANDLE **ckpt_handle; /* Handle list */ - u_int ckpt_handle_next; /* Next empty slot */ - size_t ckpt_handle_allocated; /* Bytes allocated */ + /* Checkpoint handles */ + WT_DATA_HANDLE **ckpt_handle; /* Handle list */ + u_int ckpt_handle_next; /* Next empty slot */ + size_t ckpt_handle_allocated; /* Bytes allocated */ - uint64_t cache_wait_us; /* Wait time for cache for current operation */ + uint64_t cache_wait_us; /* Wait time for cache for current operation */ - /* - * Operations acting on handles. - * - * The preferred pattern is to gather all of the required handles at - * the beginning of an operation, then drop any other locks, perform - * the operation, then release the handles. This cannot be easily - * merged with the list of checkpoint handles because some operations - * (such as compact) do checkpoints internally. - */ - WT_DATA_HANDLE **op_handle; /* Handle list */ - u_int op_handle_next; /* Next empty slot */ - size_t op_handle_allocated; /* Bytes allocated */ + /* + * Operations acting on handles. 
+ * + * The preferred pattern is to gather all of the required handles at + * the beginning of an operation, then drop any other locks, perform + * the operation, then release the handles. This cannot be easily + * merged with the list of checkpoint handles because some operations + * (such as compact) do checkpoints internally. + */ + WT_DATA_HANDLE **op_handle; /* Handle list */ + u_int op_handle_next; /* Next empty slot */ + size_t op_handle_allocated; /* Bytes allocated */ - void *reconcile; /* Reconciliation support */ - int (*reconcile_cleanup)(WT_SESSION_IMPL *); + void *reconcile; /* Reconciliation support */ + int (*reconcile_cleanup)(WT_SESSION_IMPL *); - /* Sessions have an associated statistics bucket based on its ID. */ - u_int stat_bucket; /* Statistics bucket offset */ + /* Sessions have an associated statistics bucket based on its ID. */ + u_int stat_bucket; /* Statistics bucket offset */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_SESSION_BACKUP_CURSOR 0x0000001u -#define WT_SESSION_BACKUP_DUP 0x0000002u -#define WT_SESSION_CACHE_CURSORS 0x0000004u -#define WT_SESSION_CAN_WAIT 0x0000008u -#define WT_SESSION_IGNORE_CACHE_SIZE 0x0000010u -#define WT_SESSION_INTERNAL 0x0000020u -#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u -#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u -#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u -#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0000200u -#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0000400u -#define WT_SESSION_LOCKED_METADATA 0x0000800u -#define WT_SESSION_LOCKED_PASS 0x0001000u -#define WT_SESSION_LOCKED_SCHEMA 0x0002000u -#define WT_SESSION_LOCKED_SLOT 0x0004000u -#define WT_SESSION_LOCKED_TABLE_READ 0x0008000u -#define WT_SESSION_LOCKED_TABLE_WRITE 0x0010000u -#define WT_SESSION_LOCKED_TURTLE 0x0020000u -#define WT_SESSION_LOGGING_INMEM 0x0040000u -#define WT_SESSION_LOOKASIDE_CURSOR 0x0080000u -#define WT_SESSION_NO_DATA_HANDLES 0x0100000u -#define WT_SESSION_NO_LOGGING 0x0200000u 
-#define WT_SESSION_NO_RECONCILE 0x0400000u -#define WT_SESSION_NO_SCHEMA_LOCK 0x0800000u -#define WT_SESSION_QUIET_CORRUPT_FILE 0x1000000u -#define WT_SESSION_READ_WONT_NEED 0x2000000u -#define WT_SESSION_SCHEMA_TXN 0x4000000u -#define WT_SESSION_SERVER_ASYNC 0x8000000u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; - - /* - * All of the following fields live at the end of the structure so it's - * easier to clear everything but the fields that persist. - */ -#define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd)) - - /* - * The random number state persists past session close because we don't - * want to repeatedly use the same values for skiplist depth when the - * application isn't caching sessions. - */ - WT_RAND_STATE rnd; /* Random number generation state */ - - /* - * Hash tables are allocated lazily as sessions are used to keep the - * size of this structure from growing too large. - */ - WT_CURSOR_LIST *cursor_cache; /* Hash table of cached cursors */ - - /* Hashed handle reference list array */ - TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash; - - /* Generations manager */ -#define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */ -#define WT_GEN_COMMIT 1 /* Commit generation */ -#define WT_GEN_EVICT 2 /* Eviction generation */ -#define WT_GEN_HAZARD 3 /* Hazard pointer */ -#define WT_GEN_SPLIT 4 /* Page splits */ -#define WT_GENERATIONS 5 /* Total generation manager entries */ - volatile uint64_t generations[WT_GENERATIONS]; - - /* - * Session memory persists past session close because it's accessed by - * threads of control other than the thread owning the session. For - * example, btree splits and hazard pointers can "free" memory that's - * still in use. In order to eventually free it, it's stashed here with - * with its generation number; when no thread is reading in generation, - * the memory can be freed for real. 
- */ - struct __wt_session_stash { - struct __wt_stash { - void *p; /* Memory, length */ - size_t len; - uint64_t gen; /* Generation */ - } *list; - size_t cnt; /* Array entries */ - size_t alloc; /* Allocated bytes */ - } stash[WT_GENERATIONS]; - - /* - * Hazard pointers. - * - * Hazard information persists past session close because it's accessed - * by threads of control other than the thread owning the session. - * - * Use the non-NULL state of the hazard field to know if the session has - * previously been initialized. - */ -#define WT_SESSION_FIRST_USE(s) \ - ((s)->hazard == NULL) - - /* - * The hazard pointer array grows as necessary, initialize with 250 - * slots. - */ -#define WT_SESSION_INITIAL_HAZARD_SLOTS 250 - uint32_t hazard_size; /* Hazard pointer array slots */ - uint32_t hazard_inuse; /* Hazard pointer array slots in-use */ - uint32_t nhazard; /* Count of active hazard pointers */ - WT_HAZARD *hazard; /* Hazard pointer array */ - - /* - * Operation tracking. - */ - WT_OPTRACK_RECORD *optrack_buf; - u_int optrackbuf_ptr; - uint64_t optrack_offset; - WT_FH *optrack_fh; - - WT_SESSION_STATS stats; +#define WT_SESSION_BACKUP_CURSOR 0x0000001u +#define WT_SESSION_BACKUP_DUP 0x0000002u +#define WT_SESSION_CACHE_CURSORS 0x0000004u +#define WT_SESSION_CAN_WAIT 0x0000008u +#define WT_SESSION_IGNORE_CACHE_SIZE 0x0000010u +#define WT_SESSION_INTERNAL 0x0000020u +#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u +#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u +#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u +#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0000200u +#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0000400u +#define WT_SESSION_LOCKED_METADATA 0x0000800u +#define WT_SESSION_LOCKED_PASS 0x0001000u +#define WT_SESSION_LOCKED_SCHEMA 0x0002000u +#define WT_SESSION_LOCKED_SLOT 0x0004000u +#define WT_SESSION_LOCKED_TABLE_READ 0x0008000u +#define WT_SESSION_LOCKED_TABLE_WRITE 0x0010000u +#define WT_SESSION_LOCKED_TURTLE 0x0020000u +#define 
WT_SESSION_LOGGING_INMEM 0x0040000u +#define WT_SESSION_LOOKASIDE_CURSOR 0x0080000u +#define WT_SESSION_NO_DATA_HANDLES 0x0100000u +#define WT_SESSION_NO_LOGGING 0x0200000u +#define WT_SESSION_NO_RECONCILE 0x0400000u +#define WT_SESSION_NO_SCHEMA_LOCK 0x0800000u +#define WT_SESSION_QUIET_CORRUPT_FILE 0x1000000u +#define WT_SESSION_READ_WONT_NEED 0x2000000u +#define WT_SESSION_SCHEMA_TXN 0x4000000u +#define WT_SESSION_SERVER_ASYNC 0x8000000u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; + +/* + * All of the following fields live at the end of the structure so it's easier to clear everything + * but the fields that persist. + */ +#define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd)) + + /* + * The random number state persists past session close because we don't want to repeatedly use + * the same values for skiplist depth when the application isn't caching sessions. + */ + WT_RAND_STATE rnd; /* Random number generation state */ + + /* + * Hash tables are allocated lazily as sessions are used to keep the size of this structure from + * growing too large. + */ + WT_CURSOR_LIST *cursor_cache; /* Hash table of cached cursors */ + + /* Hashed handle reference list array */ + TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) * dhhash; + +/* Generations manager */ +#define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */ +#define WT_GEN_COMMIT 1 /* Commit generation */ +#define WT_GEN_EVICT 2 /* Eviction generation */ +#define WT_GEN_HAZARD 3 /* Hazard pointer */ +#define WT_GEN_SPLIT 4 /* Page splits */ +#define WT_GENERATIONS 5 /* Total generation manager entries */ + volatile uint64_t generations[WT_GENERATIONS]; + + /* + * Session memory persists past session close because it's accessed by threads of control other + * than the thread owning the session. For example, btree splits and hazard pointers can "free" + * memory that's still in use. 
In order to eventually free it, it's stashed here with its + * generation number; when no thread is reading in generation, the memory can be freed for real. + */ + struct __wt_session_stash { + struct __wt_stash { + void *p; /* Memory, length */ + size_t len; + uint64_t gen; /* Generation */ + } * list; + size_t cnt; /* Array entries */ + size_t alloc; /* Allocated bytes */ + } stash[WT_GENERATIONS]; + +/* + * Hazard pointers. + * + * Hazard information persists past session close because it's accessed + * by threads of control other than the thread owning the session. + * + * Use the non-NULL state of the hazard field to know if the session has + * previously been initialized. + */ +#define WT_SESSION_FIRST_USE(s) ((s)->hazard == NULL) + +/* + * The hazard pointer array grows as necessary, initialize with 250 slots. + */ +#define WT_SESSION_INITIAL_HAZARD_SLOTS 250 + uint32_t hazard_size; /* Hazard pointer array slots */ + uint32_t hazard_inuse; /* Hazard pointer array slots in-use */ + uint32_t nhazard; /* Count of active hazard pointers */ + WT_HAZARD *hazard; /* Hazard pointer array */ + + /* + * Operation tracking. + */ + WT_OPTRACK_RECORD *optrack_buf; + u_int optrackbuf_ptr; + uint64_t optrack_offset; + WT_FH *optrack_fh; + + WT_SESSION_STATS stats; }; diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index de4d5fe97ad..53d3f2126ae 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -52,7 +52,7 @@ * Default hash table size; use a prime number of buckets rather than assuming * a good hash (Reference Sedgewick, Algorithms in C, "Hash Functions"). */ -#define WT_COUNTER_SLOTS 23 +#define WT_COUNTER_SLOTS 23 /* * WT_STATS_SLOT_ID is the thread's slot ID for the array of structures. @@ -71,8 +71,7 @@ * Our solution is to use the session ID; there is normally a session per thread * and the session ID is a small, monotonically increasing number. 
*/ -#define WT_STATS_SLOT_ID(session) \ - (((session)->id) % WT_COUNTER_SLOTS) +#define WT_STATS_SLOT_ID(session) (((session)->id) % WT_COUNTER_SLOTS) /* * Statistic structures are arrays of int64_t's. We have functions to read/write @@ -81,21 +80,19 @@ * * Translate a statistic's value name to an offset in the array. */ -#define WT_STATS_FIELD_TO_OFFSET(stats, fld) \ - (int)(&(stats)[0]->fld - (int64_t *)(stats)[0]) +#define WT_STATS_FIELD_TO_OFFSET(stats, fld) (int)(&(stats)[0]->fld - (int64_t *)(stats)[0]) -#define WT_SESSION_STATS_FIELD_TO_OFFSET(stats, fld) \ - (int)(&(stats)->fld - (int64_t *)(stats)) +#define WT_SESSION_STATS_FIELD_TO_OFFSET(stats, fld) (int)(&(stats)->fld - (int64_t *)(stats)) /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_STAT_CLEAR 0x01u -#define WT_STAT_JSON 0x02u -#define WT_STAT_ON_CLOSE 0x04u -#define WT_STAT_TYPE_ALL 0x08u -#define WT_STAT_TYPE_CACHE_WALK 0x10u -#define WT_STAT_TYPE_FAST 0x20u -#define WT_STAT_TYPE_SIZE 0x40u -#define WT_STAT_TYPE_TREE_WALK 0x80u +#define WT_STAT_CLEAR 0x01u +#define WT_STAT_JSON 0x02u +#define WT_STAT_ON_CLOSE 0x04u +#define WT_STAT_TYPE_ALL 0x08u +#define WT_STAT_TYPE_CACHE_WALK 0x10u +#define WT_STAT_TYPE_FAST 0x20u +#define WT_STAT_TYPE_SIZE 0x40u +#define WT_STAT_TYPE_TREE_WALK 0x80u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* @@ -104,32 +101,32 @@ static inline int64_t __wt_stats_aggregate(void *stats_arg, int slot) { - int64_t **stats, aggr_v; - int i; + int64_t **stats, aggr_v; + int i; - stats = stats_arg; - for (aggr_v = 0, i = 0; i < WT_COUNTER_SLOTS; i++) - aggr_v += stats[i][slot]; + stats = stats_arg; + for (aggr_v = 0, i = 0; i < WT_COUNTER_SLOTS; i++) + aggr_v += stats[i][slot]; - /* - * This can race. However, any implementation with a single value can - * race as well, different threads could set the same counter value - * simultaneously. 
While we are making races more likely, we are not - * fundamentally weakening the isolation semantics found in updating a - * single value. - * - * Additionally, the aggregation can go negative (imagine a thread - * incrementing a value after aggregation has passed its slot and a - * second thread decrementing a value before aggregation has reached - * its slot). - * - * For historic API compatibility, the external type is a uint64_t; - * limit our return to positive values, negative numbers would just - * look really, really large. - */ - if (aggr_v < 0) - aggr_v = 0; - return (aggr_v); + /* + * This can race. However, any implementation with a single value can + * race as well, different threads could set the same counter value + * simultaneously. While we are making races more likely, we are not + * fundamentally weakening the isolation semantics found in updating a + * single value. + * + * Additionally, the aggregation can go negative (imagine a thread + * incrementing a value after aggregation has passed its slot and a + * second thread decrementing a value before aggregation has reached + * its slot). + * + * For historic API compatibility, the external type is a uint64_t; + * limit our return to positive values, negative numbers would just + * look really, really large. + */ + if (aggr_v < 0) + aggr_v = 0; + return (aggr_v); } /* @@ -138,99 +135,92 @@ __wt_stats_aggregate(void *stats_arg, int slot) static inline void __wt_stats_clear(void *stats_arg, int slot) { - int64_t **stats; - int i; + int64_t **stats; + int i; - stats = stats_arg; - for (i = 0; i < WT_COUNTER_SLOTS; i++) - stats[i][slot] = 0; + stats = stats_arg; + for (i = 0; i < WT_COUNTER_SLOTS; i++) + stats[i][slot] = 0; } /* - * Read/write statistics if statistics gathering is enabled. Reading and - * writing the field requires different actions: reading sums the values - * across the array of structures, writing updates a single structure's value. 
+ * Read/write statistics if statistics gathering is enabled. Reading and writing the field requires + * different actions: reading sums the values across the array of structures, writing updates a + * single structure's value. */ -#define WT_STAT_ENABLED(session) (S2C(session)->stat_flags != 0) +#define WT_STAT_ENABLED(session) (S2C(session)->stat_flags != 0) -#define WT_STAT_READ(stats, fld) \ - __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld)) -#define WT_STAT_WRITE(session, stats, fld, v) do { \ - if (WT_STAT_ENABLED(session)) \ - (stats)->fld = (int64_t)(v); \ -} while (0) +#define WT_STAT_READ(stats, fld) __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld)) +#define WT_STAT_WRITE(session, stats, fld, v) \ + do { \ + if (WT_STAT_ENABLED(session)) \ + (stats)->fld = (int64_t)(v); \ + } while (0) -#define WT_STAT_DECRV_BASE(session, stat, fld, value) do { \ - if (WT_STAT_ENABLED(session)) \ - (stat)->fld -= (int64_t)(value); \ -} while (0) -#define WT_STAT_DECRV_ATOMIC_BASE(session, stat, fld, value) do { \ - if (WT_STAT_ENABLED(session)) \ - (void) \ - __wt_atomic_subi64(&(stat)->fld, (int64_t)(value)); \ -} while (0) -#define WT_STAT_INCRV_BASE(session, stat, fld, value) do { \ - if (WT_STAT_ENABLED(session)) \ - (stat)->fld += (int64_t)(value); \ -} while (0) -#define WT_STAT_INCRV_ATOMIC_BASE(session, stat, fld, value) do { \ - if (WT_STAT_ENABLED(session)) \ - (void) \ - __wt_atomic_addi64(&(stat)->fld, (int64_t)(value)); \ -} while (0) +#define WT_STAT_DECRV_BASE(session, stat, fld, value) \ + do { \ + if (WT_STAT_ENABLED(session)) \ + (stat)->fld -= (int64_t)(value); \ + } while (0) +#define WT_STAT_DECRV_ATOMIC_BASE(session, stat, fld, value) \ + do { \ + if (WT_STAT_ENABLED(session)) \ + (void)__wt_atomic_subi64(&(stat)->fld, (int64_t)(value)); \ + } while (0) +#define WT_STAT_INCRV_BASE(session, stat, fld, value) \ + do { \ + if (WT_STAT_ENABLED(session)) \ + (stat)->fld += (int64_t)(value); \ + } while (0) +#define 
WT_STAT_INCRV_ATOMIC_BASE(session, stat, fld, value) \ + do { \ + if (WT_STAT_ENABLED(session)) \ + (void)__wt_atomic_addi64(&(stat)->fld, (int64_t)(value)); \ + } while (0) -#define WT_STAT_DECRV(session, stats, fld, value) do { \ - WT_STAT_DECRV_BASE( \ - session, (stats)[(session)->stat_bucket], fld, value); \ -} while (0) -#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) do { \ - WT_STAT_DECRV_ATOMIC_BASE( \ - session, (stats)[(session)->stat_bucket], fld, value); \ -} while (0) -#define WT_STAT_DECR(session, stats, fld) \ - WT_STAT_DECRV(session, stats, fld, 1) +#define WT_STAT_DECRV(session, stats, fld, value) \ + do { \ + WT_STAT_DECRV_BASE(session, (stats)[(session)->stat_bucket], fld, value); \ + } while (0) +#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \ + do { \ + WT_STAT_DECRV_ATOMIC_BASE(session, (stats)[(session)->stat_bucket], fld, value); \ + } while (0) +#define WT_STAT_DECR(session, stats, fld) WT_STAT_DECRV(session, stats, fld, 1) -#define WT_STAT_INCRV(session, stats, fld, value) do { \ - WT_STAT_INCRV_BASE( \ - session, (stats)[(session)->stat_bucket], fld, value); \ -} while (0) -#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) do { \ - WT_STAT_INCRV_ATOMIC_BASE( \ - session, (stats)[(session)->stat_bucket], fld, value); \ -} while (0) -#define WT_STAT_INCR(session, stats, fld) \ - WT_STAT_INCRV(session, stats, fld, 1) -#define WT_STAT_SET(session, stats, fld, value) do { \ - if (WT_STAT_ENABLED(session)) { \ - __wt_stats_clear(stats, \ - WT_STATS_FIELD_TO_OFFSET(stats, fld)); \ - (stats)[0]->fld = (int64_t)(value); \ - } \ -} while (0) +#define WT_STAT_INCRV(session, stats, fld, value) \ + do { \ + WT_STAT_INCRV_BASE(session, (stats)[(session)->stat_bucket], fld, value); \ + } while (0) +#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \ + do { \ + WT_STAT_INCRV_ATOMIC_BASE(session, (stats)[(session)->stat_bucket], fld, value); \ + } while (0) +#define WT_STAT_INCR(session, stats, fld) 
WT_STAT_INCRV(session, stats, fld, 1) +#define WT_STAT_SET(session, stats, fld, value) \ + do { \ + if (WT_STAT_ENABLED(session)) { \ + __wt_stats_clear(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld)); \ + (stats)[0]->fld = (int64_t)(value); \ + } \ + } while (0) /* * Update connection handle statistics if statistics gathering is enabled. */ -#define WT_STAT_CONN_DECRV(session, fld, value) \ - WT_STAT_DECRV_BASE(session, \ - S2C(session)->stats[(session)->stat_bucket], fld, value) -#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \ - WT_STAT_DECRV_ATOMIC_BASE(session, \ - S2C(session)->stats[(session)->stat_bucket], fld, 1) -#define WT_STAT_CONN_DECR(session, fld) \ - WT_STAT_CONN_DECRV(session, fld, 1) +#define WT_STAT_CONN_DECRV(session, fld, value) \ + WT_STAT_DECRV_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, value) +#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \ + WT_STAT_DECRV_ATOMIC_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, 1) +#define WT_STAT_CONN_DECR(session, fld) WT_STAT_CONN_DECRV(session, fld, 1) -#define WT_STAT_CONN_INCRV(session, fld, value) \ - WT_STAT_INCRV_BASE(session, \ - S2C(session)->stats[(session)->stat_bucket], fld, value) -#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \ - WT_STAT_INCRV_ATOMIC_BASE(session, \ - S2C(session)->stats[(session)->stat_bucket], fld, 1) -#define WT_STAT_CONN_INCR(session, fld) \ - WT_STAT_CONN_INCRV(session, fld, 1) +#define WT_STAT_CONN_INCRV(session, fld, value) \ + WT_STAT_INCRV_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, value) +#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \ + WT_STAT_INCRV_ATOMIC_BASE(session, S2C(session)->stats[(session)->stat_bucket], fld, 1) +#define WT_STAT_CONN_INCR(session, fld) WT_STAT_CONN_INCRV(session, fld, 1) -#define WT_STAT_CONN_SET(session, fld, value) \ - WT_STAT_SET(session, S2C(session)->stats, fld, value) +#define WT_STAT_CONN_SET(session, fld, value) WT_STAT_SET(session, S2C(session)->stats, fld, value) /* * 
Update data-source handle statistics if statistics gathering is enabled @@ -240,79 +230,71 @@ __wt_stats_clear(void *stats_arg, int slot) * We shouldn't have to check if the data-source handle is NULL, but it's * necessary until everything is converted to using data-source handles. */ -#define WT_STAT_DATA_DECRV(session, fld, value) do { \ - if ((session)->dhandle != NULL && \ - (session)->dhandle->stat_array != NULL) \ - WT_STAT_DECRV( \ - session, (session)->dhandle->stats, fld, value); \ -} while (0) -#define WT_STAT_DATA_DECR(session, fld) \ - WT_STAT_DATA_DECRV(session, fld, 1) -#define WT_STAT_DATA_INCRV(session, fld, value) do { \ - if ((session)->dhandle != NULL && \ - (session)->dhandle->stat_array != NULL) \ - WT_STAT_INCRV( \ - session, (session)->dhandle->stats, fld, value); \ -} while (0) -#define WT_STAT_DATA_INCR(session, fld) \ - WT_STAT_DATA_INCRV(session, fld, 1) -#define WT_STAT_DATA_SET(session, fld, value) do { \ - if ((session)->dhandle != NULL && \ - (session)->dhandle->stat_array != NULL) \ - WT_STAT_SET( \ - session, (session)->dhandle->stats, fld, value); \ -} while (0) +#define WT_STAT_DATA_DECRV(session, fld, value) \ + do { \ + if ((session)->dhandle != NULL && (session)->dhandle->stat_array != NULL) \ + WT_STAT_DECRV(session, (session)->dhandle->stats, fld, value); \ + } while (0) +#define WT_STAT_DATA_DECR(session, fld) WT_STAT_DATA_DECRV(session, fld, 1) +#define WT_STAT_DATA_INCRV(session, fld, value) \ + do { \ + if ((session)->dhandle != NULL && (session)->dhandle->stat_array != NULL) \ + WT_STAT_INCRV(session, (session)->dhandle->stats, fld, value); \ + } while (0) +#define WT_STAT_DATA_INCR(session, fld) WT_STAT_DATA_INCRV(session, fld, 1) +#define WT_STAT_DATA_SET(session, fld, value) \ + do { \ + if ((session)->dhandle != NULL && (session)->dhandle->stat_array != NULL) \ + WT_STAT_SET(session, (session)->dhandle->stats, fld, value); \ + } while (0) /* * Update per session statistics. 
*/ -#define WT_STAT_SESSION_INCRV(session, fld, value) \ - WT_STAT_INCRV_BASE(session, &(session)->stats, fld, value) +#define WT_STAT_SESSION_INCRV(session, fld, value) \ + WT_STAT_INCRV_BASE(session, &(session)->stats, fld, value) /* - * Construct histogram increment functions to put the passed value into the - * right bucket. Bucket ranges, represented by various statistics, depend upon - * whether the passed value is in milliseconds or microseconds. Also values - * less than a given minimum are ignored and not put in any bucket. This floor - * value keeps us from having an excessively large smallest values. + * Construct histogram increment functions to put the passed value into the right bucket. Bucket + * ranges, represented by various statistics, depend upon whether the passed value is in + * milliseconds or microseconds. Also values less than a given minimum are ignored and not put in + * any bucket. This floor value keeps us from having an excessively large smallest values. */ -#define WT_STAT_MSECS_HIST_INCR_FUNC(name, stat, min_val) \ -static inline void \ -__wt_stat_msecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t msecs) \ -{ \ - if (msecs < (min_val)) \ - return; \ - if (msecs < 50) \ - WT_STAT_CONN_INCR(session, stat##_lt50); \ - else if (msecs < 100) \ - WT_STAT_CONN_INCR(session, stat##_lt100); \ - else if (msecs < 250) \ - WT_STAT_CONN_INCR(session, stat##_lt250); \ - else if (msecs < 500) \ - WT_STAT_CONN_INCR(session, stat##_lt500); \ - else if (msecs < 1000) \ - WT_STAT_CONN_INCR(session, stat##_lt1000); \ - else \ - WT_STAT_CONN_INCR(session, stat##_gt1000); \ -} +#define WT_STAT_MSECS_HIST_INCR_FUNC(name, stat, min_val) \ + static inline void __wt_stat_msecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t msecs) \ + { \ + if (msecs < (min_val)) \ + return; \ + if (msecs < 50) \ + WT_STAT_CONN_INCR(session, stat##_lt50); \ + else if (msecs < 100) \ + WT_STAT_CONN_INCR(session, stat##_lt100); \ + else if (msecs < 250) \ + 
WT_STAT_CONN_INCR(session, stat##_lt250); \ + else if (msecs < 500) \ + WT_STAT_CONN_INCR(session, stat##_lt500); \ + else if (msecs < 1000) \ + WT_STAT_CONN_INCR(session, stat##_lt1000); \ + else \ + WT_STAT_CONN_INCR(session, stat##_gt1000); \ + } -#define WT_STAT_USECS_HIST_INCR_FUNC(name, stat, min_val) \ -static inline void \ -__wt_stat_usecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t usecs) \ -{ \ - if (usecs < (min_val)) \ - return; \ - if (usecs < 250) \ - WT_STAT_CONN_INCR(session, stat##_lt250); \ - else if (usecs < 500) \ - WT_STAT_CONN_INCR(session, stat##_lt500); \ - else if (usecs < 1000) \ - WT_STAT_CONN_INCR(session, stat##_lt1000); \ - else if (usecs < 10000) \ - WT_STAT_CONN_INCR(session, stat##_lt10000); \ - else \ - WT_STAT_CONN_INCR(session, stat##_gt10000); \ -} +#define WT_STAT_USECS_HIST_INCR_FUNC(name, stat, min_val) \ + static inline void __wt_stat_usecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t usecs) \ + { \ + if (usecs < (min_val)) \ + return; \ + if (usecs < 250) \ + WT_STAT_CONN_INCR(session, stat##_lt250); \ + else if (usecs < 500) \ + WT_STAT_CONN_INCR(session, stat##_lt500); \ + else if (usecs < 1000) \ + WT_STAT_CONN_INCR(session, stat##_lt1000); \ + else if (usecs < 10000) \ + WT_STAT_CONN_INCR(session, stat##_lt10000); \ + else \ + WT_STAT_CONN_INCR(session, stat##_gt10000); \ + } /* * DO NOT EDIT: automatically built by dist/stat.py. @@ -322,588 +304,588 @@ __wt_stat_usecs_hist_incr_##name(WT_SESSION_IMPL *session, uint64_t usecs) \ /* * Statistics entries for connections. 
*/ -#define WT_CONNECTION_STATS_BASE 1000 +#define WT_CONNECTION_STATS_BASE 1000 struct __wt_connection_stats { - int64_t lsm_work_queue_app; - int64_t lsm_work_queue_manager; - int64_t lsm_rows_merged; - int64_t lsm_checkpoint_throttle; - int64_t lsm_merge_throttle; - int64_t lsm_work_queue_switch; - int64_t lsm_work_units_discarded; - int64_t lsm_work_units_done; - int64_t lsm_work_units_created; - int64_t lsm_work_queue_max; - int64_t async_cur_queue; - int64_t async_max_queue; - int64_t async_alloc_race; - int64_t async_flush; - int64_t async_alloc_view; - int64_t async_full; - int64_t async_nowork; - int64_t async_op_alloc; - int64_t async_op_compact; - int64_t async_op_insert; - int64_t async_op_remove; - int64_t async_op_search; - int64_t async_op_update; - int64_t block_preload; - int64_t block_read; - int64_t block_write; - int64_t block_byte_read; - int64_t block_byte_write; - int64_t block_byte_write_checkpoint; - int64_t block_map_read; - int64_t block_byte_map_read; - int64_t cache_read_app_count; - int64_t cache_read_app_time; - int64_t cache_write_app_count; - int64_t cache_write_app_time; - int64_t cache_bytes_image; - int64_t cache_bytes_lookaside; - int64_t cache_bytes_inuse; - int64_t cache_bytes_dirty_total; - int64_t cache_bytes_other; - int64_t cache_bytes_read; - int64_t cache_bytes_write; - int64_t cache_lookaside_cursor_wait_application; - int64_t cache_lookaside_cursor_wait_internal; - int64_t cache_lookaside_score; - int64_t cache_lookaside_entries; - int64_t cache_lookaside_insert; - int64_t cache_lookaside_ondisk_max; - int64_t cache_lookaside_ondisk; - int64_t cache_lookaside_remove; - int64_t cache_eviction_checkpoint; - int64_t cache_eviction_get_ref; - int64_t cache_eviction_get_ref_empty; - int64_t cache_eviction_get_ref_empty2; - int64_t cache_eviction_aggressive_set; - int64_t cache_eviction_empty_score; - int64_t cache_eviction_walk_passes; - int64_t cache_eviction_queue_empty; - int64_t cache_eviction_queue_not_empty; - int64_t 
cache_eviction_server_evicting; - int64_t cache_eviction_server_slept; - int64_t cache_eviction_slow; - int64_t cache_eviction_walk_leaf_notfound; - int64_t cache_eviction_walk_internal_wait; - int64_t cache_eviction_walk_internal_yield; - int64_t cache_eviction_state; - int64_t cache_eviction_target_page_lt10; - int64_t cache_eviction_target_page_lt32; - int64_t cache_eviction_target_page_ge128; - int64_t cache_eviction_target_page_lt64; - int64_t cache_eviction_target_page_lt128; - int64_t cache_eviction_walks_abandoned; - int64_t cache_eviction_walks_stopped; - int64_t cache_eviction_walks_gave_up_no_targets; - int64_t cache_eviction_walks_gave_up_ratio; - int64_t cache_eviction_walks_ended; - int64_t cache_eviction_walk_from_root; - int64_t cache_eviction_walk_saved_pos; - int64_t cache_eviction_active_workers; - int64_t cache_eviction_worker_created; - int64_t cache_eviction_worker_evicting; - int64_t cache_eviction_worker_removed; - int64_t cache_eviction_stable_state_workers; - int64_t cache_eviction_walks_active; - int64_t cache_eviction_walks_started; - int64_t cache_eviction_force_retune; - int64_t cache_eviction_force_clean; - int64_t cache_eviction_force_clean_time; - int64_t cache_eviction_force_dirty; - int64_t cache_eviction_force_dirty_time; - int64_t cache_eviction_force_delete; - int64_t cache_eviction_force; - int64_t cache_eviction_force_fail; - int64_t cache_eviction_force_fail_time; - int64_t cache_eviction_hazard; - int64_t cache_hazard_checks; - int64_t cache_hazard_walks; - int64_t cache_hazard_max; - int64_t cache_inmem_splittable; - int64_t cache_inmem_split; - int64_t cache_eviction_internal; - int64_t cache_eviction_split_internal; - int64_t cache_eviction_split_leaf; - int64_t cache_bytes_max; - int64_t cache_eviction_maximum_page_size; - int64_t cache_eviction_dirty; - int64_t cache_eviction_app_dirty; - int64_t cache_timed_out_ops; - int64_t cache_read_overflow; - int64_t cache_eviction_deepen; - int64_t cache_write_lookaside; - 
int64_t cache_pages_inuse; - int64_t cache_eviction_app; - int64_t cache_eviction_pages_queued; - int64_t cache_eviction_pages_queued_post_lru; - int64_t cache_eviction_pages_queued_urgent; - int64_t cache_eviction_pages_queued_oldest; - int64_t cache_read; - int64_t cache_read_deleted; - int64_t cache_read_deleted_prepared; - int64_t cache_read_lookaside; - int64_t cache_read_lookaside_checkpoint; - int64_t cache_read_lookaside_skipped; - int64_t cache_read_lookaside_delay; - int64_t cache_read_lookaside_delay_checkpoint; - int64_t cache_pages_requested; - int64_t cache_eviction_pages_seen; - int64_t cache_eviction_fail; - int64_t cache_eviction_walk; - int64_t cache_write; - int64_t cache_write_restore; - int64_t cache_overhead; - int64_t cache_bytes_internal; - int64_t cache_bytes_leaf; - int64_t cache_bytes_dirty; - int64_t cache_pages_dirty; - int64_t cache_eviction_clean; - int64_t fsync_all_fh_total; - int64_t fsync_all_fh; - int64_t fsync_all_time; - int64_t capacity_bytes_read; - int64_t capacity_bytes_ckpt; - int64_t capacity_bytes_evict; - int64_t capacity_bytes_log; - int64_t capacity_bytes_written; - int64_t capacity_threshold; - int64_t capacity_time_total; - int64_t capacity_time_ckpt; - int64_t capacity_time_evict; - int64_t capacity_time_log; - int64_t capacity_time_read; - int64_t cond_auto_wait_reset; - int64_t cond_auto_wait; - int64_t time_travel; - int64_t file_open; - int64_t memory_allocation; - int64_t memory_free; - int64_t memory_grow; - int64_t cond_wait; - int64_t rwlock_read; - int64_t rwlock_write; - int64_t fsync_io; - int64_t read_io; - int64_t write_io; - int64_t cursor_cached_count; - int64_t cursor_insert_bulk; - int64_t cursor_cache; - int64_t cursor_create; - int64_t cursor_insert; - int64_t cursor_insert_bytes; - int64_t cursor_modify; - int64_t cursor_modify_bytes; - int64_t cursor_modify_bytes_touch; - int64_t cursor_next; - int64_t cursor_restart; - int64_t cursor_prev; - int64_t cursor_remove; - int64_t 
cursor_remove_bytes; - int64_t cursor_reserve; - int64_t cursor_reset; - int64_t cursor_search; - int64_t cursor_search_near; - int64_t cursor_sweep_buckets; - int64_t cursor_sweep_closed; - int64_t cursor_sweep_examined; - int64_t cursor_sweep; - int64_t cursor_truncate; - int64_t cursor_update; - int64_t cursor_update_bytes; - int64_t cursor_update_bytes_changed; - int64_t cursor_reopen; - int64_t cursor_open_count; - int64_t dh_conn_handle_size; - int64_t dh_conn_handle_count; - int64_t dh_sweep_ref; - int64_t dh_sweep_close; - int64_t dh_sweep_remove; - int64_t dh_sweep_tod; - int64_t dh_sweeps; - int64_t dh_session_handles; - int64_t dh_session_sweeps; - int64_t lock_checkpoint_count; - int64_t lock_checkpoint_wait_application; - int64_t lock_checkpoint_wait_internal; - int64_t lock_dhandle_wait_application; - int64_t lock_dhandle_wait_internal; - int64_t lock_dhandle_read_count; - int64_t lock_dhandle_write_count; - int64_t lock_durable_timestamp_wait_application; - int64_t lock_durable_timestamp_wait_internal; - int64_t lock_durable_timestamp_read_count; - int64_t lock_durable_timestamp_write_count; - int64_t lock_metadata_count; - int64_t lock_metadata_wait_application; - int64_t lock_metadata_wait_internal; - int64_t lock_read_timestamp_wait_application; - int64_t lock_read_timestamp_wait_internal; - int64_t lock_read_timestamp_read_count; - int64_t lock_read_timestamp_write_count; - int64_t lock_schema_count; - int64_t lock_schema_wait_application; - int64_t lock_schema_wait_internal; - int64_t lock_table_wait_application; - int64_t lock_table_wait_internal; - int64_t lock_table_read_count; - int64_t lock_table_write_count; - int64_t lock_txn_global_wait_application; - int64_t lock_txn_global_wait_internal; - int64_t lock_txn_global_read_count; - int64_t lock_txn_global_write_count; - int64_t log_slot_switch_busy; - int64_t log_force_archive_sleep; - int64_t log_bytes_payload; - int64_t log_bytes_written; - int64_t log_zero_fills; - int64_t log_flush; - 
int64_t log_force_write; - int64_t log_force_write_skip; - int64_t log_compress_writes; - int64_t log_compress_write_fails; - int64_t log_compress_small; - int64_t log_release_write_lsn; - int64_t log_scans; - int64_t log_scan_rereads; - int64_t log_write_lsn; - int64_t log_write_lsn_skip; - int64_t log_sync; - int64_t log_sync_duration; - int64_t log_sync_dir; - int64_t log_sync_dir_duration; - int64_t log_writes; - int64_t log_slot_consolidated; - int64_t log_max_filesize; - int64_t log_prealloc_max; - int64_t log_prealloc_missed; - int64_t log_prealloc_files; - int64_t log_prealloc_used; - int64_t log_scan_records; - int64_t log_slot_close_race; - int64_t log_slot_close_unbuf; - int64_t log_slot_closes; - int64_t log_slot_races; - int64_t log_slot_yield_race; - int64_t log_slot_immediate; - int64_t log_slot_yield_close; - int64_t log_slot_yield_sleep; - int64_t log_slot_yield; - int64_t log_slot_active_closed; - int64_t log_slot_yield_duration; - int64_t log_slot_no_free_slots; - int64_t log_slot_unbuffered; - int64_t log_compress_mem; - int64_t log_buffer_size; - int64_t log_compress_len; - int64_t log_slot_coalesced; - int64_t log_close_yields; - int64_t perf_hist_fsread_latency_lt50; - int64_t perf_hist_fsread_latency_lt100; - int64_t perf_hist_fsread_latency_lt250; - int64_t perf_hist_fsread_latency_lt500; - int64_t perf_hist_fsread_latency_lt1000; - int64_t perf_hist_fsread_latency_gt1000; - int64_t perf_hist_fswrite_latency_lt50; - int64_t perf_hist_fswrite_latency_lt100; - int64_t perf_hist_fswrite_latency_lt250; - int64_t perf_hist_fswrite_latency_lt500; - int64_t perf_hist_fswrite_latency_lt1000; - int64_t perf_hist_fswrite_latency_gt1000; - int64_t perf_hist_opread_latency_lt250; - int64_t perf_hist_opread_latency_lt500; - int64_t perf_hist_opread_latency_lt1000; - int64_t perf_hist_opread_latency_lt10000; - int64_t perf_hist_opread_latency_gt10000; - int64_t perf_hist_opwrite_latency_lt250; - int64_t perf_hist_opwrite_latency_lt500; - int64_t 
perf_hist_opwrite_latency_lt1000; - int64_t perf_hist_opwrite_latency_lt10000; - int64_t perf_hist_opwrite_latency_gt10000; - int64_t rec_page_delete_fast; - int64_t rec_pages; - int64_t rec_pages_eviction; - int64_t rec_page_delete; - int64_t rec_split_stashed_bytes; - int64_t rec_split_stashed_objects; - int64_t session_open; - int64_t session_query_ts; - int64_t session_table_alter_fail; - int64_t session_table_alter_success; - int64_t session_table_alter_skip; - int64_t session_table_compact_fail; - int64_t session_table_compact_success; - int64_t session_table_create_fail; - int64_t session_table_create_success; - int64_t session_table_drop_fail; - int64_t session_table_drop_success; - int64_t session_table_import_fail; - int64_t session_table_import_success; - int64_t session_table_rebalance_fail; - int64_t session_table_rebalance_success; - int64_t session_table_rename_fail; - int64_t session_table_rename_success; - int64_t session_table_salvage_fail; - int64_t session_table_salvage_success; - int64_t session_table_truncate_fail; - int64_t session_table_truncate_success; - int64_t session_table_verify_fail; - int64_t session_table_verify_success; - int64_t thread_fsync_active; - int64_t thread_read_active; - int64_t thread_write_active; - int64_t application_evict_time; - int64_t application_cache_time; - int64_t txn_release_blocked; - int64_t conn_close_blocked_lsm; - int64_t dhandle_lock_blocked; - int64_t page_index_slot_ref_blocked; - int64_t log_server_sync_blocked; - int64_t prepared_transition_blocked_page; - int64_t page_busy_blocked; - int64_t page_forcible_evict_blocked; - int64_t page_locked_blocked; - int64_t page_read_blocked; - int64_t page_sleep; - int64_t page_del_rollback_blocked; - int64_t child_modify_blocked_page; - int64_t txn_prepared_updates_count; - int64_t txn_prepared_updates_lookaside_inserts; - int64_t txn_prepared_updates_resolved; - int64_t txn_durable_queue_walked; - int64_t txn_durable_queue_empty; - int64_t 
txn_durable_queue_head; - int64_t txn_durable_queue_inserts; - int64_t txn_durable_queue_len; - int64_t txn_snapshots_created; - int64_t txn_snapshots_dropped; - int64_t txn_prepare; - int64_t txn_prepare_commit; - int64_t txn_prepare_active; - int64_t txn_prepare_rollback; - int64_t txn_query_ts; - int64_t txn_read_queue_walked; - int64_t txn_read_queue_empty; - int64_t txn_read_queue_head; - int64_t txn_read_queue_inserts; - int64_t txn_read_queue_len; - int64_t txn_rollback_to_stable; - int64_t txn_rollback_upd_aborted; - int64_t txn_rollback_las_removed; - int64_t txn_set_ts; - int64_t txn_set_ts_durable; - int64_t txn_set_ts_durable_upd; - int64_t txn_set_ts_oldest; - int64_t txn_set_ts_oldest_upd; - int64_t txn_set_ts_stable; - int64_t txn_set_ts_stable_upd; - int64_t txn_begin; - int64_t txn_checkpoint_running; - int64_t txn_checkpoint_generation; - int64_t txn_checkpoint_time_max; - int64_t txn_checkpoint_time_min; - int64_t txn_checkpoint_time_recent; - int64_t txn_checkpoint_scrub_target; - int64_t txn_checkpoint_scrub_time; - int64_t txn_checkpoint_time_total; - int64_t txn_checkpoint; - int64_t txn_checkpoint_skipped; - int64_t txn_fail_cache; - int64_t txn_checkpoint_fsync_post; - int64_t txn_checkpoint_fsync_post_duration; - int64_t txn_pinned_range; - int64_t txn_pinned_checkpoint_range; - int64_t txn_pinned_snapshot_range; - int64_t txn_pinned_timestamp; - int64_t txn_pinned_timestamp_checkpoint; - int64_t txn_pinned_timestamp_reader; - int64_t txn_pinned_timestamp_oldest; - int64_t txn_timestamp_oldest_active_read; - int64_t txn_sync; - int64_t txn_commit; - int64_t txn_rollback; - int64_t txn_update_conflict; + int64_t lsm_work_queue_app; + int64_t lsm_work_queue_manager; + int64_t lsm_rows_merged; + int64_t lsm_checkpoint_throttle; + int64_t lsm_merge_throttle; + int64_t lsm_work_queue_switch; + int64_t lsm_work_units_discarded; + int64_t lsm_work_units_done; + int64_t lsm_work_units_created; + int64_t lsm_work_queue_max; + int64_t 
async_cur_queue; + int64_t async_max_queue; + int64_t async_alloc_race; + int64_t async_flush; + int64_t async_alloc_view; + int64_t async_full; + int64_t async_nowork; + int64_t async_op_alloc; + int64_t async_op_compact; + int64_t async_op_insert; + int64_t async_op_remove; + int64_t async_op_search; + int64_t async_op_update; + int64_t block_preload; + int64_t block_read; + int64_t block_write; + int64_t block_byte_read; + int64_t block_byte_write; + int64_t block_byte_write_checkpoint; + int64_t block_map_read; + int64_t block_byte_map_read; + int64_t cache_read_app_count; + int64_t cache_read_app_time; + int64_t cache_write_app_count; + int64_t cache_write_app_time; + int64_t cache_bytes_image; + int64_t cache_bytes_lookaside; + int64_t cache_bytes_inuse; + int64_t cache_bytes_dirty_total; + int64_t cache_bytes_other; + int64_t cache_bytes_read; + int64_t cache_bytes_write; + int64_t cache_lookaside_cursor_wait_application; + int64_t cache_lookaside_cursor_wait_internal; + int64_t cache_lookaside_score; + int64_t cache_lookaside_entries; + int64_t cache_lookaside_insert; + int64_t cache_lookaside_ondisk_max; + int64_t cache_lookaside_ondisk; + int64_t cache_lookaside_remove; + int64_t cache_eviction_checkpoint; + int64_t cache_eviction_get_ref; + int64_t cache_eviction_get_ref_empty; + int64_t cache_eviction_get_ref_empty2; + int64_t cache_eviction_aggressive_set; + int64_t cache_eviction_empty_score; + int64_t cache_eviction_walk_passes; + int64_t cache_eviction_queue_empty; + int64_t cache_eviction_queue_not_empty; + int64_t cache_eviction_server_evicting; + int64_t cache_eviction_server_slept; + int64_t cache_eviction_slow; + int64_t cache_eviction_walk_leaf_notfound; + int64_t cache_eviction_walk_internal_wait; + int64_t cache_eviction_walk_internal_yield; + int64_t cache_eviction_state; + int64_t cache_eviction_target_page_lt10; + int64_t cache_eviction_target_page_lt32; + int64_t cache_eviction_target_page_ge128; + int64_t 
cache_eviction_target_page_lt64; + int64_t cache_eviction_target_page_lt128; + int64_t cache_eviction_walks_abandoned; + int64_t cache_eviction_walks_stopped; + int64_t cache_eviction_walks_gave_up_no_targets; + int64_t cache_eviction_walks_gave_up_ratio; + int64_t cache_eviction_walks_ended; + int64_t cache_eviction_walk_from_root; + int64_t cache_eviction_walk_saved_pos; + int64_t cache_eviction_active_workers; + int64_t cache_eviction_worker_created; + int64_t cache_eviction_worker_evicting; + int64_t cache_eviction_worker_removed; + int64_t cache_eviction_stable_state_workers; + int64_t cache_eviction_walks_active; + int64_t cache_eviction_walks_started; + int64_t cache_eviction_force_retune; + int64_t cache_eviction_force_clean; + int64_t cache_eviction_force_clean_time; + int64_t cache_eviction_force_dirty; + int64_t cache_eviction_force_dirty_time; + int64_t cache_eviction_force_delete; + int64_t cache_eviction_force; + int64_t cache_eviction_force_fail; + int64_t cache_eviction_force_fail_time; + int64_t cache_eviction_hazard; + int64_t cache_hazard_checks; + int64_t cache_hazard_walks; + int64_t cache_hazard_max; + int64_t cache_inmem_splittable; + int64_t cache_inmem_split; + int64_t cache_eviction_internal; + int64_t cache_eviction_split_internal; + int64_t cache_eviction_split_leaf; + int64_t cache_bytes_max; + int64_t cache_eviction_maximum_page_size; + int64_t cache_eviction_dirty; + int64_t cache_eviction_app_dirty; + int64_t cache_timed_out_ops; + int64_t cache_read_overflow; + int64_t cache_eviction_deepen; + int64_t cache_write_lookaside; + int64_t cache_pages_inuse; + int64_t cache_eviction_app; + int64_t cache_eviction_pages_queued; + int64_t cache_eviction_pages_queued_post_lru; + int64_t cache_eviction_pages_queued_urgent; + int64_t cache_eviction_pages_queued_oldest; + int64_t cache_read; + int64_t cache_read_deleted; + int64_t cache_read_deleted_prepared; + int64_t cache_read_lookaside; + int64_t cache_read_lookaside_checkpoint; + int64_t 
cache_read_lookaside_skipped; + int64_t cache_read_lookaside_delay; + int64_t cache_read_lookaside_delay_checkpoint; + int64_t cache_pages_requested; + int64_t cache_eviction_pages_seen; + int64_t cache_eviction_fail; + int64_t cache_eviction_walk; + int64_t cache_write; + int64_t cache_write_restore; + int64_t cache_overhead; + int64_t cache_bytes_internal; + int64_t cache_bytes_leaf; + int64_t cache_bytes_dirty; + int64_t cache_pages_dirty; + int64_t cache_eviction_clean; + int64_t fsync_all_fh_total; + int64_t fsync_all_fh; + int64_t fsync_all_time; + int64_t capacity_bytes_read; + int64_t capacity_bytes_ckpt; + int64_t capacity_bytes_evict; + int64_t capacity_bytes_log; + int64_t capacity_bytes_written; + int64_t capacity_threshold; + int64_t capacity_time_total; + int64_t capacity_time_ckpt; + int64_t capacity_time_evict; + int64_t capacity_time_log; + int64_t capacity_time_read; + int64_t cond_auto_wait_reset; + int64_t cond_auto_wait; + int64_t time_travel; + int64_t file_open; + int64_t memory_allocation; + int64_t memory_free; + int64_t memory_grow; + int64_t cond_wait; + int64_t rwlock_read; + int64_t rwlock_write; + int64_t fsync_io; + int64_t read_io; + int64_t write_io; + int64_t cursor_cached_count; + int64_t cursor_insert_bulk; + int64_t cursor_cache; + int64_t cursor_create; + int64_t cursor_insert; + int64_t cursor_insert_bytes; + int64_t cursor_modify; + int64_t cursor_modify_bytes; + int64_t cursor_modify_bytes_touch; + int64_t cursor_next; + int64_t cursor_restart; + int64_t cursor_prev; + int64_t cursor_remove; + int64_t cursor_remove_bytes; + int64_t cursor_reserve; + int64_t cursor_reset; + int64_t cursor_search; + int64_t cursor_search_near; + int64_t cursor_sweep_buckets; + int64_t cursor_sweep_closed; + int64_t cursor_sweep_examined; + int64_t cursor_sweep; + int64_t cursor_truncate; + int64_t cursor_update; + int64_t cursor_update_bytes; + int64_t cursor_update_bytes_changed; + int64_t cursor_reopen; + int64_t cursor_open_count; + int64_t 
dh_conn_handle_size; + int64_t dh_conn_handle_count; + int64_t dh_sweep_ref; + int64_t dh_sweep_close; + int64_t dh_sweep_remove; + int64_t dh_sweep_tod; + int64_t dh_sweeps; + int64_t dh_session_handles; + int64_t dh_session_sweeps; + int64_t lock_checkpoint_count; + int64_t lock_checkpoint_wait_application; + int64_t lock_checkpoint_wait_internal; + int64_t lock_dhandle_wait_application; + int64_t lock_dhandle_wait_internal; + int64_t lock_dhandle_read_count; + int64_t lock_dhandle_write_count; + int64_t lock_durable_timestamp_wait_application; + int64_t lock_durable_timestamp_wait_internal; + int64_t lock_durable_timestamp_read_count; + int64_t lock_durable_timestamp_write_count; + int64_t lock_metadata_count; + int64_t lock_metadata_wait_application; + int64_t lock_metadata_wait_internal; + int64_t lock_read_timestamp_wait_application; + int64_t lock_read_timestamp_wait_internal; + int64_t lock_read_timestamp_read_count; + int64_t lock_read_timestamp_write_count; + int64_t lock_schema_count; + int64_t lock_schema_wait_application; + int64_t lock_schema_wait_internal; + int64_t lock_table_wait_application; + int64_t lock_table_wait_internal; + int64_t lock_table_read_count; + int64_t lock_table_write_count; + int64_t lock_txn_global_wait_application; + int64_t lock_txn_global_wait_internal; + int64_t lock_txn_global_read_count; + int64_t lock_txn_global_write_count; + int64_t log_slot_switch_busy; + int64_t log_force_archive_sleep; + int64_t log_bytes_payload; + int64_t log_bytes_written; + int64_t log_zero_fills; + int64_t log_flush; + int64_t log_force_write; + int64_t log_force_write_skip; + int64_t log_compress_writes; + int64_t log_compress_write_fails; + int64_t log_compress_small; + int64_t log_release_write_lsn; + int64_t log_scans; + int64_t log_scan_rereads; + int64_t log_write_lsn; + int64_t log_write_lsn_skip; + int64_t log_sync; + int64_t log_sync_duration; + int64_t log_sync_dir; + int64_t log_sync_dir_duration; + int64_t log_writes; + int64_t 
log_slot_consolidated; + int64_t log_max_filesize; + int64_t log_prealloc_max; + int64_t log_prealloc_missed; + int64_t log_prealloc_files; + int64_t log_prealloc_used; + int64_t log_scan_records; + int64_t log_slot_close_race; + int64_t log_slot_close_unbuf; + int64_t log_slot_closes; + int64_t log_slot_races; + int64_t log_slot_yield_race; + int64_t log_slot_immediate; + int64_t log_slot_yield_close; + int64_t log_slot_yield_sleep; + int64_t log_slot_yield; + int64_t log_slot_active_closed; + int64_t log_slot_yield_duration; + int64_t log_slot_no_free_slots; + int64_t log_slot_unbuffered; + int64_t log_compress_mem; + int64_t log_buffer_size; + int64_t log_compress_len; + int64_t log_slot_coalesced; + int64_t log_close_yields; + int64_t perf_hist_fsread_latency_lt50; + int64_t perf_hist_fsread_latency_lt100; + int64_t perf_hist_fsread_latency_lt250; + int64_t perf_hist_fsread_latency_lt500; + int64_t perf_hist_fsread_latency_lt1000; + int64_t perf_hist_fsread_latency_gt1000; + int64_t perf_hist_fswrite_latency_lt50; + int64_t perf_hist_fswrite_latency_lt100; + int64_t perf_hist_fswrite_latency_lt250; + int64_t perf_hist_fswrite_latency_lt500; + int64_t perf_hist_fswrite_latency_lt1000; + int64_t perf_hist_fswrite_latency_gt1000; + int64_t perf_hist_opread_latency_lt250; + int64_t perf_hist_opread_latency_lt500; + int64_t perf_hist_opread_latency_lt1000; + int64_t perf_hist_opread_latency_lt10000; + int64_t perf_hist_opread_latency_gt10000; + int64_t perf_hist_opwrite_latency_lt250; + int64_t perf_hist_opwrite_latency_lt500; + int64_t perf_hist_opwrite_latency_lt1000; + int64_t perf_hist_opwrite_latency_lt10000; + int64_t perf_hist_opwrite_latency_gt10000; + int64_t rec_page_delete_fast; + int64_t rec_pages; + int64_t rec_pages_eviction; + int64_t rec_page_delete; + int64_t rec_split_stashed_bytes; + int64_t rec_split_stashed_objects; + int64_t session_open; + int64_t session_query_ts; + int64_t session_table_alter_fail; + int64_t session_table_alter_success; + 
int64_t session_table_alter_skip; + int64_t session_table_compact_fail; + int64_t session_table_compact_success; + int64_t session_table_create_fail; + int64_t session_table_create_success; + int64_t session_table_drop_fail; + int64_t session_table_drop_success; + int64_t session_table_import_fail; + int64_t session_table_import_success; + int64_t session_table_rebalance_fail; + int64_t session_table_rebalance_success; + int64_t session_table_rename_fail; + int64_t session_table_rename_success; + int64_t session_table_salvage_fail; + int64_t session_table_salvage_success; + int64_t session_table_truncate_fail; + int64_t session_table_truncate_success; + int64_t session_table_verify_fail; + int64_t session_table_verify_success; + int64_t thread_fsync_active; + int64_t thread_read_active; + int64_t thread_write_active; + int64_t application_evict_time; + int64_t application_cache_time; + int64_t txn_release_blocked; + int64_t conn_close_blocked_lsm; + int64_t dhandle_lock_blocked; + int64_t page_index_slot_ref_blocked; + int64_t log_server_sync_blocked; + int64_t prepared_transition_blocked_page; + int64_t page_busy_blocked; + int64_t page_forcible_evict_blocked; + int64_t page_locked_blocked; + int64_t page_read_blocked; + int64_t page_sleep; + int64_t page_del_rollback_blocked; + int64_t child_modify_blocked_page; + int64_t txn_prepared_updates_count; + int64_t txn_prepared_updates_lookaside_inserts; + int64_t txn_prepared_updates_resolved; + int64_t txn_durable_queue_walked; + int64_t txn_durable_queue_empty; + int64_t txn_durable_queue_head; + int64_t txn_durable_queue_inserts; + int64_t txn_durable_queue_len; + int64_t txn_snapshots_created; + int64_t txn_snapshots_dropped; + int64_t txn_prepare; + int64_t txn_prepare_commit; + int64_t txn_prepare_active; + int64_t txn_prepare_rollback; + int64_t txn_query_ts; + int64_t txn_read_queue_walked; + int64_t txn_read_queue_empty; + int64_t txn_read_queue_head; + int64_t txn_read_queue_inserts; + int64_t 
txn_read_queue_len; + int64_t txn_rollback_to_stable; + int64_t txn_rollback_upd_aborted; + int64_t txn_rollback_las_removed; + int64_t txn_set_ts; + int64_t txn_set_ts_durable; + int64_t txn_set_ts_durable_upd; + int64_t txn_set_ts_oldest; + int64_t txn_set_ts_oldest_upd; + int64_t txn_set_ts_stable; + int64_t txn_set_ts_stable_upd; + int64_t txn_begin; + int64_t txn_checkpoint_running; + int64_t txn_checkpoint_generation; + int64_t txn_checkpoint_time_max; + int64_t txn_checkpoint_time_min; + int64_t txn_checkpoint_time_recent; + int64_t txn_checkpoint_scrub_target; + int64_t txn_checkpoint_scrub_time; + int64_t txn_checkpoint_time_total; + int64_t txn_checkpoint; + int64_t txn_checkpoint_skipped; + int64_t txn_fail_cache; + int64_t txn_checkpoint_fsync_post; + int64_t txn_checkpoint_fsync_post_duration; + int64_t txn_pinned_range; + int64_t txn_pinned_checkpoint_range; + int64_t txn_pinned_snapshot_range; + int64_t txn_pinned_timestamp; + int64_t txn_pinned_timestamp_checkpoint; + int64_t txn_pinned_timestamp_reader; + int64_t txn_pinned_timestamp_oldest; + int64_t txn_timestamp_oldest_active_read; + int64_t txn_sync; + int64_t txn_commit; + int64_t txn_rollback; + int64_t txn_update_conflict; }; /* * Statistics entries for data sources. 
*/ -#define WT_DSRC_STATS_BASE 2000 +#define WT_DSRC_STATS_BASE 2000 struct __wt_dsrc_stats { - int64_t bloom_false_positive; - int64_t bloom_hit; - int64_t bloom_miss; - int64_t bloom_page_evict; - int64_t bloom_page_read; - int64_t bloom_count; - int64_t lsm_chunk_count; - int64_t lsm_generation_max; - int64_t lsm_lookup_no_bloom; - int64_t lsm_checkpoint_throttle; - int64_t lsm_merge_throttle; - int64_t bloom_size; - int64_t block_extension; - int64_t block_alloc; - int64_t block_free; - int64_t block_checkpoint_size; - int64_t allocation_size; - int64_t block_reuse_bytes; - int64_t block_magic; - int64_t block_major; - int64_t block_size; - int64_t block_minor; - int64_t btree_checkpoint_generation; - int64_t btree_column_fix; - int64_t btree_column_internal; - int64_t btree_column_rle; - int64_t btree_column_deleted; - int64_t btree_column_variable; - int64_t btree_fixed_len; - int64_t btree_maxintlkey; - int64_t btree_maxintlpage; - int64_t btree_maxleafkey; - int64_t btree_maxleafpage; - int64_t btree_maxleafvalue; - int64_t btree_maximum_depth; - int64_t btree_entries; - int64_t btree_overflow; - int64_t btree_compact_rewrite; - int64_t btree_row_empty_values; - int64_t btree_row_internal; - int64_t btree_row_leaf; - int64_t cache_bytes_inuse; - int64_t cache_bytes_dirty_total; - int64_t cache_bytes_read; - int64_t cache_bytes_write; - int64_t cache_eviction_checkpoint; - int64_t cache_eviction_fail; - int64_t cache_eviction_walk_passes; - int64_t cache_eviction_target_page_lt10; - int64_t cache_eviction_target_page_lt32; - int64_t cache_eviction_target_page_ge128; - int64_t cache_eviction_target_page_lt64; - int64_t cache_eviction_target_page_lt128; - int64_t cache_eviction_walks_abandoned; - int64_t cache_eviction_walks_stopped; - int64_t cache_eviction_walks_gave_up_no_targets; - int64_t cache_eviction_walks_gave_up_ratio; - int64_t cache_eviction_walks_ended; - int64_t cache_eviction_walk_from_root; - int64_t cache_eviction_walk_saved_pos; - int64_t 
cache_eviction_hazard; - int64_t cache_inmem_splittable; - int64_t cache_inmem_split; - int64_t cache_eviction_internal; - int64_t cache_eviction_split_internal; - int64_t cache_eviction_split_leaf; - int64_t cache_eviction_dirty; - int64_t cache_read_overflow; - int64_t cache_eviction_deepen; - int64_t cache_write_lookaside; - int64_t cache_read; - int64_t cache_read_deleted; - int64_t cache_read_deleted_prepared; - int64_t cache_read_lookaside; - int64_t cache_pages_requested; - int64_t cache_eviction_pages_seen; - int64_t cache_write; - int64_t cache_write_restore; - int64_t cache_bytes_dirty; - int64_t cache_eviction_clean; - int64_t cache_state_gen_avg_gap; - int64_t cache_state_avg_written_size; - int64_t cache_state_avg_visited_age; - int64_t cache_state_avg_unvisited_age; - int64_t cache_state_pages_clean; - int64_t cache_state_gen_current; - int64_t cache_state_pages_dirty; - int64_t cache_state_root_entries; - int64_t cache_state_pages_internal; - int64_t cache_state_pages_leaf; - int64_t cache_state_gen_max_gap; - int64_t cache_state_max_pagesize; - int64_t cache_state_min_written_size; - int64_t cache_state_unvisited_count; - int64_t cache_state_smaller_alloc_size; - int64_t cache_state_memory; - int64_t cache_state_queued; - int64_t cache_state_not_queueable; - int64_t cache_state_refs_skipped; - int64_t cache_state_root_size; - int64_t cache_state_pages; - int64_t compress_precomp_intl_max_page_size; - int64_t compress_precomp_leaf_max_page_size; - int64_t compress_read; - int64_t compress_write; - int64_t compress_write_fail; - int64_t compress_write_too_small; - int64_t cursor_insert_bulk; - int64_t cursor_reopen; - int64_t cursor_cache; - int64_t cursor_create; - int64_t cursor_insert; - int64_t cursor_insert_bytes; - int64_t cursor_modify; - int64_t cursor_modify_bytes; - int64_t cursor_modify_bytes_touch; - int64_t cursor_next; - int64_t cursor_open_count; - int64_t cursor_restart; - int64_t cursor_prev; - int64_t cursor_remove; - int64_t 
cursor_remove_bytes; - int64_t cursor_reserve; - int64_t cursor_reset; - int64_t cursor_search; - int64_t cursor_search_near; - int64_t cursor_truncate; - int64_t cursor_update; - int64_t cursor_update_bytes; - int64_t cursor_update_bytes_changed; - int64_t rec_dictionary; - int64_t rec_page_delete_fast; - int64_t rec_suffix_compression; - int64_t rec_multiblock_internal; - int64_t rec_overflow_key_internal; - int64_t rec_prefix_compression; - int64_t rec_multiblock_leaf; - int64_t rec_overflow_key_leaf; - int64_t rec_multiblock_max; - int64_t rec_overflow_value; - int64_t rec_page_match; - int64_t rec_pages; - int64_t rec_pages_eviction; - int64_t rec_page_delete; - int64_t session_compact; - int64_t txn_update_conflict; + int64_t bloom_false_positive; + int64_t bloom_hit; + int64_t bloom_miss; + int64_t bloom_page_evict; + int64_t bloom_page_read; + int64_t bloom_count; + int64_t lsm_chunk_count; + int64_t lsm_generation_max; + int64_t lsm_lookup_no_bloom; + int64_t lsm_checkpoint_throttle; + int64_t lsm_merge_throttle; + int64_t bloom_size; + int64_t block_extension; + int64_t block_alloc; + int64_t block_free; + int64_t block_checkpoint_size; + int64_t allocation_size; + int64_t block_reuse_bytes; + int64_t block_magic; + int64_t block_major; + int64_t block_size; + int64_t block_minor; + int64_t btree_checkpoint_generation; + int64_t btree_column_fix; + int64_t btree_column_internal; + int64_t btree_column_rle; + int64_t btree_column_deleted; + int64_t btree_column_variable; + int64_t btree_fixed_len; + int64_t btree_maxintlkey; + int64_t btree_maxintlpage; + int64_t btree_maxleafkey; + int64_t btree_maxleafpage; + int64_t btree_maxleafvalue; + int64_t btree_maximum_depth; + int64_t btree_entries; + int64_t btree_overflow; + int64_t btree_compact_rewrite; + int64_t btree_row_empty_values; + int64_t btree_row_internal; + int64_t btree_row_leaf; + int64_t cache_bytes_inuse; + int64_t cache_bytes_dirty_total; + int64_t cache_bytes_read; + int64_t 
cache_bytes_write; + int64_t cache_eviction_checkpoint; + int64_t cache_eviction_fail; + int64_t cache_eviction_walk_passes; + int64_t cache_eviction_target_page_lt10; + int64_t cache_eviction_target_page_lt32; + int64_t cache_eviction_target_page_ge128; + int64_t cache_eviction_target_page_lt64; + int64_t cache_eviction_target_page_lt128; + int64_t cache_eviction_walks_abandoned; + int64_t cache_eviction_walks_stopped; + int64_t cache_eviction_walks_gave_up_no_targets; + int64_t cache_eviction_walks_gave_up_ratio; + int64_t cache_eviction_walks_ended; + int64_t cache_eviction_walk_from_root; + int64_t cache_eviction_walk_saved_pos; + int64_t cache_eviction_hazard; + int64_t cache_inmem_splittable; + int64_t cache_inmem_split; + int64_t cache_eviction_internal; + int64_t cache_eviction_split_internal; + int64_t cache_eviction_split_leaf; + int64_t cache_eviction_dirty; + int64_t cache_read_overflow; + int64_t cache_eviction_deepen; + int64_t cache_write_lookaside; + int64_t cache_read; + int64_t cache_read_deleted; + int64_t cache_read_deleted_prepared; + int64_t cache_read_lookaside; + int64_t cache_pages_requested; + int64_t cache_eviction_pages_seen; + int64_t cache_write; + int64_t cache_write_restore; + int64_t cache_bytes_dirty; + int64_t cache_eviction_clean; + int64_t cache_state_gen_avg_gap; + int64_t cache_state_avg_written_size; + int64_t cache_state_avg_visited_age; + int64_t cache_state_avg_unvisited_age; + int64_t cache_state_pages_clean; + int64_t cache_state_gen_current; + int64_t cache_state_pages_dirty; + int64_t cache_state_root_entries; + int64_t cache_state_pages_internal; + int64_t cache_state_pages_leaf; + int64_t cache_state_gen_max_gap; + int64_t cache_state_max_pagesize; + int64_t cache_state_min_written_size; + int64_t cache_state_unvisited_count; + int64_t cache_state_smaller_alloc_size; + int64_t cache_state_memory; + int64_t cache_state_queued; + int64_t cache_state_not_queueable; + int64_t cache_state_refs_skipped; + int64_t 
cache_state_root_size; + int64_t cache_state_pages; + int64_t compress_precomp_intl_max_page_size; + int64_t compress_precomp_leaf_max_page_size; + int64_t compress_read; + int64_t compress_write; + int64_t compress_write_fail; + int64_t compress_write_too_small; + int64_t cursor_insert_bulk; + int64_t cursor_reopen; + int64_t cursor_cache; + int64_t cursor_create; + int64_t cursor_insert; + int64_t cursor_insert_bytes; + int64_t cursor_modify; + int64_t cursor_modify_bytes; + int64_t cursor_modify_bytes_touch; + int64_t cursor_next; + int64_t cursor_open_count; + int64_t cursor_restart; + int64_t cursor_prev; + int64_t cursor_remove; + int64_t cursor_remove_bytes; + int64_t cursor_reserve; + int64_t cursor_reset; + int64_t cursor_search; + int64_t cursor_search_near; + int64_t cursor_truncate; + int64_t cursor_update; + int64_t cursor_update_bytes; + int64_t cursor_update_bytes_changed; + int64_t rec_dictionary; + int64_t rec_page_delete_fast; + int64_t rec_suffix_compression; + int64_t rec_multiblock_internal; + int64_t rec_overflow_key_internal; + int64_t rec_prefix_compression; + int64_t rec_multiblock_leaf; + int64_t rec_overflow_key_leaf; + int64_t rec_multiblock_max; + int64_t rec_overflow_value; + int64_t rec_page_match; + int64_t rec_pages; + int64_t rec_pages_eviction; + int64_t rec_page_delete; + int64_t session_compact; + int64_t txn_update_conflict; }; /* * Statistics entries for join cursors. */ -#define WT_JOIN_STATS_BASE 3000 +#define WT_JOIN_STATS_BASE 3000 struct __wt_join_stats { - int64_t main_access; - int64_t bloom_false_positive; - int64_t membership_check; - int64_t bloom_insert; - int64_t iterated; + int64_t main_access; + int64_t bloom_false_positive; + int64_t membership_check; + int64_t bloom_insert; + int64_t iterated; }; /* * Statistics entries for session. 
*/ -#define WT_SESSION_STATS_BASE 4000 +#define WT_SESSION_STATS_BASE 4000 struct __wt_session_stats { - int64_t bytes_read; - int64_t bytes_write; - int64_t lock_dhandle_wait; - int64_t read_time; - int64_t write_time; - int64_t lock_schema_wait; - int64_t cache_time; + int64_t bytes_read; + int64_t bytes_write; + int64_t lock_dhandle_wait; + int64_t read_time; + int64_t write_time; + int64_t lock_schema_wait; + int64_t cache_time; }; /* Statistics section: END */ diff --git a/src/third_party/wiredtiger/src/include/swap.h b/src/third_party/wiredtiger/src/include/swap.h index 30cdf0d08d2..d5129add260 100644 --- a/src/third_party/wiredtiger/src/include/swap.h +++ b/src/third_party/wiredtiger/src/include/swap.h @@ -8,86 +8,73 @@ #if defined(_MSC_VER) && (_MSC_VER >= 1300) #include <stdlib.h> -#define __wt_bswap16(v) _byteswap_ushort(v) -#define __wt_bswap32(v) _byteswap_ulong(v) -#define __wt_bswap64(v) _byteswap_uint64(v) -#elif defined(__clang__) && \ - defined(__clang_major__) && defined(__clang_minor__) && \ - (__clang_major__ >= 3) && (__clang_minor__ >= 1) +#define __wt_bswap16(v) _byteswap_ushort(v) +#define __wt_bswap32(v) _byteswap_ulong(v) +#define __wt_bswap64(v) _byteswap_uint64(v) +#elif defined(__clang__) && defined(__clang_major__) && defined(__clang_minor__) && \ + (__clang_major__ >= 3) && (__clang_minor__ >= 1) #if __has_builtin(__builtin_bswap16) -#define __wt_bswap16(v) __builtin_bswap16(v) +#define __wt_bswap16(v) __builtin_bswap16(v) #endif #if __has_builtin(__builtin_bswap32) -#define __wt_bswap32(v) __builtin_bswap32(v) +#define __wt_bswap32(v) __builtin_bswap32(v) #endif #if __has_builtin(__builtin_bswap64) -#define __wt_bswap64(v) __builtin_bswap64(v) +#define __wt_bswap64(v) __builtin_bswap64(v) #endif #elif defined(__GNUC__) && (__GNUC__ >= 4) #if __GNUC__ >= 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ >= 3 -#define __wt_bswap32(v) __builtin_bswap32(v) -#define __wt_bswap64(v) __builtin_bswap64(v) +#define __wt_bswap32(v) 
__builtin_bswap32(v) +#define __wt_bswap64(v) __builtin_bswap64(v) #endif #if __GNUC__ >= 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ >= 8 -#define __wt_bswap16(v) __builtin_bswap16(v) +#define __wt_bswap16(v) __builtin_bswap16(v) #endif #elif defined(__sun) #include <sys/byteorder.h> -#define __wt_bswap16(v) BSWAP_16(v) -#define __wt_bswap32(v) BSWAP_32(v) -#define __wt_bswap64(v) BSWAP_64(v) +#define __wt_bswap16(v) BSWAP_16(v) +#define __wt_bswap32(v) BSWAP_32(v) +#define __wt_bswap64(v) BSWAP_64(v) #endif #if !defined(__wt_bswap64) /* * __wt_bswap64 -- - * 64-bit unsigned little-endian to/from big-endian value. + * 64-bit unsigned little-endian to/from big-endian value. */ static inline uint64_t __wt_bswap64(uint64_t v) { - return ( - /* NOLINTNEXTLINE(misc-redundant-expression) */ - ((v << 56) & 0xff00000000000000UL) | - ((v << 40) & 0x00ff000000000000UL) | - ((v << 24) & 0x0000ff0000000000UL) | - ((v << 8) & 0x000000ff00000000UL) | - ((v >> 8) & 0x00000000ff000000UL) | - ((v >> 24) & 0x0000000000ff0000UL) | - ((v >> 40) & 0x000000000000ff00UL) | - ((v >> 56) & 0x00000000000000ffUL) - ); + return ( + /* NOLINTNEXTLINE(misc-redundant-expression) */ + ((v << 56) & 0xff00000000000000UL) | ((v << 40) & 0x00ff000000000000UL) | + ((v << 24) & 0x0000ff0000000000UL) | ((v << 8) & 0x000000ff00000000UL) | + ((v >> 8) & 0x00000000ff000000UL) | ((v >> 24) & 0x0000000000ff0000UL) | + ((v >> 40) & 0x000000000000ff00UL) | ((v >> 56) & 0x00000000000000ffUL)); } #endif #if !defined(__wt_bswap32) /* * __wt_bswap32 -- - * 32-bit unsigned little-endian to/from big-endian value. + * 32-bit unsigned little-endian to/from big-endian value. 
*/ static inline uint32_t __wt_bswap32(uint32_t v) { - return ( - ((v << 24) & 0xff000000) | - ((v << 8) & 0x00ff0000) | - ((v >> 8) & 0x0000ff00) | - ((v >> 24) & 0x000000ff) - ); + return (((v << 24) & 0xff000000) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) | + ((v >> 24) & 0x000000ff)); } #endif #if !defined(__wt_bswap16) /* * __wt_bswap16 -- - * 16-bit unsigned little-endian to/from big-endian value. + * 16-bit unsigned little-endian to/from big-endian value. */ static inline uint16_t __wt_bswap16(uint16_t v) { - return ( - ((v << 8) & 0xff00) | - ((v >> 8) & 0x00ff) - ); + return (((v << 8) & 0xff00) | ((v >> 8) & 0x00ff)); } #endif diff --git a/src/third_party/wiredtiger/src/include/thread_group.h b/src/third_party/wiredtiger/src/include/thread_group.h index e14d7afd999..f828b44daf4 100644 --- a/src/third_party/wiredtiger/src/include/thread_group.h +++ b/src/third_party/wiredtiger/src/include/thread_group.h @@ -6,42 +6,41 @@ * See the file LICENSE for redistribution information. */ -#define WT_THREAD_PAUSE 10 /* Thread pause timeout in seconds */ +#define WT_THREAD_PAUSE 10 /* Thread pause timeout in seconds */ /* * WT_THREAD -- * Encapsulation of a thread that belongs to a thread group. */ struct __wt_thread { - WT_SESSION_IMPL *session; - u_int id; - wt_thread_t tid; + WT_SESSION_IMPL *session; + u_int id; + wt_thread_t tid; - /* - * WT_THREAD and thread-group function flags, merged because - * WT_THREAD_PANIC_FAIL appears in both groups. - */ +/* + * WT_THREAD and thread-group function flags, merged because WT_THREAD_PANIC_FAIL appears in both + * groups. 
+ */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_THREAD_ACTIVE 0x01u /* thread is active or paused */ -#define WT_THREAD_CAN_WAIT 0x02u /* WT_SESSION_CAN_WAIT */ -#define WT_THREAD_LOOKASIDE 0x04u /* open lookaside cursor */ -#define WT_THREAD_PANIC_FAIL 0x08u /* panic if the thread fails */ -#define WT_THREAD_RUN 0x10u /* thread is running */ -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_THREAD_ACTIVE 0x01u /* thread is active or paused */ +#define WT_THREAD_CAN_WAIT 0x02u /* WT_SESSION_CAN_WAIT */ +#define WT_THREAD_LOOKASIDE 0x04u /* open lookaside cursor */ +#define WT_THREAD_PANIC_FAIL 0x08u /* panic if the thread fails */ +#define WT_THREAD_RUN 0x10u /* thread is running */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; - /* - * Condition signalled when a thread becomes active. Paused - * threads wait on this condition. - */ - WT_CONDVAR *pause_cond; + /* + * Condition signalled when a thread becomes active. Paused threads wait on this condition. + */ + WT_CONDVAR *pause_cond; - /* The check function used by all threads. */ - bool (*chk_func)(WT_SESSION_IMPL *session); - /* The runner function used by all threads. */ - int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context); - /* The stop function used by all threads. */ - int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context); + /* The check function used by all threads. */ + bool (*chk_func)(WT_SESSION_IMPL *session); + /* The runner function used by all threads. */ + int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context); + /* The stop function used by all threads. */ + int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context); }; /* @@ -49,34 +48,33 @@ struct __wt_thread { * Encapsulation of a group of utility threads. 
*/ struct __wt_thread_group { - uint32_t alloc; /* Size of allocated group */ - uint32_t max; /* Max threads in group */ - uint32_t min; /* Min threads in group */ - uint32_t current_threads;/* Number of active threads */ + uint32_t alloc; /* Size of allocated group */ + uint32_t max; /* Max threads in group */ + uint32_t min; /* Min threads in group */ + uint32_t current_threads; /* Number of active threads */ - const char *name; /* Name */ + const char *name; /* Name */ - WT_RWLOCK lock; /* Protects group changes */ + WT_RWLOCK lock; /* Protects group changes */ - /* - * Condition signalled when wanting to wake up threads that are - * part of the group - for example when shutting down. This condition - * can also be used by group owners to ensure state changes are noticed. - */ - WT_CONDVAR *wait_cond; + /* + * Condition signalled when wanting to wake up threads that are part of the group - for example + * when shutting down. This condition can also be used by group owners to ensure state changes + * are noticed. + */ + WT_CONDVAR *wait_cond; - /* - * The threads need to be held in an array of arrays, not an array of - * structures because the array is reallocated as it grows, which - * causes threads to loose track of their context is realloc moves the - * memory. - */ - WT_THREAD **threads; + /* + * The threads need to be held in an array of arrays, not an array of structures because the + * array is reallocated as it grows, which causes threads to loose track of their context is + * realloc moves the memory. + */ + WT_THREAD **threads; - /* The check function used by all threads. */ - bool (*chk_func)(WT_SESSION_IMPL *session); - /* The runner function used by all threads. */ - int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context); - /* The stop function used by all threads. May be NULL */ - int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context); + /* The check function used by all threads. 
*/ + bool (*chk_func)(WT_SESSION_IMPL *session); + /* The runner function used by all threads. */ + int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context); + /* The stop function used by all threads. May be NULL */ + int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context); }; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index 281249d64b7..e67f680b076 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -6,34 +6,34 @@ * See the file LICENSE for redistribution information. */ -#define WT_TXN_NONE 0 /* Beginning of time */ -#define WT_TXN_FIRST 1 /* First transaction to run */ -#define WT_TXN_MAX (UINT64_MAX - 10) /* End of time */ -#define WT_TXN_ABORTED UINT64_MAX /* Update rolled back */ +#define WT_TXN_NONE 0 /* Beginning of time */ +#define WT_TXN_FIRST 1 /* First transaction to run */ +#define WT_TXN_MAX (UINT64_MAX - 10) /* End of time */ +#define WT_TXN_ABORTED UINT64_MAX /* Update rolled back */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_LOG_CKPT_CLEANUP 0x01u -#define WT_TXN_LOG_CKPT_PREPARE 0x02u -#define WT_TXN_LOG_CKPT_START 0x04u -#define WT_TXN_LOG_CKPT_STOP 0x08u -#define WT_TXN_LOG_CKPT_SYNC 0x10u +#define WT_TXN_LOG_CKPT_CLEANUP 0x01u +#define WT_TXN_LOG_CKPT_PREPARE 0x02u +#define WT_TXN_LOG_CKPT_START 0x04u +#define WT_TXN_LOG_CKPT_STOP 0x08u +#define WT_TXN_LOG_CKPT_SYNC 0x10u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_OLDEST_STRICT 0x1u -#define WT_TXN_OLDEST_WAIT 0x2u +#define WT_TXN_OLDEST_STRICT 0x1u +#define WT_TXN_OLDEST_WAIT 0x2u /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_TS_ALREADY_LOCKED 0x1u -#define WT_TXN_TS_INCLUDE_CKPT 0x2u -#define WT_TXN_TS_INCLUDE_OLDEST 0x4u +#define WT_TXN_TS_ALREADY_LOCKED 0x1u +#define WT_TXN_TS_INCLUDE_CKPT 0x2u +#define 
WT_TXN_TS_INCLUDE_OLDEST 0x4u /* AUTOMATIC FLAG VALUE GENERATION STOP */ typedef enum { - WT_VISIBLE_FALSE=0, /* Not a visible update */ - WT_VISIBLE_PREPARE=1, /* Prepared update */ - WT_VISIBLE_TRUE=2 /* A visible update */ + WT_VISIBLE_FALSE = 0, /* Not a visible update */ + WT_VISIBLE_PREPARE = 1, /* Prepared update */ + WT_VISIBLE_TRUE = 2 /* A visible update */ } WT_VISIBLE_TYPE; /* @@ -43,19 +43,16 @@ typedef enum { * transaction), WT_TXN_NONE is smaller than any possible ID (visible to all * running transactions). */ -#define WT_TXNID_LE(t1, t2) \ - ((t1) <= (t2)) +#define WT_TXNID_LE(t1, t2) ((t1) <= (t2)) -#define WT_TXNID_LT(t1, t2) \ - ((t1) < (t2)) +#define WT_TXNID_LT(t1, t2) ((t1) < (t2)) -#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id]) +#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id]) -#define WT_SESSION_IS_CHECKPOINT(s) \ - ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id) +#define WT_SESSION_IS_CHECKPOINT(s) ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id) -#define WT_TS_NONE 0 /* Beginning of time */ -#define WT_TS_MAX UINT64_MAX /* End of time */ +#define WT_TS_NONE 0 /* Beginning of time */ +#define WT_TS_MAX UINT64_MAX /* End of time */ /* * We format timestamps in a couple of ways, declare appropriate sized buffers. @@ -64,8 +61,8 @@ typedef enum { * 2x the maximum digits from a 4B unsigned integer + 3. Both sizes include a * trailing nul byte as well. */ -#define WT_TS_HEX_STRING_SIZE (2 * sizeof(wt_timestamp_t) + 1) -#define WT_TS_INT_STRING_SIZE (2 * 10 + 3 + 1) +#define WT_TS_HEX_STRING_SIZE (2 * sizeof(wt_timestamp_t) + 1) +#define WT_TS_INT_STRING_SIZE (2 * 10 + 3 + 1) /* * Perform an operation at the specified isolation level. @@ -75,123 +72,123 @@ typedef enum { * snap_min forwards (or updates we need could be freed while this operation is * in progress). Check for those cases: the bugs they cause are hard to debug. 
*/ -#define WT_WITH_TXN_ISOLATION(s, iso, op) do { \ - WT_TXN_ISOLATION saved_iso = (s)->isolation; \ - WT_TXN_ISOLATION saved_txn_iso = (s)->txn.isolation; \ - WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(s); \ - WT_TXN_STATE saved_state = *txn_state; \ - (s)->txn.forced_iso++; \ - (s)->isolation = (s)->txn.isolation = (iso); \ - op; \ - (s)->isolation = saved_iso; \ - (s)->txn.isolation = saved_txn_iso; \ - WT_ASSERT((s), (s)->txn.forced_iso > 0); \ - (s)->txn.forced_iso--; \ - WT_ASSERT((s), txn_state->id == saved_state.id && \ - (txn_state->metadata_pinned == saved_state.metadata_pinned ||\ - saved_state.metadata_pinned == WT_TXN_NONE) && \ - (txn_state->pinned_id == saved_state.pinned_id || \ - saved_state.pinned_id == WT_TXN_NONE)); \ - txn_state->metadata_pinned = saved_state.metadata_pinned; \ - txn_state->pinned_id = saved_state.pinned_id; \ -} while (0) +#define WT_WITH_TXN_ISOLATION(s, iso, op) \ + do { \ + WT_TXN_ISOLATION saved_iso = (s)->isolation; \ + WT_TXN_ISOLATION saved_txn_iso = (s)->txn.isolation; \ + WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(s); \ + WT_TXN_STATE saved_state = *txn_state; \ + (s)->txn.forced_iso++; \ + (s)->isolation = (s)->txn.isolation = (iso); \ + op; \ + (s)->isolation = saved_iso; \ + (s)->txn.isolation = saved_txn_iso; \ + WT_ASSERT((s), (s)->txn.forced_iso > 0); \ + (s)->txn.forced_iso--; \ + WT_ASSERT((s), txn_state->id == saved_state.id && \ + (txn_state->metadata_pinned == saved_state.metadata_pinned || \ + saved_state.metadata_pinned == WT_TXN_NONE) && \ + (txn_state->pinned_id == saved_state.pinned_id || \ + saved_state.pinned_id == WT_TXN_NONE)); \ + txn_state->metadata_pinned = saved_state.metadata_pinned; \ + txn_state->pinned_id = saved_state.pinned_id; \ + } while (0) struct __wt_named_snapshot { - const char *name; + const char *name; - TAILQ_ENTRY(__wt_named_snapshot) q; + TAILQ_ENTRY(__wt_named_snapshot) q; - uint64_t id, pinned_id, snap_min, snap_max; - uint64_t *snapshot; - uint32_t snapshot_count; 
+ uint64_t id, pinned_id, snap_min, snap_max; + uint64_t *snapshot; + uint32_t snapshot_count; }; struct __wt_txn_state { - WT_CACHE_LINE_PAD_BEGIN - volatile uint64_t id; - volatile uint64_t pinned_id; - volatile uint64_t metadata_pinned; - volatile bool is_allocating; + WT_CACHE_LINE_PAD_BEGIN + volatile uint64_t id; + volatile uint64_t pinned_id; + volatile uint64_t metadata_pinned; + volatile bool is_allocating; - WT_CACHE_LINE_PAD_END + WT_CACHE_LINE_PAD_END }; struct __wt_txn_global { - volatile uint64_t current; /* Current transaction ID. */ - - /* The oldest running transaction ID (may race). */ - volatile uint64_t last_running; - - /* - * The oldest transaction ID that is not yet visible to some - * transaction in the system. - */ - volatile uint64_t oldest_id; - - wt_timestamp_t durable_timestamp; - wt_timestamp_t last_ckpt_timestamp; - wt_timestamp_t meta_ckpt_timestamp; - wt_timestamp_t oldest_timestamp; - wt_timestamp_t pinned_timestamp; - wt_timestamp_t recovery_timestamp; - wt_timestamp_t stable_timestamp; - bool has_durable_timestamp; - bool has_oldest_timestamp; - bool has_pinned_timestamp; - bool has_stable_timestamp; - bool oldest_is_pinned; - bool stable_is_pinned; - - WT_SPINLOCK id_lock; - - /* Protects the active transaction states. */ - WT_RWLOCK rwlock; - - /* Protects logging, checkpoints and transaction visibility. */ - WT_RWLOCK visibility_rwlock; - - /* List of transactions sorted by durable timestamp. */ - WT_RWLOCK durable_timestamp_rwlock; - TAILQ_HEAD(__wt_txn_dts_qh, __wt_txn) durable_timestamph; - uint32_t durable_timestampq_len; - - /* List of transactions sorted by read timestamp. */ - WT_RWLOCK read_timestamp_rwlock; - TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn) read_timestamph; - uint32_t read_timestampq_len; - - /* - * Track information about the running checkpoint. The transaction - * snapshot used when checkpointing are special. Checkpoints can run - * for a long time so we keep them out of regular visibility checks. 
- * Eviction and checkpoint operations know when they need to be aware - * of checkpoint transactions. - * - * We rely on the fact that (a) the only table a checkpoint updates is - * the metadata; and (b) once checkpoint has finished reading a table, - * it won't revisit it. - */ - volatile bool checkpoint_running; /* Checkpoint running */ - volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ - WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */ - wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */ - - volatile uint64_t debug_ops; /* Debug mode op counter */ - uint64_t debug_rollback; /* Debug mode rollback */ - volatile uint64_t metadata_pinned; /* Oldest ID for metadata */ - - /* Named snapshot state. */ - WT_RWLOCK nsnap_rwlock; - volatile uint64_t nsnap_oldest_id; - TAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph; - - WT_TXN_STATE *states; /* Per-session transaction states */ + volatile uint64_t current; /* Current transaction ID. */ + + /* The oldest running transaction ID (may race). */ + volatile uint64_t last_running; + + /* + * The oldest transaction ID that is not yet visible to some transaction in the system. + */ + volatile uint64_t oldest_id; + + wt_timestamp_t durable_timestamp; + wt_timestamp_t last_ckpt_timestamp; + wt_timestamp_t meta_ckpt_timestamp; + wt_timestamp_t oldest_timestamp; + wt_timestamp_t pinned_timestamp; + wt_timestamp_t recovery_timestamp; + wt_timestamp_t stable_timestamp; + bool has_durable_timestamp; + bool has_oldest_timestamp; + bool has_pinned_timestamp; + bool has_stable_timestamp; + bool oldest_is_pinned; + bool stable_is_pinned; + + WT_SPINLOCK id_lock; + + /* Protects the active transaction states. */ + WT_RWLOCK rwlock; + + /* Protects logging, checkpoints and transaction visibility. */ + WT_RWLOCK visibility_rwlock; + + /* List of transactions sorted by durable timestamp. 
*/ + WT_RWLOCK durable_timestamp_rwlock; + TAILQ_HEAD(__wt_txn_dts_qh, __wt_txn) durable_timestamph; + uint32_t durable_timestampq_len; + + /* List of transactions sorted by read timestamp. */ + WT_RWLOCK read_timestamp_rwlock; + TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn) read_timestamph; + uint32_t read_timestampq_len; + + /* + * Track information about the running checkpoint. The transaction + * snapshot used when checkpointing are special. Checkpoints can run + * for a long time so we keep them out of regular visibility checks. + * Eviction and checkpoint operations know when they need to be aware + * of checkpoint transactions. + * + * We rely on the fact that (a) the only table a checkpoint updates is + * the metadata; and (b) once checkpoint has finished reading a table, + * it won't revisit it. + */ + volatile bool checkpoint_running; /* Checkpoint running */ + volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ + WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */ + wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */ + + volatile uint64_t debug_ops; /* Debug mode op counter */ + uint64_t debug_rollback; /* Debug mode rollback */ + volatile uint64_t metadata_pinned; /* Oldest ID for metadata */ + + /* Named snapshot state. */ + WT_RWLOCK nsnap_rwlock; + volatile uint64_t nsnap_oldest_id; + TAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph; + + WT_TXN_STATE *states; /* Per-session transaction states */ }; typedef enum __wt_txn_isolation { - WT_ISO_READ_COMMITTED, - WT_ISO_READ_UNCOMMITTED, - WT_ISO_SNAPSHOT + WT_ISO_READ_COMMITTED, + WT_ISO_READ_UNCOMMITTED, + WT_ISO_SNAPSHOT } WT_TXN_ISOLATION; /* @@ -201,59 +198,58 @@ typedef enum __wt_txn_isolation { * records during commit or undo the operations during rollback. 
*/ struct __wt_txn_op { - WT_BTREE *btree; - enum { - WT_TXN_OP_NONE=0, - WT_TXN_OP_BASIC_COL, - WT_TXN_OP_BASIC_ROW, - WT_TXN_OP_INMEM_COL, - WT_TXN_OP_INMEM_ROW, - WT_TXN_OP_REF_DELETE, - WT_TXN_OP_TRUNCATE_COL, - WT_TXN_OP_TRUNCATE_ROW - } type; - union { - /* WT_TXN_OP_BASIC_ROW, WT_TXN_OP_INMEM_ROW */ - struct { - WT_UPDATE *upd; - WT_ITEM key; - } op_row; - - /* WT_TXN_OP_BASIC_COL, WT_TXN_OP_INMEM_COL */ - struct { - WT_UPDATE *upd; - uint64_t recno; - } op_col; + WT_BTREE *btree; + enum { + WT_TXN_OP_NONE = 0, + WT_TXN_OP_BASIC_COL, + WT_TXN_OP_BASIC_ROW, + WT_TXN_OP_INMEM_COL, + WT_TXN_OP_INMEM_ROW, + WT_TXN_OP_REF_DELETE, + WT_TXN_OP_TRUNCATE_COL, + WT_TXN_OP_TRUNCATE_ROW + } type; + union { + /* WT_TXN_OP_BASIC_ROW, WT_TXN_OP_INMEM_ROW */ + struct { + WT_UPDATE *upd; + WT_ITEM key; + } op_row; + + /* WT_TXN_OP_BASIC_COL, WT_TXN_OP_INMEM_COL */ + struct { + WT_UPDATE *upd; + uint64_t recno; + } op_col; /* - * upd is pointing to same memory in both op_row and op_col, so for simplicity - * just chose op_row upd + * upd is pointing to same memory in both op_row and op_col, so for simplicity just chose op_row upd */ #undef op_upd -#define op_upd op_row.upd - - /* WT_TXN_OP_REF_DELETE */ - WT_REF *ref; - /* WT_TXN_OP_TRUNCATE_COL */ - struct { - uint64_t start, stop; - } truncate_col; - /* WT_TXN_OP_TRUNCATE_ROW */ - struct { - WT_ITEM start, stop; - enum { - WT_TXN_TRUNC_ALL, - WT_TXN_TRUNC_BOTH, - WT_TXN_TRUNC_START, - WT_TXN_TRUNC_STOP - } mode; - } truncate_row; - } u; +#define op_upd op_row.upd + + /* WT_TXN_OP_REF_DELETE */ + WT_REF *ref; + /* WT_TXN_OP_TRUNCATE_COL */ + struct { + uint64_t start, stop; + } truncate_col; + /* WT_TXN_OP_TRUNCATE_ROW */ + struct { + WT_ITEM start, stop; + enum { + WT_TXN_TRUNC_ALL, + WT_TXN_TRUNC_BOTH, + WT_TXN_TRUNC_START, + WT_TXN_TRUNC_STOP + } mode; + } truncate_row; + } u; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_OP_KEY_REPEATED 0x1u -#define WT_TXN_OP_KEY_RESERVED 0x2u -/* AUTOMATIC FLAG VALUE 
GENERATION STOP */ - uint32_t flags; +#define WT_TXN_OP_KEY_REPEATED 0x1u +#define WT_TXN_OP_KEY_RESERVED 0x2u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; /* @@ -261,77 +257,75 @@ struct __wt_txn_op { * Per-session transaction context. */ struct __wt_txn { - uint64_t id; - - WT_TXN_ISOLATION isolation; - - uint32_t forced_iso; /* Isolation is currently forced. */ - - /* - * Snapshot data: - * ids < snap_min are visible, - * ids > snap_max are invisible, - * everything else is visible unless it is in the snapshot. - */ - uint64_t snap_min, snap_max; - uint64_t *snapshot; - uint32_t snapshot_count; - uint32_t txn_logsync; /* Log sync configuration */ - - /* - * Timestamp copied into updates created by this transaction. - * - * In some use cases, this can be updated while the transaction is - * running. - */ - wt_timestamp_t commit_timestamp; - - /* - * Durable timestamp copied into updates created by this transaction. - * It is used to decide whether to consider this update to be persisted - * or not by stable checkpoint. - */ - wt_timestamp_t durable_timestamp; - - /* - * Set to the first commit timestamp used in the transaction and fixed - * while the transaction is on the public list of committed timestamps. - */ - wt_timestamp_t first_commit_timestamp; - - /* - * Timestamp copied into updates created by this transaction, when this - * transaction is prepared. - */ - wt_timestamp_t prepare_timestamp; - - /* Read updates committed as of this timestamp. */ - wt_timestamp_t read_timestamp; - - TAILQ_ENTRY(__wt_txn) durable_timestampq; - TAILQ_ENTRY(__wt_txn) read_timestampq; - /* Set if need to clear from the durable queue */ - bool clear_durable_q; - bool clear_read_q; /* Set if need to clear from the read queue */ - - /* Array of modifications by this transaction. */ - WT_TXN_OP *mod; - size_t mod_alloc; - u_int mod_count; - - /* Scratch buffer for in-memory log records. 
*/ - WT_ITEM *logrec; - - /* Requested notification when transactions are resolved. */ - WT_TXN_NOTIFY *notify; - - /* Checkpoint status. */ - WT_LSN ckpt_lsn; - uint32_t ckpt_nsnapshot; - WT_ITEM *ckpt_snapshot; - bool full_ckpt; - - const char *rollback_reason; /* If rollback, the reason */ + uint64_t id; + + WT_TXN_ISOLATION isolation; + + uint32_t forced_iso; /* Isolation is currently forced. */ + + /* + * Snapshot data: + * ids < snap_min are visible, + * ids > snap_max are invisible, + * everything else is visible unless it is in the snapshot. + */ + uint64_t snap_min, snap_max; + uint64_t *snapshot; + uint32_t snapshot_count; + uint32_t txn_logsync; /* Log sync configuration */ + + /* + * Timestamp copied into updates created by this transaction. + * + * In some use cases, this can be updated while the transaction is + * running. + */ + wt_timestamp_t commit_timestamp; + + /* + * Durable timestamp copied into updates created by this transaction. It is used to decide + * whether to consider this update to be persisted or not by stable checkpoint. + */ + wt_timestamp_t durable_timestamp; + + /* + * Set to the first commit timestamp used in the transaction and fixed while the transaction is + * on the public list of committed timestamps. + */ + wt_timestamp_t first_commit_timestamp; + + /* + * Timestamp copied into updates created by this transaction, when this transaction is prepared. + */ + wt_timestamp_t prepare_timestamp; + + /* Read updates committed as of this timestamp. */ + wt_timestamp_t read_timestamp; + + TAILQ_ENTRY(__wt_txn) durable_timestampq; + TAILQ_ENTRY(__wt_txn) read_timestampq; + /* Set if need to clear from the durable queue */ + bool clear_durable_q; + bool clear_read_q; /* Set if need to clear from the read queue */ + + /* Array of modifications by this transaction. */ + WT_TXN_OP *mod; + size_t mod_alloc; + u_int mod_count; + + /* Scratch buffer for in-memory log records. 
*/ + WT_ITEM *logrec; + + /* Requested notification when transactions are resolved. */ + WT_TXN_NOTIFY *notify; + + /* Checkpoint status. */ + WT_LSN ckpt_lsn; + uint32_t ckpt_nsnapshot; + WT_ITEM *ckpt_snapshot; + bool full_ckpt; + + const char *rollback_reason; /* If rollback, the reason */ /* * WT_TXN_HAS_TS_COMMIT -- @@ -346,31 +340,31 @@ struct __wt_txn { */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_AUTOCOMMIT 0x0000001u -#define WT_TXN_ERROR 0x0000002u -#define WT_TXN_HAS_ID 0x0000004u -#define WT_TXN_HAS_SNAPSHOT 0x0000008u -#define WT_TXN_HAS_TS_COMMIT 0x0000010u -#define WT_TXN_HAS_TS_DURABLE 0x0000020u -#define WT_TXN_HAS_TS_PREPARE 0x0000040u -#define WT_TXN_HAS_TS_READ 0x0000080u -#define WT_TXN_IGNORE_PREPARE 0x0000100u -#define WT_TXN_NAMED_SNAPSHOT 0x0000200u -#define WT_TXN_PREPARE 0x0000400u -#define WT_TXN_PUBLIC_TS_READ 0x0000800u -#define WT_TXN_READONLY 0x0001000u -#define WT_TXN_RUNNING 0x0002000u -#define WT_TXN_SYNC_SET 0x0004000u -#define WT_TXN_TS_COMMIT_ALWAYS 0x0008000u -#define WT_TXN_TS_COMMIT_KEYS 0x0010000u -#define WT_TXN_TS_COMMIT_NEVER 0x0020000u -#define WT_TXN_TS_DURABLE_ALWAYS 0x0040000u -#define WT_TXN_TS_DURABLE_KEYS 0x0080000u -#define WT_TXN_TS_DURABLE_NEVER 0x0100000u -#define WT_TXN_TS_PUBLISHED 0x0200000u -#define WT_TXN_TS_ROUND_PREPARED 0x0400000u -#define WT_TXN_TS_ROUND_READ 0x0800000u -#define WT_TXN_UPDATE 0x1000000u -/* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint32_t flags; +#define WT_TXN_AUTOCOMMIT 0x0000001u +#define WT_TXN_ERROR 0x0000002u +#define WT_TXN_HAS_ID 0x0000004u +#define WT_TXN_HAS_SNAPSHOT 0x0000008u +#define WT_TXN_HAS_TS_COMMIT 0x0000010u +#define WT_TXN_HAS_TS_DURABLE 0x0000020u +#define WT_TXN_HAS_TS_PREPARE 0x0000040u +#define WT_TXN_HAS_TS_READ 0x0000080u +#define WT_TXN_IGNORE_PREPARE 0x0000100u +#define WT_TXN_NAMED_SNAPSHOT 0x0000200u +#define WT_TXN_PREPARE 0x0000400u +#define WT_TXN_PUBLIC_TS_READ 0x0000800u +#define WT_TXN_READONLY 0x0001000u +#define WT_TXN_RUNNING 
0x0002000u +#define WT_TXN_SYNC_SET 0x0004000u +#define WT_TXN_TS_COMMIT_ALWAYS 0x0008000u +#define WT_TXN_TS_COMMIT_KEYS 0x0010000u +#define WT_TXN_TS_COMMIT_NEVER 0x0020000u +#define WT_TXN_TS_DURABLE_ALWAYS 0x0040000u +#define WT_TXN_TS_DURABLE_KEYS 0x0080000u +#define WT_TXN_TS_DURABLE_NEVER 0x0100000u +#define WT_TXN_TS_PUBLISHED 0x0200000u +#define WT_TXN_TS_ROUND_PREPARED 0x0400000u +#define WT_TXN_TS_ROUND_READ 0x0800000u +#define WT_TXN_UPDATE 0x1000000u + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index e9c6f7f8e9d..6ba337218cc 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -8,1258 +8,1202 @@ /* * __wt_ref_cas_state_int -- - * Try to do a compare and swap, if successful update the ref history in - * diagnostic mode. + * Try to do a compare and swap, if successful update the ref history in diagnostic mode. */ static inline bool -__wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, - uint32_t old_state, uint32_t new_state, const char *func, int line) +__wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state, + uint32_t new_state, const char *func, int line) { - bool cas_result; + bool cas_result; - /* Parameters that are used in a macro for diagnostic builds */ - WT_UNUSED(session); - WT_UNUSED(func); - WT_UNUSED(line); + /* Parameters that are used in a macro for diagnostic builds */ + WT_UNUSED(session); + WT_UNUSED(func); + WT_UNUSED(line); - cas_result = __wt_atomic_casv32(&ref->state, old_state, new_state); + cas_result = __wt_atomic_casv32(&ref->state, old_state, new_state); #ifdef HAVE_DIAGNOSTIC - /* - * The history update here has potential to race; if the state gets - * updated again after the CAS above but before the history has been - * updated. 
- */ - if (cas_result) - WT_REF_SAVE_STATE(ref, new_state, func, line); + /* + * The history update here has potential to race; if the state gets updated again after the CAS + * above but before the history has been updated. + */ + if (cas_result) + WT_REF_SAVE_STATE(ref, new_state, func, line); #endif - return (cas_result); + return (cas_result); } /* * __wt_txn_timestamp_flags -- - * Set transaction related timestamp flags. + * Set transaction related timestamp flags. */ static inline void __wt_txn_timestamp_flags(WT_SESSION_IMPL *session) { - WT_BTREE *btree; - - if (session->dhandle == NULL) - return; - btree = S2BT(session); - if (btree == NULL) - return; - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS)) - F_SET(&session->txn, WT_TXN_TS_COMMIT_ALWAYS); - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS)) - F_SET(&session->txn, WT_TXN_TS_COMMIT_KEYS); - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER)) - F_SET(&session->txn, WT_TXN_TS_COMMIT_NEVER); - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_ALWAYS)) - F_SET(&session->txn, WT_TXN_TS_DURABLE_ALWAYS); - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS)) - F_SET(&session->txn, WT_TXN_TS_DURABLE_KEYS); - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER)) - F_SET(&session->txn, WT_TXN_TS_DURABLE_NEVER); + WT_BTREE *btree; + + if (session->dhandle == NULL) + return; + btree = S2BT(session); + if (btree == NULL) + return; + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS)) + F_SET(&session->txn, WT_TXN_TS_COMMIT_ALWAYS); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS)) + F_SET(&session->txn, WT_TXN_TS_COMMIT_KEYS); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER)) + F_SET(&session->txn, WT_TXN_TS_COMMIT_NEVER); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_ALWAYS)) + F_SET(&session->txn, WT_TXN_TS_DURABLE_ALWAYS); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_KEYS)) + 
F_SET(&session->txn, WT_TXN_TS_DURABLE_KEYS); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_DURABLE_TS_NEVER)) + F_SET(&session->txn, WT_TXN_TS_DURABLE_NEVER); } /* * __wt_txn_op_set_recno -- - * Set the latest transaction operation with the given recno. + * Set the latest transaction operation with the given recno. */ static inline void __wt_txn_op_set_recno(WT_SESSION_IMPL *session, uint64_t recno) { - WT_TXN *txn; - WT_TXN_OP *op; - - txn = &session->txn; - - WT_ASSERT(session, txn->mod_count > 0 && recno != WT_RECNO_OOB); - op = txn->mod + txn->mod_count - 1; - - if (WT_SESSION_IS_CHECKPOINT(session) || - F_ISSET(op->btree, WT_BTREE_LOOKASIDE) || - WT_IS_METADATA(op->btree->dhandle)) - return; - - WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_COL || - op->type == WT_TXN_OP_INMEM_COL); - - /* - * Copy the recno into the transaction operation structure, so when - * update is evicted to lookaside, we have a chance of finding it - * again. Even though only prepared updates can be evicted, at this - * stage we don't know whether this transaction will be prepared or - * not, hence we are copying the key for all operations, so that we can - * use this key to fetch the update in case this transaction is - * prepared. - */ - op->u.op_col.recno = recno; + WT_TXN *txn; + WT_TXN_OP *op; + + txn = &session->txn; + + WT_ASSERT(session, txn->mod_count > 0 && recno != WT_RECNO_OOB); + op = txn->mod + txn->mod_count - 1; + + if (WT_SESSION_IS_CHECKPOINT(session) || F_ISSET(op->btree, WT_BTREE_LOOKASIDE) || + WT_IS_METADATA(op->btree->dhandle)) + return; + + WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_COL || op->type == WT_TXN_OP_INMEM_COL); + + /* + * Copy the recno into the transaction operation structure, so when update is evicted to + * lookaside, we have a chance of finding it again. 
Even though only prepared updates can be + * evicted, at this stage we don't know whether this transaction will be prepared or not, hence + * we are copying the key for all operations, so that we can use this key to fetch the update in + * case this transaction is prepared. + */ + op->u.op_col.recno = recno; } /* * __wt_txn_op_set_key -- - * Set the latest transaction operation with the given key. + * Set the latest transaction operation with the given key. */ static inline int __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key) { - WT_TXN *txn; - WT_TXN_OP *op; + WT_TXN *txn; + WT_TXN_OP *op; - txn = &session->txn; + txn = &session->txn; - WT_ASSERT(session, txn->mod_count > 0 && key->data != NULL); + WT_ASSERT(session, txn->mod_count > 0 && key->data != NULL); - op = txn->mod + txn->mod_count - 1; + op = txn->mod + txn->mod_count - 1; - if (WT_SESSION_IS_CHECKPOINT(session) || - F_ISSET(op->btree, WT_BTREE_LOOKASIDE) || - WT_IS_METADATA(op->btree->dhandle)) - return (0); + if (WT_SESSION_IS_CHECKPOINT(session) || F_ISSET(op->btree, WT_BTREE_LOOKASIDE) || + WT_IS_METADATA(op->btree->dhandle)) + return (0); - WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_ROW || - op->type == WT_TXN_OP_INMEM_ROW); + WT_ASSERT(session, op->type == WT_TXN_OP_BASIC_ROW || op->type == WT_TXN_OP_INMEM_ROW); - /* - * Copy the key into the transaction operation structure, so when - * update is evicted to lookaside, we have a chance of finding it - * again. Even though only prepared updates can be evicted, at this - * stage we don't know whether this transaction will be prepared or - * not, hence we are copying the key for all operations, so that we can - * use this key to fetch the update in case this transaction is - * prepared. - */ - return (__wt_buf_set(session, &op->u.op_row.key, key->data, key->size)); + /* + * Copy the key into the transaction operation structure, so when update is evicted to + * lookaside, we have a chance of finding it again. 
Even though only prepared updates can be + * evicted, at this stage we don't know whether this transaction will be prepared or not, hence + * we are copying the key for all operations, so that we can use this key to fetch the update in + * case this transaction is prepared. + */ + return (__wt_buf_set(session, &op->u.op_row.key, key->data, key->size)); } /* * __txn_resolve_prepared_update -- - * Resolve a prepared update as committed update. + * Resolve a prepared update as committed update. */ static inline void __txn_resolve_prepared_update(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - WT_TXN *txn; - - txn = &session->txn; - /* - * In case of a prepared transaction, the order of modification of the - * prepare timestamp to commit timestamp in the update chain will not - * affect the data visibility, a reader will encounter a prepared - * update resulting in prepare conflict. - * - * As updating timestamp might not be an atomic operation, we will - * manage using state. - */ - upd->prepare_state = WT_PREPARE_LOCKED; - WT_WRITE_BARRIER(); - upd->start_ts = txn->commit_timestamp; - upd->durable_ts = txn->durable_timestamp; - WT_PUBLISH(upd->prepare_state, WT_PREPARE_RESOLVED); + WT_TXN *txn; + + txn = &session->txn; + /* + * In case of a prepared transaction, the order of modification of the + * prepare timestamp to commit timestamp in the update chain will not + * affect the data visibility, a reader will encounter a prepared + * update resulting in prepare conflict. + * + * As updating timestamp might not be an atomic operation, we will + * manage using state. + */ + upd->prepare_state = WT_PREPARE_LOCKED; + WT_WRITE_BARRIER(); + upd->start_ts = txn->commit_timestamp; + upd->durable_ts = txn->durable_timestamp; + WT_PUBLISH(upd->prepare_state, WT_PREPARE_RESOLVED); } /* * __wt_txn_resolve_prepared_op -- - * Resolve a transaction's operations indirect references. 
- * - * In case of prepared transactions, the prepared updates could be evicted - * using cache overflow mechanism. Transaction operations referring to - * these prepared updates would be referring to them using indirect - * references (i.e keys/recnos), which need to be resolved as part of that - * transaction commit/rollback. - * - * If no updates are resolved throw an error. Increment resolved update - * count for each resolved update count we locate. + * Resolve a transaction's operations indirect references. In case of prepared transactions, the + * prepared updates could be evicted using cache overflow mechanism. Transaction operations + * referring to these prepared updates would be referring to them using indirect references (i.e + * keys/recnos), which need to be resolved as part of that transaction commit/rollback. If no + * updates are resolved throw an error. Increment resolved update count for each resolved update + * count we locate. */ static inline int __wt_txn_resolve_prepared_op( - WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, - int64_t *resolved_update_countp) + WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit, int64_t *resolved_update_countp) { - WT_CURSOR *cursor; - WT_DECL_RET; - WT_TXN *txn; - WT_UPDATE *upd; - const char *open_cursor_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL }; - - txn = &session->txn; - - if (op->type == WT_TXN_OP_NONE || op->type == WT_TXN_OP_REF_DELETE || - op->type == WT_TXN_OP_TRUNCATE_COL || - op->type == WT_TXN_OP_TRUNCATE_ROW) - return (0); - - WT_RET(__wt_open_cursor(session, - op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor)); - - /* - * Transaction prepare is cleared temporarily as cursor functions are - * not allowed for prepared transactions. 
- */ - F_CLR(txn, WT_TXN_PREPARE); - if (op->type == WT_TXN_OP_BASIC_ROW || - op->type == WT_TXN_OP_INMEM_ROW) - __wt_cursor_set_raw_key(cursor, &op->u.op_row.key); - else - ((WT_CURSOR_BTREE *)cursor)->iface.recno = - op->u.op_col.recno; - F_SET(txn, WT_TXN_PREPARE); - - WT_WITH_BTREE(session, - op->btree, ret = __wt_btcur_search_uncommitted( - (WT_CURSOR_BTREE *)cursor, &upd)); - WT_ERR(ret); - - /* If we haven't found anything then there's an error. */ - if (upd == NULL) { - WT_ASSERT(session, upd != NULL); - WT_ERR(WT_NOTFOUND); - } - - for (; upd != NULL; upd = upd->next) { - /* - * Aborted updates can exist in the update chain of our txn. - * Generally this will occur due to a reserved update. - * As such we should skip over these updates. If the txn - * id is then different and not aborted we know we've - * reached the end of our update chain and can exit. - */ - if (upd->txnid == WT_TXN_ABORTED) - continue; - if (upd->txnid != txn->id) - break; - - ++(*resolved_update_countp); - - if (!commit) { - upd->txnid = WT_TXN_ABORTED; - continue; - } - - /* - * Newer updates are inserted at head of update chain, and - * transaction operations are added at the tail of the - * transaction modify chain. - * - * For example, a transaction has modified [k,v] as - * [k, v] -> [k, u1] (txn_op : txn_op1) - * [k, u1] -> [k, u2] (txn_op : txn_op2) - * update chain : u2->u1 - * txn_mod : txn_op1->txn_op2. - * - * Only the key is saved in the transaction operation - * structure, hence we cannot identify whether "txn_op1" - * corresponds to "u2" or "u1" during commit/rollback. - * - * To make things simpler we will handle all the updates - * that match the key saved in a transaction operation in a - * single go. As a result, multiple updates of a key, if any - * will be resolved as part of the first transaction operation - * resolution of that key, and subsequent transaction operation - * resolution of the same key will be effectively - * a no-op. 
- * - * In the above example, we will resolve "u2" and "u1" as part - * of resolving "txn_op1" and will not do any significant - * thing as part of "txn_op2". - */ - - /* Resolve the prepared update to be committed update. */ - __txn_resolve_prepared_update(session, upd); - } -err: WT_TRET(cursor->close(cursor)); - return (ret); + WT_CURSOR *cursor; + WT_DECL_RET; + WT_TXN *txn; + WT_UPDATE *upd; + const char *open_cursor_cfg[] = {WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL}; + + txn = &session->txn; + + if (op->type == WT_TXN_OP_NONE || op->type == WT_TXN_OP_REF_DELETE || + op->type == WT_TXN_OP_TRUNCATE_COL || op->type == WT_TXN_OP_TRUNCATE_ROW) + return (0); + + WT_RET(__wt_open_cursor(session, op->btree->dhandle->name, NULL, open_cursor_cfg, &cursor)); + + /* + * Transaction prepare is cleared temporarily as cursor functions are not allowed for prepared + * transactions. + */ + F_CLR(txn, WT_TXN_PREPARE); + if (op->type == WT_TXN_OP_BASIC_ROW || op->type == WT_TXN_OP_INMEM_ROW) + __wt_cursor_set_raw_key(cursor, &op->u.op_row.key); + else + ((WT_CURSOR_BTREE *)cursor)->iface.recno = op->u.op_col.recno; + F_SET(txn, WT_TXN_PREPARE); + + WT_WITH_BTREE( + session, op->btree, ret = __wt_btcur_search_uncommitted((WT_CURSOR_BTREE *)cursor, &upd)); + WT_ERR(ret); + + /* If we haven't found anything then there's an error. */ + if (upd == NULL) { + WT_ERR_ASSERT(session, upd != NULL, WT_NOTFOUND, + "Unable to" + " locate update associated with a prepared operation."); + } + + for (; upd != NULL; upd = upd->next) { + /* + * Aborted updates can exist in the update chain of our txn. Generally this will occur due + * to a reserved update. As such we should skip over these updates. If the txn id is then + * different and not aborted we know we've reached the end of our update chain and can exit. 
+ */ + if (upd->txnid == WT_TXN_ABORTED) + continue; + if (upd->txnid != txn->id) + break; + + ++(*resolved_update_countp); + + if (!commit) { + upd->txnid = WT_TXN_ABORTED; + continue; + } + + /* + * Newer updates are inserted at head of update chain, and + * transaction operations are added at the tail of the + * transaction modify chain. + * + * For example, a transaction has modified [k,v] as + * [k, v] -> [k, u1] (txn_op : txn_op1) + * [k, u1] -> [k, u2] (txn_op : txn_op2) + * update chain : u2->u1 + * txn_mod : txn_op1->txn_op2. + * + * Only the key is saved in the transaction operation + * structure, hence we cannot identify whether "txn_op1" + * corresponds to "u2" or "u1" during commit/rollback. + * + * To make things simpler we will handle all the updates + * that match the key saved in a transaction operation in a + * single go. As a result, multiple updates of a key, if any + * will be resolved as part of the first transaction operation + * resolution of that key, and subsequent transaction operation + * resolution of the same key will be effectively + * a no-op. + * + * In the above example, we will resolve "u2" and "u1" as part + * of resolving "txn_op1" and will not do any significant + * thing as part of "txn_op2". + */ + + /* Resolve the prepared update to be committed update. */ + __txn_resolve_prepared_update(session, upd); + } +err: + WT_TRET(cursor->close(cursor)); + return (ret); } /* * __txn_next_op -- - * Mark a WT_UPDATE object modified by the current transaction. + * Mark a WT_UPDATE object modified by the current transaction. */ static inline int __txn_next_op(WT_SESSION_IMPL *session, WT_TXN_OP **opp) { - WT_TXN *txn; - WT_TXN_OP *op; + WT_TXN *txn; + WT_TXN_OP *op; - *opp = NULL; + *opp = NULL; - txn = &session->txn; + txn = &session->txn; - /* - * We're about to perform an update. - * Make sure we have allocated a transaction ID. 
- */ - WT_RET(__wt_txn_id_check(session)); - WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_ID)); + /* + * We're about to perform an update. Make sure we have allocated a transaction ID. + */ + WT_RET(__wt_txn_id_check(session)); + WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_ID)); - WT_RET(__wt_realloc_def(session, &txn->mod_alloc, - txn->mod_count + 1, &txn->mod)); + WT_RET(__wt_realloc_def(session, &txn->mod_alloc, txn->mod_count + 1, &txn->mod)); - op = &txn->mod[txn->mod_count++]; - WT_CLEAR(*op); - op->btree = S2BT(session); - (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1); - *opp = op; - return (0); + op = &txn->mod[txn->mod_count++]; + WT_CLEAR(*op); + op->btree = S2BT(session); + (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1); + *opp = op; + return (0); } /* * __wt_txn_unmodify -- - * If threads race making updates, they may discard the last referenced - * WT_UPDATE item while the transaction is still active. This function - * removes the last update item from the "log". + * If threads race making updates, they may discard the last referenced WT_UPDATE item while the + * transaction is still active. This function removes the last update item from the "log". */ static inline void __wt_txn_unmodify(WT_SESSION_IMPL *session) { - WT_TXN *txn; - WT_TXN_OP *op; - - txn = &session->txn; - if (F_ISSET(txn, WT_TXN_HAS_ID)) { - WT_ASSERT(session, txn->mod_count > 0); - --txn->mod_count; - op = txn->mod + txn->mod_count; - __wt_txn_op_free(session, op); - } + WT_TXN *txn; + WT_TXN_OP *op; + + txn = &session->txn; + if (F_ISSET(txn, WT_TXN_HAS_ID)) { + WT_ASSERT(session, txn->mod_count > 0); + --txn->mod_count; + op = txn->mod + txn->mod_count; + __wt_txn_op_free(session, op); + } } /* * __wt_txn_op_apply_prepare_state -- - * Apply the correct prepare state and the timestamp to the ref and to any - * updates in the page del update list. 
+ * Apply the correct prepare state and the timestamp to the ref and to any updates in the page + * del update list. */ static inline void -__wt_txn_op_apply_prepare_state( - WT_SESSION_IMPL *session, WT_REF *ref, bool commit) +__wt_txn_op_apply_prepare_state(WT_SESSION_IMPL *session, WT_REF *ref, bool commit) { - WT_TXN *txn; - WT_UPDATE **updp; - wt_timestamp_t ts; - uint32_t previous_state; - uint8_t prepare_state; - - txn = &session->txn; - - /* - * Lock the ref to ensure we don't race with eviction freeing the page - * deleted update list or with a page instantiate. - */ - for (;; __wt_yield()) { - previous_state = ref->state; - WT_ASSERT(session, previous_state != WT_REF_READING); - if (previous_state != WT_REF_LOCKED && WT_REF_CAS_STATE( - session, ref, previous_state, WT_REF_LOCKED)) - break; - } - - if (commit) { - ts = txn->commit_timestamp; - prepare_state = WT_PREPARE_RESOLVED; - } else { - ts = txn->prepare_timestamp; - prepare_state = WT_PREPARE_INPROGRESS; - } - for (updp = ref->page_del->update_list; - updp != NULL && *updp != NULL; ++updp) { - (*updp)->start_ts = ts; - /* - * Holding the ref locked means we have exclusive access, so if - * we are committing we don't need to use the prepare locked - * transition state. - */ - (*updp)->prepare_state = prepare_state; - if (commit) - (*updp)->durable_ts = txn->durable_timestamp; - } - ref->page_del->timestamp = ts; - if (commit) - ref->page_del->durable_timestamp = txn->durable_timestamp; - WT_PUBLISH(ref->page_del->prepare_state, prepare_state); - - /* Unlock the page by setting it back to it's previous state */ - WT_REF_SET_STATE(ref, previous_state); + WT_TXN *txn; + WT_UPDATE **updp; + wt_timestamp_t ts; + uint32_t previous_state; + uint8_t prepare_state; + + txn = &session->txn; + + /* + * Lock the ref to ensure we don't race with eviction freeing the page deleted update list or + * with a page instantiate. 
+ */ + for (;; __wt_yield()) { + previous_state = ref->state; + WT_ASSERT(session, previous_state != WT_REF_READING); + if (previous_state != WT_REF_LOCKED && + WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED)) + break; + } + + if (commit) { + ts = txn->commit_timestamp; + prepare_state = WT_PREPARE_RESOLVED; + } else { + ts = txn->prepare_timestamp; + prepare_state = WT_PREPARE_INPROGRESS; + } + for (updp = ref->page_del->update_list; updp != NULL && *updp != NULL; ++updp) { + (*updp)->start_ts = ts; + /* + * Holding the ref locked means we have exclusive access, so if we are committing we don't + * need to use the prepare locked transition state. + */ + (*updp)->prepare_state = prepare_state; + if (commit) + (*updp)->durable_ts = txn->durable_timestamp; + } + ref->page_del->timestamp = ts; + if (commit) + ref->page_del->durable_timestamp = txn->durable_timestamp; + WT_PUBLISH(ref->page_del->prepare_state, prepare_state); + + /* Unlock the page by setting it back to it's previous state */ + WT_REF_SET_STATE(ref, previous_state); } /* * __wt_txn_op_delete_commit_apply_timestamps -- - * Apply the correct start and durable timestamps to any - * updates in the page del update list. + * Apply the correct start and durable timestamps to any updates in the page del update list. */ static inline void -__wt_txn_op_delete_commit_apply_timestamps( - WT_SESSION_IMPL *session, WT_REF *ref) +__wt_txn_op_delete_commit_apply_timestamps(WT_SESSION_IMPL *session, WT_REF *ref) { - WT_TXN *txn; - WT_UPDATE **updp; - uint32_t previous_state; - - txn = &session->txn; - - /* - * Lock the ref to ensure we don't race with eviction freeing the page - * deleted update list or with a page instantiate. 
- */ - for (;; __wt_yield()) { - previous_state = ref->state; - WT_ASSERT(session, previous_state != WT_REF_READING); - if (previous_state != WT_REF_LOCKED && WT_REF_CAS_STATE( - session, ref, previous_state, WT_REF_LOCKED)) - break; - } - - for (updp = ref->page_del->update_list; - updp != NULL && *updp != NULL; ++updp) { - (*updp)->start_ts = txn->commit_timestamp; - (*updp)->durable_ts = txn->durable_timestamp; - } - - /* Unlock the page by setting it back to it's previous state */ - WT_REF_SET_STATE(ref, previous_state); + WT_TXN *txn; + WT_UPDATE **updp; + uint32_t previous_state; + + txn = &session->txn; + + /* + * Lock the ref to ensure we don't race with eviction freeing the page deleted update list or + * with a page instantiate. + */ + for (;; __wt_yield()) { + previous_state = ref->state; + WT_ASSERT(session, previous_state != WT_REF_READING); + if (previous_state != WT_REF_LOCKED && + WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED)) + break; + } + + for (updp = ref->page_del->update_list; updp != NULL && *updp != NULL; ++updp) { + (*updp)->start_ts = txn->commit_timestamp; + (*updp)->durable_ts = txn->durable_timestamp; + } + + /* Unlock the page by setting it back to it's previous state */ + WT_REF_SET_STATE(ref, previous_state); } /* * __wt_txn_op_set_timestamp -- - * Decide whether to copy a commit timestamp into an update. If the op - * structure doesn't have a populated update or ref field or is in prepared - * state there won't be any check for an existing timestamp. + * Decide whether to copy a commit timestamp into an update. If the op structure doesn't have a + * populated update or ref field or is in prepared state there won't be any check for an + * existing timestamp. 
*/ static inline void __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op) { - WT_TXN *txn; - WT_UPDATE *upd; - wt_timestamp_t *timestamp; - - txn = &session->txn; - - /* - * Updates in the metadata never get timestamps (either now or at - * commit): metadata cannot be read at a point in time, only the most - * recently committed data matches files on disk. - */ - if (WT_IS_METADATA(op->btree->dhandle) || - !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) - return; - - if (F_ISSET(txn, WT_TXN_PREPARE)) { - /* - * We have a commit timestamp for a prepare transaction, this is - * only possible as part of a transaction commit call. - */ - if (op->type == WT_TXN_OP_REF_DELETE) - __wt_txn_op_apply_prepare_state( - session, op->u.ref, true); - else { - upd = op->u.op_upd; - - /* Resolve prepared update to be committed update. */ - __txn_resolve_prepared_update(session, upd); - } - } else { - /* - * The timestamp is in the page deleted structure for - * truncates, or in the update for other operations. Both - * commit and durable timestamps need to be updated. - */ - timestamp = op->type == WT_TXN_OP_REF_DELETE ? - &op->u.ref->page_del->timestamp : &op->u.op_upd->start_ts; - if (*timestamp == WT_TS_NONE) { - *timestamp = txn->commit_timestamp; - - timestamp = op->type == WT_TXN_OP_REF_DELETE ? - &op->u.ref->page_del->durable_timestamp : - &op->u.op_upd->durable_ts; - *timestamp = txn->durable_timestamp; - } - - if (op->type == WT_TXN_OP_REF_DELETE) - __wt_txn_op_delete_commit_apply_timestamps( - session, op->u.ref); - } + WT_TXN *txn; + WT_UPDATE *upd; + wt_timestamp_t *timestamp; + + txn = &session->txn; + + /* + * Updates in the metadata never get timestamps (either now or at commit): metadata cannot be + * read at a point in time, only the most recently committed data matches files on disk. 
+ */ + if (WT_IS_METADATA(op->btree->dhandle) || !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) + return; + + if (F_ISSET(txn, WT_TXN_PREPARE)) { + /* + * We have a commit timestamp for a prepare transaction, this is only possible as part of a + * transaction commit call. + */ + if (op->type == WT_TXN_OP_REF_DELETE) + __wt_txn_op_apply_prepare_state(session, op->u.ref, true); + else { + upd = op->u.op_upd; + + /* Resolve prepared update to be committed update. */ + __txn_resolve_prepared_update(session, upd); + } + } else { + /* + * The timestamp is in the page deleted structure for truncates, or in the update for other + * operations. Both commit and durable timestamps need to be updated. + */ + timestamp = op->type == WT_TXN_OP_REF_DELETE ? &op->u.ref->page_del->timestamp : + &op->u.op_upd->start_ts; + if (*timestamp == WT_TS_NONE) { + *timestamp = txn->commit_timestamp; + + timestamp = op->type == WT_TXN_OP_REF_DELETE ? &op->u.ref->page_del->durable_timestamp : + &op->u.op_upd->durable_ts; + *timestamp = txn->durable_timestamp; + } + + if (op->type == WT_TXN_OP_REF_DELETE) + __wt_txn_op_delete_commit_apply_timestamps(session, op->u.ref); + } } /* * __wt_txn_modify -- - * Mark a WT_UPDATE object modified by the current transaction. + * Mark a WT_UPDATE object modified by the current transaction. 
*/ static inline int __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - WT_TXN *txn; - WT_TXN_OP *op; - - txn = &session->txn; - - if (F_ISSET(txn, WT_TXN_READONLY)) { - if (F_ISSET(txn, WT_TXN_IGNORE_PREPARE)) - WT_RET_MSG(session, ENOTSUP, - "Transactions with ignore_prepare=true" - " cannot perform updates"); - WT_RET_MSG(session, WT_ROLLBACK, - "Attempt to update in a read-only transaction"); - } - - WT_RET(__txn_next_op(session, &op)); - if (F_ISSET(session, WT_SESSION_LOGGING_INMEM)) { - if (op->btree->type == BTREE_ROW) - op->type = WT_TXN_OP_INMEM_ROW; - else - op->type = WT_TXN_OP_INMEM_COL; - } else { - if (op->btree->type == BTREE_ROW) - op->type = WT_TXN_OP_BASIC_ROW; - else - op->type = WT_TXN_OP_BASIC_COL; - } - op->u.op_upd = upd; - upd->txnid = session->txn.id; - - __wt_txn_op_set_timestamp(session, op); - return (0); + WT_TXN *txn; + WT_TXN_OP *op; + + txn = &session->txn; + + if (F_ISSET(txn, WT_TXN_READONLY)) { + if (F_ISSET(txn, WT_TXN_IGNORE_PREPARE)) + WT_RET_MSG(session, ENOTSUP, + "Transactions with ignore_prepare=true" + " cannot perform updates"); + WT_RET_MSG(session, WT_ROLLBACK, "Attempt to update in a read-only transaction"); + } + + WT_RET(__txn_next_op(session, &op)); + if (F_ISSET(session, WT_SESSION_LOGGING_INMEM)) { + if (op->btree->type == BTREE_ROW) + op->type = WT_TXN_OP_INMEM_ROW; + else + op->type = WT_TXN_OP_INMEM_COL; + } else { + if (op->btree->type == BTREE_ROW) + op->type = WT_TXN_OP_BASIC_ROW; + else + op->type = WT_TXN_OP_BASIC_COL; + } + op->u.op_upd = upd; + upd->txnid = session->txn.id; + + __wt_txn_op_set_timestamp(session, op); + return (0); } /* * __wt_txn_modify_page_delete -- - * Remember a page truncated by the current transaction. + * Remember a page truncated by the current transaction. 
*/ static inline int __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref) { - WT_DECL_RET; - WT_TXN *txn; - WT_TXN_OP *op; + WT_DECL_RET; + WT_TXN *txn; + WT_TXN_OP *op; - txn = &session->txn; + txn = &session->txn; - WT_RET(__txn_next_op(session, &op)); - op->type = WT_TXN_OP_REF_DELETE; + WT_RET(__txn_next_op(session, &op)); + op->type = WT_TXN_OP_REF_DELETE; - op->u.ref = ref; - ref->page_del->txnid = txn->id; - __wt_txn_op_set_timestamp(session, op); + op->u.ref = ref; + ref->page_del->txnid = txn->id; + __wt_txn_op_set_timestamp(session, op); - WT_ERR(__wt_txn_log_op(session, NULL)); - return (0); + WT_ERR(__wt_txn_log_op(session, NULL)); + return (0); -err: __wt_txn_unmodify(session); - return (ret); +err: + __wt_txn_unmodify(session); + return (ret); } /* * __wt_txn_oldest_id -- - * Return the oldest transaction ID that has to be kept for the current - * tree. + * Return the oldest transaction ID that has to be kept for the current tree. */ static inline uint64_t __wt_txn_oldest_id(WT_SESSION_IMPL *session) { - WT_BTREE *btree; - WT_TXN_GLOBAL *txn_global; - uint64_t checkpoint_pinned, oldest_id; - bool include_checkpoint_txn; - - txn_global = &S2C(session)->txn_global; - btree = S2BT_SAFE(session); - - /* - * The metadata is tracked specially because of optimizations for - * checkpoints. - */ - if (session->dhandle != NULL && WT_IS_METADATA(session->dhandle)) - return (txn_global->metadata_pinned); - - /* - * Take a local copy of these IDs in case they are updated while we are - * checking visibility. 
- */ - oldest_id = txn_global->oldest_id; - include_checkpoint_txn = btree == NULL || - (!F_ISSET(btree, WT_BTREE_LOOKASIDE) && - btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT)); - if (!include_checkpoint_txn) - return (oldest_id); - - /* - * The read of the transaction ID pinned by a checkpoint needs to be - * carefully ordered: if a checkpoint is starting and we have to start - * checking the pinned ID, we take the minimum of it with the oldest - * ID, which is what we want. - */ - WT_READ_BARRIER(); - - /* - * Checkpoint transactions often fall behind ordinary application - * threads. Take special effort to not keep changes pinned in cache - * if they are only required for the checkpoint and it has already - * seen them. - * - * If there is no active checkpoint or this handle is up to date with - * the active checkpoint then it's safe to ignore the checkpoint ID in - * the visibility check. - */ - checkpoint_pinned = txn_global->checkpoint_state.pinned_id; - if (checkpoint_pinned == WT_TXN_NONE || - WT_TXNID_LT(oldest_id, checkpoint_pinned)) - return (oldest_id); - - return (checkpoint_pinned); + WT_BTREE *btree; + WT_TXN_GLOBAL *txn_global; + uint64_t checkpoint_pinned, oldest_id; + bool include_checkpoint_txn; + + txn_global = &S2C(session)->txn_global; + btree = S2BT_SAFE(session); + + /* + * The metadata is tracked specially because of optimizations for checkpoints. + */ + if (session->dhandle != NULL && WT_IS_METADATA(session->dhandle)) + return (txn_global->metadata_pinned); + + /* + * Take a local copy of these IDs in case they are updated while we are checking visibility. 
+ */ + oldest_id = txn_global->oldest_id; + include_checkpoint_txn = + btree == NULL || (!F_ISSET(btree, WT_BTREE_LOOKASIDE) && + btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT)); + if (!include_checkpoint_txn) + return (oldest_id); + + /* + * The read of the transaction ID pinned by a checkpoint needs to be carefully ordered: if a + * checkpoint is starting and we have to start checking the pinned ID, we take the minimum of it + * with the oldest ID, which is what we want. + */ + WT_READ_BARRIER(); + + /* + * Checkpoint transactions often fall behind ordinary application + * threads. Take special effort to not keep changes pinned in cache + * if they are only required for the checkpoint and it has already + * seen them. + * + * If there is no active checkpoint or this handle is up to date with + * the active checkpoint then it's safe to ignore the checkpoint ID in + * the visibility check. + */ + checkpoint_pinned = txn_global->checkpoint_state.pinned_id; + if (checkpoint_pinned == WT_TXN_NONE || WT_TXNID_LT(oldest_id, checkpoint_pinned)) + return (oldest_id); + + return (checkpoint_pinned); } /* * __wt_txn_pinned_timestamp -- - * Get the first timestamp that has to be kept for the current tree. + * Get the first timestamp that has to be kept for the current tree. */ static inline void __wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp) { - WT_BTREE *btree; - WT_TXN_GLOBAL *txn_global; - wt_timestamp_t checkpoint_ts, pinned_ts; - bool include_checkpoint_txn; - - btree = S2BT_SAFE(session); - txn_global = &S2C(session)->txn_global; - - *pinned_tsp = pinned_ts = txn_global->pinned_timestamp; - - /* - * Checkpoint transactions often fall behind ordinary application - * threads. Take special effort to not keep changes pinned in cache if - * they are only required for the checkpoint and it has already seen - * them. 
- * - * If there is no active checkpoint or this handle is up to date with - * the active checkpoint then it's safe to ignore the checkpoint ID in - * the visibility check. - */ - include_checkpoint_txn = btree == NULL || - (!F_ISSET(btree, WT_BTREE_LOOKASIDE) && - btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT)); - if (!include_checkpoint_txn) - return; - - /* - * The read of the timestamp pinned by a checkpoint needs to be - * carefully ordered: if a checkpoint is starting and we have to use - * the checkpoint timestamp, we take the minimum of it with the oldest - * timestamp, which is what we want. - */ - WT_READ_BARRIER(); - - checkpoint_ts = txn_global->checkpoint_timestamp; - - if (checkpoint_ts != 0 && checkpoint_ts < pinned_ts) - *pinned_tsp = checkpoint_ts; + WT_BTREE *btree; + WT_TXN_GLOBAL *txn_global; + wt_timestamp_t checkpoint_ts, pinned_ts; + bool include_checkpoint_txn; + + btree = S2BT_SAFE(session); + txn_global = &S2C(session)->txn_global; + + *pinned_tsp = pinned_ts = txn_global->pinned_timestamp; + + /* + * Checkpoint transactions often fall behind ordinary application + * threads. Take special effort to not keep changes pinned in cache if + * they are only required for the checkpoint and it has already seen + * them. + * + * If there is no active checkpoint or this handle is up to date with + * the active checkpoint then it's safe to ignore the checkpoint ID in + * the visibility check. + */ + include_checkpoint_txn = + btree == NULL || (!F_ISSET(btree, WT_BTREE_LOOKASIDE) && + btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT)); + if (!include_checkpoint_txn) + return; + + /* + * The read of the timestamp pinned by a checkpoint needs to be carefully ordered: if a + * checkpoint is starting and we have to use the checkpoint timestamp, we take the minimum of it + * with the oldest timestamp, which is what we want. 
+ */ + WT_READ_BARRIER(); + + checkpoint_ts = txn_global->checkpoint_timestamp; + + if (checkpoint_ts != 0 && checkpoint_ts < pinned_ts) + *pinned_tsp = checkpoint_ts; } /* * __txn_visible_all_id -- - * Check if a given transaction ID is "globally visible". This is, if - * all sessions in the system will see the transaction ID including the - * ID that belongs to a running checkpoint. + * Check if a given transaction ID is "globally visible". This is, if all sessions in the system + * will see the transaction ID including the ID that belongs to a running checkpoint. */ static inline bool __txn_visible_all_id(WT_SESSION_IMPL *session, uint64_t id) { - uint64_t oldest_id; + uint64_t oldest_id; - oldest_id = __wt_txn_oldest_id(session); + oldest_id = __wt_txn_oldest_id(session); - return (WT_TXNID_LT(id, oldest_id)); + return (WT_TXNID_LT(id, oldest_id)); } /* * __wt_txn_visible_all -- - * Check if a given transaction is "globally visible". This is, if all - * sessions in the system will see the transaction ID including the ID - * that belongs to a running checkpoint. + * Check if a given transaction is "globally visible". This is, if all sessions in the system + * will see the transaction ID including the ID that belongs to a running checkpoint. */ static inline bool -__wt_txn_visible_all( - WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) +__wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) { - wt_timestamp_t pinned_ts; + wt_timestamp_t pinned_ts; - if (!__txn_visible_all_id(session, id)) - return (false); + if (!__txn_visible_all_id(session, id)) + return (false); - /* Timestamp check. */ - if (timestamp == WT_TS_NONE) - return (true); + /* Timestamp check. */ + if (timestamp == WT_TS_NONE) + return (true); - /* - * If no oldest timestamp has been supplied, updates have to stay in - * cache until we are shutting down. 
- */ - if (!S2C(session)->txn_global.has_pinned_timestamp) - return (F_ISSET(S2C(session), WT_CONN_CLOSING)); + /* + * If no oldest timestamp has been supplied, updates have to stay in cache until we are shutting + * down. + */ + if (!S2C(session)->txn_global.has_pinned_timestamp) + return (F_ISSET(S2C(session), WT_CONN_CLOSING)); - __wt_txn_pinned_timestamp(session, &pinned_ts); - return (timestamp <= pinned_ts); + __wt_txn_pinned_timestamp(session, &pinned_ts); + return (timestamp <= pinned_ts); } /* * __wt_txn_upd_visible_all -- - * Is the given update visible to all (possible) readers? + * Is the given update visible to all (possible) readers? */ static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - if (upd->prepare_state == WT_PREPARE_LOCKED || - upd->prepare_state == WT_PREPARE_INPROGRESS) - return (false); - - /* - * This function is used to determine when an update is obsolete: that - * should take into account the durable timestamp which is greater than - * or equal to the start timestamp. - */ - return (__wt_txn_visible_all(session, upd->txnid, upd->durable_ts)); + if (upd->prepare_state == WT_PREPARE_LOCKED || upd->prepare_state == WT_PREPARE_INPROGRESS) + return (false); + + /* + * This function is used to determine when an update is obsolete: that should take into account + * the durable timestamp which is greater than or equal to the start timestamp. + */ + return (__wt_txn_visible_all(session, upd->txnid, upd->durable_ts)); } /* * __txn_visible_id -- - * Can the current transaction see the given ID? + * Can the current transaction see the given ID? */ static inline bool __txn_visible_id(WT_SESSION_IMPL *session, uint64_t id) { - WT_TXN *txn; - bool found; - - txn = &session->txn; - - /* Changes with no associated transaction are always visible. */ - if (id == WT_TXN_NONE) - return (true); - - /* Nobody sees the results of aborted transactions. 
*/ - if (id == WT_TXN_ABORTED) - return (false); - - /* Read-uncommitted transactions see all other changes. */ - if (txn->isolation == WT_ISO_READ_UNCOMMITTED) - return (true); - - /* - * If we don't have a transactional snapshot, only make stable updates - * visible. - */ - if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) - return (__txn_visible_all_id(session, id)); - - /* Transactions see their own changes. */ - if (id == txn->id) - return (true); - - /* - * WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is - * not the result of a concurrent transaction, that is, if was - * committed before the snapshot was taken. - * - * The order here is important: anything newer than the maximum ID we - * saw when taking the snapshot should be invisible, even if the - * snapshot is empty. - */ - if (WT_TXNID_LE(txn->snap_max, id)) - return (false); - if (txn->snapshot_count == 0 || WT_TXNID_LT(id, txn->snap_min)) - return (true); - - WT_BINARY_SEARCH(id, txn->snapshot, txn->snapshot_count, found); - return (!found); + WT_TXN *txn; + bool found; + + txn = &session->txn; + + /* Changes with no associated transaction are always visible. */ + if (id == WT_TXN_NONE) + return (true); + + /* Nobody sees the results of aborted transactions. */ + if (id == WT_TXN_ABORTED) + return (false); + + /* Read-uncommitted transactions see all other changes. */ + if (txn->isolation == WT_ISO_READ_UNCOMMITTED) + return (true); + + /* + * If we don't have a transactional snapshot, only make stable updates visible. + */ + if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) + return (__txn_visible_all_id(session, id)); + + /* Transactions see their own changes. */ + if (id == txn->id) + return (true); + + /* + * WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is + * not the result of a concurrent transaction, that is, if was + * committed before the snapshot was taken. 
+ * + * The order here is important: anything newer than the maximum ID we + * saw when taking the snapshot should be invisible, even if the + * snapshot is empty. + */ + if (WT_TXNID_LE(txn->snap_max, id)) + return (false); + if (txn->snapshot_count == 0 || WT_TXNID_LT(id, txn->snap_min)) + return (true); + + WT_BINARY_SEARCH(id, txn->snapshot, txn->snapshot_count, found); + return (!found); } /* * __wt_txn_visible -- - * Can the current transaction see the given ID / timestamp? + * Can the current transaction see the given ID / timestamp? */ static inline bool -__wt_txn_visible( - WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) +__wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp) { - WT_TXN *txn; + WT_TXN *txn; - txn = &session->txn; + txn = &session->txn; - if (!__txn_visible_id(session, id)) - return (false); + if (!__txn_visible_id(session, id)) + return (false); - /* Transactions read their writes, regardless of timestamps. */ - if (F_ISSET(&session->txn, WT_TXN_HAS_ID) && id == session->txn.id) - return (true); + /* Transactions read their writes, regardless of timestamps. */ + if (F_ISSET(&session->txn, WT_TXN_HAS_ID) && id == session->txn.id) + return (true); - /* Timestamp check. */ - if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) || timestamp == WT_TS_NONE) - return (true); + /* Timestamp check. */ + if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) || timestamp == WT_TS_NONE) + return (true); - return (timestamp <= txn->read_timestamp); + return (timestamp <= txn->read_timestamp); } /* * __wt_txn_upd_visible_type -- - * Visible type of given update for the current transaction. + * Visible type of given update for the current transaction. */ static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - uint8_t prepare_state, previous_state; - bool upd_visible; - - for (;;__wt_yield()) { - /* Prepare state change is in progress, yield and try again. 
*/ - WT_ORDERED_READ(prepare_state, upd->prepare_state); - if (prepare_state == WT_PREPARE_LOCKED) - continue; - - upd_visible = - __wt_txn_visible(session, upd->txnid, upd->start_ts); - - /* - * The visibility check is only valid if the update does not - * change state. If the state does change, recheck visibility. - */ - previous_state = prepare_state; - WT_ORDERED_READ(prepare_state, upd->prepare_state); - if (previous_state == prepare_state) - break; - - WT_STAT_CONN_INCR(session, prepared_transition_blocked_page); - } - - if (!upd_visible) - return (WT_VISIBLE_FALSE); - - /* Ignore the prepared update, if transaction configuration says so. */ - if (prepare_state == WT_PREPARE_INPROGRESS) - return (F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ? - WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE); - - return (WT_VISIBLE_TRUE); + uint8_t prepare_state, previous_state; + bool upd_visible; + + for (;; __wt_yield()) { + /* Prepare state change is in progress, yield and try again. */ + WT_ORDERED_READ(prepare_state, upd->prepare_state); + if (prepare_state == WT_PREPARE_LOCKED) + continue; + + upd_visible = __wt_txn_visible(session, upd->txnid, upd->start_ts); + + /* + * The visibility check is only valid if the update does not change state. If the state does + * change, recheck visibility. + */ + previous_state = prepare_state; + WT_ORDERED_READ(prepare_state, upd->prepare_state); + if (previous_state == prepare_state) + break; + + WT_STAT_CONN_INCR(session, prepared_transition_blocked_page); + } + + if (!upd_visible) + return (WT_VISIBLE_FALSE); + + /* Ignore the prepared update, if transaction configuration says so. */ + if (prepare_state == WT_PREPARE_INPROGRESS) + return ( + F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ? WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE); + + return (WT_VISIBLE_TRUE); } /* * __wt_txn_upd_durable -- - * Can the current transaction make the given update durable. + * Can the current transaction make the given update durable. 
*/ static inline bool __wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - /* If update is visible then check if it is durable. */ - if (__wt_txn_upd_visible_type(session, upd) != WT_VISIBLE_TRUE) - return (false); - return (__wt_txn_visible(session, upd->txnid, upd->durable_ts)); + /* If update is visible then check if it is durable. */ + if (__wt_txn_upd_visible_type(session, upd) != WT_VISIBLE_TRUE) + return (false); + return (__wt_txn_visible(session, upd->txnid, upd->durable_ts)); } /* * __wt_txn_upd_visible -- - * Can the current transaction see the given update. + * Can the current transaction see the given update. */ static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - return (__wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE); + return (__wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE); } /* * __wt_txn_read -- - * Get the first visible update in a list (or NULL if none are visible). + * Get the first visible update in a list (or NULL if none are visible). */ static inline int __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp) { - static WT_UPDATE tombstone = { - .txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE - }; - WT_VISIBLE_TYPE upd_visible; - bool skipped_birthmark; - - *updp = NULL; - for (skipped_birthmark = false; upd != NULL; upd = upd->next) { - /* Skip reserved place-holders, they're never visible. */ - if (upd->type != WT_UPDATE_RESERVE) { - upd_visible = __wt_txn_upd_visible_type(session, upd); - if (upd_visible == WT_VISIBLE_TRUE) - break; - if (upd_visible == WT_VISIBLE_PREPARE) - return (WT_PREPARE_CONFLICT); - } - /* An invisible birthmark is equivalent to a tombstone. */ - if (upd->type == WT_UPDATE_BIRTHMARK) - skipped_birthmark = true; - } - - if (upd == NULL && skipped_birthmark) - upd = &tombstone; - - *updp = upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? 
NULL : upd; - return (0); + static WT_UPDATE tombstone = {.txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE}; + WT_VISIBLE_TYPE upd_visible; + bool skipped_birthmark; + + *updp = NULL; + for (skipped_birthmark = false; upd != NULL; upd = upd->next) { + /* Skip reserved place-holders, they're never visible. */ + if (upd->type != WT_UPDATE_RESERVE) { + upd_visible = __wt_txn_upd_visible_type(session, upd); + if (upd_visible == WT_VISIBLE_TRUE) + break; + if (upd_visible == WT_VISIBLE_PREPARE) + return (WT_PREPARE_CONFLICT); + } + /* An invisible birthmark is equivalent to a tombstone. */ + if (upd->type == WT_UPDATE_BIRTHMARK) + skipped_birthmark = true; + } + + if (upd == NULL && skipped_birthmark) + upd = &tombstone; + + *updp = upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd; + return (0); } /* * __wt_txn_begin -- - * Begin a transaction. + * Begin a transaction. */ static inline int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) { - WT_TXN *txn; + WT_TXN *txn; - txn = &session->txn; - txn->isolation = session->isolation; - txn->txn_logsync = S2C(session)->txn_logsync; + txn = &session->txn; + txn->isolation = session->isolation; + txn->txn_logsync = S2C(session)->txn_logsync; - if (cfg != NULL) - WT_RET(__wt_txn_config(session, cfg)); + if (cfg != NULL) + WT_RET(__wt_txn_config(session, cfg)); - /* - * Allocate a snapshot if required. Named snapshot transactions already - * have an ID setup. - */ - if (txn->isolation == WT_ISO_SNAPSHOT && - !F_ISSET(txn, WT_TXN_NAMED_SNAPSHOT)) { - if (session->ncursors > 0) - WT_RET(__wt_session_copy_values(session)); + /* + * Allocate a snapshot if required. Named snapshot transactions already have an ID setup. + */ + if (txn->isolation == WT_ISO_SNAPSHOT && !F_ISSET(txn, WT_TXN_NAMED_SNAPSHOT)) { + if (session->ncursors > 0) + WT_RET(__wt_session_copy_values(session)); - /* Stall here if the cache is completely full. 
*/ - WT_RET(__wt_cache_eviction_check(session, false, true, NULL)); + /* Stall here if the cache is completely full. */ + WT_RET(__wt_cache_eviction_check(session, false, true, NULL)); - __wt_txn_get_snapshot(session); - } + __wt_txn_get_snapshot(session); + } - F_SET(txn, WT_TXN_RUNNING); - if (F_ISSET(S2C(session), WT_CONN_READONLY)) - F_SET(txn, WT_TXN_READONLY); + F_SET(txn, WT_TXN_RUNNING); + if (F_ISSET(S2C(session), WT_CONN_READONLY)) + F_SET(txn, WT_TXN_READONLY); - return (0); + return (0); } /* * __wt_txn_autocommit_check -- - * If an auto-commit transaction is required, start one. + * If an auto-commit transaction is required, start one. */ static inline int __wt_txn_autocommit_check(WT_SESSION_IMPL *session) { - WT_TXN *txn; - - txn = &session->txn; - if (F_ISSET(txn, WT_TXN_AUTOCOMMIT)) { - F_CLR(txn, WT_TXN_AUTOCOMMIT); - return (__wt_txn_begin(session, NULL)); - } - return (0); + WT_TXN *txn; + + txn = &session->txn; + if (F_ISSET(txn, WT_TXN_AUTOCOMMIT)) { + F_CLR(txn, WT_TXN_AUTOCOMMIT); + return (__wt_txn_begin(session, NULL)); + } + return (0); } /* * __wt_txn_idle_cache_check -- - * If there is no transaction active in this thread and we haven't checked - * if the cache is full, do it now. If we have to block for eviction, - * this is the best time to do it. + * If there is no transaction active in this thread and we haven't checked if the cache is full, + * do it now. If we have to block for eviction, this is the best time to do it. */ static inline int __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) { - WT_TXN *txn; - WT_TXN_STATE *txn_state; - - txn = &session->txn; - txn_state = WT_SESSION_TXN_STATE(session); - - /* - * Check the published snap_min because read-uncommitted never sets - * WT_TXN_HAS_SNAPSHOT. We don't have any transaction information at - * this point, so assume the transaction will be read-only. The dirty - * cache check will be performed when the transaction completes, if - * necessary. 
- */ - if (F_ISSET(txn, WT_TXN_RUNNING) && - !F_ISSET(txn, WT_TXN_HAS_ID) && txn_state->pinned_id == WT_TXN_NONE) - WT_RET(__wt_cache_eviction_check(session, false, true, NULL)); - - return (0); + WT_TXN *txn; + WT_TXN_STATE *txn_state; + + txn = &session->txn; + txn_state = WT_SESSION_TXN_STATE(session); + + /* + * Check the published snap_min because read-uncommitted never sets WT_TXN_HAS_SNAPSHOT. We + * don't have any transaction information at this point, so assume the transaction will be + * read-only. The dirty cache check will be performed when the transaction completes, if + * necessary. + */ + if (F_ISSET(txn, WT_TXN_RUNNING) && !F_ISSET(txn, WT_TXN_HAS_ID) && + txn_state->pinned_id == WT_TXN_NONE) + WT_RET(__wt_cache_eviction_check(session, false, true, NULL)); + + return (0); } /* * __wt_txn_id_alloc -- - * Allocate a new transaction ID. + * Allocate a new transaction ID. */ static inline uint64_t __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish) { - WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; - uint64_t id; - - txn_global = &S2C(session)->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); - - /* - * Allocating transaction IDs involves several steps. - * - * Firstly, publish that this transaction is allocating its ID, then - * publish the transaction ID as the current global ID. Note that this - * transaction ID might not be unique among threads and hence not valid - * at this moment. The flag will notify other transactions that are - * attempting to get their own snapshot for this transaction ID to - * retry. - * - * Then we do an atomic increment to allocate a unique ID. This will - * give the valid ID to this transaction that we publish to the global - * transaction table. - * - * We want the global value to lead the allocated values, so that any - * allocated transaction ID eventually becomes globally visible. 
When - * there are no transactions running, the oldest_id will reach the - * global current ID, so we want post-increment semantics. Our atomic - * add primitive does pre-increment, so adjust the result here. - * - * We rely on atomic reads of the current ID to create snapshots, so - * for unlocked reads to be well defined, we must use an atomic - * increment here. - */ - if (publish) { - WT_PUBLISH(txn_state->is_allocating, true); - WT_PUBLISH(txn_state->id, txn_global->current); - id = __wt_atomic_addv64(&txn_global->current, 1) - 1; - session->txn.id = id; - WT_PUBLISH(txn_state->id, id); - WT_PUBLISH(txn_state->is_allocating, false); - } else - id = __wt_atomic_addv64(&txn_global->current, 1) - 1; - - return (id); + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + uint64_t id; + + txn_global = &S2C(session)->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); + + /* + * Allocating transaction IDs involves several steps. + * + * Firstly, publish that this transaction is allocating its ID, then + * publish the transaction ID as the current global ID. Note that this + * transaction ID might not be unique among threads and hence not valid + * at this moment. The flag will notify other transactions that are + * attempting to get their own snapshot for this transaction ID to + * retry. + * + * Then we do an atomic increment to allocate a unique ID. This will + * give the valid ID to this transaction that we publish to the global + * transaction table. + * + * We want the global value to lead the allocated values, so that any + * allocated transaction ID eventually becomes globally visible. When + * there are no transactions running, the oldest_id will reach the + * global current ID, so we want post-increment semantics. Our atomic + * add primitive does pre-increment, so adjust the result here. + * + * We rely on atomic reads of the current ID to create snapshots, so + * for unlocked reads to be well defined, we must use an atomic + * increment here. 
+ */ + if (publish) { + WT_PUBLISH(txn_state->is_allocating, true); + WT_PUBLISH(txn_state->id, txn_global->current); + id = __wt_atomic_addv64(&txn_global->current, 1) - 1; + session->txn.id = id; + WT_PUBLISH(txn_state->id, id); + WT_PUBLISH(txn_state->is_allocating, false); + } else + id = __wt_atomic_addv64(&txn_global->current, 1) - 1; + + return (id); } /* * __wt_txn_id_check -- - * A transaction is going to do an update, allocate a transaction ID. + * A transaction is going to do an update, allocate a transaction ID. */ static inline int __wt_txn_id_check(WT_SESSION_IMPL *session) { - WT_TXN *txn; + WT_TXN *txn; - txn = &session->txn; + txn = &session->txn; - WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); + WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); - if (F_ISSET(txn, WT_TXN_HAS_ID)) - return (0); + if (F_ISSET(txn, WT_TXN_HAS_ID)) + return (0); - /* If the transaction is idle, check that the cache isn't full. */ - WT_RET(__wt_txn_idle_cache_check(session)); + /* If the transaction is idle, check that the cache isn't full. */ + WT_RET(__wt_txn_idle_cache_check(session)); - WT_IGNORE_RET(__wt_txn_id_alloc(session, true)); + WT_IGNORE_RET(__wt_txn_id_alloc(session, true)); - /* - * If we have used 64-bits of transaction IDs, there is nothing - * more we can do. - */ - if (txn->id == WT_TXN_ABORTED) - WT_RET_MSG(session, WT_ERROR, "out of transaction IDs"); - F_SET(txn, WT_TXN_HAS_ID); + /* + * If we have used 64-bits of transaction IDs, there is nothing more we can do. + */ + if (txn->id == WT_TXN_ABORTED) + WT_RET_MSG(session, WT_ERROR, "out of transaction IDs"); + F_SET(txn, WT_TXN_HAS_ID); - return (0); + return (0); } /* * __wt_txn_search_check -- - * Check if the current transaction can search. + * Check if the current transaction can search. 
*/ static inline int __wt_txn_search_check(WT_SESSION_IMPL *session) { - WT_BTREE *btree; - WT_TXN *txn; - - txn = &session->txn; - btree = S2BT(session); - /* - * If the user says a table should always use a read timestamp, - * verify this transaction has one. Same if it should never have - * a read timestamp. - */ - if (!F_ISSET(S2C(session), WT_CONN_RECOVERING) && - FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) && - !F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) - WT_RET_MSG(session, EINVAL, "read_timestamp required and " - "none set on this transaction"); - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER) && - F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) - WT_RET_MSG(session, EINVAL, "no read_timestamp required and " - "timestamp set on this transaction"); - return (0); + WT_BTREE *btree; + WT_TXN *txn; + + txn = &session->txn; + btree = S2BT(session); + /* + * If the user says a table should always use a read timestamp, verify this transaction has one. + * Same if it should never have a read timestamp. + */ + if (!F_ISSET(S2C(session), WT_CONN_RECOVERING) && + FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) && + !F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) + WT_RET_MSG(session, EINVAL, + "read_timestamp required and " + "none set on this transaction"); + if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER) && + F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) + WT_RET_MSG(session, EINVAL, + "no read_timestamp required and " + "timestamp set on this transaction"); + return (0); } /* * __wt_txn_update_check -- - * Check if the current transaction can update an item. + * Check if the current transaction can update an item. 
*/ static inline int __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - WT_TXN *txn; - WT_TXN_GLOBAL *txn_global; - bool ignore_prepare_set; - - txn = &session->txn; - txn_global = &S2C(session)->txn_global; - - if (txn->isolation != WT_ISO_SNAPSHOT) - return (0); - - if (txn_global->debug_rollback != 0 && - ++txn_global->debug_ops % txn_global->debug_rollback == 0) - return (__wt_txn_rollback_required(session, - "debug mode simulated conflict")); - /* - * Always include prepared transactions in this check: they are not - * supposed to affect visibility for update operations. - */ - ignore_prepare_set = F_ISSET(txn, WT_TXN_IGNORE_PREPARE); - F_CLR(txn, WT_TXN_IGNORE_PREPARE); - for (;upd != NULL && !__wt_txn_upd_visible(session, upd); - upd = upd->next) { - if (upd->txnid != WT_TXN_ABORTED) { - if (ignore_prepare_set) - F_SET(txn, WT_TXN_IGNORE_PREPARE); - WT_STAT_CONN_INCR(session, txn_update_conflict); - WT_STAT_DATA_INCR(session, txn_update_conflict); - return (__wt_txn_rollback_required(session, - "conflict between concurrent operations")); - } - } - - if (ignore_prepare_set) - F_SET(txn, WT_TXN_IGNORE_PREPARE); - return (0); + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + bool ignore_prepare_set; + + txn = &session->txn; + txn_global = &S2C(session)->txn_global; + + if (txn->isolation != WT_ISO_SNAPSHOT) + return (0); + + if (txn_global->debug_rollback != 0 && + ++txn_global->debug_ops % txn_global->debug_rollback == 0) + return (__wt_txn_rollback_required(session, "debug mode simulated conflict")); + /* + * Always include prepared transactions in this check: they are not supposed to affect + * visibility for update operations. 
+ */ + ignore_prepare_set = F_ISSET(txn, WT_TXN_IGNORE_PREPARE); + F_CLR(txn, WT_TXN_IGNORE_PREPARE); + for (; upd != NULL && !__wt_txn_upd_visible(session, upd); upd = upd->next) { + if (upd->txnid != WT_TXN_ABORTED) { + if (ignore_prepare_set) + F_SET(txn, WT_TXN_IGNORE_PREPARE); + WT_STAT_CONN_INCR(session, txn_update_conflict); + WT_STAT_DATA_INCR(session, txn_update_conflict); + return (__wt_txn_rollback_required(session, "conflict between concurrent operations")); + } + } + + if (ignore_prepare_set) + F_SET(txn, WT_TXN_IGNORE_PREPARE); + return (0); } /* * __wt_txn_read_last -- - * Called when the last page for a session is released. + * Called when the last page for a session is released. */ static inline void __wt_txn_read_last(WT_SESSION_IMPL *session) { - WT_TXN *txn; - - txn = &session->txn; - - /* - * Release the snap_min ID we put in the global table. - * - * If the isolation has been temporarily forced, don't touch the - * snapshot here: it will be restored by WT_WITH_TXN_ISOLATION. - */ - if ((!F_ISSET(txn, WT_TXN_RUNNING) || - txn->isolation != WT_ISO_SNAPSHOT) && txn->forced_iso == 0) - __wt_txn_release_snapshot(session); + WT_TXN *txn; + + txn = &session->txn; + + /* + * Release the snap_min ID we put in the global table. + * + * If the isolation has been temporarily forced, don't touch the + * snapshot here: it will be restored by WT_WITH_TXN_ISOLATION. + */ + if ((!F_ISSET(txn, WT_TXN_RUNNING) || txn->isolation != WT_ISO_SNAPSHOT) && + txn->forced_iso == 0) + __wt_txn_release_snapshot(session); } /* * __wt_txn_cursor_op -- - * Called for each cursor operation. + * Called for each cursor operation. 
*/ static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session) { - WT_TXN *txn; - WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; - - txn = &session->txn; - txn_global = &S2C(session)->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); - - /* - * We are about to read data, which means we need to protect against - * updates being freed from underneath this cursor. Read-uncommitted - * isolation protects values by putting a transaction ID in the global - * table to prevent any update that we are reading from being freed. - * Other isolation levels get a snapshot to protect their reads. - * - * !!! - * Note: We are updating the global table unprotected, so the global - * oldest_id may move past our snap_min if a scan races with this value - * being published. That said, read-uncommitted operations always see - * the most recent update for each record that has not been aborted - * regardless of the snap_min value published here. Even if there is a - * race while publishing this ID, it prevents the oldest ID from moving - * further forward, so that once a read-uncommitted cursor is - * positioned on a value, it can't be freed. - */ - if (txn->isolation == WT_ISO_READ_UNCOMMITTED) { - if (txn_state->pinned_id == WT_TXN_NONE) - txn_state->pinned_id = txn_global->last_running; - if (txn_state->metadata_pinned == WT_TXN_NONE) - txn_state->metadata_pinned = txn_state->pinned_id; - } else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) - __wt_txn_get_snapshot(session); + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + + txn = &session->txn; + txn_global = &S2C(session)->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); + + /* + * We are about to read data, which means we need to protect against + * updates being freed from underneath this cursor. Read-uncommitted + * isolation protects values by putting a transaction ID in the global + * table to prevent any update that we are reading from being freed. 
+ * Other isolation levels get a snapshot to protect their reads. + * + * !!! + * Note: We are updating the global table unprotected, so the global + * oldest_id may move past our snap_min if a scan races with this value + * being published. That said, read-uncommitted operations always see + * the most recent update for each record that has not been aborted + * regardless of the snap_min value published here. Even if there is a + * race while publishing this ID, it prevents the oldest ID from moving + * further forward, so that once a read-uncommitted cursor is + * positioned on a value, it can't be freed. + */ + if (txn->isolation == WT_ISO_READ_UNCOMMITTED) { + if (txn_state->pinned_id == WT_TXN_NONE) + txn_state->pinned_id = txn_global->last_running; + if (txn_state->metadata_pinned == WT_TXN_NONE) + txn_state->metadata_pinned = txn_state->pinned_id; + } else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) + __wt_txn_get_snapshot(session); } /* * __wt_txn_am_oldest -- - * Am I the oldest transaction in the system? + * Am I the oldest transaction in the system? */ static inline bool __wt_txn_am_oldest(WT_SESSION_IMPL *session) { - WT_CONNECTION_IMPL *conn; - WT_TXN *txn; - WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *s; - uint64_t id; - uint32_t i, session_cnt; - - conn = S2C(session); - txn = &session->txn; - txn_global = &conn->txn_global; - - if (txn->id == WT_TXN_NONE || F_ISSET(txn, WT_TXN_PREPARE)) - return (false); - - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) - /* - * We are checking if the transaction is oldest one in the - * system. It is safe to ignore any sessions that are - * allocating transaction IDs, since we already have an ID, - * they are guaranteed to be newer. 
- */ - if (!s->is_allocating && (id = s->id) != WT_TXN_NONE && - WT_TXNID_LT(id, txn->id)) - return (false); - - return (true); + WT_CONNECTION_IMPL *conn; + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *s; + uint64_t id; + uint32_t i, session_cnt; + + conn = S2C(session); + txn = &session->txn; + txn_global = &conn->txn_global; + + if (txn->id == WT_TXN_NONE || F_ISSET(txn, WT_TXN_PREPARE)) + return (false); + + WT_ORDERED_READ(session_cnt, conn->session_cnt); + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) + /* + * We are checking if the transaction is oldest one in the system. It is safe to ignore any + * sessions that are allocating transaction IDs, since we already have an ID, they are + * guaranteed to be newer. + */ + if (!s->is_allocating && (id = s->id) != WT_TXN_NONE && WT_TXNID_LT(id, txn->id)) + return (false); + + return (true); } /* * __wt_txn_activity_check -- - * Check whether there are any running transactions. + * Check whether there are any running transactions. */ static inline int __wt_txn_activity_check(WT_SESSION_IMPL *session, bool *txn_active) { - WT_TXN_GLOBAL *txn_global; + WT_TXN_GLOBAL *txn_global; + + txn_global = &S2C(session)->txn_global; - txn_global = &S2C(session)->txn_global; + /* + * Default to true - callers shouldn't rely on this if an error is returned, but let's give them + * deterministic behaviour if they do. + */ + *txn_active = true; - /* - * Ensure the oldest ID is as up to date as possible so we can use a - * simple check to find if there are any running transactions. - */ - WT_RET(__wt_txn_update_oldest(session, - WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); + /* + * Ensure the oldest ID is as up to date as possible so we can use a simple check to find if + * there are any running transactions. 
+ */ + WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); - *txn_active = (txn_global->oldest_id != txn_global->current || - txn_global->metadata_pinned != txn_global->current); + *txn_active = (txn_global->oldest_id != txn_global->current || + txn_global->metadata_pinned != txn_global->current); - return (0); + return (0); } diff --git a/src/third_party/wiredtiger/src/include/verify_build.h b/src/third_party/wiredtiger/src/include/verify_build.h index dc085826241..a72289cc03f 100644 --- a/src/third_party/wiredtiger/src/include/verify_build.h +++ b/src/third_party/wiredtiger/src/include/verify_build.h @@ -7,10 +7,9 @@ */ /* - * NOTE: If you see a compile failure in this file, your compiler is laying out - * structs in memory in a way WiredTiger does not expect. Please refer to the - * build instructions in the documentation (docs/html/install.html) for more - * information. + * NOTE: If you see a compile failure in this file, your compiler is laying out structs in memory in + * a way WiredTiger does not expect. Please refer to the build instructions in the documentation + * (docs/html/install.html) for more information. 
*/ /* @@ -31,63 +30,59 @@ * For more details about why this works, see * http://scaryreasoner.wordpress.com/2009/02/28/ */ -#define WT_STATIC_ASSERT(cond) (void)sizeof(char[1 - 2 * !(cond)]) +#define WT_STATIC_ASSERT(cond) (void)sizeof(char[1 - 2 * !(cond)]) -#define WT_SIZE_CHECK(type, e) do { \ - char __check_##type[1 - 2 * !(sizeof(type) == (e))]; \ - (void)__check_##type; \ -} while (0) +#define WT_SIZE_CHECK(type, e) \ + do { \ + char __check_##type[1 - 2 * !(sizeof(type) == (e))]; \ + (void)__check_##type; \ + } while (0) -#define WT_ALIGN_CHECK(type, a) \ - WT_STATIC_ASSERT(WT_ALIGN(sizeof(type), (a)) == sizeof(type)) +#define WT_ALIGN_CHECK(type, a) WT_STATIC_ASSERT(WT_ALIGN(sizeof(type), (a)) == sizeof(type)) /* * __wt_verify_build -- - * This function is never called: it exists so there is a place for code - * that checks build-time conditions. + * This function is never called: it exists so there is a place for code that checks build-time + * conditions. */ static inline void __wt_verify_build(void) { - /* Check specific structures weren't padded. */ - WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE); - WT_SIZE_CHECK(WT_REF, WT_REF_SIZE); + /* Check specific structures weren't padded. */ + WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE); + WT_SIZE_CHECK(WT_REF, WT_REF_SIZE); - /* - * WT_UPDATE is special: we arrange fields to avoid padding within the - * structure but it could be padded at the end depending on the - * timestamp size. Further check that the data field in the update - * structure is where we expect it. - */ - WT_SIZE_CHECK(WT_UPDATE, WT_ALIGN(WT_UPDATE_SIZE, 8)); - WT_STATIC_ASSERT(offsetof(WT_UPDATE, data) == WT_UPDATE_SIZE); + /* + * WT_UPDATE is special: we arrange fields to avoid padding within the structure but it could be + * padded at the end depending on the timestamp size. Further check that the data field in the + * update structure is where we expect it. 
+ */ + WT_SIZE_CHECK(WT_UPDATE, WT_ALIGN(WT_UPDATE_SIZE, 8)); + WT_STATIC_ASSERT(offsetof(WT_UPDATE, data) == WT_UPDATE_SIZE); - /* Check specific structures were padded. */ -#define WT_PADDING_CHECK(s) \ - WT_STATIC_ASSERT( \ - sizeof(s) > WT_CACHE_LINE_ALIGNMENT || \ - sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0) - WT_PADDING_CHECK(WT_LOGSLOT); - WT_PADDING_CHECK(WT_TXN_STATE); +/* Check specific structures were padded. */ +#define WT_PADDING_CHECK(s) \ + WT_STATIC_ASSERT( \ + sizeof(s) > WT_CACHE_LINE_ALIGNMENT || sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0) + WT_PADDING_CHECK(WT_LOGSLOT); + WT_PADDING_CHECK(WT_TXN_STATE); - /* - * The btree code encodes key/value pairs in size_t's, and requires at - * least 8B size_t's. - */ - WT_STATIC_ASSERT(sizeof(size_t) >= 8); + /* + * The btree code encodes key/value pairs in size_t's, and requires at least 8B size_t's. + */ + WT_STATIC_ASSERT(sizeof(size_t) >= 8); - /* - * We require a wt_off_t fit into an 8B chunk because 8B is the largest - * integral value we can encode into an address cookie. - * - * WiredTiger has never been tested on a system with 4B file offsets, - * disallow them for now. - */ - WT_STATIC_ASSERT(sizeof(wt_off_t) == 8); + /* + * We require a wt_off_t fit into an 8B chunk because 8B is the largest + * integral value we can encode into an address cookie. + * + * WiredTiger has never been tested on a system with 4B file offsets, + * disallow them for now. + */ + WT_STATIC_ASSERT(sizeof(wt_off_t) == 8); - /* - * We require a time_t be an integral type and fit into a uint64_t for - * simplicity. - */ - WT_STATIC_ASSERT(sizeof(time_t) <= sizeof(uint64_t)); + /* + * We require a time_t be an integral type and fit into a uint64_t for simplicity. 
+ */ + WT_STATIC_ASSERT(sizeof(time_t) <= sizeof(uint64_t)); } diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 13a3c637407..b9fed57f9ad 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -655,15 +655,13 @@ struct __wt_cursor { * * @param cursor the cursor handle * @configstart{WT_CURSOR.reconfigure, see dist/api_data.py} - * @config{append, append the value as a new record\, creating a new - * record number key; valid only for cursors with record number keys., a - * boolean flag; default \c false.} - * @config{overwrite, configures whether the cursor's insert\, update - * and remove methods check the existing state of the record. If \c - * overwrite is \c false\, WT_CURSOR::insert fails with - * ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and - * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not - * exist., a boolean flag; default \c true.} + * @config{append, append the value as a new record\, creating a new record number key; + * valid only for cursors with record number keys., a boolean flag; default \c false.} + * @config{overwrite, configures whether the cursor's insert\, update and remove methods + * check the existing state of the record. If \c overwrite is \c false\, WT_CURSOR::insert + * fails with ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and + * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not exist., a boolean flag; + * default \c true.} * @configend * @errors */ @@ -973,21 +971,18 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.reconfigure, see dist/api_data.py} - * @config{cache_cursors, enable caching of cursors for reuse. 
Any - * calls to WT_CURSOR::close for a cursor created in this session will - * mark the cursor as cached and keep it available to be reused for - * later calls to WT_SESSION::open_cursor. Cached cursors may be - * eventually closed. This value is inherited from ::wiredtiger_open \c + * @config{cache_cursors, enable caching of cursors for reuse. Any calls to + * WT_CURSOR::close for a cursor created in this session will mark the cursor as cached and + * keep it available to be reused for later calls to WT_SESSION::open_cursor. Cached + * cursors may be eventually closed. This value is inherited from ::wiredtiger_open \c * cache_cursors., a boolean flag; default \c true.} - * @config{ignore_cache_size, when set\, operations performed by this - * session ignore the cache size and are not blocked when the cache is - * full. Note that use of this option for operations that create cache - * pressure can starve ordinary sessions that obey the cache size., a - * boolean flag; default \c false.} - * @config{isolation, the default isolation level for operations in this - * session., a string\, chosen from the following options: \c - * "read-uncommitted"\, \c "read-committed"\, \c "snapshot"; default \c - * read-committed.} + * @config{ignore_cache_size, when set\, operations performed by this session ignore the + * cache size and are not blocked when the cache is full. 
Note that use of this option for + * operations that create cache pressure can starve ordinary sessions that obey the cache + * size., a boolean flag; default \c false.} + * @config{isolation, the default isolation level for operations in this session., a + * string\, chosen from the following options: \c "read-uncommitted"\, \c "read-committed"\, + * \c "snapshot"; default \c read-committed.} * @configend * @errors */ @@ -1047,86 +1042,67 @@ struct __wt_session { * @copydoc doc_cursor_types * @param to_dup a cursor to duplicate or gather statistics on * @configstart{WT_SESSION.open_cursor, see dist/api_data.py} - * @config{append, append the value as a new record\, creating a new - * record number key; valid only for cursors with record number keys., a - * boolean flag; default \c false.} - * @config{bulk, configure the cursor for bulk-loading\, a fast\, - * initial load path (see @ref tune_bulk_load for more information). - * Bulk-load may only be used for newly created objects and applications - * should use the WT_CURSOR::insert method to insert rows. When - * bulk-loading\, rows must be loaded in sorted order. The value is - * usually a true/false flag; when bulk-loading fixed-length column - * store objects\, the special value \c bitmap allows chunks of a memory - * resident bitmap to be loaded directly into a file by passing a \c - * WT_ITEM to WT_CURSOR::set_value where the \c size field indicates the - * number of records in the bitmap (as specified by the object's \c - * value_format configuration). Bulk-loaded bitmap values must end on a - * byte boundary relative to the bit count (except for the last set of - * values loaded)., a string; default \c false.} - * @config{checkpoint, the name of a checkpoint to open (the reserved - * name "WiredTigerCheckpoint" opens the most recent internal checkpoint - * taken for the object). 
The cursor does not support data - * modification., a string; default empty.} - * @config{dump, configure the cursor for dump format inputs and - * outputs: "hex" selects a simple hexadecimal format\, "json" selects a - * JSON format with each record formatted as fields named by column - * names if available\, and "print" selects a format where only - * non-printing characters are hexadecimal encoded. These formats are - * compatible with the @ref util_dump and @ref util_load commands., a - * string\, chosen from the following options: \c "hex"\, \c "json"\, \c - * "print"; default empty.} - * @config{next_random, configure the cursor to return a pseudo-random - * record from the object when the WT_CURSOR::next method is called; - * valid only for row-store cursors. See @ref cursor_random for - * details., a boolean flag; default \c false.} - * @config{next_random_sample_size, cursors configured by \c next_random - * to return pseudo-random records from the object randomly select from - * the entire object\, by default. Setting \c next_random_sample_size - * to a non-zero value sets the number of samples the application - * expects to take using the \c next_random cursor. A cursor configured - * with both \c next_random and \c next_random_sample_size attempts to - * divide the object into \c next_random_sample_size equal-sized - * pieces\, and each retrieval returns a record from one of those - * pieces. See @ref cursor_random for details., a string; default \c - * 0.} - * @config{overwrite, configures whether the cursor's insert\, update - * and remove methods check the existing state of the record. If \c - * overwrite is \c false\, WT_CURSOR::insert fails with - * ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and - * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not - * exist., a boolean flag; default \c true.} - * @config{raw, ignore the encodings for the key and value\, manage data - * as if the formats were \c "u". 
See @ref cursor_raw for details., a - * boolean flag; default \c false.} - * @config{read_once, results that are brought into cache from disk by - * this cursor will be given less priority in the cache., a boolean - * flag; default \c false.} - * @config{readonly, only query operations are supported by this cursor. - * An error is returned if a modification is attempted using the cursor. - * The default is false for all cursor types except for log and metadata - * cursors., a boolean flag; default \c false.} - * @config{statistics, Specify the statistics to be gathered. Choosing - * "all" gathers statistics regardless of cost and may include - * traversing on-disk files; "fast" gathers a subset of relatively - * inexpensive statistics. The selection must agree with the database - * \c statistics configuration specified to ::wiredtiger_open or - * WT_CONNECTION::reconfigure. For example\, "all" or "fast" can be - * configured when the database is configured with "all"\, but the - * cursor open will fail if "all" is specified when the database is - * configured with "fast"\, and the cursor open will fail in all cases - * when the database is configured with "none". If "size" is - * configured\, only the underlying size of the object on disk is filled - * in and the object is not opened. If \c statistics is not - * configured\, the default configuration is the database configuration. - * The "clear" configuration resets statistics after gathering them\, - * where appropriate (for example\, a cache size statistic is not - * cleared\, while the count of cursor insert operations will be - * cleared). 
See @ref statistics for more information., a list\, with - * values chosen from the following options: \c "all"\, \c - * "cache_walk"\, \c "fast"\, \c "clear"\, \c "size"\, \c "tree_walk"; - * default empty.} - * @config{target, if non-empty\, backup the list of objects; valid only - * for a backup data source., a list of strings; default empty.} + * @config{append, append the value as a new record\, creating a new record number key; + * valid only for cursors with record number keys., a boolean flag; default \c false.} + * @config{bulk, configure the cursor for bulk-loading\, a fast\, initial load path (see + * @ref tune_bulk_load for more information). Bulk-load may only be used for newly created + * objects and applications should use the WT_CURSOR::insert method to insert rows. When + * bulk-loading\, rows must be loaded in sorted order. The value is usually a true/false + * flag; when bulk-loading fixed-length column store objects\, the special value \c bitmap + * allows chunks of a memory resident bitmap to be loaded directly into a file by passing a + * \c WT_ITEM to WT_CURSOR::set_value where the \c size field indicates the number of + * records in the bitmap (as specified by the object's \c value_format configuration). + * Bulk-loaded bitmap values must end on a byte boundary relative to the bit count (except + * for the last set of values loaded)., a string; default \c false.} + * @config{checkpoint, the name of a checkpoint to open (the reserved name + * "WiredTigerCheckpoint" opens the most recent internal checkpoint taken for the object). + * The cursor does not support data modification., a string; default empty.} + * @config{dump, configure the cursor for dump format inputs and outputs: "hex" selects a + * simple hexadecimal format\, "json" selects a JSON format with each record formatted as + * fields named by column names if available\, and "print" selects a format where only + * non-printing characters are hexadecimal encoded. 
These formats are compatible with the + * @ref util_dump and @ref util_load commands., a string\, chosen from the following + * options: \c "hex"\, \c "json"\, \c "print"; default empty.} + * @config{next_random, configure the cursor to return a pseudo-random record from the + * object when the WT_CURSOR::next method is called; valid only for row-store cursors. See + * @ref cursor_random for details., a boolean flag; default \c false.} + * @config{next_random_sample_size, cursors configured by \c next_random to return + * pseudo-random records from the object randomly select from the entire object\, by + * default. Setting \c next_random_sample_size to a non-zero value sets the number of + * samples the application expects to take using the \c next_random cursor. A cursor + * configured with both \c next_random and \c next_random_sample_size attempts to divide the + * object into \c next_random_sample_size equal-sized pieces\, and each retrieval returns a + * record from one of those pieces. See @ref cursor_random for details., a string; default + * \c 0.} + * @config{overwrite, configures whether the cursor's insert\, update and remove methods + * check the existing state of the record. If \c overwrite is \c false\, WT_CURSOR::insert + * fails with ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and + * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not exist., a boolean flag; + * default \c true.} + * @config{raw, ignore the encodings for the key and value\, manage data as if the formats + * were \c "u". See @ref cursor_raw for details., a boolean flag; default \c false.} + * @config{read_once, results that are brought into cache from disk by this cursor will be + * given less priority in the cache., a boolean flag; default \c false.} + * @config{readonly, only query operations are supported by this cursor. An error is + * returned if a modification is attempted using the cursor. 
The default is false for all + * cursor types except for log and metadata cursors., a boolean flag; default \c false.} + * @config{statistics, Specify the statistics to be gathered. Choosing "all" gathers + * statistics regardless of cost and may include traversing on-disk files; "fast" gathers a + * subset of relatively inexpensive statistics. The selection must agree with the database + * \c statistics configuration specified to ::wiredtiger_open or WT_CONNECTION::reconfigure. + * For example\, "all" or "fast" can be configured when the database is configured with + * "all"\, but the cursor open will fail if "all" is specified when the database is + * configured with "fast"\, and the cursor open will fail in all cases when the database is + * configured with "none". If "size" is configured\, only the underlying size of the object + * on disk is filled in and the object is not opened. If \c statistics is not configured\, + * the default configuration is the database configuration. The "clear" configuration + * resets statistics after gathering them\, where appropriate (for example\, a cache size + * statistic is not cleared\, while the count of cursor insert operations will be cleared). + * See @ref statistics for more information., a list\, with values chosen from the following + * options: \c "all"\, \c "cache_walk"\, \c "fast"\, \c "clear"\, \c "size"\, \c + * "tree_walk"; default empty.} + * @config{target, if non-empty\, backup the list of objects; valid only for a backup data + * source., a list of strings; default empty.} * @configend * @param[out] cursorp a pointer to the newly opened cursor * @errors @@ -1153,34 +1129,30 @@ struct __wt_session { * @param session the session handle * @param name the URI of the object to alter, such as \c "table:stock" * @configstart{WT_SESSION.alter, see dist/api_data.py} - * @config{access_pattern_hint, It is recommended that workloads that - * consist primarily of updates and/or point queries specify \c random. 
- * Workloads that do many cursor scans through large ranges of data - * specify \c sequential and other workloads specify \c none. The - * option leads to an advisory call to an appropriate operating system - * API where available., a string\, chosen from the following options: - * \c "none"\, \c "random"\, \c "sequential"; default \c none.} - * @config{app_metadata, application-owned metadata for this object., a - * string; default empty.} - * @config{cache_resident, do not ever evict the object's pages from - * cache. Not compatible with LSM tables; see @ref - * tuning_cache_resident for more information., a boolean flag; default - * \c false.} - * @config{log = (, the transaction log configuration for this object. - * Only valid if log is enabled in ::wiredtiger_open., a set of related - * configuration options defined below.} - * @config{ enabled, if false\, this object has - * checkpoint-level durability., a boolean flag; default \c true.} + * @config{access_pattern_hint, It is recommended that workloads that consist primarily of + * updates and/or point queries specify \c random. Workloads that do many cursor scans + * through large ranges of data specify \c sequential and other workloads specify \c none. + * The option leads to an advisory call to an appropriate operating system API where + * available., a string\, chosen from the following options: \c "none"\, \c "random"\, \c + * "sequential"; default \c none.} + * @config{app_metadata, application-owned metadata for this object., a string; default + * empty.} + * @config{cache_resident, do not ever evict the object's pages from cache. Not compatible + * with LSM tables; see @ref tuning_cache_resident for more information., a boolean flag; + * default \c false.} + * @config{log = (, the transaction log configuration for this object. 
Only valid if log is + * enabled in ::wiredtiger_open., a set of related configuration options defined below.} + * @config{ enabled, if false\, this object has checkpoint-level + * durability., a boolean flag; default \c true.} * @config{ ),,} - * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\, - * in bytes. If non-zero\, schedule writes for dirty blocks belonging - * to this object in the system buffer cache after that many bytes from - * this object are written into the buffer cache., an integer greater - * than or equal to 0; default \c 0.} - * @config{os_cache_max, maximum system buffer cache usage\, in bytes. - * If non-zero\, evict object blocks from the system buffer cache after - * that many bytes from this object are read or written into the buffer - * cache., an integer greater than or equal to 0; default \c 0.} + * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\, in bytes. If + * non-zero\, schedule writes for dirty blocks belonging to this object in the system buffer + * cache after that many bytes from this object are written into the buffer cache., an + * integer greater than or equal to 0; default \c 0.} + * @config{os_cache_max, maximum system buffer cache usage\, in bytes. If non-zero\, evict + * object blocks from the system buffer cache after that many bytes from this object are + * read or written into the buffer cache., an integer greater than or equal to 0; default \c + * 0.} * @configend * @errors */ @@ -1199,250 +1171,203 @@ struct __wt_session { * \c "table:stock". For a description of URI formats * see @ref data_sources. * @configstart{WT_SESSION.create, see dist/api_data.py} - * @config{access_pattern_hint, It is recommended that workloads that - * consist primarily of updates and/or point queries specify \c random. - * Workloads that do many cursor scans through large ranges of data - * specify \c sequential and other workloads specify \c none. 
The - * option leads to an advisory call to an appropriate operating system - * API where available., a string\, chosen from the following options: - * \c "none"\, \c "random"\, \c "sequential"; default \c none.} - * @config{allocation_size, the file unit allocation size\, in bytes\, - * must a power-of-two; smaller values decrease the file space required - * by overflow items\, and the default value of 4KB is a good choice - * absent requirements from the operating system or storage device., an - * integer between 512B and 128MB; default \c 4KB.} - * @config{app_metadata, application-owned metadata for this object., a - * string; default empty.} - * @config{block_allocation, configure block allocation. Permitted - * values are \c "first" or \c "best"; the \c "first" configuration uses - * a first-available algorithm during block allocation\, the \c "best" - * configuration uses a best-fit algorithm., a string\, chosen from the - * following options: \c "first"\, \c "best"; default \c best.} - * @config{block_compressor, configure a compressor for file blocks. - * Permitted values are \c "none" or custom compression engine name - * created with WT_CONNECTION::add_compressor. If WiredTiger has - * builtin support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd" - * compression\, these names are also available. See @ref compression - * for more information., a string; default \c none.} - * @config{cache_resident, do not ever evict the object's pages from - * cache. Not compatible with LSM tables; see @ref - * tuning_cache_resident for more information., a boolean flag; default - * \c false.} - * @config{checksum, configure block checksums; permitted values are - * <code>on</code> (checksum all blocks)\, <code>off</code> (checksum no - * blocks) and <code>uncompresssed</code> (checksum only blocks which - * are not compressed for any reason). 
The \c uncompressed setting is - * for applications which can rely on decompression to fail if a block - * has been corrupted., a string\, chosen from the following options: \c + * @config{access_pattern_hint, It is recommended that workloads that consist primarily of + * updates and/or point queries specify \c random. Workloads that do many cursor scans + * through large ranges of data specify \c sequential and other workloads specify \c none. + * The option leads to an advisory call to an appropriate operating system API where + * available., a string\, chosen from the following options: \c "none"\, \c "random"\, \c + * "sequential"; default \c none.} + * @config{allocation_size, the file unit allocation size\, in bytes\, must a power-of-two; + * smaller values decrease the file space required by overflow items\, and the default value + * of 4KB is a good choice absent requirements from the operating system or storage device., + * an integer between 512B and 128MB; default \c 4KB.} + * @config{app_metadata, application-owned metadata for this object., a string; default + * empty.} + * @config{block_allocation, configure block allocation. Permitted values are \c "first" or + * \c "best"; the \c "first" configuration uses a first-available algorithm during block + * allocation\, the \c "best" configuration uses a best-fit algorithm., a string\, chosen + * from the following options: \c "first"\, \c "best"; default \c best.} + * @config{block_compressor, configure a compressor for file blocks. Permitted values are + * \c "none" or custom compression engine name created with WT_CONNECTION::add_compressor. + * If WiredTiger has builtin support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd" + * compression\, these names are also available. See @ref compression for more + * information., a string; default \c none.} + * @config{cache_resident, do not ever evict the object's pages from cache. 
Not compatible + * with LSM tables; see @ref tuning_cache_resident for more information., a boolean flag; + * default \c false.} + * @config{checksum, configure block checksums; permitted values are <code>on</code> + * (checksum all blocks)\, <code>off</code> (checksum no blocks) and + * <code>uncompresssed</code> (checksum only blocks which are not compressed for any + * reason). The \c uncompressed setting is for applications which can rely on decompression + * to fail if a block has been corrupted., a string\, chosen from the following options: \c * "on"\, \c "off"\, \c "uncompressed"; default \c uncompressed.} - * @config{colgroups, comma-separated list of names of column groups. - * Each column group is stored separately\, keyed by the primary key of - * the table. If no column groups are specified\, all columns are - * stored together in a single file. All value columns in the table - * must appear in at least one column group. Each column group must be - * created with a separate call to WT_SESSION::create., a list of - * strings; default empty.} - * @config{collator, configure custom collation for keys. Permitted - * values are \c "none" or a custom collator name created with - * WT_CONNECTION::add_collator., a string; default \c none.} - * @config{columns, list of the column names. Comma-separated list of - * the form <code>(column[\,...])</code>. For tables\, the number of - * entries must match the total number of values in \c key_format and \c - * value_format. For colgroups and indices\, all column names must - * appear in the list of columns for the table., a list of strings; + * @config{colgroups, comma-separated list of names of column groups. Each column group is + * stored separately\, keyed by the primary key of the table. If no column groups are + * specified\, all columns are stored together in a single file. All value columns in the + * table must appear in at least one column group. 
Each column group must be created with a + * separate call to WT_SESSION::create., a list of strings; default empty.} + * @config{collator, configure custom collation for keys. Permitted values are \c "none" or + * a custom collator name created with WT_CONNECTION::add_collator., a string; default \c + * none.} + * @config{columns, list of the column names. Comma-separated list of the form + * <code>(column[\,...])</code>. For tables\, the number of entries must match the total + * number of values in \c key_format and \c value_format. For colgroups and indices\, all + * column names must appear in the list of columns for the table., a list of strings; * default empty.} - * @config{dictionary, the maximum number of unique values remembered in - * the Btree row-store leaf page value dictionary; see @ref - * file_formats_compression for more information., an integer greater - * than or equal to 0; default \c 0.} - * @config{encryption = (, configure an encryptor for file blocks. When - * a table is created\, its encryptor is not implicitly used for any - * related indices or column groups., a set of related configuration - * options defined below.} - * @config{ keyid, An - * identifier that identifies a unique instance of the encryptor. It is - * stored in clear text\, and thus is available when the wiredtiger - * database is reopened. On the first use of a (name\, keyid) - * combination\, the WT_ENCRYPTOR::customize function is called with the - * keyid as an argument., a string; default empty.} - * @config{ name, Permitted values are \c "none" - * or custom encryption engine name created with - * WT_CONNECTION::add_encryptor. See @ref encryption for more + * @config{dictionary, the maximum number of unique values remembered in the Btree row-store + * leaf page value dictionary; see @ref file_formats_compression for more information., an + * integer greater than or equal to 0; default \c 0.} + * @config{encryption = (, configure an encryptor for file blocks. 
When a table is + * created\, its encryptor is not implicitly used for any related indices or column groups., + * a set of related configuration options defined below.} + * @config{ + * keyid, An identifier that identifies a unique instance of the encryptor. It is stored in + * clear text\, and thus is available when the wiredtiger database is reopened. On the + * first use of a (name\, keyid) combination\, the WT_ENCRYPTOR::customize function is + * called with the keyid as an argument., a string; default empty.} + * @config{ name, Permitted values are \c "none" or custom encryption + * engine name created with WT_CONNECTION::add_encryptor. See @ref encryption for more * information., a string; default \c none.} * @config{ ),,} - * @config{exclusive, fail if the object exists. When false (the - * default)\, if the object exists\, check that its settings match the - * specified configuration., a boolean flag; default \c false.} - * @config{extractor, configure custom extractor for indices. Permitted - * values are \c "none" or an extractor name created with - * WT_CONNECTION::add_extractor., a string; default \c none.} - * @config{format, the file format., a string\, chosen from the - * following options: \c "btree"; default \c btree.} - * @config{huffman_key, configure Huffman encoding for keys. Permitted - * values are \c "none"\, \c "english"\, \c "utf8<file>" or \c - * "utf16<file>". See @ref huffman for more information., a string; - * default \c none.} - * @config{huffman_value, configure Huffman encoding for values. - * Permitted values are \c "none"\, \c "english"\, \c "utf8<file>" or \c - * "utf16<file>". See @ref huffman for more information., a string; - * default \c none.} - * @config{ignore_in_memory_cache_size, allow update and insert - * operations to proceed even if the cache is already at capacity. Only - * valid in conjunction with in-memory databases. 
Should be used with - * caution - this configuration allows WiredTiger to consume memory over + * @config{exclusive, fail if the object exists. When false (the default)\, if the object + * exists\, check that its settings match the specified configuration., a boolean flag; + * default \c false.} + * @config{extractor, configure custom extractor for indices. Permitted values are \c + * "none" or an extractor name created with WT_CONNECTION::add_extractor., a string; default + * \c none.} + * @config{format, the file format., a string\, chosen from the following options: \c + * "btree"; default \c btree.} + * @config{huffman_key, configure Huffman encoding for keys. Permitted values are \c + * "none"\, \c "english"\, \c "utf8<file>" or \c "utf16<file>". See @ref huffman for more + * information., a string; default \c none.} + * @config{huffman_value, configure Huffman encoding for values. Permitted values are \c + * "none"\, \c "english"\, \c "utf8<file>" or \c "utf16<file>". See @ref huffman for more + * information., a string; default \c none.} + * @config{ignore_in_memory_cache_size, allow update and insert operations to proceed even + * if the cache is already at capacity. Only valid in conjunction with in-memory databases. + * Should be used with caution - this configuration allows WiredTiger to consume memory over * the configured cache limit., a boolean flag; default \c false.} - * @config{immutable, configure the index to be immutable - that is an - * index is not changed by any update to a record in the table., a - * boolean flag; default \c false.} - * @config{internal_key_max, the largest key stored in an internal - * node\, in bytes. If set\, keys larger than the specified size are - * stored as overflow items (which may require additional I/O to - * access). 
The default and the maximum allowed value are both one-tenth - * the size of a newly split internal page., an integer greater than or - * equal to 0; default \c 0.} - * @config{internal_key_truncate, configure internal key truncation\, - * discarding unnecessary trailing bytes on internal keys (ignored for - * custom collators)., a boolean flag; default \c true.} - * @config{internal_page_max, the maximum page size for internal nodes\, - * in bytes; the size must be a multiple of the allocation size and is - * significant for applications wanting to avoid excessive L2 cache - * misses while searching the tree. The page maximum is the bytes of - * uncompressed data\, that is\, the limit is applied before any block - * compression is done., an integer between 512B and 512MB; default \c - * 4KB.} - * @config{key_format, the format of the data packed into key items. - * See @ref schema_format_types for details. By default\, the - * key_format is \c 'u' and applications use WT_ITEM structures to - * manipulate raw byte arrays. By default\, records are stored in - * row-store files: keys of type \c 'r' are record numbers and records - * referenced by record number are stored in column-store files., a - * format string; default \c u.} - * @config{leaf_key_max, the largest key stored in a leaf node\, in - * bytes. If set\, keys larger than the specified size are stored as - * overflow items (which may require additional I/O to access). The - * default value is one-tenth the size of a newly split leaf page., an + * @config{immutable, configure the index to be immutable - that is an index is not changed + * by any update to a record in the table., a boolean flag; default \c false.} + * @config{internal_key_max, the largest key stored in an internal node\, in bytes. If + * set\, keys larger than the specified size are stored as overflow items (which may require + * additional I/O to access). 
The default and the maximum allowed value are both one-tenth + * the size of a newly split internal page., an integer greater than or equal to 0; default + * \c 0.} + * @config{internal_key_truncate, configure internal key truncation\, discarding unnecessary + * trailing bytes on internal keys (ignored for custom collators)., a boolean flag; default + * \c true.} + * @config{internal_page_max, the maximum page size for internal nodes\, in bytes; the size + * must be a multiple of the allocation size and is significant for applications wanting to + * avoid excessive L2 cache misses while searching the tree. The page maximum is the bytes + * of uncompressed data\, that is\, the limit is applied before any block compression is + * done., an integer between 512B and 512MB; default \c 4KB.} + * @config{key_format, the format of the data packed into key items. See @ref + * schema_format_types for details. By default\, the key_format is \c 'u' and applications + * use WT_ITEM structures to manipulate raw byte arrays. By default\, records are stored in + * row-store files: keys of type \c 'r' are record numbers and records referenced by record + * number are stored in column-store files., a format string; default \c u.} + * @config{leaf_key_max, the largest key stored in a leaf node\, in bytes. If set\, keys + * larger than the specified size are stored as overflow items (which may require additional + * I/O to access). The default value is one-tenth the size of a newly split leaf page., an * integer greater than or equal to 0; default \c 0.} - * @config{leaf_page_max, the maximum page size for leaf nodes\, in - * bytes; the size must be a multiple of the allocation size\, and is - * significant for applications wanting to maximize sequential data - * transfer from a storage device. 
The page maximum is the bytes of - * uncompressed data\, that is\, the limit is applied before any block - * compression is done., an integer between 512B and 512MB; default \c - * 32KB.} - * @config{leaf_value_max, the largest value stored in a leaf node\, in - * bytes. If set\, values larger than the specified size are stored as - * overflow items (which may require additional I/O to access). If the - * size is larger than the maximum leaf page size\, the page size is - * temporarily ignored when large values are written. The default is - * one-half the size of a newly split leaf page., an integer greater - * than or equal to 0; default \c 0.} - * @config{log = (, the transaction log configuration for this object. - * Only valid if log is enabled in ::wiredtiger_open., a set of related - * configuration options defined below.} - * @config{ enabled, if false\, this object has - * checkpoint-level durability., a boolean flag; default \c true.} + * @config{leaf_page_max, the maximum page size for leaf nodes\, in bytes; the size must be + * a multiple of the allocation size\, and is significant for applications wanting to + * maximize sequential data transfer from a storage device. The page maximum is the bytes + * of uncompressed data\, that is\, the limit is applied before any block compression is + * done., an integer between 512B and 512MB; default \c 32KB.} + * @config{leaf_value_max, the largest value stored in a leaf node\, in bytes. If set\, + * values larger than the specified size are stored as overflow items (which may require + * additional I/O to access). If the size is larger than the maximum leaf page size\, the + * page size is temporarily ignored when large values are written. The default is one-half + * the size of a newly split leaf page., an integer greater than or equal to 0; default \c + * 0.} + * @config{log = (, the transaction log configuration for this object. 
Only valid if log is + * enabled in ::wiredtiger_open., a set of related configuration options defined below.} + * @config{ enabled, if false\, this object has checkpoint-level + * durability., a boolean flag; default \c true.} * @config{ ),,} - * @config{lsm = (, options only relevant for LSM data sources., a set - * of related configuration options defined below.} - * @config{ auto_throttle, Throttle inserts into - * LSM trees if flushing to disk isn't keeping up., a boolean flag; - * default \c true.} - * @config{ bloom, create bloom - * filters on LSM tree chunks as they are merged., a boolean flag; + * @config{lsm = (, options only relevant for LSM data sources., a set of related + * configuration options defined below.} + * @config{ auto_throttle, + * Throttle inserts into LSM trees if flushing to disk isn't keeping up., a boolean flag; * default \c true.} - * @config{ bloom_bit_count, - * the number of bits used per item for LSM bloom filters., an integer - * between 2 and 1000; default \c 16.} - * @config{ - * bloom_config, config string used when creating Bloom filter files\, - * passed to WT_SESSION::create., a string; default empty.} - * @config{ bloom_hash_count, the number of hash - * values per item used for LSM bloom filters., an integer between 2 and - * 100; default \c 8.} - * @config{ bloom_oldest, - * create a bloom filter on the oldest LSM tree chunk. Only supported - * if bloom filters are enabled., a boolean flag; default \c false.} - * @config{ chunk_count_limit, the maximum number - * of chunks to allow in an LSM tree. This option automatically times - * out old data. As new chunks are added old chunks will be removed. - * Enabling this option disables LSM background merges., an integer; - * default \c 0.} - * @config{ chunk_max, the maximum - * size a single chunk can be. Chunks larger than this size are not - * considered for further merges. This is a soft limit\, and chunks - * larger than this value can be created. 
Must be larger than - * chunk_size., an integer between 100MB and 10TB; default \c 5GB.} - * @config{ chunk_size, the maximum size of the - * in-memory chunk of an LSM tree. This limit is soft - it is possible - * for chunks to be temporarily larger than this value. This overrides - * the \c memory_page_max setting., an integer between 512K and 500MB; - * default \c 10MB.} - * @config{ merge_custom = (, - * configure the tree to merge into a custom data source., a set of - * related configuration options defined below.} - * @config{ prefix, - * custom data source prefix instead of \c "file"., a string; default - * empty.} - * @config{ - * start_generation, merge generation at which the custom data source is - * used (zero indicates no custom data source)., an integer between 0 - * and 10; default \c 0.} - * @config{ suffix, - * custom data source suffix instead of \c ".lsm"., a string; default - * empty.} + * @config{ bloom, create bloom filters on LSM tree + * chunks as they are merged., a boolean flag; default \c true.} + * @config{ bloom_bit_count, the number of bits used per item for LSM + * bloom filters., an integer between 2 and 1000; default \c 16.} + * @config{ bloom_config, config string used when creating Bloom + * filter files\, passed to WT_SESSION::create., a string; default empty.} + * @config{ bloom_hash_count, the number of hash values per item used + * for LSM bloom filters., an integer between 2 and 100; default \c 8.} + * @config{ bloom_oldest, create a bloom filter on the oldest LSM + * tree chunk. Only supported if bloom filters are enabled., a boolean flag; default \c + * false.} + * @config{ chunk_count_limit, the maximum number of chunks + * to allow in an LSM tree. This option automatically times out old data. As new chunks + * are added old chunks will be removed. Enabling this option disables LSM background + * merges., an integer; default \c 0.} + * @config{ chunk_max, the + * maximum size a single chunk can be. 
Chunks larger than this size are not considered for + * further merges. This is a soft limit\, and chunks larger than this value can be created. + * Must be larger than chunk_size., an integer between 100MB and 10TB; default \c 5GB.} + * @config{ chunk_size, the maximum size of the in-memory chunk of an + * LSM tree. This limit is soft - it is possible for chunks to be temporarily larger than + * this value. This overrides the \c memory_page_max setting., an integer between 512K and + * 500MB; default \c 10MB.} + * @config{ merge_custom = (, configure the + * tree to merge into a custom data source., a set of related configuration options defined + * below.} + * @config{ prefix, custom data + * source prefix instead of \c "file"., a string; default empty.} + * @config{ start_generation, merge + * generation at which the custom data source is used (zero indicates no custom data + * source)., an integer between 0 and 10; default \c 0.} + * @config{ suffix, custom data source suffix + * instead of \c ".lsm"., a string; default empty.} * @config{ ),,} - * @config{ merge_max, the - * maximum number of chunks to include in a merge operation., an integer - * between 2 and 100; default \c 15.} - * @config{ - * merge_min, the minimum number of chunks to include in a merge - * operation. If set to 0 or 1 half the value of merge_max is used., an - * integer no more than 100; default \c 0.} + * @config{ merge_max, the maximum number of chunks to include in a + * merge operation., an integer between 2 and 100; default \c 15.} + * @config{ merge_min, the minimum number of chunks to include in a + * merge operation. If set to 0 or 1 half the value of merge_max is used., an integer no + * more than 100; default \c 0.} * @config{ ),,} - * @config{memory_page_image_max, the maximum in-memory page image - * represented by a single storage block. 
Depending on compression - * efficiency\, compression can create storage blocks which require - * significant resources to re-instantiate in the cache\, penalizing the - * performance of future point updates. The value limits the maximum - * in-memory page image a storage block will need. If set to 0\, a - * default of 4 times \c leaf_page_max is used., an integer greater than - * or equal to 0; default \c 0.} - * @config{memory_page_max, the maximum size a page can grow to in - * memory before being reconciled to disk. The specified size will be - * adjusted to a lower bound of <code>leaf_page_max</code>\, and an - * upper bound of <code>cache_size / 10</code>. This limit is soft - it - * is possible for pages to be temporarily larger than this value. This - * setting is ignored for LSM trees\, see \c chunk_size., an integer - * between 512B and 10TB; default \c 5MB.} - * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\, - * in bytes. If non-zero\, schedule writes for dirty blocks belonging - * to this object in the system buffer cache after that many bytes from - * this object are written into the buffer cache., an integer greater - * than or equal to 0; default \c 0.} - * @config{os_cache_max, maximum system buffer cache usage\, in bytes. 
- * If non-zero\, evict object blocks from the system buffer cache after - * that many bytes from this object are read or written into the buffer - * cache., an integer greater than or equal to 0; default \c 0.} - * @config{prefix_compression, configure prefix compression on row-store - * leaf pages., a boolean flag; default \c false.} - * @config{prefix_compression_min, minimum gain before prefix - * compression will be used on row-store leaf pages., an integer greater - * than or equal to 0; default \c 4.} - * @config{split_pct, the Btree page split size as a percentage of the - * maximum Btree page size\, that is\, when a Btree page is split\, it - * will be split into smaller pages\, where each page is the specified - * percentage of the maximum Btree page size., an integer between 50 and - * 100; default \c 90.} - * @config{type, set the type of data source used to store a column - * group\, index or simple table. By default\, a \c "file:" URI is - * derived from the object name. The \c type configuration can be used - * to switch to a different data source\, such as LSM or an extension - * configured by the application., a string; default \c file.} - * @config{value_format, the format of the data packed into value items. - * See @ref schema_format_types for details. By default\, the - * value_format is \c 'u' and applications use a WT_ITEM structure to - * manipulate raw byte arrays. Value items of type 't' are bitfields\, - * and when configured with record number type keys\, will be stored + * @config{memory_page_image_max, the maximum in-memory page image represented by a single + * storage block. Depending on compression efficiency\, compression can create storage + * blocks which require significant resources to re-instantiate in the cache\, penalizing + * the performance of future point updates. The value limits the maximum in-memory page + * image a storage block will need. 
If set to 0\, a default of 4 times \c leaf_page_max is + * used., an integer greater than or equal to 0; default \c 0.} + * @config{memory_page_max, the maximum size a page can grow to in memory before being + * reconciled to disk. The specified size will be adjusted to a lower bound of + * <code>leaf_page_max</code>\, and an upper bound of <code>cache_size / 10</code>. This + * limit is soft - it is possible for pages to be temporarily larger than this value. This + * setting is ignored for LSM trees\, see \c chunk_size., an integer between 512B and 10TB; + * default \c 5MB.} + * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\, in bytes. If + * non-zero\, schedule writes for dirty blocks belonging to this object in the system buffer + * cache after that many bytes from this object are written into the buffer cache., an + * integer greater than or equal to 0; default \c 0.} + * @config{os_cache_max, maximum system buffer cache usage\, in bytes. If non-zero\, evict + * object blocks from the system buffer cache after that many bytes from this object are + * read or written into the buffer cache., an integer greater than or equal to 0; default \c + * 0.} + * @config{prefix_compression, configure prefix compression on row-store leaf pages., a + * boolean flag; default \c false.} + * @config{prefix_compression_min, minimum gain before prefix compression will be used on + * row-store leaf pages., an integer greater than or equal to 0; default \c 4.} + * @config{split_pct, the Btree page split size as a percentage of the maximum Btree page + * size\, that is\, when a Btree page is split\, it will be split into smaller pages\, where + * each page is the specified percentage of the maximum Btree page size., an integer between + * 50 and 100; default \c 90.} + * @config{type, set the type of data source used to store a column group\, index or simple + * table. By default\, a \c "file:" URI is derived from the object name. 
The \c type + * configuration can be used to switch to a different data source\, such as LSM or an + * extension configured by the application., a string; default \c file.} + * @config{value_format, the format of the data packed into value items. See @ref + * schema_format_types for details. By default\, the value_format is \c 'u' and + * applications use a WT_ITEM structure to manipulate raw byte arrays. Value items of type + * 't' are bitfields\, and when configured with record number type keys\, will be stored * using a fixed-length store., a format string; default \c u.} * @configend * @errors @@ -1474,10 +1399,9 @@ struct __wt_session { * @param name the URI of the object to compact, such as * \c "table:stock" * @configstart{WT_SESSION.compact, see dist/api_data.py} - * @config{timeout, maximum amount of time to allow for compact in - * seconds. The actual amount of time spent in compact may exceed the - * configured value. A value of zero disables the timeout., an integer; - * default \c 1200.} + * @config{timeout, maximum amount of time to allow for compact in seconds. The actual + * amount of time spent in compact may exceed the configured value. 
A value of zero + * disables the timeout., an integer; default \c 1200.} * @configend * @errors */ @@ -1496,10 +1420,10 @@ struct __wt_session { * @param session the session handle * @param name the URI of the object to drop, such as \c "table:stock" * @configstart{WT_SESSION.drop, see dist/api_data.py} - * @config{force, return success if the object does not exist., a - * boolean flag; default \c false.} - * @config{remove_files, if the underlying files should be removed., a - * boolean flag; default \c true.} + * @config{force, return success if the object does not exist., a boolean flag; default \c + * false.} + * @config{remove_files, if the underlying files should be removed., a boolean flag; default + * \c true.} * @configend * @ebusy_errors */ @@ -1539,35 +1463,28 @@ struct __wt_session { * finished with it, although not before the join_cursor is closed. * * @configstart{WT_SESSION.join, see dist/api_data.py} - * @config{bloom_bit_count, the number of bits used per item for the - * bloom filter., an integer between 2 and 1000; default \c 16.} - * @config{bloom_false_positives, return all values that pass the bloom - * filter\, without eliminating any false positives., a boolean flag; - * default \c false.} - * @config{bloom_hash_count, the number of hash values per item for the - * bloom filter., an integer between 2 and 100; default \c 8.} - * @config{compare, modifies the set of items to be returned so that the - * index key satisfies the given comparison relative to the key set in - * this cursor., a string\, chosen from the following options: \c "eq"\, - * \c "ge"\, \c "gt"\, \c "le"\, \c "lt"; default \c "eq".} - * @config{count, set an approximate count of the elements that would be - * included in the join. This is used in sizing the bloom filter\, and - * also influences evaluation order for cursors in the join. 
When the - * count is equal for multiple bloom filters in a composition of joins\, - * the bloom filter may be shared., an integer; default \c .} - * @config{operation, the operation applied between this and other - * joined cursors. When "operation=and" is specified\, all the - * conditions implied by joins must be satisfied for an entry to be - * returned by the join cursor; when "operation=or" is specified\, only - * one must be satisfied. All cursors joined to a join cursor must have - * matching operations., a string\, chosen from the following options: - * \c "and"\, \c "or"; default \c "and".} - * @config{strategy, when set to bloom\, a bloom filter is created and - * populated for this index. This has an up front cost but may reduce - * the number of accesses to the main table when iterating the joined - * cursor. The bloom setting requires that count be set., a string\, - * chosen from the following options: \c "bloom"\, \c "default"; default - * empty.} + * @config{bloom_bit_count, the number of bits used per item for the bloom filter., an + * integer between 2 and 1000; default \c 16.} + * @config{bloom_false_positives, return all values that pass the bloom filter\, without + * eliminating any false positives., a boolean flag; default \c false.} + * @config{bloom_hash_count, the number of hash values per item for the bloom filter., an + * integer between 2 and 100; default \c 8.} + * @config{compare, modifies the set of items to be returned so that the index key satisfies + * the given comparison relative to the key set in this cursor., a string\, chosen from the + * following options: \c "eq"\, \c "ge"\, \c "gt"\, \c "le"\, \c "lt"; default \c "eq".} + * @config{count, set an approximate count of the elements that would be included in the + * join. This is used in sizing the bloom filter\, and also influences evaluation order for + * cursors in the join. 
When the count is equal for multiple bloom filters in a composition + * of joins\, the bloom filter may be shared., an integer; default \c .} + * @config{operation, the operation applied between this and other joined cursors. When + * "operation=and" is specified\, all the conditions implied by joins must be satisfied for + * an entry to be returned by the join cursor; when "operation=or" is specified\, only one + * must be satisfied. All cursors joined to a join cursor must have matching operations., a + * string\, chosen from the following options: \c "and"\, \c "or"; default \c "and".} + * @config{strategy, when set to bloom\, a bloom filter is created and populated for this + * index. This has an up front cost but may reduce the number of accesses to the main table + * when iterating the joined cursor. The bloom setting requires that count be set., a + * string\, chosen from the following options: \c "bloom"\, \c "default"; default empty.} * @configend * @errors */ @@ -1579,14 +1496,12 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.log_flush, see dist/api_data.py} - * @config{sync, forcibly flush the log and wait for it to achieve the - * synchronization level specified. The \c background setting initiates - * a background synchronization intended to be used with a later call to - * WT_SESSION::transaction_sync. The \c off setting forces any buffered - * log records to be written to the file system. The \c on setting - * forces log records to be written to the storage device., a string\, - * chosen from the following options: \c "background"\, \c "off"\, \c - * "on"; default \c on.} + * @config{sync, forcibly flush the log and wait for it to achieve the synchronization level + * specified. The \c background setting initiates a background synchronization intended to + * be used with a later call to WT_SESSION::transaction_sync. The \c off setting forces any + * buffered log records to be written to the file system. 
The \c on setting forces log + * records to be written to the storage device., a string\, chosen from the following + * options: \c "background"\, \c "off"\, \c "on"; default \c on.} * @configend * @errors */ @@ -1673,8 +1588,8 @@ struct __wt_session { * @param session the session handle * @param name the URI of the table or file to salvage * @configstart{WT_SESSION.salvage, see dist/api_data.py} - * @config{force, force salvage even of files that do not appear to be - * WiredTiger files., a boolean flag; default \c false.} + * @config{force, force salvage even of files that do not appear to be WiredTiger files., a + * boolean flag; default \c false.} * @configend * @ebusy_errors */ @@ -1752,26 +1667,24 @@ struct __wt_session { * @param session the session handle * @param name the URI of the table or file to verify * @configstart{WT_SESSION.verify, see dist/api_data.py} - * @config{dump_address, Display addresses and page types as pages are - * verified\, using the application's message handler\, intended for - * debugging., a boolean flag; default \c false.} - * @config{dump_blocks, Display the contents of on-disk blocks as they - * are verified\, using the application's message handler\, intended for - * debugging., a boolean flag; default \c false.} - * @config{dump_layout, Display the layout of the files as they are - * verified\, using the application's message handler\, intended for - * debugging; requires optional support from the block manager., a - * boolean flag; default \c false.} - * @config{dump_offsets, Display the contents of specific on-disk - * blocks\, using the application's message handler\, intended for - * debugging., a list of strings; default empty.} - * @config{dump_pages, Display the contents of in-memory pages as they - * are verified\, using the application's message handler\, intended for - * debugging., a boolean flag; default \c false.} - * @config{strict, Treat any verification problem as an error; by - * default\, verify will warn\, 
but not fail\, in the case of errors - * that won't affect future behavior (for example\, a leaked block)., a - * boolean flag; default \c false.} + * @config{dump_address, Display addresses and page types as pages are verified\, using the + * application's message handler\, intended for debugging., a boolean flag; default \c + * false.} + * @config{dump_blocks, Display the contents of on-disk blocks as they are verified\, using + * the application's message handler\, intended for debugging., a boolean flag; default \c + * false.} + * @config{dump_layout, Display the layout of the files as they are verified\, using the + * application's message handler\, intended for debugging; requires optional support from + * the block manager., a boolean flag; default \c false.} + * @config{dump_offsets, Display the contents of specific on-disk blocks\, using the + * application's message handler\, intended for debugging., a list of strings; default + * empty.} + * @config{dump_pages, Display the contents of in-memory pages as they are verified\, using + * the application's message handler\, intended for debugging., a boolean flag; default \c + * false.} + * @config{strict, Treat any verification problem as an error; by default\, verify will + * warn\, but not fail\, in the case of errors that won't affect future behavior (for + * example\, a leaked block)., a boolean flag; default \c false.} * @configend * @ebusy_errors */ @@ -1799,48 +1712,40 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.begin_transaction, see dist/api_data.py} - * @config{ignore_prepare, whether to ignore the updates by other - * prepared transactions as part of read operations of this transaction. - * When \c true\, forces the transaction to be read-only. 
Use \c force - * to ignore prepared updates and permit writes (which can cause lost - * updates unless the application knows something about the relationship - * between prepared transactions and the updates that are ignoring - * them)., a string\, chosen from the following options: \c "false"\, \c - * "force"\, \c "true"; default \c false.} - * @config{isolation, the isolation level for this transaction; defaults - * to the session's isolation level., a string\, chosen from the - * following options: \c "read-uncommitted"\, \c "read-committed"\, \c - * "snapshot"; default empty.} - * @config{name, name of the transaction for tracing and debugging., a - * string; default empty.} - * @config{priority, priority of the transaction for resolving - * conflicts. Transactions with higher values are less likely to - * abort., an integer between -100 and 100; default \c 0.} - * @config{read_timestamp, read using the specified timestamp. The - * supplied value must not be older than the current oldest timestamp. - * See @ref transaction_timestamps., a string; default empty.} - * @config{roundup_timestamps = (, round up timestamps of the - * transaction. This setting alters the visibility expected in a - * transaction. See @ref transaction_timestamps., a set of related - * configuration options defined below.} - * @config{ prepared, applicable only for - * prepared transactions. Indicates if the prepare timestamp and the - * commit timestamp of this transaction can be rounded up. If the - * prepare timestamp is less than the oldest timestamp\, the prepare - * timestamp will be rounded to the oldest timestamp. 
If the commit - * timestamp is less than the prepare timestamp\, the commit timestamp - * will be rounded up to the prepare timestamp., a boolean flag; default - * \c false.} - * @config{ read, if the read - * timestamp is less than the oldest timestamp\, the read timestamp will - * be rounded up to the oldest timestamp., a boolean flag; default \c + * @config{ignore_prepare, whether to ignore the updates by other prepared transactions as + * part of read operations of this transaction. When \c true\, forces the transaction to be + * read-only. Use \c force to ignore prepared updates and permit writes (which can cause + * lost updates unless the application knows something about the relationship between + * prepared transactions and the updates that are ignoring them)., a string\, chosen from + * the following options: \c "false"\, \c "force"\, \c "true"; default \c false.} + * @config{isolation, the isolation level for this transaction; defaults to the session's + * isolation level., a string\, chosen from the following options: \c "read-uncommitted"\, + * \c "read-committed"\, \c "snapshot"; default empty.} + * @config{name, name of the transaction for tracing and debugging., a string; default + * empty.} + * @config{priority, priority of the transaction for resolving conflicts. Transactions with + * higher values are less likely to abort., an integer between -100 and 100; default \c 0.} + * @config{read_timestamp, read using the specified timestamp. The supplied value must not + * be older than the current oldest timestamp. See @ref transaction_timestamps., a string; + * default empty.} + * @config{roundup_timestamps = (, round up timestamps of the transaction. This setting + * alters the visibility expected in a transaction. See @ref transaction_timestamps., a set + * of related configuration options defined below.} + * @config{ + * prepared, applicable only for prepared transactions. 
Indicates if the prepare timestamp + * and the commit timestamp of this transaction can be rounded up. If the prepare timestamp + * is less than the oldest timestamp\, the prepare timestamp will be rounded to the oldest + * timestamp. If the commit timestamp is less than the prepare timestamp\, the commit + * timestamp will be rounded up to the prepare timestamp., a boolean flag; default \c * false.} + * @config{ read, if the read timestamp is less than the + * oldest timestamp\, the read timestamp will be rounded up to the oldest timestamp., a + * boolean flag; default \c false.} * @config{ ),,} * @config{snapshot, use a named\, in-memory snapshot\, see @ref * transaction_named_snapshots., a string; default empty.} - * @config{sync, whether to sync log records when the transaction - * commits\, inherited from ::wiredtiger_open \c transaction_sync., a - * boolean flag; default empty.} + * @config{sync, whether to sync log records when the transaction commits\, inherited from + * ::wiredtiger_open \c transaction_sync., a boolean flag; default empty.} * @configend * @errors */ @@ -1860,25 +1765,21 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.commit_transaction, see dist/api_data.py} - * @config{commit_timestamp, set the commit timestamp for the current - * transaction. The supplied value must not be older than the first - * commit timestamp set for the current transaction. The value must - * also not be older than the current oldest and stable timestamps. See + * @config{commit_timestamp, set the commit timestamp for the current transaction. The + * supplied value must not be older than the first commit timestamp set for the current + * transaction. The value must also not be older than the current oldest and stable + * timestamps. See @ref transaction_timestamps., a string; default empty.} + * @config{durable_timestamp, set the durable timestamp for the current transaction. 
The + * supplied value must not be older than the commit timestamp set for the current + * transaction. The value must also not be older than the current stable timestamp. See * @ref transaction_timestamps., a string; default empty.} - * @config{durable_timestamp, set the durable timestamp for the current - * transaction. The supplied value must not be older than the commit - * timestamp set for the current transaction. The value must also not - * be older than the current stable timestamp. See @ref - * transaction_timestamps., a string; default empty.} - * @config{sync, override whether to sync log records when the - * transaction commits\, inherited from ::wiredtiger_open \c - * transaction_sync. The \c background setting initiates a background - * synchronization intended to be used with a later call to - * WT_SESSION::transaction_sync. The \c off setting does not wait for - * record to be written or synchronized. The \c on setting forces log - * records to be written to the storage device., a string\, chosen from - * the following options: \c "background"\, \c "off"\, \c "on"; default - * empty.} + * @config{sync, override whether to sync log records when the transaction commits\, + * inherited from ::wiredtiger_open \c transaction_sync. The \c background setting + * initiates a background synchronization intended to be used with a later call to + * WT_SESSION::transaction_sync. The \c off setting does not wait for record to be written + * or synchronized. The \c on setting forces log records to be written to the storage + * device., a string\, chosen from the following options: \c "background"\, \c "off"\, \c + * "on"; default empty.} * @configend * @errors */ @@ -1901,10 +1802,9 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.prepare_transaction, see dist/api_data.py} - * @config{prepare_timestamp, set the prepare timestamp for the updates - * of the current transaction. 
The supplied value must not be older - * than any active read timestamps. See @ref transaction_timestamps., a - * string; default empty.} + * @config{prepare_timestamp, set the prepare timestamp for the updates of the current + * transaction. The supplied value must not be older than any active read timestamps. See + * @ref transaction_timestamps., a string; default empty.} * @configend * @errors */ @@ -1936,24 +1836,20 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.timestamp_transaction, see dist/api_data.py} - * @config{commit_timestamp, set the commit timestamp for the current - * transaction. The supplied value must not be older than the first - * commit timestamp set for the current transaction. The value must - * also not be older than the current oldest and stable timestamps. See + * @config{commit_timestamp, set the commit timestamp for the current transaction. The + * supplied value must not be older than the first commit timestamp set for the current + * transaction. The value must also not be older than the current oldest and stable + * timestamps. See @ref transaction_timestamps., a string; default empty.} + * @config{durable_timestamp, set the durable timestamp for the current transaction. The + * supplied value must not be older than the commit timestamp set for the current + * transaction. The value must also not be older than the current stable timestamp. See * @ref transaction_timestamps., a string; default empty.} - * @config{durable_timestamp, set the durable timestamp for the current - * transaction. The supplied value must not be older than the commit - * timestamp set for the current transaction. The value must also not - * be older than the current stable timestamp. See @ref - * transaction_timestamps., a string; default empty.} - * @config{prepare_timestamp, set the prepare timestamp for the updates - * of the current transaction. 
The supplied value must not be older - * than any active read timestamps. See @ref transaction_timestamps., a - * string; default empty.} - * @config{read_timestamp, read using the specified timestamp. The - * supplied value must not be older than the current oldest timestamp. - * This can only be set once for a transaction. See @ref - * transaction_timestamps., a string; default empty.} + * @config{prepare_timestamp, set the prepare timestamp for the updates of the current + * transaction. The supplied value must not be older than any active read timestamps. See + * @ref transaction_timestamps., a string; default empty.} + * @config{read_timestamp, read using the specified timestamp. The supplied value must not + * be older than the current oldest timestamp. This can only be set once for a transaction. + * See @ref transaction_timestamps., a string; default empty.} * @configend * @errors */ @@ -1967,13 +1863,12 @@ struct __wt_session { * hexadecimal encoding of the timestamp being queried. Must be large * enough to hold a NUL terminated, hex-encoded 8B timestamp (17 bytes). * @configstart{WT_SESSION.query_timestamp, see dist/api_data.py} - * @config{get, specify which timestamp to query: \c commit returns the - * most recently set commit_timestamp. \c first_commit returns the - * first set commit_timestamp. \c prepare returns the timestamp used in - * preparing a transaction. \c read returns the timestamp at which the - * transaction is reading at. See @ref transaction_timestamps., a - * string\, chosen from the following options: \c "commit"\, \c - * "first_commit"\, \c "prepare"\, \c "read"; default \c read.} + * @config{get, specify which timestamp to query: \c commit returns the most recently set + * commit_timestamp. \c first_commit returns the first set commit_timestamp. \c prepare + * returns the timestamp used in preparing a transaction. \c read returns the timestamp at + * which the transaction is reading at. 
See @ref transaction_timestamps., a string\, chosen + * from the following options: \c "commit"\, \c "first_commit"\, \c "prepare"\, \c "read"; + * default \c read.} * @configend * @errors * If the session is not in a transaction ::WT_NOTFOUND will be @@ -2005,25 +1900,21 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.checkpoint, see dist/api_data.py} - * @config{drop, specify a list of checkpoints to drop. The list may - * additionally contain one of the following keys: \c "from=all" to drop - * all checkpoints\, \c "from=<checkpoint>" to drop all checkpoints - * after and including the named checkpoint\, or \c "to=<checkpoint>" to - * drop all checkpoints before and including the named checkpoint. - * Checkpoints cannot be dropped while a hot backup is in progress or if - * open in a cursor., a list of strings; default empty.} - * @config{force, by default\, checkpoints may be skipped if the - * underlying object has not been modified\, this option forces the - * checkpoint., a boolean flag; default \c false.} - * @config{name, if set\, specify a name for the checkpoint (note that - * checkpoints including LSM trees may not be named)., a string; default - * empty.} - * @config{target, if non-empty\, checkpoint the list of objects., a - * list of strings; default empty.} - * @config{use_timestamp, by default\, create the checkpoint as of the - * last stable timestamp if timestamps are in use\, or all current - * updates if there is no stable timestamp set. If false\, this option - * generates a checkpoint with all updates including those later than + * @config{drop, specify a list of checkpoints to drop. The list may additionally contain + * one of the following keys: \c "from=all" to drop all checkpoints\, \c "from=<checkpoint>" + * to drop all checkpoints after and including the named checkpoint\, or \c + * "to=<checkpoint>" to drop all checkpoints before and including the named checkpoint. 
+ * Checkpoints cannot be dropped while a hot backup is in progress or if open in a cursor., + * a list of strings; default empty.} + * @config{force, by default\, checkpoints may be skipped if the underlying object has not + * been modified\, this option forces the checkpoint., a boolean flag; default \c false.} + * @config{name, if set\, specify a name for the checkpoint (note that checkpoints including + * LSM trees may not be named)., a string; default empty.} + * @config{target, if non-empty\, checkpoint the list of objects., a list of strings; + * default empty.} + * @config{use_timestamp, by default\, create the checkpoint as of the last stable timestamp + * if timestamps are in use\, or all current updates if there is no stable timestamp set. + * If false\, this option generates a checkpoint with all updates including those later than * the timestamp., a boolean flag; default \c true.} * @configend * @errors @@ -2039,28 +1930,22 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.snapshot, see dist/api_data.py} - * @config{drop = (, if non-empty\, specifies which snapshots to drop. - * Where a group of snapshots are being dropped\, the order is based on - * snapshot creation order not alphanumeric name order., a set of - * related configuration options defined below.} - * @config{ all, drop all named snapshots., a - * boolean flag; default \c false.} + * @config{drop = (, if non-empty\, specifies which snapshots to drop. 
Where a group of + * snapshots are being dropped\, the order is based on snapshot creation order not + * alphanumeric name order., a set of related configuration options defined below.} + * @config{ all, drop all named snapshots., a boolean flag; default + * \c false.} + * @config{ before, drop all snapshots up to but not + * including the specified name., a string; default empty.} * @config{ - * before, drop all snapshots up to but not including the specified + * names, drop specific named snapshots., a list of strings; default empty.} + * @config{ to, drop all snapshots up to and including the specified * name., a string; default empty.} - * @config{ - * names, drop specific named snapshots., a list of strings; default - * empty.} - * @config{ to, drop all snapshots up to - * and including the specified name., a string; default empty.} - * @config{ - * ),,} - * @config{include_updates, make updates from the current transaction - * visible to users of the named snapshot. Transactions started with - * such a named snapshot are restricted to being read-only., a boolean - * flag; default \c false.} - * @config{name, specify a name for the snapshot., a string; default - * empty.} + * @config{ ),,} + * @config{include_updates, make updates from the current transaction visible to users of + * the named snapshot. Transactions started with such a named snapshot are restricted to + * being read-only., a boolean flag; default \c false.} + * @config{name, specify a name for the snapshot., a string; default empty.} * @configend * @errors */ @@ -2093,9 +1978,9 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.transaction_sync, see dist/api_data.py} - * @config{timeout_ms, maximum amount of time to wait for background - * sync to complete in milliseconds. 
A value of zero disables the - * timeout and returns immediately., an integer; default \c 1200000.} + * @config{timeout_ms, maximum amount of time to wait for background sync to complete in + * milliseconds. A value of zero disables the timeout and returns immediately., an integer; + * default \c 1200000.} * @configend * @errors */ @@ -2152,22 +2037,18 @@ struct __wt_connection { * @param connection the connection handle * @param uri the connection handle * @configstart{WT_CONNECTION.async_new_op, see dist/api_data.py} - * @config{append, append the value as a new record\, creating a new - * record number key; valid only for operations with record number - * keys., a boolean flag; default \c false.} - * @config{overwrite, configures whether the cursor's insert\, update - * and remove methods check the existing state of the record. If \c - * overwrite is \c false\, WT_CURSOR::insert fails with - * ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and - * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not - * exist., a boolean flag; default \c true.} - * @config{raw, ignore the encodings for the key and value\, manage data - * as if the formats were \c "u". See @ref cursor_raw for details., a - * boolean flag; default \c false.} - * @config{timeout, maximum amount of time to allow for compact in - * seconds. The actual amount of time spent in compact may exceed the - * configured value. A value of zero disables the timeout., an integer; - * default \c 1200.} + * @config{append, append the value as a new record\, creating a new record number key; + * valid only for operations with record number keys., a boolean flag; default \c false.} + * @config{overwrite, configures whether the cursor's insert\, update and remove methods + * check the existing state of the record. 
If \c overwrite is \c false\, WT_CURSOR::insert + * fails with ::WT_DUPLICATE_KEY if the record exists\, WT_CURSOR::update and + * WT_CURSOR::remove fail with ::WT_NOTFOUND if the record does not exist., a boolean flag; + * default \c true.} + * @config{raw, ignore the encodings for the key and value\, manage data as if the formats + * were \c "u". See @ref cursor_raw for details., a boolean flag; default \c false.} + * @config{timeout, maximum amount of time to allow for compact in seconds. The actual + * amount of time spent in compact may exceed the configured value. A value of zero + * disables the timeout., an integer; default \c 1200.} * @configend * @param callback the operation callback * @param[out] asyncopp the new op handle @@ -2191,13 +2072,11 @@ struct __wt_connection { * * @param connection the connection handle * @configstart{WT_CONNECTION.close, see dist/api_data.py} - * @config{leak_memory, don't free memory during close., a boolean flag; - * default \c false.} - * @config{use_timestamp, by default\, create the close checkpoint as of - * the last stable timestamp if timestamps are in use\, or all current - * updates if there is no stable timestamp set. If false\, this option - * generates a checkpoint with all updates., a boolean flag; default \c - * true.} + * @config{leak_memory, don't free memory during close., a boolean flag; default \c false.} + * @config{use_timestamp, by default\, create the close checkpoint as of the last stable + * timestamp if timestamps are in use\, or all current updates if there is no stable + * timestamp set. 
If false\, this option generates a checkpoint with all updates., a + * boolean flag; default \c true.} * @configend * @errors */ @@ -2213,18 +2092,12 @@ struct __wt_connection { * * @param connection the connection handle * @configstart{WT_CONNECTION.debug_info, see dist/api_data.py} - * @config{cache, print cache information., a boolean flag; default \c - * false.} - * @config{cursors, print all open cursor information., a boolean flag; - * default \c false.} - * @config{handles, print open handles information., a boolean flag; - * default \c false.} - * @config{log, print log information., a boolean flag; default \c - * false.} - * @config{sessions, print open session information., a boolean flag; - * default \c false.} - * @config{txn, print global txn information., a boolean flag; default - * \c false.} + * @config{cache, print cache information., a boolean flag; default \c false.} + * @config{cursors, print all open cursor information., a boolean flag; default \c false.} + * @config{handles, print open handles information., a boolean flag; default \c false.} + * @config{log, print log information., a boolean flag; default \c false.} + * @config{sessions, print open session information., a boolean flag; default \c false.} + * @config{txn, print global txn information., a boolean flag; default \c false.} * @configend * @errors */ @@ -2238,271 +2111,224 @@ struct __wt_connection { * * @param connection the connection handle * @configstart{WT_CONNECTION.reconfigure, see dist/api_data.py} - * @config{async = (, asynchronous operations configuration options., a - * set of related configuration options defined below.} - * @config{ enabled, enable asynchronous - * operation., a boolean flag; default \c false.} - * @config{ ops_max, maximum number of expected - * simultaneous asynchronous operations., an integer between 1 and 4096; - * default \c 1024.} - * @config{ threads, the number - * of worker threads to service asynchronous requests. 
Each worker - * thread uses a session from the configured session_max., an integer - * between 1 and 20; default \c 2.} + * @config{async = (, asynchronous operations configuration options., a set of related + * configuration options defined below.} + * @config{ enabled, enable + * asynchronous operation., a boolean flag; default \c false.} + * @config{ ops_max, maximum number of expected simultaneous + * asynchronous operations., an integer between 1 and 4096; default \c 1024.} + * @config{ threads, the number of worker threads to service + * asynchronous requests. Each worker thread uses a session from the configured + * session_max., an integer between 1 and 20; default \c 2.} * @config{ ),,} - * @config{cache_max_wait_ms, the maximum number of milliseconds an - * application thread will wait for space to be available in cache - * before giving up. Default will wait forever., an integer greater - * than or equal to 0; default \c 0.} - * @config{cache_overflow = (, cache overflow configuration options., a - * set of related configuration options defined below.} - * @config{ file_max, The maximum number of bytes - * that WiredTiger is allowed to use for its cache overflow mechanism. - * If the cache overflow file exceeds this size\, a panic will be - * triggered. The default value means that the cache overflow file is - * unbounded and may use as much space as the filesystem will - * accommodate. The minimum non-zero setting is 100MB., an integer - * greater than or equal to 0; default \c 0.} + * @config{cache_max_wait_ms, the maximum number of milliseconds an application thread will + * wait for space to be available in cache before giving up. 
Default will wait forever., an + * integer greater than or equal to 0; default \c 0.} + * @config{cache_overflow = (, cache overflow configuration options., a set of related + * configuration options defined below.} + * @config{ file_max, The + * maximum number of bytes that WiredTiger is allowed to use for its cache overflow + * mechanism. If the cache overflow file exceeds this size\, a panic will be triggered. + * The default value means that the cache overflow file is unbounded and may use as much + * space as the filesystem will accommodate. The minimum non-zero setting is 100MB., an + * integer greater than or equal to 0; default \c 0.} * @config{ ),,} - * @config{cache_overhead, assume the heap allocator overhead is the - * specified percentage\, and adjust the cache usage by that amount (for - * example\, if there is 10GB of data in cache\, a percentage of 10 - * means WiredTiger treats this as 11GB). This value is configurable - * because different heap allocators have different overhead and - * different workloads will have different heap allocation sizes and - * patterns\, therefore applications may need to adjust this value based - * on allocator choice and behavior in measured workloads., an integer - * between 0 and 30; default \c 8.} - * @config{cache_size, maximum heap memory to allocate for the cache. A - * database should configure either \c cache_size or \c shared_cache but - * not both., an integer between 1MB and 10TB; default \c 100MB.} - * @config{checkpoint = (, periodically checkpoint the database. - * Enabling the checkpoint server uses a session from the configured - * session_max., a set of related configuration options defined below.} - * @config{ log_size, wait for this amount of log - * record bytes to be written to the log between each checkpoint. If - * non-zero\, this value will use a minimum of the log file size. 
A - * database can configure both log_size and wait to set an upper bound - * for checkpoints; setting this value above 0 configures periodic - * checkpoints., an integer between 0 and 2GB; default \c 0.} - * @config{ wait, seconds to wait between each - * checkpoint; setting this value above 0 configures periodic - * checkpoints., an integer between 0 and 100000; default \c 0.} + * @config{cache_overhead, assume the heap allocator overhead is the specified percentage\, + * and adjust the cache usage by that amount (for example\, if there is 10GB of data in + * cache\, a percentage of 10 means WiredTiger treats this as 11GB). This value is + * configurable because different heap allocators have different overhead and different + * workloads will have different heap allocation sizes and patterns\, therefore applications + * may need to adjust this value based on allocator choice and behavior in measured + * workloads., an integer between 0 and 30; default \c 8.} + * @config{cache_size, maximum heap memory to allocate for the cache. A database should + * configure either \c cache_size or \c shared_cache but not both., an integer between 1MB + * and 10TB; default \c 100MB.} + * @config{checkpoint = (, periodically checkpoint the database. Enabling the checkpoint + * server uses a session from the configured session_max., a set of related configuration + * options defined below.} + * @config{ log_size, wait for this amount of + * log record bytes to be written to the log between each checkpoint. If non-zero\, this + * value will use a minimum of the log file size. 
A database can configure both log_size + * and wait to set an upper bound for checkpoints; setting this value above 0 configures + * periodic checkpoints., an integer between 0 and 2GB; default \c 0.} + * @config{ wait, seconds to wait between each checkpoint; setting + * this value above 0 configures periodic checkpoints., an integer between 0 and 100000; + * default \c 0.} * @config{ ),,} - * @config{compatibility = (, set compatibility version of database. - * Changing the compatibility version requires that there are no active - * operations for the duration of the call., a set of related - * configuration options defined below.} - * @config{ release, compatibility release - * version string., a string; default empty.} + * @config{compatibility = (, set compatibility version of database. Changing the + * compatibility version requires that there are no active operations for the duration of + * the call., a set of related configuration options defined below.} + * @config{ release, compatibility release version string., a string; + * default empty.} * @config{ ),,} - * @config{debug_mode = (, control the settings of various extended - * debugging features., a set of related configuration options defined - * below.} - * @config{ checkpoint_retention, adjust - * log archiving to retain the log records of this number of - * checkpoints. Zero or one means perform normal archiving., an integer - * between 0 and 1024; default \c 0.} + * @config{debug_mode = (, control the settings of various extended debugging features., a + * set of related configuration options defined below.} * @config{ - * eviction, if true\, modify internal algorithms to change skew to - * force lookaside eviction to happen more aggressively. 
This includes - * but is not limited to not skewing newest\, not favoring leaf pages\, - * and modifying the eviction score mechanism., a boolean flag; default - * \c false.} - * @config{ rollback_error, return a - * WT_ROLLBACK error from a transaction operation about every Nth - * operation to simulate a collision., an integer between 0 and 10M; + * checkpoint_retention, adjust log archiving to retain the log records of this number of + * checkpoints. Zero or one means perform normal archiving., an integer between 0 and 1024; * default \c 0.} + * @config{ eviction, if true\, modify internal + * algorithms to change skew to force lookaside eviction to happen more aggressively. This + * includes but is not limited to not skewing newest\, not favoring leaf pages\, and + * modifying the eviction score mechanism., a boolean flag; default \c false.} + * @config{ rollback_error, return a WT_ROLLBACK error from a + * transaction operation about every Nth operation to simulate a collision., an integer + * between 0 and 10M; default \c 0.} * @config{ table_logging, if - * true\, write transaction related information to the log for all - * operations\, even operations for tables with logging turned off. - * This setting introduces a log format change that may break older - * versions of WiredTiger. These operations are informational and - * skipped in recovery., a boolean flag; default \c false.} - * @config{ - * ),,} - * @config{error_prefix, prefix string for error messages., a string; - * default empty.} - * @config{eviction = (, eviction configuration options., a set of - * related configuration options defined below.} + * true\, write transaction related information to the log for all operations\, even + * operations for tables with logging turned off. This setting introduces a log format + * change that may break older versions of WiredTiger. 
These operations are informational + * and skipped in recovery., a boolean flag; default \c false.} + * @config{ ),,} + * @config{error_prefix, prefix string for error messages., a string; default empty.} + * @config{eviction = (, eviction configuration options., a set of related configuration + * options defined below.} * @config{ threads_max, maximum number of - * threads WiredTiger will start to help evict pages from cache. The - * number of threads started will vary depending on the current eviction - * load. Each eviction worker thread uses a session from the configured - * session_max., an integer between 1 and 20; default \c 8.} - * @config{ threads_min, minimum number of - * threads WiredTiger will start to help evict pages from cache. The - * number of threads currently running will vary depending on the - * current eviction load., an integer between 1 and 20; default \c 1.} + * threads WiredTiger will start to help evict pages from cache. The number of threads + * started will vary depending on the current eviction load. Each eviction worker thread + * uses a session from the configured session_max., an integer between 1 and 20; default \c + * 8.} + * @config{ threads_min, minimum number of threads WiredTiger + * will start to help evict pages from cache. The number of threads currently running will + * vary depending on the current eviction load., an integer between 1 and 20; default \c 1.} * @config{ ),,} - * @config{eviction_checkpoint_target, perform eviction at the beginning - * of checkpoints to bring the dirty content in cache to this level. It - * is a percentage of the cache size if the value is within the range of - * 0 to 100 or an absolute size when greater than 100. The value is not - * allowed to exceed the \c cache_size. 
Ignored if set to zero or \c - * in_memory is \c true., an integer between 0 and 10TB; default \c 1.} - * @config{eviction_dirty_target, perform eviction in worker threads - * when the cache contains at least this much dirty content. It is a - * percentage of the cache size if the value is within the range of 1 to - * 100 or an absolute size when greater than 100. The value is not - * allowed to exceed the \c cache_size., an integer between 1 and 10TB; - * default \c 5.} - * @config{eviction_dirty_trigger, trigger application threads to - * perform eviction when the cache contains at least this much dirty - * content. It is a percentage of the cache size if the value is within - * the range of 1 to 100 or an absolute size when greater than 100. The - * value is not allowed to exceed the \c cache_size. This setting only - * alters behavior if it is lower than eviction_trigger., an integer - * between 1 and 10TB; default \c 20.} - * @config{eviction_target, perform eviction in worker threads when the - * cache contains at least this much content. It is a percentage of the - * cache size if the value is within the range of 10 to 100 or an - * absolute size when greater than 100. The value is not allowed to - * exceed the \c cache_size., an integer between 10 and 10TB; default \c - * 80.} - * @config{eviction_trigger, trigger application threads to perform - * eviction when the cache contains at least this much content. It is a - * percentage of the cache size if the value is within the range of 10 - * to 100 or an absolute size when greater than 100. 
The value is not - * allowed to exceed the \c cache_size., an integer between 10 and 10TB; - * default \c 95.} - * @config{file_manager = (, control how file handles are managed., a - * set of related configuration options defined below.} - * @config{ close_handle_minimum, number of - * handles open before the file manager will look for handles to close., - * an integer greater than or equal to 0; default \c 250.} - * @config{ close_idle_time, amount of time in - * seconds a file handle needs to be idle before attempting to close it. - * A setting of 0 means that idle handles are not closed., an integer - * between 0 and 100000; default \c 30.} - * @config{ close_scan_interval, interval in - * seconds at which to check for files that are inactive and close - * them., an integer between 1 and 100000; default \c 10.} + * @config{eviction_checkpoint_target, perform eviction at the beginning of checkpoints to + * bring the dirty content in cache to this level. It is a percentage of the cache size if + * the value is within the range of 0 to 100 or an absolute size when greater than 100. The + * value is not allowed to exceed the \c cache_size. Ignored if set to zero or \c in_memory + * is \c true., an integer between 0 and 10TB; default \c 1.} + * @config{eviction_dirty_target, perform eviction in worker threads when the cache contains + * at least this much dirty content. It is a percentage of the cache size if the value is + * within the range of 1 to 100 or an absolute size when greater than 100. The value is not + * allowed to exceed the \c cache_size., an integer between 1 and 10TB; default \c 5.} + * @config{eviction_dirty_trigger, trigger application threads to perform eviction when the + * cache contains at least this much dirty content. It is a percentage of the cache size if + * the value is within the range of 1 to 100 or an absolute size when greater than 100. The + * value is not allowed to exceed the \c cache_size. 
This setting only alters behavior if + * it is lower than eviction_trigger., an integer between 1 and 10TB; default \c 20.} + * @config{eviction_target, perform eviction in worker threads when the cache contains at + * least this much content. It is a percentage of the cache size if the value is within the + * range of 10 to 100 or an absolute size when greater than 100. The value is not allowed to + * exceed the \c cache_size., an integer between 10 and 10TB; default \c 80.} + * @config{eviction_trigger, trigger application threads to perform eviction when the cache + * contains at least this much content. It is a percentage of the cache size if the value + * is within the range of 10 to 100 or an absolute size when greater than 100. The value is + * not allowed to exceed the \c cache_size., an integer between 10 and 10TB; default \c 95.} + * @config{file_manager = (, control how file handles are managed., a set of related + * configuration options defined below.} + * @config{ + * close_handle_minimum, number of handles open before the file manager will look for + * handles to close., an integer greater than or equal to 0; default \c 250.} + * @config{ close_idle_time, amount of time in seconds a file handle + * needs to be idle before attempting to close it. A setting of 0 means that idle handles + * are not closed., an integer between 0 and 100000; default \c 30.} + * @config{ close_scan_interval, interval in seconds at which to + * check for files that are inactive and close them., an integer between 1 and 100000; + * default \c 10.} * @config{ ),,} - * @config{io_capacity = (, control how many bytes per second are - * written and read. Exceeding the capacity results in throttling., a - * set of related configuration options defined below.} + * @config{io_capacity = (, control how many bytes per second are written and read. 
+ * Exceeding the capacity results in throttling., a set of related configuration options + * defined below.} * @config{ total, number of bytes per second - * available to all subsystems in total. When set\, decisions about - * what subsystems are throttled\, and in what proportion\, are made - * internally. The minimum non-zero setting is 1MB., an integer between - * 0 and 1TB; default \c 0.} + * available to all subsystems in total. When set\, decisions about what subsystems are + * throttled\, and in what proportion\, are made internally. The minimum non-zero setting + * is 1MB., an integer between 0 and 1TB; default \c 0.} * @config{ ),,} - * @config{log = (, enable logging. Enabling logging uses three - * sessions from the configured session_max., a set of related - * configuration options defined below.} - * @config{ archive, automatically archive - * unneeded log files., a boolean flag; default \c true.} - * @config{ os_cache_dirty_pct, maximum dirty - * system buffer cache usage\, as a percentage of the log's \c file_max. - * If non-zero\, schedule writes for dirty blocks belonging to the log - * in the system buffer cache after that percentage of the log has been - * written into the buffer cache without an intervening file sync., an - * integer between 0 and 100; default \c 0.} - * @config{ prealloc, pre-allocate log files., a + * @config{log = (, enable logging. Enabling logging uses three sessions from the + * configured session_max., a set of related configuration options defined below.} + * @config{ archive, automatically archive unneeded log files., a * boolean flag; default \c true.} - * @config{ - * zero_fill, manually write zeroes into log files., a boolean flag; - * default \c false.} + * @config{ os_cache_dirty_pct, + * maximum dirty system buffer cache usage\, as a percentage of the log's \c file_max. 
If + * non-zero\, schedule writes for dirty blocks belonging to the log in the system buffer + * cache after that percentage of the log has been written into the buffer cache without an + * intervening file sync., an integer between 0 and 100; default \c 0.} + * @config{ prealloc, pre-allocate log files., a boolean flag; + * default \c true.} + * @config{ zero_fill, manually write zeroes into + * log files., a boolean flag; default \c false.} * @config{ ),,} - * @config{lsm_manager = (, configure database wide options for LSM tree - * management. The LSM manager is started automatically the first time - * an LSM tree is opened. The LSM manager uses a session from the - * configured session_max., a set of related configuration options - * defined below.} - * @config{ merge, merge LSM - * chunks where possible., a boolean flag; default \c true.} - * @config{ worker_thread_max, Configure a set of - * threads to manage merging LSM trees in the database. Each worker - * thread uses a session handle from the configured session_max., an + * @config{lsm_manager = (, configure database wide options for LSM tree management. The + * LSM manager is started automatically the first time an LSM tree is opened. The LSM + * manager uses a session from the configured session_max., a set of related configuration + * options defined below.} + * @config{ merge, merge LSM chunks where + * possible., a boolean flag; default \c true.} + * @config{ + * worker_thread_max, Configure a set of threads to manage merging LSM trees in the + * database. Each worker thread uses a session handle from the configured session_max., an * integer between 3 and 20; default \c 4.} * @config{ ),,} - * @config{operation_tracking = (, enable tracking of - * performance-critical functions. See @ref operation_tracking for more - * information., a set of related configuration options defined below.} + * @config{operation_tracking = (, enable tracking of performance-critical functions. 
See + * @ref operation_tracking for more information., a set of related configuration options + * defined below.} * @config{ enabled, enable operation tracking * subsystem., a boolean flag; default \c false.} - * @config{ path, the name of a directory into - * which operation tracking files are written. The directory must - * already exist. If the value is not an absolute path\, the path is - * relative to the database home (see @ref absolute_path for more - * information)., a string; default \c ".".} + * @config{ path, the + * name of a directory into which operation tracking files are written. The directory must + * already exist. If the value is not an absolute path\, the path is relative to the + * database home (see @ref absolute_path for more information)., a string; default \c ".".} * @config{ ),,} - * @config{shared_cache = (, shared cache configuration options. A - * database should configure either a cache_size or a shared_cache not - * both. Enabling a shared cache uses a session from the configured - * session_max. A shared cache can not have absolute values configured - * for cache eviction settings., a set of related configuration options - * defined below.} - * @config{ chunk, the - * granularity that a shared cache is redistributed., an integer between - * 1MB and 10TB; default \c 10MB.} - * @config{ name, - * the name of a cache that is shared between databases or \c "none" - * when no shared cache is configured., a string; default \c none.} - * @config{ quota, maximum size of cache this - * database can be allocated from the shared cache. Defaults to the - * entire shared cache size., an integer; default \c 0.} - * @config{ reserve, amount of cache this - * database is guaranteed to have available from the shared cache. This - * setting is per database. Defaults to the chunk size., an integer; + * @config{shared_cache = (, shared cache configuration options. A database should + * configure either a cache_size or a shared_cache not both. 
Enabling a shared cache uses a + * session from the configured session_max. A shared cache can not have absolute values + * configured for cache eviction settings., a set of related configuration options defined + * below.} + * @config{ chunk, the granularity that a shared cache is + * redistributed., an integer between 1MB and 10TB; default \c 10MB.} + * @config{ name, the name of a cache that is shared between + * databases or \c "none" when no shared cache is configured., a string; default \c none.} + * @config{ quota, maximum size of cache this database can be + * allocated from the shared cache. Defaults to the entire shared cache size., an integer; * default \c 0.} - * @config{ size, maximum memory - * to allocate for the shared cache. Setting this will update the value - * if one is already set., an integer between 1MB and 10TB; default \c - * 500MB.} + * @config{ reserve, amount of cache this database is + * guaranteed to have available from the shared cache. This setting is per database. + * Defaults to the chunk size., an integer; default \c 0.} + * @config{ + * size, maximum memory to allocate for the shared cache. Setting this will update the + * value if one is already set., an integer between 1MB and 10TB; default \c 500MB.} * @config{ ),,} - * @config{statistics, Maintain database statistics\, which may impact - * performance. Choosing "all" maintains all statistics regardless of - * cost\, "fast" maintains a subset of statistics that are relatively - * inexpensive\, "none" turns off all statistics. The "clear" - * configuration resets statistics after they are gathered\, where - * appropriate (for example\, a cache size statistic is not cleared\, - * while the count of cursor insert operations will be cleared). When - * "clear" is configured for the database\, gathered statistics are - * reset each time a statistics cursor is used to gather statistics\, as - * well as each time statistics are logged using the \c statistics_log - * configuration. 
See @ref statistics for more information., a list\, - * with values chosen from the following options: \c "all"\, \c - * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk"; - * default \c none.} - * @config{statistics_log = (, log any statistics the database is - * configured to maintain\, to a file. See @ref statistics for more - * information. Enabling the statistics log server uses a session from - * the configured session_max., a set of related configuration options + * @config{statistics, Maintain database statistics\, which may impact performance. + * Choosing "all" maintains all statistics regardless of cost\, "fast" maintains a subset of + * statistics that are relatively inexpensive\, "none" turns off all statistics. The + * "clear" configuration resets statistics after they are gathered\, where appropriate (for + * example\, a cache size statistic is not cleared\, while the count of cursor insert + * operations will be cleared). When "clear" is configured for the database\, gathered + * statistics are reset each time a statistics cursor is used to gather statistics\, as well + * as each time statistics are logged using the \c statistics_log configuration. See @ref + * statistics for more information., a list\, with values chosen from the following options: + * \c "all"\, \c "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk"; default + * \c none.} + * @config{statistics_log = (, log any statistics the database is configured to maintain\, + * to a file. See @ref statistics for more information. 
Enabling the statistics log server + * uses a session from the configured session_max., a set of related configuration options * defined below.} - * @config{ json, encode - * statistics in JSON format., a boolean flag; default \c false.} - * @config{ on_close, log statistics on database - * close., a boolean flag; default \c false.} - * @config{ sources, if non-empty\, include - * statistics for the list of data source URIs\, if they are open at the - * time of the statistics logging. The list may include URIs matching a - * single data source ("table:mytable")\, or a URI matching all data - * sources of a particular type ("table:")., a list of strings; default - * empty.} - * @config{ timestamp, a timestamp - * prepended to each log record\, may contain strftime conversion - * specifications\, when \c json is configured\, defaults to \c + * @config{ json, encode statistics in JSON format., + * a boolean flag; default \c false.} + * @config{ on_close, log + * statistics on database close., a boolean flag; default \c false.} + * @config{ sources, if non-empty\, include statistics for the list + * of data source URIs\, if they are open at the time of the statistics logging. The list + * may include URIs matching a single data source ("table:mytable")\, or a URI matching all + * data sources of a particular type ("table:")., a list of strings; default empty.} + * @config{ timestamp, a timestamp prepended to each log record\, may + * contain strftime conversion specifications\, when \c json is configured\, defaults to \c * "%FT%Y.000Z"., a string; default \c "%b %d %H:%M:%S".} - * @config{ wait, seconds to wait between each - * write of the log records; setting this value above 0 configures - * statistics logging., an integer between 0 and 100000; default \c 0.} - * @config{ ),,} - * @config{verbose, enable messages for various events. 
Options are - * given as a list\, such as - * <code>"verbose=[evictserver\,read]"</code>., a list\, with values - * chosen from the following options: \c "api"\, \c "block"\, \c - * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c - * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c - * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c - * "log"\, \c "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c - * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c - * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, - * \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c - * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default - * empty.} + * @config{ + * wait, seconds to wait between each write of the log records; setting this value above 0 + * configures statistics logging., an integer between 0 and 100000; default \c 0.} + * @config{ + * ),,} + * @config{verbose, enable messages for various events. Options are given as a list\, such + * as <code>"verbose=[evictserver\,read]"</code>., a list\, with values chosen from the + * following options: \c "api"\, \c "block"\, \c "checkpoint"\, \c "checkpoint_progress"\, + * \c "compact"\, \c "compact_progress"\, \c "error_returns"\, \c "evict"\, \c + * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c + * "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c + * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, + * \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, + * \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c + * "write"; default empty.} * @configend * @errors */ @@ -2564,21 +2390,18 @@ struct __wt_connection { * connection's event handler is used. 
See @ref event_message_handling * for more information. * @configstart{WT_CONNECTION.open_session, see dist/api_data.py} - * @config{cache_cursors, enable caching of cursors for reuse. Any - * calls to WT_CURSOR::close for a cursor created in this session will - * mark the cursor as cached and keep it available to be reused for - * later calls to WT_SESSION::open_cursor. Cached cursors may be - * eventually closed. This value is inherited from ::wiredtiger_open \c + * @config{cache_cursors, enable caching of cursors for reuse. Any calls to + * WT_CURSOR::close for a cursor created in this session will mark the cursor as cached and + * keep it available to be reused for later calls to WT_SESSION::open_cursor. Cached + * cursors may be eventually closed. This value is inherited from ::wiredtiger_open \c * cache_cursors., a boolean flag; default \c true.} - * @config{ignore_cache_size, when set\, operations performed by this - * session ignore the cache size and are not blocked when the cache is - * full. Note that use of this option for operations that create cache - * pressure can starve ordinary sessions that obey the cache size., a - * boolean flag; default \c false.} - * @config{isolation, the default isolation level for operations in this - * session., a string\, chosen from the following options: \c - * "read-uncommitted"\, \c "read-committed"\, \c "snapshot"; default \c - * read-committed.} + * @config{ignore_cache_size, when set\, operations performed by this session ignore the + * cache size and are not blocked when the cache is full. 
Note that use of this option for + * operations that create cache pressure can starve ordinary sessions that obey the cache + * size., a boolean flag; default \c false.} + * @config{isolation, the default isolation level for operations in this session., a + * string\, chosen from the following options: \c "read-uncommitted"\, \c "read-committed"\, + * \c "snapshot"; default \c read-committed.} * @configend * @param[out] sessionp the new session handle * @errors @@ -2602,23 +2425,19 @@ struct __wt_connection { * hexadecimal encoding of the timestamp being queried. Must be large * enough to hold a NUL terminated, hex-encoded 8B timestamp (17 bytes). * @configstart{WT_CONNECTION.query_timestamp, see dist/api_data.py} - * @config{get, specify which timestamp to query: \c all_committed - * returns the largest timestamp such that all timestamps up to that - * value have committed\, \c all_durable returns the largest timestamp - * such that all timestamps up to that value have been made durable\, \c - * last_checkpoint returns the timestamp of the most recent stable - * checkpoint\, \c oldest returns the most recent \c oldest_timestamp - * set with WT_CONNECTION::set_timestamp\, \c oldest_reader returns the - * minimum of the read timestamps of all active readers \c pinned - * returns the minimum of the \c oldest_timestamp and the read - * timestamps of all active readers\, \c recovery returns the timestamp - * of the most recent stable checkpoint taken prior to a shutdown and \c - * stable returns the most recent \c stable_timestamp set with - * WT_CONNECTION::set_timestamp. 
See @ref transaction_timestamps., a - * string\, chosen from the following options: \c "all_committed"\, \c - * "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c - * "oldest_reader"\, \c "pinned"\, \c "recovery"\, \c "stable"; default - * \c all_durable.} + * @config{get, specify which timestamp to query: \c all_committed returns the largest + * timestamp such that all timestamps up to that value have committed\, \c all_durable + * returns the largest timestamp such that all timestamps up to that value have been made + * durable\, \c last_checkpoint returns the timestamp of the most recent stable checkpoint\, + * \c oldest returns the most recent \c oldest_timestamp set with + * WT_CONNECTION::set_timestamp\, \c oldest_reader returns the minimum of the read + * timestamps of all active readers \c pinned returns the minimum of the \c oldest_timestamp + * and the read timestamps of all active readers\, \c recovery returns the timestamp of the + * most recent stable checkpoint taken prior to a shutdown and \c stable returns the most + * recent \c stable_timestamp set with WT_CONNECTION::set_timestamp. See @ref + * transaction_timestamps., a string\, chosen from the following options: \c + * "all_committed"\, \c "all_durable"\, \c "last_checkpoint"\, \c "oldest"\, \c + * "oldest_reader"\, \c "pinned"\, \c "recovery"\, \c "stable"; default \c all_durable.} * @configend * @errors * If there is no matching timestamp (e.g., if this method is called @@ -2638,40 +2457,33 @@ struct __wt_connection { * * @param connection the connection handle * @configstart{WT_CONNECTION.set_timestamp, see dist/api_data.py} - * @config{commit_timestamp, (deprecated) reset the maximum commit - * timestamp tracked by WiredTiger. This will cause future calls to - * WT_CONNECTION::query_timestamp to ignore commit timestamps greater - * than the specified value until the next commit moves the tracked - * commit timestamp forwards. 
This is only intended for use where the - * application is rolling back locally committed transactions. The - * supplied value must not be older than the current oldest and stable - * timestamps. See @ref transaction_timestamps., a string; default - * empty.} - * @config{durable_timestamp, reset the maximum durable timestamp - * tracked by WiredTiger. This will cause future calls to - * WT_CONNECTION::query_timestamp to ignore durable timestamps greater - * than the specified value until the next durable timestamp moves the - * tracked durable timestamp forwards. This is only intended for use - * where the application is rolling back locally committed transactions. - * The supplied value must not be older than the current oldest and - * stable timestamps. See @ref transaction_timestamps., a string; - * default empty.} - * @config{force, set timestamps even if they violate normal ordering - * requirements. For example allow the \c oldest_timestamp to move - * backwards., a boolean flag; default \c false.} - * @config{oldest_timestamp, future commits and queries will be no - * earlier than the specified timestamp. Supplied values must be - * monotonically increasing\, any attempt to set the value to older than - * the current is silently ignored. The supplied value must not be - * newer than the current stable timestamp. See @ref - * transaction_timestamps., a string; default empty.} - * @config{stable_timestamp, checkpoints will not include commits that - * are newer than the specified timestamp in tables configured with \c - * log=(enabled=false). Supplied values must be monotonically - * increasing\, any attempt to set the value to older than the current - * is silently ignored. The supplied value must not be older than the - * current oldest timestamp. See @ref transaction_timestamps., a + * @config{commit_timestamp, (deprecated) reset the maximum commit timestamp tracked by + * WiredTiger. 
This will cause future calls to WT_CONNECTION::query_timestamp to ignore + * commit timestamps greater than the specified value until the next commit moves the + * tracked commit timestamp forwards. This is only intended for use where the application + * is rolling back locally committed transactions. The supplied value must not be older + * than the current oldest and stable timestamps. See @ref transaction_timestamps., a * string; default empty.} + * @config{durable_timestamp, reset the maximum durable timestamp tracked by WiredTiger. + * This will cause future calls to WT_CONNECTION::query_timestamp to ignore durable + * timestamps greater than the specified value until the next durable timestamp moves the + * tracked durable timestamp forwards. This is only intended for use where the application + * is rolling back locally committed transactions. The supplied value must not be older + * than the current oldest and stable timestamps. See @ref transaction_timestamps., a + * string; default empty.} + * @config{force, set timestamps even if they violate normal ordering requirements. For + * example allow the \c oldest_timestamp to move backwards., a boolean flag; default \c + * false.} + * @config{oldest_timestamp, future commits and queries will be no earlier than the + * specified timestamp. Supplied values must be monotonically increasing\, any attempt to + * set the value to older than the current is silently ignored. The supplied value must not + * be newer than the current stable timestamp. See @ref transaction_timestamps., a string; + * default empty.} + * @config{stable_timestamp, checkpoints will not include commits that are newer than the + * specified timestamp in tables configured with \c log=(enabled=false). Supplied values + * must be monotonically increasing\, any attempt to set the value to older than the current + * is silently ignored. The supplied value must not be older than the current oldest + * timestamp. 
See @ref transaction_timestamps., a string; default empty.} * @configend * @errors */ @@ -2719,20 +2531,17 @@ struct __wt_connection { * search the current application binary for the initialization * function, see @ref extensions for more details. * @configstart{WT_CONNECTION.load_extension, see dist/api_data.py} - * @config{config, configuration string passed to the entry point of the - * extension as its WT_CONFIG_ARG argument., a string; default empty.} - * @config{early_load, whether this extension should be loaded at the - * beginning of ::wiredtiger_open. Only applicable to extensions loaded - * via the wiredtiger_open configurations string., a boolean flag; - * default \c false.} - * @config{entry, the entry point of the extension\, called to - * initialize the extension when it is loaded. The signature of the - * function must match ::wiredtiger_extension_init., a string; default - * \c wiredtiger_extension_init.} - * @config{terminate, an optional function in the extension that is - * called before the extension is unloaded during WT_CONNECTION::close. - * The signature of the function must match - * ::wiredtiger_extension_terminate., a string; default \c + * @config{config, configuration string passed to the entry point of the extension as its + * WT_CONFIG_ARG argument., a string; default empty.} + * @config{early_load, whether this extension should be loaded at the beginning of + * ::wiredtiger_open. Only applicable to extensions loaded via the wiredtiger_open + * configurations string., a boolean flag; default \c false.} + * @config{entry, the entry point of the extension\, called to initialize the extension when + * it is loaded. The signature of the function must match ::wiredtiger_extension_init., a + * string; default \c wiredtiger_extension_init.} + * @config{terminate, an optional function in the extension that is called before the + * extension is unloaded during WT_CONNECTION::close. 
The signature of the function must + * match ::wiredtiger_extension_terminate., a string; default \c * wiredtiger_extension_terminate.} * @configend * @errors @@ -2875,393 +2684,336 @@ struct __wt_connection { * event handler is installed that writes error messages to stderr. See * @ref event_message_handling for more information. * @configstart{wiredtiger_open, see dist/api_data.py} - * @config{async = (, asynchronous operations configuration options., a set of - * related configuration options defined below.} - * @config{ enabled, enable asynchronous operation., a - * boolean flag; default \c false.} - * @config{ ops_max, - * maximum number of expected simultaneous asynchronous operations., an integer - * between 1 and 4096; default \c 1024.} - * @config{ - * threads, the number of worker threads to service asynchronous requests. Each - * worker thread uses a session from the configured session_max., an integer - * between 1 and 20; default \c 2.} - * @config{ ),,} - * @config{buffer_alignment, in-memory alignment (in bytes) for buffers used for - * I/O. The default value of -1 indicates a platform-specific alignment value - * should be used (4KB on Linux systems when direct I/O is configured\, zero - * elsewhere)., an integer between -1 and 1MB; default \c -1.} - * @config{builtin_extension_config, A structure where the keys are the names of - * builtin extensions and the values are passed to WT_CONNECTION::load_extension - * as the \c config parameter (for example\, - * <code>builtin_extension_config={zlib={compression_level=3}}</code>)., a - * string; default empty.} - * @config{cache_cursors, enable caching of cursors for reuse. 
This is the - * default value for any sessions created\, and can be overridden in configuring - * \c cache_cursors in WT_CONNECTION.open_session., a boolean flag; default \c - * true.} - * @config{cache_max_wait_ms, the maximum number of milliseconds an application - * thread will wait for space to be available in cache before giving up. - * Default will wait forever., an integer greater than or equal to 0; default \c - * 0.} - * @config{cache_overflow = (, cache overflow configuration options., a set of - * related configuration options defined below.} - * @config{ file_max, The maximum number of bytes that - * WiredTiger is allowed to use for its cache overflow mechanism. If the cache - * overflow file exceeds this size\, a panic will be triggered. The default - * value means that the cache overflow file is unbounded and may use as much - * space as the filesystem will accommodate. The minimum non-zero setting is - * 100MB., an integer greater than or equal to 0; default \c 0.} + * @config{async = (, asynchronous operations configuration options., a set of related configuration + * options defined below.} + * @config{ enabled, enable asynchronous operation., + * a boolean flag; default \c false.} + * @config{ ops_max, maximum number of + * expected simultaneous asynchronous operations., an integer between 1 and 4096; default \c 1024.} + * @config{ threads, the number of worker threads to service asynchronous + * requests. Each worker thread uses a session from the configured session_max., an integer between + * 1 and 20; default \c 2.} * @config{ ),,} - * @config{cache_overhead, assume the heap allocator overhead is the specified - * percentage\, and adjust the cache usage by that amount (for example\, if - * there is 10GB of data in cache\, a percentage of 10 means WiredTiger treats - * this as 11GB). 
This value is configurable because different heap allocators - * have different overhead and different workloads will have different heap - * allocation sizes and patterns\, therefore applications may need to adjust - * this value based on allocator choice and behavior in measured workloads., an - * integer between 0 and 30; default \c 8.} - * @config{cache_size, maximum heap memory to allocate for the cache. A - * database should configure either \c cache_size or \c shared_cache but not - * both., an integer between 1MB and 10TB; default \c 100MB.} - * @config{checkpoint = (, periodically checkpoint the database. Enabling the - * checkpoint server uses a session from the configured session_max., a set of - * related configuration options defined below.} - * @config{ log_size, wait for this amount of log record - * bytes to be written to the log between each checkpoint. If non-zero\, this - * value will use a minimum of the log file size. A database can configure both - * log_size and wait to set an upper bound for checkpoints; setting this value - * above 0 configures periodic checkpoints., an integer between 0 and 2GB; - * default \c 0.} - * @config{ wait, seconds to wait between - * each checkpoint; setting this value above 0 configures periodic checkpoints., - * an integer between 0 and 100000; default \c 0.} + * @config{buffer_alignment, in-memory alignment (in bytes) for buffers used for I/O. 
The default + * value of -1 indicates a platform-specific alignment value should be used (4KB on Linux systems + * when direct I/O is configured\, zero elsewhere)., an integer between -1 and 1MB; default \c -1.} + * @config{builtin_extension_config, A structure where the keys are the names of builtin extensions + * and the values are passed to WT_CONNECTION::load_extension as the \c config parameter (for + * example\, <code>builtin_extension_config={zlib={compression_level=3}}</code>)., a string; default + * empty.} + * @config{cache_cursors, enable caching of cursors for reuse. This is the default value for any + * sessions created\, and can be overridden in configuring \c cache_cursors in + * WT_CONNECTION.open_session., a boolean flag; default \c true.} + * @config{cache_max_wait_ms, the maximum number of milliseconds an application thread will wait for + * space to be available in cache before giving up. Default will wait forever., an integer greater + * than or equal to 0; default \c 0.} + * @config{cache_overflow = (, cache overflow configuration options., a set of related configuration + * options defined below.} + * @config{ file_max, The maximum number of bytes + * that WiredTiger is allowed to use for its cache overflow mechanism. If the cache overflow file + * exceeds this size\, a panic will be triggered. The default value means that the cache overflow + * file is unbounded and may use as much space as the filesystem will accommodate. The minimum + * non-zero setting is 100MB., an integer greater than or equal to 0; default \c 0.} * @config{ ),,} - * @config{checkpoint_sync, flush files to stable storage when closing or - * writing checkpoints., a boolean flag; default \c true.} - * @config{compatibility = (, set compatibility version of database. 
Changing - * the compatibility version requires that there are no active operations for - * the duration of the call., a set of related configuration options defined + * @config{cache_overhead, assume the heap allocator overhead is the specified percentage\, and + * adjust the cache usage by that amount (for example\, if there is 10GB of data in cache\, a + * percentage of 10 means WiredTiger treats this as 11GB). This value is configurable because + * different heap allocators have different overhead and different workloads will have different + * heap allocation sizes and patterns\, therefore applications may need to adjust this value based + * on allocator choice and behavior in measured workloads., an integer between 0 and 30; default \c + * 8.} + * @config{cache_size, maximum heap memory to allocate for the cache. A database should configure + * either \c cache_size or \c shared_cache but not both., an integer between 1MB and 10TB; default + * \c 100MB.} + * @config{checkpoint = (, periodically checkpoint the database. Enabling the checkpoint server + * uses a session from the configured session_max., a set of related configuration options defined * below.} - * @config{ release, compatibility release - * version string., a string; default empty.} + * @config{ log_size, wait for this amount of log record bytes to be + * written to the log between each checkpoint. If non-zero\, this value will use a minimum of the + * log file size. 
A database can configure both log_size and wait to set an upper bound for + * checkpoints; setting this value above 0 configures periodic checkpoints., an integer between 0 + * and 2GB; default \c 0.} + * @config{ wait, seconds to wait between each + * checkpoint; setting this value above 0 configures periodic checkpoints., an integer between 0 and + * 100000; default \c 0.} + * @config{ ),,} + * @config{checkpoint_sync, flush files to stable storage when closing or writing checkpoints., a + * boolean flag; default \c true.} + * @config{compatibility = (, set compatibility version of database. Changing the compatibility + * version requires that there are no active operations for the duration of the call., a set of + * related configuration options defined below.} + * @config{ release, + * compatibility release version string., a string; default empty.} * @config{ - * require_max, required maximum compatibility version of existing data files. - * Must be greater than or equal to any release version set in the \c release - * setting. Has no effect if creating the database., a string; default empty.} - * @config{ require_min, required minimum compatibility - * version of existing data files. Must be less than or equal to any release - * version set in the \c release setting. Has no effect if creating the + * require_max, required maximum compatibility version of existing data files. Must be greater than + * or equal to any release version set in the \c release setting. Has no effect if creating the * database., a string; default empty.} + * @config{ require_min, required + * minimum compatibility version of existing data files. Must be less than or equal to any release + * version set in the \c release setting. Has no effect if creating the database., a string; + * default empty.} * @config{ ),,} - * @config{config_base, write the base configuration file if creating the - * database. 
If \c false in the config passed directly to ::wiredtiger_open\, - * will ignore any existing base configuration file in addition to not creating - * one. See @ref config_base for more information., a boolean flag; default \c - * true.} - * @config{create, create the database if it does not exist., a boolean flag; - * default \c false.} - * @config{debug_mode = (, control the settings of various extended debugging - * features., a set of related configuration options defined below.} - * @config{ checkpoint_retention, adjust log archiving to - * retain the log records of this number of checkpoints. Zero or one means - * perform normal archiving., an integer between 0 and 1024; default \c 0.} - * @config{ eviction, if true\, modify internal - * algorithms to change skew to force lookaside eviction to happen more - * aggressively. This includes but is not limited to not skewing newest\, not - * favoring leaf pages\, and modifying the eviction score mechanism., a boolean - * flag; default \c false.} - * @config{ rollback_error, - * return a WT_ROLLBACK error from a transaction operation about every Nth - * operation to simulate a collision., an integer between 0 and 10M; default \c - * 0.} - * @config{ table_logging, if true\, write - * transaction related information to the log for all operations\, even - * operations for tables with logging turned off. This setting introduces a log - * format change that may break older versions of WiredTiger. These operations - * are informational and skipped in recovery., a boolean flag; default \c - * false.} + * @config{config_base, write the base configuration file if creating the database. If \c false in + * the config passed directly to ::wiredtiger_open\, will ignore any existing base configuration + * file in addition to not creating one. 
See @ref config_base for more information., a boolean + * flag; default \c true.} + * @config{create, create the database if it does not exist., a boolean flag; default \c false.} + * @config{debug_mode = (, control the settings of various extended debugging features., a set of + * related configuration options defined below.} + * @config{ + * checkpoint_retention, adjust log archiving to retain the log records of this number of + * checkpoints. Zero or one means perform normal archiving., an integer between 0 and 1024; default + * \c 0.} + * @config{ eviction, if true\, modify internal algorithms to change + * skew to force lookaside eviction to happen more aggressively. This includes but is not limited + * to not skewing newest\, not favoring leaf pages\, and modifying the eviction score mechanism., a + * boolean flag; default \c false.} + * @config{ rollback_error, return a + * WT_ROLLBACK error from a transaction operation about every Nth operation to simulate a + * collision., an integer between 0 and 10M; default \c 0.} + * @config{ + * table_logging, if true\, write transaction related information to the log for all operations\, + * even operations for tables with logging turned off. This setting introduces a log format change + * that may break older versions of WiredTiger. These operations are informational and skipped in + * recovery., a boolean flag; default \c false.} * @config{ ),,} - * @config{direct_io, Use \c O_DIRECT on POSIX systems\, and \c - * FILE_FLAG_NO_BUFFERING on Windows to access files. Options are given as a - * list\, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io - * requires care\, see @ref tuning_system_buffer_cache_direct_io for important - * warnings. 
Including \c "data" will cause WiredTiger data files to use direct - * I/O\, including \c "log" will cause WiredTiger log files to use direct I/O\, - * and including \c "checkpoint" will cause WiredTiger data files opened at a - * checkpoint (i.e: read-only) to use direct I/O. \c direct_io should be - * combined with \c write_through to get the equivalent of \c O_DIRECT on - * Windows., a list\, with values chosen from the following options: \c - * "checkpoint"\, \c "data"\, \c "log"; default empty.} - * @config{encryption = (, configure an encryptor for system wide metadata and - * logs. If a system wide encryptor is set\, it is also used for encrypting - * data files and tables\, unless encryption configuration is explicitly set for - * them when they are created with WT_SESSION::create., a set of related - * configuration options defined below.} - * @config{ keyid, - * An identifier that identifies a unique instance of the encryptor. It is - * stored in clear text\, and thus is available when the wiredtiger database is - * reopened. On the first use of a (name\, keyid) combination\, the - * WT_ENCRYPTOR::customize function is called with the keyid as an argument., a + * @config{direct_io, Use \c O_DIRECT on POSIX systems\, and \c FILE_FLAG_NO_BUFFERING on Windows to + * access files. Options are given as a list\, such as <code>"direct_io=[data]"</code>. Configuring + * \c direct_io requires care\, see @ref tuning_system_buffer_cache_direct_io for important + * warnings. Including \c "data" will cause WiredTiger data files to use direct I/O\, including \c + * "log" will cause WiredTiger log files to use direct I/O\, and including \c "checkpoint" will + * cause WiredTiger data files opened at a checkpoint (i.e: read-only) to use direct I/O. 
\c + * direct_io should be combined with \c write_through to get the equivalent of \c O_DIRECT on + * Windows., a list\, with values chosen from the following options: \c "checkpoint"\, \c "data"\, + * \c "log"; default empty.} + * @config{encryption = (, configure an encryptor for system wide metadata and logs. If a system + * wide encryptor is set\, it is also used for encrypting data files and tables\, unless encryption + * configuration is explicitly set for them when they are created with WT_SESSION::create., a set of + * related configuration options defined below.} + * @config{ keyid, An + * identifier that identifies a unique instance of the encryptor. It is stored in clear text\, and + * thus is available when the wiredtiger database is reopened. On the first use of a (name\, keyid) + * combination\, the WT_ENCRYPTOR::customize function is called with the keyid as an argument., a * string; default empty.} - * @config{ name, Permitted - * values are \c "none" or custom encryption engine name created with - * WT_CONNECTION::add_encryptor. See @ref encryption for more information., a - * string; default \c none.} - * @config{ secretkey, A string - * that is passed to the WT_ENCRYPTOR::customize function. It is never stored - * in clear text\, so must be given to any subsequent ::wiredtiger_open calls to - * reopen the database. It must also be provided to any "wt" commands used with - * this database., a string; default empty.} + * @config{ name, Permitted values are \c "none" or + * custom encryption engine name created with WT_CONNECTION::add_encryptor. See @ref encryption for + * more information., a string; default \c none.} + * @config{ secretkey, A + * string that is passed to the WT_ENCRYPTOR::customize function. It is never stored in clear + * text\, so must be given to any subsequent ::wiredtiger_open calls to reopen the database. 
It + * must also be provided to any "wt" commands used with this database., a string; default empty.} * @config{ ),,} - * @config{error_prefix, prefix string for error messages., a string; default - * empty.} - * @config{eviction = (, eviction configuration options., a set of related - * configuration options defined below.} + * @config{error_prefix, prefix string for error messages., a string; default empty.} + * @config{eviction = (, eviction configuration options., a set of related configuration options + * defined below.} + * @config{ threads_max, maximum number of threads WiredTiger + * will start to help evict pages from cache. The number of threads started will vary depending on + * the current eviction load. Each eviction worker thread uses a session from the configured + * session_max., an integer between 1 and 20; default \c 8.} * @config{ - * threads_max, maximum number of threads WiredTiger will start to help evict - * pages from cache. The number of threads started will vary depending on the - * current eviction load. Each eviction worker thread uses a session from the - * configured session_max., an integer between 1 and 20; default \c 8.} - * @config{ threads_min, minimum number of threads - * WiredTiger will start to help evict pages from cache. The number of threads - * currently running will vary depending on the current eviction load., an - * integer between 1 and 20; default \c 1.} + * threads_min, minimum number of threads WiredTiger will start to help evict pages from cache. The + * number of threads currently running will vary depending on the current eviction load., an integer + * between 1 and 20; default \c 1.} * @config{ ),,} - * @config{eviction_checkpoint_target, perform eviction at the beginning of - * checkpoints to bring the dirty content in cache to this level. It is a - * percentage of the cache size if the value is within the range of 0 to 100 or - * an absolute size when greater than 100. 
The value is not allowed to exceed - * the \c cache_size. Ignored if set to zero or \c in_memory is \c true., an - * integer between 0 and 10TB; default \c 1.} - * @config{eviction_dirty_target, perform eviction in worker threads when the - * cache contains at least this much dirty content. It is a percentage of the - * cache size if the value is within the range of 1 to 100 or an absolute size - * when greater than 100. The value is not allowed to exceed the \c cache_size., - * an integer between 1 and 10TB; default \c 5.} - * @config{eviction_dirty_trigger, trigger application threads to perform - * eviction when the cache contains at least this much dirty content. It is a - * percentage of the cache size if the value is within the range of 1 to 100 or - * an absolute size when greater than 100. The value is not allowed to exceed - * the \c cache_size. This setting only alters behavior if it is lower than + * @config{eviction_checkpoint_target, perform eviction at the beginning of checkpoints to bring the + * dirty content in cache to this level. It is a percentage of the cache size if the value is + * within the range of 0 to 100 or an absolute size when greater than 100. The value is not allowed + * to exceed the \c cache_size. Ignored if set to zero or \c in_memory is \c true., an integer + * between 0 and 10TB; default \c 1.} + * @config{eviction_dirty_target, perform eviction in worker threads when the cache contains at + * least this much dirty content. It is a percentage of the cache size if the value is within the + * range of 1 to 100 or an absolute size when greater than 100. The value is not allowed to exceed + * the \c cache_size., an integer between 1 and 10TB; default \c 5.} + * @config{eviction_dirty_trigger, trigger application threads to perform eviction when the cache + * contains at least this much dirty content. It is a percentage of the cache size if the value is + * within the range of 1 to 100 or an absolute size when greater than 100. 
The value is not allowed + * to exceed the \c cache_size. This setting only alters behavior if it is lower than * eviction_trigger., an integer between 1 and 10TB; default \c 20.} - * @config{eviction_target, perform eviction in worker threads when the cache - * contains at least this much content. It is a percentage of the cache size if - * the value is within the range of 10 to 100 or an absolute size when greater - * than 100. The value is not allowed to exceed the \c cache_size., an integer - * between 10 and 10TB; default \c 80.} - * @config{eviction_trigger, trigger application threads to perform eviction - * when the cache contains at least this much content. It is a percentage of - * the cache size if the value is within the range of 10 to 100 or an absolute - * size when greater than 100. The value is not allowed to exceed the \c - * cache_size., an integer between 10 and 10TB; default \c 95.} - * @config{exclusive, fail if the database already exists\, generally used with - * the \c create option., a boolean flag; default \c false.} - * @config{extensions, list of shared library extensions to load (using dlopen). - * Any values specified to a library extension are passed to - * WT_CONNECTION::load_extension as the \c config parameter (for example\, - * <code>extensions=(/path/ext.so={entry=my_entry})</code>)., a list of strings; - * default empty.} - * @config{file_extend, file extension configuration. If set\, extend files of - * the set type in allocations of the set size\, instead of a block at a time as - * each new block is written. For example\, - * <code>file_extend=(data=16MB)</code>. If set to 0\, disable the file - * extension for the set type. 
For log files\, the allowed range is between - * 100KB and 2GB; values larger than the configured maximum log size and the - * default config would extend log files in allocations of the maximum log file - * size., a list\, with values chosen from the following options: \c "data"\, \c + * @config{eviction_target, perform eviction in worker threads when the cache contains at least this + * much content. It is a percentage of the cache size if the value is within the range of 10 to 100 + * or an absolute size when greater than 100. The value is not allowed to exceed the \c cache_size., + * an integer between 10 and 10TB; default \c 80.} + * @config{eviction_trigger, trigger application threads to perform eviction when the cache contains + * at least this much content. It is a percentage of the cache size if the value is within the + * range of 10 to 100 or an absolute size when greater than 100. The value is not allowed to exceed + * the \c cache_size., an integer between 10 and 10TB; default \c 95.} + * @config{exclusive, fail if the database already exists\, generally used with the \c create + * option., a boolean flag; default \c false.} + * @config{extensions, list of shared library extensions to load (using dlopen). Any values + * specified to a library extension are passed to WT_CONNECTION::load_extension as the \c config + * parameter (for example\, <code>extensions=(/path/ext.so={entry=my_entry})</code>)., a list of + * strings; default empty.} + * @config{file_extend, file extension configuration. If set\, extend files of the set type in + * allocations of the set size\, instead of a block at a time as each new block is written. For + * example\, <code>file_extend=(data=16MB)</code>. If set to 0\, disable the file extension for the + * set type. 
For log files\, the allowed range is between 100KB and 2GB; values larger than the + * configured maximum log size and the default config would extend log files in allocations of the + * maximum log file size., a list\, with values chosen from the following options: \c "data"\, \c * "log"; default empty.} - * @config{file_manager = (, control how file handles are managed., a set of - * related configuration options defined below.} - * @config{ close_handle_minimum, number of handles open - * before the file manager will look for handles to close., an integer greater - * than or equal to 0; default \c 250.} - * @config{ - * close_idle_time, amount of time in seconds a file handle needs to be idle - * before attempting to close it. A setting of 0 means that idle handles are - * not closed., an integer between 0 and 100000; default \c 30.} - * @config{ close_scan_interval, interval in seconds at - * which to check for files that are inactive and close them., an integer - * between 1 and 100000; default \c 10.} - * @config{ ),,} - * @config{in_memory, keep data in-memory only. See @ref in_memory for more - * information., a boolean flag; default \c false.} - * @config{io_capacity = (, control how many bytes per second are written and - * read. Exceeding the capacity results in throttling., a set of related - * configuration options defined below.} - * @config{ total, - * number of bytes per second available to all subsystems in total. When set\, - * decisions about what subsystems are throttled\, and in what proportion\, are - * made internally. 
The minimum non-zero setting is 1MB., an integer between 0 - * and 1TB; default \c 0.} + * @config{file_manager = (, control how file handles are managed., a set of related configuration + * options defined below.} + * @config{ close_handle_minimum, number of handles + * open before the file manager will look for handles to close., an integer greater than or equal to + * 0; default \c 250.} + * @config{ close_idle_time, amount of time in seconds a + * file handle needs to be idle before attempting to close it. A setting of 0 means that idle + * handles are not closed., an integer between 0 and 100000; default \c 30.} + * @config{ close_scan_interval, interval in seconds at which to check for + * files that are inactive and close them., an integer between 1 and 100000; default \c 10.} * @config{ ),,} - * @config{log = (, enable logging. Enabling logging uses three sessions from - * the configured session_max., a set of related configuration options defined - * below.} - * @config{ archive, automatically archive - * unneeded log files., a boolean flag; default \c true.} - * @config{ compressor, configure a compressor for log - * records. Permitted values are \c "none" or custom compression engine name - * created with WT_CONNECTION::add_compressor. If WiredTiger has builtin - * support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd" compression\, - * these names are also available. See @ref compression for more information., - * a string; default \c none.} - * @config{ enabled, enable - * logging subsystem., a boolean flag; default \c false.} - * @config{ file_max, the maximum size of log files., an - * integer between 100KB and 2GB; default \c 100MB.} - * @config{ os_cache_dirty_pct, maximum dirty system - * buffer cache usage\, as a percentage of the log's \c file_max. 
If non-zero\, - * schedule writes for dirty blocks belonging to the log in the system buffer - * cache after that percentage of the log has been written into the buffer cache - * without an intervening file sync., an integer between 0 and 100; default \c + * @config{in_memory, keep data in-memory only. See @ref in_memory for more information., a boolean + * flag; default \c false.} + * @config{io_capacity = (, control how many bytes per second are written and read. Exceeding the + * capacity results in throttling., a set of related configuration options defined below.} + * @config{ total, number of bytes per second available to all subsystems in + * total. When set\, decisions about what subsystems are throttled\, and in what proportion\, are + * made internally. The minimum non-zero setting is 1MB., an integer between 0 and 1TB; default \c * 0.} - * @config{ path, the name of a directory into which - * log files are written. The directory must already exist. If the value is - * not an absolute path\, the path is relative to the database home (see @ref - * absolute_path for more information)., a string; default \c ".".} - * @config{ prealloc, pre-allocate log files., a boolean - * flag; default \c true.} - * @config{ recover, run recovery - * or error if recovery needs to run after an unclean shutdown., a string\, - * chosen from the following options: \c "error"\, \c "on"; default \c on.} - * @config{ zero_fill, manually write zeroes into log - * files., a boolean flag; default \c false.} * @config{ ),,} - * @config{lsm_manager = (, configure database wide options for LSM tree - * management. The LSM manager is started automatically the first time an LSM - * tree is opened. The LSM manager uses a session from the configured + * @config{log = (, enable logging. 
Enabling logging uses three sessions from the configured * session_max., a set of related configuration options defined below.} - * @config{ merge, merge LSM chunks where possible., a - * boolean flag; default \c true.} - * @config{ - * worker_thread_max, Configure a set of threads to manage merging LSM trees in - * the database. Each worker thread uses a session handle from the configured - * session_max., an integer between 3 and 20; default \c 4.} - * @config{ ),,} - * @config{mmap, Use memory mapping to access files when possible., a boolean + * @config{ archive, automatically archive unneeded log files., a boolean * flag; default \c true.} - * @config{multiprocess, permit sharing between processes (will automatically - * start an RPC server for primary processes and use RPC for secondary - * processes). <b>Not yet supported in WiredTiger</b>., a boolean flag; default - * \c false.} - * @config{operation_tracking = (, enable tracking of performance-critical - * functions. See @ref operation_tracking for more information., a set of - * related configuration options defined below.} - * @config{ enabled, enable operation tracking + * @config{ compressor, configure a compressor for + * log records. Permitted values are \c "none" or custom compression engine name created with + * WT_CONNECTION::add_compressor. If WiredTiger has builtin support for \c "lz4"\, \c "snappy"\, \c + * "zlib" or \c "zstd" compression\, these names are also available. See @ref compression for more + * information., a string; default \c none.} + * @config{ enabled, enable logging * subsystem., a boolean flag; default \c false.} + * @config{ file_max, the + * maximum size of log files., an integer between 100KB and 2GB; default \c 100MB.} + * @config{ os_cache_dirty_pct, maximum dirty system buffer cache usage\, as + * a percentage of the log's \c file_max. 
If non-zero\, schedule writes for dirty blocks belonging + * to the log in the system buffer cache after that percentage of the log has been written into the + * buffer cache without an intervening file sync., an integer between 0 and 100; default \c 0.} + * @config{ path, the name of a directory into which log files are written. + * The directory must already exist. If the value is not an absolute path\, the path is relative to + * the database home (see @ref absolute_path for more information)., a string; default \c ".".} + * @config{ prealloc, pre-allocate log files., a boolean flag; default \c + * true.} + * @config{ recover, run recovery or error if recovery needs to run + * after an unclean shutdown., a string\, chosen from the following options: \c "error"\, \c "on"; + * default \c on.} + * @config{ zero_fill, manually write zeroes into log files., + * a boolean flag; default \c false.} + * @config{ ),,} + * @config{lsm_manager = (, configure database wide options for LSM tree management. The LSM + * manager is started automatically the first time an LSM tree is opened. The LSM manager uses a + * session from the configured session_max., a set of related configuration options defined below.} + * @config{ merge, merge LSM chunks where possible., a boolean flag; default + * \c true.} + * @config{ worker_thread_max, Configure a set of threads to manage + * merging LSM trees in the database. Each worker thread uses a session handle from the configured + * session_max., an integer between 3 and 20; default \c 4.} + * @config{ ),,} + * @config{mmap, Use memory mapping to access files when possible., a boolean flag; default \c + * true.} + * @config{multiprocess, permit sharing between processes (will automatically start an RPC server + * for primary processes and use RPC for secondary processes). <b>Not yet supported in + * WiredTiger</b>., a boolean flag; default \c false.} + * @config{operation_tracking = (, enable tracking of performance-critical functions. 
See @ref + * operation_tracking for more information., a set of related configuration options defined below.} + * @config{ enabled, enable operation tracking subsystem., a boolean flag; + * default \c false.} * @config{ path, the name of a directory into which - * operation tracking files are written. The directory must already exist. If - * the value is not an absolute path\, the path is relative to the database home - * (see @ref absolute_path for more information)., a string; default \c ".".} + * operation tracking files are written. The directory must already exist. If the value is not an + * absolute path\, the path is relative to the database home (see @ref absolute_path for more + * information)., a string; default \c ".".} * @config{ ),,} - * @config{readonly, open connection in read-only mode. The database must - * exist. All methods that may modify a database are disabled. See @ref - * readonly for more information., a boolean flag; default \c false.} - * @config{salvage, open connection and salvage any WiredTiger-owned database - * and log files that it detects as corrupted. This API should only be used - * after getting an error return of WT_TRY_SALVAGE. Salvage rebuilds files in - * place\, overwriting existing files. We recommend making a backup copy of all - * files with the WiredTiger prefix prior to passing this flag., a boolean flag; + * @config{readonly, open connection in read-only mode. The database must exist. All methods that + * may modify a database are disabled. See @ref readonly for more information., a boolean flag; * default \c false.} - * @config{session_max, maximum expected number of sessions (including server - * threads)., an integer greater than or equal to 1; default \c 100.} - * @config{shared_cache = (, shared cache configuration options. A database - * should configure either a cache_size or a shared_cache not both. Enabling a - * shared cache uses a session from the configured session_max. 
A shared cache - * can not have absolute values configured for cache eviction settings., a set - * of related configuration options defined below.} - * @config{ chunk, the granularity that a shared cache is - * redistributed., an integer between 1MB and 10TB; default \c 10MB.} - * @config{ name, the name of a cache that is shared - * between databases or \c "none" when no shared cache is configured., a string; - * default \c none.} - * @config{ quota, maximum size of - * cache this database can be allocated from the shared cache. Defaults to the - * entire shared cache size., an integer; default \c 0.} + * @config{salvage, open connection and salvage any WiredTiger-owned database and log files that it + * detects as corrupted. This API should only be used after getting an error return of + * WT_TRY_SALVAGE. Salvage rebuilds files in place\, overwriting existing files. We recommend + * making a backup copy of all files with the WiredTiger prefix prior to passing this flag., a + * boolean flag; default \c false.} + * @config{session_max, maximum expected number of sessions (including server threads)., an integer + * greater than or equal to 1; default \c 100.} + * @config{shared_cache = (, shared cache configuration options. A database should configure either + * a cache_size or a shared_cache not both. Enabling a shared cache uses a session from the + * configured session_max. A shared cache can not have absolute values configured for cache + * eviction settings., a set of related configuration options defined below.} + * @config{ chunk, the granularity that a shared cache is redistributed., an + * integer between 1MB and 10TB; default \c 10MB.} + * @config{ name, the name of + * a cache that is shared between databases or \c "none" when no shared cache is configured., a + * string; default \c none.} + * @config{ quota, maximum size of cache this + * database can be allocated from the shared cache. 
Defaults to the entire shared cache size., an + * integer; default \c 0.} * @config{ reserve, amount of cache this database is - * guaranteed to have available from the shared cache. This setting is per - * database. Defaults to the chunk size., an integer; default \c 0.} - * @config{ size, maximum memory to allocate for the - * shared cache. Setting this will update the value if one is already set., an + * guaranteed to have available from the shared cache. This setting is per database. Defaults to + * the chunk size., an integer; default \c 0.} + * @config{ size, maximum memory + * to allocate for the shared cache. Setting this will update the value if one is already set., an * integer between 1MB and 10TB; default \c 500MB.} * @config{ ),,} - * @config{statistics, Maintain database statistics\, which may impact - * performance. Choosing "all" maintains all statistics regardless of cost\, - * "fast" maintains a subset of statistics that are relatively inexpensive\, - * "none" turns off all statistics. The "clear" configuration resets statistics - * after they are gathered\, where appropriate (for example\, a cache size - * statistic is not cleared\, while the count of cursor insert operations will - * be cleared). When "clear" is configured for the database\, gathered - * statistics are reset each time a statistics cursor is used to gather - * statistics\, as well as each time statistics are logged using the \c - * statistics_log configuration. See @ref statistics for more information., a - * list\, with values chosen from the following options: \c "all"\, \c - * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk"; default - * \c none.} - * @config{statistics_log = (, log any statistics the database is configured to - * maintain\, to a file. See @ref statistics for more information. 
Enabling - * the statistics log server uses a session from the configured session_max., a - * set of related configuration options defined below.} - * @config{ json, encode statistics in JSON format., a - * boolean flag; default \c false.} - * @config{ on_close, - * log statistics on database close., a boolean flag; default \c false.} + * @config{statistics, Maintain database statistics\, which may impact performance. Choosing "all" + * maintains all statistics regardless of cost\, "fast" maintains a subset of statistics that are + * relatively inexpensive\, "none" turns off all statistics. The "clear" configuration resets + * statistics after they are gathered\, where appropriate (for example\, a cache size statistic is + * not cleared\, while the count of cursor insert operations will be cleared). When "clear" is + * configured for the database\, gathered statistics are reset each time a statistics cursor is used + * to gather statistics\, as well as each time statistics are logged using the \c statistics_log + * configuration. See @ref statistics for more information., a list\, with values chosen from the + * following options: \c "all"\, \c "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c + * "tree_walk"; default \c none.} + * @config{statistics_log = (, log any statistics the database is configured to maintain\, to a + * file. See @ref statistics for more information. Enabling the statistics log server uses a + * session from the configured session_max., a set of related configuration options defined below.} + * @config{ json, encode statistics in JSON format., a boolean flag; default + * \c false.} + * @config{ on_close, log statistics on database close., a boolean + * flag; default \c false.} * @config{ path, the name of a directory into which - * statistics files are written. The directory must already exist. 
If the - * value is not an absolute path\, the path is relative to the database home - * (see @ref absolute_path for more information)., a string; default \c ".".} - * @config{ sources, if non-empty\, include statistics - * for the list of data source URIs\, if they are open at the time of the - * statistics logging. The list may include URIs matching a single data source - * ("table:mytable")\, or a URI matching all data sources of a particular type - * ("table:")., a list of strings; default empty.} - * @config{ timestamp, a timestamp prepended to each log - * record\, may contain strftime conversion specifications\, when \c json is - * configured\, defaults to \c "%FT%Y.000Z"., a string; default \c "%b %d - * %H:%M:%S".} - * @config{ wait, seconds to wait between - * each write of the log records; setting this value above 0 configures - * statistics logging., an integer between 0 and 100000; default \c 0.} - * @config{ - * ),,} - * @config{transaction_sync = (, how to sync log records when the transaction - * commits., a set of related configuration options defined below.} - * @config{ enabled, whether to sync the log on every - * commit by default\, can be overridden by the \c sync setting to + * statistics files are written. The directory must already exist. If the value is not an absolute + * path\, the path is relative to the database home (see @ref absolute_path for more information)., + * a string; default \c ".".} + * @config{ sources, if non-empty\, include + * statistics for the list of data source URIs\, if they are open at the time of the statistics + * logging. 
The list may include URIs matching a single data source ("table:mytable")\, or a URI + * matching all data sources of a particular type ("table:")., a list of strings; default empty.} + * @config{ timestamp, a timestamp prepended to each log record\, may contain + * strftime conversion specifications\, when \c json is configured\, defaults to \c "%FT%Y.000Z"., a + * string; default \c "%b %d %H:%M:%S".} + * @config{ wait, seconds to wait + * between each write of the log records; setting this value above 0 configures statistics logging., + * an integer between 0 and 100000; default \c 0.} + * @config{ ),,} + * @config{transaction_sync = (, how to sync log records when the transaction commits., a set of + * related configuration options defined below.} + * @config{ enabled, whether to + * sync the log on every commit by default\, can be overridden by the \c sync setting to * WT_SESSION::commit_transaction., a boolean flag; default \c false.} - * @config{ method, the method used to ensure log records - * are stable on disk\, see @ref tune_durability for more information., a - * string\, chosen from the following options: \c "dsync"\, \c "fsync"\, \c - * "none"; default \c fsync.} + * @config{ method, the method used to ensure log records are stable on + * disk\, see @ref tune_durability for more information., a string\, chosen from the following + * options: \c "dsync"\, \c "fsync"\, \c "none"; default \c fsync.} * @config{ ),,} - * @config{use_environment, use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME - * environment variables if the process is not running with special privileges. - * See @ref home for more information., a boolean flag; default \c true.} - * @config{use_environment_priv, use the \c WIREDTIGER_CONFIG and \c - * WIREDTIGER_HOME environment variables even if the process is running with - * special privileges. See @ref home for more information., a boolean flag; - * default \c false.} - * @config{verbose, enable messages for various events. 
Options are given as a - * list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with - * values chosen from the following options: \c "api"\, \c "block"\, \c - * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c - * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c "evict_stuck"\, \c - * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lookaside"\, - * \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c - * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c - * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c - * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c - * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} - * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to - * files. Ignored on non-Windows systems. Options are given as a list\, such - * as <code>"write_through=[data]"</code>. Configuring \c write_through requires - * care\, see @ref tuning_system_buffer_cache_direct_io for important warnings. - * Including \c "data" will cause WiredTiger data files to write through cache\, - * including \c "log" will cause WiredTiger log files to write through cache. - * \c write_through should be combined with \c direct_io to get the equivalent - * of POSIX \c O_DIRECT on Windows., a list\, with values chosen from the - * following options: \c "data"\, \c "log"; default empty.} + * @config{use_environment, use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME environment + * variables if the process is not running with special privileges. See @ref home for more + * information., a boolean flag; default \c true.} + * @config{use_environment_priv, use the \c WIREDTIGER_CONFIG and \c WIREDTIGER_HOME environment + * variables even if the process is running with special privileges. 
See @ref home for more + * information., a boolean flag; default \c false.} + * @config{verbose, enable messages for various events. Options are given as a list\, such as + * <code>"verbose=[evictserver\,read]"</code>., a list\, with values chosen from the following + * options: \c "api"\, \c "block"\, \c "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c + * "compact_progress"\, \c "error_returns"\, \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, \c + * "fileops"\, \c "handleops"\, \c "log"\, \c "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c + * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c + * "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c + * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c "verify"\, + * \c "version"\, \c "write"; default empty.} + * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to files. Ignored on + * non-Windows systems. Options are given as a list\, such as <code>"write_through=[data]"</code>. + * Configuring \c write_through requires care\, see @ref tuning_system_buffer_cache_direct_io for + * important warnings. Including \c "data" will cause WiredTiger data files to write through + * cache\, including \c "log" will cause WiredTiger log files to write through cache. 
\c + * write_through should be combined with \c direct_io to get the equivalent of POSIX \c O_DIRECT on + * Windows., a list\, with values chosen from the following options: \c "data"\, \c "log"; default + * empty.} * @configend * Additionally, if files named \c WiredTiger.config or \c WiredTiger.basecfg * appear in the WiredTiger home directory, they are read for configuration diff --git a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h index 4e7498e0c07..b10face9948 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h +++ b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h @@ -6,8 +6,8 @@ * See the file LICENSE for redistribution information. */ -#ifndef __WIREDTIGER_EXT_H_ -#define __WIREDTIGER_EXT_H_ +#ifndef __WIREDTIGER_EXT_H_ +#define __WIREDTIGER_EXT_H_ #include <wiredtiger.h> @@ -26,17 +26,17 @@ extern "C" { * Read-committed isolation level, returned by * WT_EXTENSION_API::transaction_isolation_level. */ -#define WT_TXN_ISO_READ_COMMITTED 1 +#define WT_TXN_ISO_READ_COMMITTED 1 /*! * Read-uncommitted isolation level, returned by * WT_EXTENSION_API::transaction_isolation_level. */ -#define WT_TXN_ISO_READ_UNCOMMITTED 2 +#define WT_TXN_ISO_READ_UNCOMMITTED 2 /*! * Snapshot isolation level, returned by * WT_EXTENSION_API::transaction_isolation_level. */ -#define WT_TXN_ISO_SNAPSHOT 3 +#define WT_TXN_ISO_SNAPSHOT 3 typedef struct __wt_txn_notify WT_TXN_NOTIFY; /*! @@ -44,18 +44,17 @@ typedef struct __wt_txn_notify WT_TXN_NOTIFY; * WT_EXTENSION_API::transaction_isolation_level. */ struct __wt_txn_notify { - /*! - * A method called when the session's current transaction is committed - * or rolled back. - * - * @param notify a pointer to the event handler - * @param session the current session handle - * @param txnid the transaction ID - * @param committed an integer value which is non-zero if the - * transaction is being committed. 
- */ - int (*notify)(WT_TXN_NOTIFY *notify, WT_SESSION *session, - uint64_t txnid, int committed); + /*! + * A method called when the session's current transaction is committed + * or rolled back. + * + * @param notify a pointer to the event handler + * @param session the current session handle + * @param txnid the transaction ID + * @param committed an integer value which is non-zero if the + * transaction is being committed. + */ + int (*notify)(WT_TXN_NOTIFY *notify, WT_SESSION *session, uint64_t txnid, int committed); }; /*! @@ -86,464 +85,442 @@ struct __wt_txn_notify { struct __wt_extension_api { /* !!! To maintain backwards compatibility, this structure is append-only. */ #if !defined(DOXYGEN) - /* - * Private fields. - */ - WT_CONNECTION *conn; /* Enclosing connection */ + /* + * Private fields. + */ + WT_CONNECTION *conn; /* Enclosing connection */ #endif - /*! - * Insert an error message into the WiredTiger error stream. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param fmt a printf-like format specification - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION_API err_printf - */ - int (*err_printf)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, const char *fmt, ...); - - /*! - * Insert a message into the WiredTiger message stream. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param fmt a printf-like format specification - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION_API msg_printf - */ - int (*msg_printf)( - WT_EXTENSION_API *, WT_SESSION *session, const char *fmt, ...); - - /*! - * Return information about an error as a string. 
- * - * @snippet ex_data_source.c WT_EXTENSION_API strerror - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param error a return value from a WiredTiger function - * @returns a string representation of the error - */ - const char *(*strerror)( - WT_EXTENSION_API *, WT_SESSION *session, int error); - - /*! - * Map a Windows system error code to a POSIX 1003.1/ANSI C error. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param windows_error a Windows system error code - * @returns a string representation of the error - * - * @snippet ex_data_source.c WT_EXTENSION_API map_windows_error - */ - int (*map_windows_error)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, uint32_t windows_error); - - /*! - * Allocate short-term use scratch memory. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param bytes the number of bytes of memory needed - * @returns A valid memory reference on success or NULL on error - * - * @snippet ex_data_source.c WT_EXTENSION_API scr_alloc - */ - void *(*scr_alloc)( - WT_EXTENSION_API *wt_api, WT_SESSION *session, size_t bytes); - - /*! - * Free short-term use scratch memory. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param ref a memory reference returned by WT_EXTENSION_API::scr_alloc - * - * @snippet ex_data_source.c WT_EXTENSION_API scr_free - */ - void (*scr_free)(WT_EXTENSION_API *, WT_SESSION *session, void *ref); - - /*! - * Configure the extension collator method. 
- * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param uri the URI of the handle being configured - * @param config the configuration information passed to an application - * @param collatorp the selector collator, if any - * @param ownp set if the collator terminate method should be called - * when no longer needed - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION collator config - */ - int (*collator_config)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - const char *uri, WT_CONFIG_ARG *config, - WT_COLLATOR **collatorp, int *ownp); - - /*! - * The extension collator method. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param collator the collator (or NULL if none available) - * @param first first item - * @param second second item - * @param[out] cmp set less than 0 if \c first collates less than - * \c second, set equal to 0 if \c first collates equally to \c second, - * set greater than 0 if \c first collates greater than \c second - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION collate - */ - int (*collate)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - WT_COLLATOR *collator, WT_ITEM *first, WT_ITEM *second, int *cmp); - - /*! - * Return the value of a configuration key. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param config the configuration information passed to an application - * @param key configuration key string - * @param value the returned value - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION config_get - */ - int (*config_get)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - WT_CONFIG_ARG *config, const char *key, WT_CONFIG_ITEM *value); - - /*! - * Return the value of a configuration key from a string. 
- * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param config the configuration string - * @param key configuration key string - * @param value the returned value - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION config_get - */ - int (*config_get_string)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - const char *config, const char *key, WT_CONFIG_ITEM *value); - - /*! - * @copydoc wiredtiger_config_parser_open - */ - int (*config_parser_open)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - const char *config, size_t len, WT_CONFIG_PARSER **config_parserp); - - /*! - * @copydoc wiredtiger_config_parser_open - */ - int (*config_parser_open_arg)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, WT_CONFIG_ARG *config, - WT_CONFIG_PARSER **config_parserp); - - /*! - * Insert a row into the metadata if it does not already exist. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param key row key - * @param value row value - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION metadata insert - */ - int (*metadata_insert)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, const char *key, const char *value); - - /*! - * Remove a row from the metadata. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param key row key - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION metadata remove - */ - int (*metadata_remove)( - WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key); - - /*! - * Return a row from the metadata. 
- * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param key row key - * @param [out] valuep the row value - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION metadata search - */ - int (*metadata_search)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, const char *key, char **valuep); - - /*! - * Update a row in the metadata by either inserting a new record or - * updating an existing record. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param key row key - * @param value row value - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION metadata update - */ - int (*metadata_update)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, const char *key, const char *value); - - /*! - * Pack a structure into a buffer. Deprecated in favor of stream - * based pack and unpack API. See WT_EXTENSION_API::pack_start for - * details. - * - * @param wt_api the extension handle - * @param session the session handle - * @param buffer a pointer to a packed byte array - * @param size the number of valid bytes in the buffer - * @param format the data format, see @ref packing - * @errors - */ - int (*struct_pack)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - void *buffer, size_t size, const char *format, ...); - - /*! - * Calculate the size required to pack a structure. Deprecated in - * favor of stream based pack and unpack API. - * - * @param wt_api the extension handle - * @param session the session handle - * @param sizep a location where the number of bytes needed for the - * matching call to WT_EXTENSION_API::struct_pack is returned - * @param format the data format, see @ref packing - * @errors - */ - int (*struct_size)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - size_t *sizep, const char *format, ...); - - /*! - * Unpack a structure from a buffer. Deprecated in favor of stream - * based pack and unpack API. 
See WT_EXTENSION_API::unpack_start for - * details. - * - * @param wt_api the extension handle - * @param session the session handle - * @param buffer a pointer to a packed byte array - * @param size the number of valid bytes in the buffer - * @param format the data format, see @ref packing - * @errors - */ - int (*struct_unpack)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - const void *buffer, size_t size, const char *format, ...); - - /* - * Streaming pack/unpack API. - */ - /*! - * Start a packing operation into a buffer. - * See ::wiredtiger_pack_start for details. - * - * @param session the session handle - * @param format the data format, see @ref packing - * @param buffer a pointer to memory to hold the packed data - * @param size the size of the buffer - * @param[out] psp the new packing stream handle - * @errors - */ - int (*pack_start)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, const char *format, - void *buffer, size_t size, WT_PACK_STREAM **psp); - - /*! - * Start an unpacking operation from a buffer. - * See ::wiredtiger_unpack_start for details. - * - * @param session the session handle - * @param format the data format, see @ref packing - * @param buffer a pointer to memory holding the packed data - * @param size the size of the buffer - * @param[out] psp the new packing stream handle - * @errors - */ - int (*unpack_start)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, const char *format, - const void *buffer, size_t size, WT_PACK_STREAM **psp); - - /*! - * Close a packing stream. - * - * @param ps the packing stream handle - * @param[out] usedp the number of bytes in the buffer used by the - * stream - * @errors - */ - int (*pack_close)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, size_t *usedp); - - /*! - * Pack an item into a packing stream. - * - * @param ps the packing stream handle - * @param item an item to pack - * @errors - */ - int (*pack_item)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, WT_ITEM *item); - - /*! 
- * Pack a signed integer into a packing stream. - * - * @param ps the packing stream handle - * @param i a signed integer to pack - * @errors - */ - int (*pack_int)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, int64_t i); - - /*! - * Pack a string into a packing stream. - * - * @param ps the packing stream handle - * @param s a string to pack - * @errors - */ - int (*pack_str)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, const char *s); - - /*! - * Pack an unsigned integer into a packing stream. - * - * @param ps the packing stream handle - * @param u an unsigned integer to pack - * @errors - */ - int (*pack_uint)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, uint64_t u); - - /*! - * Unpack an item from a packing stream. - * - * @param ps the packing stream handle - * @param item an item to unpack - * @errors - */ - int (*unpack_item)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, WT_ITEM *item); - - /*! - * Unpack a signed integer from a packing stream. - * - * @param ps the packing stream handle - * @param[out] ip the unpacked signed integer - * @errors - */ - int (*unpack_int)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, int64_t *ip); - - /*! - * Unpack a string from a packing stream. - * - * @param ps the packing stream handle - * @param[out] sp the unpacked string - * @errors - */ - int (*unpack_str)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, const char **sp); - - /*! - * Unpack an unsigned integer from a packing stream. - * - * @param ps the packing stream handle - * @param[out] up the unpacked unsigned integer - * @errors - */ - int (*unpack_uint)(WT_EXTENSION_API *wt_api, - WT_PACK_STREAM *ps, uint64_t *up); - - /*! - * Return the current transaction ID. - * - * @param wt_api the extension handle - * @param session the session handle - * @returns the current transaction ID. - * - * @snippet ex_data_source.c WT_EXTENSION transaction ID - */ - uint64_t (*transaction_id)(WT_EXTENSION_API *wt_api, - WT_SESSION *session); - - /*! 
- * Return the current transaction's isolation level; returns one of - * ::WT_TXN_ISO_READ_COMMITTED, ::WT_TXN_ISO_READ_UNCOMMITTED, or - * ::WT_TXN_ISO_SNAPSHOT. - * - * @param wt_api the extension handle - * @param session the session handle - * @returns the current transaction's isolation level. - * - * @snippet ex_data_source.c WT_EXTENSION transaction isolation level - */ - int (*transaction_isolation_level)(WT_EXTENSION_API *wt_api, - WT_SESSION *session); - - /*! - * Request notification of transaction resolution by specifying a - * function to be called when the session's current transaction is - * either committed or rolled back. If the transaction is being - * committed, but the notification function returns an error, the - * transaction will be rolled back. - * - * @param wt_api the extension handle - * @param session the session handle - * @param notify a handler for commit or rollback events - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION transaction notify - */ - int (*transaction_notify)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, WT_TXN_NOTIFY *notify); - - /*! - * Return the oldest transaction ID not yet visible to a running - * transaction. - * - * @param wt_api the extension handle - * @param session the session handle - * @returns the oldest transaction ID not yet visible to a running - * transaction. - * - * @snippet ex_data_source.c WT_EXTENSION transaction oldest - */ - uint64_t (*transaction_oldest)(WT_EXTENSION_API *wt_api); - - /*! - * Return if the current transaction can see the given transaction ID. - * - * @param wt_api the extension handle - * @param session the session handle - * @param transaction_id the transaction ID - * @returns true (non-zero) if the transaction ID is visible to the - * current transaction. - * - * @snippet ex_data_source.c WT_EXTENSION transaction visible - */ - int (*transaction_visible)(WT_EXTENSION_API *wt_api, - WT_SESSION *session, uint64_t transaction_id); - - /*! 
- * @copydoc wiredtiger_version - */ - const char *(*version)(int *majorp, int *minorp, int *patchp); + /*! + * Insert an error message into the WiredTiger error stream. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param fmt a printf-like format specification + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION_API err_printf + */ + int (*err_printf)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *fmt, ...); + + /*! + * Insert a message into the WiredTiger message stream. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param fmt a printf-like format specification + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION_API msg_printf + */ + int (*msg_printf)(WT_EXTENSION_API *, WT_SESSION *session, const char *fmt, ...); + + /*! + * Return information about an error as a string. + * + * @snippet ex_data_source.c WT_EXTENSION_API strerror + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param error a return value from a WiredTiger function + * @returns a string representation of the error + */ + const char *(*strerror)(WT_EXTENSION_API *, WT_SESSION *session, int error); + + /*! + * Map a Windows system error code to a POSIX 1003.1/ANSI C error. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param windows_error a Windows system error code + * @returns a string representation of the error + * + * @snippet ex_data_source.c WT_EXTENSION_API map_windows_error + */ + int (*map_windows_error)(WT_EXTENSION_API *wt_api, WT_SESSION *session, uint32_t windows_error); + + /*! + * Allocate short-term use scratch memory. 
+ * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param bytes the number of bytes of memory needed + * @returns A valid memory reference on success or NULL on error + * + * @snippet ex_data_source.c WT_EXTENSION_API scr_alloc + */ + void *(*scr_alloc)(WT_EXTENSION_API *wt_api, WT_SESSION *session, size_t bytes); + + /*! + * Free short-term use scratch memory. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param ref a memory reference returned by WT_EXTENSION_API::scr_alloc + * + * @snippet ex_data_source.c WT_EXTENSION_API scr_free + */ + void (*scr_free)(WT_EXTENSION_API *, WT_SESSION *session, void *ref); + + /*! + * Configure the extension collator method. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param uri the URI of the handle being configured + * @param config the configuration information passed to an application + * @param collatorp the selector collator, if any + * @param ownp set if the collator terminate method should be called + * when no longer needed + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION collator config + */ + int (*collator_config)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *uri, + WT_CONFIG_ARG *config, WT_COLLATOR **collatorp, int *ownp); + + /*! + * The extension collator method. 
+ * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param collator the collator (or NULL if none available) + * @param first first item + * @param second second item + * @param[out] cmp set less than 0 if \c first collates less than + * \c second, set equal to 0 if \c first collates equally to \c second, + * set greater than 0 if \c first collates greater than \c second + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION collate + */ + int (*collate)(WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_COLLATOR *collator, + WT_ITEM *first, WT_ITEM *second, int *cmp); + + /*! + * Return the value of a configuration key. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param config the configuration information passed to an application + * @param key configuration key string + * @param value the returned value + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION config_get + */ + int (*config_get)(WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_CONFIG_ARG *config, + const char *key, WT_CONFIG_ITEM *value); + + /*! + * Return the value of a configuration key from a string. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param config the configuration string + * @param key configuration key string + * @param value the returned value + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION config_get + */ + int (*config_get_string)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *config, + const char *key, WT_CONFIG_ITEM *value); + + /*! + * @copydoc wiredtiger_config_parser_open + */ + int (*config_parser_open)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *config, + size_t len, WT_CONFIG_PARSER **config_parserp); + + /*! 
+ * @copydoc wiredtiger_config_parser_open + */ + int (*config_parser_open_arg)(WT_EXTENSION_API *wt_api, WT_SESSION *session, + WT_CONFIG_ARG *config, WT_CONFIG_PARSER **config_parserp); + + /*! + * Insert a row into the metadata if it does not already exist. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param key row key + * @param value row value + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION metadata insert + */ + int (*metadata_insert)( + WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key, const char *value); + + /*! + * Remove a row from the metadata. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param key row key + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION metadata remove + */ + int (*metadata_remove)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key); + + /*! + * Return a row from the metadata. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param key row key + * @param [out] valuep the row value + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION metadata search + */ + int (*metadata_search)( + WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key, char **valuep); + + /*! + * Update a row in the metadata by either inserting a new record or + * updating an existing record. + * + * @param wt_api the extension handle + * @param session the session handle (or NULL if none available) + * @param key row key + * @param value row value + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION metadata update + */ + int (*metadata_update)( + WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *key, const char *value); + + /*! + * Pack a structure into a buffer. Deprecated in favor of stream + * based pack and unpack API. See WT_EXTENSION_API::pack_start for + * details. 
+ * + * @param wt_api the extension handle + * @param session the session handle + * @param buffer a pointer to a packed byte array + * @param size the number of valid bytes in the buffer + * @param format the data format, see @ref packing + * @errors + */ + int (*struct_pack)(WT_EXTENSION_API *wt_api, WT_SESSION *session, void *buffer, size_t size, + const char *format, ...); + + /*! + * Calculate the size required to pack a structure. Deprecated in + * favor of stream based pack and unpack API. + * + * @param wt_api the extension handle + * @param session the session handle + * @param sizep a location where the number of bytes needed for the + * matching call to WT_EXTENSION_API::struct_pack is returned + * @param format the data format, see @ref packing + * @errors + */ + int (*struct_size)( + WT_EXTENSION_API *wt_api, WT_SESSION *session, size_t *sizep, const char *format, ...); + + /*! + * Unpack a structure from a buffer. Deprecated in favor of stream + * based pack and unpack API. See WT_EXTENSION_API::unpack_start for + * details. + * + * @param wt_api the extension handle + * @param session the session handle + * @param buffer a pointer to a packed byte array + * @param size the number of valid bytes in the buffer + * @param format the data format, see @ref packing + * @errors + */ + int (*struct_unpack)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const void *buffer, + size_t size, const char *format, ...); + + /* + * Streaming pack/unpack API. + */ + /*! + * Start a packing operation into a buffer. + * See ::wiredtiger_pack_start for details. + * + * @param session the session handle + * @param format the data format, see @ref packing + * @param buffer a pointer to memory to hold the packed data + * @param size the size of the buffer + * @param[out] psp the new packing stream handle + * @errors + */ + int (*pack_start)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *format, + void *buffer, size_t size, WT_PACK_STREAM **psp); + + /*! 
+ * Start an unpacking operation from a buffer. + * See ::wiredtiger_unpack_start for details. + * + * @param session the session handle + * @param format the data format, see @ref packing + * @param buffer a pointer to memory holding the packed data + * @param size the size of the buffer + * @param[out] psp the new packing stream handle + * @errors + */ + int (*unpack_start)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const char *format, + const void *buffer, size_t size, WT_PACK_STREAM **psp); + + /*! + * Close a packing stream. + * + * @param ps the packing stream handle + * @param[out] usedp the number of bytes in the buffer used by the + * stream + * @errors + */ + int (*pack_close)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp); + + /*! + * Pack an item into a packing stream. + * + * @param ps the packing stream handle + * @param item an item to pack + * @errors + */ + int (*pack_item)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item); + + /*! + * Pack a signed integer into a packing stream. + * + * @param ps the packing stream handle + * @param i a signed integer to pack + * @errors + */ + int (*pack_int)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i); + + /*! + * Pack a string into a packing stream. + * + * @param ps the packing stream handle + * @param s a string to pack + * @errors + */ + int (*pack_str)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s); + + /*! + * Pack an unsigned integer into a packing stream. + * + * @param ps the packing stream handle + * @param u an unsigned integer to pack + * @errors + */ + int (*pack_uint)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u); + + /*! + * Unpack an item from a packing stream. + * + * @param ps the packing stream handle + * @param item an item to unpack + * @errors + */ + int (*unpack_item)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item); + + /*! + * Unpack a signed integer from a packing stream. 
+ * + * @param ps the packing stream handle + * @param[out] ip the unpacked signed integer + * @errors + */ + int (*unpack_int)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip); + + /*! + * Unpack a string from a packing stream. + * + * @param ps the packing stream handle + * @param[out] sp the unpacked string + * @errors + */ + int (*unpack_str)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp); + + /*! + * Unpack an unsigned integer from a packing stream. + * + * @param ps the packing stream handle + * @param[out] up the unpacked unsigned integer + * @errors + */ + int (*unpack_uint)(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up); + + /*! + * Return the current transaction ID. + * + * @param wt_api the extension handle + * @param session the session handle + * @returns the current transaction ID. + * + * @snippet ex_data_source.c WT_EXTENSION transaction ID + */ + uint64_t (*transaction_id)(WT_EXTENSION_API *wt_api, WT_SESSION *session); + + /*! + * Return the current transaction's isolation level; returns one of + * ::WT_TXN_ISO_READ_COMMITTED, ::WT_TXN_ISO_READ_UNCOMMITTED, or + * ::WT_TXN_ISO_SNAPSHOT. + * + * @param wt_api the extension handle + * @param session the session handle + * @returns the current transaction's isolation level. + * + * @snippet ex_data_source.c WT_EXTENSION transaction isolation level + */ + int (*transaction_isolation_level)(WT_EXTENSION_API *wt_api, WT_SESSION *session); + + /*! + * Request notification of transaction resolution by specifying a + * function to be called when the session's current transaction is + * either committed or rolled back. If the transaction is being + * committed, but the notification function returns an error, the + * transaction will be rolled back. 
+ * + * @param wt_api the extension handle + * @param session the session handle + * @param notify a handler for commit or rollback events + * @errors + * + * @snippet ex_data_source.c WT_EXTENSION transaction notify + */ + int (*transaction_notify)(WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_TXN_NOTIFY *notify); + + /*! + * Return the oldest transaction ID not yet visible to a running + * transaction. + * + * @param wt_api the extension handle + * @param session the session handle + * @returns the oldest transaction ID not yet visible to a running + * transaction. + * + * @snippet ex_data_source.c WT_EXTENSION transaction oldest + */ + uint64_t (*transaction_oldest)(WT_EXTENSION_API *wt_api); + + /*! + * Return if the current transaction can see the given transaction ID. + * + * @param wt_api the extension handle + * @param session the session handle + * @param transaction_id the transaction ID + * @returns true (non-zero) if the transaction ID is visible to the + * current transaction. + * + * @snippet ex_data_source.c WT_EXTENSION transaction visible + */ + int (*transaction_visible)( + WT_EXTENSION_API *wt_api, WT_SESSION *session, uint64_t transaction_id); + + /*! + * @copydoc wiredtiger_version + */ + const char *(*version)(int *majorp, int *minorp, int *patchp); }; /*! 
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 5d2205f7718..3bc4f02c258 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -7,7 +7,7 @@ */ #ifndef __WT_INTERNAL_H -#define __WT_INTERNAL_H +#define __WT_INTERNAL_H #if defined(__cplusplus) extern "C" { @@ -57,7 +57,7 @@ extern "C" { #endif #include <time.h> #ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN #include <windows.h> #endif @@ -66,267 +66,267 @@ extern "C" { * Forward type declarations for internal types: BEGIN */ struct __wt_addr; - typedef struct __wt_addr WT_ADDR; +typedef struct __wt_addr WT_ADDR; struct __wt_async; - typedef struct __wt_async WT_ASYNC; +typedef struct __wt_async WT_ASYNC; struct __wt_async_cursor; - typedef struct __wt_async_cursor WT_ASYNC_CURSOR; +typedef struct __wt_async_cursor WT_ASYNC_CURSOR; struct __wt_async_format; - typedef struct __wt_async_format WT_ASYNC_FORMAT; +typedef struct __wt_async_format WT_ASYNC_FORMAT; struct __wt_async_op_impl; - typedef struct __wt_async_op_impl WT_ASYNC_OP_IMPL; +typedef struct __wt_async_op_impl WT_ASYNC_OP_IMPL; struct __wt_async_worker_state; - typedef struct __wt_async_worker_state WT_ASYNC_WORKER_STATE; +typedef struct __wt_async_worker_state WT_ASYNC_WORKER_STATE; struct __wt_block; - typedef struct __wt_block WT_BLOCK; +typedef struct __wt_block WT_BLOCK; struct __wt_block_ckpt; - typedef struct __wt_block_ckpt WT_BLOCK_CKPT; +typedef struct __wt_block_ckpt WT_BLOCK_CKPT; struct __wt_block_desc; - typedef struct __wt_block_desc WT_BLOCK_DESC; +typedef struct __wt_block_desc WT_BLOCK_DESC; struct __wt_block_header; - typedef struct __wt_block_header WT_BLOCK_HEADER; +typedef struct __wt_block_header WT_BLOCK_HEADER; struct __wt_bloom; - typedef struct __wt_bloom WT_BLOOM; +typedef struct __wt_bloom WT_BLOOM; struct __wt_bloom_hash; - typedef struct 
__wt_bloom_hash WT_BLOOM_HASH; +typedef struct __wt_bloom_hash WT_BLOOM_HASH; struct __wt_bm; - typedef struct __wt_bm WT_BM; +typedef struct __wt_bm WT_BM; struct __wt_btree; - typedef struct __wt_btree WT_BTREE; +typedef struct __wt_btree WT_BTREE; struct __wt_cache; - typedef struct __wt_cache WT_CACHE; +typedef struct __wt_cache WT_CACHE; struct __wt_cache_pool; - typedef struct __wt_cache_pool WT_CACHE_POOL; +typedef struct __wt_cache_pool WT_CACHE_POOL; struct __wt_capacity; - typedef struct __wt_capacity WT_CAPACITY; +typedef struct __wt_capacity WT_CAPACITY; struct __wt_cell; - typedef struct __wt_cell WT_CELL; +typedef struct __wt_cell WT_CELL; struct __wt_cell_unpack; - typedef struct __wt_cell_unpack WT_CELL_UNPACK; +typedef struct __wt_cell_unpack WT_CELL_UNPACK; struct __wt_ckpt; - typedef struct __wt_ckpt WT_CKPT; +typedef struct __wt_ckpt WT_CKPT; struct __wt_col; - typedef struct __wt_col WT_COL; +typedef struct __wt_col WT_COL; struct __wt_col_rle; - typedef struct __wt_col_rle WT_COL_RLE; +typedef struct __wt_col_rle WT_COL_RLE; struct __wt_col_var_repeat; - typedef struct __wt_col_var_repeat WT_COL_VAR_REPEAT; +typedef struct __wt_col_var_repeat WT_COL_VAR_REPEAT; struct __wt_colgroup; - typedef struct __wt_colgroup WT_COLGROUP; +typedef struct __wt_colgroup WT_COLGROUP; struct __wt_compact_state; - typedef struct __wt_compact_state WT_COMPACT_STATE; +typedef struct __wt_compact_state WT_COMPACT_STATE; struct __wt_condvar; - typedef struct __wt_condvar WT_CONDVAR; +typedef struct __wt_condvar WT_CONDVAR; struct __wt_config; - typedef struct __wt_config WT_CONFIG; +typedef struct __wt_config WT_CONFIG; struct __wt_config_check; - typedef struct __wt_config_check WT_CONFIG_CHECK; +typedef struct __wt_config_check WT_CONFIG_CHECK; struct __wt_config_entry; - typedef struct __wt_config_entry WT_CONFIG_ENTRY; +typedef struct __wt_config_entry WT_CONFIG_ENTRY; struct __wt_config_parser_impl; - typedef struct __wt_config_parser_impl 
WT_CONFIG_PARSER_IMPL; +typedef struct __wt_config_parser_impl WT_CONFIG_PARSER_IMPL; struct __wt_connection_impl; - typedef struct __wt_connection_impl WT_CONNECTION_IMPL; +typedef struct __wt_connection_impl WT_CONNECTION_IMPL; struct __wt_connection_stats; - typedef struct __wt_connection_stats WT_CONNECTION_STATS; +typedef struct __wt_connection_stats WT_CONNECTION_STATS; struct __wt_cursor_backup; - typedef struct __wt_cursor_backup WT_CURSOR_BACKUP; +typedef struct __wt_cursor_backup WT_CURSOR_BACKUP; struct __wt_cursor_btree; - typedef struct __wt_cursor_btree WT_CURSOR_BTREE; +typedef struct __wt_cursor_btree WT_CURSOR_BTREE; struct __wt_cursor_bulk; - typedef struct __wt_cursor_bulk WT_CURSOR_BULK; +typedef struct __wt_cursor_bulk WT_CURSOR_BULK; struct __wt_cursor_config; - typedef struct __wt_cursor_config WT_CURSOR_CONFIG; +typedef struct __wt_cursor_config WT_CURSOR_CONFIG; struct __wt_cursor_data_source; - typedef struct __wt_cursor_data_source WT_CURSOR_DATA_SOURCE; +typedef struct __wt_cursor_data_source WT_CURSOR_DATA_SOURCE; struct __wt_cursor_dump; - typedef struct __wt_cursor_dump WT_CURSOR_DUMP; +typedef struct __wt_cursor_dump WT_CURSOR_DUMP; struct __wt_cursor_index; - typedef struct __wt_cursor_index WT_CURSOR_INDEX; +typedef struct __wt_cursor_index WT_CURSOR_INDEX; struct __wt_cursor_join; - typedef struct __wt_cursor_join WT_CURSOR_JOIN; +typedef struct __wt_cursor_join WT_CURSOR_JOIN; struct __wt_cursor_join_endpoint; - typedef struct __wt_cursor_join_endpoint WT_CURSOR_JOIN_ENDPOINT; +typedef struct __wt_cursor_join_endpoint WT_CURSOR_JOIN_ENDPOINT; struct __wt_cursor_join_entry; - typedef struct __wt_cursor_join_entry WT_CURSOR_JOIN_ENTRY; +typedef struct __wt_cursor_join_entry WT_CURSOR_JOIN_ENTRY; struct __wt_cursor_join_iter; - typedef struct __wt_cursor_join_iter WT_CURSOR_JOIN_ITER; +typedef struct __wt_cursor_join_iter WT_CURSOR_JOIN_ITER; struct __wt_cursor_json; - typedef struct __wt_cursor_json WT_CURSOR_JSON; +typedef struct 
__wt_cursor_json WT_CURSOR_JSON; struct __wt_cursor_log; - typedef struct __wt_cursor_log WT_CURSOR_LOG; +typedef struct __wt_cursor_log WT_CURSOR_LOG; struct __wt_cursor_lsm; - typedef struct __wt_cursor_lsm WT_CURSOR_LSM; +typedef struct __wt_cursor_lsm WT_CURSOR_LSM; struct __wt_cursor_metadata; - typedef struct __wt_cursor_metadata WT_CURSOR_METADATA; +typedef struct __wt_cursor_metadata WT_CURSOR_METADATA; struct __wt_cursor_stat; - typedef struct __wt_cursor_stat WT_CURSOR_STAT; +typedef struct __wt_cursor_stat WT_CURSOR_STAT; struct __wt_cursor_table; - typedef struct __wt_cursor_table WT_CURSOR_TABLE; +typedef struct __wt_cursor_table WT_CURSOR_TABLE; struct __wt_data_handle; - typedef struct __wt_data_handle WT_DATA_HANDLE; +typedef struct __wt_data_handle WT_DATA_HANDLE; struct __wt_data_handle_cache; - typedef struct __wt_data_handle_cache WT_DATA_HANDLE_CACHE; +typedef struct __wt_data_handle_cache WT_DATA_HANDLE_CACHE; struct __wt_dlh; - typedef struct __wt_dlh WT_DLH; +typedef struct __wt_dlh WT_DLH; struct __wt_dsrc_stats; - typedef struct __wt_dsrc_stats WT_DSRC_STATS; +typedef struct __wt_dsrc_stats WT_DSRC_STATS; struct __wt_evict_entry; - typedef struct __wt_evict_entry WT_EVICT_ENTRY; +typedef struct __wt_evict_entry WT_EVICT_ENTRY; struct __wt_evict_queue; - typedef struct __wt_evict_queue WT_EVICT_QUEUE; +typedef struct __wt_evict_queue WT_EVICT_QUEUE; struct __wt_ext; - typedef struct __wt_ext WT_EXT; +typedef struct __wt_ext WT_EXT; struct __wt_extlist; - typedef struct __wt_extlist WT_EXTLIST; +typedef struct __wt_extlist WT_EXTLIST; struct __wt_fh; - typedef struct __wt_fh WT_FH; +typedef struct __wt_fh WT_FH; struct __wt_file_handle_inmem; - typedef struct __wt_file_handle_inmem WT_FILE_HANDLE_INMEM; +typedef struct __wt_file_handle_inmem WT_FILE_HANDLE_INMEM; struct __wt_file_handle_posix; - typedef struct __wt_file_handle_posix WT_FILE_HANDLE_POSIX; +typedef struct __wt_file_handle_posix WT_FILE_HANDLE_POSIX; struct 
__wt_file_handle_win; - typedef struct __wt_file_handle_win WT_FILE_HANDLE_WIN; +typedef struct __wt_file_handle_win WT_FILE_HANDLE_WIN; struct __wt_fstream; - typedef struct __wt_fstream WT_FSTREAM; +typedef struct __wt_fstream WT_FSTREAM; struct __wt_hazard; - typedef struct __wt_hazard WT_HAZARD; +typedef struct __wt_hazard WT_HAZARD; struct __wt_ikey; - typedef struct __wt_ikey WT_IKEY; +typedef struct __wt_ikey WT_IKEY; struct __wt_index; - typedef struct __wt_index WT_INDEX; +typedef struct __wt_index WT_INDEX; struct __wt_insert; - typedef struct __wt_insert WT_INSERT; +typedef struct __wt_insert WT_INSERT; struct __wt_insert_head; - typedef struct __wt_insert_head WT_INSERT_HEAD; +typedef struct __wt_insert_head WT_INSERT_HEAD; struct __wt_join_stats; - typedef struct __wt_join_stats WT_JOIN_STATS; +typedef struct __wt_join_stats WT_JOIN_STATS; struct __wt_join_stats_group; - typedef struct __wt_join_stats_group WT_JOIN_STATS_GROUP; +typedef struct __wt_join_stats_group WT_JOIN_STATS_GROUP; struct __wt_keyed_encryptor; - typedef struct __wt_keyed_encryptor WT_KEYED_ENCRYPTOR; +typedef struct __wt_keyed_encryptor WT_KEYED_ENCRYPTOR; struct __wt_log; - typedef struct __wt_log WT_LOG; +typedef struct __wt_log WT_LOG; struct __wt_log_desc; - typedef struct __wt_log_desc WT_LOG_DESC; +typedef struct __wt_log_desc WT_LOG_DESC; struct __wt_log_op_desc; - typedef struct __wt_log_op_desc WT_LOG_OP_DESC; +typedef struct __wt_log_op_desc WT_LOG_OP_DESC; struct __wt_log_rec_desc; - typedef struct __wt_log_rec_desc WT_LOG_REC_DESC; +typedef struct __wt_log_rec_desc WT_LOG_REC_DESC; struct __wt_log_record; - typedef struct __wt_log_record WT_LOG_RECORD; +typedef struct __wt_log_record WT_LOG_RECORD; struct __wt_logslot; - typedef struct __wt_logslot WT_LOGSLOT; +typedef struct __wt_logslot WT_LOGSLOT; struct __wt_lsm_chunk; - typedef struct __wt_lsm_chunk WT_LSM_CHUNK; +typedef struct __wt_lsm_chunk WT_LSM_CHUNK; struct __wt_lsm_cursor_chunk; - typedef struct 
__wt_lsm_cursor_chunk WT_LSM_CURSOR_CHUNK; +typedef struct __wt_lsm_cursor_chunk WT_LSM_CURSOR_CHUNK; struct __wt_lsm_data_source; - typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE; +typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE; struct __wt_lsm_manager; - typedef struct __wt_lsm_manager WT_LSM_MANAGER; +typedef struct __wt_lsm_manager WT_LSM_MANAGER; struct __wt_lsm_tree; - typedef struct __wt_lsm_tree WT_LSM_TREE; +typedef struct __wt_lsm_tree WT_LSM_TREE; struct __wt_lsm_work_unit; - typedef struct __wt_lsm_work_unit WT_LSM_WORK_UNIT; +typedef struct __wt_lsm_work_unit WT_LSM_WORK_UNIT; struct __wt_lsm_worker_args; - typedef struct __wt_lsm_worker_args WT_LSM_WORKER_ARGS; +typedef struct __wt_lsm_worker_args WT_LSM_WORKER_ARGS; struct __wt_lsm_worker_cookie; - typedef struct __wt_lsm_worker_cookie WT_LSM_WORKER_COOKIE; +typedef struct __wt_lsm_worker_cookie WT_LSM_WORKER_COOKIE; struct __wt_multi; - typedef struct __wt_multi WT_MULTI; +typedef struct __wt_multi WT_MULTI; struct __wt_myslot; - typedef struct __wt_myslot WT_MYSLOT; +typedef struct __wt_myslot WT_MYSLOT; struct __wt_named_collator; - typedef struct __wt_named_collator WT_NAMED_COLLATOR; +typedef struct __wt_named_collator WT_NAMED_COLLATOR; struct __wt_named_compressor; - typedef struct __wt_named_compressor WT_NAMED_COMPRESSOR; +typedef struct __wt_named_compressor WT_NAMED_COMPRESSOR; struct __wt_named_data_source; - typedef struct __wt_named_data_source WT_NAMED_DATA_SOURCE; +typedef struct __wt_named_data_source WT_NAMED_DATA_SOURCE; struct __wt_named_encryptor; - typedef struct __wt_named_encryptor WT_NAMED_ENCRYPTOR; +typedef struct __wt_named_encryptor WT_NAMED_ENCRYPTOR; struct __wt_named_extractor; - typedef struct __wt_named_extractor WT_NAMED_EXTRACTOR; +typedef struct __wt_named_extractor WT_NAMED_EXTRACTOR; struct __wt_named_snapshot; - typedef struct __wt_named_snapshot WT_NAMED_SNAPSHOT; +typedef struct __wt_named_snapshot WT_NAMED_SNAPSHOT; struct 
__wt_optrack_header; - typedef struct __wt_optrack_header WT_OPTRACK_HEADER; +typedef struct __wt_optrack_header WT_OPTRACK_HEADER; struct __wt_optrack_record; - typedef struct __wt_optrack_record WT_OPTRACK_RECORD; +typedef struct __wt_optrack_record WT_OPTRACK_RECORD; struct __wt_ovfl_reuse; - typedef struct __wt_ovfl_reuse WT_OVFL_REUSE; +typedef struct __wt_ovfl_reuse WT_OVFL_REUSE; struct __wt_ovfl_track; - typedef struct __wt_ovfl_track WT_OVFL_TRACK; +typedef struct __wt_ovfl_track WT_OVFL_TRACK; struct __wt_page; - typedef struct __wt_page WT_PAGE; +typedef struct __wt_page WT_PAGE; struct __wt_page_deleted; - typedef struct __wt_page_deleted WT_PAGE_DELETED; +typedef struct __wt_page_deleted WT_PAGE_DELETED; struct __wt_page_header; - typedef struct __wt_page_header WT_PAGE_HEADER; +typedef struct __wt_page_header WT_PAGE_HEADER; struct __wt_page_index; - typedef struct __wt_page_index WT_PAGE_INDEX; +typedef struct __wt_page_index WT_PAGE_INDEX; struct __wt_page_lookaside; - typedef struct __wt_page_lookaside WT_PAGE_LOOKASIDE; +typedef struct __wt_page_lookaside WT_PAGE_LOOKASIDE; struct __wt_page_modify; - typedef struct __wt_page_modify WT_PAGE_MODIFY; +typedef struct __wt_page_modify WT_PAGE_MODIFY; struct __wt_process; - typedef struct __wt_process WT_PROCESS; +typedef struct __wt_process WT_PROCESS; struct __wt_rec_chunk; - typedef struct __wt_rec_chunk WT_REC_CHUNK; +typedef struct __wt_rec_chunk WT_REC_CHUNK; struct __wt_rec_dictionary; - typedef struct __wt_rec_dictionary WT_REC_DICTIONARY; +typedef struct __wt_rec_dictionary WT_REC_DICTIONARY; struct __wt_rec_kv; - typedef struct __wt_rec_kv WT_REC_KV; +typedef struct __wt_rec_kv WT_REC_KV; struct __wt_reconcile; - typedef struct __wt_reconcile WT_RECONCILE; +typedef struct __wt_reconcile WT_RECONCILE; struct __wt_ref; - typedef struct __wt_ref WT_REF; +typedef struct __wt_ref WT_REF; struct __wt_ref_hist; - typedef struct __wt_ref_hist WT_REF_HIST; +typedef struct __wt_ref_hist WT_REF_HIST; 
struct __wt_row; - typedef struct __wt_row WT_ROW; +typedef struct __wt_row WT_ROW; struct __wt_rwlock; - typedef struct __wt_rwlock WT_RWLOCK; +typedef struct __wt_rwlock WT_RWLOCK; struct __wt_salvage_cookie; - typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE; +typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE; struct __wt_save_upd; - typedef struct __wt_save_upd WT_SAVE_UPD; +typedef struct __wt_save_upd WT_SAVE_UPD; struct __wt_scratch_track; - typedef struct __wt_scratch_track WT_SCRATCH_TRACK; +typedef struct __wt_scratch_track WT_SCRATCH_TRACK; struct __wt_session_impl; - typedef struct __wt_session_impl WT_SESSION_IMPL; +typedef struct __wt_session_impl WT_SESSION_IMPL; struct __wt_session_stash; - typedef struct __wt_session_stash WT_SESSION_STASH; +typedef struct __wt_session_stash WT_SESSION_STASH; struct __wt_session_stats; - typedef struct __wt_session_stats WT_SESSION_STATS; +typedef struct __wt_session_stats WT_SESSION_STATS; struct __wt_size; - typedef struct __wt_size WT_SIZE; +typedef struct __wt_size WT_SIZE; struct __wt_spinlock; - typedef struct __wt_spinlock WT_SPINLOCK; +typedef struct __wt_spinlock WT_SPINLOCK; struct __wt_stash; - typedef struct __wt_stash WT_STASH; +typedef struct __wt_stash WT_STASH; struct __wt_table; - typedef struct __wt_table WT_TABLE; +typedef struct __wt_table WT_TABLE; struct __wt_thread; - typedef struct __wt_thread WT_THREAD; +typedef struct __wt_thread WT_THREAD; struct __wt_thread_group; - typedef struct __wt_thread_group WT_THREAD_GROUP; +typedef struct __wt_thread_group WT_THREAD_GROUP; struct __wt_txn; - typedef struct __wt_txn WT_TXN; +typedef struct __wt_txn WT_TXN; struct __wt_txn_global; - typedef struct __wt_txn_global WT_TXN_GLOBAL; +typedef struct __wt_txn_global WT_TXN_GLOBAL; struct __wt_txn_op; - typedef struct __wt_txn_op WT_TXN_OP; +typedef struct __wt_txn_op WT_TXN_OP; struct __wt_txn_printlog_args; - typedef struct __wt_txn_printlog_args WT_TXN_PRINTLOG_ARGS; +typedef struct 
__wt_txn_printlog_args WT_TXN_PRINTLOG_ARGS; struct __wt_txn_state; - typedef struct __wt_txn_state WT_TXN_STATE; +typedef struct __wt_txn_state WT_TXN_STATE; struct __wt_update; - typedef struct __wt_update WT_UPDATE; +typedef struct __wt_update WT_UPDATE; union __wt_lsn; - typedef union __wt_lsn WT_LSN; +typedef union __wt_lsn WT_LSN; union __wt_rand_state; - typedef union __wt_rand_state WT_RAND_STATE; +typedef union __wt_rand_state WT_RAND_STATE; typedef uint64_t wt_timestamp_t; @@ -346,12 +346,12 @@ typedef uint64_t wt_timestamp_t; #include "msvc.h" #endif /* - * GLIBC 2.26 and later use the openat syscall to implement open. - * Set this flag so that our strace tests know to expect this. + * GLIBC 2.26 and later use the openat syscall to implement open. Set this flag so that our strace + * tests know to expect this. */ #ifdef __GLIBC_PREREQ #if __GLIBC_PREREQ(2, 26) -#define WT_USE_OPENAT 1 +#define WT_USE_OPENAT 1 #endif #endif @@ -369,8 +369,8 @@ typedef uint64_t wt_timestamp_t; #include "misc.h" #include "mutex.h" -#include "stat.h" /* required by dhandle.h */ -#include "dhandle.h" /* required by btree.h */ +#include "stat.h" /* required by dhandle.h */ +#include "dhandle.h" /* required by btree.h */ #include "api.h" #include "async.h" @@ -396,7 +396,7 @@ typedef uint64_t wt_timestamp_t; #include "thread_group.h" #include "txn.h" -#include "session.h" /* required by connection.h */ +#include "session.h" /* required by connection.h */ #include "connection.h" #include "extern.h" @@ -407,19 +407,19 @@ typedef uint64_t wt_timestamp_t; #endif #include "verify_build.h" -#include "cache.i" /* required by misc.i */ -#include "ctype.i" /* required by packing.i */ -#include "intpack.i" /* required by cell.i, packing.i */ -#include "misc.i" /* required by mutex.i */ +#include "cache.i" /* required by misc.i */ +#include "ctype.i" /* required by packing.i */ +#include "intpack.i" /* required by cell.i, packing.i */ +#include "misc.i" /* required by mutex.i */ -#include 
"buf.i" /* required by cell.i */ -#include "cell.i" /* required by btree.i */ -#include "mutex.i" /* required by btree.i */ -#include "txn.i" /* required by btree.i */ +#include "buf.i" /* required by cell.i */ +#include "cell.i" /* required by btree.i */ +#include "mutex.i" /* required by btree.i */ +#include "txn.i" /* required by btree.i */ #include "bitstring.i" #include "block.i" -#include "btree.i" /* required by cursor.i */ +#include "btree.i" /* required by cursor.i */ #include "btree_cmp.i" #include "column.i" #include "cursor.i" @@ -434,4 +434,4 @@ typedef uint64_t wt_timestamp_t; #if defined(__cplusplus) } #endif -#endif /* !__WT_INTERNAL_H */ +#endif /* !__WT_INTERNAL_H */ |