diff options
author | Ramon Fernandez <ramon.fernandez@mongodb.com> | 2015-04-27 11:33:41 -0400 |
---|---|---|
committer | Ramon Fernandez <ramon.fernandez@mongodb.com> | 2015-04-27 11:33:47 -0400 |
commit | 34b853ed0566a3bbffcd825191b9000de331bddc (patch) | |
tree | 06f3f81436489f252da15900b2fef169e7ca1bde /src/third_party/wiredtiger/src/include | |
parent | 076cd926ab476f872afdd89a0e5e7e733d26c3ae (diff) | |
download | mongo-34b853ed0566a3bbffcd825191b9000de331bddc.tar.gz |
SERVER-18199 Import wiredtiger-wiredtiger-2.5.3-371-g1f44c05.tar.gz from wiredtiger branch mongodb-3.2
Diffstat (limited to 'src/third_party/wiredtiger/src/include')
23 files changed, 629 insertions, 471 deletions
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index 70068e32b9b..8f8fd8e98b1 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -24,9 +24,9 @@ { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \ API_SESSION_INIT(s, h, n, cur, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ - WT_ERR(((config) != NULL) ? \ - __wt_config_check((s), \ - WT_CONFIG_REF(session, h##_##n), (config), 0) : 0); \ + if ((config) != NULL) \ + WT_ERR(__wt_config_check((s), \ + WT_CONFIG_REF(session, h##_##n), (config), 0)); \ WT_ERR(__wt_verbose((s), WT_VERB_API, "CALL: " #h ":" #n)) #define API_END(s, ret) \ @@ -96,29 +96,29 @@ #define CONNECTION_API_CALL(conn, s, n, config, cfg) \ s = (conn)->default_session; \ - API_CALL(s, connection, n, NULL, NULL, config, cfg) + API_CALL(s, WT_CONNECTION, n, NULL, NULL, config, cfg) #define CONNECTION_API_CALL_NOCONF(conn, s, n) \ s = (conn)->default_session; \ - API_CALL_NOCONF(s, connection, n, NULL, NULL) + API_CALL_NOCONF(s, WT_CONNECTION, n, NULL, NULL) #define SESSION_API_CALL(s, n, config, cfg) \ - API_CALL(s, session, n, NULL, NULL, config, cfg) + API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) #define SESSION_API_CALL_NOCONF(s, n) \ - API_CALL_NOCONF(s, session, n, NULL, NULL) + API_CALL_NOCONF(s, WT_SESSION, n, NULL, NULL) #define SESSION_TXN_API_CALL(s, n, config, cfg) \ - TXN_API_CALL(s, session, n, NULL, NULL, config, cfg) + TXN_API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) #define CURSOR_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - API_CALL_NOCONF(s, cursor, n, cur, \ + API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle) #define CURSOR_UPDATE_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, cursor, n, cur, \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle) #define CURSOR_UPDATE_API_END(s, ret) \ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index cda672bc7b4..e9b6b5a1d6e 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -412,8 +412,17 @@ struct __wt_page { /* * Macros to copy/set the index because the name is obscured to ensure * the field isn't read multiple times. + * + * There are two versions of WT_INTL_INDEX_GET because the session split + * generation is usually set, but it's not always required: for example, + * if a page is locked for splitting, or being created or destroyed. */ -#define WT_INTL_INDEX_COPY(page) ((page)->u.intl.__index) +#define WT_INTL_INDEX_GET_SAFE(page) \ + ((page)->u.intl.__index) +#define WT_INTL_INDEX_GET(session, page, pindex) do { \ + WT_ASSERT(session, session->split_gen != 0); \ + (pindex) = WT_INTL_INDEX_GET_SAFE(page); \ +} while (0) #define WT_INTL_INDEX_SET(page, v) do { \ WT_WRITE_BARRIER(); \ ((page)->u.intl.__index) = (v); \ @@ -421,21 +430,15 @@ struct __wt_page { /* * Macro to walk the list of references in an internal page. - * Two flavors: by default, check that we have a split_gen, but - * provide a "SAFE" version for code that can safely read the - * page index without a split_gen. */ -#define WT_INTL_FOREACH_BEGIN_SAFE(session, page, ref) do { \ +#define WT_INTL_FOREACH_BEGIN(session, page, ref) do { \ WT_PAGE_INDEX *__pindex; \ WT_REF **__refp; \ uint32_t __entries; \ - for (__pindex = WT_INTL_INDEX_COPY(page), \ - __refp = __pindex->index, \ + WT_INTL_INDEX_GET(session, page, __pindex); \ + for (__refp = __pindex->index, \ __entries = __pindex->entries; __entries > 0; --__entries) {\ (ref) = *__refp++; -#define WT_INTL_FOREACH_BEGIN(session, page, ref) \ - WT_ASSERT(session, session->split_gen != 0); \ - WT_INTL_FOREACH_BEGIN_SAFE(session, page, ref) #define WT_INTL_FOREACH_END \ } \ } while (0) diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index cc571124207..44ec40364cc 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -144,8 +144,8 @@ struct __wt_btree { /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ #define WT_BTREE_BULK 0x00100 /* Bulk-load handle */ -#define WT_BTREE_NO_EVICTION 0x00200 /* Disable eviction */ -#define WT_BTREE_NO_HAZARD 0x00400 /* Disable hazard pointers */ +#define WT_BTREE_IN_MEMORY 0x00200 /* Cache-resident object */ +#define WT_BTREE_NO_EVICTION 0x00400 /* Disable eviction */ #define WT_BTREE_SALVAGE 0x00800 /* Handle is for salvage */ #define WT_BTREE_UPGRADE 0x01000 /* Handle is for upgrade */ #define WT_BTREE_VERIFY 0x02000 /* Handle is for verify */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 7d9a3095a0c..9038dab2b34 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -279,13 +279,11 @@ __wt_page_refp(WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex; uint32_t i; - WT_ASSERT(session, session->split_gen != 0); - /* * Copy the parent page's index value: the page can split at any time, * but the index's value is always valid, even if it's not up-to-date. */ -retry: pindex = WT_INTL_INDEX_COPY(ref->home); +retry: WT_INTL_INDEX_GET(session, ref->home, pindex); /* * Use the page's reference hint: it should be correct unless the page @@ -967,10 +965,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits) if (mod == NULL) return (1); - /* Skip pages that are already being evicted. */ - if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)) - return (0); - /* * If the tree was deepened, there's a requirement that newly created * internal pages not be evicted until all threads are known to have @@ -1098,7 +1092,13 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) */ if (ref == NULL || __wt_ref_is_root(ref)) return (0); - page = ref->page; + + /* + * If hazard pointers aren't necessary for this file, we can't be + * evicting, we're done. + */ + if (F_ISSET(btree, WT_BTREE_IN_MEMORY)) + return (0); /* * Attempt to evict pages with the special "oldest" read generation. @@ -1112,9 +1112,10 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * it contains an update that isn't stable. Also skip forced eviction * if we just did an in-memory split. */ - if (page->read_gen != WT_READGEN_OLDEST || + page = ref->page; + if (F_ISSET(btree, WT_BTREE_NO_EVICTION) || LF_ISSET(WT_READ_NO_EVICT) || - F_ISSET(btree, WT_BTREE_NO_EVICTION) || + page->read_gen != WT_READGEN_OLDEST || !__wt_page_can_evict(session, page, 1)) return (__wt_hazard_clear(session, page)); @@ -1229,13 +1230,13 @@ __wt_skip_choose_depth(WT_SESSION_IMPL *session) } /* - * __wt_btree_size_overflow -- - * Check if the size of an in-memory tree with a single leaf page is over + * __wt_btree_lsm_size -- + * Return if the size of an in-memory tree with a single leaf page is over * a specified maximum. If called on anything other than a simple tree with a - * single leaf page, returns true so the calling code will switch to a new tree. + * single leaf page, returns true so our LSM caller will switch to a new tree. */ static inline int -__wt_btree_size_overflow(WT_SESSION_IMPL *session, uint64_t maxsize) +__wt_btree_lsm_size(WT_SESSION_IMPL *session, uint64_t maxsize) { WT_BTREE *btree; WT_PAGE *child, *root; @@ -1254,7 +1255,7 @@ __wt_btree_size_overflow(WT_SESSION_IMPL *session, uint64_t maxsize) return (1); /* Check for a tree with a single leaf page. */ - pindex = WT_INTL_INDEX_COPY(root); + WT_INTL_INDEX_GET(session, root, pindex); if (pindex->entries != 1) /* > 1 child page, switch */ return (1); @@ -1273,104 +1274,3 @@ __wt_btree_size_overflow(WT_SESSION_IMPL *session, uint64_t maxsize) return (child->memory_footprint > maxsize); } - -/* - * __wt_lex_compare -- - * Lexicographic comparison routine. - * - * Returns: - * < 0 if user_item is lexicographically < tree_item - * = 0 if user_item is lexicographically = tree_item - * > 0 if user_item is lexicographically > tree_item - * - * We use the names "user" and "tree" so it's clear in the btree code which - * the application is looking at when we call its comparison func. - */ -static inline int -__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) -{ - const uint8_t *userp, *treep; - size_t len, usz, tsz; - - usz = user_item->size; - tsz = tree_item->size; - len = WT_MIN(usz, tsz); - - for (userp = user_item->data, treep = tree_item->data; - len > 0; - --len, ++userp, ++treep) - if (*userp != *treep) - return (*userp < *treep ? -1 : 1); - - /* Contents are equal up to the smallest length. */ - return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); -} - -/* - * __wt_compare -- - * The same as __wt_lex_compare, but using the application's collator - * function when configured. - */ -static inline int -__wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, - const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp) -{ - if (collator == NULL) { - *cmpp = __wt_lex_compare(user_item, tree_item); - return (0); - } - return (collator->compare( - collator, &session->iface, user_item, tree_item, cmpp)); -} - -/* - * __wt_lex_compare_skip -- - * Lexicographic comparison routine, skipping leading bytes. - * - * Returns: - * < 0 if user_item is lexicographically < tree_item - * = 0 if user_item is lexicographically = tree_item - * > 0 if user_item is lexicographically > tree_item - * - * We use the names "user" and "tree" so it's clear in the btree code which - * the application is looking at when we call its comparison func. - */ -static inline int -__wt_lex_compare_skip( - const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp) -{ - const uint8_t *userp, *treep; - size_t len, usz, tsz; - - usz = user_item->size; - tsz = tree_item->size; - len = WT_MIN(usz, tsz) - *matchp; - - for (userp = (uint8_t *)user_item->data + *matchp, - treep = (uint8_t *)tree_item->data + *matchp; - len > 0; - --len, ++userp, ++treep, ++*matchp) - if (*userp != *treep) - return (*userp < *treep ? -1 : 1); - - /* Contents are equal up to the smallest length. */ - return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); -} - -/* - * __wt_compare_skip -- - * The same as __wt_lex_compare_skip, but using the application's collator - * function when configured. - */ -static inline int -__wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, - const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp, - size_t *matchp) -{ - if (collator == NULL) { - *cmpp = __wt_lex_compare_skip(user_item, tree_item, matchp); - return (0); - } - return (collator->compare( - collator, &session->iface, user_item, tree_item, cmpp)); -} diff --git a/src/third_party/wiredtiger/src/include/btree_cmp.i b/src/third_party/wiredtiger/src/include/btree_cmp.i new file mode 100644 index 00000000000..76f1ad4317a --- /dev/null +++ b/src/third_party/wiredtiger/src/include/btree_cmp.i @@ -0,0 +1,190 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#ifdef HAVE_X86INTRIN_H +#if !defined(_MSC_VER) +#include <x86intrin.h> +#endif + /* 16B alignment */ +#define WT_ALIGNED_16(p) (((uintptr_t)(p) & 0x0f) == 0) +#define WT_VECTOR_SIZE 16 /* chunk size */ +#endif + +/* + * __wt_lex_compare -- + * Lexicographic comparison routine. + * + * Returns: + * < 0 if user_item is lexicographically < tree_item + * = 0 if user_item is lexicographically = tree_item + * > 0 if user_item is lexicographically > tree_item + * + * We use the names "user" and "tree" so it's clear in the btree code which + * the application is looking at when we call its comparison function. + */ +static inline int +__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item) +{ + size_t len, usz, tsz; + const uint8_t *userp, *treep; + + usz = user_item->size; + tsz = tree_item->size; + len = WT_MIN(usz, tsz); + + userp = user_item->data; + treep = tree_item->data; + +#ifdef HAVE_X86INTRIN_H + /* Use vector instructions if we'll execute at least 2 of them. */ + if (len >= WT_VECTOR_SIZE * 2) { + size_t remain; + __m128i res_eq, u, t; + + remain = len % WT_VECTOR_SIZE; + len -= remain; + if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) + for (; len > 0; + len -= WT_VECTOR_SIZE, + userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { + u = _mm_load_si128((__m128i *)userp); + t = _mm_load_si128((__m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + else + for (; len > 0; + len -= WT_VECTOR_SIZE, + userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) { + u = _mm_loadu_si128((__m128i *)userp); + t = _mm_loadu_si128((__m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + len += remain; + } +#endif + /* + * Use the non-vectorized version for the remaining bytes and for the + * small key sizes. + */ + for (; len > 0; --len, ++userp, ++treep) + if (*userp != *treep) + return (*userp < *treep ? -1 : 1); + + /* Contents are equal up to the smallest length. */ + return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); +} + +/* + * __wt_compare -- + * The same as __wt_lex_compare, but using the application's collator + * function when configured. + */ +static inline int +__wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, + const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp) +{ + if (collator == NULL) { + *cmpp = __wt_lex_compare(user_item, tree_item); + return (0); + } + return (collator->compare( + collator, &session->iface, user_item, tree_item, cmpp)); +} + +/* + * __wt_lex_compare_skip -- + * Lexicographic comparison routine, skipping leading bytes. + * + * Returns: + * < 0 if user_item is lexicographically < tree_item + * = 0 if user_item is lexicographically = tree_item + * > 0 if user_item is lexicographically > tree_item + * + * We use the names "user" and "tree" so it's clear in the btree code which + * the application is looking at when we call its comparison function. + */ +static inline int +__wt_lex_compare_skip( + const WT_ITEM *user_item, const WT_ITEM *tree_item, size_t *matchp) +{ + size_t len, usz, tsz; + const uint8_t *userp, *treep; + + usz = user_item->size; + tsz = tree_item->size; + len = WT_MIN(usz, tsz) - *matchp; + + userp = (uint8_t *)user_item->data + *matchp; + treep = (uint8_t *)tree_item->data + *matchp; + +#ifdef HAVE_X86INTRIN_H + /* Use vector instructions if we'll execute at least 2 of them. */ + if (len >= WT_VECTOR_SIZE * 2) { + size_t remain; + __m128i res_eq, u, t; + + remain = len % WT_VECTOR_SIZE; + len -= remain; + if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep)) + for (; len > 0; + len -= WT_VECTOR_SIZE, + userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, + *matchp += WT_VECTOR_SIZE) { + u = _mm_load_si128((__m128i *)userp); + t = _mm_load_si128((__m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + else + for (; len > 0; + len -= WT_VECTOR_SIZE, + userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE, + *matchp += WT_VECTOR_SIZE) { + u = _mm_loadu_si128((__m128i *)userp); + t = _mm_loadu_si128((__m128i *)treep); + res_eq = _mm_cmpeq_epi8(u, t); + if (_mm_movemask_epi8(res_eq) != 65535) + break; + } + len += remain; + } +#endif + /* + * Use the non-vectorized version for the remaining bytes and for the + * small key sizes. + */ + for (; len > 0; --len, ++userp, ++treep, ++*matchp) + if (*userp != *treep) + return (*userp < *treep ? -1 : 1); + + /* Contents are equal up to the smallest length. */ + return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1); +} + +/* + * __wt_compare_skip -- + * The same as __wt_lex_compare_skip, but using the application's collator + * function when configured. + */ +static inline int +__wt_compare_skip(WT_SESSION_IMPL *session, WT_COLLATOR *collator, + const WT_ITEM *user_item, const WT_ITEM *tree_item, int *cmpp, + size_t *matchp) +{ + if (collator == NULL) { + *cmpp = __wt_lex_compare_skip(user_item, tree_item, matchp); + return (0); + } + return (collator->compare( + collator, &session->iface, user_item, tree_item, cmpp)); +} diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index 77e9fa1e3c1..80e0396d4a9 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -549,14 +549,15 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell) static inline int __wt_cell_unpack_safe(WT_CELL *cell, WT_CELL_UNPACK *unpack, uint8_t *end) { - uint64_t saved_v, v; - uint32_t saved_len; - int copied; + struct { + uint32_t len; + uint64_t v; + } copy; + uint64_t v; const uint8_t *p; - copied = 0; - saved_len = 0; - saved_v = 0; + copy.len = 0; + copy.v = 0; /* -Werror=maybe-uninitialized */ /* * The verification code specifies an end argument, a pointer to 1 past @@ -572,14 +573,18 @@ __wt_cell_unpack_safe(WT_CELL *cell, WT_CELL_UNPACK *unpack, uint8_t *end) restart: /* - * This code is performance critical for scans through read-only trees. - * Avoid WT_CLEAR here: it makes this code run significantly slower. + * This path is performance critical for read-only trees, we're parsing + * on-page structures. For that reason we don't clear the unpacked cell + * structure (although that would be simpler), instead we make sure we + * initialize all structure elements either here or in the immediately + * following switch. */ - WT_CLEAR_INLINE(WT_CELL_UNPACK, *unpack); WT_CELL_LEN_CHK(cell, 0); unpack->cell = cell; - unpack->type = __wt_cell_type(cell); + unpack->v = 0; unpack->raw = __wt_cell_type_raw(cell); + unpack->type = __wt_cell_type(cell); + unpack->ovfl = 0; /* * Handle cells with neither an RLE count or data length: short key/data @@ -589,19 +594,24 @@ restart: case WT_CELL_KEY_SHORT_PFX: WT_CELL_LEN_CHK(cell, 1); /* skip prefix */ unpack->prefix = cell->__chunk[1]; - unpack->data = cell->__chunk + 2; unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; unpack->__len = 2 + unpack->size; goto done; case WT_CELL_KEY_SHORT: case WT_CELL_VALUE_SHORT: + unpack->prefix = 0; unpack->data = cell->__chunk + 1; unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; unpack->__len = 1 + unpack->size; goto done; } + unpack->prefix = 0; + unpack->data = NULL; + unpack->size = 0; + unpack->__len = 0; + p = (uint8_t *)cell + 1; /* skip cell */ /* @@ -638,10 +648,9 @@ restart: */ WT_RET(__wt_vunpack_uint( &p, end == NULL ? 0 : (size_t)(end - p), &v)); - saved_len = WT_PTRDIFF32(p, cell); - saved_v = unpack->v; + copy.len = WT_PTRDIFF32(p, cell); + copy.v = unpack->v; cell = (WT_CELL *)((uint8_t *)cell - v); - copied = 1; goto restart; case WT_CELL_KEY_OVFL: @@ -691,10 +700,10 @@ restart: * we need the right length). */ done: WT_CELL_LEN_CHK(cell, unpack->__len); - if (copied) { + if (copy.len != 0) { unpack->raw = WT_CELL_VALUE_COPY; - unpack->__len = saved_len; - unpack->v = saved_v; + unpack->__len = copy.len; + unpack->v = copy.v; } return (0); diff --git a/src/third_party/wiredtiger/src/include/column.i b/src/third_party/wiredtiger/src/include/column.i index bf12a48a3e4..fc1f372b2a9 100644 --- a/src/third_party/wiredtiger/src/include/column.i +++ b/src/third_party/wiredtiger/src/include/column.i @@ -7,8 +7,92 @@ */ /* + * __col_insert_search_gt -- + * Search a column-store insert list for the next larger record. + */ +static inline WT_INSERT * +__col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno) +{ + WT_INSERT *ins, **insp; + int i; + + /* If there's no insert chain to search, we're done. */ + if ((ins = WT_SKIP_LAST(inshead)) == NULL) + return (NULL); + + /* Fast path check for targets past the end of the skiplist. */ + if (recno >= WT_INSERT_RECNO(ins)) + return (NULL); + + /* + * The insert list is a skip list: start at the highest skip level, then + * go as far as possible at each level before stepping down to the next. + */ + ins = NULL; + for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;) + if (*insp != NULL && recno >= WT_INSERT_RECNO(*insp)) { + ins = *insp; /* GTE: keep going at this level */ + insp = &(*insp)->next[i]; + } else { + --i; /* LT: drop down a level */ + --insp; + } + + /* + * If we didn't find any records smaller than the target, we never set + * the return value, set it to the first record in the list. Otherwise, + * it references a record less-than-or-equal to the target, move to a + * later record, that is, a subsequent record greater than the target. + * Because inserts happen concurrently, additional records might be + * inserted after the searched-for record that are still smaller than + * the target, continue to move forward until reaching a record larger + * than the target. There isn't any safety testing because we confirmed + * such a record exists before searching. + */ + if (ins == NULL) + ins = WT_SKIP_FIRST(inshead); + while (recno >= WT_INSERT_RECNO(ins)) + ins = WT_SKIP_NEXT(ins); + return (ins); +} + +/* + * __col_insert_search_lt -- + * Search a column-store insert list for the next smaller record. + */ +static inline WT_INSERT * +__col_insert_search_lt(WT_INSERT_HEAD *inshead, uint64_t recno) +{ + WT_INSERT *ins, **insp; + int i; + + /* If there's no insert chain to search, we're done. */ + if ((ins = WT_SKIP_FIRST(inshead)) == NULL) + return (NULL); + + /* Fast path check for targets before the skiplist. */ + if (recno <= WT_INSERT_RECNO(ins)) + return (NULL); + + /* + * The insert list is a skip list: start at the highest skip level, then + * go as far as possible at each level before stepping down to the next. + */ + for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;) + if (*insp != NULL && recno > WT_INSERT_RECNO(*insp)) { + ins = *insp; /* GT: keep going at this level */ + insp = &(*insp)->next[i]; + } else { + --i; /* LTE: drop down a level */ + --insp; + } + + return (ins); +} + +/* * __col_insert_search_match -- - * Search an column-store insert list for an exact match. + * Search a column-store insert list for an exact match. */ static inline WT_INSERT * __col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno) @@ -154,7 +238,7 @@ __col_fix_last_recno(WT_PAGE *page) * Search a variable-length column-store page for a record. */ static inline WT_COL * -__col_var_search(WT_PAGE *page, uint64_t recno) +__col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop) { WT_COL_RLE *repeat; uint64_t start_recno; @@ -174,8 +258,11 @@ __col_var_search(WT_PAGE *page, uint64_t recno) repeat = page->pg_var_repeats + indx; if (recno >= repeat->recno && - recno < repeat->recno + repeat->rle) + recno < repeat->recno + repeat->rle) { + if (start_recnop != NULL) + *start_recnop = repeat->recno; return (page->pg_var_d + repeat->indx); + } if (recno < repeat->recno) continue; base = indx + 1; diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h index 1f30667b8d6..baf81b823a6 100644 --- a/src/third_party/wiredtiger/src/include/config.h +++ b/src/third_party/wiredtiger/src/include/config.h @@ -22,6 +22,7 @@ struct __wt_config_check { int (*checkf)(WT_SESSION_IMPL *, WT_CONFIG_ITEM *); const char *checks; const WT_CONFIG_CHECK *subconfigs; + u_int subconfigs_entries; }; #define WT_CONFIG_REF(session, n) \ @@ -33,6 +34,7 @@ struct __wt_config_entry { const char *base; /* configuration base */ const WT_CONFIG_CHECK *checks; /* check array */ + u_int checks_entries; }; struct __wt_config_parser_impl { @@ -47,37 +49,37 @@ struct __wt_config_parser_impl { * DO NOT EDIT: automatically built by dist/api_config.py. * configuration section: BEGIN */ -#define WT_CONFIG_ENTRY_colgroup_meta 0 -#define WT_CONFIG_ENTRY_connection_add_collator 1 -#define WT_CONFIG_ENTRY_connection_add_compressor 2 -#define WT_CONFIG_ENTRY_connection_add_data_source 3 -#define WT_CONFIG_ENTRY_connection_add_extractor 4 -#define WT_CONFIG_ENTRY_connection_async_new_op 5 -#define WT_CONFIG_ENTRY_connection_close 6 -#define WT_CONFIG_ENTRY_connection_load_extension 7 -#define WT_CONFIG_ENTRY_connection_open_session 8 -#define WT_CONFIG_ENTRY_connection_reconfigure 9 -#define WT_CONFIG_ENTRY_cursor_close 10 -#define WT_CONFIG_ENTRY_cursor_reconfigure 11 -#define WT_CONFIG_ENTRY_file_meta 12 -#define WT_CONFIG_ENTRY_index_meta 13 -#define WT_CONFIG_ENTRY_session_begin_transaction 14 -#define WT_CONFIG_ENTRY_session_checkpoint 15 -#define WT_CONFIG_ENTRY_session_close 16 -#define WT_CONFIG_ENTRY_session_commit_transaction 17 -#define WT_CONFIG_ENTRY_session_compact 18 -#define WT_CONFIG_ENTRY_session_create 19 -#define WT_CONFIG_ENTRY_session_drop 20 -#define WT_CONFIG_ENTRY_session_log_printf 21 -#define WT_CONFIG_ENTRY_session_open_cursor 22 -#define WT_CONFIG_ENTRY_session_reconfigure 23 -#define WT_CONFIG_ENTRY_session_rename 24 -#define WT_CONFIG_ENTRY_session_rollback_transaction 25 -#define WT_CONFIG_ENTRY_session_salvage 26 -#define WT_CONFIG_ENTRY_session_strerror 27 -#define WT_CONFIG_ENTRY_session_truncate 28 -#define WT_CONFIG_ENTRY_session_upgrade 29 -#define WT_CONFIG_ENTRY_session_verify 30 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_collator 0 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_compressor 1 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_data_source 2 +#define WT_CONFIG_ENTRY_WT_CONNECTION_add_extractor 3 +#define WT_CONFIG_ENTRY_WT_CONNECTION_async_new_op 4 +#define WT_CONFIG_ENTRY_WT_CONNECTION_close 5 +#define WT_CONFIG_ENTRY_WT_CONNECTION_load_extension 6 +#define WT_CONFIG_ENTRY_WT_CONNECTION_open_session 7 +#define WT_CONFIG_ENTRY_WT_CONNECTION_reconfigure 8 +#define WT_CONFIG_ENTRY_WT_CURSOR_close 9 +#define WT_CONFIG_ENTRY_WT_CURSOR_reconfigure 10 +#define WT_CONFIG_ENTRY_WT_SESSION_begin_transaction 11 +#define WT_CONFIG_ENTRY_WT_SESSION_checkpoint 12 +#define WT_CONFIG_ENTRY_WT_SESSION_close 13 +#define WT_CONFIG_ENTRY_WT_SESSION_commit_transaction 14 +#define WT_CONFIG_ENTRY_WT_SESSION_compact 15 +#define WT_CONFIG_ENTRY_WT_SESSION_create 16 +#define WT_CONFIG_ENTRY_WT_SESSION_drop 17 +#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 18 +#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 19 +#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 20 +#define WT_CONFIG_ENTRY_WT_SESSION_rename 21 +#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 22 +#define WT_CONFIG_ENTRY_WT_SESSION_salvage 23 +#define WT_CONFIG_ENTRY_WT_SESSION_strerror 24 +#define WT_CONFIG_ENTRY_WT_SESSION_truncate 25 +#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 26 +#define WT_CONFIG_ENTRY_WT_SESSION_verify 27 +#define WT_CONFIG_ENTRY_colgroup_meta 28 +#define WT_CONFIG_ENTRY_file_meta 29 +#define WT_CONFIG_ENTRY_index_meta 30 #define WT_CONFIG_ENTRY_table_meta 31 #define WT_CONFIG_ENTRY_wiredtiger_open 32 #define WT_CONFIG_ENTRY_wiredtiger_open_all 33 diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 533b9ea8bbe..0121a1625c5 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -202,6 +202,7 @@ struct __wt_connection_impl { u_int open_btree_count; /* Locked: open writable btree count */ uint32_t next_file_id; /* Locked: file ID counter */ + uint32_t open_file_count; /* Atomic: open file handle count */ /* * WiredTiger allocates space for 50 simultaneous sessions (threads of @@ -255,29 +256,6 @@ struct __wt_connection_impl { WT_CONNECTION_STATS stats; /* Connection statistics */ -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING - /* - * Spinlock registration, so we can track which spinlocks are heavily - * used, which are blocking and where. - * - * There's an array of spinlocks, and an array of blocking IDs. - */ -#define WT_SPINLOCK_MAX 1024 -#define WT_SPINLOCK_MAX_LOCATION_ID 60 - WT_SPINLOCK *spinlock_list[WT_SPINLOCK_MAX]; - - /* Spinlock blocking matrix */ - struct __wt_connection_stats_spinlock { - const char *name; /* Mutex name */ - - const char *file; /* Caller's file/line, ID location */ - int line; - - u_int total; /* Count of total, blocked calls */ - u_int blocked[WT_SPINLOCK_MAX_LOCATION_ID]; - } spinlock_block[WT_SPINLOCK_MAX_LOCATION_ID]; -#endif - WT_ASYNC *async; /* Async structure */ int async_cfg; /* Global async configuration */ uint32_t async_size; /* Async op array size */ @@ -337,6 +315,7 @@ struct __wt_connection_impl { WT_CONDVAR *sweep_cond; /* Handle sweep wait mutex */ time_t sweep_idle_time;/* Handle sweep idle time */ time_t sweep_interval;/* Handle sweep interval */ + u_int sweep_handles_min;/* Handle sweep minimum open */ /* Locked: collator list */ TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh; diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index d23deee8c98..4dba18ff558 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -112,12 +112,16 @@ struct __wt_cursor_btree { int compare; /* - * The key value from a binary search of a row-store files; we keep a - * copy of the last key we retrieved in the search, it avoids having - * doing the additional work of getting the key again for return to - * the application. + * A key returned from a binary search or cursor movement on a row-store + * page; if we find an exact match on a row-store leaf page in a search + * operation, keep a copy of key we built during the search to avoid + * doing the additional work of getting the key again for return to the + * application. Note, this only applies to exact matches when searching + * disk-image structures, so it's not, for example, a key from an insert + * list. Additionally, this structure is used to build keys when moving + * a cursor through a row-store leaf page. */ - WT_ITEM search_key; + WT_ITEM *row_key, _row_key; /* * It's relatively expensive to calculate the last record on a variable- @@ -163,9 +167,15 @@ struct __wt_cursor_btree { WT_ROW *rip_saved; /* Last-returned key reference */ /* - * A temporary buffer for caching RLE values for column-store files. + * A temporary buffer for caching RLE values for column-store files (if + * RLE is non-zero, then we don't unpack the value every time we move + * to the next cursor position, we re-use the unpacked value we stored + * here the first time we hit the value). + * + * A temporary buffer for building on-page keys when searching row-store + * files. */ - WT_ITEM tmp; + WT_ITEM *tmp, _tmp; /* * The update structure allocated by the row- and column-store modify diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index dd38a5746c1..57c19f50417 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -264,7 +264,7 @@ __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd) __wt_cell_unpack(cell, unpack); if (unpack->type == WT_CELL_KEY && cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) { - WT_ASSERT(session, cbt->tmp.size >= unpack->prefix); + WT_ASSERT(session, cbt->row_key->size >= unpack->prefix); /* * Grow the buffer as necessary as well as ensure data has been @@ -274,22 +274,22 @@ __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd) * Don't grow the buffer unnecessarily or copy data we don't * need, truncate the item's data length to the prefix bytes. */ - cbt->tmp.size = unpack->prefix; + cbt->row_key->size = unpack->prefix; WT_RET(__wt_buf_grow( - session, &cbt->tmp, cbt->tmp.size + unpack->size)); - memcpy((uint8_t *)cbt->tmp.data + cbt->tmp.size, + session, cbt->row_key, cbt->row_key->size + unpack->size)); + memcpy((uint8_t *)cbt->row_key->data + cbt->row_key->size, unpack->data, unpack->size); - cbt->tmp.size += unpack->size; + cbt->row_key->size += unpack->size; } else { /* * Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we * already did __wt_row_leaf_key's fast-path checks inline. */ -slow: WT_RET( - __wt_row_leaf_key_work(session, page, rip, &cbt->tmp, 0)); +slow: WT_RET(__wt_row_leaf_key_work( + session, page, rip, cbt->row_key, 0)); } - kb->data = cbt->tmp.data; - kb->size = cbt->tmp.size; + kb->data = cbt->row_key->data; + kb->size = cbt->row_key->size; cbt->rip_saved = rip; value: diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index 300e8e735b9..034db30a0a2 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -65,11 +65,12 @@ struct __wt_data_handle { WT_DSRC_STATS stats; /* Data-source statistics */ /* Flags values over 0xff are reserved for WT_BTREE_* */ -#define WT_DHANDLE_DISCARD 0x01 /* Discard on release */ -#define WT_DHANDLE_DISCARD_CLOSE 0x02 /* Close on release */ -#define WT_DHANDLE_EXCLUSIVE 0x04 /* Need exclusive access */ -#define WT_DHANDLE_HAVE_REF 0x08 /* Already have ref */ -#define WT_DHANDLE_LOCK_ONLY 0x10 /* Handle only used as a lock */ -#define WT_DHANDLE_OPEN 0x20 /* Handle is open */ +#define WT_DHANDLE_DEAD 0x01 /* Dead, awaiting discard */ +#define WT_DHANDLE_DISCARD 0x02 /* Discard on release */ +#define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */ +#define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */ +#define WT_DHANDLE_HAVE_REF 0x10 /* Already have ref */ +#define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */ +#define WT_DHANDLE_OPEN 0x40 /* Handle is open */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 86c8f501eb2..48bf792bcf5 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -71,6 +71,7 @@ extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size); extern u_int __wt_block_header(WT_BLOCK *block); +extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len); extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep); extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, int data_cksum); extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, int data_cksum, int caller_locked); @@ -99,6 +100,7 @@ extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp); extern int __wt_btcur_equals( WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp); extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop); +extern void __wt_btcur_open(WT_CURSOR_BTREE *cbt); extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt); extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v); extern int __wt_debug_addr_print( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); @@ -181,7 +183,7 @@ extern int __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const c extern int __wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str); extern int __wt_config_subinit( WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item); extern int __wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); -extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); +extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value); extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value); extern int __wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); @@ -194,10 +196,10 @@ extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session); extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri, const char *config, const char *type, const char *check); extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len); extern int __wt_config_collapse( WT_SESSION_IMPL *session, const char **cfg, char **config_ret); -extern int __wt_config_merge( WT_SESSION_IMPL *session, const char **cfg, const char **config_ret); -extern int __wt_config_concat( WT_SESSION_IMPL *session, const char **cfg, char **config_ret); +extern int __wt_config_merge(WT_SESSION_IMPL *session, const char **cfg, const char *cfg_strip, const char **config_ret); extern int __wt_conn_config_init(WT_SESSION_IMPL *session); extern void __wt_conn_config_discard(WT_SESSION_IMPL *session); +extern const WT_CONFIG_ENTRY *__wt_conn_config_match(const char *method); extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp); extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval); extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf); @@ -231,7 +233,7 @@ extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]); extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]); extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *name, int force); -extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final); +extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final, int force); extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn); extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn); @@ -326,7 +328,7 @@ extern int __wt_log_open(WT_SESSION_IMPL *session); extern int __wt_log_close(WT_SESSION_IMPL *session); extern int __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created); extern int __wt_log_read(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags); -extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, void *cookie, int firstrecord), void *cookie); +extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie); extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags); extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap); extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp); @@ -425,6 +427,7 @@ extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key); extern int __wt_metadata_search( WT_SESSION_IMPL *session, const char *key, char **valuep); extern void __wt_meta_track_discard(WT_SESSION_IMPL *session); extern int __wt_meta_track_on(WT_SESSION_IMPL *session); +extern int __wt_meta_track_find_handle( WT_SESSION_IMPL *session, const char *name, const char *checkpoint); extern int __wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll); extern int __wt_meta_track_sub_on(WT_SESSION_IMPL *session); extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session); @@ -491,10 +494,10 @@ extern int __wt_read( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size extern int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, WT_FHANDLE_MODE mode_flag, u_int flags, FILE **fpp); -extern int __wt_vfprintf(WT_SESSION_IMPL *session, FILE *fp, const char *fmt, va_list ap); -extern int __wt_fprintf(WT_SESSION_IMPL *session, FILE *fp, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); -extern int __wt_fflush(WT_SESSION_IMPL *session, FILE *fp); -extern int __wt_fclose(WT_SESSION_IMPL *session, FILE **fpp, WT_FHANDLE_MODE mode_flag); +extern int __wt_vfprintf(FILE *fp, const char *fmt, va_list ap); +extern int __wt_fprintf(FILE *fp, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))); +extern int __wt_fflush(FILE *fp); +extern int __wt_fclose(FILE **fpp, WT_FHANDLE_MODE mode_flag); extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); @@ -574,7 +577,7 @@ extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, WT_SESSION_IMPL **sessionp); extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, int *skip); extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config); -extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags); +extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, int *deadp); extern int __wt_session_release_btree(WT_SESSION_IMPL *session); extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags); extern void __wt_session_close_cache(WT_SESSION_IMPL *session); @@ -626,10 +629,6 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern int __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf); -extern int __wt_spin_lock_register_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t); -extern void __wt_spin_lock_unregister_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t); -extern int __wt_spin_lock_register_caller(WT_SESSION_IMPL *session, const char *name, const char *file, int line, int *idp); -extern int __wt_statlog_dump_spinlock(WT_CONNECTION_IMPL *conn, const char *tag); extern uint32_t __wt_nlpo2_round(uint32_t v); extern uint32_t __wt_nlpo2(uint32_t v); extern uint32_t __wt_log2_int(uint32_t n); @@ -658,7 +657,7 @@ extern int WT_CDECL __wt_txnid_cmp(const void *v1, const void *v2); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_update_oldest(WT_SESSION_IMPL *session); extern void __wt_txn_refresh(WT_SESSION_IMPL *session, int get_snapshot); -extern int __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]); +extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]); extern void __wt_txn_release(WT_SESSION_IMPL *session); extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]); @@ -672,7 +671,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force); +extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final); extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session); extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session); extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify); diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index 760321d9abb..ebe3a00b19f 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -128,7 +128,8 @@ typedef struct { WT_LSN sync_dir_lsn; /* LSN of the last directory sync */ WT_LSN sync_lsn; /* LSN of the last sync */ WT_LSN trunc_lsn; /* End LSN for recovery truncation */ - WT_LSN write_lsn; /* Last LSN written to log file */ + WT_LSN write_lsn; /* End of last LSN written */ + WT_LSN write_start_lsn;/* Beginning of last LSN written */ /* * Synchronization resources diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index 12cf2dec375..8f4a730ae90 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -126,6 +126,33 @@ #define FLD_ISSET(field, mask) ((field) & ((uint32_t)(mask))) #define FLD_SET(field, mask) ((field) |= ((uint32_t)(mask))) +/* + * Insertion sort, for sorting small sets of values. + * + * The "compare_lt" argument is a function or macro that returns true when + * its first argument is less than its second argument. + */ +#define WT_INSERTION_SORT(arrayp, n, value_type, compare_lt) do { \ + value_type __v; \ + int __i, __j, __n = (int)(n); \ + if (__n == 2) { \ + __v = (arrayp)[1]; \ + if (compare_lt(__v, (arrayp)[0])) { \ + (arrayp)[1] = (arrayp)[0]; \ + (arrayp)[0] = __v; \ + } \ + } \ + if (__n > 2) { \ + for (__i = 1; __i < __n; ++__i) { \ + __v = (arrayp)[__i]; \ + for (__j = __i - 1; __j >= 0 && \ + compare_lt(__v, (arrayp)[__j]); --__j) \ + (arrayp)[__j + 1] = (arrayp)[__j]; \ + (arrayp)[__j + 1] = __v; \ + } \ + } \ +} while (0) + /* Verbose messages. */ #ifdef HAVE_VERBOSE #define WT_VERBOSE_ISSET(session, f) \ @@ -134,17 +161,6 @@ #define WT_VERBOSE_ISSET(session, f) 0 #endif -/* - * Clear a structure, two flavors: inline when we want to guarantee there's - * no function call or setup/tear-down of a loop, and the default where the - * compiler presumably chooses. Gcc 4.3 is supposed to get this right, but - * we've seen problems when calling memset to clear structures in performance - * critical paths. - */ -#define WT_CLEAR_INLINE(type, s) do { \ - static const type __clear; \ - s = __clear; \ -} while (0) #define WT_CLEAR(s) \ memset(&(s), 0, sizeof(s)) diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h index 07aa740c525..fd7fd16dea7 100644 --- a/src/third_party/wiredtiger/src/include/mutex.h +++ b/src/third_party/wiredtiger/src/include/mutex.h @@ -61,10 +61,9 @@ struct __wt_rwlock { * instructions). */ #define SPINLOCK_GCC 0 -#define SPINLOCK_PTHREAD_MUTEX 1 -#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 2 -#define SPINLOCK_PTHREAD_MUTEX_LOGGING 3 -#define SPINLOCK_MSVC 4 +#define SPINLOCK_MSVC 1 +#define SPINLOCK_PTHREAD_MUTEX 2 +#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3 #if SPINLOCK_TYPE == SPINLOCK_GCC @@ -73,16 +72,12 @@ typedef volatile int WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ - SPINLOCK_TYPE == SPINLOCK_MSVC ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING + SPINLOCK_TYPE == SPINLOCK_MSVC typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct { wt_mutex_t lock; - uint64_t counter; /* Statistics: counter */ - const char *name; /* Statistics: mutex name */ - int8_t id; /* Statistics: current holder ID */ int8_t initialized; /* Lock initialized, for cleanup */ } WT_SPINLOCK; diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i index e4b497af2a7..8bca50635e6 100644 --- a/src/third_party/wiredtiger/src/include/mutex.i +++ b/src/third_party/wiredtiger/src/include/mutex.i @@ -16,10 +16,6 @@ #if SPINLOCK_TYPE == SPINLOCK_GCC -#define WT_DECL_SPINLOCK_ID(i) -#define __wt_spin_trylock(session, lock, idp) \ - __wt_spin_trylock_func(session, lock) - /* Default to spinning 1000 times before yielding. */ #ifndef WT_SPIN_COUNT #define WT_SPIN_COUNT 1000 @@ -52,11 +48,11 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) } /* - * __wt_spin_trylock_func -- + * __wt_spin_trylock -- * Try to lock a spinlock or fail immediately if it is busy. */ static inline int -__wt_spin_trylock_func(WT_SESSION_IMPL *session, WT_SPINLOCK *t) +__wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); @@ -95,8 +91,7 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) } #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_init -- @@ -118,10 +113,6 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) t->name = name; t->initialized = 1; -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING - WT_RET(__wt_spin_lock_register_lock(session, t)); -#endif - WT_UNUSED(session); return (0); } @@ -135,9 +126,6 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING - __wt_spin_lock_unregister_lock(session, t); -#endif if (t->initialized) { (void)pthread_mutex_destroy(&t->lock); t->initialized = 0; @@ -147,16 +135,12 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) #if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE -#define WT_DECL_SPINLOCK_ID(i) -#define __wt_spin_trylock(session, lock, idp) \ - __wt_spin_trylock_func(session, lock) - /* - * __wt_spin_trylock_func -- + * __wt_spin_trylock -- * Try to lock a spinlock or fail immediately if it is busy. */ static inline int -__wt_spin_trylock_func(WT_SESSION_IMPL *session, WT_SPINLOCK *t) +__wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); @@ -176,106 +160,6 @@ __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) } #endif -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING - -/* - * When logging statistics, we track which spinlocks block and why. - */ -#define WT_DECL_SPINLOCK_ID(i) \ - static int i = WT_SPINLOCK_REGISTER -#define WT_SPINLOCK_REGISTER -1 -#define WT_SPINLOCK_REGISTER_FAILED -2 -#define __wt_spin_trylock(session, lock, idp) \ - __wt_spin_trylock_func(session, lock, idp, __FILE__, __LINE__) -#define __wt_spin_lock(session, lock) do { \ - WT_DECL_SPINLOCK_ID(__id); \ - __wt_spin_lock_func(session, lock, &__id, __FILE__, __LINE__); \ -} while (0) - -/* - * __wt_spin_trylock_func -- - * Try to lock a spinlock or fail immediately if it is busy. - */ -static inline int -__wt_spin_trylock_func(WT_SESSION_IMPL *session, - WT_SPINLOCK *t, int *idp, const char *file, int line) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - - conn = S2C_SAFE(session); - /* If we're not maintaining statistics, it's simple. */ - if (session == NULL || !FLD_ISSET(conn->stat_flags, WT_STAT_CONN_FAST)) - return (pthread_mutex_trylock(&t->lock)); - - /* - * If this caller hasn't yet registered, do so. The caller's location - * ID is a static offset into a per-connection structure, and that has - * problems: first, if there are multiple connections, we'll need to - * hold some kind of lock to avoid racing when setting that value, and - * second, if/when there are multiple connections and/or a single - * connection is closed and re-opened, the variable may be initialized - * and the underlying connection information may not. Check both. - */ - if (*idp == WT_SPINLOCK_REGISTER || - conn->spinlock_block[*idp].name == NULL) - WT_RET(__wt_spin_lock_register_caller( - session, t->name, file, line, idp)); - - /* - * Try to acquire the mutex: on failure, update blocking statistics, on - * success, set our ID as the mutex holder. - * - * Note the race between acquiring the lock and setting our ID as the - * holder, this can appear in the output as mutexes blocking in ways - * that can't actually happen (although still an indicator of a mutex - * that's busier than we'd like). - */ - if ((ret = pthread_mutex_trylock(&t->lock)) == 0) - t->id = *idp; - else - if (*idp >= 0) { - ++conn->spinlock_block[*idp].total; - if (t->id >= 0) - ++conn->spinlock_block[*idp].blocked[t->id]; - } - - /* Update the mutex counter and flush to minimize the windows. */ - ++t->counter; - WT_FULL_BARRIER(); - return (ret); -} - -/* - * __wt_spin_lock_func -- - * Spin until the lock is acquired. - */ -static inline void -__wt_spin_lock_func(WT_SESSION_IMPL *session, - WT_SPINLOCK *t, int *idp, const char *file, int line) -{ - /* If we're not maintaining statistics, it's simple. */ - if (session == NULL || - !FLD_ISSET(conn->stat_flags, WT_STAT_CONN_FAST)) { - pthread_mutex_lock(&t->lock); - return; - } - - /* Try to acquire the mutex. */ - if (__wt_spin_trylock_func(session, t, idp, file, line) == 0) - return; - - /* - * On failure, wait on the mutex; once acquired, set our ID as the - * holder and flush to minimize the windows. - */ - pthread_mutex_lock(&t->lock); - t->id = *idp; - WT_FULL_BARRIER(); -} - -#endif - /* * __wt_spin_unlock -- * Release the spinlock. @@ -290,13 +174,9 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) #elif SPINLOCK_TYPE == SPINLOCK_MSVC -#define WT_DECL_SPINLOCK_ID(i) #define WT_SPINLOCK_REGISTER -1 #define WT_SPINLOCK_REGISTER_FAILED -2 -#define __wt_spin_trylock(session, lock, idp) \ - __wt_spin_trylock_func(session, lock) - /* * __wt_spin_init -- * Initialize a spinlock. @@ -305,7 +185,9 @@ static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) { WT_UNUSED(session); - WT_UNUSED(name); + + t->name = name; + t->initialized = 1; InitializeCriticalSectionAndSpinCount(&t->lock, 4000); @@ -321,15 +203,18 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); - DeleteCriticalSection(&t->lock); + if (t->initialized) { + DeleteCriticalSection(&t->lock); + t->initialized = 0; + } } /* - * __wt_spin_trylock_func -- + * __wt_spin_trylock -- * Try to lock a spinlock or fail immediately if it is busy. */ static inline int -__wt_spin_trylock_func(WT_SESSION_IMPL *session, WT_SPINLOCK *t) +__wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { WT_UNUSED(session); diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h index 25c1baae60f..5d524534b39 100644 --- a/src/third_party/wiredtiger/src/include/schema.h +++ b/src/third_party/wiredtiger/src/include/schema.h @@ -125,12 +125,18 @@ struct __wt_table { WT_CONNECTION_IMPL *__conn = S2C(session); \ int __handle_locked = \ F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED);\ + int __table_locked = \ + F_ISSET(session, WT_SESSION_TABLE_LOCKED); \ int __schema_locked = \ F_ISSET(session, WT_SESSION_SCHEMA_LOCKED); \ if (__handle_locked) { \ F_CLR(session, WT_SESSION_HANDLE_LIST_LOCKED); \ __wt_spin_unlock(session, &__conn->dhandle_lock);\ } \ + if (__table_locked) { \ + F_CLR(session, WT_SESSION_TABLE_LOCKED); \ + __wt_spin_unlock(session, &__conn->table_lock);\ + } \ if (__schema_locked) { \ F_CLR(session, WT_SESSION_SCHEMA_LOCKED); \ __wt_spin_unlock(session, &__conn->schema_lock);\ @@ -140,6 +146,10 @@ struct __wt_table { __wt_spin_lock(session, &__conn->schema_lock); \ F_SET(session, WT_SESSION_SCHEMA_LOCKED); \ } \ + if (__table_locked) { \ + __wt_spin_lock(session, &__conn->table_lock); \ + F_SET(session, WT_SESSION_TABLE_LOCKED); \ + } \ if (__handle_locked) { \ __wt_spin_lock(session, &__conn->dhandle_lock); \ F_SET(session, WT_SESSION_HANDLE_LIST_LOCKED); \ diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 2c88727c662..daa47d6e776 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -32,7 +32,6 @@ struct __wt_hazard { /* Get the connection implementation for a session */ #define S2C(session) ((WT_CONNECTION_IMPL *)(session)->iface.connection) -#define S2C_SAFE(session) ((session) == NULL ? NULL : S2C(session)) /* Get the btree for a session */ #define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle) diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index e04e645b3ea..cbe8167907a 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -23,7 +23,7 @@ struct __wt_stats { #define WT_STAT_ATOMIC_INCRV(stats, fld, value) do { \ (void)WT_ATOMIC_ADD8(WT_STAT(stats, fld), (value)); \ } while (0) -#define WT_STAT_ATOMIC_INCR(stats, fld) WT_ATOMIC_ADD(WT_STAT(stats, fld), 1) +#define WT_STAT_ATOMIC_INCR(stats, fld) WT_STAT_ATOMIC_INCRV(stats, fld, 1) #define WT_STAT_DECRV(stats, fld, value) do { \ (stats)->fld.v -= (value); \ } while (0) diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 3a3bdde2b73..4141d829f1d 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -204,9 +204,35 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd) } /* + * __wt_txn_begin -- + * Begin a transaction. + */ +static inline int +__wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_TXN *txn; + + txn = &session->txn; + txn->isolation = session->isolation; + txn->txn_logsync = S2C(session)->txn_logsync; + + if (cfg != NULL) + WT_RET(__wt_txn_config(session, cfg)); + + if (txn->isolation == TXN_ISO_SNAPSHOT) { + if (session->ncursors > 0) + WT_RET(__wt_session_copy_values(session)); + __wt_txn_refresh(session, 1); + } + + F_SET(txn, TXN_RUNNING); + return (0); +} + +/* * __wt_txn_autocommit_check -- * If an auto-commit transaction is required, start one. -*/ + */ static inline int __wt_txn_autocommit_check(WT_SESSION_IMPL *session) { diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index bfd2641785f..05e92d313f2 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -522,7 +522,7 @@ struct __wt_cursor { * @snippet ex_all.c Reconfigure a cursor * * @param cursor the cursor handle - * @configstart{cursor.reconfigure, see dist/api_data.py} + * @configstart{WT_CURSOR.reconfigure, see dist/api_data.py} * @config{append, append the value as a new record\, creating a new * record number key; valid only for cursors with record number keys., a * boolean flag; default \c false.} @@ -809,7 +809,7 @@ struct __wt_session { * @snippet ex_all.c Close a session * * @param session the session handle - * @configempty{session.close, see dist/api_data.py} + * @configempty{WT_SESSION.close, see dist/api_data.py} * @errors */ int __F(close)(WT_HANDLE_CLOSED(WT_SESSION) *session, @@ -826,7 +826,7 @@ struct __wt_session { * All cursors are reset. * * @param session the session handle - * @configstart{session.reconfigure, see dist/api_data.py} + * @configstart{WT_SESSION.reconfigure, see dist/api_data.py} * @config{isolation, the default isolation level for operations in this * session., a string\, chosen from the following options: \c * "read-uncommitted"\, \c "read-committed"\, \c "snapshot"; default \c @@ -888,7 +888,7 @@ struct __wt_session { * <br> * @copydoc doc_cursor_types * @param to_dup a cursor to duplicate - * @configstart{session.open_cursor, see dist/api_data.py} + * @configstart{WT_SESSION.open_cursor, see dist/api_data.py} * @config{append, append the value as a new record\, creating a new * record number key; valid only for cursors with record number keys., a * boolean flag; default \c false.} @@ -977,7 +977,7 @@ struct __wt_session { * @param name the URI of the object to create, such as * \c "table:stock". For a description of URI formats * see @ref data_sources. - * @configstart{session.create, see dist/api_data.py} + * @configstart{WT_SESSION.create, see dist/api_data.py} * @config{allocation_size, the file unit allocation size\, in bytes\, * must a power-of-two; smaller values decrease the file space required * by overflow items\, and the default value of 4KB is a good choice @@ -1181,7 +1181,7 @@ struct __wt_session { * @param session the session handle * @param name the URI of the object to compact, such as * \c "table:stock" - * @configstart{session.compact, see dist/api_data.py} + * @configstart{WT_SESSION.compact, see dist/api_data.py} * @config{timeout, maximum amount of time to allow for compact in * seconds. The actual amount of time spent in compact may exceed the * configured value. A value of zero disables the timeout., an integer; @@ -1199,7 +1199,7 @@ struct __wt_session { * * @param session the session handle * @param name the URI of the object to drop, such as \c "table:stock" - * @configstart{session.drop, see dist/api_data.py} + * @configstart{WT_SESSION.drop, see dist/api_data.py} * @config{force, return success if the object does not exist., a * boolean flag; default \c false.} * @config{remove_files, should the underlying files be removed?., a @@ -1229,7 +1229,7 @@ struct __wt_session { * @param session the session handle * @param uri the current URI of the object, such as \c "table:old" * @param newuri the new URI of the object, such as \c "table:new" - * @configempty{session.rename, see dist/api_data.py} + * @configempty{WT_SESSION.rename, see dist/api_data.py} * @ebusy_errors */ int __F(rename)(WT_SESSION *session, @@ -1254,7 +1254,7 @@ struct __wt_session { * * @param session the session handle * @param name the URI of the file or table to salvage - * @configstart{session.salvage, see dist/api_data.py} + * @configstart{WT_SESSION.salvage, see dist/api_data.py} * @config{force, force salvage even of files that do not appear to be * WiredTiger files., a boolean flag; default \c false.} * @configend @@ -1284,7 +1284,7 @@ struct __wt_session { * @param stop optional cursor marking the last record discarded; * if <code>NULL</code>, the truncate continues to the end of the * object - * @configempty{session.truncate, see dist/api_data.py} + * @configempty{WT_SESSION.truncate, see dist/api_data.py} * @ebusy_errors */ int __F(truncate)(WT_SESSION *session, @@ -1302,7 +1302,7 @@ struct __wt_session { * * @param session the session handle * @param name the URI of the file or table to upgrade - * @configempty{session.upgrade, see dist/api_data.py} + * @configempty{WT_SESSION.upgrade, see dist/api_data.py} * @ebusy_errors */ int __F(upgrade)(WT_SESSION *session, @@ -1319,7 +1319,7 @@ struct __wt_session { * * @param session the session handle * @param name the URI of the file or table to verify - * @configstart{session.verify, see dist/api_data.py} + * @configstart{WT_SESSION.verify, see dist/api_data.py} * @config{dump_address, Display addresses and page types as pages are * verified\, using the application's message handler\, intended for * debugging., a boolean flag; default \c false.} @@ -1362,7 +1362,7 @@ struct __wt_session { * @snippet ex_all.c transaction commit/rollback * * @param session the session handle - * @configstart{session.begin_transaction, see dist/api_data.py} + * @configstart{WT_SESSION.begin_transaction, see dist/api_data.py} * @config{isolation, the isolation level for this transaction; defaults * to the session's isolation level., a string\, chosen from the * following options: \c "read-uncommitted"\, \c "read-committed"\, \c @@ -1391,7 +1391,7 @@ struct __wt_session { * @snippet ex_all.c transaction commit/rollback * * @param session the session handle - * @configempty{session.commit_transaction, see dist/api_data.py} + * @configempty{WT_SESSION.commit_transaction, see dist/api_data.py} * @errors */ int __F(commit_transaction)(WT_SESSION *session, const char *config); @@ -1406,7 +1406,7 @@ struct __wt_session { * @snippet ex_all.c transaction commit/rollback * * @param session the session handle - * @configempty{session.rollback_transaction, see dist/api_data.py} + * @configempty{WT_SESSION.rollback_transaction, see dist/api_data.py} * @errors */ int __F(rollback_transaction)(WT_SESSION *session, const char *config); @@ -1420,7 +1420,7 @@ struct __wt_session { * @snippet ex_all.c Checkpoint examples * * @param session the session handle - * @configstart{session.checkpoint, see dist/api_data.py} + * @configstart{WT_SESSION.checkpoint, see dist/api_data.py} * @config{drop, specify a list of checkpoints to drop. The list may * additionally contain one of the following keys: \c "from=all" to drop * all checkpoints\, \c "from=<checkpoint>" to drop all checkpoints @@ -1495,7 +1495,7 @@ struct __wt_connection { * * @param connection the connection handle * @param uri the connection handle - * @configstart{connection.async_new_op, see dist/api_data.py} + * @configstart{WT_CONNECTION.async_new_op, see dist/api_data.py} * @config{append, append the value as a new record\, creating a new * record number key; valid only for operations with record number * keys., a boolean flag; default \c false.} @@ -1531,7 +1531,7 @@ struct __wt_connection { * @snippet ex_all.c Close a connection * * @param connection the connection handle - * @configstart{connection.close, see dist/api_data.py} + * @configstart{WT_CONNECTION.close, see dist/api_data.py} * @config{leak_memory, don't free memory during close., a boolean flag; * default \c false.} * @configend @@ -1546,7 +1546,7 @@ struct __wt_connection { * @snippet ex_all.c Reconfigure a connection * * @param connection the connection handle - * @configstart{connection.reconfigure, see dist/api_data.py} + * @configstart{WT_CONNECTION.reconfigure, see dist/api_data.py} * @config{async = (, asynchronous operations configuration options., a * set of related configuration options defined below.} * @config{ enabled, enable asynchronous @@ -1555,7 +1555,8 @@ struct __wt_connection { * simultaneous asynchronous operations., an integer between 1 and 4096; * default \c 1024.} * @config{ threads, the number - * of worker threads to service asynchronous requests., an integer + * of worker threads to service asynchronous requests. Each worker + * thread uses a session from the configured session_max., an integer * between 1 and 20; default \c 2.} * @config{ ),,} * @config{cache_overhead, assume the heap allocator overhead is the @@ -1570,8 +1571,9 @@ struct __wt_connection { * @config{cache_size, maximum heap memory to allocate for the cache. A * database should configure either \c cache_size or \c shared_cache but * not both., an integer between 1MB and 10TB; default \c 100MB.} - * @config{checkpoint = (, periodically checkpoint the database., a set - * of related configuration options defined below.} + * @config{checkpoint = (, periodically checkpoint the database. + * Enabling the checkpoint server uses a session from the configured + * session_max., a set of related configuration options defined below.} * @config{ log_size, wait for this amount of log * record bytes to be written to the log between each checkpoint. A * database can configure both log_size and wait to set an upper bound @@ -1590,7 +1592,8 @@ struct __wt_connection { * @config{ threads_max, maximum number of * threads WiredTiger will start to help evict pages from cache. The * number of threads started will vary depending on the current eviction - * load., an integer between 1 and 20; default \c 1.} + * load. Each eviction worker thread uses a session from the configured + * session_max., an integer between 1 and 20; default \c 1.} * @config{ threads_min, minimum number of * threads WiredTiger will start to help evict pages from cache. The * number of threads currently running will vary depending on the @@ -1609,24 +1612,32 @@ struct __wt_connection { * integer between 10 and 99; default \c 95.} * @config{file_manager = (, control how file handles are managed., a * set of related configuration options defined below.} + * @config{ close_handle_minimum, number of + * handles open before the file manager will look for handles to close., + * an integer greater than or equal to 0; default \c 250.} * @config{ close_idle_time, amount of time in * seconds a file handle needs to be idle before attempting to close - * it., an integer between 1 and 1000; default \c 30.} + * it., an integer between 1 and 100000; default \c 30.} * @config{ close_scan_interval, interval in * seconds at which to check for files that are inactive and close - * them., an integer between 1 and 1000; default \c 10.} + * them., an integer between 1 and 100000; default \c 10.} * @config{ ),,} * @config{lsm_manager = (, configure database wide options for LSM tree - * management., a set of related configuration options defined below.} - * @config{ merge, merge LSM chunks where - * possible., a boolean flag; default \c true.} + * management. The LSM manager is started automatically the first time + * an LSM tree is opened. The LSM manager uses a session from the + * configured session_max., a set of related configuration options + * defined below.} + * @config{ merge, merge LSM + * chunks where possible., a boolean flag; default \c true.} * @config{ worker_thread_max, Configure a set of - * threads to manage merging LSM trees in the database., an integer - * between 3 and 20; default \c 4.} + * threads to manage merging LSM trees in the database. Each worker + * thread uses a session handle from the configured session_max., an + * integer between 3 and 20; default \c 4.} * @config{ ),,} * @config{shared_cache = (, shared cache configuration options. A * database should configure either a cache_size or a shared_cache not - * both., a set of related configuration options defined below.} + * both. Enabling a shared cache uses a session from the configured + * session_max., a set of related configuration options defined below.} * @config{ chunk, the granularity that a shared * cache is redistributed., an integer between 1MB and 10TB; default \c * 10MB.} @@ -1657,9 +1668,11 @@ struct __wt_connection { * \c "none"\, \c "clear"; default \c none.} * @config{statistics_log = (, log any statistics the database is * configured to maintain\, to a file. See @ref statistics for more - * information., a set of related configuration options defined below.} - * @config{ on_close, log statistics on database - * close., a boolean flag; default \c false.} + * information. Enabling the statistics log server uses a session from + * the configured session_max., a set of related configuration options + * defined below.} + * @config{ on_close, log + * statistics on database close., a boolean flag; default \c false.} * @config{ path, the pathname to a file into * which the log records are written\, may contain ISO C standard * strftime conversion specifications. If the value is not an absolute @@ -1710,7 +1723,7 @@ struct __wt_connection { * @snippet ex_all.c Configure method configuration * * @param connection the connection handle - * @param method the name of the method + * @param method the method being configured * @param uri the object type or NULL for all object types * @param config the additional configuration's name and default value * @param type the additional configuration's type (must be one of @@ -1747,7 +1760,7 @@ struct __wt_connection { * @param connection the connection handle * @param errhandler An error handler. If <code>NULL</code>, the * connection's error handler is used - * @configstart{connection.open_session, see dist/api_data.py} + * @configstart{WT_CONNECTION.open_session, see dist/api_data.py} * @config{isolation, the default isolation level for operations in this * session., a string\, chosen from the following options: \c * "read-uncommitted"\, \c "read-committed"\, \c "snapshot"; default \c @@ -1774,7 +1787,7 @@ struct __wt_connection { * @param path the filename of the extension module, or \c "local" to * search the current application binary for the initialization * function, see @ref extensions for more details. - * @configstart{connection.load_extension, see dist/api_data.py} + * @configstart{WT_CONNECTION.load_extension, see dist/api_data.py} * @config{config, configuration string passed to the entry point of the * extension as its WT_CONFIG_ARG argument., a string; default empty.} * @config{entry, the entry point of the extension\, called to @@ -1805,7 +1818,7 @@ struct __wt_connection { * @param prefix the URI prefix for this data source, e.g., "file:" * @param data_source the application-supplied implementation of * WT_DATA_SOURCE to manage this data source. - * @configempty{connection.add_data_source, see dist/api_data.py} + * @configempty{WT_CONNECTION.add_data_source, see dist/api_data.py} * @errors */ int __F(add_data_source)(WT_CONNECTION *connection, const char *prefix, @@ -1823,7 +1836,7 @@ struct __wt_connection { * @param name the name of the collation to be used in calls to * WT_SESSION::create, may not be \c "none" * @param collator the application-supplied collation handler - * @configempty{connection.add_collator, see dist/api_data.py} + * @configempty{WT_CONNECTION.add_collator, see dist/api_data.py} * @errors */ int __F(add_collator)(WT_CONNECTION *connection, @@ -1843,7 +1856,7 @@ struct __wt_connection { * @param name the name of the compression function to be used in calls * to WT_SESSION::create, may not be \c "none" * @param compressor the application-supplied compression handler - * @configempty{connection.add_compressor, see dist/api_data.py} + * @configempty{WT_CONNECTION.add_compressor, see dist/api_data.py} * @errors */ int __F(add_compressor)(WT_CONNECTION *connection, @@ -1861,7 +1874,7 @@ struct __wt_connection { * @param name the name of the extractor to be used in calls to * WT_SESSION::create, may not be \c "none" * @param extractor the application-supplied extractor - * @configempty{connection.add_extractor, see dist/api_data.py} + * @configempty{WT_CONNECTION.add_extractor, see dist/api_data.py} * @errors */ int __F(add_extractor)(WT_CONNECTION *connection, const char *name, @@ -1897,8 +1910,10 @@ struct __wt_connection { * maximum number of expected simultaneous asynchronous operations., an integer * between 1 and 4096; default \c 1024.} * @config{ threads, the number of worker threads to - * service asynchronous requests., an integer between 1 and 20; default \c 2.} - * @config{ ),,} + * service asynchronous requests. Each worker thread uses a session from the + * configured session_max., an integer between 1 and 20; default \c 2.} + * @config{ + * ),,} * @config{buffer_alignment, in-memory alignment (in bytes) for buffers used for * I/O. The default value of -1 indicates a platform-specific alignment value * should be used (4KB on Linux systems when direct I/O is configured\, zero @@ -1914,7 +1929,8 @@ struct __wt_connection { * @config{cache_size, maximum heap memory to allocate for the cache. A * database should configure either \c cache_size or \c shared_cache but not * both., an integer between 1MB and 10TB; default \c 100MB.} - * @config{checkpoint = (, periodically checkpoint the database., a set of + * @config{checkpoint = (, periodically checkpoint the database. Enabling the + * checkpoint server uses a session from the configured session_max., a set of * related configuration options defined below.} * @config{ log_size, wait for this amount of log record * bytes to be written to the log between each checkpoint. A database can @@ -1949,13 +1965,15 @@ struct __wt_connection { * configuration options defined below.} * @config{ threads_max, maximum number of threads * WiredTiger will start to help evict pages from cache. The number of threads - * started will vary depending on the current eviction load., an integer between - * 1 and 20; default \c 1.} - * @config{ threads_min, minimum - * number of threads WiredTiger will start to help evict pages from cache. The - * number of threads currently running will vary depending on the current - * eviction load., an integer between 1 and 20; default \c 1.} - * @config{ ),,} + * started will vary depending on the current eviction load. Each eviction + * worker thread uses a session from the configured session_max., an integer + * between 1 and 20; default \c 1.} + * @config{ threads_min, + * minimum number of threads WiredTiger will start to help evict pages from + * cache. The number of threads currently running will vary depending on the + * current eviction load., an integer between 1 and 20; default \c 1.} + * @config{ + * ),,} * @config{eviction_dirty_target, continue evicting until the cache has less * dirty memory than the value\, as a percentage of the total cache size. Dirty * pages will only be evicted if the cache is full enough to trigger eviction., @@ -1980,19 +1998,23 @@ struct __wt_connection { * following options: \c "data"\, \c "log"; default empty.} * @config{file_manager = (, control how file handles are managed., a set of * related configuration options defined below.} + * @config{ close_handle_minimum, number of handles open + * before the file manager will look for handles to close., an integer greater + * than or equal to 0; default \c 250.} * @config{ close_idle_time, amount of time in seconds a * file handle needs to be idle before attempting to close it., an integer - * between 1 and 1000; default \c 30.} + * between 1 and 100000; default \c 30.} * @config{ close_scan_interval, interval in seconds at * which to check for files that are inactive and close them., an integer - * between 1 and 1000; default \c 10.} + * between 1 and 100000; default \c 10.} * @config{ ),,} * @config{hazard_max, maximum number of simultaneous hazard pointers per * session handle., an integer greater than or equal to 15; default \c 1000.} - * @config{log = (, enable logging., a set of related configuration options - * defined below.} - * @config{ archive, automatically - * archive unneeded log files., a boolean flag; default \c true.} + * @config{log = (, enable logging. Enabling logging uses three sessions from + * the configured session_max., a set of related configuration options defined + * below.} + * @config{ archive, automatically archive + * unneeded log files., a boolean flag; default \c true.} * @config{ compressor, configure a compressor for log * records. Permitted values are \c "none" or custom compression engine name * created with WT_CONNECTION::add_compressor. If WiredTiger has builtin @@ -2013,11 +2035,14 @@ struct __wt_connection { * chosen from the following options: \c "error"\, \c "on"; default \c on.} * @config{ ),,} * @config{lsm_manager = (, configure database wide options for LSM tree - * management., a set of related configuration options defined below.} + * management. The LSM manager is started automatically the first time an LSM + * tree is opened. The LSM manager uses a session from the configured + * session_max., a set of related configuration options defined below.} * @config{ merge, merge LSM chunks where possible., a * boolean flag; default \c true.} * @config{ worker_thread_max, Configure a set of threads - * to manage merging LSM trees in the database., an integer between 3 and 20; + * to manage merging LSM trees in the database. Each worker thread uses a + * session handle from the configured session_max., an integer between 3 and 20; * default \c 4.} * @config{ ),,} * @config{mmap, Use memory mapping to access files when possible., a boolean @@ -2029,7 +2054,8 @@ struct __wt_connection { * @config{session_max, maximum expected number of sessions (including server * threads)., an integer greater than or equal to 1; default \c 100.} * @config{shared_cache = (, shared cache configuration options. A database - * should configure either a cache_size or a shared_cache not both., a set of + * should configure either a cache_size or a shared_cache not both. Enabling a + * shared cache uses a session from the configured session_max., a set of * related configuration options defined below.} * @config{ chunk, the granularity that a shared cache is * redistributed., an integer between 1MB and 10TB; default \c 10MB.} @@ -2057,8 +2083,9 @@ struct __wt_connection { * list\, with values chosen from the following options: \c "all"\, \c "fast"\, * \c "none"\, \c "clear"; default \c none.} * @config{statistics_log = (, log any statistics the database is configured to - * maintain\, to a file. See @ref statistics for more information., a set of - * related configuration options defined below.} + * maintain\, to a file. See @ref statistics for more information. Enabling + * the statistics log server uses a session from the configured session_max., a + * set of related configuration options defined below.} * @config{ on_close, log statistics on database close., * a boolean flag; default \c false.} * @config{ path, the @@ -2421,7 +2448,7 @@ int wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up); /*! @} */ /*! - * @name Configuration string parsing + * @name Configuration strings * @{ */ @@ -2477,6 +2504,27 @@ struct __wt_config_item { type; }; +#if !defined(SWIG) && !defined(DOXYGEN) +/*! + * Validate a configuration string for a WiredTiger API. + * This API is outside the scope of a WiredTiger connection handle, since + * applications may need to validate configuration strings prior to calling + * ::wiredtiger_open. + * @param session the session handle (may be \c NULL if the database not yet + * opened). + * @param errhandler An error handler (used if \c session is \c NULL; if both + * \c session and \c errhandler are \c NULL, error messages will be written to + * stderr). + * @param name the WiredTiger function or method to validate. + * @param config the configuration string being parsed. + * @returns zero for success, non-zero to indicate an error. + * + * @snippet ex_all.c Validate a configuration string + */ +int wiredtiger_config_validate(WT_SESSION *session, + WT_EVENT_HANDLER *errhandler, const char *name, const char *config); +#endif + /*! * Create a handle that can be used to parse or create configuration strings * compatible with WiredTiger APIs. @@ -2490,6 +2538,8 @@ struct __wt_config_item { * @param len the number of valid bytes in \c config * @param[out] config_parserp A pointer to the newly opened handle * @errors + * + * @snippet ex_config_parse.c Create a configuration parser */ int wiredtiger_config_parser_open(WT_SESSION *session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp); diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 576827bebcd..abc552a2835 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -41,9 +41,6 @@ extern "C" { #else #include <pthread.h> #endif -#ifdef HAVE_PTHREAD_NP_H -#include <pthread_np.h> -#endif #include <stddef.h> #include <stdio.h> #include <stdint.h> @@ -127,8 +124,6 @@ struct __wt_connection_impl; typedef struct __wt_connection_impl WT_CONNECTION_IMPL; struct __wt_connection_stats; typedef struct __wt_connection_stats WT_CONNECTION_STATS; -struct __wt_connection_stats_spinlock; - typedef struct __wt_connection_stats_spinlock WT_CONNECTION_STATS_SPINLOCK; struct __wt_cursor_backup; typedef struct __wt_cursor_backup WT_CURSOR_BACKUP; struct __wt_cursor_backup_entry; @@ -331,6 +326,7 @@ struct __wt_update; #include "txn.i" /* required by btree.i */ #include "btree.i" /* required by cursor.i */ +#include "btree_cmp.i" #include "cursor.i" #include "bitstring.i" |